ragfs_embed/
lib.rs

1//! # ragfs-embed
2//!
3//! Local embedding generation for RAGFS using the Candle ML framework.
4//!
5//! This crate provides offline, privacy-preserving vector embeddings without external APIs.
6//! Embeddings are generated using the `gte-small` model from Hugging Face.
7//!
8//! ## Features
9//!
10//! - **Local-first**: All computation happens on your machine
11//! - **Offline capable**: Works without internet after initial model download
12//! - **No API costs**: No rate limits or usage fees
13//! - **Concurrent**: Semaphore-limited pool for parallel embedding generation
14//! - **Cached**: LRU cache to avoid redundant computations
15//!
16//! ## Cargo Features
17//!
18//! - `candle` (default): Enables the Candle ML stack for real embeddings
19//! - Without `candle`: Only `NoopEmbedder` and `EmbedderPool` are available (for testing/development)
20//!
21//! ## Model Details
22//!
23//! | Property | Value |
24//! |----------|-------|
25//! | Model | `thenlper/gte-small` |
26//! | Dimension | 384 |
27//! | Max tokens | 512 |
28//! | Architecture | BERT-based |
29//! | Size | ~100MB |
30//!
31//! ## Usage
32//!
33//! ```rust,ignore
34//! use ragfs_embed::{CandleEmbedder, EmbedderPool};
35//! use ragfs_core::{Embedder, EmbeddingConfig};
36//! use std::sync::Arc;
37//!
38//! // Create and initialize the embedder (std paths do not expand `~`; prefer an absolute directory)
39//! let embedder = CandleEmbedder::new("~/.local/share/ragfs/models".into());
40//! embedder.init().await?;  // Downloads model on first run
41//!
42//! // Wrap with a semaphore-limited pool for concurrency
43//! let pool = EmbedderPool::new(Arc::new(embedder), 4);
44//!
45//! // Embed documents
46//! let config = EmbeddingConfig::default();
47//! let texts = vec!["Hello world", "Machine learning"];
48//! let embeddings = pool.embed_batch(&texts, &config).await?;
49//! // Each embedding is a Vec<f32> with 384 dimensions
50//! ```
51//!
52//! ## Caching
53//!
54//! Use [`EmbeddingCache`] to avoid recomputing embeddings for identical text:
55//!
56//! ```rust,ignore
57//! use ragfs_embed::EmbeddingCache;
58//!
59//! // Create a cache with default capacity (10,000 entries)
60//! let cache = EmbeddingCache::new(embedder);
61//!
62//! // Or with custom capacity
63//! let cache = EmbeddingCache::with_capacity(embedder, 50_000);
64//!
65//! // Embeddings are cached by content hash
66//! let result = cache.embed_text(&["Hello"], &config).await?;
67//! ```
68//!
69//! ## Components
70//!
71//! | Type | Description |
72//! |------|-------------|
73//! | [`CandleEmbedder`] | Transformer-based embeddings using `gte-small` (requires `candle` feature) |
74//! | [`EmbeddingCache`] | LRU cache for embedding results (requires `candle` feature) |
75//! | [`EmbedderPool`] | Concurrent embedding with semaphore limiting (always available) |
76//! | [`NoopEmbedder`] | No-op embedder for testing (always available) |
77
78// Candle-based modules (optional)
79#[cfg(feature = "candle")]
80pub mod cache;
81#[cfg(feature = "candle")]
82pub mod candle;
83
84#[cfg(feature = "candle")]
85pub use cache::EmbeddingCache;
86#[cfg(feature = "candle")]
87pub use candle::CandleEmbedder;
88
89// Always available modules
90pub mod noop;
91pub mod pool;
92
93pub use noop::NoopEmbedder;
94pub use pool::EmbedderPool;