ragfs_extract/
registry.rs

1//! Extractor registry for managing content extractors.
2
3use ragfs_core::{ContentExtractor, ExtractError, ExtractedContent};
4use std::collections::HashMap;
5use std::path::Path;
6use std::sync::Arc;
7
8/// Registry of content extractors.
9pub struct ExtractorRegistry {
10    /// Named extractors
11    extractors: HashMap<String, Arc<dyn ContentExtractor>>,
12    /// MIME type to extractor name mapping
13    mime_mapping: HashMap<String, String>,
14}
15
16impl ExtractorRegistry {
17    /// Create a new empty registry.
18    #[must_use]
19    pub fn new() -> Self {
20        Self {
21            extractors: HashMap::new(),
22            mime_mapping: HashMap::new(),
23        }
24    }
25
26    /// Register an extractor.
27    pub fn register<E: ContentExtractor + 'static>(&mut self, name: &str, extractor: E) {
28        let extractor = Arc::new(extractor);
29        for mime in extractor.supported_types() {
30            self.mime_mapping
31                .insert((*mime).to_string(), name.to_string());
32        }
33        self.extractors.insert(name.to_string(), extractor);
34    }
35
36    /// Get an extractor for a MIME type.
37    #[must_use]
38    pub fn get_for_mime(&self, mime_type: &str) -> Option<Arc<dyn ContentExtractor>> {
39        self.mime_mapping
40            .get(mime_type)
41            .and_then(|name| self.extractors.get(name))
42            .cloned()
43    }
44
45    /// Get an extractor that can handle a file.
46    #[must_use]
47    pub fn get_for_file(&self, path: &Path, mime_type: &str) -> Option<Arc<dyn ContentExtractor>> {
48        // First try by MIME type
49        if let Some(extractor) = self.get_for_mime(mime_type) {
50            return Some(extractor);
51        }
52
53        // Then try by extension
54        for extractor in self.extractors.values() {
55            if extractor.can_extract(path, mime_type) {
56                return Some(extractor.clone());
57            }
58        }
59
60        None
61    }
62
63    /// Extract content from a file.
64    pub async fn extract(
65        &self,
66        path: &Path,
67        mime_type: &str,
68    ) -> Result<ExtractedContent, ExtractError> {
69        let extractor = self
70            .get_for_file(path, mime_type)
71            .ok_or_else(|| ExtractError::UnsupportedType(mime_type.to_string()))?;
72
73        extractor.extract(path).await
74    }
75}
76
77impl Default for ExtractorRegistry {
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TextExtractor;
    use tempfile::tempdir;

    /// Registry with the crate's `TextExtractor` registered under `"text"`.
    fn text_registry() -> ExtractorRegistry {
        let mut registry = ExtractorRegistry::new();
        registry.register("text", TextExtractor::new());
        registry
    }

    #[test]
    fn test_new_registry_is_empty() {
        let ExtractorRegistry {
            extractors,
            mime_mapping,
        } = ExtractorRegistry::new();
        assert!(extractors.is_empty());
        assert!(mime_mapping.is_empty());
    }

    #[test]
    fn test_register_extractor() {
        let registry = text_registry();

        assert!(registry.extractors.contains_key("text"));
        // Registration must have routed at least text/plain to the extractor.
        assert!(registry.mime_mapping.contains_key("text/plain"));
    }

    #[test]
    fn test_get_for_mime_existing() {
        assert!(text_registry().get_for_mime("text/plain").is_some());
    }

    #[test]
    fn test_get_for_mime_nonexistent() {
        let empty = ExtractorRegistry::new();
        assert!(empty.get_for_mime("video/mp4").is_none());
    }

    #[test]
    fn test_get_for_file_by_mime() {
        let registry = text_registry();
        let target = Path::new("/test/file.txt");

        assert!(registry.get_for_file(target, "text/plain").is_some());
    }

    #[test]
    fn test_get_for_file_unknown_type() {
        let empty = ExtractorRegistry::new();
        let target = Path::new("/test/file.xyz");

        assert!(empty.get_for_file(target, "application/unknown").is_none());
    }

    #[tokio::test]
    async fn test_extract_success() {
        // Real file on disk so the extractor has something to read.
        let scratch = tempdir().unwrap();
        let source = scratch.path().join("test.txt");
        std::fs::write(&source, "Hello, world!").unwrap();

        let registry = text_registry();
        let outcome = registry.extract(&source, "text/plain").await;
        assert!(outcome.is_ok());

        assert_eq!(outcome.unwrap().text, "Hello, world!");
    }

    #[tokio::test]
    async fn test_extract_unsupported_type() {
        let scratch = tempdir().unwrap();
        let source = scratch.path().join("test.bin");
        std::fs::write(&source, [0u8; 10]).unwrap();

        // Nothing is registered, so no extractor can accept the file.
        let empty = ExtractorRegistry::new();
        let outcome = empty.extract(&source, "application/octet-stream").await;
        assert!(outcome.is_err());

        if let ExtractError::UnsupportedType(mime) = outcome.unwrap_err() {
            assert_eq!(mime, "application/octet-stream");
        } else {
            panic!("Expected UnsupportedType error");
        }
    }

    #[test]
    fn test_multiple_extractors() {
        // Only one extractor is registered here; more could be added.
        let registry = text_registry();

        assert_eq!(registry.extractors.len(), 1);
        // The single extractor must have contributed at least one MIME route.
        assert!(!registry.mime_mapping.is_empty());
    }

    #[test]
    fn test_default_implementation() {
        assert!(ExtractorRegistry::default().extractors.is_empty());
    }
}