ragfs_extract/
registry.rs1use ragfs_core::{ContentExtractor, ExtractError, ExtractedContent};
4use std::collections::HashMap;
5use std::path::Path;
6use std::sync::Arc;
7
8pub struct ExtractorRegistry {
10 extractors: HashMap<String, Arc<dyn ContentExtractor>>,
12 mime_mapping: HashMap<String, String>,
14}
15
16impl ExtractorRegistry {
17 #[must_use]
19 pub fn new() -> Self {
20 Self {
21 extractors: HashMap::new(),
22 mime_mapping: HashMap::new(),
23 }
24 }
25
26 pub fn register<E: ContentExtractor + 'static>(&mut self, name: &str, extractor: E) {
28 let extractor = Arc::new(extractor);
29 for mime in extractor.supported_types() {
30 self.mime_mapping
31 .insert((*mime).to_string(), name.to_string());
32 }
33 self.extractors.insert(name.to_string(), extractor);
34 }
35
36 #[must_use]
38 pub fn get_for_mime(&self, mime_type: &str) -> Option<Arc<dyn ContentExtractor>> {
39 self.mime_mapping
40 .get(mime_type)
41 .and_then(|name| self.extractors.get(name))
42 .cloned()
43 }
44
45 #[must_use]
47 pub fn get_for_file(&self, path: &Path, mime_type: &str) -> Option<Arc<dyn ContentExtractor>> {
48 if let Some(extractor) = self.get_for_mime(mime_type) {
50 return Some(extractor);
51 }
52
53 for extractor in self.extractors.values() {
55 if extractor.can_extract(path, mime_type) {
56 return Some(extractor.clone());
57 }
58 }
59
60 None
61 }
62
63 pub async fn extract(
65 &self,
66 path: &Path,
67 mime_type: &str,
68 ) -> Result<ExtractedContent, ExtractError> {
69 let extractor = self
70 .get_for_file(path, mime_type)
71 .ok_or_else(|| ExtractError::UnsupportedType(mime_type.to_string()))?;
72
73 extractor.extract(path).await
74 }
75}
76
77impl Default for ExtractorRegistry {
78 fn default() -> Self {
79 Self::new()
80 }
81}
82
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TextExtractor;
    use tempfile::tempdir;

    /// Builds a registry whose only entry is `TextExtractor` under the name "text".
    fn text_registry() -> ExtractorRegistry {
        let mut registry = ExtractorRegistry::new();
        registry.register("text", TextExtractor::new());
        registry
    }

    /// A freshly constructed registry has no extractors and no MIME routes.
    #[test]
    fn test_new_registry_is_empty() {
        let fresh = ExtractorRegistry::new();
        assert!(fresh.extractors.is_empty());
        assert!(fresh.mime_mapping.is_empty());
    }

    /// Registering records both the extractor name and a "text/plain" route.
    #[test]
    fn test_register_extractor() {
        let populated = text_registry();

        assert!(populated.extractors.contains_key("text"));
        assert!(populated.mime_mapping.contains_key("text/plain"));
    }

    /// A routed MIME type resolves to an extractor.
    #[test]
    fn test_get_for_mime_existing() {
        let populated = text_registry();

        assert!(populated.get_for_mime("text/plain").is_some());
    }

    /// An empty registry resolves nothing by MIME type.
    #[test]
    fn test_get_for_mime_nonexistent() {
        let fresh = ExtractorRegistry::new();

        assert!(fresh.get_for_mime("video/mp4").is_none());
    }

    /// File lookup succeeds through the MIME route.
    #[test]
    fn test_get_for_file_by_mime() {
        let populated = text_registry();
        let target = Path::new("/test/file.txt");

        assert!(populated.get_for_file(target, "text/plain").is_some());
    }

    /// File lookup on an empty registry finds nothing.
    #[test]
    fn test_get_for_file_unknown_type() {
        let fresh = ExtractorRegistry::new();
        let target = Path::new("/test/file.xyz");

        assert!(fresh.get_for_file(target, "application/unknown").is_none());
    }

    /// End-to-end extraction of a small text file returns its contents.
    #[tokio::test]
    async fn test_extract_success() {
        let scratch = tempdir().unwrap();
        let file_path = scratch.path().join("test.txt");
        std::fs::write(&file_path, "Hello, world!").unwrap();

        let populated = text_registry();
        let outcome = populated.extract(&file_path, "text/plain").await;
        assert!(outcome.is_ok());

        let extracted = outcome.unwrap();
        assert_eq!(extracted.text, "Hello, world!");
    }

    /// With no extractor registered, extraction reports the offending MIME type.
    #[tokio::test]
    async fn test_extract_unsupported_type() {
        let scratch = tempdir().unwrap();
        let file_path = scratch.path().join("test.bin");
        std::fs::write(&file_path, [0u8; 10]).unwrap();

        let fresh = ExtractorRegistry::new();
        let outcome = fresh
            .extract(&file_path, "application/octet-stream")
            .await;
        assert!(outcome.is_err());

        match outcome.unwrap_err() {
            ExtractError::UnsupportedType(mime) => {
                assert_eq!(mime, "application/octet-stream");
            }
            _ => panic!("Expected UnsupportedType error"),
        }
    }

    /// Checks the bookkeeping after registration; only a single extractor is
    /// registered here.
    #[test]
    fn test_multiple_extractors() {
        let populated = text_registry();

        assert_eq!(populated.extractors.len(), 1);
        assert!(!populated.mime_mapping.is_empty());
    }

    /// `Default::default()` produces a registry with no extractors.
    #[test]
    fn test_default_implementation() {
        let fresh = ExtractorRegistry::default();

        assert!(fresh.extractors.is_empty());
    }
}