1use chrono::{DateTime, Utc};
12use ragfs_core::{
13 Chunk, DistanceMetric, Embedder, EmbeddingConfig, FileRecord, SearchQuery, VectorStore,
14};
15use serde::{Deserialize, Serialize};
16use std::collections::{HashMap, HashSet};
17use std::fs;
18use std::path::PathBuf;
19use std::sync::Arc;
20use tokio::sync::RwLock;
21use tracing::{debug, info, warn};
22use uuid::Uuid;
23
/// Parameters for a semantic file-organization request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrganizeRequest {
    /// Directory to organize (absolute, or relative to the indexed source root).
    pub scope: PathBuf,
    /// How files should be grouped.
    pub strategy: OrganizeStrategy,
    /// Upper bound on the number of groups/clusters (serde default: 10).
    #[serde(default = "default_max_groups")]
    pub max_groups: usize,
    /// Similarity floor used when reporting clustering confidence (serde default: 0.7).
    #[serde(default = "default_similarity_threshold")]
    pub similarity_threshold: f32,
}
38
/// Serde default for [`OrganizeRequest::max_groups`].
fn default_max_groups() -> usize {
    const DEFAULT_MAX_GROUPS: usize = 10;
    DEFAULT_MAX_GROUPS
}
42
/// Serde default for [`OrganizeRequest::similarity_threshold`].
fn default_similarity_threshold() -> f32 {
    const DEFAULT_SIMILARITY_THRESHOLD: f32 = 0.7;
    DEFAULT_SIMILARITY_THRESHOLD
}
46
/// Strategy used to group files in an organize plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OrganizeStrategy {
    /// Cluster by embedding similarity (k-means style topic folders).
    ByTopic,
    /// Group by file extension (falling back to MIME-type major component).
    ByType,
    /// Group by top-level directory component under the scope.
    ByProject,
    /// Assign files to caller-supplied category names via embeddings.
    Custom { categories: Vec<String> },
}
60
/// A reviewable, persistable set of filesystem actions produced by a
/// semantic operation (organize/cleanup/dedupe).
///
/// Plans start `Pending` and must be explicitly approved before their
/// actions are executed; they are serialized to JSON on disk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticPlan {
    /// Unique plan identifier (also used as the on-disk file name).
    pub id: Uuid,
    /// Creation timestamp; used for retention-based purging.
    pub created_at: DateTime<Utc>,
    /// The operation this plan was generated for.
    pub operation: PlanOperation,
    /// Human-readable summary of what the plan does.
    pub description: String,
    /// Ordered list of filesystem actions to execute on approval.
    pub actions: Vec<PlanAction>,
    /// Current lifecycle state.
    pub status: PlanStatus,
    /// Aggregate statistics about the plan's effects.
    pub impact: PlanImpact,
}
79
/// The high-level operation a [`SemanticPlan`] was created for.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PlanOperation {
    /// Reorganize files under `scope` using the given strategy.
    Organize {
        scope: PathBuf,
        strategy: OrganizeStrategy,
    },
    /// Remove cleanup candidates under `scope`.
    Cleanup { scope: PathBuf },
    /// Remove near-duplicate files under `scope`.
    Dedupe { scope: PathBuf },
}
94
/// A single proposed filesystem action together with why it was proposed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlanAction {
    /// The concrete filesystem operation.
    pub action: ActionType,
    /// Confidence score (typically 0.0-1.0; 1.0 for mechanical actions like mkdir).
    pub confidence: f32,
    /// Human-readable justification for the action.
    pub reason: String,
}
105
/// Concrete filesystem operations a plan can perform.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ActionType {
    /// Move/rename a file from `from` to `to`.
    Move { from: PathBuf, to: PathBuf },
    /// Create a directory at `path`.
    Mkdir { path: PathBuf },
    /// Delete the file at `path`.
    Delete { path: PathBuf },
    /// Create a symbolic link at `link` pointing to `target`.
    Symlink { target: PathBuf, link: PathBuf },
}
119
/// Lifecycle state of a [`SemanticPlan`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum PlanStatus {
    /// Awaiting user review.
    Pending,
    /// Approved; execution has started.
    Approved,
    /// Rejected by the user; never executed.
    Rejected,
    /// All actions executed successfully.
    Completed,
    /// Execution stopped on the described error.
    Failed { error: String },
}
135
/// Aggregate counts describing what executing a plan would change.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PlanImpact {
    /// Total number of files touched by the plan.
    pub files_affected: usize,
    /// Number of directories the plan creates.
    pub dirs_created: usize,
    /// Number of files the plan moves.
    pub files_moved: usize,
    /// Number of files the plan deletes.
    pub files_deleted: usize,
}
148
/// Result of a cleanup scan over the indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CleanupAnalysis {
    /// When the analysis ran.
    pub analyzed_at: DateTime<Utc>,
    /// Total number of files in the index at analysis time.
    pub total_files: usize,
    /// Files proposed for removal, with reasons.
    pub candidates: Vec<CleanupCandidate>,
    /// Bytes that would be reclaimed if every candidate were removed.
    pub potential_savings_bytes: u64,
}
161
/// A single file proposed for cleanup.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CleanupCandidate {
    /// Path of the candidate file.
    pub path: PathBuf,
    /// Why the file is considered removable.
    pub reason: CleanupReason,
    /// Confidence that removal is safe (higher is safer).
    pub confidence: f32,
    /// Size of the candidate file in bytes.
    pub size_bytes: u64,
}
174
/// Why a file was flagged as a cleanup candidate.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CleanupReason {
    /// Near-duplicate of another indexed file.
    Duplicate {
        similar_to: PathBuf,
        similarity: f32,
    },
    /// Not accessed since the given timestamp.
    Stale { last_accessed: DateTime<Utc> },
    /// Temporary/scratch file.
    Temporary,
    /// Derived artifact regenerable from `source`.
    Generated { source: PathBuf },
    /// Empty file.
    Empty,
}
193
/// Result of a duplicate-detection pass over the index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateGroups {
    /// When the detection ran.
    pub analyzed_at: DateTime<Utc>,
    /// Cosine-similarity threshold used to consider two files duplicates.
    pub threshold: f32,
    /// Groups of mutually similar files.
    pub groups: Vec<DuplicateGroup>,
    /// Bytes reclaimable by deleting all non-representative duplicates.
    pub potential_savings_bytes: u64,
}
206
/// One representative file plus the duplicates detected for it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateGroup {
    /// Unique group identifier.
    pub id: Uuid,
    /// The file kept as canonical (the larger file of each detected pair).
    pub representative: PathBuf,
    /// Files considered duplicates of the representative.
    pub duplicates: Vec<DuplicateEntry>,
    /// Combined size of all duplicates in this group, in bytes.
    pub wasted_bytes: u64,
}
219
/// A single duplicate within a [`DuplicateGroup`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateEntry {
    /// Path of the duplicate file.
    pub path: PathBuf,
    /// Cosine similarity to the group's representative.
    pub similarity: f32,
    /// File size in bytes.
    pub size_bytes: u64,
}
230
/// Files found to be semantically similar to a query file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarFilesResult {
    /// The file the search was run against.
    pub source: PathBuf,
    /// Matches, as returned by the vector store.
    pub similar: Vec<SimilarFile>,
}
239
/// One similarity-search match.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarFile {
    /// Path of the matching file.
    pub path: PathBuf,
    /// Search score reported by the vector store.
    pub similarity: f32,
    /// Short content excerpt (omitted from JSON when absent).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub preview: Option<String>,
}
251
/// Tunables for the [`SemanticManager`].
#[derive(Debug, Clone)]
pub struct SemanticConfig {
    /// Minimum cosine similarity for two files to count as duplicates.
    pub duplicate_threshold: f32,
    /// Maximum number of results returned by similarity search.
    pub similar_limit: usize,
    /// How long finished (completed/rejected/failed) plans are retained.
    pub plan_retention_hours: u32,
    /// Root directory for persisted state (plans live beneath it).
    pub data_dir: PathBuf,
}
264
265impl Default for SemanticConfig {
266 fn default() -> Self {
267 let data_dir = dirs::data_local_dir()
268 .unwrap_or_else(|| PathBuf::from("."))
269 .join("ragfs");
270
271 Self {
272 duplicate_threshold: 0.95,
273 similar_limit: 10,
274 plan_retention_hours: 24,
275 data_dir,
276 }
277 }
278}
279
/// Outcome of executing a single plan action.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ActionResult {
    /// Whether the action succeeded.
    pub success: bool,
    /// Undo handle from the ops layer, when one was issued.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub undo_id: Option<Uuid>,
    /// Error message, populated only when `success` is false.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// When the action was executed.
    pub executed_at: DateTime<Utc>,
}
294
/// Coordinates semantic filesystem features: similarity search, duplicate
/// detection, cleanup analysis, and reviewable organize plans.
///
/// All mutable state sits behind `tokio::sync::RwLock`s so a shared
/// (`Arc`) manager can be used across async tasks.
pub struct SemanticManager {
    /// Root of the indexed tree; relative request paths are joined to this.
    source: PathBuf,
    /// Vector store backing search/duplicate detection (`None` = unavailable).
    store: Option<Arc<dyn VectorStore>>,
    /// Embedder for query/category embeddings (`None` = unavailable).
    embedder: Option<Arc<dyn Embedder>>,
    /// Tunables (thresholds, limits, data directory).
    config: SemanticConfig,
    /// All known plans keyed by id (not only `Pending` ones, despite the name).
    pending_plans: Arc<RwLock<HashMap<Uuid, SemanticPlan>>>,
    /// Cache of the most recent `find_similar` result.
    last_similar_result: Arc<RwLock<Option<SimilarFilesResult>>>,
    /// Cache of the most recent cleanup analysis.
    cleanup_cache: Arc<RwLock<Option<CleanupAnalysis>>>,
    /// Cache of the most recent duplicate detection.
    dedupe_cache: Arc<RwLock<Option<DuplicateGroups>>>,
    /// Directory where plans are persisted as JSON.
    plans_dir: PathBuf,
    /// Executor for approved plan actions; required for `approve_plan`.
    ops_manager: Option<Arc<crate::ops::OpsManager>>,
}
318
319impl SemanticManager {
    /// Creates a `SemanticManager` rooted at `source`.
    ///
    /// `store` and `embedder` are optional; without them analysis operations
    /// return errors (see [`Self::is_available`]) but plan bookkeeping still
    /// works. Plans are persisted under `<data_dir>/plans/<hash>`, where
    /// `<hash>` is the first 16 hex chars of the blake3 hash of the source
    /// path — giving each indexed root its own namespace. Previously saved
    /// plans are loaded eagerly.
    pub fn new(
        source: PathBuf,
        store: Option<Arc<dyn VectorStore>>,
        embedder: Option<Arc<dyn Embedder>>,
        config: Option<SemanticConfig>,
    ) -> Self {
        let config = config.unwrap_or_default();

        // Stable per-source directory name derived from the source path.
        let index_hash = blake3::hash(source.to_string_lossy().as_bytes())
            .to_hex()
            .chars()
            .take(16)
            .collect::<String>();

        let plans_dir = config.data_dir.join("plans").join(&index_hash);

        // Best-effort: failure here only disables plan persistence.
        if let Err(e) = fs::create_dir_all(&plans_dir) {
            warn!("Failed to create plans directory: {e}");
        }

        let plans = Self::load_plans(&plans_dir);
        info!("Loaded {} existing semantic plans", plans.len());

        Self {
            source,
            store,
            embedder,
            config,
            pending_plans: Arc::new(RwLock::new(plans)),
            last_similar_result: Arc::new(RwLock::new(None)),
            cleanup_cache: Arc::new(RwLock::new(None)),
            dedupe_cache: Arc::new(RwLock::new(None)),
            plans_dir,
            ops_manager: None,
        }
    }
360
361 pub fn with_ops(
363 source: PathBuf,
364 store: Option<Arc<dyn VectorStore>>,
365 embedder: Option<Arc<dyn Embedder>>,
366 config: Option<SemanticConfig>,
367 ops_manager: Arc<crate::ops::OpsManager>,
368 ) -> Self {
369 let mut manager = Self::new(source, store, embedder, config);
370 manager.ops_manager = Some(ops_manager);
371 manager
372 }
373
374 pub fn set_ops_manager(&mut self, ops_manager: Arc<crate::ops::OpsManager>) {
376 self.ops_manager = Some(ops_manager);
377 }
378
379 fn load_plans(plans_dir: &PathBuf) -> HashMap<Uuid, SemanticPlan> {
381 let mut plans = HashMap::new();
382
383 if !plans_dir.exists() {
384 return plans;
385 }
386
387 let entries = match fs::read_dir(plans_dir) {
388 Ok(e) => e,
389 Err(e) => {
390 warn!("Failed to read plans directory: {e}");
391 return plans;
392 }
393 };
394
395 for entry in entries.flatten() {
396 let path = entry.path();
397 if path.extension().is_some_and(|e| e == "json")
398 && let Ok(content) = fs::read_to_string(&path)
399 {
400 match serde_json::from_str::<SemanticPlan>(&content) {
401 Ok(plan) => {
402 plans.insert(plan.id, plan);
403 }
404 Err(e) => {
405 warn!("Failed to parse plan file {:?}: {e}", path);
406 }
407 }
408 }
409 }
410
411 plans
412 }
413
414 fn save_plan(&self, plan: &SemanticPlan) -> std::io::Result<()> {
416 let plan_path = self.plans_dir.join(format!("{}.json", plan.id));
417 let temp_path = self.plans_dir.join(format!("{}.json.tmp", plan.id));
418
419 let content = serde_json::to_string_pretty(plan)?;
421 fs::write(&temp_path, content)?;
422
423 fs::rename(&temp_path, &plan_path)?;
425
426 Ok(())
427 }
428
429 fn delete_plan_file(&self, plan_id: Uuid) -> std::io::Result<()> {
431 let plan_path = self.plans_dir.join(format!("{plan_id}.json"));
432 if plan_path.exists() {
433 fs::remove_file(&plan_path)?;
434 }
435 Ok(())
436 }
437
438 pub async fn purge_expired_plans(&self) -> usize {
440 let now = Utc::now();
441 let retention = chrono::Duration::hours(i64::from(self.config.plan_retention_hours));
442 let cutoff = now - retention;
443
444 let mut plans = self.pending_plans.write().await;
445 let expired: Vec<Uuid> = plans
446 .iter()
447 .filter(|(_, p)| {
448 matches!(
451 p.status,
452 PlanStatus::Completed | PlanStatus::Rejected | PlanStatus::Failed { .. }
453 ) && p.created_at < cutoff
454 })
455 .map(|(id, _)| *id)
456 .collect();
457
458 let mut purged = 0;
459 for id in &expired {
460 plans.remove(id);
461 if let Err(e) = self.delete_plan_file(*id) {
462 warn!("Failed to delete expired plan file {}: {e}", id);
463 } else {
464 purged += 1;
465 }
466 }
467
468 if purged > 0 {
469 info!("Purged {} expired semantic plans", purged);
470 }
471
472 purged
473 }
474
475 #[must_use]
477 pub fn is_available(&self) -> bool {
478 self.store.is_some() && self.embedder.is_some()
479 }
480
481 pub async fn find_similar(&self, path: &PathBuf) -> Result<SimilarFilesResult, String> {
483 let store = self.store.as_ref().ok_or("Vector store not available")?;
484 let embedder = self.embedder.as_ref().ok_or("Embedder not available")?;
485
486 let full_path = if path.is_absolute() {
487 path.clone()
488 } else {
489 self.source.join(path)
490 };
491
492 debug!("Finding files similar to: {}", full_path.display());
493
494 let content =
496 std::fs::read_to_string(&full_path).map_err(|e| format!("Failed to read file: {e}"))?;
497
498 let config = EmbeddingConfig::default();
500 let embedding_output = embedder
501 .embed_query(&content, &config)
502 .await
503 .map_err(|e| format!("Failed to generate embedding: {e}"))?;
504
505 let query = SearchQuery {
507 embedding: embedding_output.embedding,
508 text: None,
509 limit: self.config.similar_limit + 1, filters: Vec::new(),
511 metric: DistanceMetric::Cosine,
512 };
513 let results = store
514 .search(query)
515 .await
516 .map_err(|e| format!("Search failed: {e}"))?;
517
518 let similar: Vec<SimilarFile> = results
520 .into_iter()
521 .filter(|r| r.file_path != full_path)
522 .take(self.config.similar_limit)
523 .map(|r| SimilarFile {
524 path: r.file_path,
525 similarity: r.score, preview: Some(truncate_content(&r.content, 200)),
527 })
528 .collect();
529
530 let result = SimilarFilesResult {
531 source: full_path,
532 similar,
533 };
534
535 *self.last_similar_result.write().await = Some(result.clone());
537
538 info!("Found {} similar files", result.similar.len());
539 Ok(result)
540 }
541
542 pub async fn get_last_similar_result(&self) -> Option<SimilarFilesResult> {
544 self.last_similar_result.read().await.clone()
545 }
546
    /// Scans the index for cleanup candidates (currently only near-duplicate
    /// files found by [`Self::find_duplicates`]) and caches the result for
    /// [`Self::get_cleanup_analysis`].
    ///
    /// Duplicate detection is best-effort: if it fails (e.g. no embedder),
    /// the analysis still succeeds with zero candidates.
    ///
    /// # Errors
    /// Fails when the vector store is unavailable or its stats cannot be read.
    pub async fn analyze_cleanup(&self) -> Result<CleanupAnalysis, String> {
        let store = self.store.as_ref().ok_or("Vector store not available")?;

        debug!("Analyzing files for cleanup candidates");

        let stats = store
            .stats()
            .await
            .map_err(|e| format!("Failed to get stats: {e}"))?;

        let mut candidates = Vec::new();
        let mut potential_savings: u64 = 0;

        // Errors from duplicate detection are deliberately swallowed so the
        // rest of the analysis can still be produced.
        if let Ok(dupes) = self.find_duplicates().await {
            for group in &dupes.groups {
                for dup in &group.duplicates {
                    if dup.similarity >= self.config.duplicate_threshold {
                        candidates.push(CleanupCandidate {
                            path: dup.path.clone(),
                            reason: CleanupReason::Duplicate {
                                similar_to: group.representative.clone(),
                                similarity: dup.similarity,
                            },
                            confidence: dup.similarity,
                            size_bytes: dup.size_bytes,
                        });
                        potential_savings += dup.size_bytes;
                    }
                }
            }
        }

        let analysis = CleanupAnalysis {
            analyzed_at: Utc::now(),
            total_files: stats.total_files as usize,
            candidates,
            potential_savings_bytes: potential_savings,
        };

        // Cache for later retrieval via get_cleanup_analysis().
        *self.cleanup_cache.write().await = Some(analysis.clone());

        info!(
            "Cleanup analysis: {} candidates, {} bytes potential savings",
            analysis.candidates.len(),
            analysis.potential_savings_bytes
        );

        Ok(analysis)
    }
603
604 pub async fn get_cleanup_analysis(&self) -> Option<CleanupAnalysis> {
606 self.cleanup_cache.read().await.clone()
607 }
608
    /// Detects near-duplicate files by comparing mean chunk embeddings.
    ///
    /// Each file's embedding is the L2-normalized average of its chunk
    /// embeddings; every pair of files with cosine similarity at or above
    /// `config.duplicate_threshold` is grouped, with the larger file of a
    /// pair kept as the group representative. The result is cached for
    /// [`Self::get_duplicate_groups`].
    ///
    /// Pairwise comparison is O(n^2) in the number of embedded files.
    ///
    /// # Errors
    /// Fails when the store or embedder is unavailable, or store reads fail.
    pub async fn find_duplicates(&self) -> Result<DuplicateGroups, String> {
        let store = self.store.as_ref().ok_or("Vector store not available")?;
        // NOTE(review): the embedder is required but never used here —
        // presumably a guard that the index was built with embeddings;
        // confirm whether the check is still needed.
        let _embedder = self.embedder.as_ref().ok_or("Embedder not available")?;

        debug!("Finding duplicate files");

        let all_chunks = store
            .get_all_chunks()
            .await
            .map_err(|e| format!("Failed to get chunks: {e}"))?;

        let all_files = store
            .get_all_files()
            .await
            .map_err(|e| format!("Failed to get files: {e}"))?;

        if all_files.is_empty() {
            return Ok(DuplicateGroups {
                analyzed_at: Utc::now(),
                threshold: self.config.duplicate_threshold,
                groups: Vec::new(),
                potential_savings_bytes: 0,
            });
        }

        // Group embedded chunks by their owning file.
        let mut file_chunks: HashMap<PathBuf, Vec<&Chunk>> = HashMap::new();
        for chunk in &all_chunks {
            if chunk.embedding.is_some() {
                file_chunks
                    .entry(chunk.file_path.clone())
                    .or_default()
                    .push(chunk);
            }
        }

        let file_info: HashMap<PathBuf, &FileRecord> =
            all_files.iter().map(|f| (f.path.clone(), f)).collect();

        // Reduce each file to one vector: the L2-normalized mean of its
        // chunk embeddings.
        let file_embeddings: HashMap<PathBuf, Vec<f32>> = file_chunks
            .iter()
            .filter_map(|(path, chunks)| {
                let embeddings: Vec<&Vec<f32>> =
                    chunks.iter().filter_map(|c| c.embedding.as_ref()).collect();

                if embeddings.is_empty() {
                    return None;
                }

                let dim = embeddings[0].len();
                let mut avg = vec![0.0f32; dim];
                for emb in &embeddings {
                    for (i, &v) in emb.iter().enumerate() {
                        avg[i] += v;
                    }
                }
                let count = embeddings.len() as f32;
                for v in &mut avg {
                    *v /= count;
                }

                // Normalize so cosine similarity reduces to a dot product scale.
                let norm: f32 = avg.iter().map(|x| x * x).sum::<f32>().sqrt();
                if norm > 0.0 {
                    for v in &mut avg {
                        *v /= norm;
                    }
                }

                Some((path.clone(), avg))
            })
            .collect();

        // All-pairs similarity scan, keeping only pairs over the threshold.
        let file_paths: Vec<&PathBuf> = file_embeddings.keys().collect();
        let mut similarity_pairs: Vec<(PathBuf, PathBuf, f32)> = Vec::new();

        for (i, path_a) in file_paths.iter().enumerate() {
            let emb_a = &file_embeddings[*path_a];
            for path_b in file_paths.iter().skip(i + 1) {
                let emb_b = &file_embeddings[*path_b];
                let similarity = cosine_similarity(emb_a, emb_b);

                if similarity >= self.config.duplicate_threshold {
                    similarity_pairs.push(((*path_a).clone(), (*path_b).clone(), similarity));
                }
            }
        }

        // Greedily merge pairs into groups. Each path joins at most one
        // group; pair order follows HashMap iteration, so when a file is
        // similar to several candidates the grouping can vary between runs.
        let mut groups: Vec<DuplicateGroup> = Vec::new();
        let mut processed: HashSet<PathBuf> = HashSet::new();

        for (path_a, path_b, similarity) in similarity_pairs {
            if processed.contains(&path_a) || processed.contains(&path_b) {
                continue;
            }

            let size_a = file_info.get(&path_a).map_or(0, |f| f.size_bytes);
            let size_b = file_info.get(&path_b).map_or(0, |f| f.size_bytes);

            // Keep the larger file as representative; the smaller one is the
            // duplicate whose size counts toward the savings estimate.
            let (representative, duplicate, dup_similarity, dup_size) = if size_a >= size_b {
                (path_a.clone(), path_b.clone(), similarity, size_b)
            } else {
                (path_b.clone(), path_a.clone(), similarity, size_a)
            };

            if let Some(group) = groups
                .iter_mut()
                .find(|g| g.representative == representative)
            {
                group.duplicates.push(DuplicateEntry {
                    path: duplicate.clone(),
                    similarity: dup_similarity,
                    size_bytes: dup_size,
                });
                group.wasted_bytes += dup_size;
                processed.insert(duplicate);
            } else {
                groups.push(DuplicateGroup {
                    id: Uuid::new_v4(),
                    representative: representative.clone(),
                    duplicates: vec![DuplicateEntry {
                        path: duplicate.clone(),
                        similarity: dup_similarity,
                        size_bytes: dup_size,
                    }],
                    wasted_bytes: dup_size,
                });
                processed.insert(representative);
                processed.insert(duplicate);
            }
        }

        let potential_savings: u64 = groups.iter().map(|g| g.wasted_bytes).sum();

        info!(
            "Found {} duplicate groups with {} bytes potential savings",
            groups.len(),
            potential_savings
        );

        let result = DuplicateGroups {
            analyzed_at: Utc::now(),
            threshold: self.config.duplicate_threshold,
            groups,
            potential_savings_bytes: potential_savings,
        };

        // Cache for later retrieval via get_duplicate_groups().
        *self.dedupe_cache.write().await = Some(result.clone());

        Ok(result)
    }
772
773 pub async fn get_duplicate_groups(&self) -> Option<DuplicateGroups> {
775 self.dedupe_cache.read().await.clone()
776 }
777
    /// Builds a pending [`SemanticPlan`] that reorganizes the files under
    /// `request.scope` according to `request.strategy`.
    ///
    /// The plan is stored in memory and persisted to disk (persistence
    /// failure is logged, not fatal) but NOT executed — callers must
    /// approve it via `approve_plan`. An empty scope yields an empty plan
    /// (no actions) rather than an error.
    ///
    /// # Errors
    /// Fails when the vector store is unavailable or store reads fail.
    pub async fn create_organize_plan(
        &self,
        request: OrganizeRequest,
    ) -> Result<SemanticPlan, String> {
        let store = self.store.as_ref().ok_or("Vector store not available")?;
        // The embedder is only needed by the Custom strategy; its absence is
        // tolerated here and handled inside plan_by_custom.
        let embedder = self.embedder.as_ref();

        debug!(
            "Creating organization plan for: {}",
            request.scope.display()
        );

        let all_chunks = store
            .get_all_chunks()
            .await
            .map_err(|e| format!("Failed to get chunks: {e}"))?;

        let all_files = store
            .get_all_files()
            .await
            .map_err(|e| format!("Failed to get files: {e}"))?;

        let scope_path = if request.scope.is_absolute() {
            request.scope.clone()
        } else {
            self.source.join(&request.scope)
        };

        let scoped_files: Vec<&FileRecord> = all_files
            .iter()
            .filter(|f| f.path.starts_with(&scope_path))
            .collect();

        // Empty scope: return a pending plan with no actions.
        if scoped_files.is_empty() {
            return Ok(SemanticPlan {
                id: Uuid::new_v4(),
                created_at: Utc::now(),
                operation: PlanOperation::Organize {
                    scope: request.scope.clone(),
                    strategy: request.strategy.clone(),
                },
                description: format!("No files found in scope: {}", request.scope.display()),
                actions: Vec::new(),
                status: PlanStatus::Pending,
                impact: PlanImpact::default(),
            });
        }

        // Group in-scope embedded chunks by their owning file.
        let mut file_chunks: HashMap<PathBuf, Vec<&Chunk>> = HashMap::new();
        for chunk in &all_chunks {
            if chunk.embedding.is_some() && chunk.file_path.starts_with(&scope_path) {
                file_chunks
                    .entry(chunk.file_path.clone())
                    .or_default()
                    .push(chunk);
            }
        }

        // Reduce each file to the L2-normalized mean of its chunk embeddings
        // (same reduction used by find_duplicates).
        let file_embeddings: HashMap<PathBuf, Vec<f32>> = file_chunks
            .iter()
            .filter_map(|(path, chunks)| {
                let embeddings: Vec<&Vec<f32>> =
                    chunks.iter().filter_map(|c| c.embedding.as_ref()).collect();

                if embeddings.is_empty() {
                    return None;
                }

                let dim = embeddings[0].len();
                let mut avg = vec![0.0f32; dim];
                for emb in &embeddings {
                    for (i, &v) in emb.iter().enumerate() {
                        avg[i] += v;
                    }
                }
                let count = embeddings.len() as f32;
                for v in &mut avg {
                    *v /= count;
                }

                let norm: f32 = avg.iter().map(|x| x * x).sum::<f32>().sqrt();
                if norm > 0.0 {
                    for v in &mut avg {
                        *v /= norm;
                    }
                }

                Some((path.clone(), avg))
            })
            .collect();

        // Delegate action generation to the strategy-specific planner.
        let (actions, description) = match &request.strategy {
            OrganizeStrategy::ByTopic => self.plan_by_topic(
                &file_embeddings,
                &scope_path,
                request.max_groups,
                request.similarity_threshold,
            ),
            OrganizeStrategy::ByType => self.plan_by_type(&scoped_files, &scope_path),
            OrganizeStrategy::ByProject => self.plan_by_project(&scoped_files, &scope_path),
            OrganizeStrategy::Custom { categories } => {
                self.plan_by_custom(&file_embeddings, &scope_path, categories, embedder)
                    .await
            }
        };

        // Derive the impact summary from the generated actions.
        let dirs_created = actions
            .iter()
            .filter(|a| matches!(a.action, ActionType::Mkdir { .. }))
            .count();
        let files_moved = actions
            .iter()
            .filter(|a| matches!(a.action, ActionType::Move { .. }))
            .count();

        let plan = SemanticPlan {
            id: Uuid::new_v4(),
            created_at: Utc::now(),
            operation: PlanOperation::Organize {
                scope: request.scope,
                strategy: request.strategy,
            },
            description,
            actions,
            status: PlanStatus::Pending,
            impact: PlanImpact {
                files_affected: files_moved,
                dirs_created,
                files_moved,
                files_deleted: 0,
            },
        };

        self.pending_plans
            .write()
            .await
            .insert(plan.id, plan.clone());

        // Persistence is best-effort; the in-memory plan remains usable.
        if let Err(e) = self.save_plan(&plan) {
            warn!("Failed to persist plan {}: {e}", plan.id);
        }

        info!(
            "Created organization plan: {} with {} actions",
            plan.id,
            plan.actions.len()
        );
        Ok(plan)
    }
936
937 fn plan_by_topic(
939 &self,
940 file_embeddings: &HashMap<PathBuf, Vec<f32>>,
941 scope_path: &PathBuf,
942 max_groups: usize,
943 similarity_threshold: f32,
944 ) -> (Vec<PlanAction>, String) {
945 if file_embeddings.is_empty() {
946 return (Vec::new(), "No files with embeddings found".to_string());
947 }
948
949 let file_paths: Vec<&PathBuf> = file_embeddings.keys().collect();
951 let num_files = file_paths.len();
952 let num_clusters = max_groups.min(num_files);
953
954 let step = if num_files > num_clusters {
956 num_files / num_clusters
957 } else {
958 1
959 };
960 let mut centroids: Vec<Vec<f32>> = (0..num_clusters)
961 .map(|i| file_embeddings[file_paths[i * step.min(num_files - 1)]].clone())
962 .collect();
963
964 let mut cluster_assignments: HashMap<PathBuf, usize> = HashMap::new();
966
967 for _ in 0..5 {
968 cluster_assignments.clear();
970 for path in &file_paths {
971 let emb = &file_embeddings[*path];
972 let mut best_cluster = 0;
973 let mut best_sim = -1.0f32;
974
975 for (cluster_idx, centroid) in centroids.iter().enumerate() {
976 let sim = cosine_similarity(emb, centroid);
977 if sim > best_sim {
978 best_sim = sim;
979 best_cluster = cluster_idx;
980 }
981 }
982
983 cluster_assignments.insert((*path).clone(), best_cluster);
984 }
985
986 for (cluster_idx, centroid) in centroids.iter_mut().enumerate() {
988 let members: Vec<&PathBuf> = cluster_assignments
989 .iter()
990 .filter(|&(_, c)| *c == cluster_idx)
991 .map(|(p, _)| p)
992 .collect();
993
994 if members.is_empty() {
995 continue;
996 }
997
998 let dim = centroid.len();
999 let mut new_centroid = vec![0.0f32; dim];
1000
1001 for path in &members {
1002 let emb = &file_embeddings[*path];
1003 for (i, &v) in emb.iter().enumerate() {
1004 new_centroid[i] += v;
1005 }
1006 }
1007
1008 let count = members.len() as f32;
1009 for v in &mut new_centroid {
1010 *v /= count;
1011 }
1012
1013 let norm: f32 = new_centroid.iter().map(|x| x * x).sum::<f32>().sqrt();
1015 if norm > 0.0 {
1016 for v in &mut new_centroid {
1017 *v /= norm;
1018 }
1019 }
1020
1021 *centroid = new_centroid;
1022 }
1023 }
1024
1025 let mut actions = Vec::new();
1027
1028 for cluster_idx in 0..num_clusters {
1030 let topic_dir = scope_path.join(format!("topic_{}", cluster_idx + 1));
1031 actions.push(PlanAction {
1032 action: ActionType::Mkdir { path: topic_dir },
1033 confidence: 1.0,
1034 reason: format!("Create directory for topic cluster {}", cluster_idx + 1),
1035 });
1036 }
1037
1038 for (path, &cluster_idx) in &cluster_assignments {
1040 let file_name = path.file_name().unwrap_or_default();
1041 let topic_dir = scope_path.join(format!("topic_{}", cluster_idx + 1));
1042 let new_path = topic_dir.join(file_name);
1043
1044 if new_path != *path {
1045 let emb = &file_embeddings[path];
1047 let centroid = ¢roids[cluster_idx];
1048 let confidence = cosine_similarity(emb, centroid).max(similarity_threshold);
1049
1050 actions.push(PlanAction {
1051 action: ActionType::Move {
1052 from: path.clone(),
1053 to: new_path,
1054 },
1055 confidence,
1056 reason: format!(
1057 "Move to topic cluster {} based on content similarity",
1058 cluster_idx + 1
1059 ),
1060 });
1061 }
1062 }
1063
1064 let description = format!(
1065 "Organize {} files into {} topic clusters",
1066 file_paths.len(),
1067 num_clusters
1068 );
1069
1070 (actions, description)
1071 }
1072
1073 fn plan_by_type(
1075 &self,
1076 files: &[&FileRecord],
1077 scope_path: &PathBuf,
1078 ) -> (Vec<PlanAction>, String) {
1079 let mut actions = Vec::new();
1080 let mut type_dirs: HashSet<String> = HashSet::new();
1081
1082 for file in files {
1083 let type_dir = if let Some(ext) = file.path.extension() {
1085 ext.to_string_lossy().to_string()
1086 } else {
1087 file.mime_type
1089 .split('/')
1090 .next()
1091 .unwrap_or("other")
1092 .to_string()
1093 };
1094
1095 if type_dirs.insert(type_dir.clone()) {
1097 actions.push(PlanAction {
1098 action: ActionType::Mkdir {
1099 path: scope_path.join(&type_dir),
1100 },
1101 confidence: 1.0,
1102 reason: format!("Create directory for {type_dir} files"),
1103 });
1104 }
1105
1106 let file_name = file.path.file_name().unwrap_or_default();
1108 let new_path = scope_path.join(&type_dir).join(file_name);
1109
1110 if new_path != file.path {
1111 actions.push(PlanAction {
1112 action: ActionType::Move {
1113 from: file.path.clone(),
1114 to: new_path,
1115 },
1116 confidence: 1.0,
1117 reason: format!("Move to {type_dir} directory based on file type"),
1118 });
1119 }
1120 }
1121
1122 let description = format!(
1123 "Organize {} files into {} type-based directories",
1124 files.len(),
1125 type_dirs.len()
1126 );
1127
1128 (actions, description)
1129 }
1130
    /// Builds actions for project-based organization: each file's first path
    /// component under `scope_path` is taken as its "project", and a `Mkdir`
    /// action is emitted per project name that does not contain a '.'
    /// (i.e. does not look like a bare file name).
    ///
    /// NOTE(review): unlike the other strategies this emits no `Move`
    /// actions, so files are left in place — confirm whether that is
    /// intentional or an unfinished strategy.
    fn plan_by_project(
        &self,
        files: &[&FileRecord],
        scope_path: &PathBuf,
    ) -> (Vec<PlanAction>, String) {
        let mut actions = Vec::new();
        let mut project_dirs: HashSet<String> = HashSet::new();

        for file in files {
            // First path component relative to the scope names the project;
            // files directly in scope fall into "root".
            let relative = file.path.strip_prefix(scope_path).unwrap_or(&file.path);
            let project = relative.components().next().map_or_else(
                || "root".to_string(),
                |c| c.as_os_str().to_string_lossy().to_string(),
            );

            if project_dirs.insert(project.clone()) && !project.contains('.') {
                actions.push(PlanAction {
                    action: ActionType::Mkdir {
                        path: scope_path.join(&project),
                    },
                    confidence: 0.8,
                    reason: format!("Create project directory: {project}"),
                });
            }
        }

        let description = format!(
            "Organize {} files into {} project directories",
            files.len(),
            project_dirs.len()
        );

        (actions, description)
    }
1169
    /// Builds actions for caller-defined categories: always creates one
    /// directory per category, then — when an embedder is available — embeds
    /// the category names and moves each file into its most similar category.
    ///
    /// Files whose best category similarity is below `MIN_SIMILARITY` stay
    /// put. If category embedding fails or returns a mismatched count, the
    /// plan degrades to directory creation only (logged, not an error).
    async fn plan_by_custom(
        &self,
        file_embeddings: &HashMap<PathBuf, Vec<f32>>,
        scope_path: &PathBuf,
        categories: &[String],
        embedder: Option<&Arc<dyn Embedder>>,
    ) -> (Vec<PlanAction>, String) {
        let mut actions = Vec::new();

        // Directory creation is unconditional so users can sort manually
        // even when embedding-based assignment is unavailable.
        for category in categories {
            actions.push(PlanAction {
                action: ActionType::Mkdir {
                    path: scope_path.join(category),
                },
                confidence: 1.0,
                reason: format!("Create custom category directory: {category}"),
            });
        }

        if let Some(embedder) = embedder {
            let category_texts: Vec<&str> = categories.iter().map(String::as_str).collect();
            let config = EmbeddingConfig::default();

            match embedder.embed_text(&category_texts, &config).await {
                Ok(category_embeddings) if category_embeddings.len() == categories.len() => {
                    // Floor below which no category is considered a match.
                    const MIN_SIMILARITY: f32 = 0.3;
                    let mut assigned_count = 0;

                    for (file_path, file_emb) in file_embeddings {
                        // Pick the category with the highest cosine similarity.
                        let best = category_embeddings
                            .iter()
                            .zip(categories.iter())
                            .map(|(emb, cat)| (cat, cosine_similarity(file_emb, &emb.embedding)))
                            .max_by(|a, b| {
                                a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)
                            });

                        if let Some((category, score)) = best
                            && score >= MIN_SIMILARITY
                            && let Some(file_name) = file_path.file_name()
                        {
                            let new_path = scope_path.join(category).join(file_name);
                            // Skip files already inside their category dir.
                            if new_path != *file_path {
                                actions.push(PlanAction {
                                    action: ActionType::Move {
                                        from: file_path.clone(),
                                        to: new_path,
                                    },
                                    confidence: score,
                                    reason: format!(
                                        "Move to category '{category}' (similarity: {score:.2})"
                                    ),
                                });
                                assigned_count += 1;
                            }
                        }
                    }

                    let description = format!(
                        "Organize {} files into {} custom categories ({} files assigned)",
                        file_embeddings.len(),
                        categories.len(),
                        assigned_count
                    );
                    return (actions, description);
                }
                Ok(_) => {
                    warn!(
                        "Category embedding count mismatch, falling back to directory creation only"
                    );
                }
                Err(e) => {
                    warn!(
                        "Failed to embed categories: {}, falling back to directory creation only",
                        e
                    );
                }
            }
        }

        // Fallback path: no embedder, or embedding failed above.
        let description = format!(
            "Created {} custom category directories for {} files (manual assignment needed)",
            categories.len(),
            file_embeddings.len()
        );

        (actions, description)
    }
1267
1268 pub async fn list_pending_plans(&self) -> Vec<SemanticPlan> {
1270 self.pending_plans
1271 .read()
1272 .await
1273 .values()
1274 .filter(|p| p.status == PlanStatus::Pending)
1275 .cloned()
1276 .collect()
1277 }
1278
1279 pub async fn get_plan(&self, plan_id: Uuid) -> Option<SemanticPlan> {
1281 self.pending_plans.read().await.get(&plan_id).cloned()
1282 }
1283
1284 async fn execute_action(&self, action: &ActionType) -> Result<ActionResult, String> {
1286 let ops = self
1287 .ops_manager
1288 .as_ref()
1289 .ok_or("OpsManager not configured - cannot execute plan actions")?;
1290
1291 let result = match action {
1292 ActionType::Move { from, to } => ops.move_file(from, to).await,
1293 ActionType::Mkdir { path } => ops.mkdir(path).await,
1294 ActionType::Delete { path } => ops.delete(path).await,
1295 ActionType::Symlink { target, link } => ops.symlink(target, link).await,
1296 };
1297
1298 Ok(ActionResult {
1299 success: result.success,
1300 undo_id: result.undo_id,
1301 error: if result.success {
1302 None
1303 } else {
1304 Some(result.error.unwrap_or_else(|| "Unknown error".to_string()))
1305 },
1306 executed_at: Utc::now(),
1307 })
1308 }
1309
    /// Approves a pending plan and executes its actions in order.
    ///
    /// Execution stops at the first failed action; the plan is then marked
    /// `Failed` (with a message identifying the action), persisted, and
    /// returned inside `Ok` — only "plan missing"/"no ops manager"/"not
    /// pending" conditions produce `Err`. On full success the plan is marked
    /// `Completed` and persisted.
    ///
    /// The write lock on the plan map is deliberately dropped before action
    /// execution so other tasks are not blocked while filesystem ops run;
    /// it is re-acquired briefly to record the final status.
    ///
    /// NOTE(review): the `Approved` status itself is never persisted — only
    /// terminal states are saved — so a crash mid-execution leaves the
    /// on-disk plan `Pending`. Confirm that is the intended recovery story.
    ///
    /// # Errors
    /// Fails when no `OpsManager` is configured, the plan does not exist,
    /// it is not `Pending`, or it vanishes from the map mid-execution.
    pub async fn approve_plan(&self, plan_id: Uuid) -> Result<SemanticPlan, String> {
        // Check up front so we never mark a plan Approved we cannot run.
        if self.ops_manager.is_none() {
            return Err("OpsManager not configured - cannot execute plan actions".to_string());
        }

        let mut plans = self.pending_plans.write().await;
        let plan = plans
            .get_mut(&plan_id)
            .ok_or_else(|| "Plan not found".to_string())?;

        if plan.status != PlanStatus::Pending {
            return Err(format!("Plan is not pending: {:?}", plan.status));
        }

        info!(
            "Approving plan: {} with {} actions",
            plan_id,
            plan.actions.len()
        );
        plan.status = PlanStatus::Approved;

        let total_actions = plan.actions.len();
        let mut completed_actions = 0;

        // Clone the actions so the lock can be released during execution.
        let actions_to_execute: Vec<ActionType> =
            plan.actions.iter().map(|a| a.action.clone()).collect();

        // Release the lock: filesystem ops may be slow and must not block
        // concurrent readers of the plan map.
        drop(plans);

        for (idx, action) in actions_to_execute.iter().enumerate() {
            debug!(
                "Executing action {}/{}: {:?}",
                idx + 1,
                total_actions,
                action
            );

            match self.execute_action(action).await {
                Ok(result) if result.success => {
                    completed_actions += 1;
                    debug!(
                        "Action {}/{} succeeded (undo_id: {:?})",
                        idx + 1,
                        total_actions,
                        result.undo_id
                    );
                }
                // Action ran but reported failure: mark the plan Failed and
                // return it (already-executed actions are NOT rolled back).
                Ok(result) => {
                    let error_msg = result.error.unwrap_or_else(|| "Unknown error".to_string());
                    warn!("Action {}/{} failed: {}", idx + 1, total_actions, error_msg);

                    let mut plans = self.pending_plans.write().await;
                    if let Some(plan) = plans.get_mut(&plan_id) {
                        plan.status = PlanStatus::Failed {
                            error: format!(
                                "Action {} of {} failed: {}",
                                idx + 1,
                                total_actions,
                                error_msg
                            ),
                        };

                        let result = plan.clone();
                        if let Err(e) = self.save_plan(&result) {
                            warn!("Failed to persist failed plan {}: {e}", plan_id);
                        }
                        return Ok(result);
                    }
                    return Err("Plan disappeared during execution".to_string());
                }
                // Dispatch itself failed (e.g. ops manager error).
                Err(e) => {
                    warn!(
                        "Failed to execute action {}/{}: {}",
                        idx + 1,
                        total_actions,
                        e
                    );

                    let mut plans = self.pending_plans.write().await;
                    if let Some(plan) = plans.get_mut(&plan_id) {
                        plan.status = PlanStatus::Failed { error: e.clone() };

                        let result = plan.clone();
                        if let Err(e) = self.save_plan(&result) {
                            warn!("Failed to persist failed plan {}: {e}", plan_id);
                        }
                        return Ok(result);
                    }
                    return Err("Plan disappeared during execution".to_string());
                }
            }
        }

        // All actions succeeded: record and persist the terminal state.
        let mut plans = self.pending_plans.write().await;
        if let Some(plan) = plans.get_mut(&plan_id) {
            plan.status = PlanStatus::Completed;
            info!(
                "Plan {} completed successfully: {} actions executed",
                plan_id, completed_actions
            );

            let result = plan.clone();
            if let Err(e) = self.save_plan(&result) {
                warn!("Failed to persist completed plan {}: {e}", plan_id);
            }
            return Ok(result);
        }

        Err("Plan disappeared during execution".to_string())
    }
1429
1430 pub async fn reject_plan(&self, plan_id: Uuid) -> Result<SemanticPlan, String> {
1432 let mut plans = self.pending_plans.write().await;
1433 let plan = plans
1434 .get_mut(&plan_id)
1435 .ok_or_else(|| "Plan not found".to_string())?;
1436
1437 if plan.status != PlanStatus::Pending {
1438 return Err(format!("Plan is not pending: {:?}", plan.status));
1439 }
1440
1441 info!("Rejecting plan: {}", plan_id);
1442 plan.status = PlanStatus::Rejected;
1443
1444 let result = plan.clone();
1445
1446 if let Err(e) = self.save_plan(&result) {
1448 warn!("Failed to persist rejected plan {}: {e}", plan_id);
1449 }
1450
1451 Ok(result)
1452 }
1453
1454 pub async fn get_cleanup_json(&self) -> Vec<u8> {
1456 if let Some(analysis) = self.get_cleanup_analysis().await {
1457 serde_json::to_string_pretty(&analysis)
1458 .unwrap_or_else(|_| "{}".to_string())
1459 .into_bytes()
1460 } else {
1461 let msg = serde_json::json!({
1463 "message": "No cleanup analysis available. Run analyze_cleanup first.",
1464 "hint": "Write any content to .semantic/.cleanup to trigger analysis"
1465 });
1466 serde_json::to_string_pretty(&msg)
1467 .unwrap_or_default()
1468 .into_bytes()
1469 }
1470 }
1471
1472 pub async fn get_dedupe_json(&self) -> Vec<u8> {
1474 if let Some(groups) = self.get_duplicate_groups().await {
1475 serde_json::to_string_pretty(&groups)
1476 .unwrap_or_else(|_| "{}".to_string())
1477 .into_bytes()
1478 } else {
1479 let msg = serde_json::json!({
1480 "message": "No duplicate analysis available. Run find_duplicates first.",
1481 "hint": "Write any content to .semantic/.dedupe to trigger analysis"
1482 });
1483 serde_json::to_string_pretty(&msg)
1484 .unwrap_or_default()
1485 .into_bytes()
1486 }
1487 }
1488
1489 pub async fn get_similar_json(&self) -> Vec<u8> {
1491 if let Some(result) = self.get_last_similar_result().await {
1492 serde_json::to_string_pretty(&result)
1493 .unwrap_or_else(|_| "{}".to_string())
1494 .into_bytes()
1495 } else {
1496 let msg = serde_json::json!({
1497 "message": "No similar files search performed yet.",
1498 "hint": "Write a file path to .semantic/.similar to find similar files"
1499 });
1500 serde_json::to_string_pretty(&msg)
1501 .unwrap_or_default()
1502 .into_bytes()
1503 }
1504 }
1505
1506 pub async fn get_pending_plan_ids(&self) -> Vec<String> {
1508 self.pending_plans
1509 .read()
1510 .await
1511 .iter()
1512 .filter(|(_, p)| p.status == PlanStatus::Pending)
1513 .map(|(id, _)| id.to_string())
1514 .collect()
1515 }
1516
1517 pub async fn get_plan_json(&self, plan_id: &str) -> Vec<u8> {
1519 if let Ok(uuid) = Uuid::parse_str(plan_id)
1520 && let Some(plan) = self.get_plan(uuid).await
1521 {
1522 return serde_json::to_string_pretty(&plan)
1523 .unwrap_or_else(|_| "{}".to_string())
1524 .into_bytes();
1525 }
1526 let msg = serde_json::json!({
1527 "error": "Plan not found",
1528 "plan_id": plan_id
1529 });
1530 serde_json::to_string_pretty(&msg)
1531 .unwrap_or_default()
1532 .into_bytes()
1533 }
1534}
1535
/// Truncate `content` to at most `max_len` bytes, appending "..." when
/// anything was cut off.
///
/// Byte-slicing a `&str` at an arbitrary index panics if the index lands
/// inside a multi-byte UTF-8 sequence, so the cut point is backed up to the
/// nearest character boundary first. ASCII input behaves exactly as before.
fn truncate_content(content: &str, max_len: usize) -> String {
    if content.len() <= max_len {
        content.to_string()
    } else {
        // Walk backwards until the index is a valid char boundary
        // (guaranteed to terminate: index 0 is always a boundary).
        let mut cut = max_len;
        while !content.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &content[..cut])
    }
}
1544
/// Cosine similarity between two vectors, clamped to `[-1.0, 1.0]`.
///
/// Returns `0.0` when the lengths differ or either vector has zero
/// magnitude. Accumulation is a single left-to-right pass, matching the
/// order of the equivalent iterator-sum formulation.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let mut dot = 0.0f32;
    let mut sq_a = 0.0f32;
    let mut sq_b = 0.0f32;
    for (&x, &y) in a.iter().zip(b.iter()) {
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }

    let norm_a = sq_a.sqrt();
    let norm_b = sq_b.sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
}
1561
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_organize_request_serialization() {
        // Round-trip through JSON and verify the fields survive intact.
        let req = OrganizeRequest {
            scope: PathBuf::from("docs/"),
            strategy: OrganizeStrategy::ByTopic,
            max_groups: 5,
            similarity_threshold: 0.8,
        };

        let encoded = serde_json::to_string(&req).unwrap();
        let decoded: OrganizeRequest = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.scope, req.scope);
        assert_eq!(decoded.max_groups, 5);
    }

    #[test]
    fn test_organize_request_defaults() {
        // Omitted fields must fall back to the serde default functions.
        let raw = r#"{"scope":"src/","strategy":"by_topic"}"#;
        let req: OrganizeRequest = serde_json::from_str(raw).unwrap();

        assert_eq!(req.max_groups, 10);
        assert!((req.similarity_threshold - 0.7).abs() < f32::EPSILON);
    }

    #[test]
    fn test_plan_status_serialization() {
        // The struct variant should emit both its snake_case tag and payload.
        let failed = PlanStatus::Failed {
            error: "test error".to_string(),
        };
        let encoded = serde_json::to_string(&failed).unwrap();
        assert!(encoded.contains("failed"));
        assert!(encoded.contains("test error"));
    }

    #[test]
    fn test_cleanup_reason_variants() {
        // Each variant serializes under its snake_case tag.
        let dup = CleanupReason::Duplicate {
            similar_to: PathBuf::from("/original.txt"),
            similarity: 0.98,
        };
        assert!(serde_json::to_string(&dup).unwrap().contains("duplicate"));

        let stale = CleanupReason::Stale {
            last_accessed: Utc::now(),
        };
        assert!(serde_json::to_string(&stale).unwrap().contains("stale"));
    }

    #[test]
    fn test_semantic_config_default() {
        let cfg = SemanticConfig::default();
        assert!((cfg.duplicate_threshold - 0.95).abs() < f32::EPSILON);
        assert_eq!(cfg.similar_limit, 10);
        assert_eq!(cfg.plan_retention_hours, 24);
    }

    #[test]
    fn test_truncate_content() {
        assert_eq!(truncate_content("short", 100), "short");
        assert_eq!(truncate_content("hello world", 5), "hello...");
    }

    #[test]
    fn test_action_type_serialization() {
        let mv = ActionType::Move {
            from: PathBuf::from("/old/path.txt"),
            to: PathBuf::from("/new/path.txt"),
        };
        let encoded = serde_json::to_string(&mv).unwrap();
        assert!(encoded.contains("move"));
        assert!(encoded.contains("/old/path.txt"));
    }

    #[test]
    fn test_similar_file_serialization() {
        let entry = SimilarFile {
            path: PathBuf::from("/doc.txt"),
            similarity: 0.85,
            preview: Some("This is a preview...".to_string()),
        };
        let encoded = serde_json::to_string(&entry).unwrap();
        assert!(encoded.contains("0.85"));
        assert!(encoded.contains("preview"));
    }

    #[tokio::test]
    async fn test_semantic_manager_without_store() {
        // A manager constructed with no backing store reports unavailability.
        let mgr = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
        assert!(!mgr.is_available());
    }

    #[tokio::test]
    async fn test_pending_plans_empty() {
        let mgr = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
        assert!(mgr.list_pending_plans().await.is_empty());
    }

    #[tokio::test]
    async fn test_get_plan_not_found() {
        let mgr = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
        assert!(mgr.get_plan(Uuid::new_v4()).await.is_none());
    }

    #[tokio::test]
    async fn test_get_cleanup_json_empty() {
        let mgr = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
        let body = String::from_utf8(mgr.get_cleanup_json().await).unwrap();
        assert!(body.contains("No cleanup analysis"));
    }

    #[tokio::test]
    async fn test_get_dedupe_json_empty() {
        let mgr = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
        let body = String::from_utf8(mgr.get_dedupe_json().await).unwrap();
        assert!(body.contains("No duplicate analysis"));
    }

    #[tokio::test]
    async fn test_get_similar_json_empty() {
        let mgr = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
        let body = String::from_utf8(mgr.get_similar_json().await).unwrap();
        assert!(body.contains("No similar files search"));
    }

    #[tokio::test]
    async fn test_plan_by_custom_without_embedder() {
        // Without an embedder the planner may only create the category
        // directories; it must not guess at file placements.
        let mgr = SemanticManager::new(PathBuf::from("/tmp/test"), None, None, None);

        let mut embeddings = HashMap::new();
        embeddings.insert(PathBuf::from("/tmp/test/doc1.txt"), vec![0.1, 0.2, 0.3]);
        embeddings.insert(PathBuf::from("/tmp/test/doc2.txt"), vec![0.4, 0.5, 0.6]);

        let scope = PathBuf::from("/tmp/test");
        let categories = vec!["code".to_string(), "docs".to_string()];

        let (actions, description) = mgr
            .plan_by_custom(&embeddings, &scope, &categories, None)
            .await;

        let mkdirs = actions
            .iter()
            .filter(|a| matches!(a.action, ActionType::Mkdir { .. }))
            .count();
        let moves = actions
            .iter()
            .filter(|a| matches!(a.action, ActionType::Move { .. }))
            .count();

        assert_eq!(mkdirs, 2, "Should create 2 category directories");
        assert_eq!(moves, 0, "Should not move files without embedder");
        assert!(
            description.contains("manual assignment needed"),
            "Description should indicate manual assignment needed"
        );
    }

    #[test]
    fn test_custom_categories_serialization() {
        let req = OrganizeRequest {
            scope: PathBuf::from("src/"),
            strategy: OrganizeStrategy::Custom {
                categories: vec!["code".to_string(), "docs".to_string(), "tests".to_string()],
            },
            max_groups: 10,
            similarity_threshold: 0.7,
        };

        let encoded = serde_json::to_string(&req).unwrap();
        for needle in ["custom", "code", "docs", "tests"] {
            assert!(encoded.contains(needle));
        }

        let decoded: OrganizeRequest = serde_json::from_str(&encoded).unwrap();
        match decoded.strategy {
            OrganizeStrategy::Custom { categories } => assert_eq!(categories.len(), 3),
            _ => panic!("Expected Custom strategy"),
        }
    }
}
1755}