// ragfs_fuse/semantic.rs

1//! Semantic operations for intelligent file management.
2//!
3//! This module provides AI-powered file operations based on vector embeddings:
4//! - File organization by topic/similarity
5//! - Duplicate detection
6//! - Cleanup analysis
7//! - Similar file discovery
8//!
9//! All operations follow a Propose-Review-Apply pattern for safety.
10
use chrono::{DateTime, Utc};
use ragfs_core::{
    Chunk, DistanceMetric, Embedder, EmbeddingConfig, FileRecord, SearchQuery, VectorStore,
};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
use uuid::Uuid;
23
/// Request to organize files in a directory.
///
/// Typically deserialized from user input; unspecified fields fall back
/// to the `default_*` helpers referenced in the serde attributes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrganizeRequest {
    /// Directory scope (relative to source root)
    pub scope: PathBuf,
    /// Organization strategy
    pub strategy: OrganizeStrategy,
    /// Maximum number of groups to create (defaults to 10)
    #[serde(default = "default_max_groups")]
    pub max_groups: usize,
    /// Minimum similarity threshold for grouping (0.0-1.0, defaults to 0.7)
    #[serde(default = "default_similarity_threshold")]
    pub similarity_threshold: f32,
}
38
/// Serde default for [`OrganizeRequest::max_groups`].
const fn default_max_groups() -> usize {
    10
}
42
/// Serde default for [`OrganizeRequest::similarity_threshold`].
const fn default_similarity_threshold() -> f32 {
    0.7
}
46
/// Strategy for organizing files.
///
/// Serialized in snake_case (e.g. `by_topic`, `custom`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OrganizeStrategy {
    /// Group by semantic topic/content similarity
    ByTopic,
    /// Group by file type first, then by content
    ByType,
    /// Group by project/module structure
    ByProject,
    /// Custom grouping with specified categories
    Custom { categories: Vec<String> },
}
60
/// A proposed semantic operation plan.
///
/// Plans follow the Propose-Review-Apply pattern: they are created in
/// `Pending` status and must be approved before any action is executed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticPlan {
    /// Unique plan identifier
    pub id: Uuid,
    /// When the plan was created (UTC)
    pub created_at: DateTime<Utc>,
    /// Type of operation
    pub operation: PlanOperation,
    /// Human-readable description
    pub description: String,
    /// Proposed file operations
    pub actions: Vec<PlanAction>,
    /// Status of the plan
    pub status: PlanStatus,
    /// Estimated impact (files affected)
    pub impact: PlanImpact,
}
79
/// Type of semantic operation a plan performs.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PlanOperation {
    /// Organize files into groups
    Organize {
        scope: PathBuf,
        strategy: OrganizeStrategy,
    },
    /// Clean up files
    Cleanup { scope: PathBuf },
    /// Deduplicate files
    Dedupe { scope: PathBuf },
}
94
/// A single action in a plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlanAction {
    /// Type of action
    pub action: ActionType,
    /// Confidence score (0.0-1.0, higher = more certain)
    pub confidence: f32,
    /// Human-readable reason for this action
    pub reason: String,
}
105
/// Type of file action a plan can propose.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ActionType {
    /// Move a file to a new location
    Move { from: PathBuf, to: PathBuf },
    /// Create a new directory
    Mkdir { path: PathBuf },
    /// Delete a file (will use soft delete)
    Delete { path: PathBuf },
    /// Create a symlink at `link` pointing at `target`
    Symlink { target: PathBuf, link: PathBuf },
}
119
/// Status of a plan in its Propose-Review-Apply lifecycle.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum PlanStatus {
    /// Plan is pending review
    Pending,
    /// Plan was approved and is being executed
    Approved,
    /// Plan was rejected
    Rejected,
    /// Plan was executed successfully
    Completed,
    /// Plan execution failed with the given error message
    Failed { error: String },
}
135
/// Impact summary of a plan (counts derived from its actions).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PlanImpact {
    /// Total files affected
    pub files_affected: usize,
    /// Directories created
    pub dirs_created: usize,
    /// Files moved
    pub files_moved: usize,
    /// Files deleted
    pub files_deleted: usize,
}
148
/// Analysis of cleanup candidates.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CleanupAnalysis {
    /// When the analysis was performed (UTC)
    pub analyzed_at: DateTime<Utc>,
    /// Total files analyzed
    pub total_files: usize,
    /// Cleanup candidates
    pub candidates: Vec<CleanupCandidate>,
    /// Potential space savings in bytes if all candidates are removed
    pub potential_savings_bytes: u64,
}
161
/// A file that could be cleaned up.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CleanupCandidate {
    /// File path
    pub path: PathBuf,
    /// Reason for cleanup suggestion
    pub reason: CleanupReason,
    /// Confidence score (0.0-1.0)
    pub confidence: f32,
    /// File size in bytes (space reclaimed if removed)
    pub size_bytes: u64,
}
174
/// Reason a file is suggested for cleanup.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CleanupReason {
    /// File appears to be a (near-)duplicate of `similar_to`
    Duplicate {
        similar_to: PathBuf,
        similarity: f32,
    },
    /// File hasn't been accessed in a long time
    Stale { last_accessed: DateTime<Utc> },
    /// Temporary file pattern
    Temporary,
    /// Generated file that can be recreated from `source`
    Generated { source: PathBuf },
    /// Empty or near-empty file
    Empty,
}
193
/// Groups of duplicate/similar files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateGroups {
    /// When the analysis was performed (UTC)
    pub analyzed_at: DateTime<Utc>,
    /// Minimum similarity threshold used for grouping
    pub threshold: f32,
    /// Groups of similar files
    pub groups: Vec<DuplicateGroup>,
    /// Total potential savings if duplicates removed
    pub potential_savings_bytes: u64,
}
206
/// A group of similar files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateGroup {
    /// Group identifier
    pub id: Uuid,
    /// Representative file (keep this one; the larger file of a pair)
    pub representative: PathBuf,
    /// Similar files (candidates for removal)
    pub duplicates: Vec<DuplicateEntry>,
    /// Total size of duplicates in bytes
    pub wasted_bytes: u64,
}
219
/// A duplicate file entry within a [`DuplicateGroup`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateEntry {
    /// File path
    pub path: PathBuf,
    /// Similarity to the group's representative (0.0-1.0)
    pub similarity: f32,
    /// File size in bytes
    pub size_bytes: u64,
}
230
/// Result of finding files similar to a source file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarFilesResult {
    /// Source file the search was run against
    pub source: PathBuf,
    /// Similar files found (excludes the source itself)
    pub similar: Vec<SimilarFile>,
}
239
/// A similar file returned from a similarity search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarFile {
    /// File path
    pub path: PathBuf,
    /// Similarity score (0.0-1.0, higher = more similar)
    pub similarity: f32,
    /// Preview of content (omitted from JSON when absent)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub preview: Option<String>,
}
251
/// Configuration for semantic operations.
#[derive(Debug, Clone)]
pub struct SemanticConfig {
    /// Minimum similarity for duplicate detection (0.0-1.0)
    pub duplicate_threshold: f32,
    /// Number of results for similar file search
    pub similar_limit: usize,
    /// How long non-pending plans are retained before purging (hours)
    pub plan_retention_hours: u32,
    /// Base directory for persistence (plans, etc.)
    pub data_dir: PathBuf,
}
264
265impl Default for SemanticConfig {
266    fn default() -> Self {
267        let data_dir = dirs::data_local_dir()
268            .unwrap_or_else(|| PathBuf::from("."))
269            .join("ragfs");
270
271        Self {
272            duplicate_threshold: 0.95,
273            similar_limit: 10,
274            plan_retention_hours: 24,
275            data_dir,
276        }
277    }
278}
279
/// Result of executing a single plan action.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ActionResult {
    /// Whether the action succeeded
    pub success: bool,
    /// ID for undoing this action (if reversible)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub undo_id: Option<Uuid>,
    /// Error message if failed
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// When the action was executed (UTC)
    pub executed_at: DateTime<Utc>,
}
294
/// Semantic manager for intelligent file operations.
///
/// Central coordinator for similarity search, duplicate detection,
/// cleanup analysis, and organization plans. Shared state is wrapped in
/// `Arc<RwLock<_>>` so the manager can be used from concurrent tasks.
pub struct SemanticManager {
    /// Source directory root
    source: PathBuf,
    /// Vector store for similarity search (None disables semantic ops)
    store: Option<Arc<dyn VectorStore>>,
    /// Embedder for generating embeddings (None disables semantic ops)
    embedder: Option<Arc<dyn Embedder>>,
    /// Configuration
    config: SemanticConfig,
    /// Pending plans (`plan_id` -> plan), mirrored on disk in `plans_dir`
    pending_plans: Arc<RwLock<HashMap<Uuid, SemanticPlan>>>,
    /// Last similar files result (cache for `get_last_similar_result`)
    last_similar_result: Arc<RwLock<Option<SimilarFilesResult>>>,
    /// Cached cleanup analysis
    cleanup_cache: Arc<RwLock<Option<CleanupAnalysis>>>,
    /// Cached duplicate groups
    dedupe_cache: Arc<RwLock<Option<DuplicateGroups>>>,
    /// Directory where plans are persisted as JSON files
    plans_dir: PathBuf,
    /// Operations manager for executing actions
    ops_manager: Option<Arc<crate::ops::OpsManager>>,
}
318
319impl SemanticManager {
320    /// Create a new semantic manager.
321    pub fn new(
322        source: PathBuf,
323        store: Option<Arc<dyn VectorStore>>,
324        embedder: Option<Arc<dyn Embedder>>,
325        config: Option<SemanticConfig>,
326    ) -> Self {
327        let config = config.unwrap_or_default();
328
329        // Create index hash for isolation (same pattern as SafetyManager)
330        let index_hash = blake3::hash(source.to_string_lossy().as_bytes())
331            .to_hex()
332            .chars()
333            .take(16)
334            .collect::<String>();
335
336        let plans_dir = config.data_dir.join("plans").join(&index_hash);
337
338        // Ensure plans directory exists
339        if let Err(e) = fs::create_dir_all(&plans_dir) {
340            warn!("Failed to create plans directory: {e}");
341        }
342
343        // Load existing plans from disk
344        let plans = Self::load_plans(&plans_dir);
345        info!("Loaded {} existing semantic plans", plans.len());
346
347        Self {
348            source,
349            store,
350            embedder,
351            config,
352            pending_plans: Arc::new(RwLock::new(plans)),
353            last_similar_result: Arc::new(RwLock::new(None)),
354            cleanup_cache: Arc::new(RwLock::new(None)),
355            dedupe_cache: Arc::new(RwLock::new(None)),
356            plans_dir,
357            ops_manager: None,
358        }
359    }
360
361    /// Create a semantic manager with an operations manager for plan execution.
362    pub fn with_ops(
363        source: PathBuf,
364        store: Option<Arc<dyn VectorStore>>,
365        embedder: Option<Arc<dyn Embedder>>,
366        config: Option<SemanticConfig>,
367        ops_manager: Arc<crate::ops::OpsManager>,
368    ) -> Self {
369        let mut manager = Self::new(source, store, embedder, config);
370        manager.ops_manager = Some(ops_manager);
371        manager
372    }
373
374    /// Set the operations manager.
375    pub fn set_ops_manager(&mut self, ops_manager: Arc<crate::ops::OpsManager>) {
376        self.ops_manager = Some(ops_manager);
377    }
378
379    /// Load all plans from disk.
380    fn load_plans(plans_dir: &PathBuf) -> HashMap<Uuid, SemanticPlan> {
381        let mut plans = HashMap::new();
382
383        if !plans_dir.exists() {
384            return plans;
385        }
386
387        let entries = match fs::read_dir(plans_dir) {
388            Ok(e) => e,
389            Err(e) => {
390                warn!("Failed to read plans directory: {e}");
391                return plans;
392            }
393        };
394
395        for entry in entries.flatten() {
396            let path = entry.path();
397            if path.extension().is_some_and(|e| e == "json")
398                && let Ok(content) = fs::read_to_string(&path)
399            {
400                match serde_json::from_str::<SemanticPlan>(&content) {
401                    Ok(plan) => {
402                        plans.insert(plan.id, plan);
403                    }
404                    Err(e) => {
405                        warn!("Failed to parse plan file {:?}: {e}", path);
406                    }
407                }
408            }
409        }
410
411        plans
412    }
413
414    /// Save a single plan to disk.
415    fn save_plan(&self, plan: &SemanticPlan) -> std::io::Result<()> {
416        let plan_path = self.plans_dir.join(format!("{}.json", plan.id));
417        let temp_path = self.plans_dir.join(format!("{}.json.tmp", plan.id));
418
419        // Write to temp file first for atomic operation
420        let content = serde_json::to_string_pretty(plan)?;
421        fs::write(&temp_path, content)?;
422
423        // Atomic rename
424        fs::rename(&temp_path, &plan_path)?;
425
426        Ok(())
427    }
428
429    /// Delete a plan file from disk.
430    fn delete_plan_file(&self, plan_id: Uuid) -> std::io::Result<()> {
431        let plan_path = self.plans_dir.join(format!("{plan_id}.json"));
432        if plan_path.exists() {
433            fs::remove_file(&plan_path)?;
434        }
435        Ok(())
436    }
437
438    /// Purge expired plans from memory and disk.
439    pub async fn purge_expired_plans(&self) -> usize {
440        let now = Utc::now();
441        let retention = chrono::Duration::hours(i64::from(self.config.plan_retention_hours));
442        let cutoff = now - retention;
443
444        let mut plans = self.pending_plans.write().await;
445        let expired: Vec<Uuid> = plans
446            .iter()
447            .filter(|(_, p)| {
448                // Purge completed/rejected/failed plans past retention
449                // Keep pending plans regardless of age
450                matches!(
451                    p.status,
452                    PlanStatus::Completed | PlanStatus::Rejected | PlanStatus::Failed { .. }
453                ) && p.created_at < cutoff
454            })
455            .map(|(id, _)| *id)
456            .collect();
457
458        let mut purged = 0;
459        for id in &expired {
460            plans.remove(id);
461            if let Err(e) = self.delete_plan_file(*id) {
462                warn!("Failed to delete expired plan file {}: {e}", id);
463            } else {
464                purged += 1;
465            }
466        }
467
468        if purged > 0 {
469            info!("Purged {} expired semantic plans", purged);
470        }
471
472        purged
473    }
474
475    /// Check if semantic operations are available.
476    #[must_use]
477    pub fn is_available(&self) -> bool {
478        self.store.is_some() && self.embedder.is_some()
479    }
480
481    /// Find files similar to a given path.
482    pub async fn find_similar(&self, path: &PathBuf) -> Result<SimilarFilesResult, String> {
483        let store = self.store.as_ref().ok_or("Vector store not available")?;
484        let embedder = self.embedder.as_ref().ok_or("Embedder not available")?;
485
486        let full_path = if path.is_absolute() {
487            path.clone()
488        } else {
489            self.source.join(path)
490        };
491
492        debug!("Finding files similar to: {}", full_path.display());
493
494        // Read the file content
495        let content =
496            std::fs::read_to_string(&full_path).map_err(|e| format!("Failed to read file: {e}"))?;
497
498        // Generate embedding for the content
499        let config = EmbeddingConfig::default();
500        let embedding_output = embedder
501            .embed_query(&content, &config)
502            .await
503            .map_err(|e| format!("Failed to generate embedding: {e}"))?;
504
505        // Search for similar files
506        let query = SearchQuery {
507            embedding: embedding_output.embedding,
508            text: None,
509            limit: self.config.similar_limit + 1, // +1 to exclude self
510            filters: Vec::new(),
511            metric: DistanceMetric::Cosine,
512        };
513        let results = store
514            .search(query)
515            .await
516            .map_err(|e| format!("Search failed: {e}"))?;
517
518        // Convert results, excluding the source file itself
519        let similar: Vec<SimilarFile> = results
520            .into_iter()
521            .filter(|r| r.file_path != full_path)
522            .take(self.config.similar_limit)
523            .map(|r| SimilarFile {
524                path: r.file_path,
525                similarity: r.score, // score is already similarity (higher = more similar)
526                preview: Some(truncate_content(&r.content, 200)),
527            })
528            .collect();
529
530        let result = SimilarFilesResult {
531            source: full_path,
532            similar,
533        };
534
535        // Cache the result
536        *self.last_similar_result.write().await = Some(result.clone());
537
538        info!("Found {} similar files", result.similar.len());
539        Ok(result)
540    }
541
542    /// Get the last similar files result.
543    pub async fn get_last_similar_result(&self) -> Option<SimilarFilesResult> {
544        self.last_similar_result.read().await.clone()
545    }
546
547    /// Analyze files for cleanup candidates.
548    pub async fn analyze_cleanup(&self) -> Result<CleanupAnalysis, String> {
549        let store = self.store.as_ref().ok_or("Vector store not available")?;
550
551        debug!("Analyzing files for cleanup candidates");
552
553        // Get all file records from the store
554        let stats = store
555            .stats()
556            .await
557            .map_err(|e| format!("Failed to get stats: {e}"))?;
558
559        let mut candidates = Vec::new();
560        let mut potential_savings: u64 = 0;
561
562        // For now, we'll focus on duplicate detection as the primary cleanup criterion
563        // This could be expanded to include stale file detection, etc.
564
565        // Get duplicate groups and convert high-confidence duplicates to cleanup candidates
566        if let Ok(dupes) = self.find_duplicates().await {
567            for group in &dupes.groups {
568                for dup in &group.duplicates {
569                    if dup.similarity >= self.config.duplicate_threshold {
570                        candidates.push(CleanupCandidate {
571                            path: dup.path.clone(),
572                            reason: CleanupReason::Duplicate {
573                                similar_to: group.representative.clone(),
574                                similarity: dup.similarity,
575                            },
576                            confidence: dup.similarity,
577                            size_bytes: dup.size_bytes,
578                        });
579                        potential_savings += dup.size_bytes;
580                    }
581                }
582            }
583        }
584
585        let analysis = CleanupAnalysis {
586            analyzed_at: Utc::now(),
587            total_files: stats.total_files as usize,
588            candidates,
589            potential_savings_bytes: potential_savings,
590        };
591
592        // Cache the result
593        *self.cleanup_cache.write().await = Some(analysis.clone());
594
595        info!(
596            "Cleanup analysis: {} candidates, {} bytes potential savings",
597            analysis.candidates.len(),
598            analysis.potential_savings_bytes
599        );
600
601        Ok(analysis)
602    }
603
604    /// Get cached cleanup analysis.
605    pub async fn get_cleanup_analysis(&self) -> Option<CleanupAnalysis> {
606        self.cleanup_cache.read().await.clone()
607    }
608
    /// Find duplicate file groups.
    ///
    /// Averages each file's chunk embeddings into a single normalized
    /// vector, compares every file pair by cosine similarity, and groups
    /// pairs at or above `duplicate_threshold`. The result is cached for
    /// [`Self::get_duplicate_groups`].
    ///
    /// # Errors
    /// Returns an error if the store or embedder is unavailable, or if
    /// fetching chunks/files from the store fails.
    pub async fn find_duplicates(&self) -> Result<DuplicateGroups, String> {
        let store = self.store.as_ref().ok_or("Vector store not available")?;
        // Embedder is not used directly here, but its absence means the
        // index has no embeddings worth comparing.
        let _embedder = self.embedder.as_ref().ok_or("Embedder not available")?;

        debug!("Finding duplicate files");

        // Get all chunks and files from the store
        let all_chunks = store
            .get_all_chunks()
            .await
            .map_err(|e| format!("Failed to get chunks: {e}"))?;

        let all_files = store
            .get_all_files()
            .await
            .map_err(|e| format!("Failed to get files: {e}"))?;

        // Empty index: return an empty result rather than an error
        if all_files.is_empty() {
            return Ok(DuplicateGroups {
                analyzed_at: Utc::now(),
                threshold: self.config.duplicate_threshold,
                groups: Vec::new(),
                potential_savings_bytes: 0,
            });
        }

        // Build a map of file_path -> chunks that carry embeddings
        let mut file_chunks: HashMap<PathBuf, Vec<&Chunk>> = HashMap::new();
        for chunk in &all_chunks {
            if chunk.embedding.is_some() {
                file_chunks
                    .entry(chunk.file_path.clone())
                    .or_default()
                    .push(chunk);
            }
        }

        // Build file info map (used below for file sizes)
        let file_info: HashMap<PathBuf, &FileRecord> =
            all_files.iter().map(|f| (f.path.clone(), f)).collect();

        // Calculate one average embedding per file
        let file_embeddings: HashMap<PathBuf, Vec<f32>> = file_chunks
            .iter()
            .filter_map(|(path, chunks)| {
                let embeddings: Vec<&Vec<f32>> =
                    chunks.iter().filter_map(|c| c.embedding.as_ref()).collect();

                if embeddings.is_empty() {
                    return None;
                }

                // Average the embeddings element-wise
                let dim = embeddings[0].len();
                let mut avg = vec![0.0f32; dim];
                for emb in &embeddings {
                    for (i, &v) in emb.iter().enumerate() {
                        avg[i] += v;
                    }
                }
                let count = embeddings.len() as f32;
                for v in &mut avg {
                    *v /= count;
                }

                // Normalize to unit length for cosine comparison
                let norm: f32 = avg.iter().map(|x| x * x).sum::<f32>().sqrt();
                if norm > 0.0 {
                    for v in &mut avg {
                        *v /= norm;
                    }
                }

                Some((path.clone(), avg))
            })
            .collect();

        // Compare every unordered file pair (O(n^2) in file count)
        let file_paths: Vec<&PathBuf> = file_embeddings.keys().collect();
        let mut similarity_pairs: Vec<(PathBuf, PathBuf, f32)> = Vec::new();

        for (i, path_a) in file_paths.iter().enumerate() {
            let emb_a = &file_embeddings[*path_a];
            for path_b in file_paths.iter().skip(i + 1) {
                let emb_b = &file_embeddings[*path_b];
                let similarity = cosine_similarity(emb_a, emb_b);

                if similarity >= self.config.duplicate_threshold {
                    similarity_pairs.push(((*path_a).clone(), (*path_b).clone(), similarity));
                }
            }
        }

        // Greedy single-link grouping: each qualifying pair either joins
        // an existing group (when its representative is already one) or
        // starts a new one. (Not true Union-Find: pairs whose files were
        // already placed in a group are skipped.)
        let mut groups: Vec<DuplicateGroup> = Vec::new();
        let mut processed: HashSet<PathBuf> = HashSet::new();

        for (path_a, path_b, similarity) in similarity_pairs {
            if processed.contains(&path_a) || processed.contains(&path_b) {
                continue;
            }

            // Look up file sizes so the larger file can be kept
            let size_a = file_info.get(&path_a).map_or(0, |f| f.size_bytes);
            let size_b = file_info.get(&path_b).map_or(0, |f| f.size_bytes);

            // Use the larger file as representative
            let (representative, duplicate, dup_similarity, dup_size) = if size_a >= size_b {
                (path_a.clone(), path_b.clone(), similarity, size_b)
            } else {
                (path_b.clone(), path_a.clone(), similarity, size_a)
            };

            // Check if representative already has a group
            if let Some(group) = groups
                .iter_mut()
                .find(|g| g.representative == representative)
            {
                group.duplicates.push(DuplicateEntry {
                    path: duplicate.clone(),
                    similarity: dup_similarity,
                    size_bytes: dup_size,
                });
                group.wasted_bytes += dup_size;
                processed.insert(duplicate);
            } else {
                // Create new group
                groups.push(DuplicateGroup {
                    id: Uuid::new_v4(),
                    representative: representative.clone(),
                    duplicates: vec![DuplicateEntry {
                        path: duplicate.clone(),
                        similarity: dup_similarity,
                        size_bytes: dup_size,
                    }],
                    wasted_bytes: dup_size,
                });
                processed.insert(representative);
                processed.insert(duplicate);
            }
        }

        let potential_savings: u64 = groups.iter().map(|g| g.wasted_bytes).sum();

        info!(
            "Found {} duplicate groups with {} bytes potential savings",
            groups.len(),
            potential_savings
        );

        let result = DuplicateGroups {
            analyzed_at: Utc::now(),
            threshold: self.config.duplicate_threshold,
            groups,
            potential_savings_bytes: potential_savings,
        };

        // Cache the result
        *self.dedupe_cache.write().await = Some(result.clone());

        Ok(result)
    }
772
773    /// Get cached duplicate groups.
774    pub async fn get_duplicate_groups(&self) -> Option<DuplicateGroups> {
775        self.dedupe_cache.read().await.clone()
776    }
777
    /// Create an organization plan.
    ///
    /// Produces a Propose-Review-Apply plan describing how files under
    /// `request.scope` would be regrouped according to the requested
    /// strategy. The plan is stored in memory and persisted to disk; no
    /// file is touched until the plan is approved and executed.
    ///
    /// # Errors
    /// Returns an error if the vector store is unavailable or fetching
    /// chunks/files fails.
    pub async fn create_organize_plan(
        &self,
        request: OrganizeRequest,
    ) -> Result<SemanticPlan, String> {
        let store = self.store.as_ref().ok_or("Vector store not available")?;
        // Only passed through to `plan_by_custom`; may legitimately be
        // None for the other strategies.
        let embedder = self.embedder.as_ref();

        debug!(
            "Creating organization plan for: {}",
            request.scope.display()
        );

        // Get all chunks and files
        let all_chunks = store
            .get_all_chunks()
            .await
            .map_err(|e| format!("Failed to get chunks: {e}"))?;

        let all_files = store
            .get_all_files()
            .await
            .map_err(|e| format!("Failed to get files: {e}"))?;

        // Resolve the scope against the source root if it is relative
        let scope_path = if request.scope.is_absolute() {
            request.scope.clone()
        } else {
            self.source.join(&request.scope)
        };

        let scoped_files: Vec<&FileRecord> = all_files
            .iter()
            .filter(|f| f.path.starts_with(&scope_path))
            .collect();

        // Empty scope: return an empty pending plan rather than an error
        if scoped_files.is_empty() {
            return Ok(SemanticPlan {
                id: Uuid::new_v4(),
                created_at: Utc::now(),
                operation: PlanOperation::Organize {
                    scope: request.scope.clone(),
                    strategy: request.strategy.clone(),
                },
                description: format!("No files found in scope: {}", request.scope.display()),
                actions: Vec::new(),
                status: PlanStatus::Pending,
                impact: PlanImpact::default(),
            });
        }

        // Build file -> embedded chunks map, limited to the scope
        let mut file_chunks: HashMap<PathBuf, Vec<&Chunk>> = HashMap::new();
        for chunk in &all_chunks {
            if chunk.embedding.is_some() && chunk.file_path.starts_with(&scope_path) {
                file_chunks
                    .entry(chunk.file_path.clone())
                    .or_default()
                    .push(chunk);
            }
        }

        // Calculate one average (normalized) embedding per file
        let file_embeddings: HashMap<PathBuf, Vec<f32>> = file_chunks
            .iter()
            .filter_map(|(path, chunks)| {
                let embeddings: Vec<&Vec<f32>> =
                    chunks.iter().filter_map(|c| c.embedding.as_ref()).collect();

                if embeddings.is_empty() {
                    return None;
                }

                let dim = embeddings[0].len();
                let mut avg = vec![0.0f32; dim];
                for emb in &embeddings {
                    for (i, &v) in emb.iter().enumerate() {
                        avg[i] += v;
                    }
                }
                let count = embeddings.len() as f32;
                for v in &mut avg {
                    *v /= count;
                }

                // Normalize so downstream similarity comparisons are
                // on unit-length vectors
                let norm: f32 = avg.iter().map(|x| x * x).sum::<f32>().sqrt();
                if norm > 0.0 {
                    for v in &mut avg {
                        *v /= norm;
                    }
                }

                Some((path.clone(), avg))
            })
            .collect();

        // Generate actions based on strategy
        let (actions, description) = match &request.strategy {
            OrganizeStrategy::ByTopic => self.plan_by_topic(
                &file_embeddings,
                &scope_path,
                request.max_groups,
                request.similarity_threshold,
            ),
            OrganizeStrategy::ByType => self.plan_by_type(&scoped_files, &scope_path),
            OrganizeStrategy::ByProject => self.plan_by_project(&scoped_files, &scope_path),
            OrganizeStrategy::Custom { categories } => {
                self.plan_by_custom(&file_embeddings, &scope_path, categories, embedder)
                    .await
            }
        };

        // Summarize impact from the generated actions
        let dirs_created = actions
            .iter()
            .filter(|a| matches!(a.action, ActionType::Mkdir { .. }))
            .count();
        let files_moved = actions
            .iter()
            .filter(|a| matches!(a.action, ActionType::Move { .. }))
            .count();

        let plan = SemanticPlan {
            id: Uuid::new_v4(),
            created_at: Utc::now(),
            operation: PlanOperation::Organize {
                scope: request.scope,
                strategy: request.strategy,
            },
            description,
            actions,
            status: PlanStatus::Pending,
            impact: PlanImpact {
                files_affected: files_moved,
                dirs_created,
                files_moved,
                files_deleted: 0,
            },
        };

        // Store the plan in memory
        self.pending_plans
            .write()
            .await
            .insert(plan.id, plan.clone());

        // Persist to disk (best-effort; the in-memory plan still works)
        if let Err(e) = self.save_plan(&plan) {
            warn!("Failed to persist plan {}: {e}", plan.id);
        }

        info!(
            "Created organization plan: {} with {} actions",
            plan.id,
            plan.actions.len()
        );
        Ok(plan)
    }
936
937    /// Plan organization by semantic topic using clustering.
938    fn plan_by_topic(
939        &self,
940        file_embeddings: &HashMap<PathBuf, Vec<f32>>,
941        scope_path: &PathBuf,
942        max_groups: usize,
943        similarity_threshold: f32,
944    ) -> (Vec<PlanAction>, String) {
945        if file_embeddings.is_empty() {
946            return (Vec::new(), "No files with embeddings found".to_string());
947        }
948
949        // Simple clustering: find centroids and group files
950        let file_paths: Vec<&PathBuf> = file_embeddings.keys().collect();
951        let num_files = file_paths.len();
952        let num_clusters = max_groups.min(num_files);
953
954        // Initialize clusters with k random files (here we use evenly spaced indices)
955        let step = if num_files > num_clusters {
956            num_files / num_clusters
957        } else {
958            1
959        };
960        let mut centroids: Vec<Vec<f32>> = (0..num_clusters)
961            .map(|i| file_embeddings[file_paths[i * step.min(num_files - 1)]].clone())
962            .collect();
963
964        // Simple k-means iterations
965        let mut cluster_assignments: HashMap<PathBuf, usize> = HashMap::new();
966
967        for _ in 0..5 {
968            // Assign each file to nearest centroid
969            cluster_assignments.clear();
970            for path in &file_paths {
971                let emb = &file_embeddings[*path];
972                let mut best_cluster = 0;
973                let mut best_sim = -1.0f32;
974
975                for (cluster_idx, centroid) in centroids.iter().enumerate() {
976                    let sim = cosine_similarity(emb, centroid);
977                    if sim > best_sim {
978                        best_sim = sim;
979                        best_cluster = cluster_idx;
980                    }
981                }
982
983                cluster_assignments.insert((*path).clone(), best_cluster);
984            }
985
986            // Update centroids
987            for (cluster_idx, centroid) in centroids.iter_mut().enumerate() {
988                let members: Vec<&PathBuf> = cluster_assignments
989                    .iter()
990                    .filter(|&(_, c)| *c == cluster_idx)
991                    .map(|(p, _)| p)
992                    .collect();
993
994                if members.is_empty() {
995                    continue;
996                }
997
998                let dim = centroid.len();
999                let mut new_centroid = vec![0.0f32; dim];
1000
1001                for path in &members {
1002                    let emb = &file_embeddings[*path];
1003                    for (i, &v) in emb.iter().enumerate() {
1004                        new_centroid[i] += v;
1005                    }
1006                }
1007
1008                let count = members.len() as f32;
1009                for v in &mut new_centroid {
1010                    *v /= count;
1011                }
1012
1013                // Normalize
1014                let norm: f32 = new_centroid.iter().map(|x| x * x).sum::<f32>().sqrt();
1015                if norm > 0.0 {
1016                    for v in &mut new_centroid {
1017                        *v /= norm;
1018                    }
1019                }
1020
1021                *centroid = new_centroid;
1022            }
1023        }
1024
1025        // Generate actions
1026        let mut actions = Vec::new();
1027
1028        // Create topic directories
1029        for cluster_idx in 0..num_clusters {
1030            let topic_dir = scope_path.join(format!("topic_{}", cluster_idx + 1));
1031            actions.push(PlanAction {
1032                action: ActionType::Mkdir { path: topic_dir },
1033                confidence: 1.0,
1034                reason: format!("Create directory for topic cluster {}", cluster_idx + 1),
1035            });
1036        }
1037
1038        // Move files to their clusters
1039        for (path, &cluster_idx) in &cluster_assignments {
1040            let file_name = path.file_name().unwrap_or_default();
1041            let topic_dir = scope_path.join(format!("topic_{}", cluster_idx + 1));
1042            let new_path = topic_dir.join(file_name);
1043
1044            if new_path != *path {
1045                // Calculate confidence based on distance to centroid
1046                let emb = &file_embeddings[path];
1047                let centroid = &centroids[cluster_idx];
1048                let confidence = cosine_similarity(emb, centroid).max(similarity_threshold);
1049
1050                actions.push(PlanAction {
1051                    action: ActionType::Move {
1052                        from: path.clone(),
1053                        to: new_path,
1054                    },
1055                    confidence,
1056                    reason: format!(
1057                        "Move to topic cluster {} based on content similarity",
1058                        cluster_idx + 1
1059                    ),
1060                });
1061            }
1062        }
1063
1064        let description = format!(
1065            "Organize {} files into {} topic clusters",
1066            file_paths.len(),
1067            num_clusters
1068        );
1069
1070        (actions, description)
1071    }
1072
1073    /// Plan organization by file type.
1074    fn plan_by_type(
1075        &self,
1076        files: &[&FileRecord],
1077        scope_path: &PathBuf,
1078    ) -> (Vec<PlanAction>, String) {
1079        let mut actions = Vec::new();
1080        let mut type_dirs: HashSet<String> = HashSet::new();
1081
1082        for file in files {
1083            // Determine type directory based on extension or MIME type
1084            let type_dir = if let Some(ext) = file.path.extension() {
1085                ext.to_string_lossy().to_string()
1086            } else {
1087                // Use MIME type category
1088                file.mime_type
1089                    .split('/')
1090                    .next()
1091                    .unwrap_or("other")
1092                    .to_string()
1093            };
1094
1095            // Create type directory if needed
1096            if type_dirs.insert(type_dir.clone()) {
1097                actions.push(PlanAction {
1098                    action: ActionType::Mkdir {
1099                        path: scope_path.join(&type_dir),
1100                    },
1101                    confidence: 1.0,
1102                    reason: format!("Create directory for {type_dir} files"),
1103                });
1104            }
1105
1106            // Move file
1107            let file_name = file.path.file_name().unwrap_or_default();
1108            let new_path = scope_path.join(&type_dir).join(file_name);
1109
1110            if new_path != file.path {
1111                actions.push(PlanAction {
1112                    action: ActionType::Move {
1113                        from: file.path.clone(),
1114                        to: new_path,
1115                    },
1116                    confidence: 1.0,
1117                    reason: format!("Move to {type_dir} directory based on file type"),
1118                });
1119            }
1120        }
1121
1122        let description = format!(
1123            "Organize {} files into {} type-based directories",
1124            files.len(),
1125            type_dirs.len()
1126        );
1127
1128        (actions, description)
1129    }
1130
1131    /// Plan organization by project structure (based on imports/dependencies).
1132    fn plan_by_project(
1133        &self,
1134        files: &[&FileRecord],
1135        scope_path: &PathBuf,
1136    ) -> (Vec<PlanAction>, String) {
1137        // For project-based organization, we look at file paths to infer structure
1138        // This is a simplified implementation
1139        let mut actions = Vec::new();
1140        let mut project_dirs: HashSet<String> = HashSet::new();
1141
1142        for file in files {
1143            // Use the first directory component after scope as "project"
1144            let relative = file.path.strip_prefix(scope_path).unwrap_or(&file.path);
1145            let project = relative.components().next().map_or_else(
1146                || "root".to_string(),
1147                |c| c.as_os_str().to_string_lossy().to_string(),
1148            );
1149
1150            if project_dirs.insert(project.clone()) && !project.contains('.') {
1151                actions.push(PlanAction {
1152                    action: ActionType::Mkdir {
1153                        path: scope_path.join(&project),
1154                    },
1155                    confidence: 0.8,
1156                    reason: format!("Create project directory: {project}"),
1157                });
1158            }
1159        }
1160
1161        let description = format!(
1162            "Organize {} files into {} project directories",
1163            files.len(),
1164            project_dirs.len()
1165        );
1166
1167        (actions, description)
1168    }
1169
    /// Plan organization by custom categories.
    ///
    /// Always emits one `Mkdir` action per category. When an embedder is
    /// available, the category names are embedded and each file is assigned
    /// to the category whose embedding is most cosine-similar to the file's
    /// (subject to a minimum similarity); otherwise only the directories are
    /// created and assignment is left to the user.
    async fn plan_by_custom(
        &self,
        file_embeddings: &HashMap<PathBuf, Vec<f32>>,
        scope_path: &PathBuf,
        categories: &[String],
        embedder: Option<&Arc<dyn Embedder>>,
    ) -> (Vec<PlanAction>, String) {
        let mut actions = Vec::new();

        // Create category directories (done unconditionally, even if the
        // automatic assignment below fails or is unavailable).
        for category in categories {
            actions.push(PlanAction {
                action: ActionType::Mkdir {
                    path: scope_path.join(category),
                },
                confidence: 1.0,
                reason: format!("Create custom category directory: {category}"),
            });
        }

        // Try to generate embeddings for categories and assign files automatically
        if let Some(embedder) = embedder {
            let category_texts: Vec<&str> = categories.iter().map(String::as_str).collect();
            let config = EmbeddingConfig::default();

            match embedder.embed_text(&category_texts, &config).await {
                Ok(category_embeddings) if category_embeddings.len() == categories.len() => {
                    // Minimum similarity threshold for assignment; below this
                    // a file stays where it is rather than being force-moved.
                    const MIN_SIMILARITY: f32 = 0.3;
                    let mut assigned_count = 0;

                    for (file_path, file_emb) in file_embeddings {
                        // Find best matching category by cosine similarity.
                        let best = category_embeddings
                            .iter()
                            .zip(categories.iter())
                            .map(|(emb, cat)| (cat, cosine_similarity(file_emb, &emb.embedding)))
                            .max_by(|a, b| {
                                // NaN-safe comparison: treat incomparable scores as equal.
                                a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)
                            });

                        if let Some((category, score)) = best
                            && score >= MIN_SIMILARITY
                            && let Some(file_name) = file_path.file_name()
                        {
                            let new_path = scope_path.join(category).join(file_name);
                            // Skip no-op moves (file already in its category dir).
                            if new_path != *file_path {
                                actions.push(PlanAction {
                                    action: ActionType::Move {
                                        from: file_path.clone(),
                                        to: new_path,
                                    },
                                    confidence: score,
                                    reason: format!(
                                        "Move to category '{category}' (similarity: {score:.2})"
                                    ),
                                });
                                assigned_count += 1;
                            }
                        }
                    }

                    let description = format!(
                        "Organize {} files into {} custom categories ({} files assigned)",
                        file_embeddings.len(),
                        categories.len(),
                        assigned_count
                    );
                    return (actions, description);
                }
                Ok(_) => {
                    // Count mismatch: embeddings cannot be zipped with categories.
                    warn!(
                        "Category embedding count mismatch, falling back to directory creation only"
                    );
                }
                Err(e) => {
                    warn!(
                        "Failed to embed categories: {}, falling back to directory creation only",
                        e
                    );
                }
            }
        }

        // Fallback: just create directories without automatic assignment
        let description = format!(
            "Created {} custom category directories for {} files (manual assignment needed)",
            categories.len(),
            file_embeddings.len()
        );

        (actions, description)
    }
1267
1268    /// List all pending plans.
1269    pub async fn list_pending_plans(&self) -> Vec<SemanticPlan> {
1270        self.pending_plans
1271            .read()
1272            .await
1273            .values()
1274            .filter(|p| p.status == PlanStatus::Pending)
1275            .cloned()
1276            .collect()
1277    }
1278
1279    /// Get a specific plan.
1280    pub async fn get_plan(&self, plan_id: Uuid) -> Option<SemanticPlan> {
1281        self.pending_plans.read().await.get(&plan_id).cloned()
1282    }
1283
1284    /// Execute a single action via `OpsManager`.
1285    async fn execute_action(&self, action: &ActionType) -> Result<ActionResult, String> {
1286        let ops = self
1287            .ops_manager
1288            .as_ref()
1289            .ok_or("OpsManager not configured - cannot execute plan actions")?;
1290
1291        let result = match action {
1292            ActionType::Move { from, to } => ops.move_file(from, to).await,
1293            ActionType::Mkdir { path } => ops.mkdir(path).await,
1294            ActionType::Delete { path } => ops.delete(path).await,
1295            ActionType::Symlink { target, link } => ops.symlink(target, link).await,
1296        };
1297
1298        Ok(ActionResult {
1299            success: result.success,
1300            undo_id: result.undo_id,
1301            error: if result.success {
1302                None
1303            } else {
1304                Some(result.error.unwrap_or_else(|| "Unknown error".to_string()))
1305            },
1306            executed_at: Utc::now(),
1307        })
1308    }
1309
1310    /// Approve and execute a plan.
1311    pub async fn approve_plan(&self, plan_id: Uuid) -> Result<SemanticPlan, String> {
1312        // Verify OpsManager is available before starting
1313        if self.ops_manager.is_none() {
1314            return Err("OpsManager not configured - cannot execute plan actions".to_string());
1315        }
1316
1317        let mut plans = self.pending_plans.write().await;
1318        let plan = plans
1319            .get_mut(&plan_id)
1320            .ok_or_else(|| "Plan not found".to_string())?;
1321
1322        if plan.status != PlanStatus::Pending {
1323            return Err(format!("Plan is not pending: {:?}", plan.status));
1324        }
1325
1326        info!(
1327            "Approving plan: {} with {} actions",
1328            plan_id,
1329            plan.actions.len()
1330        );
1331        plan.status = PlanStatus::Approved;
1332
1333        // Execute actions sequentially, stopping on first failure
1334        let total_actions = plan.actions.len();
1335        let mut completed_actions = 0;
1336
1337        // Clone actions to avoid holding lock during execution
1338        let actions_to_execute: Vec<ActionType> =
1339            plan.actions.iter().map(|a| a.action.clone()).collect();
1340
1341        // Release write lock during execution to avoid deadlock
1342        drop(plans);
1343
1344        for (idx, action) in actions_to_execute.iter().enumerate() {
1345            debug!(
1346                "Executing action {}/{}: {:?}",
1347                idx + 1,
1348                total_actions,
1349                action
1350            );
1351
1352            match self.execute_action(action).await {
1353                Ok(result) if result.success => {
1354                    completed_actions += 1;
1355                    debug!(
1356                        "Action {}/{} succeeded (undo_id: {:?})",
1357                        idx + 1,
1358                        total_actions,
1359                        result.undo_id
1360                    );
1361                }
1362                Ok(result) => {
1363                    // Action failed
1364                    let error_msg = result.error.unwrap_or_else(|| "Unknown error".to_string());
1365                    warn!("Action {}/{} failed: {}", idx + 1, total_actions, error_msg);
1366
1367                    // Update plan status to failed
1368                    let mut plans = self.pending_plans.write().await;
1369                    if let Some(plan) = plans.get_mut(&plan_id) {
1370                        plan.status = PlanStatus::Failed {
1371                            error: format!(
1372                                "Action {} of {} failed: {}",
1373                                idx + 1,
1374                                total_actions,
1375                                error_msg
1376                            ),
1377                        };
1378
1379                        let result = plan.clone();
1380                        if let Err(e) = self.save_plan(&result) {
1381                            warn!("Failed to persist failed plan {}: {e}", plan_id);
1382                        }
1383                        return Ok(result);
1384                    }
1385                    return Err("Plan disappeared during execution".to_string());
1386                }
1387                Err(e) => {
1388                    // Execution error (OpsManager issue)
1389                    warn!(
1390                        "Failed to execute action {}/{}: {}",
1391                        idx + 1,
1392                        total_actions,
1393                        e
1394                    );
1395
1396                    let mut plans = self.pending_plans.write().await;
1397                    if let Some(plan) = plans.get_mut(&plan_id) {
1398                        plan.status = PlanStatus::Failed { error: e.clone() };
1399
1400                        let result = plan.clone();
1401                        if let Err(e) = self.save_plan(&result) {
1402                            warn!("Failed to persist failed plan {}: {e}", plan_id);
1403                        }
1404                        return Ok(result);
1405                    }
1406                    return Err("Plan disappeared during execution".to_string());
1407                }
1408            }
1409        }
1410
1411        // All actions completed successfully
1412        let mut plans = self.pending_plans.write().await;
1413        if let Some(plan) = plans.get_mut(&plan_id) {
1414            plan.status = PlanStatus::Completed;
1415            info!(
1416                "Plan {} completed successfully: {} actions executed",
1417                plan_id, completed_actions
1418            );
1419
1420            let result = plan.clone();
1421            if let Err(e) = self.save_plan(&result) {
1422                warn!("Failed to persist completed plan {}: {e}", plan_id);
1423            }
1424            return Ok(result);
1425        }
1426
1427        Err("Plan disappeared during execution".to_string())
1428    }
1429
1430    /// Reject a plan.
1431    pub async fn reject_plan(&self, plan_id: Uuid) -> Result<SemanticPlan, String> {
1432        let mut plans = self.pending_plans.write().await;
1433        let plan = plans
1434            .get_mut(&plan_id)
1435            .ok_or_else(|| "Plan not found".to_string())?;
1436
1437        if plan.status != PlanStatus::Pending {
1438            return Err(format!("Plan is not pending: {:?}", plan.status));
1439        }
1440
1441        info!("Rejecting plan: {}", plan_id);
1442        plan.status = PlanStatus::Rejected;
1443
1444        let result = plan.clone();
1445
1446        // Persist the status change
1447        if let Err(e) = self.save_plan(&result) {
1448            warn!("Failed to persist rejected plan {}: {e}", plan_id);
1449        }
1450
1451        Ok(result)
1452    }
1453
1454    /// Get cleanup analysis as JSON bytes (for FUSE read).
1455    pub async fn get_cleanup_json(&self) -> Vec<u8> {
1456        if let Some(analysis) = self.get_cleanup_analysis().await {
1457            serde_json::to_string_pretty(&analysis)
1458                .unwrap_or_else(|_| "{}".to_string())
1459                .into_bytes()
1460        } else {
1461            // Return a message indicating analysis hasn't been run
1462            let msg = serde_json::json!({
1463                "message": "No cleanup analysis available. Run analyze_cleanup first.",
1464                "hint": "Write any content to .semantic/.cleanup to trigger analysis"
1465            });
1466            serde_json::to_string_pretty(&msg)
1467                .unwrap_or_default()
1468                .into_bytes()
1469        }
1470    }
1471
1472    /// Get duplicate groups as JSON bytes (for FUSE read).
1473    pub async fn get_dedupe_json(&self) -> Vec<u8> {
1474        if let Some(groups) = self.get_duplicate_groups().await {
1475            serde_json::to_string_pretty(&groups)
1476                .unwrap_or_else(|_| "{}".to_string())
1477                .into_bytes()
1478        } else {
1479            let msg = serde_json::json!({
1480                "message": "No duplicate analysis available. Run find_duplicates first.",
1481                "hint": "Write any content to .semantic/.dedupe to trigger analysis"
1482            });
1483            serde_json::to_string_pretty(&msg)
1484                .unwrap_or_default()
1485                .into_bytes()
1486        }
1487    }
1488
1489    /// Get similar files result as JSON bytes (for FUSE read).
1490    pub async fn get_similar_json(&self) -> Vec<u8> {
1491        if let Some(result) = self.get_last_similar_result().await {
1492            serde_json::to_string_pretty(&result)
1493                .unwrap_or_else(|_| "{}".to_string())
1494                .into_bytes()
1495        } else {
1496            let msg = serde_json::json!({
1497                "message": "No similar files search performed yet.",
1498                "hint": "Write a file path to .semantic/.similar to find similar files"
1499            });
1500            serde_json::to_string_pretty(&msg)
1501                .unwrap_or_default()
1502                .into_bytes()
1503        }
1504    }
1505
1506    /// Get pending plans directory listing.
1507    pub async fn get_pending_plan_ids(&self) -> Vec<String> {
1508        self.pending_plans
1509            .read()
1510            .await
1511            .iter()
1512            .filter(|(_, p)| p.status == PlanStatus::Pending)
1513            .map(|(id, _)| id.to_string())
1514            .collect()
1515    }
1516
1517    /// Get a plan as JSON bytes (for FUSE read).
1518    pub async fn get_plan_json(&self, plan_id: &str) -> Vec<u8> {
1519        if let Ok(uuid) = Uuid::parse_str(plan_id)
1520            && let Some(plan) = self.get_plan(uuid).await
1521        {
1522            return serde_json::to_string_pretty(&plan)
1523                .unwrap_or_else(|_| "{}".to_string())
1524                .into_bytes();
1525        }
1526        let msg = serde_json::json!({
1527            "error": "Plan not found",
1528            "plan_id": plan_id
1529        });
1530        serde_json::to_string_pretty(&msg)
1531            .unwrap_or_default()
1532            .into_bytes()
1533    }
1534}
1535
/// Truncate content for preview.
///
/// Returns `content` unchanged when it fits within `max_len` bytes;
/// otherwise cuts it at `max_len` and appends `"..."`. The cut point is
/// backed off to the nearest UTF-8 character boundary — the original
/// `&content[..max_len]` panicked when `max_len` landed inside a
/// multi-byte character.
fn truncate_content(content: &str, max_len: usize) -> String {
    if content.len() <= max_len {
        content.to_string()
    } else {
        // Back off until the slice boundary is a valid char boundary.
        let mut end = max_len;
        while !content.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}...", &content[..end])
    }
}
1544
/// Calculate cosine similarity between two embeddings.
///
/// Returns 0.0 for length-mismatched inputs or when either vector has zero
/// norm; otherwise the result is clamped into `[-1.0, 1.0]` to absorb
/// floating-point drift.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    // Single pass accumulating the dot product and both squared norms.
    let (mut dot, mut sq_a, mut sq_b) = (0.0f32, 0.0f32, 0.0f32);
    for (&x, &y) in a.iter().zip(b.iter()) {
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }

    let norm_a = sq_a.sqrt();
    let norm_b = sq_b.sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
}
1561
1562#[cfg(test)]
1563mod tests {
1564    use super::*;
1565
1566    #[test]
1567    fn test_organize_request_serialization() {
1568        let request = OrganizeRequest {
1569            scope: PathBuf::from("docs/"),
1570            strategy: OrganizeStrategy::ByTopic,
1571            max_groups: 5,
1572            similarity_threshold: 0.8,
1573        };
1574
1575        let json = serde_json::to_string(&request).unwrap();
1576        let parsed: OrganizeRequest = serde_json::from_str(&json).unwrap();
1577
1578        assert_eq!(parsed.scope, request.scope);
1579        assert_eq!(parsed.max_groups, 5);
1580    }
1581
1582    #[test]
1583    fn test_organize_request_defaults() {
1584        let json = r#"{"scope":"src/","strategy":"by_topic"}"#;
1585        let request: OrganizeRequest = serde_json::from_str(json).unwrap();
1586
1587        assert_eq!(request.max_groups, 10);
1588        assert!((request.similarity_threshold - 0.7).abs() < f32::EPSILON);
1589    }
1590
1591    #[test]
1592    fn test_plan_status_serialization() {
1593        let status = PlanStatus::Failed {
1594            error: "test error".to_string(),
1595        };
1596        let json = serde_json::to_string(&status).unwrap();
1597        assert!(json.contains("failed"));
1598        assert!(json.contains("test error"));
1599    }
1600
1601    #[test]
1602    fn test_cleanup_reason_variants() {
1603        let duplicate = CleanupReason::Duplicate {
1604            similar_to: PathBuf::from("/original.txt"),
1605            similarity: 0.98,
1606        };
1607        let json = serde_json::to_string(&duplicate).unwrap();
1608        assert!(json.contains("duplicate"));
1609
1610        let stale = CleanupReason::Stale {
1611            last_accessed: Utc::now(),
1612        };
1613        let json = serde_json::to_string(&stale).unwrap();
1614        assert!(json.contains("stale"));
1615    }
1616
1617    #[test]
1618    fn test_semantic_config_default() {
1619        let config = SemanticConfig::default();
1620        assert!((config.duplicate_threshold - 0.95).abs() < f32::EPSILON);
1621        assert_eq!(config.similar_limit, 10);
1622        assert_eq!(config.plan_retention_hours, 24);
1623    }
1624
    #[test]
    fn test_truncate_content() {
        // Content within the limit passes through unchanged; longer content
        // is cut at the limit and suffixed with "...".
        assert_eq!(truncate_content("short", 100), "short");
        assert_eq!(truncate_content("hello world", 5), "hello...");
    }
1630
1631    #[test]
1632    fn test_action_type_serialization() {
1633        let action = ActionType::Move {
1634            from: PathBuf::from("/old/path.txt"),
1635            to: PathBuf::from("/new/path.txt"),
1636        };
1637        let json = serde_json::to_string(&action).unwrap();
1638        assert!(json.contains("move"));
1639        assert!(json.contains("/old/path.txt"));
1640    }
1641
1642    #[test]
1643    fn test_similar_file_serialization() {
1644        let similar = SimilarFile {
1645            path: PathBuf::from("/doc.txt"),
1646            similarity: 0.85,
1647            preview: Some("This is a preview...".to_string()),
1648        };
1649        let json = serde_json::to_string(&similar).unwrap();
1650        assert!(json.contains("0.85"));
1651        assert!(json.contains("preview"));
1652    }
1653
    #[tokio::test]
    async fn test_semantic_manager_without_store() {
        // Without a vector store, the manager reports itself unavailable.
        let manager = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
        assert!(!manager.is_available());
    }
1659
1660    #[tokio::test]
1661    async fn test_pending_plans_empty() {
1662        let manager = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
1663        let plans = manager.list_pending_plans().await;
1664        assert!(plans.is_empty());
1665    }
1666
1667    #[tokio::test]
1668    async fn test_get_plan_not_found() {
1669        let manager = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
1670        let plan = manager.get_plan(Uuid::new_v4()).await;
1671        assert!(plan.is_none());
1672    }
1673
1674    #[tokio::test]
1675    async fn test_get_cleanup_json_empty() {
1676        let manager = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
1677        let json = manager.get_cleanup_json().await;
1678        let json_str = String::from_utf8(json).unwrap();
1679        assert!(json_str.contains("No cleanup analysis"));
1680    }
1681
1682    #[tokio::test]
1683    async fn test_get_dedupe_json_empty() {
1684        let manager = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
1685        let json = manager.get_dedupe_json().await;
1686        let json_str = String::from_utf8(json).unwrap();
1687        assert!(json_str.contains("No duplicate analysis"));
1688    }
1689
1690    #[tokio::test]
1691    async fn test_get_similar_json_empty() {
1692        let manager = SemanticManager::new(PathBuf::from("/tmp"), None, None, None);
1693        let json = manager.get_similar_json().await;
1694        let json_str = String::from_utf8(json).unwrap();
1695        assert!(json_str.contains("No similar files search"));
1696    }
1697
1698    #[tokio::test]
1699    async fn test_plan_by_custom_without_embedder() {
1700        // Test the fallback behavior when embedder is None
1701        let manager = SemanticManager::new(PathBuf::from("/tmp/test"), None, None, None);
1702        let mut file_embeddings = HashMap::new();
1703        file_embeddings.insert(PathBuf::from("/tmp/test/doc1.txt"), vec![0.1, 0.2, 0.3]);
1704        file_embeddings.insert(PathBuf::from("/tmp/test/doc2.txt"), vec![0.4, 0.5, 0.6]);
1705
1706        let scope_path = PathBuf::from("/tmp/test");
1707        let categories = vec!["code".to_string(), "docs".to_string()];
1708
1709        let (actions, description) = manager
1710            .plan_by_custom(&file_embeddings, &scope_path, &categories, None)
1711            .await;
1712
1713        // Should create 2 category directories but no file moves (no embedder)
1714        let mkdir_count = actions
1715            .iter()
1716            .filter(|a| matches!(a.action, ActionType::Mkdir { .. }))
1717            .count();
1718        let move_count = actions
1719            .iter()
1720            .filter(|a| matches!(a.action, ActionType::Move { .. }))
1721            .count();
1722
1723        assert_eq!(mkdir_count, 2, "Should create 2 category directories");
1724        assert_eq!(move_count, 0, "Should not move files without embedder");
1725        assert!(
1726            description.contains("manual assignment needed"),
1727            "Description should indicate manual assignment needed"
1728        );
1729    }
1730
1731    #[test]
1732    fn test_custom_categories_serialization() {
1733        let request = OrganizeRequest {
1734            scope: PathBuf::from("src/"),
1735            strategy: OrganizeStrategy::Custom {
1736                categories: vec!["code".to_string(), "docs".to_string(), "tests".to_string()],
1737            },
1738            max_groups: 10,
1739            similarity_threshold: 0.7,
1740        };
1741
1742        let json = serde_json::to_string(&request).unwrap();
1743        assert!(json.contains("custom"));
1744        assert!(json.contains("code"));
1745        assert!(json.contains("docs"));
1746        assert!(json.contains("tests"));
1747
1748        let parsed: OrganizeRequest = serde_json::from_str(&json).unwrap();
1749        if let OrganizeStrategy::Custom { categories } = parsed.strategy {
1750            assert_eq!(categories.len(), 3);
1751        } else {
1752            panic!("Expected Custom strategy");
1753        }
1754    }
1755}