acme_disk_use/
cache.rs

1//! Cache management module for storing and retrieving disk usage statistics
2
3use serde::{Deserialize, Serialize};
4use std::{
5    collections::HashMap,
6    fs, io,
7    path::{Path, PathBuf},
8};
9
10use crate::error::DiskUseError;
11use crate::scanner::DirStat;
12
/// Cache structure for storing multiple directory scan results
///
/// This is the value that `CacheManager` encodes to and decodes from the
/// cache file with `bincode`.
#[derive(Serialize, Deserialize, Debug, Default)]
pub(crate) struct Cache {
    /// Top-level scan results keyed by the scanned root's path.
    /// `CacheManager::insert` canonicalizes the key when it can.
    pub(crate) roots: HashMap<PathBuf, DirStat>,
    /// Version number stored with the data (0 for a default cache).
    /// NOTE(review): nothing in this file reads or validates it — presumably a
    /// cache-format version; confirm against the rest of the crate.
    pub(crate) version: u32,
}
19
20/// Public interface for cache operations with lazy writing
21pub struct CacheManager {
22    cache: Cache,
23    cache_path: PathBuf,
24    dirty: bool, // Track if cache needs to be saved
25}
26
27impl CacheManager {
28    /// Create a new cache manager with specified path
29    pub fn new(cache_path: impl AsRef<Path>) -> Self {
30        let cache_path = cache_path.as_ref().to_path_buf();
31        let cache = Self::load_from_file(&cache_path);
32
33        Self {
34            cache,
35            cache_path,
36            dirty: false,
37        }
38    }
39
40    /// Load cache from file using binary format
41    fn load_from_file(cache_path: &Path) -> Cache {
42        match fs::read(cache_path) {
43            Ok(bytes) => match bincode::deserialize::<Cache>(&bytes) {
44                Ok(cache) => cache,
45                Err(_) => {
46                    eprintln!(
47                        "Warning: Cache file '{}' is corrupted, starting with empty cache",
48                        cache_path.display()
49                    );
50                    Cache::default()
51                }
52            },
53            Err(err) => {
54                // Only log if it's not a "not found" error (expected on first run)
55                if err.kind() != io::ErrorKind::NotFound {
56                    let disk_err = DiskUseError::CacheReadError {
57                        path: cache_path.to_path_buf(),
58                        source: err,
59                    };
60                    eprintln!("Warning: {}", disk_err);
61                }
62                Cache::default()
63            }
64        }
65    }
66
67    /// Save cache to file using binary format
68    pub fn save(&mut self) -> io::Result<()> {
69        if !self.dirty {
70            return Ok(()); // Skip if nothing changed
71        }
72
73        // Ensure parent directory exists
74        if let Some(parent) = self.cache_path.parent() {
75            fs::create_dir_all(parent).map_err(|err| {
76                io::Error::from(DiskUseError::CacheWriteError {
77                    path: parent.to_path_buf(),
78                    source: err,
79                })
80            })?;
81        }
82
83        // Serialize to binary format (much faster than JSON)
84        let bytes = bincode::serialize(&self.cache).map_err(|e| {
85            io::Error::from(DiskUseError::CacheSerializationError {
86                path: self.cache_path.clone(),
87                message: e.to_string(),
88            })
89        })?;
90
91        fs::write(&self.cache_path, bytes).map_err(|err| {
92            io::Error::from(DiskUseError::CacheWriteError {
93                path: self.cache_path.clone(),
94                source: err,
95            })
96        })?;
97
98        self.dirty = false;
99        Ok(())
100    }
101
102    /// Get a cached directory stat by path
103    pub fn get(&self, path: &Path) -> Option<&DirStat> {
104        // Normalize path for lookup
105        let lookup_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
106
107        // 1. Try direct lookup in roots
108        if let Some(stat) = self.cache.roots.get(&lookup_path) {
109            return Some(stat);
110        }
111
112        // 2. Search inside roots
113        // Find the root that is a parent of lookup_path with the longest path
114        let mut best_root: Option<&DirStat> = None;
115
116        for root_stat in self.cache.roots.values() {
117            if lookup_path.starts_with(&root_stat.path) {
118                match best_root {
119                    None => best_root = Some(root_stat),
120                    Some(current_best) => {
121                        // Pick the more specific root (longer path)
122                        if root_stat.path.components().count()
123                            > current_best.path.components().count()
124                        {
125                            best_root = Some(root_stat);
126                        }
127                    }
128                }
129            }
130        }
131
132        // If we found a containing root, traverse down to find the target
133        if let Some(mut current) = best_root {
134            // We know lookup_path starts with current.path
135            if let Ok(relative) = lookup_path.strip_prefix(&current.path) {
136                let mut path_so_far = current.path.clone();
137
138                for component in relative.components() {
139                    path_so_far.push(component);
140
141                    // Try to find the next child
142                    if let Some(child) = current.children.get(&path_so_far) {
143                        current = child;
144                    } else {
145                        // Path diverges from cache
146                        return None;
147                    }
148                }
149
150                // If we consumed all components, we found it
151                return Some(current);
152            }
153        }
154
155        None
156    }
157
158    /// Insert or update a directory stat in the cache
159    /// Path is automatically canonicalized to ensure consistent lookups
160    #[allow(dead_code)]
161    pub fn insert(&mut self, path: PathBuf, stats: DirStat) {
162        // Canonicalize the path before storing to ensure consistent lookups
163        let canonical_path = path.canonicalize().unwrap_or(path);
164        self.cache.roots.insert(canonical_path, stats);
165        self.dirty = true;
166    }
167
168    /// Update an existing entry with new stats
169    /// This is just a convenience wrapper around insert
170    pub fn update(&mut self, path: &Path, new_stats: DirStat) {
171        self.insert(path.to_path_buf(), new_stats);
172    }
173
174    /// Clear all cache contents
175    pub fn clear(&mut self) -> io::Result<()> {
176        self.cache = Cache::default();
177        self.dirty = true;
178        self.save()
179    }
180
181    /// Delete the cache file
182    pub fn delete(&self) -> io::Result<()> {
183        if self.cache_path.exists() {
184            fs::remove_file(&self.cache_path)
185        } else {
186            Ok(())
187        }
188    }
189
190    /// Get the cache file path
191    pub fn path(&self) -> &Path {
192        &self.cache_path
193    }
194
195    /// Get all cached root entries
196    pub fn get_roots(&self) -> Vec<&DirStat> {
197        self.cache.roots.values().collect()
198    }
199
200    /// Check if cache is empty (no roots)
201    pub fn is_empty(&self) -> bool {
202        self.cache.roots.is_empty()
203    }
204}
205
206// Implement Drop to auto-save on destruction
207impl Drop for CacheManager {
208    fn drop(&mut self) {
209        if self.dirty {
210            // Try to save, but don't panic if it fails
211            let _ = self.save();
212        }
213    }
214}
215
216#[cfg(test)]
217mod tests {
218    use super::*;
219    use std::time::SystemTime;
220    use tempfile::TempDir;
221
222    #[test]
223    fn test_cache_manager_basic_operations() -> io::Result<()> {
224        // This test verifies the basic operations of the CacheManager:
225        // 1. Inserting a new entry into the cache.
226        // 2. Retrieving an entry from the cache.
227        // 3. Saving the cache to disk.
228        // 4. Loading the cache from disk and verifying the data persists.
229        let temp_dir = TempDir::new()?;
230        let cache_file = temp_dir.path().join("test_cache.json");
231
232        let mut cache_mgr = CacheManager::new(&cache_file);
233
234        // Test insert
235        let test_stat = DirStat {
236            path: PathBuf::from("/test/path"),
237            total_size: 1000,
238            file_count: 10,
239            last_scan: SystemTime::now(),
240            children: HashMap::new(),
241        };
242
243        cache_mgr.insert(PathBuf::from("/test/path"), test_stat.clone());
244
245        // Test get
246        let retrieved = cache_mgr.get(Path::new("/test/path"));
247        assert!(retrieved.is_some());
248        assert_eq!(retrieved.unwrap().total_size, 1000);
249        assert_eq!(retrieved.unwrap().file_count, 10);
250
251        // Test save
252        cache_mgr.save()?;
253        assert!(cache_file.exists());
254
255        // Test loading from file
256        let cache_mgr2 = CacheManager::new(&cache_file);
257        let retrieved2 = cache_mgr2.get(Path::new("/test/path"));
258        assert!(retrieved2.is_some());
259        assert_eq!(retrieved2.unwrap().total_size, 1000);
260
261        Ok(())
262    }
263
264    #[test]
265    fn test_cache_clear_and_delete() -> io::Result<()> {
266        // This test verifies the cache cleanup operations:
267        // 1. `clear()`: Should remove all entries from the in-memory cache.
268        // 2. `delete()`: Should remove the cache file from the disk.
269        let temp_dir = TempDir::new()?;
270        let cache_file = temp_dir.path().join("test_cache.json");
271
272        let mut cache_mgr = CacheManager::new(&cache_file);
273
274        let test_stat = DirStat {
275            path: PathBuf::from("/test"),
276            total_size: 500,
277            file_count: 5,
278            last_scan: SystemTime::now(),
279            children: HashMap::new(),
280        };
281
282        cache_mgr.insert(PathBuf::from("/test"), test_stat);
283        cache_mgr.save()?;
284
285        // Test clear
286        cache_mgr.clear()?;
287        assert!(cache_mgr.get(Path::new("/test")).is_none());
288
289        // Test delete
290        cache_mgr.delete()?;
291        assert!(!cache_file.exists());
292
293        Ok(())
294    }
295
296    #[test]
297    fn test_get_nested_path() -> io::Result<()> {
298        // This test verifies the nested path retrieval logic.
299        // It creates a cache with a root directory that contains nested children.
300        // It then attempts to retrieve stats for the children directly using `get()`.
301        // This ensures that `get()` can traverse the cached tree structure to find
302        // subdirectories even if they are not top-level roots.
303        let temp_dir = TempDir::new()?;
304        let cache_file = temp_dir.path().join("test_cache.json");
305        let mut cache_mgr = CacheManager::new(&cache_file);
306
307        // Create a nested structure
308        // /root
309        //   /root/child
310        //     /root/child/grandchild
311
312        let grandchild_path = PathBuf::from("/root/child/grandchild");
313        let grandchild_stat = DirStat {
314            path: grandchild_path.clone(),
315            total_size: 10,
316            file_count: 1,
317            last_scan: SystemTime::now(),
318            children: HashMap::new(),
319        };
320
321        let child_path = PathBuf::from("/root/child");
322        let mut child_stat = DirStat {
323            path: child_path.clone(),
324            total_size: 20,
325            file_count: 2,
326            last_scan: SystemTime::now(),
327            children: HashMap::new(),
328        };
329        child_stat
330            .children
331            .insert(grandchild_path.clone(), grandchild_stat);
332
333        let root_path = PathBuf::from("/root");
334        let mut root_stat = DirStat {
335            path: root_path.clone(),
336            total_size: 30,
337            file_count: 3,
338            last_scan: SystemTime::now(),
339            children: HashMap::new(),
340        };
341        root_stat.children.insert(child_path.clone(), child_stat);
342
343        cache_mgr.insert(root_path, root_stat);
344
345        // Test retrieving nested paths
346        let retrieved_child = cache_mgr.get(Path::new("/root/child"));
347        assert!(retrieved_child.is_some());
348        assert_eq!(retrieved_child.unwrap().total_size, 20);
349
350        let retrieved_grandchild = cache_mgr.get(Path::new("/root/child/grandchild"));
351        assert!(retrieved_grandchild.is_some());
352        assert_eq!(retrieved_grandchild.unwrap().total_size, 10);
353
354        // Test non-existent path
355        assert!(cache_mgr.get(Path::new("/root/nonexistent")).is_none());
356        assert!(cache_mgr
357            .get(Path::new("/root/child/nonexistent"))
358            .is_none());
359
360        Ok(())
361    }
362
363    #[test]
364    fn test_cache_write_to_readonly_location() {
365        // Test that writing cache to a readonly location fails gracefully
366        // Note: This test may not work on all platforms or may require special setup
367        let cache_file = PathBuf::from("/dev/null/cannot_write_here");
368        let mut cache_mgr = CacheManager::new(&cache_file);
369
370        let test_stat = DirStat {
371            path: PathBuf::from("/test"),
372            total_size: 100,
373            file_count: 1,
374            last_scan: SystemTime::now(),
375            children: HashMap::new(),
376        };
377
378        cache_mgr.insert(PathBuf::from("/test"), test_stat);
379
380        // Try to save - should fail gracefully
381        let result = cache_mgr.save();
382        assert!(result.is_err());
383        let err = result.unwrap_err();
384        assert!(
385            err.to_string().contains("Failed to write cache file")
386                || err.to_string().contains("Failed to serialize"),
387            "Error message should be descriptive: {}",
388            err
389        );
390    }
391}