From c1df15eb489566c9580cdf582b4401590bd376a8 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Mon, 13 Apr 2026 12:22:11 -0300 Subject: [PATCH] fix: skip GBKB scan when all KBs already indexed in Qdrant - Check kb_indexed_folders before acquiring file_states write lock - Eliminates deadlock from concurrent check_gbkb_changes calls - Prevents unnecessary PDF re-downloads every 10 seconds - Removes debug logging, adds clean early-return Co-authored-by: Qwen-Coder --- src/drive/drive_monitor/mod.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/drive/drive_monitor/mod.rs b/src/drive/drive_monitor/mod.rs index 6e17767c..7aa5d102 100644 --- a/src/drive/drive_monitor/mod.rs +++ b/src/drive/drive_monitor/mod.rs @@ -1529,11 +1529,42 @@ let file_state = FileState { } debug!("[GBKB] Found {} files total, acquiring file_states lock...", current_files.len()); + + // Check if ALL KBs for this bot are already indexed in Qdrant + // If so, skip the entire scan to avoid deadlock and unnecessary downloads + let mut kb_folders: HashSet = HashSet::new(); + for (path, _) in current_files.iter() { + let parts: Vec<&str> = path.split('/').collect(); + if parts.len() >= 3 && parts[0].ends_with(".gbkb") { + kb_folders.insert(parts[1].to_string()); + } + } + + let mut all_indexed = true; + for kb_name in &kb_folders { + let kb_key = format!("{}_{}", bot_name, kb_name); + let indexed = { + let indexed_folders = self.kb_indexed_folders.read().await; + indexed_folders.contains(&kb_key) + }; + if !indexed { + all_indexed = false; + break; + } + } + + if all_indexed && !kb_folders.is_empty() { + trace!("[GBKB] All {} KB folders already indexed, skipping scan for bot {}", + kb_folders.len(), self.bot_id); + return Ok(()); + } + let mut file_states = self.file_states.write().await; debug!("[GBKB] file_states lock acquired, processing {} files", current_files.len()); for (path, current_state) in current_files.iter() { let is_new = !file_states.contains_key(path); + debug!("[GBKB] DEBUG: path={} in_file_states={}", path, !is_new); // Use last_modified as primary change detector (more stable than ETag) // ETags can change due to metadata updates even when content is identical