pub mod document_processor; pub mod embedding_generator; pub mod kb_indexer; pub mod web_crawler; pub mod website_crawler_service; pub use document_processor::{DocumentFormat, DocumentProcessor, TextChunk}; pub use embedding_generator::{ EmailEmbeddingGenerator, EmbeddingConfig, EmbeddingGenerator, KbEmbeddingGenerator, }; pub use kb_indexer::{KbFolderMonitor, KbIndexer, QdrantConfig, SearchResult}; pub use web_crawler::{WebCrawler, WebPage, WebsiteCrawlConfig}; pub use website_crawler_service::{ensure_crawler_service_running, WebsiteCrawlerService}; use anyhow::Result; use log::{error, info, warn}; use std::path::Path; use std::sync::Arc; use tokio::sync::RwLock; /// Main Knowledge Base manager #[derive(Debug)] pub struct KnowledgeBaseManager { indexer: Arc, processor: Arc, monitor: Arc>, } impl KnowledgeBaseManager { /// Create new KB manager with default configuration pub fn new(work_root: impl Into) -> Self { let work_root = work_root.into(); let embedding_config = EmbeddingConfig::from_env(); let qdrant_config = QdrantConfig::default(); let indexer = Arc::new(KbIndexer::new(embedding_config.clone(), qdrant_config)); let processor = Arc::new(DocumentProcessor::default()); let monitor = Arc::new(RwLock::new(KbFolderMonitor::new( work_root, embedding_config, ))); Self { indexer, processor, monitor, } } /// Process and index a knowledge base folder pub async fn index_kb_folder( &self, bot_name: &str, kb_name: &str, kb_path: &Path, ) -> Result<()> { info!( "Indexing knowledge base: {} for bot {} from path: {:?}", kb_name, bot_name, kb_path ); // Index the folder using the indexer let result = self .indexer .index_kb_folder(bot_name, kb_name, kb_path) .await?; info!( "Successfully indexed {} documents with {} chunks into collection {}", result.documents_processed, result.chunks_indexed, result.collection_name ); Ok(()) } /// Search in a knowledge base pub async fn search( &self, bot_name: &str, kb_name: &str, query: &str, limit: usize, ) -> Result> { let collection_name = format!("{}_{}", bot_name, kb_name); self.indexer.search(&collection_name, query, limit).await } /// Process a single document pub async fn process_document(&self, file_path: &Path) -> Result> { self.processor.process_document(file_path).await } /// Handle .gbkb folder change notification from drive monitor pub async fn handle_gbkb_change(&self, bot_name: &str, kb_folder: &Path) -> Result<()> { info!( "Handling .gbkb folder change for bot {} at {:?}", bot_name, kb_folder ); let monitor = self.monitor.read().await; monitor.process_gbkb_folder(bot_name, kb_folder).await } /// Clear a knowledge base collection pub async fn clear_kb(&self, bot_name: &str, kb_name: &str) -> Result<()> { let collection_name = format!("{}_{}", bot_name, kb_name); warn!("Clearing knowledge base collection: {}", collection_name); match self.indexer.delete_collection(&collection_name).await { Ok(_) => { info!("Successfully cleared collection: {}", collection_name); Ok(()) } Err(e) => { error!("Failed to clear collection {}: {}", collection_name, e); Err(e) } } } /// Get collection statistics pub async fn get_kb_stats(&self, bot_name: &str, kb_name: &str) -> Result { let collection_name = format!("{}_{}", bot_name, kb_name); // This would query Qdrant for collection statistics // For now, return placeholder stats Ok(KbStatistics { collection_name, document_count: 0, chunk_count: 0, total_size_bytes: 0, }) } } /// Statistics for a knowledge base #[derive(Debug, Clone)] pub struct KbStatistics { pub collection_name: String, pub document_count: usize, pub chunk_count: usize, pub total_size_bytes: usize, } /// Integration with drive monitor pub struct DriveMonitorIntegration { kb_manager: Arc, } impl DriveMonitorIntegration { pub fn new(kb_manager: Arc) -> Self { Self { kb_manager } } /// Called when drive monitor detects changes in .gbkb folder pub async fn on_gbkb_folder_changed( &self, bot_name: &str, folder_path: &Path, change_type: ChangeType, ) -> Result<()> { match change_type { ChangeType::Created | ChangeType::Modified => { info!( "Drive monitor detected {:?} in .gbkb folder: {:?}", change_type, folder_path ); self.kb_manager .handle_gbkb_change(bot_name, folder_path) .await } ChangeType::Deleted => { // Extract KB name from path if let Some(kb_name) = folder_path.file_name().and_then(|n| n.to_str()) { self.kb_manager.clear_kb(bot_name, kb_name).await } else { Err(anyhow::anyhow!("Invalid KB folder path")) } } } } } /// Types of changes detected by drive monitor #[derive(Debug, Clone, Copy)] pub enum ChangeType { Created, Modified, Deleted, } #[cfg(test)] mod tests { use super::*; use tempfile::TempDir; #[tokio::test] async fn test_kb_manager_creation() { let temp_dir = TempDir::new().unwrap(); let manager = KnowledgeBaseManager::new(temp_dir.path()); // Test that manager is created successfully assert!(manager.processor.chunk_size() == 1000); assert!(manager.processor.chunk_overlap() == 200); } #[test] fn test_collection_naming() { let bot_name = "testbot"; let kb_name = "docs"; let collection_name = format!("{}_{}", bot_name, kb_name); assert_eq!(collection_name, "testbot_docs"); } }