file and distributes its functionality to individual modules. The calendar and task modules now have their own route configuration and API handlers. Remove centralized API router in favor of module-based routing. Decentralizes API route configuration by moving route definitions and handlers to their respective modules. Each module now exports its own `configure_*_routes()` function that is merged in main.rs. - Delete api_router.rs with its mon
216 lines
6.4 KiB
Rust
216 lines
6.4 KiB
Rust
pub mod document_processor;
|
|
pub mod embedding_generator;
|
|
pub mod kb_indexer;
|
|
pub mod web_crawler;
|
|
pub mod website_crawler_service;
|
|
|
|
pub use document_processor::{DocumentFormat, DocumentProcessor, TextChunk};
|
|
pub use embedding_generator::{
|
|
EmailEmbeddingGenerator, EmbeddingConfig, EmbeddingGenerator, KbEmbeddingGenerator,
|
|
};
|
|
pub use kb_indexer::{KbFolderMonitor, KbIndexer, QdrantConfig, SearchResult};
|
|
pub use web_crawler::{WebCrawler, WebPage, WebsiteCrawlConfig};
|
|
pub use website_crawler_service::{ensure_crawler_service_running, WebsiteCrawlerService};
|
|
|
|
use anyhow::Result;
|
|
use log::{error, info, warn};
|
|
use std::path::Path;
|
|
use std::sync::Arc;
|
|
use tokio::sync::RwLock;
|
|
|
|
/// Main Knowledge Base manager
|
|
#[derive(Debug)]
|
|
pub struct KnowledgeBaseManager {
|
|
indexer: Arc<KbIndexer>,
|
|
processor: Arc<DocumentProcessor>,
|
|
monitor: Arc<RwLock<KbFolderMonitor>>,
|
|
}
|
|
|
|
impl KnowledgeBaseManager {
|
|
/// Create new KB manager with default configuration
|
|
pub fn new(work_root: impl Into<std::path::PathBuf>) -> Self {
|
|
let work_root = work_root.into();
|
|
let embedding_config = EmbeddingConfig::from_env();
|
|
let qdrant_config = QdrantConfig::default();
|
|
|
|
let indexer = Arc::new(KbIndexer::new(embedding_config.clone(), qdrant_config));
|
|
let processor = Arc::new(DocumentProcessor::default());
|
|
let monitor = Arc::new(RwLock::new(KbFolderMonitor::new(
|
|
work_root,
|
|
embedding_config,
|
|
)));
|
|
|
|
Self {
|
|
indexer,
|
|
processor,
|
|
monitor,
|
|
}
|
|
}
|
|
|
|
/// Process and index a knowledge base folder
|
|
pub async fn index_kb_folder(
|
|
&self,
|
|
bot_name: &str,
|
|
kb_name: &str,
|
|
kb_path: &Path,
|
|
) -> Result<()> {
|
|
info!(
|
|
"Indexing knowledge base: {} for bot {} from path: {:?}",
|
|
kb_name, bot_name, kb_path
|
|
);
|
|
|
|
// Index the folder using the indexer
|
|
let result = self
|
|
.indexer
|
|
.index_kb_folder(bot_name, kb_name, kb_path)
|
|
.await?;
|
|
|
|
info!(
|
|
"Successfully indexed {} documents with {} chunks into collection {}",
|
|
result.documents_processed, result.chunks_indexed, result.collection_name
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Search in a knowledge base
|
|
pub async fn search(
|
|
&self,
|
|
bot_name: &str,
|
|
kb_name: &str,
|
|
query: &str,
|
|
limit: usize,
|
|
) -> Result<Vec<SearchResult>> {
|
|
let collection_name = format!("{}_{}", bot_name, kb_name);
|
|
self.indexer.search(&collection_name, query, limit).await
|
|
}
|
|
|
|
/// Process a single document
|
|
pub async fn process_document(&self, file_path: &Path) -> Result<Vec<TextChunk>> {
|
|
self.processor.process_document(file_path).await
|
|
}
|
|
|
|
/// Handle .gbkb folder change notification from drive monitor
|
|
pub async fn handle_gbkb_change(&self, bot_name: &str, kb_folder: &Path) -> Result<()> {
|
|
info!(
|
|
"Handling .gbkb folder change for bot {} at {:?}",
|
|
bot_name, kb_folder
|
|
);
|
|
|
|
let monitor = self.monitor.read().await;
|
|
monitor.process_gbkb_folder(bot_name, kb_folder).await
|
|
}
|
|
|
|
/// Clear a knowledge base collection
|
|
pub async fn clear_kb(&self, bot_name: &str, kb_name: &str) -> Result<()> {
|
|
let collection_name = format!("{}_{}", bot_name, kb_name);
|
|
|
|
warn!("Clearing knowledge base collection: {}", collection_name);
|
|
|
|
match self.indexer.delete_collection(&collection_name).await {
|
|
Ok(_) => {
|
|
info!("Successfully cleared collection: {}", collection_name);
|
|
Ok(())
|
|
}
|
|
Err(e) => {
|
|
error!("Failed to clear collection {}: {}", collection_name, e);
|
|
Err(e)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Get collection statistics
|
|
pub async fn get_kb_stats(&self, bot_name: &str, kb_name: &str) -> Result<KbStatistics> {
|
|
let collection_name = format!("{}_{}", bot_name, kb_name);
|
|
|
|
// This would query Qdrant for collection statistics
|
|
// For now, return placeholder stats
|
|
Ok(KbStatistics {
|
|
collection_name,
|
|
document_count: 0,
|
|
chunk_count: 0,
|
|
total_size_bytes: 0,
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Statistics for a knowledge base.
///
/// `Default` yields an empty collection name with all counters at zero;
/// `PartialEq`/`Eq` allow whole-value comparison.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct KbStatistics {
    /// Name of the collection the statistics refer to.
    pub collection_name: String,
    /// Number of documents.
    pub document_count: usize,
    /// Number of chunks.
    pub chunk_count: usize,
    /// Total size in bytes.
    pub total_size_bytes: usize,
}
|
|
|
|
/// Integration with drive monitor
|
|
#[derive(Debug)]
|
|
pub struct DriveMonitorIntegration {
|
|
kb_manager: Arc<KnowledgeBaseManager>,
|
|
}
|
|
|
|
impl DriveMonitorIntegration {
|
|
pub fn new(kb_manager: Arc<KnowledgeBaseManager>) -> Self {
|
|
Self { kb_manager }
|
|
}
|
|
|
|
/// Called when drive monitor detects changes in .gbkb folder
|
|
pub async fn on_gbkb_folder_changed(
|
|
&self,
|
|
bot_name: &str,
|
|
folder_path: &Path,
|
|
change_type: ChangeType,
|
|
) -> Result<()> {
|
|
match change_type {
|
|
ChangeType::Created | ChangeType::Modified => {
|
|
info!(
|
|
"Drive monitor detected {:?} in .gbkb folder: {:?}",
|
|
change_type, folder_path
|
|
);
|
|
self.kb_manager
|
|
.handle_gbkb_change(bot_name, folder_path)
|
|
.await
|
|
}
|
|
ChangeType::Deleted => {
|
|
// Extract KB name from path
|
|
if let Some(kb_name) = folder_path.file_name().and_then(|n| n.to_str()) {
|
|
self.kb_manager.clear_kb(bot_name, kb_name).await
|
|
} else {
|
|
Err(anyhow::anyhow!("Invalid KB folder path"))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Types of changes detected by the drive monitor.
///
/// `Copy`, so it is passed by value; `PartialEq`/`Eq` allow direct comparison
/// of variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeType {
    /// A folder was created.
    Created,
    /// A folder was modified.
    Modified,
    /// A folder was deleted.
    Deleted,
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A freshly constructed manager exposes a processor with a chunk size of
    /// 1000 and a chunk overlap of 200.
    #[tokio::test]
    async fn test_kb_manager_creation() {
        let temp_dir = TempDir::new().expect("failed to create temporary directory");
        let manager = KnowledgeBaseManager::new(temp_dir.path());

        // `assert_eq!` reports both values on failure, unlike `assert!(a == b)`.
        assert_eq!(manager.processor.chunk_size(), 1000);
        assert_eq!(manager.processor.chunk_overlap(), 200);
    }

    /// Collection names follow the `<bot_name>_<kb_name>` scheme.
    #[test]
    fn test_collection_naming() {
        let bot_name = "testbot";
        let kb_name = "docs";
        let collection_name = format!("{}_{}", bot_name, kb_name);
        assert_eq!(collection_name, "testbot_docs");
    }
}
|