use anyhow::Result; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use std::path::PathBuf; use std::sync::Arc; use tokio::fs; use uuid::Uuid; #[cfg(feature = "vectordb")] use qdrant_client::{ prelude::*, qdrant::{vectors_config::Config, CreateCollection, Distance, VectorParams, VectorsConfig}, }; /// Email metadata for vector DB indexing #[derive(Debug, Clone, Serialize, Deserialize)] pub struct EmailDocument { pub id: String, pub account_id: String, pub from_email: String, pub from_name: String, pub to_email: String, pub subject: String, pub body_text: String, pub date: DateTime, pub folder: String, pub has_attachments: bool, pub thread_id: Option, } /// Email search query #[derive(Debug, Clone, Serialize, Deserialize)] pub struct EmailSearchQuery { pub query_text: String, pub account_id: Option, pub folder: Option, pub date_from: Option>, pub date_to: Option>, pub limit: usize, } /// Email search result #[derive(Debug, Clone, Serialize, Deserialize)] pub struct EmailSearchResult { pub email: EmailDocument, pub score: f32, pub snippet: String, } /// Per-user email vector DB manager pub struct UserEmailVectorDB { user_id: Uuid, bot_id: Uuid, collection_name: String, db_path: PathBuf, #[cfg(feature = "vectordb")] client: Option>, } impl UserEmailVectorDB { /// Create new user email vector DB instance pub fn new(user_id: Uuid, bot_id: Uuid, db_path: PathBuf) -> Self { let collection_name = format!("emails_{}_{}", bot_id, user_id); Self { user_id, bot_id, collection_name, db_path, #[cfg(feature = "vectordb")] client: None, } } /// Initialize vector DB collection #[cfg(feature = "vectordb")] pub async fn initialize(&mut self, qdrant_url: &str) -> Result<()> { let client = QdrantClient::from_url(qdrant_url).build()?; // Check if collection exists let collections = client.list_collections().await?; let exists = collections .collections .iter() .any(|c| c.name == self.collection_name); if !exists { // Create collection for email embeddings (1536 dimensions for OpenAI embeddings) client .create_collection(&CreateCollection { collection_name: self.collection_name.clone(), vectors_config: Some(VectorsConfig { config: Some(Config::Params(VectorParams { size: 1536, distance: Distance::Cosine.into(), ..Default::default() })), }), ..Default::default() }) .await?; log::info!("Created email vector collection: {}", self.collection_name); } self.client = Some(Arc::new(client)); Ok(()) } #[cfg(not(feature = "vectordb"))] pub async fn initialize(&mut self, _qdrant_url: &str) -> Result<()> { log::warn!("Vector DB feature not enabled, using fallback storage"); Ok(()) } /// Index a single email (on-demand) #[cfg(feature = "vectordb")] pub async fn index_email(&self, email: &EmailDocument, embedding: Vec) -> Result<()> { let client = self .client .as_ref() .ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?; let point = PointStruct::new(email.id.clone(), embedding, serde_json::to_value(email)?); client .upsert_points_blocking(self.collection_name.clone(), vec![point], None) .await?; log::debug!("Indexed email: {} - {}", email.id, email.subject); Ok(()) } #[cfg(not(feature = "vectordb"))] pub async fn index_email(&self, email: &EmailDocument, _embedding: Vec) -> Result<()> { // Fallback: Store in JSON file let file_path = self.db_path.join(format!("{}.json", email.id)); let json = serde_json::to_string_pretty(email)?; fs::write(file_path, json).await?; Ok(()) } /// Index multiple emails in batch pub async fn index_emails_batch(&self, emails: &[(EmailDocument, Vec)]) -> Result<()> { for (email, embedding) in emails { self.index_email(email, embedding.clone()).await?; } Ok(()) } /// Search emails using vector similarity #[cfg(feature = "vectordb")] pub async fn search( &self, query: &EmailSearchQuery, query_embedding: Vec, ) -> Result> { let client = self .client .as_ref() .ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?; // Build filter if specified let mut filter = None; if query.account_id.is_some() || query.folder.is_some() { let mut conditions = vec![]; if let Some(account_id) = &query.account_id { conditions.push(qdrant_client::qdrant::Condition::matches( "account_id", account_id.clone(), )); } if let Some(folder) = &query.folder { conditions.push(qdrant_client::qdrant::Condition::matches( "folder", folder.clone(), )); } filter = Some(qdrant_client::qdrant::Filter::must(conditions)); } let search_result = client .search_points(&qdrant_client::qdrant::SearchPoints { collection_name: self.collection_name.clone(), vector: query_embedding, limit: query.limit as u64, filter, with_payload: Some(true.into()), ..Default::default() }) .await?; let mut results = Vec::new(); for point in search_result.result { if let Some(payload) = point.payload { let email: EmailDocument = serde_json::from_value(serde_json::to_value(&payload)?)?; // Create snippet from body (first 200 chars) let snippet = if email.body_text.len() > 200 { format!("{}...", &email.body_text[..200]) } else { email.body_text.clone() }; results.push(EmailSearchResult { email, score: point.score, snippet, }); } } Ok(results) } #[cfg(not(feature = "vectordb"))] pub async fn search( &self, query: &EmailSearchQuery, _query_embedding: Vec, ) -> Result> { // Fallback: Simple text search in JSON files let mut results = Vec::new(); let mut entries = fs::read_dir(&self.db_path).await?; while let Some(entry) = entries.next_entry().await? { if entry.path().extension().and_then(|s| s.to_str()) == Some("json") { let content = fs::read_to_string(entry.path()).await?; if let Ok(email) = serde_json::from_str::(&content) { // Simple text matching let query_lower = query.query_text.to_lowercase(); if email.subject.to_lowercase().contains(&query_lower) || email.body_text.to_lowercase().contains(&query_lower) || email.from_email.to_lowercase().contains(&query_lower) { let snippet = if email.body_text.len() > 200 { format!("{}...", &email.body_text[..200]) } else { email.body_text.clone() }; results.push(EmailSearchResult { email, score: 1.0, snippet, }); } } if results.len() >= query.limit { break; } } } Ok(results) } /// Delete email from index #[cfg(feature = "vectordb")] pub async fn delete_email(&self, email_id: &str) -> Result<()> { let client = self .client .as_ref() .ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?; client .delete_points( self.collection_name.clone(), &vec![email_id.into()].into(), None, ) .await?; log::debug!("Deleted email from index: {}", email_id); Ok(()) } #[cfg(not(feature = "vectordb"))] pub async fn delete_email(&self, email_id: &str) -> Result<()> { let file_path = self.db_path.join(format!("{}.json", email_id)); if file_path.exists() { fs::remove_file(file_path).await?; } Ok(()) } /// Get indexed email count #[cfg(feature = "vectordb")] pub async fn get_count(&self) -> Result { let client = self .client .as_ref() .ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?; let info = client.collection_info(self.collection_name.clone()).await?; Ok(info.result.unwrap().points_count.unwrap_or(0)) } #[cfg(not(feature = "vectordb"))] pub async fn get_count(&self) -> Result { let mut count = 0; let mut entries = fs::read_dir(&self.db_path).await?; while let Some(entry) = entries.next_entry().await? { if entry.path().extension().and_then(|s| s.to_str()) == Some("json") { count += 1; } } Ok(count) } /// Clear all indexed emails #[cfg(feature = "vectordb")] pub async fn clear(&self) -> Result<()> { let client = self .client .as_ref() .ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?; client .delete_collection(self.collection_name.clone()) .await?; // Recreate empty collection client .create_collection(&CreateCollection { collection_name: self.collection_name.clone(), vectors_config: Some(VectorsConfig { config: Some(Config::Params(VectorParams { size: 1536, distance: Distance::Cosine.into(), ..Default::default() })), }), ..Default::default() }) .await?; log::info!("Cleared email vector collection: {}", self.collection_name); Ok(()) } #[cfg(not(feature = "vectordb"))] pub async fn clear(&self) -> Result<()> { if self.db_path.exists() { fs::remove_dir_all(&self.db_path).await?; fs::create_dir_all(&self.db_path).await?; } Ok(()) } } /// Email embedding generator using LLM pub struct EmailEmbeddingGenerator { llm_endpoint: String, } impl EmailEmbeddingGenerator { pub fn new(llm_endpoint: String) -> Self { Self { llm_endpoint } } /// Generate embedding for email content pub async fn generate_embedding(&self, email: &EmailDocument) -> Result> { // Combine email fields for embedding let text = format!( "From: {} <{}>\nSubject: {}\n\n{}", email.from_name, email.from_email, email.subject, email.body_text ); // Truncate if too long (max 8000 chars for most embedding models) let text = if text.len() > 8000 { &text[..8000] } else { &text }; // Call LLM embedding endpoint // This is a placeholder - implement actual LLM call self.generate_text_embedding(text).await } /// Generate embedding from raw text pub async fn generate_text_embedding(&self, text: &str) -> Result> { // TODO: Implement actual embedding generation using: // - OpenAI embeddings API // - Local embedding model (sentence-transformers) // - Or other embedding service // Placeholder: Return dummy embedding log::warn!("Using placeholder embedding - implement actual embedding generation!"); Ok(vec![0.0; 1536]) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_email_document_creation() { let email = EmailDocument { id: "test-123".to_string(), account_id: "account-456".to_string(), from_email: "sender@example.com".to_string(), from_name: "Test Sender".to_string(), to_email: "receiver@example.com".to_string(), subject: "Test Subject".to_string(), body_text: "Test email body".to_string(), date: Utc::now(), folder: "INBOX".to_string(), has_attachments: false, thread_id: None, }; assert_eq!(email.id, "test-123"); assert_eq!(email.subject, "Test Subject"); } #[tokio::test] async fn test_user_email_vectordb_creation() { let temp_dir = std::env::temp_dir().join("test_vectordb"); let db = UserEmailVectorDB::new(Uuid::new_v4(), Uuid::new_v4(), temp_dir); assert!(db.collection_name.starts_with("emails_")); } }