use anyhow::Result;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use std::sync::Arc;
use uuid::Uuid;

#[cfg(not(feature = "vectordb"))]
use tokio::fs;

#[cfg(feature = "vectordb")]
use qdrant_client::{
    qdrant::{Distance, PointStruct, VectorParams},
    Qdrant,
};
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
|
pub struct EmailDocument {
|
|
|
|
|
pub id: String,
|
|
|
|
|
pub account_id: String,
|
|
|
|
|
pub from_email: String,
|
|
|
|
|
pub from_name: String,
|
|
|
|
|
pub to_email: String,
|
|
|
|
|
pub subject: String,
|
|
|
|
|
pub body_text: String,
|
|
|
|
|
pub date: DateTime<Utc>,
|
|
|
|
|
pub folder: String,
|
|
|
|
|
pub has_attachments: bool,
|
|
|
|
|
pub thread_id: Option<String>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
|
pub struct EmailSearchQuery {
|
|
|
|
|
pub query_text: String,
|
|
|
|
|
pub account_id: Option<String>,
|
|
|
|
|
pub folder: Option<String>,
|
|
|
|
|
pub date_from: Option<DateTime<Utc>>,
|
|
|
|
|
pub date_to: Option<DateTime<Utc>>,
|
|
|
|
|
pub limit: usize,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
|
pub struct EmailSearchResult {
|
|
|
|
|
pub email: EmailDocument,
|
|
|
|
|
pub score: f32,
|
|
|
|
|
pub snippet: String,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub struct UserEmailVectorDB {
|
|
|
|
|
user_id: Uuid,
|
|
|
|
|
bot_id: Uuid,
|
|
|
|
|
collection_name: String,
|
|
|
|
|
db_path: PathBuf,
|
|
|
|
|
#[cfg(feature = "vectordb")]
|
2025-12-23 15:52:35 -03:00
|
|
|
client: Option<Arc<Qdrant>>,
|
2025-11-21 09:28:35 -03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl UserEmailVectorDB {
|
|
|
|
|
pub fn new(user_id: Uuid, bot_id: Uuid, db_path: PathBuf) -> Self {
|
|
|
|
|
let collection_name = format!("emails_{}_{}", bot_id, user_id);
|
2025-12-26 08:59:25 -03:00
|
|
|
log::trace!(
|
feat(autotask): Implement AutoTask system with intent classification and app generation
- Add IntentClassifier with 7 intent types (APP_CREATE, TODO, MONITOR, ACTION, SCHEDULE, GOAL, TOOL)
- Add AppGenerator with LLM-powered app structure analysis
- Add DesignerAI for modifying apps through conversation
- Add app_server for serving generated apps with clean URLs
- Add db_api for CRUD operations on bot database tables
- Add ask_later keyword for pending info collection
- Add migration 6.1.1 with tables: pending_info, auto_tasks, execution_plans, task_approvals, task_decisions, safety_audit_log, generated_apps, intent_classifications, designer_changes
- Write apps to S3 drive and sync to SITE_ROOT for serving
- Clean URL structure: /apps/{app_name}/
- Integrate with DriveMonitor for file sync
Based on Chapter 17 - Autonomous Tasks specification
2025-12-27 21:10:09 -03:00
|
|
|
"Creating UserEmailVectorDB for user={} bot={} path={}",
|
2025-12-26 08:59:25 -03:00
|
|
|
user_id,
|
|
|
|
|
bot_id,
|
feat(autotask): Implement AutoTask system with intent classification and app generation
- Add IntentClassifier with 7 intent types (APP_CREATE, TODO, MONITOR, ACTION, SCHEDULE, GOAL, TOOL)
- Add AppGenerator with LLM-powered app structure analysis
- Add DesignerAI for modifying apps through conversation
- Add app_server for serving generated apps with clean URLs
- Add db_api for CRUD operations on bot database tables
- Add ask_later keyword for pending info collection
- Add migration 6.1.1 with tables: pending_info, auto_tasks, execution_plans, task_approvals, task_decisions, safety_audit_log, generated_apps, intent_classifications, designer_changes
- Write apps to S3 drive and sync to SITE_ROOT for serving
- Clean URL structure: /apps/{app_name}/
- Integrate with DriveMonitor for file sync
Based on Chapter 17 - Autonomous Tasks specification
2025-12-27 21:10:09 -03:00
|
|
|
db_path.display()
|
2025-12-26 08:59:25 -03:00
|
|
|
);
|
2025-11-21 09:28:35 -03:00
|
|
|
|
|
|
|
|
Self {
|
|
|
|
|
user_id,
|
|
|
|
|
bot_id,
|
|
|
|
|
collection_name,
|
|
|
|
|
db_path,
|
|
|
|
|
#[cfg(feature = "vectordb")]
|
|
|
|
|
client: None,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(feature = "vectordb")]
|
|
|
|
|
pub async fn initialize(&mut self, qdrant_url: &str) -> Result<()> {
|
2025-12-26 08:59:25 -03:00
|
|
|
log::info!(
|
feat(autotask): Implement AutoTask system with intent classification and app generation
- Add IntentClassifier with 7 intent types (APP_CREATE, TODO, MONITOR, ACTION, SCHEDULE, GOAL, TOOL)
- Add AppGenerator with LLM-powered app structure analysis
- Add DesignerAI for modifying apps through conversation
- Add app_server for serving generated apps with clean URLs
- Add db_api for CRUD operations on bot database tables
- Add ask_later keyword for pending info collection
- Add migration 6.1.1 with tables: pending_info, auto_tasks, execution_plans, task_approvals, task_decisions, safety_audit_log, generated_apps, intent_classifications, designer_changes
- Write apps to S3 drive and sync to SITE_ROOT for serving
- Clean URL structure: /apps/{app_name}/
- Integrate with DriveMonitor for file sync
Based on Chapter 17 - Autonomous Tasks specification
2025-12-27 21:10:09 -03:00
|
|
|
"Initializing email vector DB for user={} bot={} at {}",
|
2025-12-26 08:59:25 -03:00
|
|
|
self.user_id,
|
|
|
|
|
self.bot_id,
|
feat(autotask): Implement AutoTask system with intent classification and app generation
- Add IntentClassifier with 7 intent types (APP_CREATE, TODO, MONITOR, ACTION, SCHEDULE, GOAL, TOOL)
- Add AppGenerator with LLM-powered app structure analysis
- Add DesignerAI for modifying apps through conversation
- Add app_server for serving generated apps with clean URLs
- Add db_api for CRUD operations on bot database tables
- Add ask_later keyword for pending info collection
- Add migration 6.1.1 with tables: pending_info, auto_tasks, execution_plans, task_approvals, task_decisions, safety_audit_log, generated_apps, intent_classifications, designer_changes
- Write apps to S3 drive and sync to SITE_ROOT for serving
- Clean URL structure: /apps/{app_name}/
- Integrate with DriveMonitor for file sync
Based on Chapter 17 - Autonomous Tasks specification
2025-12-27 21:10:09 -03:00
|
|
|
self.db_path.display()
|
2025-12-26 08:59:25 -03:00
|
|
|
);
|
2025-12-23 15:52:35 -03:00
|
|
|
let client = Qdrant::from_url(qdrant_url).build()?;
|
2025-11-21 09:28:35 -03:00
|
|
|
|
|
|
|
|
let collections = client.list_collections().await?;
|
|
|
|
|
let exists = collections
|
|
|
|
|
.collections
|
|
|
|
|
.iter()
|
|
|
|
|
.any(|c| c.name == self.collection_name);
|
|
|
|
|
|
|
|
|
|
if !exists {
|
|
|
|
|
client
|
2025-12-23 15:52:35 -03:00
|
|
|
.create_collection(
|
|
|
|
|
qdrant_client::qdrant::CreateCollectionBuilder::new(&self.collection_name)
|
|
|
|
|
.vectors_config(VectorParams {
|
2025-11-21 09:28:35 -03:00
|
|
|
size: 1536,
|
|
|
|
|
distance: Distance::Cosine.into(),
|
|
|
|
|
..Default::default()
|
2025-12-23 15:52:35 -03:00
|
|
|
}),
|
|
|
|
|
)
|
2025-11-21 09:28:35 -03:00
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
log::info!("Created email vector collection: {}", self.collection_name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.client = Some(Arc::new(client));
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(not(feature = "vectordb"))]
|
|
|
|
|
pub async fn initialize(&mut self, _qdrant_url: &str) -> Result<()> {
|
|
|
|
|
log::warn!("Vector DB feature not enabled, using fallback storage");
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(feature = "vectordb")]
|
|
|
|
|
pub async fn index_email(&self, email: &EmailDocument, embedding: Vec<f32>) -> Result<()> {
|
|
|
|
|
let client = self
|
|
|
|
|
.client
|
|
|
|
|
.as_ref()
|
|
|
|
|
.ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?;
|
|
|
|
|
|
2025-12-23 15:52:35 -03:00
|
|
|
let payload: qdrant_client::Payload = serde_json::to_value(email)?
|
|
|
|
|
.as_object()
|
feat(autotask): Implement AutoTask system with intent classification and app generation
- Add IntentClassifier with 7 intent types (APP_CREATE, TODO, MONITOR, ACTION, SCHEDULE, GOAL, TOOL)
- Add AppGenerator with LLM-powered app structure analysis
- Add DesignerAI for modifying apps through conversation
- Add app_server for serving generated apps with clean URLs
- Add db_api for CRUD operations on bot database tables
- Add ask_later keyword for pending info collection
- Add migration 6.1.1 with tables: pending_info, auto_tasks, execution_plans, task_approvals, task_decisions, safety_audit_log, generated_apps, intent_classifications, designer_changes
- Write apps to S3 drive and sync to SITE_ROOT for serving
- Clean URL structure: /apps/{app_name}/
- Integrate with DriveMonitor for file sync
Based on Chapter 17 - Autonomous Tasks specification
2025-12-27 21:10:09 -03:00
|
|
|
.cloned()
|
2025-12-23 15:52:35 -03:00
|
|
|
.unwrap_or_default()
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|(k, v)| (k, qdrant_client::qdrant::Value::from(v.to_string())))
|
|
|
|
|
.collect::<std::collections::HashMap<_, _>>()
|
|
|
|
|
.into();
|
|
|
|
|
|
|
|
|
|
let point = PointStruct::new(email.id.clone(), embedding, payload);
|
2025-11-21 09:28:35 -03:00
|
|
|
|
|
|
|
|
client
|
Update attendance, keywords, calendar, compliance, console, core, drive, email, llm, msteams, security, and tasks modules
2025-12-24 09:29:27 -03:00
|
|
|
.upsert_points(qdrant_client::qdrant::UpsertPointsBuilder::new(
|
|
|
|
|
&self.collection_name,
|
|
|
|
|
vec![point],
|
|
|
|
|
))
|
2025-11-21 09:28:35 -03:00
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
log::debug!("Indexed email: {} - {}", email.id, email.subject);
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(not(feature = "vectordb"))]
|
|
|
|
|
pub async fn index_email(&self, email: &EmailDocument, _embedding: Vec<f32>) -> Result<()> {
|
|
|
|
|
let file_path = self.db_path.join(format!("{}.json", email.id));
|
|
|
|
|
let json = serde_json::to_string_pretty(email)?;
|
|
|
|
|
fs::write(file_path, json).await?;
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn index_emails_batch(&self, emails: &[(EmailDocument, Vec<f32>)]) -> Result<()> {
|
|
|
|
|
for (email, embedding) in emails {
|
|
|
|
|
self.index_email(email, embedding.clone()).await?;
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(feature = "vectordb")]
|
|
|
|
|
pub async fn search(
|
|
|
|
|
&self,
|
|
|
|
|
query: &EmailSearchQuery,
|
|
|
|
|
query_embedding: Vec<f32>,
|
|
|
|
|
) -> Result<Vec<EmailSearchResult>> {
|
|
|
|
|
let client = self
|
|
|
|
|
.client
|
|
|
|
|
.as_ref()
|
|
|
|
|
.ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?;
|
|
|
|
|
|
2025-12-23 15:52:35 -03:00
|
|
|
let filter = if query.account_id.is_some() || query.folder.is_some() {
|
2025-11-21 09:28:35 -03:00
|
|
|
let mut conditions = vec![];
|
|
|
|
|
|
|
|
|
|
if let Some(account_id) = &query.account_id {
|
|
|
|
|
conditions.push(qdrant_client::qdrant::Condition::matches(
|
|
|
|
|
"account_id",
|
|
|
|
|
account_id.clone(),
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if let Some(folder) = &query.folder {
|
|
|
|
|
conditions.push(qdrant_client::qdrant::Condition::matches(
|
|
|
|
|
"folder",
|
|
|
|
|
folder.clone(),
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-23 15:52:35 -03:00
|
|
|
Some(qdrant_client::qdrant::Filter::must(conditions))
|
|
|
|
|
} else {
|
|
|
|
|
None
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let mut search_builder = qdrant_client::qdrant::SearchPointsBuilder::new(
|
|
|
|
|
&self.collection_name,
|
|
|
|
|
query_embedding,
|
|
|
|
|
query.limit as u64,
|
|
|
|
|
)
|
|
|
|
|
.with_payload(true);
|
|
|
|
|
|
|
|
|
|
if let Some(f) = filter {
|
|
|
|
|
search_builder = search_builder.filter(f);
|
2025-11-21 09:28:35 -03:00
|
|
|
}
|
|
|
|
|
|
2025-12-23 15:52:35 -03:00
|
|
|
let search_result = client.search_points(search_builder).await?;
|
2025-11-21 09:28:35 -03:00
|
|
|
|
|
|
|
|
let mut results = Vec::new();
|
|
|
|
|
for point in search_result.result {
|
2025-12-23 15:52:35 -03:00
|
|
|
let payload = &point.payload;
|
|
|
|
|
if !payload.is_empty() {
|
|
|
|
|
let get_str = |key: &str| -> String {
|
Update attendance, keywords, calendar, compliance, console, core, drive, email, llm, msteams, security, and tasks modules
2025-12-24 09:29:27 -03:00
|
|
|
payload
|
|
|
|
|
.get(key)
|
2025-12-23 15:52:35 -03:00
|
|
|
.and_then(|v| v.as_str())
|
|
|
|
|
.map(|s| s.to_string())
|
|
|
|
|
.unwrap_or_default()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let email = EmailDocument {
|
|
|
|
|
id: get_str("id"),
|
|
|
|
|
account_id: get_str("account_id"),
|
|
|
|
|
from_email: get_str("from_email"),
|
|
|
|
|
from_name: get_str("from_name"),
|
|
|
|
|
to_email: get_str("to_email"),
|
|
|
|
|
subject: get_str("subject"),
|
|
|
|
|
body_text: get_str("body_text"),
|
2025-12-23 18:40:58 -03:00
|
|
|
date: chrono::Utc::now(),
|
2025-12-23 15:52:35 -03:00
|
|
|
folder: get_str("folder"),
|
|
|
|
|
has_attachments: false,
|
Update attendance, keywords, calendar, compliance, console, core, drive, email, llm, msteams, security, and tasks modules
2025-12-24 09:29:27 -03:00
|
|
|
thread_id: payload
|
|
|
|
|
.get("thread_id")
|
|
|
|
|
.and_then(|v| v.as_str())
|
|
|
|
|
.map(|s| s.to_string()),
|
2025-12-23 15:52:35 -03:00
|
|
|
};
|
2025-11-21 09:28:35 -03:00
|
|
|
|
|
|
|
|
let snippet = if email.body_text.len() > 200 {
|
|
|
|
|
format!("{}...", &email.body_text[..200])
|
|
|
|
|
} else {
|
|
|
|
|
email.body_text.clone()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
results.push(EmailSearchResult {
|
|
|
|
|
email,
|
|
|
|
|
score: point.score,
|
|
|
|
|
snippet,
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(results)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(not(feature = "vectordb"))]
|
|
|
|
|
pub async fn search(
|
|
|
|
|
&self,
|
|
|
|
|
query: &EmailSearchQuery,
|
|
|
|
|
_query_embedding: Vec<f32>,
|
|
|
|
|
) -> Result<Vec<EmailSearchResult>> {
|
|
|
|
|
let mut results = Vec::new();
|
|
|
|
|
let mut entries = fs::read_dir(&self.db_path).await?;
|
|
|
|
|
|
|
|
|
|
while let Some(entry) = entries.next_entry().await? {
|
|
|
|
|
if entry.path().extension().and_then(|s| s.to_str()) == Some("json") {
|
|
|
|
|
let content = fs::read_to_string(entry.path()).await?;
|
|
|
|
|
if let Ok(email) = serde_json::from_str::<EmailDocument>(&content) {
|
|
|
|
|
let query_lower = query.query_text.to_lowercase();
|
|
|
|
|
if email.subject.to_lowercase().contains(&query_lower)
|
|
|
|
|
|| email.body_text.to_lowercase().contains(&query_lower)
|
|
|
|
|
|| email.from_email.to_lowercase().contains(&query_lower)
|
|
|
|
|
{
|
|
|
|
|
let snippet = if email.body_text.len() > 200 {
|
|
|
|
|
format!("{}...", &email.body_text[..200])
|
|
|
|
|
} else {
|
|
|
|
|
email.body_text.clone()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
results.push(EmailSearchResult {
|
|
|
|
|
email,
|
|
|
|
|
score: 1.0,
|
|
|
|
|
snippet,
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if results.len() >= query.limit {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(results)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(feature = "vectordb")]
|
|
|
|
|
pub async fn delete_email(&self, email_id: &str) -> Result<()> {
|
|
|
|
|
let client = self
|
|
|
|
|
.client
|
|
|
|
|
.as_ref()
|
|
|
|
|
.ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?;
|
|
|
|
|
|
|
|
|
|
client
|
|
|
|
|
.delete_points(
|
Update attendance, keywords, calendar, compliance, console, core, drive, email, llm, msteams, security, and tasks modules
2025-12-24 09:29:27 -03:00
|
|
|
qdrant_client::qdrant::DeletePointsBuilder::new(&self.collection_name).points(
|
|
|
|
|
vec![qdrant_client::qdrant::PointId::from(email_id.to_string())],
|
|
|
|
|
),
|
2025-11-21 09:28:35 -03:00
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
log::debug!("Deleted email from index: {}", email_id);
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(not(feature = "vectordb"))]
|
|
|
|
|
pub async fn delete_email(&self, email_id: &str) -> Result<()> {
|
|
|
|
|
let file_path = self.db_path.join(format!("{}.json", email_id));
|
|
|
|
|
if file_path.exists() {
|
|
|
|
|
fs::remove_file(file_path).await?;
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(feature = "vectordb")]
|
|
|
|
|
pub async fn get_count(&self) -> Result<u64> {
|
|
|
|
|
let client = self
|
|
|
|
|
.client
|
|
|
|
|
.as_ref()
|
|
|
|
|
.ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?;
|
|
|
|
|
|
|
|
|
|
let info = client.collection_info(self.collection_name.clone()).await?;
|
|
|
|
|
|
feat(security): Complete security infrastructure implementation
SECURITY MODULES ADDED:
- security/auth.rs: Full RBAC with roles (Anonymous, User, Moderator, Admin, SuperAdmin, Service, Bot, BotOwner, BotOperator, BotViewer) and permissions
- security/cors.rs: Hardened CORS (no wildcard in production, env-based config)
- security/panic_handler.rs: Panic catching middleware with safe 500 responses
- security/path_guard.rs: Path traversal protection, null byte prevention
- security/request_id.rs: UUID request tracking with correlation IDs
- security/error_sanitizer.rs: Sensitive data redaction from responses
- security/zitadel_auth.rs: Zitadel token introspection and role mapping
- security/sql_guard.rs: SQL injection prevention with table whitelist
- security/command_guard.rs: Command injection prevention
- security/secrets.rs: Zeroizing secret management
- security/validation.rs: Input validation utilities
- security/rate_limiter.rs: Rate limiting with governor crate
- security/headers.rs: Security headers (CSP, HSTS, X-Frame-Options)
MAIN.RS UPDATES:
- Replaced tower_http::cors::Any with hardened create_cors_layer()
- Added panic handler middleware
- Added request ID tracking middleware
- Set global panic hook
SECURITY STATUS:
- 0 unwrap() in production code
- 0 panic! in production code
- 0 unsafe blocks
- cargo audit: PASS (no vulnerabilities)
- Estimated completion: ~98%
Remaining: Wire auth middleware to handlers, audit logs for sensitive data
2025-12-28 19:29:18 -03:00
|
|
|
Ok(info.result.expect("valid result").points_count.unwrap_or(0))
|
2025-11-21 09:28:35 -03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(not(feature = "vectordb"))]
|
|
|
|
|
pub async fn get_count(&self) -> Result<u64> {
|
|
|
|
|
let mut count = 0;
|
|
|
|
|
let mut entries = fs::read_dir(&self.db_path).await?;
|
|
|
|
|
|
|
|
|
|
while let Some(entry) = entries.next_entry().await? {
|
|
|
|
|
if entry.path().extension().and_then(|s| s.to_str()) == Some("json") {
|
|
|
|
|
count += 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(count)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(feature = "vectordb")]
|
|
|
|
|
pub async fn clear(&self) -> Result<()> {
|
|
|
|
|
let client = self
|
|
|
|
|
.client
|
|
|
|
|
.as_ref()
|
|
|
|
|
.ok_or_else(|| anyhow::anyhow!("Vector DB not initialized"))?;
|
|
|
|
|
|
Update attendance, keywords, calendar, compliance, console, core, drive, email, llm, msteams, security, and tasks modules
2025-12-24 09:29:27 -03:00
|
|
|
client.delete_collection(&self.collection_name).await?;
|
2025-12-23 18:40:58 -03:00
|
|
|
|
2025-11-21 09:28:35 -03:00
|
|
|
client
|
2025-12-23 15:52:35 -03:00
|
|
|
.create_collection(
|
|
|
|
|
qdrant_client::qdrant::CreateCollectionBuilder::new(&self.collection_name)
|
|
|
|
|
.vectors_config(VectorParams {
|
2025-11-21 09:28:35 -03:00
|
|
|
size: 1536,
|
|
|
|
|
distance: Distance::Cosine.into(),
|
|
|
|
|
..Default::default()
|
2025-12-23 15:52:35 -03:00
|
|
|
}),
|
|
|
|
|
)
|
2025-11-21 09:28:35 -03:00
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
log::info!("Cleared email vector collection: {}", self.collection_name);
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(not(feature = "vectordb"))]
|
|
|
|
|
pub async fn clear(&self) -> Result<()> {
|
|
|
|
|
if self.db_path.exists() {
|
|
|
|
|
fs::remove_dir_all(&self.db_path).await?;
|
|
|
|
|
fs::create_dir_all(&self.db_path).await?;
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Generates embedding vectors for emails via a local embedding service,
/// OpenAI, or a deterministic hash-based fallback.
pub struct EmailEmbeddingGenerator {
    /// LLM service endpoint.
    /// NOTE(review): currently unused by `generate_text_embedding`, which
    /// talks to a hard-coded local embedding URL — confirm intent.
    pub llm_endpoint: String,
}
impl EmailEmbeddingGenerator {
|
|
|
|
|
pub fn new(llm_endpoint: String) -> Self {
|
|
|
|
|
Self { llm_endpoint }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn generate_embedding(&self, email: &EmailDocument) -> Result<Vec<f32>> {
|
|
|
|
|
let text = format!(
|
|
|
|
|
"From: {} <{}>\nSubject: {}\n\n{}",
|
|
|
|
|
email.from_name, email.from_email, email.subject, email.body_text
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let text = if text.len() > 8000 {
|
|
|
|
|
&text[..8000]
|
|
|
|
|
} else {
|
|
|
|
|
&text
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
self.generate_text_embedding(text).await
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn generate_text_embedding(&self, text: &str) -> Result<Vec<f32>> {
|
Add .env.example with comprehensive configuration template
The commit adds a complete example environment configuration file
documenting all available settings for BotServer, including logging,
database, server, drive, LLM, Redis, email, and feature flags.
Also removes hardcoded environment variable usage throughout the
codebase, replacing them with configuration via config.csv or
appropriate defaults. This includes:
- WhatsApp, Teams, Instagram adapter configurations
- Weather API key handling
- Email and directory service configurations
- Console feature conditionally compiles monitoring code
- Improved logging configuration with library suppression
2025-11-28 13:19:03 -03:00
|
|
|
let embedding_url = "http://localhost:8082".to_string();
|
2025-12-26 08:59:25 -03:00
|
|
|
match self.generate_local_embedding(text, &embedding_url).await {
|
|
|
|
|
Ok(embedding) => Ok(embedding),
|
|
|
|
|
Err(e) => {
|
|
|
|
|
log::warn!("Local embedding failed: {e}, falling back to hash embedding");
|
feat(autotask): Implement AutoTask system with intent classification and app generation
- Add IntentClassifier with 7 intent types (APP_CREATE, TODO, MONITOR, ACTION, SCHEDULE, GOAL, TOOL)
- Add AppGenerator with LLM-powered app structure analysis
- Add DesignerAI for modifying apps through conversation
- Add app_server for serving generated apps with clean URLs
- Add db_api for CRUD operations on bot database tables
- Add ask_later keyword for pending info collection
- Add migration 6.1.1 with tables: pending_info, auto_tasks, execution_plans, task_approvals, task_decisions, safety_audit_log, generated_apps, intent_classifications, designer_changes
- Write apps to S3 drive and sync to SITE_ROOT for serving
- Clean URL structure: /apps/{app_name}/
- Integrate with DriveMonitor for file sync
Based on Chapter 17 - Autonomous Tasks specification
2025-12-27 21:10:09 -03:00
|
|
|
Self::generate_hash_embedding(text)
|
2025-12-26 08:59:25 -03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn generate_text_embedding_with_openai(
|
|
|
|
|
&self,
|
|
|
|
|
text: &str,
|
|
|
|
|
api_key: &str,
|
|
|
|
|
) -> Result<Vec<f32>> {
|
|
|
|
|
self.generate_openai_embedding(text, api_key).await
|
2025-11-27 23:10:43 -03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async fn generate_openai_embedding(&self, text: &str, api_key: &str) -> Result<Vec<f32>> {
|
|
|
|
|
use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, CONTENT_TYPE};
|
|
|
|
|
use serde_json::json;
|
|
|
|
|
|
|
|
|
|
let client = reqwest::Client::new();
|
|
|
|
|
let mut headers = HeaderMap::new();
|
|
|
|
|
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
|
|
|
|
|
headers.insert(
|
|
|
|
|
AUTHORIZATION,
|
|
|
|
|
HeaderValue::from_str(&format!("Bearer {}", api_key))?,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let body = json!({
|
|
|
|
|
"input": text,
|
|
|
|
|
"model": "text-embedding-3-small"
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
let response = client
|
|
|
|
|
.post("https://api.openai.com/v1/embeddings")
|
|
|
|
|
.headers(headers)
|
|
|
|
|
.json(&body)
|
|
|
|
|
.send()
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
if !response.status().is_success() {
|
|
|
|
|
return Err(anyhow::anyhow!("OpenAI API error: {}", response.status()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let result: serde_json::Value = response.json().await?;
|
|
|
|
|
let embedding = result["data"][0]["embedding"]
|
|
|
|
|
.as_array()
|
|
|
|
|
.ok_or_else(|| anyhow::anyhow!("Invalid OpenAI response format"))?
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|v| v.as_f64().unwrap_or(0.0) as f32)
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
Ok(embedding)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async fn generate_local_embedding(&self, text: &str, embedding_url: &str) -> Result<Vec<f32>> {
|
|
|
|
|
use serde_json::json;
|
|
|
|
|
|
|
|
|
|
let client = reqwest::Client::new();
|
|
|
|
|
let body = json!({
|
|
|
|
|
"text": text,
|
|
|
|
|
"model": "sentence-transformers/all-MiniLM-L6-v2"
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
let response = client.post(embedding_url).json(&body).send().await?;
|
|
|
|
|
|
|
|
|
|
if !response.status().is_success() {
|
|
|
|
|
return Err(anyhow::anyhow!(
|
|
|
|
|
"Local embedding service error: {}",
|
|
|
|
|
response.status()
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let result: serde_json::Value = response.json().await?;
|
|
|
|
|
let embedding = result["embedding"]
|
|
|
|
|
.as_array()
|
|
|
|
|
.ok_or_else(|| anyhow::anyhow!("Invalid embedding response format"))?
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|v| v.as_f64().unwrap_or(0.0) as f32)
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
Ok(embedding)
|
|
|
|
|
}
|
|
|
|
|
|
feat(autotask): Implement AutoTask system with intent classification and app generation
- Add IntentClassifier with 7 intent types (APP_CREATE, TODO, MONITOR, ACTION, SCHEDULE, GOAL, TOOL)
- Add AppGenerator with LLM-powered app structure analysis
- Add DesignerAI for modifying apps through conversation
- Add app_server for serving generated apps with clean URLs
- Add db_api for CRUD operations on bot database tables
- Add ask_later keyword for pending info collection
- Add migration 6.1.1 with tables: pending_info, auto_tasks, execution_plans, task_approvals, task_decisions, safety_audit_log, generated_apps, intent_classifications, designer_changes
- Write apps to S3 drive and sync to SITE_ROOT for serving
- Clean URL structure: /apps/{app_name}/
- Integrate with DriveMonitor for file sync
Based on Chapter 17 - Autonomous Tasks specification
2025-12-27 21:10:09 -03:00
|
|
|
fn generate_hash_embedding(text: &str) -> Result<Vec<f32>> {
|
2025-11-27 23:10:43 -03:00
|
|
|
use std::collections::hash_map::DefaultHasher;
|
|
|
|
|
use std::hash::{Hash, Hasher};
|
|
|
|
|
|
|
|
|
|
const EMBEDDING_DIM: usize = 1536;
|
|
|
|
|
let mut embedding = vec![0.0f32; EMBEDDING_DIM];
|
|
|
|
|
|
|
|
|
|
let words: Vec<&str> = text.split_whitespace().collect();
|
|
|
|
|
|
|
|
|
|
for (i, chunk) in words.chunks(10).enumerate() {
|
|
|
|
|
let mut hasher = DefaultHasher::new();
|
|
|
|
|
chunk.join(" ").hash(&mut hasher);
|
|
|
|
|
let hash = hasher.finish();
|
|
|
|
|
|
|
|
|
|
for j in 0..64 {
|
|
|
|
|
let idx = (i * 64 + j) % EMBEDDING_DIM;
|
|
|
|
|
let value = ((hash >> j) & 1) as f32;
|
|
|
|
|
embedding[idx] += value;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
|
|
|
|
|
if norm > 0.0 {
|
|
|
|
|
for val in &mut embedding {
|
|
|
|
|
*val /= norm;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(embedding)
|
2025-11-21 09:28:35 -03:00
|
|
|
}
|
|
|
|
|
}
|