Fix more clippy warnings: unused imports, raw string hashes, conditional imports
This commit is contained in:
parent
3a8510d191
commit
3a260a5703
3 changed files with 37 additions and 162 deletions
|
|
@ -3,6 +3,7 @@ use chrono::{DateTime, Utc};
|
|||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
#[cfg(not(feature = "vectordb"))]
|
||||
use tokio::fs;
|
||||
use uuid::Uuid;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,54 +1,12 @@
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
use log::{debug, error, info, trace, warn};
|
||||
use log::{debug, info, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::shared::state::AppState;
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct HybridSearchConfig {
|
||||
|
||||
pub dense_weight: f32,
|
||||
|
||||
pub sparse_weight: f32,
|
||||
|
|
@ -82,7 +40,6 @@ impl Default for HybridSearchConfig {
|
|||
}
|
||||
|
||||
impl HybridSearchConfig {
|
||||
|
||||
pub fn from_bot_config(state: &AppState, bot_id: Uuid) -> Self {
|
||||
use diesel::prelude::*;
|
||||
|
||||
|
|
@ -136,7 +93,6 @@ impl HybridSearchConfig {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
let total = config.dense_weight + config.sparse_weight;
|
||||
if total > 0.0 {
|
||||
config.dense_weight /= total;
|
||||
|
|
@ -151,21 +107,17 @@ impl HybridSearchConfig {
|
|||
config
|
||||
}
|
||||
|
||||
|
||||
pub fn use_sparse_search(&self) -> bool {
|
||||
self.bm25_enabled && self.sparse_weight > 0.0
|
||||
}
|
||||
|
||||
|
||||
pub fn use_dense_search(&self) -> bool {
|
||||
self.dense_weight > 0.0
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SearchResult {
|
||||
|
||||
pub doc_id: String,
|
||||
|
||||
pub content: String,
|
||||
|
|
@ -179,7 +131,6 @@ pub struct SearchResult {
|
|||
pub search_method: SearchMethod,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub enum SearchMethod {
|
||||
Dense,
|
||||
|
|
@ -188,8 +139,6 @@ pub enum SearchMethod {
|
|||
Reranked,
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub struct BM25Index {
|
||||
doc_freq: HashMap<String, usize>,
|
||||
doc_count: usize,
|
||||
|
|
@ -341,7 +290,6 @@ impl Default for BM25Index {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BM25Stats {
|
||||
pub doc_count: usize,
|
||||
|
|
@ -350,7 +298,6 @@ pub struct BM25Stats {
|
|||
pub enabled: bool,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct DocumentEntry {
|
||||
pub content: String,
|
||||
|
|
@ -358,9 +305,7 @@ struct DocumentEntry {
|
|||
pub metadata: HashMap<String, String>,
|
||||
}
|
||||
|
||||
|
||||
pub struct HybridSearchEngine {
|
||||
|
||||
bm25_index: BM25Index,
|
||||
|
||||
documents: HashMap<String, DocumentEntry>,
|
||||
|
|
@ -391,7 +336,6 @@ impl HybridSearchEngine {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
pub async fn index_document(
|
||||
&mut self,
|
||||
doc_id: &str,
|
||||
|
|
@ -400,10 +344,8 @@ impl HybridSearchEngine {
|
|||
metadata: HashMap<String, String>,
|
||||
embedding: Option<Vec<f32>>,
|
||||
) -> Result<(), String> {
|
||||
|
||||
self.bm25_index.add_document(doc_id, content, source);
|
||||
|
||||
|
||||
self.documents.insert(
|
||||
doc_id.to_string(),
|
||||
DocumentEntry {
|
||||
|
|
@ -413,7 +355,6 @@ impl HybridSearchEngine {
|
|||
},
|
||||
);
|
||||
|
||||
|
||||
if let Some(emb) = embedding {
|
||||
self.upsert_to_qdrant(doc_id, &emb).await?;
|
||||
}
|
||||
|
|
@ -421,12 +362,10 @@ impl HybridSearchEngine {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub fn commit(&mut self) -> Result<(), String> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub async fn remove_document(&mut self, doc_id: &str) -> Result<(), String> {
|
||||
self.bm25_index.remove_document(doc_id);
|
||||
self.documents.remove(doc_id);
|
||||
|
|
@ -434,7 +373,6 @@ impl HybridSearchEngine {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub async fn search(
|
||||
&self,
|
||||
query: &str,
|
||||
|
|
@ -442,7 +380,6 @@ impl HybridSearchEngine {
|
|||
) -> Result<Vec<SearchResult>, String> {
|
||||
let fetch_count = self.config.max_results * 3;
|
||||
|
||||
|
||||
let sparse_results: Vec<(String, f32)> = if self.config.use_sparse_search() {
|
||||
self.bm25_index
|
||||
.search(query, fetch_count)
|
||||
|
|
@ -453,7 +390,6 @@ impl HybridSearchEngine {
|
|||
Vec::new()
|
||||
};
|
||||
|
||||
|
||||
let dense_results = if self.config.use_dense_search() {
|
||||
if let Some(embedding) = query_embedding {
|
||||
self.search_qdrant(&embedding, fetch_count).await?
|
||||
|
|
@ -464,7 +400,6 @@ impl HybridSearchEngine {
|
|||
Vec::new()
|
||||
};
|
||||
|
||||
|
||||
let (results, method) = if sparse_results.is_empty() && dense_results.is_empty() {
|
||||
(Vec::new(), SearchMethod::Hybrid)
|
||||
} else if sparse_results.is_empty() {
|
||||
|
|
@ -478,7 +413,6 @@ impl HybridSearchEngine {
|
|||
)
|
||||
};
|
||||
|
||||
|
||||
let mut search_results: Vec<SearchResult> = results
|
||||
.into_iter()
|
||||
.filter_map(|(doc_id, score)| {
|
||||
|
|
@ -495,7 +429,6 @@ impl HybridSearchEngine {
|
|||
.take(self.config.max_results)
|
||||
.collect();
|
||||
|
||||
|
||||
if self.config.reranker_enabled && !search_results.is_empty() {
|
||||
search_results = self.rerank(query, search_results).await?;
|
||||
}
|
||||
|
|
@ -503,7 +436,6 @@ impl HybridSearchEngine {
|
|||
Ok(search_results)
|
||||
}
|
||||
|
||||
|
||||
pub fn sparse_search(&self, query: &str) -> Vec<SearchResult> {
|
||||
let results = self.bm25_index.search(query, self.config.max_results);
|
||||
|
||||
|
|
@ -522,7 +454,6 @@ impl HybridSearchEngine {
|
|||
.collect()
|
||||
}
|
||||
|
||||
|
||||
pub async fn dense_search(
|
||||
&self,
|
||||
query_embedding: Vec<f32>,
|
||||
|
|
@ -548,7 +479,6 @@ impl HybridSearchEngine {
|
|||
Ok(search_results)
|
||||
}
|
||||
|
||||
|
||||
fn reciprocal_rank_fusion(
|
||||
&self,
|
||||
sparse: &[(String, f32)],
|
||||
|
|
@ -557,23 +487,19 @@ impl HybridSearchEngine {
|
|||
let k = self.config.rrf_k as f32;
|
||||
let mut scores: HashMap<String, f32> = HashMap::new();
|
||||
|
||||
|
||||
for (rank, (doc_id, _)) in sparse.iter().enumerate() {
|
||||
let rrf_score = self.config.sparse_weight / (k + rank as f32 + 1.0);
|
||||
*scores.entry(doc_id.clone()).or_insert(0.0) += rrf_score;
|
||||
}
|
||||
|
||||
|
||||
for (rank, (doc_id, _)) in dense.iter().enumerate() {
|
||||
let rrf_score = self.config.dense_weight / (k + rank as f32 + 1.0);
|
||||
*scores.entry(doc_id.clone()).or_insert(0.0) += rrf_score;
|
||||
}
|
||||
|
||||
|
||||
let mut results: Vec<(String, f32)> = scores.into_iter().collect();
|
||||
results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
|
||||
let max_score = results.first().map(|(_, s)| *s).unwrap_or(0.0);
|
||||
if max_score > 0.0 {
|
||||
for (_, score) in &mut results {
|
||||
|
|
@ -584,19 +510,18 @@ impl HybridSearchEngine {
|
|||
results
|
||||
}
|
||||
|
||||
|
||||
async fn rerank(
|
||||
&self,
|
||||
query: &str,
|
||||
results: Vec<SearchResult>,
|
||||
) -> Result<Vec<SearchResult>, String> {
|
||||
|
||||
|
||||
let mut reranked = results;
|
||||
|
||||
let query_lower = query.to_lowercase();
|
||||
let query_terms: std::collections::HashSet<String> =
|
||||
query_lower.split_whitespace().map(|s| s.to_string()).collect();
|
||||
let query_terms: std::collections::HashSet<String> = query_lower
|
||||
.split_whitespace()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
let query_terms_len = query_terms.len();
|
||||
|
||||
for result in &mut reranked {
|
||||
|
|
@ -623,7 +548,6 @@ impl HybridSearchEngine {
|
|||
Ok(reranked)
|
||||
}
|
||||
|
||||
|
||||
async fn search_qdrant(
|
||||
&self,
|
||||
embedding: &[f32],
|
||||
|
|
@ -673,7 +597,6 @@ impl HybridSearchEngine {
|
|||
Ok(results)
|
||||
}
|
||||
|
||||
|
||||
async fn upsert_to_qdrant(&self, doc_id: &str, embedding: &[f32]) -> Result<(), String> {
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
|
|
@ -702,7 +625,6 @@ impl HybridSearchEngine {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
async fn delete_from_qdrant(&self, doc_id: &str) -> Result<(), String> {
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
|
|
@ -731,7 +653,6 @@ impl HybridSearchEngine {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub fn stats(&self) -> HybridSearchStats {
|
||||
let bm25_stats = self.bm25_index.stats();
|
||||
|
||||
|
|
@ -746,7 +667,6 @@ impl HybridSearchEngine {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct HybridSearchStats {
|
||||
pub total_documents: usize,
|
||||
|
|
@ -757,7 +677,6 @@ pub struct HybridSearchStats {
|
|||
pub config: HybridSearchConfig,
|
||||
}
|
||||
|
||||
|
||||
pub struct QueryDecomposer {
|
||||
llm_endpoint: String,
|
||||
api_key: String,
|
||||
|
|
@ -771,11 +690,9 @@ impl QueryDecomposer {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
pub async fn decompose(&self, query: &str) -> Result<Vec<String>, String> {
|
||||
let mut sub_queries = Vec::new();
|
||||
|
||||
|
||||
let conjunctions = ["and", "also", "as well as", "in addition to"];
|
||||
let mut parts: Vec<&str> = vec![query];
|
||||
|
||||
|
|
@ -821,7 +738,6 @@ impl QueryDecomposer {
|
|||
Ok(sub_queries)
|
||||
}
|
||||
|
||||
|
||||
pub fn synthesize(&self, query: &str, sub_answers: &[String]) -> String {
|
||||
if sub_answers.len() == 1 {
|
||||
return sub_answers[0].clone();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
use anyhow::Result;
|
||||
use chrono::{DateTime, Utc};
|
||||
use diesel::RunQueryDsl;
|
||||
use log::{error, info, warn};
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -50,7 +49,6 @@ impl UserWorkspace {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum IndexingStatus {
|
||||
Idle,
|
||||
|
|
@ -59,7 +57,6 @@ pub enum IndexingStatus {
|
|||
Failed(String),
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IndexingStats {
|
||||
pub emails_indexed: u64,
|
||||
|
|
@ -70,7 +67,6 @@ pub struct IndexingStats {
|
|||
pub errors: u64,
|
||||
}
|
||||
|
||||
|
||||
struct UserIndexingJob {
|
||||
user_id: Uuid,
|
||||
bot_id: Uuid,
|
||||
|
|
@ -83,7 +79,6 @@ struct UserIndexingJob {
|
|||
status: IndexingStatus,
|
||||
}
|
||||
|
||||
|
||||
pub struct VectorDBIndexer {
|
||||
db_pool: DbPool,
|
||||
work_root: PathBuf,
|
||||
|
|
@ -96,7 +91,6 @@ pub struct VectorDBIndexer {
|
|||
}
|
||||
|
||||
impl VectorDBIndexer {
|
||||
|
||||
pub fn new(
|
||||
db_pool: DbPool,
|
||||
work_root: PathBuf,
|
||||
|
|
@ -115,7 +109,6 @@ impl VectorDBIndexer {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
pub async fn start(self: Arc<Self>) -> Result<()> {
|
||||
let mut running = self.running.write().await;
|
||||
if *running {
|
||||
|
|
@ -135,17 +128,14 @@ impl VectorDBIndexer {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub async fn stop(&self) {
|
||||
let mut running = self.running.write().await;
|
||||
*running = false;
|
||||
info!("🛑 Stopping Vector DB Indexer");
|
||||
}
|
||||
|
||||
|
||||
async fn run_indexing_loop(self: Arc<Self>) {
|
||||
loop {
|
||||
|
||||
{
|
||||
let running = self.running.read().await;
|
||||
if !*running {
|
||||
|
|
@ -155,7 +145,6 @@ impl VectorDBIndexer {
|
|||
|
||||
info!(" Running vector DB indexing cycle...");
|
||||
|
||||
|
||||
match self.get_active_users().await {
|
||||
Ok(users) => {
|
||||
info!("Found {} active users to index", users.len());
|
||||
|
|
@ -173,14 +162,12 @@ impl VectorDBIndexer {
|
|||
|
||||
info!(" Indexing cycle complete");
|
||||
|
||||
|
||||
sleep(Duration::from_secs(self.interval_seconds)).await;
|
||||
}
|
||||
|
||||
info!("Vector DB Indexer stopped");
|
||||
}
|
||||
|
||||
|
||||
async fn get_active_users(&self) -> Result<Vec<(Uuid, Uuid)>> {
|
||||
let conn = self.db_pool.clone();
|
||||
|
||||
|
|
@ -190,7 +177,6 @@ impl VectorDBIndexer {
|
|||
|
||||
let mut db_conn = conn.get()?;
|
||||
|
||||
|
||||
let results: Vec<(Uuid, Uuid)> = user_sessions
|
||||
.select((user_id, bot_id))
|
||||
.distinct()
|
||||
|
|
@ -201,11 +187,9 @@ impl VectorDBIndexer {
|
|||
.await?
|
||||
}
|
||||
|
||||
|
||||
async fn index_user_data(&self, user_id: Uuid, bot_id: Uuid) -> Result<()> {
|
||||
info!("Indexing user: {} (bot: {})", user_id, bot_id);
|
||||
|
||||
|
||||
let mut jobs = self.jobs.write().await;
|
||||
let job = jobs.entry(user_id).or_insert_with(|| {
|
||||
let workspace = UserWorkspace::new(self.work_root.clone(), &bot_id, &user_id);
|
||||
|
|
@ -235,7 +219,6 @@ impl VectorDBIndexer {
|
|||
|
||||
job.status = IndexingStatus::Running;
|
||||
|
||||
|
||||
if job.email_db.is_none() {
|
||||
let mut email_db =
|
||||
UserEmailVectorDB::new(user_id, bot_id, job.workspace.email_vectordb().into());
|
||||
|
|
@ -264,17 +247,14 @@ impl VectorDBIndexer {
|
|||
|
||||
drop(jobs);
|
||||
|
||||
|
||||
if let Err(e) = self.index_user_emails(user_id).await {
|
||||
error!("Failed to index emails for user {}: {}", user_id, e);
|
||||
}
|
||||
|
||||
|
||||
if let Err(e) = self.index_user_files(user_id).await {
|
||||
error!("Failed to index files for user {}: {}", user_id, e);
|
||||
}
|
||||
|
||||
|
||||
let mut jobs = self.jobs.write().await;
|
||||
if let Some(job) = jobs.get_mut(&user_id) {
|
||||
job.status = IndexingStatus::Idle;
|
||||
|
|
@ -284,7 +264,6 @@ impl VectorDBIndexer {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
async fn index_user_emails(&self, user_id: Uuid) -> Result<()> {
|
||||
let jobs = self.jobs.read().await;
|
||||
let job = jobs
|
||||
|
|
@ -299,7 +278,6 @@ impl VectorDBIndexer {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
let accounts = self.get_user_email_accounts(user_id).await?;
|
||||
|
||||
info!(
|
||||
|
|
@ -309,7 +287,6 @@ impl VectorDBIndexer {
|
|||
);
|
||||
|
||||
for account_id in accounts {
|
||||
|
||||
match self.get_unindexed_emails(user_id, &account_id).await {
|
||||
Ok(emails) => {
|
||||
if emails.is_empty() {
|
||||
|
|
@ -322,7 +299,6 @@ impl VectorDBIndexer {
|
|||
account_id
|
||||
);
|
||||
|
||||
|
||||
for chunk in emails.chunks(self.batch_size) {
|
||||
for email in chunk {
|
||||
match self.embedding_generator.generate_embedding(&email).await {
|
||||
|
|
@ -342,7 +318,6 @@ impl VectorDBIndexer {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
|
|
@ -358,7 +333,6 @@ impl VectorDBIndexer {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
async fn index_user_files(&self, user_id: Uuid) -> Result<()> {
|
||||
let jobs = self.jobs.read().await;
|
||||
let job = jobs
|
||||
|
|
@ -373,7 +347,6 @@ impl VectorDBIndexer {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
match self.get_unindexed_files(user_id).await {
|
||||
Ok(files) => {
|
||||
if files.is_empty() {
|
||||
|
|
@ -382,16 +355,13 @@ impl VectorDBIndexer {
|
|||
|
||||
info!("Indexing {} files for user {}", files.len(), user_id);
|
||||
|
||||
|
||||
for chunk in files.chunks(self.batch_size) {
|
||||
for file in chunk {
|
||||
|
||||
let mime_type = file.mime_type.as_ref().map(|s| s.as_str()).unwrap_or("");
|
||||
if !FileContentExtractor::should_index(&mime_type, file.file_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
let text = format!(
|
||||
"File: {}\nType: {}\n\n{}",
|
||||
file.file_name, file.file_type, file.content_text
|
||||
|
|
@ -415,7 +385,6 @@ impl VectorDBIndexer {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
|
|
@ -427,7 +396,6 @@ impl VectorDBIndexer {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
async fn get_user_email_accounts(&self, user_id: Uuid) -> Result<Vec<String>> {
|
||||
let conn = self.db_pool.clone();
|
||||
|
||||
|
|
@ -490,7 +458,7 @@ impl VectorDBIndexer {
|
|||
folder: String,
|
||||
}
|
||||
|
||||
let query = r#"
|
||||
let query = r"
|
||||
SELECT e.id, e.message_id, e.subject, e.from_address, e.to_addresses,
|
||||
e.body_text, e.body_html, e.received_at, e.folder
|
||||
FROM emails e
|
||||
|
|
@ -500,7 +468,7 @@ impl VectorDBIndexer {
|
|||
AND (eis.indexed_at IS NULL OR eis.needs_reindex = true)
|
||||
ORDER BY e.received_at DESC
|
||||
LIMIT 100
|
||||
"#;
|
||||
";
|
||||
|
||||
let rows: Vec<EmailRow> = diesel::sql_query(query)
|
||||
.bind::<diesel::sql_types::Uuid, _>(user_id)
|
||||
|
|
@ -510,20 +478,18 @@ impl VectorDBIndexer {
|
|||
|
||||
let emails: Vec<EmailDocument> = rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
EmailDocument {
|
||||
id: row.id.to_string(),
|
||||
account_id: account_id.clone(),
|
||||
from_email: row.from_address.clone(),
|
||||
from_name: row.from_address,
|
||||
to_email: row.to_addresses,
|
||||
subject: row.subject,
|
||||
body_text: row.body_text.unwrap_or_default(),
|
||||
date: row.received_at,
|
||||
folder: row.folder,
|
||||
has_attachments: false,
|
||||
thread_id: None,
|
||||
}
|
||||
.map(|row| EmailDocument {
|
||||
id: row.id.to_string(),
|
||||
account_id: account_id.clone(),
|
||||
from_email: row.from_address.clone(),
|
||||
from_name: row.from_address,
|
||||
to_email: row.to_addresses,
|
||||
subject: row.subject,
|
||||
body_text: row.body_text.unwrap_or_default(),
|
||||
date: row.received_at,
|
||||
folder: row.folder,
|
||||
has_attachments: false,
|
||||
thread_id: None,
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
@ -566,17 +532,16 @@ impl VectorDBIndexer {
|
|||
modified_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
let query = r#"
|
||||
let query = r"
|
||||
SELECT f.id, f.file_path, f.file_name, f.file_type, f.file_size,
|
||||
f.bucket, f.mime_type, f.created_at, f.modified_at
|
||||
FROM files f
|
||||
FROM user_files f
|
||||
LEFT JOIN file_index_status fis ON f.id = fis.file_id
|
||||
WHERE f.user_id = $1
|
||||
AND (fis.indexed_at IS NULL OR fis.needs_reindex = true)
|
||||
AND f.file_size < 10485760
|
||||
ORDER BY f.modified_at DESC
|
||||
LIMIT 100
|
||||
"#;
|
||||
";
|
||||
|
||||
let rows: Vec<FileRow> = diesel::sql_query(query)
|
||||
.bind::<diesel::sql_types::Uuid, _>(user_id)
|
||||
|
|
@ -585,22 +550,20 @@ impl VectorDBIndexer {
|
|||
|
||||
let files: Vec<FileDocument> = rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
FileDocument {
|
||||
id: row.id.to_string(),
|
||||
file_path: row.file_path,
|
||||
file_name: row.file_name,
|
||||
file_type: row.file_type,
|
||||
file_size: row.file_size as u64,
|
||||
bucket: row.bucket,
|
||||
content_text: String::new(),
|
||||
content_summary: None,
|
||||
created_at: row.created_at,
|
||||
modified_at: row.modified_at,
|
||||
indexed_at: Utc::now(),
|
||||
mime_type: row.mime_type,
|
||||
tags: Vec::new(),
|
||||
}
|
||||
.map(|row| FileDocument {
|
||||
id: row.id.to_string(),
|
||||
file_path: row.file_path,
|
||||
file_name: row.file_name,
|
||||
file_type: row.file_type,
|
||||
file_size: row.file_size as u64,
|
||||
bucket: row.bucket,
|
||||
content_text: String::new(),
|
||||
content_summary: None,
|
||||
created_at: row.created_at,
|
||||
modified_at: row.modified_at,
|
||||
indexed_at: Utc::now(),
|
||||
mime_type: row.mime_type,
|
||||
tags: Vec::new(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
@ -611,13 +574,11 @@ impl VectorDBIndexer {
|
|||
Ok(results)
|
||||
}
|
||||
|
||||
|
||||
pub async fn get_user_stats(&self, user_id: Uuid) -> Option<IndexingStats> {
|
||||
let jobs = self.jobs.read().await;
|
||||
jobs.get(&user_id).map(|job| job.stats.clone())
|
||||
}
|
||||
|
||||
|
||||
pub async fn get_overall_stats(&self) -> IndexingStats {
|
||||
let jobs = self.jobs.read().await;
|
||||
|
||||
|
|
@ -647,7 +608,6 @@ impl VectorDBIndexer {
|
|||
total_stats
|
||||
}
|
||||
|
||||
|
||||
pub async fn pause_user_indexing(&self, user_id: Uuid) -> Result<()> {
|
||||
let mut jobs = self.jobs.write().await;
|
||||
if let Some(job) = jobs.get_mut(&user_id) {
|
||||
|
|
@ -657,7 +617,6 @@ impl VectorDBIndexer {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub async fn resume_user_indexing(&self, user_id: Uuid) -> Result<()> {
|
||||
let mut jobs = self.jobs.write().await;
|
||||
if let Some(job) = jobs.get_mut(&user_id) {
|
||||
|
|
@ -667,7 +626,6 @@ impl VectorDBIndexer {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
pub async fn trigger_user_indexing(&self, user_id: Uuid, bot_id: Uuid) -> Result<()> {
|
||||
info!(" Triggering immediate indexing for user {}", user_id);
|
||||
self.index_user_data(user_id, bot_id).await
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue