Fix KB indexing: upsert kb_collections, consistent collection names, preserve indexed flag
All checks were successful
BotServer CI/CD / build (push) Successful in 3m23s
All checks were successful
BotServer CI/CD / build (push) Successful in 3m23s
- Bug 1: `check_gbkb_changes` now preserves `indexed = true` from the previous state when the etag matches, preventing redundant re-indexing on every cycle.
- Bug 2: The `USE KB` fallback now builds the collection name from `bot_id_short` (the first 8 characters of the bot ID) instead of a random UUID, matching the collection-name convention used by DriveMonitor.
- Bug 3: `handle_gbkb_change` now upserts into the `kb_collections` table after successful indexing, so `USE KB` can find the collection at runtime.
- Changed `ON CONFLICT DO NOTHING` to `ON CONFLICT ... DO UPDATE` for `kb_collections` inserts.
- Changed the `process_gbkb_folder` return type from `Result<()>` to `Result<IndexingResult>`.
This commit is contained in:
parent
e81aee6221
commit
7a1ec157f1
4 changed files with 61 additions and 9 deletions
|
|
@ -220,18 +220,21 @@ fn add_kb_to_session(
|
|||
(kb_result.folder_path, kb_result.qdrant_collection)
|
||||
} else {
|
||||
let default_path = format!("work/{}/{}.gbkb/{}", bot_name, bot_name, kb_name);
|
||||
let bot_id_short: String = bot_id.to_string().chars().take(8).collect();
|
||||
let default_collection = format!("{}_{}_{}", bot_name, bot_id_short, kb_name);
|
||||
let kb_id = Uuid::new_v4();
|
||||
let default_collection = format!("{}_{}_{}", bot_name, kb_id, kb_name);
|
||||
|
||||
warn!(
|
||||
"KB '{}' not found in kb_collections for bot {}. Using default path: {}",
|
||||
kb_name, bot_name, default_path
|
||||
"KB '{}' not found in kb_collections for bot {}. Using default path: {}, collection: {}",
|
||||
kb_name, bot_name, default_path, default_collection
|
||||
);
|
||||
|
||||
diesel::sql_query(
|
||||
"INSERT INTO kb_collections (id, bot_id, name, folder_path, qdrant_collection, document_count)
|
||||
VALUES ($1, $2, $3, $4, $5, 0)
|
||||
ON CONFLICT (bot_id, name) DO NOTHING"
|
||||
ON CONFLICT (bot_id, name) DO UPDATE SET
|
||||
folder_path = EXCLUDED.folder_path,
|
||||
qdrant_collection = EXCLUDED.qdrant_collection"
|
||||
)
|
||||
.bind::<diesel::sql_types::Uuid, _>(kb_id)
|
||||
.bind::<diesel::sql_types::Uuid, _>(bot_id)
|
||||
|
|
|
|||
|
|
@ -941,7 +941,7 @@ impl KbFolderMonitor {
|
|||
Self { indexer, work_root }
|
||||
}
|
||||
|
||||
pub async fn process_gbkb_folder(&self, bot_id: Uuid, bot_name: &str, kb_folder: &Path) -> Result<()> {
|
||||
pub async fn process_gbkb_folder(&self, bot_id: Uuid, bot_name: &str, kb_folder: &Path) -> Result<IndexingResult> {
|
||||
let kb_name = kb_folder
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
|
|
@ -965,6 +965,6 @@ impl KbFolderMonitor {
|
|||
result.documents_processed, result.chunks_indexed, result.collection_name
|
||||
);
|
||||
|
||||
Ok(())
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,11 +9,12 @@ pub use document_processor::{DocumentFormat, DocumentProcessor, TextChunk};
|
|||
pub use embedding_generator::{
|
||||
EmailEmbeddingGenerator, EmbeddingConfig, EmbeddingGenerator, KbEmbeddingGenerator,
|
||||
};
|
||||
pub use kb_indexer::{CollectionInfo, KbFolderMonitor, KbIndexer, QdrantConfig, SearchResult};
|
||||
pub use kb_indexer::{CollectionInfo, IndexingResult, KbFolderMonitor, KbIndexer, QdrantConfig, SearchResult};
|
||||
pub use web_crawler::{WebCrawler, WebPage, WebsiteCrawlConfig};
|
||||
pub use website_crawler_service::{ensure_crawler_service_running, WebsiteCrawlerService};
|
||||
|
||||
use anyhow::Result;
|
||||
use diesel::prelude::*;
|
||||
use log::{error, info, warn};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
|
@ -172,7 +173,50 @@ impl KnowledgeBaseManager {
|
|||
);
|
||||
|
||||
let monitor = self.monitor.read().await;
|
||||
monitor.process_gbkb_folder(bot_id, bot_name, kb_folder).await
|
||||
let result = monitor.process_gbkb_folder(bot_id, bot_name, kb_folder).await?;
|
||||
|
||||
let kb_name = kb_folder
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("unknown");
|
||||
let collection_name = result.collection_name.clone();
|
||||
let folder_path = kb_folder.to_string_lossy().to_string();
|
||||
let doc_count = result.documents_processed;
|
||||
|
||||
if let Some(pool) = self.indexer.get_db_pool() {
|
||||
if let Ok(mut conn) = pool.get() {
|
||||
diesel::sql_query(
|
||||
"INSERT INTO kb_collections (id, bot_id, name, folder_path, qdrant_collection, document_count)
|
||||
VALUES ($1, $2, $3, $4, $5, $6)
|
||||
ON CONFLICT (bot_id, name) DO UPDATE SET
|
||||
folder_path = EXCLUDED.folder_path,
|
||||
qdrant_collection = EXCLUDED.qdrant_collection,
|
||||
document_count = EXCLUDED.document_count,
|
||||
updated_at = NOW()"
|
||||
)
|
||||
.bind::<diesel::sql_types::Uuid, _>(Uuid::new_v4())
|
||||
.bind::<diesel::sql_types::Uuid, _>(bot_id)
|
||||
.bind::<diesel::sql_types::Text, _>(kb_name)
|
||||
.bind::<diesel::sql_types::Text, _>(&folder_path)
|
||||
.bind::<diesel::sql_types::Text, _>(&collection_name)
|
||||
.bind::<diesel::sql_types::Integer, _>(doc_count as i32)
|
||||
.execute(&mut conn)
|
||||
.map_err(|e| {
|
||||
error!("Failed to upsert kb_collections for {}/{}: {}", bot_name, kb_name, e);
|
||||
e
|
||||
})?;
|
||||
info!(
|
||||
"Upserted kb_collections: bot={}/{}, kb={}, collection={}, docs={}",
|
||||
bot_name, bot_id, kb_name, collection_name, doc_count
|
||||
);
|
||||
} else {
|
||||
warn!("No DB connection available to upsert kb_collections for {}/{}", bot_name, kb_name);
|
||||
}
|
||||
} else {
|
||||
warn!("No DB pool available to upsert kb_collections for {}/{}", bot_name, kb_name);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn clear_kb(&self, bot_id: Uuid, bot_name: &str, kb_name: &str) -> Result<()> {
|
||||
|
|
|
|||
|
|
@ -1373,7 +1373,12 @@ impl DriveMonitor {
|
|||
files_processed, pdf_files_found
|
||||
);
|
||||
}
|
||||
for (path, state) in current_files {
|
||||
for (path, mut state) in current_files {
|
||||
if let Some(previous) = file_states.get(&path) {
|
||||
if previous.indexed && state.etag == previous.etag {
|
||||
state.indexed = true;
|
||||
}
|
||||
}
|
||||
file_states.insert(path, state);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue