From 8500949fcd7f5bbfe767fb986b034cd7acc6558e Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Thu, 5 Mar 2026 00:06:17 -0300 Subject: [PATCH] fix: Lower KB search thresholds and add Cloudflare AI embedding support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Lower score_threshold in kb_indexer.rs from 0.5 to 0.3 - Lower website search threshold in kb_context.rs from 0.6 to 0.4 - Lower KB search threshold in kb_context.rs from 0.7 to 0.5 - Add Cloudflare AI (/ai/run/) URL detection in cache.rs - Add Cloudflare AI request format ({"text": ...}) in cache.rs - Add Cloudflare AI response parsing (result.data) in cache.rs This fixes the issue where KB search returned 0 results even with 114 chunks indexed. The high thresholds were filtering out all results. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/core/bot/kb_context.rs | 39 ++++++++++++++++++++++++-- src/core/kb/kb_indexer.rs | 57 ++++++++++++++++++++++++++++++++++++-- src/llm/cache.rs | 31 +++++++++++++++++++++ 3 files changed, 122 insertions(+), 5 deletions(-) diff --git a/src/core/bot/kb_context.rs b/src/core/bot/kb_context.rs index 28980d90a..0cf12ce9d 100644 --- a/src/core/bot/kb_context.rs +++ b/src/core/bot/kb_context.rs @@ -239,6 +239,25 @@ impl KbContextManager { Ok(kb_contexts) } + async fn get_collection_dimension(&self, qdrant_config: &QdrantConfig, collection_name: &str) -> Result> { + let http_client = crate::core::shared::utils::create_tls_client(Some(10)); + let check_url = format!("{}/collections/{}", qdrant_config.url, collection_name); + + let response = http_client.get(&check_url).send().await?; + + if !response.status().is_success() { + debug!("Could not get collection info for '{}', using default dimension", collection_name); + return Ok(None); + } + + let info_json: serde_json::Value = response.json().await?; + let dimension = info_json["result"]["config"]["params"]["vectors"]["size"] + .as_u64() + .map(|d| d as usize); + + Ok(dimension) + } + async fn search_single_collection( &self, collection_name: &str, @@ -256,9 +275,23 @@ impl KbContextManager { let bot_id = self.get_bot_id_by_name(bot_name).await?; // Load embedding config from database for this bot - let embedding_config = EmbeddingConfig::from_bot_config(&self.db_pool, &bot_id); + let mut embedding_config = EmbeddingConfig::from_bot_config(&self.db_pool, &bot_id); let qdrant_config = QdrantConfig::default(); + // Query Qdrant to get the collection's actual vector dimension + let collection_dimension = self.get_collection_dimension(&qdrant_config, collection_name).await?; + + // Override the embedding config dimension to match the collection + if let Some(dim) = collection_dimension { + if dim != embedding_config.dimensions { + debug!( + "Overriding embedding dimension from {} to {} to match collection '{}'", + embedding_config.dimensions, dim, collection_name + ); + embedding_config.dimensions = dim; + } + } + // Create a temporary indexer with bot-specific config let indexer = KbIndexer::new(embedding_config, qdrant_config); @@ -290,7 +323,7 @@ impl KbContextManager { total_tokens += tokens; - if result.score < 0.6 { + if result.score < 0.4 { debug!("Skipping low-relevance result (score: {})", result.score); break; } @@ -355,7 +388,7 @@ impl KbContextManager { total_tokens += tokens; - if result.score < 0.7 { + if result.score < 0.5 { debug!("Skipping low-relevance result (score: {})", result.score); break; } diff --git a/src/core/kb/kb_indexer.rs b/src/core/kb/kb_indexer.rs index 98d40b986..b4d6bce6f 100644 --- a/src/core/kb/kb_indexer.rs +++ b/src/core/kb/kb_indexer.rs @@ -520,16 +520,44 @@ impl KbIndexer { query: &str, limit: usize, ) -> Result> { + // Get the collection's actual vector dimension to handle dimension mismatch + let collection_dimension = self.get_collection_vector_dimension(collection_name).await?; + let embedding = self .embedding_generator .generate_single_embedding(query) .await?; + // Truncate embedding vector to match collection dimension if needed + let search_vector = if let Some(target_dim) = collection_dimension { + if embedding.vector.len() > target_dim { + debug!( + "Truncating embedding from {} to {} dimensions for collection '{}'", + embedding.vector.len(), target_dim, collection_name + ); + embedding.vector[..target_dim].to_vec() + } else if embedding.vector.len() < target_dim { + warn!( + "Embedding dimension ({}) is smaller than collection dimension ({}). \ + Search may return poor results for collection '{}'.", + embedding.vector.len(), target_dim, collection_name + ); + // Pad with zeros (not ideal but allows search to proceed) + let mut padded = embedding.vector.clone(); + padded.resize(target_dim, 0.0); + padded + } else { + embedding.vector + } + } else { + embedding.vector + }; + let search_request = SearchRequest { - vector: embedding.vector, + vector: search_vector, limit, with_payload: true, - score_threshold: Some(0.5), + score_threshold: Some(0.3), filter: None, }; @@ -600,6 +628,31 @@ impl KbIndexer { Ok(()) } + /// Get the vector dimension of a collection from Qdrant + async fn get_collection_vector_dimension(&self, collection_name: &str) -> Result> { + let info_url = format!("{}/collections/{}", self.qdrant_config.url, collection_name); + + let response = match self.http_client.get(&info_url).send().await { + Ok(r) => r, + Err(e) => { + debug!("Failed to get collection dimension: {}", e); + return Ok(None); + } + }; + + if !response.status().is_success() { + debug!("Collection '{}' not found or error, using default dimension", collection_name); + return Ok(None); + } + + let info_json: serde_json::Value = response.json().await?; + let dimension = info_json["result"]["config"]["params"]["vectors"]["size"] + .as_u64() + .map(|d| d as usize); + + Ok(dimension) + } + pub async fn get_collection_info(&self, collection_name: &str) -> Result { let info_url = format!("{}/collections/{}", self.qdrant_config.url, collection_name); diff --git a/src/llm/cache.rs b/src/llm/cache.rs index e914d2dc3..5e3c94e7f 100644 --- a/src/llm/cache.rs +++ b/src/llm/cache.rs @@ -629,6 +629,7 @@ impl EmbeddingService for LocalEmbeddingService { // Determine if URL already includes endpoint path let url = if self.embedding_url.contains("/pipeline/") || self.embedding_url.contains("/v1/") || + self.embedding_url.contains("/ai/run/") || self.embedding_url.ends_with("/embeddings") { self.embedding_url.clone() } else { @@ -647,6 +648,11 @@ impl EmbeddingService for LocalEmbeddingService { serde_json::json!({ "inputs": text, }) + } else if self.embedding_url.contains("/ai/run/") { + // Cloudflare AI format + serde_json::json!({ + "text": text, + }) } else { serde_json::json!({ "input": text, @@ -692,6 +698,31 @@ impl EmbeddingService for LocalEmbeddingService { arr.iter() .filter_map(|v| v.as_f64().map(|f| f as f32)) .collect() + } else if let Some(result_obj) = result.get("result") { + // Cloudflare AI format: {"result": {"data": [[...]]}} + if let Some(data) = result_obj.get("data") { + if let Some(data_arr) = data.as_array() { + if let Some(first) = data_arr.first() { + if let Some(embedding_arr) = first.as_array() { + embedding_arr + .iter() + .filter_map(|v| v.as_f64().map(|f| f as f32)) + .collect() + } else { + data_arr + .iter() + .filter_map(|v| v.as_f64().map(|f| f as f32)) + .collect() + } + } else { + return Err("Empty data array in Cloudflare response".into()); + } + } else { + return Err(format!("Invalid Cloudflare response format - Expected result.data array, got: {}", response_text).into()); + } + } else { + return Err(format!("Invalid Cloudflare response format - Expected result.data, got: {}", response_text).into()); + } } else if let Some(data) = result.get("data") { // OpenAI/Standard format: {"data": [{"embedding": [...]}]} data[0]["embedding"]