From f062cc4a3e67b56908176af8daa9a3aa6f5abcea Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Mon, 16 Mar 2026 08:15:21 -0300 Subject: [PATCH] Add THINK KB keyword for explicit knowledge base reasoning - Implement think_kb.rs with structured search results - Add keyword registration in mod.rs files - Include confidence scoring and multi-KB support --- src/basic/keywords/mod.rs | 3 + src/basic/keywords/think_kb.rs | 316 +++++++++++++++++++++++++++++++++ src/basic/mod.rs | 3 + 3 files changed, 322 insertions(+) create mode 100644 src/basic/keywords/think_kb.rs diff --git a/src/basic/keywords/mod.rs b/src/basic/keywords/mod.rs index 71c1572d..d563b404 100644 --- a/src/basic/keywords/mod.rs +++ b/src/basic/keywords/mod.rs @@ -126,6 +126,8 @@ pub mod clear_kb; #[cfg(feature = "vectordb")] pub mod kb_statistics; #[cfg(feature = "vectordb")] +pub mod think_kb; +#[cfg(feature = "vectordb")] pub mod use_kb; // ===== DRIVE FEATURE KEYWORDS ===== @@ -265,6 +267,7 @@ pub fn get_all_keywords() -> Vec { "SET USER MEMORY".to_string(), "USER FACTS".to_string(), "CLEAR KB".to_string(), + "THINK KB".to_string(), "USE KB".to_string(), "USE ACCOUNT".to_string(), "USE WEBSITE".to_string(), diff --git a/src/basic/keywords/think_kb.rs b/src/basic/keywords/think_kb.rs new file mode 100644 index 00000000..d84b9ac7 --- /dev/null +++ b/src/basic/keywords/think_kb.rs @@ -0,0 +1,316 @@ +//! THINK KB keyword implementation for knowledge base reasoning +//! +//! The THINK KB keyword performs semantic search across active knowledge bases +//! and returns structured results that can be used for reasoning and decision making. +//! +//! Usage in .bas files: +//! results = THINK KB "What is the company policy on remote work?" +//! results = THINK KB query_variable +//! +//! Returns a structured object with: +//! - results: Array of search results with content, source, and relevance +//! - summary: Brief summary of findings +//! - confidence: Overall confidence score (0.0 to 1.0) + +use crate::core::bot::kb_context::KbContextManager; +use crate::core::kb::KnowledgeBaseManager; +use crate::core::shared::models::UserSession; +use crate::core::shared::state::AppState; +use log::{debug, error, info, warn}; +use rhai::{Dynamic, Engine, EvalAltResult, Map}; +use serde_json::json; +use std::sync::Arc; + +/// Registers the THINK KB keyword with the Rhai engine +pub fn register_think_kb_keyword( + engine: &mut Engine, + state: Arc, + session: Arc, +) -> Result<(), Box> { + let state_clone = Arc::clone(&state); + let session_clone = Arc::clone(&session); + + engine.register_custom_syntax(["THINK", "KB", "$expr$"], true, move |context, inputs| { + let query = context.eval_expression_tree(&inputs[0])?.to_string(); + + info!( + "THINK KB keyword executed - Query: '{}', Session: {}", + query, session_clone.id + ); + + let session_id = session_clone.id; + let bot_name = session_clone.bot_name.clone(); + let kb_manager = match &state_clone.kb_manager { + Some(manager) => Arc::clone(manager), + None => { + error!("KB manager not available"); + return Err("KB manager not initialized".into()); + } + }; + let db_pool = state_clone.conn.clone(); + + // Execute KB search in blocking thread + let result = std::thread::spawn(move || { + tokio::runtime::Handle::current().block_on(async { + think_kb_search(kb_manager, db_pool, session_id, &bot_name, &query).await + }) + }) + .join(); + + match result { + Ok(Ok(search_result)) => { + info!( + "THINK KB completed - Found {} results with confidence {:.2}", + search_result.get("results") + .and_then(|r| r.as_array()) + .map(|a| a.len()) + .unwrap_or(0), + search_result.get("confidence") + .and_then(|c| c.as_f64()) + .unwrap_or(0.0) + ); + + // Convert JSON to Rhai Dynamic + Ok(json_to_dynamic(search_result)) + } + Ok(Err(e)) => { + error!("THINK KB search failed: {}", e); + Err(format!("THINK KB failed: {}", e).into()) + } + Err(e) => { + error!("THINK KB thread panic: {:?}", e); + Err("THINK KB failed: thread panic".into()) + } + } + })?; + + Ok(()) +} + +/// Performs the actual KB search and reasoning +async fn think_kb_search( + kb_manager: Arc, + db_pool: crate::core::shared::utils::DbPool, + session_id: uuid::Uuid, + bot_name: &str, + query: &str, +) -> Result { + let context_manager = KbContextManager::new(kb_manager, db_pool); + + // Search active KBs with reasonable limits + let kb_contexts = context_manager + .search_active_kbs(session_id, bot_name, query, 10, 2000) + .await + .map_err(|e| format!("KB search failed: {}", e))?; + + if kb_contexts.is_empty() { + warn!("No active KBs found for session {}", session_id); + return Ok(json!({ + "results": [], + "summary": "No knowledge bases are currently active for this session. Use 'USE KB ' to activate a knowledge base.", + "confidence": 0.0, + "total_results": 0, + "sources": [] + })); + } + + let mut all_results = Vec::new(); + let mut sources = std::collections::HashSet::new(); + let mut total_score = 0.0; + let mut result_count = 0; + + // Process results from all KBs + for kb_context in &kb_contexts { + for search_result in &kb_context.search_results { + all_results.push(json!({ + "content": search_result.content, + "source": search_result.document_path, + "kb_name": kb_context.kb_name, + "relevance": search_result.score, + "tokens": search_result.chunk_tokens + })); + + sources.insert(search_result.document_path.clone()); + total_score += search_result.score as f64; + result_count += 1; + } + } + + // Calculate overall confidence based on average relevance and result count + let avg_relevance = if result_count > 0 { + total_score / result_count as f64 + } else { + 0.0 + }; + + // Confidence factors: relevance score, number of results, source diversity + let confidence = calculate_confidence(avg_relevance, result_count, sources.len()); + + // Generate summary based on results + let summary = generate_summary(&all_results, query); + + let response = json!({ + "results": all_results, + "summary": summary, + "confidence": confidence, + "total_results": result_count, + "sources": sources.into_iter().collect::>(), + "query": query, + "kb_count": kb_contexts.len() + }); + + debug!("THINK KB response: {}", serde_json::to_string_pretty(&response).unwrap_or_default()); + + Ok(response) +} + +/// Calculate confidence score based on multiple factors +fn calculate_confidence(avg_relevance: f64, result_count: usize, source_count: usize) -> f64 { + // Base confidence from average relevance (0.0 to 1.0) + let relevance_factor = avg_relevance.min(1.0).max(0.0); + + // Boost confidence with more results (diminishing returns) + let result_factor = (result_count as f64 / 10.0).min(1.0); + + // Boost confidence with source diversity + let diversity_factor = (source_count as f64 / 5.0).min(1.0); + + // Weighted combination + let confidence = (relevance_factor * 0.6) + (result_factor * 0.2) + (diversity_factor * 0.2); + + // Round to 2 decimal places + (confidence * 100.0).round() / 100.0 +} + +/// Generate a summary of the search results +fn generate_summary(results: &[serde_json::Value], query: &str) -> String { + if results.is_empty() { + return "No relevant information found in the knowledge base.".to_string(); + } + + let result_count = results.len(); + let source_count = results + .iter() + .filter_map(|r| r.get("source").and_then(|s| s.as_str())) + .collect::>() + .len(); + + let avg_relevance = results + .iter() + .filter_map(|r| r.get("relevance").and_then(|s| s.as_f64())) + .sum::() / result_count as f64; + + let kb_names = results + .iter() + .filter_map(|r| r.get("kb_name").and_then(|s| s.as_str())) + .collect::>(); + + format!( + "Found {} relevant result{} from {} knowledge base{} ({} source{}) with average relevance of {:.2}. Query: '{}'", + result_count, + if result_count == 1 { "" } else { "s" }, + kb_names.len(), + if kb_names.len() == 1 { "" } else { "s" }, + source_count, + if source_count == 1 { "" } else { "s" }, + avg_relevance, + query + ) +} + +/// Convert JSON Value to Rhai Dynamic +fn json_to_dynamic(value: serde_json::Value) -> Dynamic { + match value { + serde_json::Value::Null => Dynamic::UNIT, + serde_json::Value::Bool(b) => Dynamic::from(b), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Dynamic::from(i) + } else if let Some(f) = n.as_f64() { + Dynamic::from(f) + } else { + Dynamic::UNIT + } + } + serde_json::Value::String(s) => Dynamic::from(s), + serde_json::Value::Array(arr) => { + let mut rhai_array = rhai::Array::new(); + for item in arr { + rhai_array.push(json_to_dynamic(item)); + } + Dynamic::from(rhai_array) + } + serde_json::Value::Object(obj) => { + let mut rhai_map = Map::new(); + for (key, val) in obj { + rhai_map.insert(key.into(), json_to_dynamic(val)); + } + Dynamic::from(rhai_map) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_confidence_calculation() { + // Test the confidence calculation function + let confidence = calculate_confidence(0.8, 5, 3); + assert!(confidence >= 0.0 && confidence <= 1.0); + + // High relevance, many results, diverse sources should give high confidence + let high_confidence = calculate_confidence(0.9, 10, 5); + assert!(high_confidence > 0.7); + + // Low relevance should give low confidence + let low_confidence = calculate_confidence(0.3, 10, 5); + assert!(low_confidence < 0.5); + } + + #[test] + fn test_summary_generation() { + let results = vec![ + json!({ + "content": "Test content 1", + "source": "doc1.pdf", + "kb_name": "test_kb", + "relevance": 0.8, + "tokens": 100 + }), + json!({ + "content": "Test content 2", + "source": "doc2.pdf", + "kb_name": "test_kb", + "relevance": 0.7, + "tokens": 150 + }) + ]; + + let summary = generate_summary(&results, "test query"); + + assert!(summary.contains("2 relevant result")); + assert!(summary.contains("test query")); + assert!(summary.len() > 0); + } + + #[test] + fn test_json_to_dynamic_conversion() { + let test_json = json!({ + "string_field": "test", + "number_field": 42, + "bool_field": true, + "array_field": [1, 2, 3], + "object_field": { + "nested": "value" + } + }); + + let dynamic_result = json_to_dynamic(test_json); + + // The conversion should not panic and should return a Dynamic value + assert!(!dynamic_result.is_unit()); + } +} diff --git a/src/basic/mod.rs b/src/basic/mod.rs index 635c06fe..93925035 100644 --- a/src/basic/mod.rs +++ b/src/basic/mod.rs @@ -97,6 +97,8 @@ use self::keywords::save_from_unstructured::save_from_unstructured_keyword; #[cfg(feature = "vectordb")] use self::keywords::clear_kb::register_clear_kb_keyword; #[cfg(feature = "vectordb")] +use self::keywords::think_kb::register_think_kb_keyword; +#[cfg(feature = "vectordb")] use self::keywords::use_kb::register_use_kb_keyword; // ===== DRIVE FEATURE IMPORTS ===== @@ -238,6 +240,7 @@ impl ScriptService { { let _ = register_use_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone())); let _ = register_clear_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone())); + let _ = register_think_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone())); } // ===== DRIVE FEATURE KEYWORDS =====