Add THINK KB keyword for explicit knowledge base reasoning
- Implement think_kb.rs with structured search results
- Add keyword registration in mod.rs files
- Include confidence scoring and multi-KB support
This commit is contained in:
parent
7ef1efa047
commit
f062cc4a3e
3 changed files with 322 additions and 0 deletions
|
|
@ -126,6 +126,8 @@ pub mod clear_kb;
|
||||||
#[cfg(feature = "vectordb")]
|
#[cfg(feature = "vectordb")]
|
||||||
pub mod kb_statistics;
|
pub mod kb_statistics;
|
||||||
#[cfg(feature = "vectordb")]
|
#[cfg(feature = "vectordb")]
|
||||||
|
pub mod think_kb;
|
||||||
|
#[cfg(feature = "vectordb")]
|
||||||
pub mod use_kb;
|
pub mod use_kb;
|
||||||
|
|
||||||
// ===== DRIVE FEATURE KEYWORDS =====
|
// ===== DRIVE FEATURE KEYWORDS =====
|
||||||
|
|
@ -265,6 +267,7 @@ pub fn get_all_keywords() -> Vec<String> {
|
||||||
"SET USER MEMORY".to_string(),
|
"SET USER MEMORY".to_string(),
|
||||||
"USER FACTS".to_string(),
|
"USER FACTS".to_string(),
|
||||||
"CLEAR KB".to_string(),
|
"CLEAR KB".to_string(),
|
||||||
|
"THINK KB".to_string(),
|
||||||
"USE KB".to_string(),
|
"USE KB".to_string(),
|
||||||
"USE ACCOUNT".to_string(),
|
"USE ACCOUNT".to_string(),
|
||||||
"USE WEBSITE".to_string(),
|
"USE WEBSITE".to_string(),
|
||||||
|
|
|
||||||
316
src/basic/keywords/think_kb.rs
Normal file
316
src/basic/keywords/think_kb.rs
Normal file
|
|
@ -0,0 +1,316 @@
|
||||||
|
//! THINK KB keyword implementation for knowledge base reasoning
|
||||||
|
//!
|
||||||
|
//! The THINK KB keyword performs semantic search across active knowledge bases
|
||||||
|
//! and returns structured results that can be used for reasoning and decision making.
|
||||||
|
//!
|
||||||
|
//! Usage in .bas files:
|
||||||
|
//! results = THINK KB "What is the company policy on remote work?"
|
||||||
|
//! results = THINK KB query_variable
|
||||||
|
//!
|
||||||
|
//! Returns a structured object with:
|
||||||
|
//! - results: Array of search results with content, source, and relevance
|
||||||
|
//! - summary: Brief summary of findings
|
||||||
|
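//! - confidence: Overall confidence score (0.0 to 1.0)
//! - total_results: Number of search results returned
//! - sources: Distinct document paths the results came from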
|
||||||
|
|
||||||
|
use crate::core::bot::kb_context::KbContextManager;
|
||||||
|
use crate::core::kb::KnowledgeBaseManager;
|
||||||
|
use crate::core::shared::models::UserSession;
|
||||||
|
use crate::core::shared::state::AppState;
|
||||||
|
use log::{debug, error, info, warn};
|
||||||
|
use rhai::{Dynamic, Engine, EvalAltResult, Map};
|
||||||
|
use serde_json::json;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
/// Registers the THINK KB keyword with the Rhai engine
|
||||||
|
pub fn register_think_kb_keyword(
|
||||||
|
engine: &mut Engine,
|
||||||
|
state: Arc<AppState>,
|
||||||
|
session: Arc<UserSession>,
|
||||||
|
) -> Result<(), Box<EvalAltResult>> {
|
||||||
|
let state_clone = Arc::clone(&state);
|
||||||
|
let session_clone = Arc::clone(&session);
|
||||||
|
|
||||||
|
engine.register_custom_syntax(["THINK", "KB", "$expr$"], true, move |context, inputs| {
|
||||||
|
let query = context.eval_expression_tree(&inputs[0])?.to_string();
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"THINK KB keyword executed - Query: '{}', Session: {}",
|
||||||
|
query, session_clone.id
|
||||||
|
);
|
||||||
|
|
||||||
|
let session_id = session_clone.id;
|
||||||
|
let bot_name = session_clone.bot_name.clone();
|
||||||
|
let kb_manager = match &state_clone.kb_manager {
|
||||||
|
Some(manager) => Arc::clone(manager),
|
||||||
|
None => {
|
||||||
|
error!("KB manager not available");
|
||||||
|
return Err("KB manager not initialized".into());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let db_pool = state_clone.conn.clone();
|
||||||
|
|
||||||
|
// Execute KB search in blocking thread
|
||||||
|
let result = std::thread::spawn(move || {
|
||||||
|
tokio::runtime::Handle::current().block_on(async {
|
||||||
|
think_kb_search(kb_manager, db_pool, session_id, &bot_name, &query).await
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.join();
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(Ok(search_result)) => {
|
||||||
|
info!(
|
||||||
|
"THINK KB completed - Found {} results with confidence {:.2}",
|
||||||
|
search_result.get("results")
|
||||||
|
.and_then(|r| r.as_array())
|
||||||
|
.map(|a| a.len())
|
||||||
|
.unwrap_or(0),
|
||||||
|
search_result.get("confidence")
|
||||||
|
.and_then(|c| c.as_f64())
|
||||||
|
.unwrap_or(0.0)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Convert JSON to Rhai Dynamic
|
||||||
|
Ok(json_to_dynamic(search_result))
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
error!("THINK KB search failed: {}", e);
|
||||||
|
Err(format!("THINK KB failed: {}", e).into())
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("THINK KB thread panic: {:?}", e);
|
||||||
|
Err("THINK KB failed: thread panic".into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Performs the actual KB search and reasoning
|
||||||
|
async fn think_kb_search(
|
||||||
|
kb_manager: Arc<KnowledgeBaseManager>,
|
||||||
|
db_pool: crate::core::shared::utils::DbPool,
|
||||||
|
session_id: uuid::Uuid,
|
||||||
|
bot_name: &str,
|
||||||
|
query: &str,
|
||||||
|
) -> Result<serde_json::Value, String> {
|
||||||
|
let context_manager = KbContextManager::new(kb_manager, db_pool);
|
||||||
|
|
||||||
|
// Search active KBs with reasonable limits
|
||||||
|
let kb_contexts = context_manager
|
||||||
|
.search_active_kbs(session_id, bot_name, query, 10, 2000)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("KB search failed: {}", e))?;
|
||||||
|
|
||||||
|
if kb_contexts.is_empty() {
|
||||||
|
warn!("No active KBs found for session {}", session_id);
|
||||||
|
return Ok(json!({
|
||||||
|
"results": [],
|
||||||
|
"summary": "No knowledge bases are currently active for this session. Use 'USE KB <name>' to activate a knowledge base.",
|
||||||
|
"confidence": 0.0,
|
||||||
|
"total_results": 0,
|
||||||
|
"sources": []
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut all_results = Vec::new();
|
||||||
|
let mut sources = std::collections::HashSet::new();
|
||||||
|
let mut total_score = 0.0;
|
||||||
|
let mut result_count = 0;
|
||||||
|
|
||||||
|
// Process results from all KBs
|
||||||
|
for kb_context in &kb_contexts {
|
||||||
|
for search_result in &kb_context.search_results {
|
||||||
|
all_results.push(json!({
|
||||||
|
"content": search_result.content,
|
||||||
|
"source": search_result.document_path,
|
||||||
|
"kb_name": kb_context.kb_name,
|
||||||
|
"relevance": search_result.score,
|
||||||
|
"tokens": search_result.chunk_tokens
|
||||||
|
}));
|
||||||
|
|
||||||
|
sources.insert(search_result.document_path.clone());
|
||||||
|
total_score += search_result.score as f64;
|
||||||
|
result_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate overall confidence based on average relevance and result count
|
||||||
|
let avg_relevance = if result_count > 0 {
|
||||||
|
total_score / result_count as f64
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
|
||||||
|
// Confidence factors: relevance score, number of results, source diversity
|
||||||
|
let confidence = calculate_confidence(avg_relevance, result_count, sources.len());
|
||||||
|
|
||||||
|
// Generate summary based on results
|
||||||
|
let summary = generate_summary(&all_results, query);
|
||||||
|
|
||||||
|
let response = json!({
|
||||||
|
"results": all_results,
|
||||||
|
"summary": summary,
|
||||||
|
"confidence": confidence,
|
||||||
|
"total_results": result_count,
|
||||||
|
"sources": sources.into_iter().collect::<Vec<_>>(),
|
||||||
|
"query": query,
|
||||||
|
"kb_count": kb_contexts.len()
|
||||||
|
});
|
||||||
|
|
||||||
|
debug!("THINK KB response: {}", serde_json::to_string_pretty(&response).unwrap_or_default());
|
||||||
|
|
||||||
|
Ok(response)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Combines relevance, result volume and source diversity into one
/// confidence score in `0.0..=1.0`, rounded to two decimal places.
///
/// * `avg_relevance` — mean relevance of the results; clamped to `0.0..=1.0`.
///   A NaN value is treated as `0.0` so that an undefined relevance can never
///   raise the score.
/// * `result_count` — number of results; scales linearly and saturates at 10.
/// * `source_count` — number of distinct sources; scales linearly and
///   saturates at 5.
///
/// The three factors are weighted 0.6 / 0.2 / 0.2 respectively.
fn calculate_confidence(avg_relevance: f64, result_count: usize, source_count: usize) -> f64 {
    // `NaN.min(1.0)` evaluates to 1.0, so NaN must be handled before clamping
    // or it would count as a perfect relevance.
    let relevance_factor = if avg_relevance.is_nan() {
        0.0
    } else {
        avg_relevance.clamp(0.0, 1.0)
    };

    // More results raise confidence linearly, capped at 10 results.
    let result_factor = (result_count as f64 / 10.0).min(1.0);

    // More distinct sources raise confidence linearly, capped at 5 sources.
    let diversity_factor = (source_count as f64 / 5.0).min(1.0);

    // Weighted combination
    let confidence = (relevance_factor * 0.6) + (result_factor * 0.2) + (diversity_factor * 0.2);

    // Round to 2 decimal places
    (confidence * 100.0).round() / 100.0
}
|
||||||
|
|
||||||
|
/// Generate a summary of the search results
|
||||||
|
fn generate_summary(results: &[serde_json::Value], query: &str) -> String {
|
||||||
|
if results.is_empty() {
|
||||||
|
return "No relevant information found in the knowledge base.".to_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
let result_count = results.len();
|
||||||
|
let source_count = results
|
||||||
|
.iter()
|
||||||
|
.filter_map(|r| r.get("source").and_then(|s| s.as_str()))
|
||||||
|
.collect::<std::collections::HashSet<_>>()
|
||||||
|
.len();
|
||||||
|
|
||||||
|
let avg_relevance = results
|
||||||
|
.iter()
|
||||||
|
.filter_map(|r| r.get("relevance").and_then(|s| s.as_f64()))
|
||||||
|
.sum::<f64>() / result_count as f64;
|
||||||
|
|
||||||
|
let kb_names = results
|
||||||
|
.iter()
|
||||||
|
.filter_map(|r| r.get("kb_name").and_then(|s| s.as_str()))
|
||||||
|
.collect::<std::collections::HashSet<_>>();
|
||||||
|
|
||||||
|
format!(
|
||||||
|
"Found {} relevant result{} from {} knowledge base{} ({} source{}) with average relevance of {:.2}. Query: '{}'",
|
||||||
|
result_count,
|
||||||
|
if result_count == 1 { "" } else { "s" },
|
||||||
|
kb_names.len(),
|
||||||
|
if kb_names.len() == 1 { "" } else { "s" },
|
||||||
|
source_count,
|
||||||
|
if source_count == 1 { "" } else { "s" },
|
||||||
|
avg_relevance,
|
||||||
|
query
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert JSON Value to Rhai Dynamic
|
||||||
|
fn json_to_dynamic(value: serde_json::Value) -> Dynamic {
|
||||||
|
match value {
|
||||||
|
serde_json::Value::Null => Dynamic::UNIT,
|
||||||
|
serde_json::Value::Bool(b) => Dynamic::from(b),
|
||||||
|
serde_json::Value::Number(n) => {
|
||||||
|
if let Some(i) = n.as_i64() {
|
||||||
|
Dynamic::from(i)
|
||||||
|
} else if let Some(f) = n.as_f64() {
|
||||||
|
Dynamic::from(f)
|
||||||
|
} else {
|
||||||
|
Dynamic::UNIT
|
||||||
|
}
|
||||||
|
}
|
||||||
|
serde_json::Value::String(s) => Dynamic::from(s),
|
||||||
|
serde_json::Value::Array(arr) => {
|
||||||
|
let mut rhai_array = rhai::Array::new();
|
||||||
|
for item in arr {
|
||||||
|
rhai_array.push(json_to_dynamic(item));
|
||||||
|
}
|
||||||
|
Dynamic::from(rhai_array)
|
||||||
|
}
|
||||||
|
serde_json::Value::Object(obj) => {
|
||||||
|
let mut rhai_map = Map::new();
|
||||||
|
for (key, val) in obj {
|
||||||
|
rhai_map.insert(key.into(), json_to_dynamic(val));
|
||||||
|
}
|
||||||
|
Dynamic::from(rhai_map)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Checks the range and ordering properties of the confidence score.
    #[test]
    fn test_confidence_calculation() {
        let confidence = calculate_confidence(0.8, 5, 3);
        assert!((0.0..=1.0).contains(&confidence));

        // High relevance, many results, diverse sources should give high confidence
        let high_confidence = calculate_confidence(0.9, 10, 5);
        assert!(high_confidence > 0.7);

        // Result count and source diversity contribute up to 0.4 on their
        // own, so low relevance only stays below 0.5 when they are not
        // saturated: 0.3 * 0.6 + 0.1 * 0.2 + 0.2 * 0.2 = 0.24.
        let low_confidence = calculate_confidence(0.3, 1, 1);
        assert!(low_confidence < 0.5);

        // With the counts held equal, lower relevance must score lower.
        assert!(calculate_confidence(0.3, 10, 5) < high_confidence);
    }

    /// Checks the exact summary text for two results from one KB.
    #[test]
    fn test_summary_generation() {
        let results = vec![
            json!({
                "content": "Test content 1",
                "source": "doc1.pdf",
                "kb_name": "test_kb",
                "relevance": 0.8,
                "tokens": 100
            }),
            json!({
                "content": "Test content 2",
                "source": "doc2.pdf",
                "kb_name": "test_kb",
                "relevance": 0.7,
                "tokens": 150
            }),
        ];

        let summary = generate_summary(&results, "test query");

        assert!(summary.contains("2 relevant result"));
        assert!(summary.contains("test query"));
        assert_eq!(
            summary,
            "Found 2 relevant results from 1 knowledge base (2 sources) with average relevance of 0.75. Query: 'test query'"
        );
    }

    /// An empty result list produces the fixed "nothing found" message.
    #[test]
    fn test_summary_generation_empty() {
        assert_eq!(
            generate_summary(&[], "anything"),
            "No relevant information found in the knowledge base."
        );
    }

    /// Checks that a JSON object converts into a Rhai map with every field.
    #[test]
    fn test_json_to_dynamic_conversion() {
        let test_json = json!({
            "string_field": "test",
            "number_field": 42,
            "bool_field": true,
            "array_field": [1, 2, 3],
            "object_field": {
                "nested": "value"
            }
        });

        let dynamic_result = json_to_dynamic(test_json);

        // An object must come back as a Rhai map holding every field.
        assert!(!dynamic_result.is_unit());
        assert!(dynamic_result.is_map());

        let map = dynamic_result.cast::<Map>();
        assert_eq!(map.len(), 5);
        assert!(map.get("array_field").map_or(false, |v| v.is_array()));
        assert!(map.get("object_field").map_or(false, |v| v.is_map()));

        // JSON null maps to the Rhai unit value.
        assert!(json_to_dynamic(serde_json::Value::Null).is_unit());
    }
}
|
||||||
|
|
@ -97,6 +97,8 @@ use self::keywords::save_from_unstructured::save_from_unstructured_keyword;
|
||||||
#[cfg(feature = "vectordb")]
|
#[cfg(feature = "vectordb")]
|
||||||
use self::keywords::clear_kb::register_clear_kb_keyword;
|
use self::keywords::clear_kb::register_clear_kb_keyword;
|
||||||
#[cfg(feature = "vectordb")]
|
#[cfg(feature = "vectordb")]
|
||||||
|
use self::keywords::think_kb::register_think_kb_keyword;
|
||||||
|
#[cfg(feature = "vectordb")]
|
||||||
use self::keywords::use_kb::register_use_kb_keyword;
|
use self::keywords::use_kb::register_use_kb_keyword;
|
||||||
|
|
||||||
// ===== DRIVE FEATURE IMPORTS =====
|
// ===== DRIVE FEATURE IMPORTS =====
|
||||||
|
|
@ -238,6 +240,7 @@ impl ScriptService {
|
||||||
{
|
{
|
||||||
let _ = register_use_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone()));
|
let _ = register_use_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone()));
|
||||||
let _ = register_clear_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone()));
|
let _ = register_clear_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone()));
|
||||||
|
let _ = register_think_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// ===== DRIVE FEATURE KEYWORDS =====
|
// ===== DRIVE FEATURE KEYWORDS =====
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue