fix: Improve embedding error handling and add semantic cache toggle
- Enhanced error messages in LocalEmbeddingService to show actual HTTP status and response
- Added semantic-cache-enabled config parameter to disable semantic matching when embedding service unavailable
- Improved error logging with full response details for debugging production issues
- Prevents 'Invalid embedding response' errors by allowing graceful fallback
This commit is contained in:
parent
5404e3e7ba
commit
d5b877f8e8
2 changed files with 46 additions and 7 deletions
|
|
@ -632,10 +632,39 @@ impl EmbeddingService for LocalEmbeddingService {
|
||||||
.send()
|
.send()
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let result: Value = response.json().await?;
|
let status = response.status();
|
||||||
|
let response_text = response.text().await?;
|
||||||
|
|
||||||
|
if !status.is_success() {
|
||||||
|
debug!(
|
||||||
|
"Embedding service HTTP error {}: {}",
|
||||||
|
status,
|
||||||
|
response_text
|
||||||
|
);
|
||||||
|
return Err(format!(
|
||||||
|
"Embedding service returned HTTP {}: {}",
|
||||||
|
status,
|
||||||
|
response_text
|
||||||
|
).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let result: Value = serde_json::from_str(&response_text)
|
||||||
|
.map_err(|e| {
|
||||||
|
debug!("Failed to parse embedding JSON: {} - Response: {}", e, response_text);
|
||||||
|
format!("Failed to parse embedding response JSON: {} - Response: {}", e, response_text)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if let Some(error) = result.get("error") {
|
||||||
|
debug!("Embedding service returned error: {}", error);
|
||||||
|
return Err(format!("Embedding service error: {}", error).into());
|
||||||
|
}
|
||||||
|
|
||||||
let embedding = result["data"][0]["embedding"]
|
let embedding = result["data"][0]["embedding"]
|
||||||
.as_array()
|
.as_array()
|
||||||
.ok_or("Invalid embedding response")?
|
.ok_or_else(|| {
|
||||||
|
debug!("Invalid embedding response format. Expected data[0].embedding array. Got: {}", response_text);
|
||||||
|
format!("Invalid embedding response format - Expected data[0].embedding array, got: {}", response_text)
|
||||||
|
})?
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|v| v.as_f64().map(|f| f as f32))
|
.filter_map(|v| v.as_f64().map(|f| f as f32))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
|
||||||
|
|
@ -748,17 +748,27 @@ fn init_llm_provider(
|
||||||
let embedding_model = config_manager
|
let embedding_model = config_manager
|
||||||
.get_config(&bot_id, "embedding-model", Some("all-MiniLM-L6-v2"))
|
.get_config(&bot_id, "embedding-model", Some("all-MiniLM-L6-v2"))
|
||||||
.unwrap_or_else(|_| "all-MiniLM-L6-v2".to_string());
|
.unwrap_or_else(|_| "all-MiniLM-L6-v2".to_string());
|
||||||
|
let semantic_cache_enabled = config_manager
|
||||||
|
.get_config(&bot_id, "semantic-cache-enabled", Some("true"))
|
||||||
|
.unwrap_or_else(|_| "true".to_string())
|
||||||
|
.to_lowercase() == "true";
|
||||||
|
|
||||||
info!("Embedding URL: {}", embedding_url);
|
info!("Embedding URL: {}", embedding_url);
|
||||||
info!("Embedding Model: {}", embedding_model);
|
info!("Embedding Model: {}", embedding_model);
|
||||||
|
info!("Semantic Cache Enabled: {}", semantic_cache_enabled);
|
||||||
|
|
||||||
let embedding_service = Some(Arc::new(LocalEmbeddingService::new(
|
let embedding_service = if semantic_cache_enabled {
|
||||||
embedding_url,
|
Some(Arc::new(LocalEmbeddingService::new(
|
||||||
embedding_model,
|
embedding_url,
|
||||||
)) as Arc<dyn EmbeddingService>);
|
embedding_model,
|
||||||
|
)) as Arc<dyn EmbeddingService>)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
let cache_config = CacheConfig {
|
let cache_config = CacheConfig {
|
||||||
ttl: 3600,
|
ttl: 3600,
|
||||||
semantic_matching: true,
|
semantic_matching: semantic_cache_enabled,
|
||||||
similarity_threshold: 0.85,
|
similarity_threshold: 0.85,
|
||||||
max_similarity_checks: 100,
|
max_similarity_checks: 100,
|
||||||
key_prefix: "llm_cache".to_string(),
|
key_prefix: "llm_cache".to_string(),
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue