fix: Enable vector_db by default with health check and fallback to local LLM
- Add vector_db_health_check() function to verify Qdrant availability - Add wait loop for vector_db startup in bootstrap (15 seconds) - Fallback to local LLM when external URL configured but no API key provided - Prevent external LLM (api.z.ai) usage without authentication key This fixes the production issues: - Qdrant vector database not available at https://localhost:6333 - External LLM being used instead of local when no key is configured - Ensures vector_db is properly started and ready before use Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
307809bbdd
commit
0c9665dd8b
4 changed files with 82 additions and 16 deletions
|
|
@ -1,6 +1,6 @@
|
|||
// Bootstrap manager implementation
|
||||
use crate::core::bootstrap::bootstrap_types::{BootstrapManager, BootstrapProgress};
|
||||
use crate::core::bootstrap::bootstrap_utils::{cache_health_check, safe_pkill, vault_health_check};
|
||||
use crate::core::bootstrap::bootstrap_utils::{cache_health_check, safe_pkill, vault_health_check, vector_db_health_check};
|
||||
use crate::core::config::AppConfig;
|
||||
use crate::core::package_manager::{InstallMode, PackageManager};
|
||||
use log::{info, warn};
|
||||
|
|
@ -79,13 +79,29 @@ impl BootstrapManager {
|
|||
}
|
||||
|
||||
if pm.is_installed("vector_db") {
|
||||
info!("Starting Vector database...");
|
||||
match pm.start("vector_db") {
|
||||
Ok(_child) => {
|
||||
info!("Vector database started");
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to start Vector database: {}", e);
|
||||
let vector_db_already_running = vector_db_health_check();
|
||||
if vector_db_already_running {
|
||||
info!("Vector database (Qdrant) is already running");
|
||||
} else {
|
||||
info!("Starting Vector database (Qdrant)...");
|
||||
match pm.start("vector_db") {
|
||||
Ok(_child) => {
|
||||
info!("Vector database process started, waiting for readiness...");
|
||||
// Wait for vector_db to be ready
|
||||
for i in 0..15 {
|
||||
sleep(Duration::from_secs(1)).await;
|
||||
if vector_db_health_check() {
|
||||
info!("Vector database (Qdrant) is responding");
|
||||
break;
|
||||
}
|
||||
if i == 14 {
|
||||
warn!("Vector database did not respond after 15 seconds");
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to start Vector database: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -146,6 +146,40 @@ pub fn cache_health_check() -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
/// Check if Qdrant vector database is healthy
|
||||
pub fn vector_db_health_check() -> bool {
|
||||
// Qdrant has a /healthz endpoint, use curl to check
|
||||
// Try both HTTP and HTTPS
|
||||
let urls = [
|
||||
"http://localhost:6333/healthz",
|
||||
"https://localhost:6333/healthz",
|
||||
];
|
||||
|
||||
for url in &urls {
|
||||
if let Ok(output) = Command::new("curl")
|
||||
.args(["-f", "-s", "--connect-timeout", "2", "-k", url])
|
||||
.output()
|
||||
{
|
||||
if output.status.success() {
|
||||
// Qdrant healthz returns "OK" or JSON with status
|
||||
let response = String::from_utf8_lossy(&output.stdout);
|
||||
if response.contains("OK") || response.contains("\"status\":\"ok\"") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: just check if port 6333 is listening
|
||||
match Command::new("nc")
|
||||
.args(["-z", "-w", "1", "127.0.0.1", "6333"])
|
||||
.output()
|
||||
{
|
||||
Ok(output) => output.status.success(),
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get current user safely
|
||||
pub fn safe_fuser() -> String {
|
||||
// Return shell command that uses $USER environment variable
|
||||
|
|
|
|||
|
|
@ -81,18 +81,22 @@ pub async fn ensure_llama_servers_running(
|
|||
};
|
||||
|
||||
let llm_model = if llm_model.is_empty() {
|
||||
info!("No LLM model configured, using default: ../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf");
|
||||
"../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf".to_string()
|
||||
info!("No LLM model configured, using default: DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf");
|
||||
"DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf".to_string()
|
||||
} else {
|
||||
llm_model
|
||||
};
|
||||
|
||||
let embedding_model = if embedding_model.is_empty() {
|
||||
info!("No embedding model configured, using default: ../../../../data/llm/bge-small-en-v1.5-f32.gguf");
|
||||
"../../../../data/llm/bge-small-en-v1.5-f32.gguf".to_string()
|
||||
info!("No embedding model configured, using default: bge-small-en-v1.5-f32.gguf");
|
||||
"bge-small-en-v1.5-f32.gguf".to_string()
|
||||
} else {
|
||||
embedding_model
|
||||
};
|
||||
|
||||
// For llama-server startup, we need the full path
|
||||
let llm_model_path = format!("{}/../../../../data/llm/{}", llm_server_path, llm_model);
|
||||
let embedding_model_path = format!("{}/../../../../data/llm/{}", llm_server_path, embedding_model);
|
||||
if !llm_server_enabled {
|
||||
info!("Local LLM server management disabled (llm-server=false). Using external endpoints.");
|
||||
info!(" LLM URL: {llm_url}");
|
||||
|
|
@ -160,13 +164,13 @@ pub async fn ensure_llama_servers_running(
|
|||
info!("Starting LLM server...");
|
||||
let app_state_clone = Arc::clone(&app_state);
|
||||
let llm_server_path_clone = llm_server_path.clone();
|
||||
let llm_model_clone = llm_model.clone();
|
||||
let llm_model_path_clone = llm_model_path.clone();
|
||||
let llm_url_clone = llm_url.clone();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
start_llm_server(
|
||||
app_state_clone,
|
||||
llm_server_path_clone,
|
||||
llm_model_clone,
|
||||
llm_model_path_clone,
|
||||
llm_url_clone,
|
||||
)
|
||||
}));
|
||||
|
|
@ -177,7 +181,7 @@ pub async fn ensure_llama_servers_running(
|
|||
info!("Starting Embedding server...");
|
||||
tasks.push(tokio::spawn(start_embedding_server(
|
||||
llm_server_path.clone(),
|
||||
embedding_model.clone(),
|
||||
embedding_model_path.clone(),
|
||||
embedding_url.clone(),
|
||||
)));
|
||||
} else if embedding_model.is_empty() {
|
||||
|
|
|
|||
|
|
@ -432,7 +432,7 @@ pub async fn create_app_state(
|
|||
info!("LLM Model: {}", llm_model);
|
||||
}
|
||||
|
||||
let _llm_key = std::env::var("LLM_KEY")
|
||||
let llm_key = std::env::var("LLM_KEY")
|
||||
.or_else(|_| std::env::var("OPENAI_API_KEY"))
|
||||
.or_else(|_| {
|
||||
config_manager
|
||||
|
|
@ -441,6 +441,18 @@ pub async fn create_app_state(
|
|||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// If llm-url points to external API but no key is configured, fall back to local LLM
|
||||
let llm_url = if llm_key.is_empty()
|
||||
&& !llm_url.contains("localhost")
|
||||
&& !llm_url.contains("127.0.0.1")
|
||||
&& (llm_url.contains("api.z.ai") || llm_url.contains("openai.com") || llm_url.contains("anthropic.com"))
|
||||
{
|
||||
warn!("External LLM URL configured ({}), but no API key provided. Falling back to local LLM at http://localhost:8081", llm_url);
|
||||
"http://localhost:8081".to_string()
|
||||
} else {
|
||||
llm_url
|
||||
};
|
||||
|
||||
// LLM endpoint path configuration
|
||||
let llm_endpoint_path = config_manager
|
||||
.get_config(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue