Compare commits


5 commits

Author SHA1 Message Date
0a1bd25869 fix: Increase default n_predict to 512 for DeepSeek R1 reasoning
The DeepSeek R1 model outputs reasoning_content first, then content.
With n_predict=50, responses were truncated during the reasoning phase.
Increased to 512 to allow full reasoning + response.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-15 20:27:35 +00:00
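A note on the failure mode: with a reasoning model, the n_predict budget is consumed by the chain of thought before any user-visible answer is emitted, so a 50-token cap can leave `content` empty. A minimal sketch, assuming an OpenAI-compatible response in which the reasoning arrives in a `reasoning_content` field (helper name and exact field layout are illustrative, not the repo's code):

```rust
use serde_json::Value;

// Extract the user-visible answer from a chat completion response.
// Illustrative sketch: assumes DeepSeek R1's reasoning text arrives
// in `reasoning_content` ahead of `content`.
fn extract_answer(response: &Value) -> Option<String> {
    let msg = &response["choices"][0]["message"];
    match msg["content"].as_str() {
        // Normal case: reasoning finished and an answer was produced.
        Some(content) if !content.is_empty() => Some(content.to_string()),
        // Truncation symptom fixed by this commit: the whole n_predict
        // budget was spent inside the reasoning phase.
        _ => {
            let reasoning = msg["reasoning_content"].as_str().unwrap_or("");
            eprintln!("truncated during reasoning ({} chars of thought)", reasoning.len());
            None
        }
    }
}
```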
a9cbbbffa0 fix: Use correct default LLM model name for local DeepSeek server
Changed default model from 'gpt-3.5-turbo' to 'DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf'
in the bot message handler. This ensures the local llama-server receives the correct model
name and can process requests properly.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-15 20:23:04 +00:00
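For context, the model name is carried in the body of the OpenAI-compatible chat request, and llama-server only accepts names it has actually loaded. A sketch of the request shape, assuming serde_json (helper name illustrative):

```rust
use serde_json::{json, Value};

// Build a chat request for the local llama-server. The `model` field
// must match a loaded model; "gpt-3.5-turbo" is unknown to a server
// running the DeepSeek GGUF, which is why the old default failed.
fn chat_request(model: &str, prompt: &str) -> Value {
    json!({
        "model": model, // e.g. "DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf"
        "messages": [{ "role": "user", "content": prompt }],
        "max_tokens": 512
    })
}
```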
1cee912b72 fix: Correct LLM model paths and remove unnecessary cd command
- Change model paths to use ./data/llm/ instead of relative paths from build dir
- Remove cd command when starting llama-server to keep botserver root as cwd
- This fixes model loading when servers are started from different directories
- Both LLM and embedding servers now start successfully

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-15 20:15:17 +00:00
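The underlying point is that `./data/llm/` resolves against the process working directory, which is why dropping the `cd` matters. A small std-only sketch, assuming the botserver root is the cwd (the existence check is an illustrative addition, not part of the commit):

```rust
use std::path::{Path, PathBuf};

// Resolve a model filename under ./data/llm/, relative to the cwd.
fn resolve_model(file_name: &str) -> Result<PathBuf, String> {
    let path = Path::new("./data/llm").join(file_name);
    if path.is_file() {
        Ok(path)
    } else {
        // With a stray `cd`, this is exactly where loading used to fail.
        Err(format!("model not found at {}", path.display()))
    }
}
```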
e9a428ab1c fix: Auto-create bot database when not configured
Modified get_bot_pool() to automatically create the database for a bot
if it doesn't exist, instead of failing with a "No database configured" error.

This fixes the issue where bots created after the initial sync don't have
a database_name set in the bots table, causing table creation to fail.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-15 14:57:22 +00:00
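Since Postgres has no `CREATE DATABASE IF NOT EXISTS`, creating a missing bot database needs an existence probe first. A sketch of that step, assuming Diesel 2 with Postgres (helper name and the sanitization contract are illustrative, not the repo's actual code):

```rust
use diesel::pg::PgConnection;
use diesel::sql_types::{BigInt, Text};
use diesel::{sql_query, QueryableByName, RunQueryDsl};

#[derive(QueryableByName)]
struct Count {
    #[diesel(sql_type = BigInt)]
    n: i64,
}

// Create the bot's database only if it is absent.
fn ensure_database(conn: &mut PgConnection, db_name: &str) -> diesel::QueryResult<()> {
    let row: Count = sql_query("SELECT count(*) AS n FROM pg_database WHERE datname = $1")
        .bind::<Text, _>(db_name)
        .get_result(conn)?;
    if row.n == 0 {
        // CREATE DATABASE cannot take bind parameters, so db_name must
        // already be a sanitized identifier (e.g. [a-z0-9_]+).
        sql_query(format!("CREATE DATABASE \"{db_name}\"")).execute(conn)?;
    }
    Ok(())
}
```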
0c9665dd8b fix: Enable vector_db by default with health check and fallback to local LLM
- Add vector_db_health_check() function to verify Qdrant availability
- Add wait loop for vector_db startup in bootstrap (15 seconds)
- Fall back to the local LLM when an external URL is configured but no API key is provided
- Prevent external LLM (api.z.ai) usage without an authentication key

This fixes the production issues:
- Qdrant vector database not available at https://localhost:6333
- External LLM being used instead of local when no key is configured
- Ensures vector_db is properly started and ready before use

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-15 14:54:17 +00:00
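One note on the fallback probe in the diff below: the same port check can be done without shelling out to `nc`. A std-only sketch (like `nc -z`, it only proves something is listening on 6333, not that Qdrant's /healthz would pass):

```rust
use std::net::{SocketAddr, TcpStream};
use std::time::Duration;

// Dependency-free reachability probe for the local Qdrant port.
fn qdrant_port_open() -> bool {
    let addr: SocketAddr = ([127, 0, 0, 1], 6333).into();
    TcpStream::connect_timeout(&addr, Duration::from_secs(1)).is_ok()
}
```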
6 changed files with 117 additions and 28 deletions

View file

@@ -1,6 +1,6 @@
// Bootstrap manager implementation
use crate::core::bootstrap::bootstrap_types::{BootstrapManager, BootstrapProgress};
use crate::core::bootstrap::bootstrap_utils::{cache_health_check, safe_pkill, vault_health_check};
use crate::core::bootstrap::bootstrap_utils::{cache_health_check, safe_pkill, vault_health_check, vector_db_health_check};
use crate::core::config::AppConfig;
use crate::core::package_manager::{InstallMode, PackageManager};
use log::{info, warn};
@@ -79,13 +79,29 @@ impl BootstrapManager {
}
if pm.is_installed("vector_db") {
info!("Starting Vector database...");
match pm.start("vector_db") {
Ok(_child) => {
info!("Vector database started");
}
Err(e) => {
warn!("Failed to start Vector database: {}", e);
let vector_db_already_running = vector_db_health_check();
if vector_db_already_running {
info!("Vector database (Qdrant) is already running");
} else {
info!("Starting Vector database (Qdrant)...");
match pm.start("vector_db") {
Ok(_child) => {
info!("Vector database process started, waiting for readiness...");
// Wait for vector_db to be ready
for i in 0..15 {
sleep(Duration::from_secs(1)).await;
if vector_db_health_check() {
info!("Vector database (Qdrant) is responding");
break;
}
if i == 14 {
warn!("Vector database did not respond after 15 seconds");
}
}
}
Err(e) => {
warn!("Failed to start Vector database: {}", e);
}
}
}
}

View file

@@ -146,6 +146,40 @@ pub fn cache_health_check() -> bool {
}
}
/// Check if Qdrant vector database is healthy
pub fn vector_db_health_check() -> bool {
// Qdrant has a /healthz endpoint, use curl to check
// Try both HTTP and HTTPS
let urls = [
"http://localhost:6333/healthz",
"https://localhost:6333/healthz",
];
for url in &urls {
if let Ok(output) = Command::new("curl")
.args(["-f", "-s", "--connect-timeout", "2", "-k", url])
.output()
{
if output.status.success() {
// Qdrant healthz returns "OK" or JSON with status
let response = String::from_utf8_lossy(&output.stdout);
if response.contains("OK") || response.contains("\"status\":\"ok\"") {
return true;
}
}
}
}
// Fallback: just check if port 6333 is listening
match Command::new("nc")
.args(["-z", "-w", "1", "127.0.0.1", "6333"])
.output()
{
Ok(output) => output.status.success(),
Err(_) => false,
}
}
/// Get current user safely
pub fn safe_fuser() -> String {
// Return shell command that uses $USER environment variable

View file

@@ -427,9 +427,11 @@ impl BotOrchestrator {
// DEBUG: Log which bot we're getting config for
info!("[CONFIG_TRACE] Getting LLM config for bot_id: {}", session.bot_id);
// For local LLM server, use the actual model name
// Default to DeepSeek model if not configured
let model = config_manager
.get_config(&session.bot_id, "llm-model", Some("gpt-3.5-turbo"))
.unwrap_or_else(|_| "gpt-3.5-turbo".to_string());
.get_config(&session.bot_id, "llm-model", Some("DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf"))
.unwrap_or_else(|_| "DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf".to_string());
let key = config_manager
.get_config(&session.bot_id, "llm-key", Some(""))

View file

@@ -105,9 +105,25 @@ impl BotDatabaseManager {
}
}
// Get database name for this bot
let db_name = self.get_bot_database_name(bot_id)?
.ok_or_else(|| format!("No database configured for bot {}", bot_id))?;
// Get bot info (including name) from database
let mut conn = self.main_pool.get()?;
let bot_info: Option<BotDatabaseInfo> = sql_query(
"SELECT id, name, database_name FROM bots WHERE id = $1 AND is_active = true",
)
.bind::<diesel::sql_types::Uuid, _>(bot_id)
.get_result(&mut conn)
.optional()?;
let bot_info = bot_info.ok_or_else(|| format!("Bot {} not found or not active", bot_id))?;
// Ensure bot has a database, create if needed
let db_name = if let Some(name) = bot_info.database_name {
name
} else {
// Bot doesn't have a database configured, create it now
info!("Bot {} ({}) has no database, creating now", bot_info.name, bot_id);
self.ensure_bot_has_database(bot_id, &bot_info.name)?
};
// Create new pool
let pool = self.create_pool_for_database(&db_name)?;

View file

@@ -81,18 +81,23 @@ pub async fn ensure_llama_servers_running(
};
let llm_model = if llm_model.is_empty() {
info!("No LLM model configured, using default: ../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf");
"../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf".to_string()
info!("No LLM model configured, using default: DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf");
"DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf".to_string()
} else {
llm_model
};
let embedding_model = if embedding_model.is_empty() {
info!("No embedding model configured, using default: ../../../../data/llm/bge-small-en-v1.5-f32.gguf");
"../../../../data/llm/bge-small-en-v1.5-f32.gguf".to_string()
info!("No embedding model configured, using default: bge-small-en-v1.5-f32.gguf");
"bge-small-en-v1.5-f32.gguf".to_string()
} else {
embedding_model
};
// For llama-server startup, use path relative to botserver root
// The models are in ./data/llm/ and the llama-server runs from botserver root
let llm_model_path = format!("./data/llm/{}", llm_model);
let embedding_model_path = format!("./data/llm/{}", embedding_model);
if !llm_server_enabled {
info!("Local LLM server management disabled (llm-server=false). Using external endpoints.");
info!(" LLM URL: {llm_url}");
@@ -160,13 +165,13 @@ pub async fn ensure_llama_servers_running(
info!("Starting LLM server...");
let app_state_clone = Arc::clone(&app_state);
let llm_server_path_clone = llm_server_path.clone();
let llm_model_clone = llm_model.clone();
let llm_model_path_clone = llm_model_path.clone();
let llm_url_clone = llm_url.clone();
tasks.push(tokio::spawn(async move {
start_llm_server(
app_state_clone,
llm_server_path_clone,
llm_model_clone,
llm_model_path_clone,
llm_url_clone,
)
}));
@@ -177,7 +182,7 @@ pub async fn ensure_llama_servers_running(
info!("Starting Embedding server...");
tasks.push(tokio::spawn(start_embedding_server(
llm_server_path.clone(),
embedding_model.clone(),
embedding_model_path.clone(),
embedding_url.clone(),
)));
} else if embedding_model.is_empty() {
@@ -381,8 +386,8 @@ pub fn start_llm_server(
let n_predict = config_manager
.get_config(&default_bot_id, "llm-server-n-predict", None)
.unwrap_or_else(|_| "50".to_string());
let n_predict = if n_predict.is_empty() { "50".to_string() } else { n_predict };
.unwrap_or_else(|_| "512".to_string()); // Increased default for DeepSeek R1 reasoning
let n_predict = if n_predict.is_empty() { "512".to_string() } else { n_predict };
let n_ctx_size = config_manager
.get_config(&default_bot_id, "llm-server-ctx-size", None)
@@ -436,10 +441,10 @@ pub fn start_llm_server(
})?;
} else {
let cmd_arg = format!(
"cd {llama_cpp_path} && ./llama-server {args} --verbose >llm-stdout.log 2>&1 &"
"{llama_cpp_path}/llama-server {args} --verbose >{llama_cpp_path}/llm-stdout.log 2>&1 &"
);
info!(
"Executing LLM server command: cd {llama_cpp_path} && ./llama-server {args} --verbose"
"Executing LLM server command: {llama_cpp_path}/llama-server {args} --verbose"
);
let cmd = SafeCommand::new("sh")
.and_then(|c| c.arg("-c"))
@@ -464,9 +469,13 @@ pub async fn start_embedding_server(
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let port = extract_port(&url);
let full_model_path = if model_path.starts_with('/') {
// model_path is already the full path (constructed with ../../../../data/llm/ prefix)
// Only prepend llama_cpp_path if model_path is a simple filename (not a path)
let full_model_path = if model_path.contains('/') || model_path.contains('.') {
// model_path is already a full or relative path, use as-is
model_path.clone()
} else {
// model_path is just a filename, prepend llama_cpp_path
format!("{llama_cpp_path}/{model_path}")
};
@@ -496,10 +505,10 @@ pub async fn start_embedding_server(
})?;
} else {
let cmd_arg = format!(
"cd {llama_cpp_path} && ./llama-server -m {model_path} --verbose --host 0.0.0.0 --port {port} --embedding --n-gpu-layers 99 --ubatch-size 2048 >llmembd-stdout.log 2>&1 &"
"{llama_cpp_path}/llama-server -m {model_path} --verbose --host 0.0.0.0 --port {port} --embedding --n-gpu-layers 99 --ubatch-size 2048 >{llama_cpp_path}/llmembd-stdout.log 2>&1 &"
);
info!(
"Executing embedding server command: cd {llama_cpp_path} && ./llama-server -m {model_path} --host 0.0.0.0 --port {port} --embedding"
"Executing embedding server command: {llama_cpp_path}/llama-server -m {model_path} --host 0.0.0.0 --port {port} --embedding"
);
let cmd = SafeCommand::new("sh")
.and_then(|c| c.arg("-c"))

View file

@@ -432,7 +432,7 @@ pub async fn create_app_state(
info!("LLM Model: {}", llm_model);
}
let _llm_key = std::env::var("LLM_KEY")
let llm_key = std::env::var("LLM_KEY")
.or_else(|_| std::env::var("OPENAI_API_KEY"))
.or_else(|_| {
config_manager
@@ -441,6 +441,18 @@
})
.unwrap_or_default();
// If llm-url points to external API but no key is configured, fall back to local LLM
let llm_url = if llm_key.is_empty()
&& !llm_url.contains("localhost")
&& !llm_url.contains("127.0.0.1")
&& (llm_url.contains("api.z.ai") || llm_url.contains("openai.com") || llm_url.contains("anthropic.com"))
{
warn!("External LLM URL configured ({}), but no API key provided. Falling back to local LLM at http://localhost:8081", llm_url);
"http://localhost:8081".to_string()
} else {
llm_url
};
// LLM endpoint path configuration
let llm_endpoint_path = config_manager
.get_config(