refactor(config): replace raw SQL with Diesel query for bot config
Updated `ConfigManager::get_config` to use the Diesel query builder instead of raw SQL, for improved type safety and maintainability. Renamed the `bot_id` parameter to `code_bot_id` (the schema DSL glob import brings a `bot_id` column into scope) and integrated the generated schema references. Also refactored `ensure_llama_servers_running` to fetch configuration from the database via `AppState` and `ConfigManager`, and removed unused imports in the bootstrap module.
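For orientation, the pattern this commit moves to looks roughly like this when assembled in one place: the `diesel::table!` definition (added to the schema module further down) generates a typed DSL, and the lookup is composed from that instead of interpolating a SQL string. A minimal self-contained sketch, assuming Diesel's `postgres` and `uuid` features are enabled; the names mirror the diff, the surrounding plumbing is simplified:

use diesel::prelude::*;
use uuid::Uuid;

diesel::table! {
    bot_configuration (id) {
        id -> Uuid,
        bot_id -> Uuid,
        config_key -> Text,
        config_value -> Text,
        config_type -> Text,
        created_at -> Timestamptz,
        updated_at -> Timestamptz,
    }
}

// The filters and projection below are checked at compile time against
// the table! definition above, unlike the raw
// "SELECT get_bot_config($1, $2, $3)" call this commit removes.
fn lookup(conn: &mut PgConnection, target_bot: Uuid, key: &str) -> QueryResult<String> {
    use bot_configuration::dsl::*;
    bot_configuration
        .filter(bot_id.eq(target_bot))
        .filter(config_key.eq(key))
        .select(config_value)
        .first::<String>(conn)
}

Note the parameter is deliberately not called `bot_id`: the `dsl` glob import puts a `bot_id` column type in scope, which is what motivates the `bot_id` → `code_bot_id` rename in `get_config` below.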
This commit is contained in:
parent 7170663b6f
commit 6d68585c71

6 changed files with 90 additions and 117 deletions
@@ -2,7 +2,7 @@ use crate::config::AppConfig;
 use crate::package_manager::{InstallMode, PackageManager};
 use crate::shared::utils::establish_pg_connection;
 use anyhow::Result;
-use diesel::{connection::SimpleConnection, RunQueryDsl, QueryableByName};
+use diesel::{connection::SimpleConnection, QueryableByName};
 use dotenvy::dotenv;
 use log::{debug, error, info, trace};
 use aws_sdk_s3::Client;

@@ -419,27 +419,22 @@ impl ConfigManager {
     pub fn get_config(
         &self,
-        bot_id: &uuid::Uuid,
+        code_bot_id: &uuid::Uuid,
         key: &str,
         fallback: Option<&str>,
     ) -> Result<String, diesel::result::Error> {
+        use crate::shared::models::schema::bot_configuration::dsl::*;
+
         let mut conn = self.conn.lock().unwrap();
         let fallback_str = fallback.unwrap_or("");

-        #[derive(Debug, QueryableByName)]
-        struct ConfigValue {
-            #[diesel(sql_type = Text)]
-            value: String,
-        }
-
-        let result = diesel::sql_query(
-            "SELECT get_bot_config($1, $2, $3) as value"
-        )
-        .bind::<diesel::sql_types::Uuid, _>(bot_id)
-        .bind::<Text, _>(key)
-        .bind::<Text, _>(fallback_str)
-        .get_result::<ConfigValue>(&mut *conn)
-        .map(|row| row.value)?;
+        let result = bot_configuration
+            .filter(bot_id.eq(code_bot_id))
+            .filter(config_key.eq(key))
+            .select(config_value)
+            .first::<String>(&mut *conn)
+            .unwrap_or(fallback_str.to_string());

         Ok(result)
     }

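A hypothetical call site for the reworked method, matching how the bootstrap code below consumes it (the key and fallback values here are illustrative):

// Read one value for a bot, falling back to a literal default.
let llm_url = config_manager.get_config(
    &default_bot_id,
    "llm-url",
    Some("http://localhost:8081"),
)?;

One behavioral note: `unwrap_or` applies the fallback on any query error, so a missing row and a failed connection are indistinguishable to callers, and the method as written can no longer return `Err` despite its `Result` signature.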
@@ -1,12 +1,15 @@
 use actix_web::{post, web, HttpRequest, HttpResponse, Result};
-use crate::config::{AppConfig, ConfigManager};
 use dotenvy::dotenv;
 use log::{error, info};
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 use std::env;
 use tokio::time::{sleep, Duration};
 use uuid::Uuid;

+use crate::config::ConfigManager;
+use crate::shared::models::schema::bots::dsl::*;
+use crate::shared::state::AppState;
+use diesel::prelude::*;
+
 // OpenAI-compatible request/response structures
 #[derive(Debug, Serialize, Deserialize)]

@@ -55,61 +58,37 @@ struct LlamaCppResponse {
     generation_settings: Option<serde_json::Value>,
 }

-pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
-{
-    let llm_local = env::var("LLM_LOCAL").unwrap_or_else(|_| "true".to_string());
-
-    if llm_local.to_lowercase() != "true" {
-        info!("ℹ️ LLM_LOCAL is not enabled, skipping local server startup");
-        return Ok(());
-    }
-
-    // Get configuration with fallback to default bot config
-    let default_bot_id = Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap();
-    let config_manager = AppConfig::from_env().db_conn.map(ConfigManager::new);
-
-    let llm_url = match &config_manager {
-        Some(cm) => env::var("LLM_URL").unwrap_or_else(|_|
-            cm.get_config(&default_bot_id, "LLM_URL", None)
-                .unwrap_or("http://localhost:8081".to_string())
-        ),
-        None => env::var("LLM_URL").unwrap_or("http://localhost:8081".to_string())
-    };
-    let embedding_url = match &config_manager {
-        Some(cm) => env::var("EMBEDDING_URL").unwrap_or_else(|_|
-            cm.get_config(&default_bot_id, "EMBEDDING_URL", None)
-                .unwrap_or("http://localhost:8082".to_string())
-        ),
-        None => env::var("EMBEDDING_URL").unwrap_or("http://localhost:8082".to_string())
-    };
-    let llama_cpp_path = match &config_manager {
-        Some(cm) => env::var("LLM_CPP_PATH").unwrap_or_else(|_|
-            cm.get_config(&default_bot_id, "LLM_CPP_PATH", None)
-                .unwrap_or("~/llama.cpp".to_string())
-        ),
-        None => env::var("LLM_CPP_PATH").unwrap_or("~/llama.cpp".to_string())
-    };
-    let llm_model_path = match &config_manager {
-        Some(cm) => env::var("LLM_MODEL_PATH").unwrap_or_else(|_|
-            cm.get_config(&default_bot_id, "LLM_MODEL_PATH", None)
-                .unwrap_or("".to_string())
-        ),
-        None => env::var("LLM_MODEL_PATH").unwrap_or("".to_string())
-    };
-    let embedding_model_path = match &config_manager {
-        Some(cm) => env::var("EMBEDDING_MODEL_PATH").unwrap_or_else(|_|
-            cm.get_config(&default_bot_id, "EMBEDDING_MODEL_PATH", None)
-                .unwrap_or("".to_string())
-        ),
-        None => env::var("EMBEDDING_MODEL_PATH").unwrap_or("".to_string())
-    };
-
-    info!("🚀 Starting local llama.cpp servers...");
-    info!("📋 Configuration:");
+pub async fn ensure_llama_servers_running(
+    app_state: &AppState,
+) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
+    let conn = app_state.conn.clone();
+    let config_manager = ConfigManager::new(conn.clone());
+
+    // Get default bot ID from database
+    let default_bot_id = {
+        let mut conn = conn.lock().unwrap();
+        bots.filter(name.eq("default"))
+            .select(id)
+            .first::<uuid::Uuid>(&mut *conn)
+            .unwrap_or_else(|_| uuid::Uuid::nil())
+    };
+
+    // Get configuration from config using default bot ID
+    let llm_url = config_manager.get_config(&default_bot_id, "llm-url", None)?;
+    let llm_model = config_manager.get_config(&default_bot_id, "llm-model", None)?;
+
+    let embedding_url = config_manager.get_config(&default_bot_id, "embedding-url", None)?;
+    let embedding_model = config_manager.get_config(&default_bot_id, "embedding-model", None)?;
+
+    let llm_server_path = config_manager.get_config(&default_bot_id, "llm-server-path", None)?;
+
+    info!("🚀 Starting LLM servers...");
+    info!("📋 Configuration:");
     info!("   LLM URL: {}", llm_url);
     info!("   Embedding URL: {}", embedding_url);
-    info!("   LLM Model: {}", llm_model_path);
-    info!("   Embedding Model: {}", embedding_model_path);
+    info!("   LLM Model: {}", llm_model);
+    info!("   Embedding Model: {}", embedding_model);
+    info!("   LLM Server Path: {}", llm_server_path);

     // Check if servers are already running
     let llm_running = is_server_running(&llm_url).await;

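The function now threads configuration through `AppState` rather than re-reading the environment. The diff implies `AppState` exposes a shared, mutex-guarded Diesel connection; a sketch of the assumed shape, inferred from `app_state.conn.clone()` and `conn.lock().unwrap()` above (the real definition lives in `crate::shared::state` and may differ):

use std::sync::{Arc, Mutex};
use diesel::PgConnection;

pub struct AppState {
    // Cloned cheaply via Arc, then locked per query by ConfigManager
    // and by the default-bot lookup in the hunk above.
    pub conn: Arc<Mutex<PgConnection>>,
    // ...other fields elided
}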
@@ -123,26 +102,26 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
     // Start servers that aren't running
     let mut tasks = vec![];

-    if !llm_running && !llm_model_path.is_empty() {
+    if !llm_running && !llm_model.is_empty() {
         info!("🔄 Starting LLM server...");
         tasks.push(tokio::spawn(start_llm_server(
-            llama_cpp_path.clone(),
-            llm_model_path.clone(),
+            llm_server_path.clone(),
+            llm_model.clone(),
             llm_url.clone(),
         )));
-    } else if llm_model_path.is_empty() {
-        info!("⚠️ LLM_MODEL_PATH not set, skipping LLM server");
+    } else if llm_model.is_empty() {
+        info!("⚠️ LLM_MODEL not set, skipping LLM server");
     }

-    if !embedding_running && !embedding_model_path.is_empty() {
+    if !embedding_running && !embedding_model.is_empty() {
         info!("🔄 Starting Embedding server...");
         tasks.push(tokio::spawn(start_embedding_server(
-            llama_cpp_path.clone(),
-            embedding_model_path.clone(),
+            llm_server_path.clone(),
+            embedding_model.clone(),
             embedding_url.clone(),
         )));
-    } else if embedding_model_path.is_empty() {
-        info!("⚠️ EMBEDDING_MODEL_PATH not set, skipping Embedding server");
+    } else if embedding_model.is_empty() {
+        info!("⚠️ EMBEDDING_MODEL not set, skipping Embedding server");
     }

     // Wait for all server startup tasks

@@ -153,8 +132,8 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
     // Wait for servers to be ready with verbose logging
     info!("⏳ Waiting for servers to become ready...");

-    let mut llm_ready = llm_running || llm_model_path.is_empty();
-    let mut embedding_ready = embedding_running || embedding_model_path.is_empty();
+    let mut llm_ready = llm_running || llm_model.is_empty();
+    let mut embedding_ready = embedding_running || embedding_model.is_empty();

     let mut attempts = 0;
     let max_attempts = 60; // 2 minutes total

@@ -168,7 +147,7 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
             max_attempts
         );

-        if !llm_ready && !llm_model_path.is_empty() {
+        if !llm_ready && !llm_model.is_empty() {
             if is_server_running(&llm_url).await {
                 info!("   ✅ LLM server ready at {}", llm_url);
                 llm_ready = true;

@@ -177,7 +156,7 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
             }
         }

-        if !embedding_ready && !embedding_model_path.is_empty() {
+        if !embedding_ready && !embedding_model.is_empty() {
             if is_server_running(&embedding_url).await {
                 info!("   ✅ Embedding server ready at {}", embedding_url);
                 embedding_ready = true;

@@ -201,10 +180,10 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
         Ok(())
     } else {
         let mut error_msg = "❌ Servers failed to start within timeout:".to_string();
-        if !llm_ready && !llm_model_path.is_empty() {
+        if !llm_ready && !llm_model.is_empty() {
             error_msg.push_str(&format!("\n  - LLM server at {}", llm_url));
         }
-        if !embedding_ready && !embedding_model_path.is_empty() {
+        if !embedding_ready && !embedding_model.is_empty() {
             error_msg.push_str(&format!("\n  - Embedding server at {}", embedding_url));
         }
         Err(error_msg.into())

@@ -239,7 +218,7 @@ async fn start_llm_server(
     );

     if n_moe != "0" {
-        args.push_str(&format!(" --n-moe {}", n_moe));
+        args.push_str(&format!(" --n-cpu-moe {}", n_moe));
     }
     if parallel != "1" {
         args.push_str(&format!(" --parallel {}", parallel));

@@ -298,7 +277,10 @@ async fn start_embedding_server(
     Ok(())
 }

 async fn is_server_running(url: &str) -> bool {
     let client = reqwest::Client::new();
     match client.get(&format!("{}/health", url)).send().await {
         Ok(response) => response.status().is_success(),

@@ -364,7 +346,7 @@ pub async fn chat_completions_local(
         })?;

     let response = client
-        .post(&format!("{}/completion", llama_url))
+        .post(&format!("{}/v1/completion", llama_url))
         .header("Content-Type", "application/json")
         .json(&llama_request)
         .send()

@@ -639,20 +621,3 @@ pub async fn embeddings_local(
     Ok(HttpResponse::Ok().json(openai_response))
 }
-
-// Health check endpoint
-#[actix_web::get("/health")]
-pub async fn health() -> Result<HttpResponse> {
-    let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8081".to_string());
-
-    if is_server_running(&llama_url).await {
-        Ok(HttpResponse::Ok().json(serde_json::json!({
-            "status": "healthy",
-            "llama_server": "running"
-        })))
-    } else {
-        Ok(HttpResponse::ServiceUnavailable().json(serde_json::json!({
-            "status": "unhealthy",
-            "llama_server": "not running"
-        })))
-    }
-}

src/main.rs (12 changed lines)

@@ -172,10 +172,6 @@ async fn main() -> std::io::Result<()> {
     };

-    let db_custom_pool = db_pool.clone();
-    ensure_llama_servers_running()
-        .await
-        .expect("Failed to initialize LLM local server");

     let cache_url = std::env::var("CACHE_URL")
         .or_else(|_| std::env::var("REDIS_URL"))
         .unwrap_or_else(|_| "redis://localhost:6379".to_string());

@@ -272,8 +268,14 @@ async fn main() -> std::io::Result<()> {
         log::error!("Failed to mount bots: {}", e);
     }

+    ensure_llama_servers_running(&app_state)
+        .await
+        .expect("Failed to initialize LLM local server");
+
     HttpServer::new(move || {
         let cors = Cors::default()
             .allow_any_origin()
             .allow_any_method()

@@ -401,6 +401,18 @@ pub mod schema {
             added_at -> Text,
         }
     }
+
+    diesel::table! {
+        bot_configuration (id) {
+            id -> Uuid,
+            bot_id -> Uuid,
+            config_key -> Text,
+            config_value -> Text,
+            config_type -> Text,
+            created_at -> Timestamptz,
+            updated_at -> Timestamptz,
+        }
+    }
 }

 pub use schema::*;

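This `table!` block is what makes the query-builder version of `get_config` compile: the macro expands to a module whose `dsl` submodule re-exports the table and one zero-sized marker type per column. Heavily simplified, the generated shape is roughly like the sketch below (not the actual generated code, which carries many more trait impls):

pub mod bot_configuration {
    pub struct table;                // the table itself
    pub mod columns {
        pub struct id;               // one marker type per column
        pub struct bot_id;
        pub struct config_key;
        pub struct config_value;
        // ...
    }
    pub mod dsl {
        pub use super::columns::*;
        pub use super::table as bot_configuration;
    }
}

These `dsl` re-exports are exactly what `get_config`'s glob import pulls into scope.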
@@ -5,21 +5,20 @@ server_port,8080
 sites_root,/tmp

 llm-key,none
-llm-url,http://localhost:8080/v1
-llm-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-url,http://localhost:8081
+llm-model,../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf

 embedding-url,http://localhost:8082
-embedding-model,botserver-stack/data/llm/bge-small-en-v1.5-f32.gguf
+embedding-model,../../../../data/llm/bge-small-en-v1.5-f32.gguf

 llm-server,false
-llm-server-path,botserver-stack/bin/llm/
-llm-server-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-server-path,botserver-stack/bin/llm/build/bin
 llm-server-host,0.0.0.0
-llm-server-port,8080
+llm-server-port,8081
 llm-server-gpu-layers,35
-llm-server-n-moe,4
-llm-server-ctx-size,2048
-llm-server-parallel,4
+llm-server-n-moe,16
+llm-server-ctx-size,16000
+llm-server-parallel,8
 llm-server-cont-batching,true
 llm-server-mlock,true
 llm-server-no-mmap,true