diff --git a/src/bootstrap/mod.rs b/src/bootstrap/mod.rs
index da424a45..512ffe47 100644
--- a/src/bootstrap/mod.rs
+++ b/src/bootstrap/mod.rs
@@ -2,7 +2,7 @@ use crate::config::AppConfig;
 use crate::package_manager::{InstallMode, PackageManager};
 use crate::shared::utils::establish_pg_connection;
 use anyhow::Result;
-use diesel::{connection::SimpleConnection, RunQueryDsl, QueryableByName};
+use diesel::{connection::SimpleConnection, QueryableByName};
 use dotenvy::dotenv;
 use log::{debug, error, info, trace};
 use aws_sdk_s3::Client;
diff --git a/src/config/mod.rs b/src/config/mod.rs
index fcec5d8d..49aae185 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -419,27 +419,22 @@ impl ConfigManager {
     pub fn get_config(
         &self,
-        bot_id: &uuid::Uuid,
+        code_bot_id: &uuid::Uuid,
         key: &str,
         fallback: Option<&str>,
     ) -> Result<String> {
+        use crate::shared::models::schema::bot_configuration::dsl::*;
+
         let mut conn = self.conn.lock().unwrap();
         let fallback_str = fallback.unwrap_or("");
-        #[derive(Debug, QueryableByName)]
-        struct ConfigValue {
-            #[diesel(sql_type = Text)]
-            value: String,
-        }
-
-        let result = diesel::sql_query(
-            "SELECT get_bot_config($1, $2, $3) as value"
-        )
-        .bind::<diesel::sql_types::Uuid, _>(bot_id)
-        .bind::<diesel::sql_types::Text, _>(key)
-        .bind::<diesel::sql_types::Text, _>(fallback_str)
-        .get_result::<ConfigValue>(&mut *conn)
-        .map(|row| row.value)?;
+        let result = bot_configuration
+            .filter(bot_id.eq(code_bot_id))
+            .filter(config_key.eq(key))
+            .select(config_value)
+            .first::<String>(&mut *conn)
+            .unwrap_or(fallback_str.to_string());
+
         Ok(result)
     }
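For reviewers, the reworked `get_config` now reduces to a single Diesel query with a string fallback. A minimal standalone sketch of that lookup follows, assuming the `bot_configuration` schema added in `src/shared/models.rs` and a `PgConnection`; the helper and its parameter names are illustrative, not code from this diff:

```rust
use diesel::prelude::*;
use diesel::PgConnection;

use crate::shared::models::schema::bot_configuration::dsl::*;

// Sketch of the fallback-on-miss lookup that get_config now performs.
fn lookup_config(
    conn: &mut PgConnection,
    wanted_bot_id: &uuid::Uuid,
    wanted_key: &str,
    fallback: &str,
) -> String {
    bot_configuration
        .filter(bot_id.eq(wanted_bot_id))   // column from the new bot_configuration table
        .filter(config_key.eq(wanted_key))
        .select(config_value)
        .first::<String>(conn)
        .unwrap_or(fallback.to_string())    // any Diesel error, not just NotFound, yields the fallback
}
```

Two details worth noting: the parameter is renamed to `code_bot_id` so it no longer shadows the `bot_id` column pulled in by the `dsl::*` glob, and `unwrap_or` maps every query error (connection failures included) to the fallback string, whereas the old `get_bot_config($1, $2, $3)` path surfaced errors via `?`.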
env::var("EMBEDDING_URL").unwrap_or("http://localhost:8082".to_string()) - }; - let llama_cpp_path = match &config_manager { - Some(cm) => env::var("LLM_CPP_PATH").unwrap_or_else(|_| - cm.get_config(&default_bot_id, "LLM_CPP_PATH", None) - .unwrap_or("~/llama.cpp".to_string()) - ), - None => env::var("LLM_CPP_PATH").unwrap_or("~/llama.cpp".to_string()) - }; - let llm_model_path = match &config_manager { - Some(cm) => env::var("LLM_MODEL_PATH").unwrap_or_else(|_| - cm.get_config(&default_bot_id, "LLM_MODEL_PATH", None) - .unwrap_or("".to_string()) - ), - None => env::var("LLM_MODEL_PATH").unwrap_or("".to_string()) - }; - let embedding_model_path = match &config_manager { - Some(cm) => env::var("EMBEDDING_MODEL_PATH").unwrap_or_else(|_| - cm.get_config(&default_bot_id, "EMBEDDING_MODEL_PATH", None) - .unwrap_or("".to_string()) - ), - None => env::var("EMBEDDING_MODEL_PATH").unwrap_or("".to_string()) + // Get default bot ID from database + let default_bot_id = { + let mut conn = conn.lock().unwrap(); + bots.filter(name.eq("default")) + .select(id) + .first::(&mut *conn) + .unwrap_or_else(|_| uuid::Uuid::nil()) }; - info!("🚀 Starting local llama.cpp servers..."); - info!("📋 Configuration:"); + // Get configuration from config using default bot ID + let llm_url = config_manager.get_config(&default_bot_id, "llm-url", None)?; + let llm_model = config_manager.get_config(&default_bot_id, "llm-model", None)?; + + let embedding_url = config_manager.get_config(&default_bot_id, "embedding-url", None)?; + let embedding_model = config_manager.get_config(&default_bot_id, "embedding-model", None)?; + + let llm_server_path = config_manager.get_config(&default_bot_id, "llm-server-path", None)?; + + info!(" Starting LLM servers..."); + info!(" Configuration:"); info!(" LLM URL: {}", llm_url); info!(" Embedding URL: {}", embedding_url); - info!(" LLM Model: {}", llm_model_path); - info!(" Embedding Model: {}", embedding_model_path); + info!(" LLM Model: {}", llm_model); + info!(" Embedding Model: {}", embedding_model); + info!(" LLM Server Path: {}", llm_server_path); // Check if servers are already running let llm_running = is_server_running(&llm_url).await; @@ -123,26 +102,26 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box Result<(), Box bool { + + let client = reqwest::Client::new(); match client.get(&format!("{}/health", url)).send().await { Ok(response) => response.status().is_success(), @@ -364,7 +346,7 @@ pub async fn chat_completions_local( })?; let response = client - .post(&format!("{}/completion", llama_url)) + .post(&format!("{}/v1/completion", llama_url)) .header("Content-Type", "application/json") .json(&llama_request) .send() @@ -639,20 +621,3 @@ pub async fn embeddings_local( Ok(HttpResponse::Ok().json(openai_response)) } -// Health check endpoint -#[actix_web::get("/health")] -pub async fn health() -> Result { - let llama_url = env::var("LLM_URL").unwrap_or_else(|_| "http://localhost:8081".to_string()); - - if is_server_running(&llama_url).await { - Ok(HttpResponse::Ok().json(serde_json::json!({ - "status": "healthy", - "llama_server": "running" - }))) - } else { - Ok(HttpResponse::ServiceUnavailable().json(serde_json::json!({ - "status": "unhealthy", - "llama_server": "not running" - }))) - } -} diff --git a/src/main.rs b/src/main.rs index 90a6283a..7a9ebfc7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -172,10 +172,6 @@ async fn main() -> std::io::Result<()> { }; let db_custom_pool = db_pool.clone(); - 
diff --git a/src/main.rs b/src/main.rs
index 90a6283a..7a9ebfc7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -172,10 +172,6 @@ async fn main() -> std::io::Result<()> {
     };
     let db_custom_pool = db_pool.clone();
 
-    ensure_llama_servers_running()
-        .await
-        .expect("Failed to initialize LLM local server");
-
     let cache_url = std::env::var("CACHE_URL")
         .or_else(|_| std::env::var("REDIS_URL"))
         .unwrap_or_else(|_| "redis://localhost:6379".to_string());
@@ -272,8 +268,14 @@ async fn main() -> std::io::Result<()> {
         log::error!("Failed to mount bots: {}", e);
     }
-
+
+    ensure_llama_servers_running(&app_state)
+        .await
+        .expect("Failed to initialize LLM local server");
+
+    HttpServer::new(move || {
+        let cors = Cors::default()
             .allow_any_origin()
             .allow_any_method()
diff --git a/src/shared/models.rs b/src/shared/models.rs
index 375db639..7ef9d34e 100644
--- a/src/shared/models.rs
+++ b/src/shared/models.rs
@@ -401,6 +401,18 @@ pub mod schema {
             added_at -> Text,
         }
     }
+
+    diesel::table! {
+        bot_configuration (id) {
+            id -> Uuid,
+            bot_id -> Uuid,
+            config_key -> Text,
+            config_value -> Text,
+            config_type -> Text,
+            created_at -> Timestamptz,
+            updated_at -> Timestamptz,
+        }
+    }
 }
 
 pub use schema::*;
diff --git a/templates/default.gbai/default.gbot/config.csv b/templates/default.gbai/default.gbot/config.csv
index 340ca8f2..ece77065 100644
--- a/templates/default.gbai/default.gbot/config.csv
+++ b/templates/default.gbai/default.gbot/config.csv
@@ -5,21 +5,20 @@
 server_port,8080
 sites_root,/tmp
 llm-key,none
-llm-url,http://localhost:8080/v1
-llm-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-url,http://localhost:8081
+llm-model,../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
 embedding-url,http://localhost:8082
-embedding-model,botserver-stack/data/llm/bge-small-en-v1.5-f32.gguf
+embedding-model,../../../../data/llm/bge-small-en-v1.5-f32.gguf
 llm-server,false
-llm-server-path,botserver-stack/bin/llm/
-llm-server-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-server-path,botserver-stack/bin/llm/build/bin
 llm-server-host,0.0.0.0
-llm-server-port,8080
+llm-server-port,8081
 llm-server-gpu-layers,35
-llm-server-n-moe,4
-llm-server-ctx-size,2048
-llm-server-parallel,4
+llm-server-n-moe,16
+llm-server-ctx-size,16000
+llm-server-parallel,8
 llm-server-cont-batching,true
 llm-server-mlock,true
 llm-server-no-mmap,true
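For orientation on the retuned `llm-server-*` values: they line up with llama.cpp's `llama-server` command-line flags. A hedged sketch of the kind of process launch the bootstrap code might assemble from them is below; the binary name and the key-to-flag mapping are assumptions for illustration, not code from this diff, and how `llm-server-n-moe` maps to a flag is not shown here:

```rust
use std::process::{Child, Command};

// Illustrative only: launch llama-server with the new config.csv values.
// Paths come from llm-server-path / llm-model; flag names follow upstream llama.cpp.
fn spawn_llm_server() -> std::io::Result<Child> {
    Command::new("botserver-stack/bin/llm/build/bin/llama-server")
        .args([
            "--model", "../../../../data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf",
            "--host", "0.0.0.0",
            "--port", "8081",        // llm-server-port
            "--n-gpu-layers", "35",  // llm-server-gpu-layers
            "--ctx-size", "16000",   // llm-server-ctx-size
            "--parallel", "8",       // llm-server-parallel
            "--cont-batching",       // llm-server-cont-batching
            "--mlock",               // llm-server-mlock
            "--no-mmap",             // llm-server-no-mmap
        ])
        .spawn()
}
```

Note that moving `llm-server-port` from 8080 to 8081 (and pointing `llm-url` at it) stops the local LLM server from sharing port 8080 with `server_port`.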