diff --git a/.vscode/launch.json b/.vscode/launch.json
index 27393a06..ef4168dd 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -17,7 +17,7 @@
             },
             "args": [],
             "env": {
-                "RUST_LOG": "debug"
+                "RUST_LOG": "info"
             },
             "cwd": "${workspaceFolder}"
         },
diff --git a/migrations/6.0.6.sql b/migrations/6.0.6.sql
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/bootstrap/mod.rs b/src/bootstrap/mod.rs
index 942d5ee6..ae38989d 100644
--- a/src/bootstrap/mod.rs
+++ b/src/bootstrap/mod.rs
@@ -1,7 +1,8 @@
 use crate::config::AppConfig;
 use crate::package_manager::{InstallMode, PackageManager};
+use crate::shared::utils::establish_pg_connection;
 use anyhow::Result;
-use diesel::{connection::SimpleConnection, RunQueryDsl, Connection, QueryableByName};
+use diesel::{connection::SimpleConnection, RunQueryDsl, QueryableByName, Connection};
 use dotenvy::dotenv;
 use log::{debug, error, info, trace};
 use aws_sdk_s3::Client;
@@ -140,10 +141,7 @@ impl BootstrapManager {
         if pm.is_installed(component.name) {
             pm.start(component.name)?;
         } else {
-            let database_url = std::env::var("DATABASE_URL")
-                .unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string());
-            let mut conn = diesel::pg::PgConnection::establish(&database_url)
-                .map_err(|e| anyhow::anyhow!("Failed to connect to database: {}", e))?;
+            let mut conn = establish_pg_connection()?;
             let default_bot_id: uuid::Uuid = diesel::sql_query("SELECT id FROM bots LIMIT 1")
                 .load::(&mut conn)
                 .map(|rows| rows.first().map(|r| r.id).unwrap_or_else(|| uuid::Uuid::new_v4()))
@@ -189,7 +187,7 @@ impl BootstrapManager {
             return Ok(config);
         }
 
-        match diesel::PgConnection::establish(&database_url) {
+        match establish_pg_connection() {
            Ok(mut conn) => {
                if let Err(e) = self.apply_migrations(&mut conn) {
                    log::warn!("Failed to apply migrations: {}", e);
@@ -197,7 +195,7 @@ impl BootstrapManager {
                return Ok(AppConfig::from_database(&mut conn));
            }
            Err(e) => {
-               log::warn!("Failed to connect to legacy database: {}", e);
+               log::warn!("Failed to connect to database: {}", e);
                return Ok(AppConfig::from_env());
            }
        }
@@ -205,7 +203,7 @@ impl BootstrapManager {
        }
 
        let pm = PackageManager::new(self.install_mode.clone(), self.tenant.clone())?;
-       let required_components = vec!["tables", "drive", "cache"];
+       let required_components = vec!["tables", "drive", "cache", "llm"];
        let mut config = AppConfig::from_env();
 
        for component in required_components {
@@ -260,8 +258,7 @@ impl BootstrapManager {
            futures::executor::block_on(pm.install(component))?;
 
            if component == "tables" {
-               let database_url = std::env::var("DATABASE_URL").unwrap();
-               let mut conn = diesel::PgConnection::establish(&database_url)
+               let mut conn = establish_pg_connection()
                    .map_err(|e| anyhow::anyhow!("Failed to connect to database: {}", e))?;
 
                let migration_dir = include_dir::include_dir!("./migrations");
@@ -363,9 +360,7 @@ impl BootstrapManager {
    fn update_bot_config(&self, bot_id: &uuid::Uuid, component: &str) -> Result<()> {
        use diesel::sql_types::{Text, Uuid as SqlUuid};
 
-       let database_url = std::env::var("DATABASE_URL")
-           .unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string());
-       let mut conn = diesel::pg::PgConnection::establish(&database_url)?;
+       let mut conn = establish_pg_connection()?;
 
        // Ensure globally unique keys and update values atomically
        let config_key = format!("{}_{}", bot_id, component);
@@ -388,8 +383,7 @@ impl BootstrapManager {
    }
 
    pub async fn upload_templates_to_drive(&self, config: &AppConfig) -> Result<()> {
-       let database_url = std::env::var("DATABASE_URL").unwrap_or_else(|_| config.database_url());
std::env::var("DATABASE_URL").unwrap_or_else(|_| config.database_url()); - let mut conn = diesel::PgConnection::establish(&database_url)?; + let mut conn = establish_pg_connection()?; self.create_bots_from_templates(&mut conn)?; let templates_dir = Path::new("templates"); if !templates_dir.exists() { @@ -539,10 +533,8 @@ impl BootstrapManager { let bytes = response.body.collect().await?.into_bytes(); let csv_content = String::from_utf8(bytes.to_vec())?; - let database_url = std::env::var("DATABASE_URL") - .unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string()); // Create new connection for config loading - let config_conn = diesel::PgConnection::establish(&database_url)?; + let config_conn = establish_pg_connection()?; let config_manager = ConfigManager::new(Arc::new(Mutex::new(config_conn))); // Use default bot ID or create one if needed @@ -556,7 +548,7 @@ impl BootstrapManager { .map_err(|e| anyhow::anyhow!("Failed to sync gbot config: {}", e))?; // Load config from database which now has the CSV values - let mut config_conn = diesel::PgConnection::establish(&database_url)?; + let mut config_conn = establish_pg_connection()?; let config = AppConfig::from_database(&mut config_conn); info!("Successfully loaded config from CSV"); Ok(config) diff --git a/src/config/mod.rs b/src/config/mod.rs index 2724ddbc..06eae187 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -439,6 +439,32 @@ impl ConfigManager { Self { conn } } + pub fn get_config( + &self, + bot_id: &uuid::Uuid, + key: &str, + fallback: Option<&str>, + ) -> Result { + let mut conn = self.conn.lock().unwrap(); + let fallback_str = fallback.unwrap_or(""); + + #[derive(Debug, QueryableByName)] + struct ConfigValue { + #[diesel(sql_type = Text)] + value: String, + } + + let result = diesel::sql_query( + "SELECT get_bot_config($1, $2, $3) as value" + ) + .bind::(bot_id) + .bind::(key) + .bind::(fallback_str) + .get_result::(&mut *conn) + .map(|row| row.value)?; + Ok(result) + } + pub fn sync_gbot_config( &self, bot_id: &uuid::Uuid, diff --git a/src/llm_legacy/llm_local.rs b/src/llm_legacy/llm_local.rs index a0a7f46f..b6c9f3b8 100644 --- a/src/llm_legacy/llm_local.rs +++ b/src/llm_legacy/llm_local.rs @@ -1,10 +1,12 @@ use actix_web::{post, web, HttpRequest, HttpResponse, Result}; +use crate::config::{AppConfig, ConfigManager}; use dotenvy::dotenv; use log::{error, info}; use reqwest::Client; use serde::{Deserialize, Serialize}; use std::env; use tokio::time::{sleep, Duration}; +use uuid::Uuid; // OpenAI-compatible request/response structures #[derive(Debug, Serialize, Deserialize)] @@ -62,13 +64,45 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box env::var("LLM_URL").unwrap_or_else(|_| + cm.get_config(&default_bot_id, "LLM_URL", None) + .unwrap_or("http://localhost:8081".to_string()) + ), + None => env::var("LLM_URL").unwrap_or("http://localhost:8081".to_string()) + }; + let embedding_url = match &config_manager { + Some(cm) => env::var("EMBEDDING_URL").unwrap_or_else(|_| + cm.get_config(&default_bot_id, "EMBEDDING_URL", None) + .unwrap_or("http://localhost:8082".to_string()) + ), + None => env::var("EMBEDDING_URL").unwrap_or("http://localhost:8082".to_string()) + }; + let llama_cpp_path = match &config_manager { + Some(cm) => env::var("LLM_CPP_PATH").unwrap_or_else(|_| + cm.get_config(&default_bot_id, "LLM_CPP_PATH", None) + .unwrap_or("~/llama.cpp".to_string()) + ), + None => env::var("LLM_CPP_PATH").unwrap_or("~/llama.cpp".to_string()) + }; + let llm_model_path = match &config_manager { + 
+        Some(cm) => env::var("LLM_MODEL_PATH").unwrap_or_else(|_|
+            cm.get_config(&default_bot_id, "LLM_MODEL_PATH", None)
+                .unwrap_or("".to_string())
+        ),
+        None => env::var("LLM_MODEL_PATH").unwrap_or("".to_string())
+    };
+    let embedding_model_path = match &config_manager {
+        Some(cm) => env::var("EMBEDDING_MODEL_PATH").unwrap_or_else(|_|
+            cm.get_config(&default_bot_id, "EMBEDDING_MODEL_PATH", None)
+                .unwrap_or("".to_string())
+        ),
+        None => env::var("EMBEDDING_MODEL_PATH").unwrap_or("".to_string())
+    };
 
     info!("🚀 Starting local llama.cpp servers...");
     info!("📋 Configuration:");
@@ -189,18 +223,49 @@ async fn start_llm_server(
     std::env::set_var("OMP_PROC_BIND", "close");
 
     // "cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 8192 --batch-size 8192 -n 4096 --mlock --no-mmap --flash-attn --no-kv-offload --no-mmap &",
+    // Read config values with defaults
+    let n_moe = env::var("LLM_SERVER_N_MOE").unwrap_or("4".to_string());
+    let ctx_size = env::var("LLM_SERVER_CTX_SIZE").unwrap_or("4096".to_string());
+    let parallel = env::var("LLM_SERVER_PARALLEL").unwrap_or("1".to_string());
+    let cont_batching = env::var("LLM_SERVER_CONT_BATCHING").unwrap_or("true".to_string());
+    let mlock = env::var("LLM_SERVER_MLOCK").unwrap_or("true".to_string());
+    let no_mmap = env::var("LLM_SERVER_NO_MMAP").unwrap_or("true".to_string());
+    let gpu_layers = env::var("LLM_SERVER_GPU_LAYERS").unwrap_or("20".to_string());
+
+    // Build command arguments dynamically
+    let mut args = format!(
+        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --ctx-size {} --repeat-penalty 1.2 -ngl {}",
+        model_path, port, ctx_size, gpu_layers
+    );
+
+    if n_moe != "0" {
+        args.push_str(&format!(" --n-moe {}", n_moe));
+    }
+    if parallel != "1" {
+        args.push_str(&format!(" --parallel {}", parallel));
+    }
+    if cont_batching == "true" {
+        args.push_str(" --cont-batching");
+    }
+    if mlock == "true" {
+        args.push_str(" --mlock");
+    }
+    if no_mmap == "true" {
+        args.push_str(" --no-mmap");
+    }
+
     if cfg!(windows) {
         let mut cmd = tokio::process::Command::new("cmd");
         cmd.arg("/C").arg(format!(
-            "cd {} && .\\llama-server.exe -m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --flash-attn on --ctx-size 4096 --repeat-penalty 1.2 -ngl 20 ",
-            llama_cpp_path, model_path, port
+            "cd {} && .\\llama-server.exe {}",
+            llama_cpp_path, args
         ));
         cmd.spawn()?;
     } else {
         let mut cmd = tokio::process::Command::new("sh");
         cmd.arg("-c").arg(format!(
-            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --flash-attn on --ctx-size 4096 --repeat-penalty 1.2 -ngl 20 &",
-            llama_cpp_path, model_path, port
+            "cd {} && ./llama-server {} &",
+            llama_cpp_path, args
         ));
         cmd.spawn()?;
     }
diff --git a/src/shared/utils.rs b/src/shared/utils.rs
index 64a3983e..31cc27f4 100644
--- a/src/shared/utils.rs
+++ b/src/shared/utils.rs
@@ -1,4 +1,6 @@
 use crate::config::AIConfig;
+use anyhow::{Context, Result};
+use diesel::{Connection, PgConnection};
 use futures_util::StreamExt;
 use indicatif::{ProgressBar, ProgressStyle};
 use log::trace;
@@ -177,3 +179,12 @@ pub async fn call_llm(
 ) -> Result<String, Box<dyn std::error::Error>> {
     Ok(format!("Generated response for: {}", prompt))
 }
+
+/// Establishes a PostgreSQL connection using DATABASE_URL environment variable
+pub fn establish_pg_connection() -> Result<PgConnection> {
+    let database_url = std::env::var("DATABASE_URL")
+        .unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string());
+
+    PgConnection::establish(&database_url)
+        .with_context(|| format!("Failed to connect to database at {}", database_url))
+}
diff --git a/templates/default.gbai/default.gbot/config.csv b/templates/default.gbai/default.gbot/config.csv
index 3d3fcf93..4ac4a5da 100644
--- a/templates/default.gbai/default.gbot/config.csv
+++ b/templates/default.gbai/default.gbot/config.csv
@@ -4,18 +4,25 @@
 server_host,0.0.0.0
 server_port,8080
 sites_root,/tmp
-llm-key,gsk_
-llm-model,openai/gpt-oss-20b
-llm-url,https://api.groq.com/openai/v1/chat/completions
-
+llm-key,none
 llm-url,http://localhost:8080/v1
-llm-model,./botserver-stack/llm/data/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
 embedding-url,http://localhost:8082
-embedding-model-path,./botserver-stack/llm/data/bge-small-en-v1.5-f32.gguf
+embedding-model-path,botserver-stack/data/llm/bge-small-en-v1.5-f32.gguf
 llm-server,false
-llm-server-path,~/llama.cpp
+llm-server-path,botserver-stack/bin/llm/
+llm-server-model,botserver-stack/data/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
+llm-server-host,0.0.0.0
+llm-server-port,8080
+llm-server-gpu-layers,35
+llm-server-n-moe,4
+llm-server-ctx-size,2048
+llm-server-parallel,4
+llm-server-cont-batching,true
+llm-server-mlock,true
+llm-server-no-mmap,true
 email-from,from@domain.com
 email-server,mail.domain.com
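
Taken together, these changes route connection setup through `establish_pg_connection` and per-bot settings through the new `ConfigManager::get_config`. A minimal usage sketch of how a caller might combine the two, assuming `anyhow::Result` is available; the `read_ctx_size` helper, the bot id argument, and the `"4096"` fallback are illustrative and not part of this diff:

use std::sync::{Arc, Mutex};
use uuid::Uuid;

use crate::config::ConfigManager;
use crate::shared::utils::establish_pg_connection;

// Hypothetical caller: open one connection via the shared helper, wrap it in a
// ConfigManager, and read a per-bot value with an explicit fallback.
fn read_ctx_size(bot_id: &Uuid) -> anyhow::Result<String> {
    let conn = establish_pg_connection()?;
    let config_manager = ConfigManager::new(Arc::new(Mutex::new(conn)));
    config_manager.get_config(bot_id, "LLM_SERVER_CTX_SIZE", Some("4096"))
}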