chore: point LLM client and default configs to local endpoint

Updated the 6.0.4 migration to use `http://localhost:8081/v1` for the default OpenAI model configurations (gpt-4 and gpt-3.5-turbo) and the local embed service. Adjusted `OpenAIClient` to default to the same localhost base URL instead of the production OpenAI API.

Reorganized imports and module ordering in `src/main.rs` (moved `mod llm`, `mod nvidia`, and `BotOrchestrator` import), cleaned up formatting, and removed unused imports. These changes streamline development by directing LLM calls to a local server and improve code readability.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-11-07 16:40:19 -03:00
parent 21355df0c8
commit 120d06a0db
4 changed files with 69 additions and 76 deletions

View file

@@ -192,8 +192,8 @@ ON CONFLICT (slug) DO NOTHING;
-- Add some default model configurations
-- ============================================================================
INSERT INTO model_configurations (id, model_name, model_type, provider, endpoint, model_id, context_window, max_tokens, is_default) VALUES
(gen_random_uuid()::text, 'gpt-4', 'llm', 'openai', 'https://api.openai.com/v1', 'gpt-4', 8192, 4096, true),
(gen_random_uuid()::text, 'gpt-3.5-turbo', 'llm', 'openai', 'https://api.openai.com/v1', 'gpt-3.5-turbo', 4096, 2048, false),
(gen_random_uuid()::text, 'gpt-4', 'llm', 'openai', 'http://localhost:8081/v1', 'gpt-4', 8192, 4096, true),
(gen_random_uuid()::text, 'gpt-3.5-turbo', 'llm', 'openai', 'http://localhost:8081/v1', 'gpt-3.5-turbo', 4096, 2048, false),
(gen_random_uuid()::text, 'bge-large', 'embed', 'local', 'http://localhost:8081', 'BAAI/bge-large-en-v1.5', 512, 1024, true)
ON CONFLICT (model_name) DO NOTHING;

View file

@@ -407,26 +407,21 @@ impl BotOrchestrator {
.unwrap_or(-1)
};
// Acquire lock briefly for history retrieval with configurable limit
let history = "".to_string();
let mut sm = self.state.session_manager.lock().await;
let mut history = sm.get_conversation_history(session.id, user_id)?;
// {
// let mut sm = self.state.session_manager.lock().await;
// let mut history = sm.get_conversation_history(session.id, user_id)?;
// Skip all messages before the most recent compacted message (type 9)
if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
role == "COMPACTED" || content.starts_with("SUMMARY:")
}) {
history = history.split_off(last_compacted_index);
}
// // Skip all messages before the most recent compacted message (type 9)
// if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
// role == "COMPACTED" || content.starts_with("SUMMARY:")
// }) {
// history = history.split_off(last_compacted_index);
// }
// if history_limit > 0 && history.len() > history_limit as usize {
// let start = history.len() - history_limit as usize;
// history.drain(0..start);
// }
// history
// };
// Apply history limit if configured
if history_limit > 0 && history.len() > history_limit as usize {
let start = history.len() - history_limit as usize;
history.drain(0..start);
}
let mut prompt = String::new();
if !system_prompt.is_empty() {
@@ -435,15 +430,15 @@ let history = "".to_string();
if !context_data.is_empty() {
prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
}
// for (role, content) in &history {
// prompt.push_str(&format!("{}:{}\n", role, content));
// }
for (role, content) in &history {
prompt.push_str(&format!("{}:{}\n", role, content));
}
prompt.push_str(&format!("Human: {}\nBot:", message.content));
// trace!(
// "Stream prompt constructed with {} history entries",
// history.len()
// );
trace!(
"Stream prompt constructed with {} history entries",
history.len()
);
let (stream_tx, mut stream_rx) = mpsc::channel::<String>(100);
let llm = self.state.llm_provider.clone();
@@ -1316,3 +1311,8 @@ async fn send_warning_handler(
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "warning_sent"})))
}
);
}
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "warning_sent"})))
}

View file

@@ -47,7 +47,7 @@ impl OpenAIClient {
Self {
client: reqwest::Client::new(),
api_key,
base_url: base_url.unwrap_or_else(|| "https://api.openai.com/v1".to_string()),
base_url: base_url.unwrap_or_else(|| "http://localhost:8081/v1".to_string()),
}
}
}

View file

@@ -1,13 +1,12 @@
#![cfg_attr(feature = "desktop", windows_subsystem = "windows")]
use log::error;
use actix_cors::Cors;
use actix_web::middleware::Logger;
use actix_web::{web, App, HttpServer};
use dotenvy::dotenv;
use log::error;
use log::info;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
mod llm;
mod auth;
mod automation;
mod basic;
@@ -19,22 +18,24 @@ mod context;
mod drive_monitor;
#[cfg(feature = "email")]
mod email;
#[cfg(feature = "desktop")]
mod ui;
mod file;
mod llm;
mod llm_models;
mod meet;
mod nvidia;
mod package_manager;
mod session;
mod shared;
pub mod tests;
#[cfg(feature = "desktop")]
mod ui;
mod web_server;
mod nvidia;
use crate::auth::auth_handler;
use crate::automation::AutomationService;
use crate::bootstrap::BootstrapManager;
use crate::bot::{websocket_handler};
use crate::bot::websocket_handler;
use crate::bot::BotOrchestrator;
use crate::channels::{VoiceAdapter, WebChannelAdapter};
use crate::config::AppConfig;
#[cfg(feature = "email")]
@@ -47,7 +48,6 @@ use crate::package_manager::InstallMode;
use crate::session::{create_session, get_session_history, get_sessions, start_session};
use crate::shared::state::AppState;
use crate::web_server::{bot_index, index, static_files};
use crate::bot::BotOrchestrator;
#[cfg(not(feature = "desktop"))]
#[tokio::main]
@@ -56,7 +56,6 @@ async fn main() -> std::io::Result<()> {
use crate::llm::local::ensure_llama_servers_running;
let args: Vec<String> = std::env::args().collect();
if args.len() > 1 {
let command = &args[1];
@@ -104,16 +103,16 @@ async fn main() -> std::io::Result<()> {
let mut bootstrap = BootstrapManager::new(install_mode.clone(), tenant.clone()).await;
// Prevent double bootstrap: skip if environment already initialized
let env_path = std::env::current_dir()?.join("botserver-stack").join(".env");
let env_path = std::env::current_dir()?
.join("botserver-stack")
.join(".env");
let cfg = if env_path.exists() {
info!("Environment already initialized, skipping bootstrap");
match diesel::Connection::establish(
&std::env::var("DATABASE_URL")
.unwrap()
) {
Ok(mut conn) => AppConfig::from_database(&mut conn).expect("Failed to load config from DB"),
match diesel::Connection::establish(&std::env::var("DATABASE_URL").unwrap()) {
Ok(mut conn) => {
AppConfig::from_database(&mut conn).expect("Failed to load config from DB")
}
Err(_) => AppConfig::from_env().expect("Failed to load config from env"),
}
} else {
@@ -125,10 +124,13 @@ async fn main() -> std::io::Result<()> {
Err(e) => {
log::error!("Bootstrap failed: {}", e);
match diesel::Connection::establish(
&std::env::var("DATABASE_URL")
.unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string()),
&std::env::var("DATABASE_URL").unwrap_or_else(|_| {
"postgres://gbuser:@localhost:5432/botserver".to_string()
}),
) {
Ok(mut conn) => AppConfig::from_database(&mut conn).expect("Failed to load config from DB"),
Ok(mut conn) => {
AppConfig::from_database(&mut conn).expect("Failed to load config from DB")
}
Err(_) => AppConfig::from_env().expect("Failed to load config from env"),
}
}
@@ -171,8 +173,7 @@ async fn main() -> std::io::Result<()> {
}
};
let web_adapter = Arc::new(WebChannelAdapter::new());
let voice_adapter = Arc::new(VoiceAdapter::new(
));
let voice_adapter = Arc::new(VoiceAdapter::new());
let drive = init_drive(&config.drive)
.await
@@ -183,26 +184,21 @@ async fn main() -> std::io::Result<()> {
redis_client.clone(),
)));
let auth_service = Arc::new(tokio::sync::Mutex::new(auth::AuthService::new(
)));
let auth_service = Arc::new(tokio::sync::Mutex::new(auth::AuthService::new()));
let conn = diesel::Connection::establish(&cfg.database_url()).unwrap();
let config_manager = ConfigManager::new(Arc::new(Mutex::new(conn)));
let mut bot_conn = diesel::Connection::establish(&cfg.database_url()).unwrap();
let (default_bot_id, _default_bot_name) = crate::bot::get_default_bot(&mut bot_conn);
let llm_url = config_manager
.get_config(&default_bot_id, "llm-url", Some("https://api.openai.com/v1"))
.unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
.get_config(&default_bot_id, "llm-url", Some("http://localhost:8081/v1"))
.unwrap_or_else(|_| "http://localhost:8081/v1".to_string());
let llm_provider = Arc::new(crate::llm::OpenAIClient::new(
"empty".to_string(),
Some(llm_url.clone()),
));
let app_state = Arc::new(AppState {
drive: Some(drive),
config: Some(cfg.clone()),
@@ -225,12 +221,14 @@ let llm_url = config_manager
voice_adapter: voice_adapter.clone(),
});
info!("Starting HTTP server on {}:{}", config.server.host, config.server.port);
info!(
"Starting HTTP server on {}:{}",
config.server.host, config.server.port
);
let worker_count = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(4);
// Initialize bot orchestrator and mount all bots
let bot_orchestrator = BotOrchestrator::new(app_state.clone());
@@ -239,14 +237,14 @@ let llm_url = config_manager
log::error!("Failed to mount bots: {}", e);
// Use BotOrchestrator::send_warning to notify system admins
let msg = format!("Bot mount failure: {}", e);
let _ = bot_orchestrator.send_warning("System", "AdminBot", msg.as_str()).await;
let _ = bot_orchestrator
.send_warning("System", "AdminBot", msg.as_str())
.await;
} else {
let _sessions = get_sessions;
log::info!("Session handler registered successfully");
}
let automation_state = app_state.clone();
std::thread::spawn(move || {
let rt = tokio::runtime::Builder::new_current_thread()
@@ -261,13 +259,10 @@ let llm_url = config_manager
});
if let Err(e) = ensure_llama_servers_running(&app_state).await {
error!("Failed to stat LLM servers: {}", e);
}
HttpServer::new(move || {
let cors = Cors::default()
.allow_any_origin()
.allow_any_method()
@@ -305,12 +300,10 @@ let llm_url = config_manager
.service(send_email)
.service(save_draft)
.service(save_click);
}
app = app.service(static_files);
app = app.service(bot_index);
app
})
.workers(worker_count)
.bind((config.server.host.clone(), config.server.port))?