chore: point LLM client and default configs to local endpoint

Updated the 6.0.4 migration to use `http://localhost:8081/v1` for the default OpenAI model configurations (gpt-4 and gpt-3.5-turbo) and the local embed service. Adjusted `OpenAIClient` to default to the same localhost base URL instead of the production OpenAI API.

Reorganized imports and module ordering in `src/main.rs` (moved `mod llm`, `mod nvidia`, and `BotOrchestrator` import), cleaned up formatting, and removed unused imports. These changes streamline development by directing LLM calls to a local server and improve code readability.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-11-07 16:40:19 -03:00
parent 21355df0c8
commit 120d06a0db
4 changed files with 69 additions and 76 deletions

View file

@ -192,8 +192,8 @@ ON CONFLICT (slug) DO NOTHING;
-- Add some default model configurations -- Add some default model configurations
-- ============================================================================ -- ============================================================================
INSERT INTO model_configurations (id, model_name, model_type, provider, endpoint, model_id, context_window, max_tokens, is_default) VALUES INSERT INTO model_configurations (id, model_name, model_type, provider, endpoint, model_id, context_window, max_tokens, is_default) VALUES
(gen_random_uuid()::text, 'gpt-4', 'llm', 'openai', 'https://api.openai.com/v1', 'gpt-4', 8192, 4096, true), (gen_random_uuid()::text, 'gpt-4', 'llm', 'openai', 'http://localhost:8081/v1', 'gpt-4', 8192, 4096, true),
(gen_random_uuid()::text, 'gpt-3.5-turbo', 'llm', 'openai', 'https://api.openai.com/v1', 'gpt-3.5-turbo', 4096, 2048, false), (gen_random_uuid()::text, 'gpt-3.5-turbo', 'llm', 'openai', 'http://localhost:8081/v1', 'gpt-3.5-turbo', 4096, 2048, false),
(gen_random_uuid()::text, 'bge-large', 'embed', 'local', 'http://localhost:8081', 'BAAI/bge-large-en-v1.5', 512, 1024, true) (gen_random_uuid()::text, 'bge-large', 'embed', 'local', 'http://localhost:8081', 'BAAI/bge-large-en-v1.5', 512, 1024, true)
ON CONFLICT (model_name) DO NOTHING; ON CONFLICT (model_name) DO NOTHING;

View file

@ -407,26 +407,21 @@ impl BotOrchestrator {
.unwrap_or(-1) .unwrap_or(-1)
}; };
// Acquire lock briefly for history retrieval with configurable limit let mut sm = self.state.session_manager.lock().await;
let history = "".to_string(); let mut history = sm.get_conversation_history(session.id, user_id)?;
// { // Skip all messages before the most recent compacted message (type 9)
// let mut sm = self.state.session_manager.lock().await; if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
// let mut history = sm.get_conversation_history(session.id, user_id)?; role == "COMPACTED" || content.starts_with("SUMMARY:")
}) {
history = history.split_off(last_compacted_index);
}
// // Skip all messages before the most recent compacted message (type 9) // Apply history limit if configured
// if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| { if history_limit > 0 && history.len() > history_limit as usize {
// role == "COMPACTED" || content.starts_with("SUMMARY:") let start = history.len() - history_limit as usize;
// }) { history.drain(0..start);
// history = history.split_off(last_compacted_index); }
// }
// if history_limit > 0 && history.len() > history_limit as usize {
// let start = history.len() - history_limit as usize;
// history.drain(0..start);
// }
// history
// };
let mut prompt = String::new(); let mut prompt = String::new();
if !system_prompt.is_empty() { if !system_prompt.is_empty() {
@ -435,15 +430,15 @@ let history = "".to_string();
if !context_data.is_empty() { if !context_data.is_empty() {
prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data)); prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
} }
// for (role, content) in &history { for (role, content) in &history {
// prompt.push_str(&format!("{}:{}\n", role, content)); prompt.push_str(&format!("{}:{}\n", role, content));
// } }
prompt.push_str(&format!("Human: {}\nBot:", message.content)); prompt.push_str(&format!("Human: {}\nBot:", message.content));
// trace!( trace!(
// "Stream prompt constructed with {} history entries", "Stream prompt constructed with {} history entries",
// history.len() history.len()
// ); );
let (stream_tx, mut stream_rx) = mpsc::channel::<String>(100); let (stream_tx, mut stream_rx) = mpsc::channel::<String>(100);
let llm = self.state.llm_provider.clone(); let llm = self.state.llm_provider.clone();
@ -1316,3 +1311,8 @@ async fn send_warning_handler(
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "warning_sent"}))) Ok(HttpResponse::Ok().json(serde_json::json!({"status": "warning_sent"})))
} }
);
}
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "warning_sent"})))
}

View file

@ -47,7 +47,7 @@ impl OpenAIClient {
Self { Self {
client: reqwest::Client::new(), client: reqwest::Client::new(),
api_key, api_key,
base_url: base_url.unwrap_or_else(|| "https://api.openai.com/v1".to_string()), base_url: base_url.unwrap_or_else(|| "http://localhost:8081/v1".to_string()),
} }
} }
} }

View file

@ -1,13 +1,12 @@
#![cfg_attr(feature = "desktop", windows_subsystem = "windows")] #![cfg_attr(feature = "desktop", windows_subsystem = "windows")]
use log::error;
use actix_cors::Cors; use actix_cors::Cors;
use actix_web::middleware::Logger; use actix_web::middleware::Logger;
use actix_web::{web, App, HttpServer}; use actix_web::{web, App, HttpServer};
use dotenvy::dotenv; use dotenvy::dotenv;
use log::error;
use log::info; use log::info;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
mod llm;
mod auth; mod auth;
mod automation; mod automation;
mod basic; mod basic;
@ -19,22 +18,24 @@ mod context;
mod drive_monitor; mod drive_monitor;
#[cfg(feature = "email")] #[cfg(feature = "email")]
mod email; mod email;
#[cfg(feature = "desktop")]
mod ui;
mod file; mod file;
mod llm;
mod llm_models; mod llm_models;
mod meet; mod meet;
mod nvidia;
mod package_manager; mod package_manager;
mod session; mod session;
mod shared; mod shared;
pub mod tests; pub mod tests;
#[cfg(feature = "desktop")]
mod ui;
mod web_server; mod web_server;
mod nvidia;
use crate::auth::auth_handler; use crate::auth::auth_handler;
use crate::automation::AutomationService; use crate::automation::AutomationService;
use crate::bootstrap::BootstrapManager; use crate::bootstrap::BootstrapManager;
use crate::bot::{websocket_handler}; use crate::bot::websocket_handler;
use crate::bot::BotOrchestrator;
use crate::channels::{VoiceAdapter, WebChannelAdapter}; use crate::channels::{VoiceAdapter, WebChannelAdapter};
use crate::config::AppConfig; use crate::config::AppConfig;
#[cfg(feature = "email")] #[cfg(feature = "email")]
@ -47,7 +48,6 @@ use crate::package_manager::InstallMode;
use crate::session::{create_session, get_session_history, get_sessions, start_session}; use crate::session::{create_session, get_session_history, get_sessions, start_session};
use crate::shared::state::AppState; use crate::shared::state::AppState;
use crate::web_server::{bot_index, index, static_files}; use crate::web_server::{bot_index, index, static_files};
use crate::bot::BotOrchestrator;
#[cfg(not(feature = "desktop"))] #[cfg(not(feature = "desktop"))]
#[tokio::main] #[tokio::main]
@ -56,7 +56,6 @@ async fn main() -> std::io::Result<()> {
use crate::llm::local::ensure_llama_servers_running; use crate::llm::local::ensure_llama_servers_running;
let args: Vec<String> = std::env::args().collect(); let args: Vec<String> = std::env::args().collect();
if args.len() > 1 { if args.len() > 1 {
let command = &args[1]; let command = &args[1];
@ -104,16 +103,16 @@ async fn main() -> std::io::Result<()> {
let mut bootstrap = BootstrapManager::new(install_mode.clone(), tenant.clone()).await; let mut bootstrap = BootstrapManager::new(install_mode.clone(), tenant.clone()).await;
// Prevent double bootstrap: skip if environment already initialized // Prevent double bootstrap: skip if environment already initialized
let env_path = std::env::current_dir()?.join("botserver-stack").join(".env"); let env_path = std::env::current_dir()?
.join("botserver-stack")
.join(".env");
let cfg = if env_path.exists() { let cfg = if env_path.exists() {
info!("Environment already initialized, skipping bootstrap"); info!("Environment already initialized, skipping bootstrap");
match diesel::Connection::establish( match diesel::Connection::establish(&std::env::var("DATABASE_URL").unwrap()) {
&std::env::var("DATABASE_URL") Ok(mut conn) => {
.unwrap() AppConfig::from_database(&mut conn).expect("Failed to load config from DB")
) { }
Ok(mut conn) => AppConfig::from_database(&mut conn).expect("Failed to load config from DB"),
Err(_) => AppConfig::from_env().expect("Failed to load config from env"), Err(_) => AppConfig::from_env().expect("Failed to load config from env"),
} }
} else { } else {
@ -124,13 +123,16 @@ async fn main() -> std::io::Result<()> {
} }
Err(e) => { Err(e) => {
log::error!("Bootstrap failed: {}", e); log::error!("Bootstrap failed: {}", e);
match diesel::Connection::establish( match diesel::Connection::establish(
&std::env::var("DATABASE_URL") &std::env::var("DATABASE_URL").unwrap_or_else(|_| {
.unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string()), "postgres://gbuser:@localhost:5432/botserver".to_string()
) { }),
Ok(mut conn) => AppConfig::from_database(&mut conn).expect("Failed to load config from DB"), ) {
Err(_) => AppConfig::from_env().expect("Failed to load config from env"), Ok(mut conn) => {
} AppConfig::from_database(&mut conn).expect("Failed to load config from DB")
}
Err(_) => AppConfig::from_env().expect("Failed to load config from env"),
}
} }
} }
}; };
@ -171,8 +173,7 @@ async fn main() -> std::io::Result<()> {
} }
}; };
let web_adapter = Arc::new(WebChannelAdapter::new()); let web_adapter = Arc::new(WebChannelAdapter::new());
let voice_adapter = Arc::new(VoiceAdapter::new( let voice_adapter = Arc::new(VoiceAdapter::new());
));
let drive = init_drive(&config.drive) let drive = init_drive(&config.drive)
.await .await
@ -183,26 +184,21 @@ async fn main() -> std::io::Result<()> {
redis_client.clone(), redis_client.clone(),
))); )));
let auth_service = Arc::new(tokio::sync::Mutex::new(auth::AuthService::new( let auth_service = Arc::new(tokio::sync::Mutex::new(auth::AuthService::new()));
)));
let conn = diesel::Connection::establish(&cfg.database_url()).unwrap(); let conn = diesel::Connection::establish(&cfg.database_url()).unwrap();
let config_manager = ConfigManager::new(Arc::new(Mutex::new(conn))); let config_manager = ConfigManager::new(Arc::new(Mutex::new(conn)));
let mut bot_conn = diesel::Connection::establish(&cfg.database_url()).unwrap(); let mut bot_conn = diesel::Connection::establish(&cfg.database_url()).unwrap();
let (default_bot_id, _default_bot_name) = crate::bot::get_default_bot(&mut bot_conn); let (default_bot_id, _default_bot_name) = crate::bot::get_default_bot(&mut bot_conn);
let llm_url = config_manager let llm_url = config_manager
.get_config(&default_bot_id, "llm-url", Some("https://api.openai.com/v1")) .get_config(&default_bot_id, "llm-url", Some("http://localhost:8081/v1"))
.unwrap_or_else(|_| "http://localhost:8081/v1".to_string());
.unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
let llm_provider = Arc::new(crate::llm::OpenAIClient::new( let llm_provider = Arc::new(crate::llm::OpenAIClient::new(
"empty".to_string(), "empty".to_string(),
Some(llm_url.clone()), Some(llm_url.clone()),
)); ));
let app_state = Arc::new(AppState { let app_state = Arc::new(AppState {
drive: Some(drive), drive: Some(drive),
config: Some(cfg.clone()), config: Some(cfg.clone()),
@ -225,28 +221,30 @@ let llm_url = config_manager
voice_adapter: voice_adapter.clone(), voice_adapter: voice_adapter.clone(),
}); });
info!("Starting HTTP server on {}:{}", config.server.host, config.server.port); info!(
"Starting HTTP server on {}:{}",
config.server.host, config.server.port
);
let worker_count = std::thread::available_parallelism() let worker_count = std::thread::available_parallelism()
.map(|n| n.get()) .map(|n| n.get())
.unwrap_or(4); .unwrap_or(4);
// Initialize bot orchestrator and mount all bots // Initialize bot orchestrator and mount all bots
let bot_orchestrator = BotOrchestrator::new(app_state.clone()); let bot_orchestrator = BotOrchestrator::new(app_state.clone());
// Mount all active bots from database // Mount all active bots from database
if let Err(e) = bot_orchestrator.mount_all_bots().await { if let Err(e) = bot_orchestrator.mount_all_bots().await {
log::error!("Failed to mount bots: {}", e); log::error!("Failed to mount bots: {}", e);
// Use BotOrchestrator::send_warning to notify system admins // Use BotOrchestrator::send_warning to notify system admins
let msg = format!("Bot mount failure: {}", e); let msg = format!("Bot mount failure: {}", e);
let _ = bot_orchestrator.send_warning("System", "AdminBot", msg.as_str()).await; let _ = bot_orchestrator
.send_warning("System", "AdminBot", msg.as_str())
.await;
} else { } else {
let _sessions = get_sessions; let _sessions = get_sessions;
log::info!("Session handler registered successfully"); log::info!("Session handler registered successfully");
} }
let automation_state = app_state.clone(); let automation_state = app_state.clone();
std::thread::spawn(move || { std::thread::spawn(move || {
let rt = tokio::runtime::Builder::new_current_thread() let rt = tokio::runtime::Builder::new_current_thread()
@ -261,13 +259,10 @@ let llm_url = config_manager
}); });
if let Err(e) = ensure_llama_servers_running(&app_state).await { if let Err(e) = ensure_llama_servers_running(&app_state).await {
error!("Failed to stat LLM servers: {}", e); error!("Failed to stat LLM servers: {}", e);
} }
HttpServer::new(move || { HttpServer::new(move || {
let cors = Cors::default() let cors = Cors::default()
.allow_any_origin() .allow_any_origin()
.allow_any_method() .allow_any_method()
@ -295,7 +290,7 @@ let llm_url = config_manager
.service(crate::bot::handle_user_input_handler) .service(crate::bot::handle_user_input_handler)
.service(crate::bot::get_user_sessions_handler) .service(crate::bot::get_user_sessions_handler)
.service(crate::bot::get_conversation_history_handler); .service(crate::bot::get_conversation_history_handler);
#[cfg(feature = "email")] #[cfg(feature = "email")]
{ {
app = app app = app
@ -305,12 +300,10 @@ let llm_url = config_manager
.service(send_email) .service(send_email)
.service(save_draft) .service(save_draft)
.service(save_click); .service(save_click);
} }
app = app.service(static_files); app = app.service(static_files);
app = app.service(bot_index); app = app.service(bot_index);
app app
}) })
.workers(worker_count) .workers(worker_count)
.bind((config.server.host.clone(), config.server.port))? .bind((config.server.host.clone(), config.server.port))?