chore: point LLM client and default configs to local endpoint

Updated the 6.0.4 migration to use `http://localhost:8081/v1` for the default OpenAI model configurations (gpt-4 and gpt-3.5-turbo) and the local embed service. Adjusted `OpenAIClient` to default to the same localhost base URL instead of the production OpenAI API.

Reorganized imports and module ordering in `src/main.rs` (moved `mod llm`, `mod nvidia`, and `BotOrchestrator` import), cleaned up formatting, and removed unused imports. These changes streamline development by directing LLM calls to a local server and improve code readability.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-11-07 16:40:19 -03:00
parent 21355df0c8
commit 120d06a0db
4 changed files with 69 additions and 76 deletions

View file

@@ -192,8 +192,8 @@ ON CONFLICT (slug) DO NOTHING;
-- Add some default model configurations
-- ============================================================================
INSERT INTO model_configurations (id, model_name, model_type, provider, endpoint, model_id, context_window, max_tokens, is_default) VALUES
(gen_random_uuid()::text, 'gpt-4', 'llm', 'openai', 'https://api.openai.com/v1', 'gpt-4', 8192, 4096, true),
(gen_random_uuid()::text, 'gpt-3.5-turbo', 'llm', 'openai', 'https://api.openai.com/v1', 'gpt-3.5-turbo', 4096, 2048, false),
(gen_random_uuid()::text, 'gpt-4', 'llm', 'openai', 'http://localhost:8081/v1', 'gpt-4', 8192, 4096, true),
(gen_random_uuid()::text, 'gpt-3.5-turbo', 'llm', 'openai', 'http://localhost:8081/v1', 'gpt-3.5-turbo', 4096, 2048, false),
(gen_random_uuid()::text, 'bge-large', 'embed', 'local', 'http://localhost:8081', 'BAAI/bge-large-en-v1.5', 512, 1024, true)
ON CONFLICT (model_name) DO NOTHING;

View file

@@ -407,26 +407,21 @@ impl BotOrchestrator {
.unwrap_or(-1)
};
// Acquire lock briefly for history retrieval with configurable limit
let history = "".to_string();
let mut sm = self.state.session_manager.lock().await;
let mut history = sm.get_conversation_history(session.id, user_id)?;
// {
// let mut sm = self.state.session_manager.lock().await;
// let mut history = sm.get_conversation_history(session.id, user_id)?;
// Skip all messages before the most recent compacted message (type 9)
if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
role == "COMPACTED" || content.starts_with("SUMMARY:")
}) {
history = history.split_off(last_compacted_index);
}
// // Skip all messages before the most recent compacted message (type 9)
// if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
// role == "COMPACTED" || content.starts_with("SUMMARY:")
// }) {
// history = history.split_off(last_compacted_index);
// }
// if history_limit > 0 && history.len() > history_limit as usize {
// let start = history.len() - history_limit as usize;
// history.drain(0..start);
// }
// history
// };
// Apply history limit if configured
if history_limit > 0 && history.len() > history_limit as usize {
let start = history.len() - history_limit as usize;
history.drain(0..start);
}
let mut prompt = String::new();
if !system_prompt.is_empty() {
@@ -435,15 +430,15 @@ let history = "".to_string();
if !context_data.is_empty() {
prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
}
// for (role, content) in &history {
// prompt.push_str(&format!("{}:{}\n", role, content));
// }
for (role, content) in &history {
prompt.push_str(&format!("{}:{}\n", role, content));
}
prompt.push_str(&format!("Human: {}\nBot:", message.content));
// trace!(
// "Stream prompt constructed with {} history entries",
// history.len()
// );
trace!(
"Stream prompt constructed with {} history entries",
history.len()
);
let (stream_tx, mut stream_rx) = mpsc::channel::<String>(100);
let llm = self.state.llm_provider.clone();
@@ -1316,3 +1311,8 @@ async fn send_warning_handler(
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "warning_sent"})))
}
);
}
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "warning_sent"})))
}

View file

@@ -47,7 +47,7 @@ impl OpenAIClient {
Self {
client: reqwest::Client::new(),
api_key,
base_url: base_url.unwrap_or_else(|| "https://api.openai.com/v1".to_string()),
base_url: base_url.unwrap_or_else(|| "http://localhost:8081/v1".to_string()),
}
}
}

View file

@@ -1,13 +1,12 @@
#![cfg_attr(feature = "desktop", windows_subsystem = "windows")]
use log::error;
use actix_cors::Cors;
use actix_web::middleware::Logger;
use actix_web::{web, App, HttpServer};
use dotenvy::dotenv;
use log::error;
use log::info;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
mod llm;
mod auth;
mod automation;
mod basic;
@@ -19,22 +18,24 @@ mod context;
mod drive_monitor;
#[cfg(feature = "email")]
mod email;
#[cfg(feature = "desktop")]
mod ui;
mod file;
mod llm;
mod llm_models;
mod meet;
mod nvidia;
mod package_manager;
mod session;
mod shared;
pub mod tests;
#[cfg(feature = "desktop")]
mod ui;
mod web_server;
mod nvidia;
use crate::auth::auth_handler;
use crate::automation::AutomationService;
use crate::bootstrap::BootstrapManager;
use crate::bot::{websocket_handler};
use crate::bot::websocket_handler;
use crate::bot::BotOrchestrator;
use crate::channels::{VoiceAdapter, WebChannelAdapter};
use crate::config::AppConfig;
#[cfg(feature = "email")]
@@ -47,7 +48,6 @@ use crate::package_manager::InstallMode;
use crate::session::{create_session, get_session_history, get_sessions, start_session};
use crate::shared::state::AppState;
use crate::web_server::{bot_index, index, static_files};
use crate::bot::BotOrchestrator;
#[cfg(not(feature = "desktop"))]
#[tokio::main]
@@ -56,7 +56,6 @@ async fn main() -> std::io::Result<()> {
use crate::llm::local::ensure_llama_servers_running;
let args: Vec<String> = std::env::args().collect();
if args.len() > 1 {
let command = &args[1];
@@ -104,16 +103,16 @@ async fn main() -> std::io::Result<()> {
let mut bootstrap = BootstrapManager::new(install_mode.clone(), tenant.clone()).await;
// Prevent double bootstrap: skip if environment already initialized
let env_path = std::env::current_dir()?.join("botserver-stack").join(".env");
let env_path = std::env::current_dir()?
.join("botserver-stack")
.join(".env");
let cfg = if env_path.exists() {
info!("Environment already initialized, skipping bootstrap");
match diesel::Connection::establish(
&std::env::var("DATABASE_URL")
.unwrap()
) {
Ok(mut conn) => AppConfig::from_database(&mut conn).expect("Failed to load config from DB"),
match diesel::Connection::establish(&std::env::var("DATABASE_URL").unwrap()) {
Ok(mut conn) => {
AppConfig::from_database(&mut conn).expect("Failed to load config from DB")
}
Err(_) => AppConfig::from_env().expect("Failed to load config from env"),
}
} else {
@@ -125,10 +124,13 @@ async fn main() -> std::io::Result<()> {
Err(e) => {
log::error!("Bootstrap failed: {}", e);
match diesel::Connection::establish(
&std::env::var("DATABASE_URL")
.unwrap_or_else(|_| "postgres://gbuser:@localhost:5432/botserver".to_string()),
&std::env::var("DATABASE_URL").unwrap_or_else(|_| {
"postgres://gbuser:@localhost:5432/botserver".to_string()
}),
) {
Ok(mut conn) => AppConfig::from_database(&mut conn).expect("Failed to load config from DB"),
Ok(mut conn) => {
AppConfig::from_database(&mut conn).expect("Failed to load config from DB")
}
Err(_) => AppConfig::from_env().expect("Failed to load config from env"),
}
}
@@ -171,8 +173,7 @@ async fn main() -> std::io::Result<()> {
}
};
let web_adapter = Arc::new(WebChannelAdapter::new());
let voice_adapter = Arc::new(VoiceAdapter::new(
));
let voice_adapter = Arc::new(VoiceAdapter::new());
let drive = init_drive(&config.drive)
.await
@@ -183,26 +184,21 @@ async fn main() -> std::io::Result<()> {
redis_client.clone(),
)));
let auth_service = Arc::new(tokio::sync::Mutex::new(auth::AuthService::new(
)));
let auth_service = Arc::new(tokio::sync::Mutex::new(auth::AuthService::new()));
let conn = diesel::Connection::establish(&cfg.database_url()).unwrap();
let config_manager = ConfigManager::new(Arc::new(Mutex::new(conn)));
let mut bot_conn = diesel::Connection::establish(&cfg.database_url()).unwrap();
let (default_bot_id, _default_bot_name) = crate::bot::get_default_bot(&mut bot_conn);
let llm_url = config_manager
.get_config(&default_bot_id, "llm-url", Some("https://api.openai.com/v1"))
.unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
.get_config(&default_bot_id, "llm-url", Some("http://localhost:8081/v1"))
.unwrap_or_else(|_| "http://localhost:8081/v1".to_string());
let llm_provider = Arc::new(crate::llm::OpenAIClient::new(
"empty".to_string(),
Some(llm_url.clone()),
));
let app_state = Arc::new(AppState {
drive: Some(drive),
config: Some(cfg.clone()),
@@ -225,12 +221,14 @@ let llm_url = config_manager
voice_adapter: voice_adapter.clone(),
});
info!("Starting HTTP server on {}:{}", config.server.host, config.server.port);
info!(
"Starting HTTP server on {}:{}",
config.server.host, config.server.port
);
let worker_count = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(4);
// Initialize bot orchestrator and mount all bots
let bot_orchestrator = BotOrchestrator::new(app_state.clone());
@@ -239,14 +237,14 @@ let llm_url = config_manager
log::error!("Failed to mount bots: {}", e);
// Use BotOrchestrator::send_warning to notify system admins
let msg = format!("Bot mount failure: {}", e);
let _ = bot_orchestrator.send_warning("System", "AdminBot", msg.as_str()).await;
let _ = bot_orchestrator
.send_warning("System", "AdminBot", msg.as_str())
.await;
} else {
let _sessions = get_sessions;
log::info!("Session handler registered successfully");
}
let automation_state = app_state.clone();
std::thread::spawn(move || {
let rt = tokio::runtime::Builder::new_current_thread()
@@ -261,13 +259,10 @@ let llm_url = config_manager
});
if let Err(e) = ensure_llama_servers_running(&app_state).await {
error!("Failed to stat LLM servers: {}", e);
}
HttpServer::new(move || {
let cors = Cors::default()
.allow_any_origin()
.allow_any_method()
@@ -305,12 +300,10 @@ let llm_url = config_manager
.service(send_email)
.service(save_draft)
.service(save_click);
}
app = app.service(static_files);
app = app.service(bot_index);
app
})
.workers(worker_count)
.bind((config.server.host.clone(), config.server.port))?