2025-11-02 18:36:21 -03:00
|
|
|
use crate::config::ConfigManager;
|
2025-11-12 08:19:21 -03:00
|
|
|
use crate::shared::models::schema::bots::dsl::*;
|
|
|
|
|
use crate::shared::state::AppState;
|
2025-11-02 18:36:21 -03:00
|
|
|
use diesel::prelude::*;
|
2025-12-10 08:35:35 -03:00
|
|
|
use log::{error, info, warn};
|
2025-11-12 08:19:21 -03:00
|
|
|
use reqwest;
|
2025-12-26 08:59:25 -03:00
|
|
|
use std::fmt::Write;
|
2025-11-02 18:36:21 -03:00
|
|
|
use std::sync::Arc;
|
|
|
|
|
use tokio;
|
2025-11-20 13:28:35 -03:00
|
|
|
|
2025-11-02 18:36:21 -03:00
|
|
|
/// Ensures the local llama.cpp LLM and embedding servers are up, starting
/// them from the "default" bot's configuration when they are not.
///
/// Flow: honor the `SKIP_LLM_SERVER` escape hatch, load endpoint/model/path
/// config, kill any stale `llama-server` processes, probe both endpoints,
/// spawn whichever servers are missing, then poll health until both respond
/// or the window (120 attempts x 2 s = ~4 min) expires.
///
/// # Errors
/// Returns an error if the blocking config lookup task or a spawned startup
/// task fails, or if either server is still unreachable at timeout.
pub async fn ensure_llama_servers_running(
    app_state: Arc<AppState>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Escape hatch for tests / deployments that use a mock or external LLM.
    if std::env::var("SKIP_LLM_SERVER").is_ok() {
        info!("SKIP_LLM_SERVER set - skipping local LLM server startup (using mock/external LLM)");
        return Ok(());
    }

    // Fetch all configuration up front as one tuple so the borrow of the
    // DB connection is scoped to this block.
    let config_values = {
        let conn_arc = app_state.conn.clone();
        // Diesel is synchronous; run the bot-id lookup off the async runtime.
        let default_bot_id = tokio::task::spawn_blocking(move || {
            // NOTE(review): `.unwrap()` panics if the pool is exhausted or the
            // DB is down — consider propagating the error instead.
            let mut conn = conn_arc.get().unwrap();
            // Missing "default" bot degrades to the nil UUID so the config
            // lookups below fall back to their defaults.
            bots.filter(name.eq("default"))
                .select(id)
                .first::<uuid::Uuid>(&mut *conn)
                .unwrap_or_else(|_| uuid::Uuid::nil())
        })
        .await?;

        let config_manager = ConfigManager::new(app_state.conn.clone());
        // Tuple order must match the destructuring below.
        (
            default_bot_id,
            // Whether this process should manage local servers at all.
            config_manager
                .get_config(&default_bot_id, "llm-server", Some("true"))
                .unwrap_or_else(|_| "true".to_string()),
            config_manager
                .get_config(&default_bot_id, "llm-url", None)
                .unwrap_or_default(),
            config_manager
                .get_config(&default_bot_id, "llm-model", None)
                .unwrap_or_default(),
            config_manager
                .get_config(&default_bot_id, "embedding-url", None)
                .unwrap_or_default(),
            config_manager
                .get_config(&default_bot_id, "embedding-model", None)
                .unwrap_or_default(),
            // Directory containing the llama-server binary (and its logs).
            config_manager
                .get_config(&default_bot_id, "llm-server-path", None)
                .unwrap_or_default(),
        )
    };

    let (
        _default_bot_id,
        llm_server_enabled,
        llm_url,
        llm_model,
        embedding_url,
        embedding_model,
        llm_server_path,
    ) = config_values;

    let llm_server_enabled = llm_server_enabled.to_lowercase() == "true";

    // llm-server=false means endpoints are managed externally; nothing to do.
    if !llm_server_enabled {
        info!("Local LLM server management disabled (llm-server=false). Using external endpoints.");
        info!(" LLM URL: {llm_url}");
        info!(" Embedding URL: {embedding_url}");
        return Ok(());
    }

    info!("Starting LLM servers...");
    info!("Configuration:");
    info!(" LLM URL: {llm_url}");
    info!(" Embedding URL: {embedding_url}");
    info!(" LLM Model: {llm_model}");
    info!(" Embedding Model: {embedding_model}");
    info!(" LLM Server Path: {llm_server_path}");

    info!("Restarting any existing llama-server processes...");

    // Best-effort cleanup of stale processes; `|| true` keeps the shell exit
    // code zero when nothing matched.
    // NOTE(review): `pkill llama-server -9` relies on GNU getopt permuting
    // the `-9` after the pattern — confirm on non-GNU targets.
    if let Err(e) = tokio::process::Command::new("sh")
        .arg("-c")
        .arg("pkill llama-server -9 || true")
        .spawn()
    {
        error!("Failed to execute pkill for llama-server: {e}");
    } else {
        // Give the killed processes time to release their ports.
        tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
        info!("Existing llama-server processes terminated (if any)");
    }

    // HTTPS endpoints are assumed to be externally managed and healthy;
    // only plain-HTTP (local) endpoints are probed.
    let llm_running = if llm_url.starts_with("https://") {
        info!("Using external HTTPS LLM server, skipping local startup");
        true
    } else {
        is_server_running(&llm_url).await
    };

    let embedding_running = if embedding_url.starts_with("https://") {
        info!("Using external HTTPS embedding server, skipping local startup");
        true
    } else {
        is_server_running(&embedding_url).await
    };

    if llm_running && embedding_running {
        info!("Both LLM and Embedding servers are already running");
        return Ok(());
    }

    // Spawn the missing servers concurrently; each task only performs the
    // process launch, readiness is polled below.
    let mut tasks = vec![];

    if !llm_running && !llm_model.is_empty() {
        info!("Starting LLM server...");
        tasks.push(tokio::spawn(start_llm_server(
            Arc::clone(&app_state),
            llm_server_path.clone(),
            llm_model.clone(),
            llm_url.clone(),
        )));
    } else if llm_model.is_empty() {
        info!("LLM_MODEL not set, skipping LLM server");
    }

    if !embedding_running && !embedding_model.is_empty() {
        info!("Starting Embedding server...");
        tasks.push(tokio::spawn(start_embedding_server(
            llm_server_path.clone(),
            embedding_model.clone(),
            embedding_url.clone(),
        )));
    } else if embedding_model.is_empty() {
        info!("EMBEDDING_MODEL not set, skipping Embedding server");
    }

    // First `?` surfaces a panicked/cancelled task, second the task's own error.
    for task in tasks {
        task.await??;
    }

    info!("Waiting for servers to become ready...");

    // A server with no configured model counts as "ready" so it never blocks
    // the loop below.
    let mut llm_ready = llm_running || llm_model.is_empty();
    let mut embedding_ready = embedding_running || embedding_model.is_empty();
    let mut attempts = 0;
    // 120 x 2 s: large models can take minutes to load.
    let max_attempts = 120;

    while attempts < max_attempts && (!llm_ready || !embedding_ready) {
        tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;

        // Progress log every 5 attempts (~10 s).
        if attempts % 5 == 0 {
            info!(
                "Checking server health (attempt {}/{max_attempts})...",
                attempts + 1
            );
        }

        if !llm_ready && !llm_model.is_empty() {
            if is_server_running(&llm_url).await {
                info!("LLM server ready at {llm_url}");
                llm_ready = true;
            } else {
                info!("LLM server not ready yet");
            }
        }

        if !embedding_ready && !embedding_model.is_empty() {
            if is_server_running(&embedding_url).await {
                info!("Embedding server ready at {embedding_url}");
                embedding_ready = true;
            } else if attempts % 10 == 0 {
                warn!("Embedding server not ready yet at {embedding_url}");
                // Surface the tail of the embedding server's stdout log
                // (written by start_embedding_server on Unix) as a diagnostic.
                if let Ok(log_content) =
                    std::fs::read_to_string(format!("{llm_server_path}/llmembd-stdout.log"))
                {
                    // lines().rev().take(5) collects the last 5 lines in
                    // reverse; the inner rev() below restores file order.
                    let last_lines: Vec<&str> = log_content.lines().rev().take(5).collect();
                    if !last_lines.is_empty() {
                        info!("Embedding server log (last 5 lines):");
                        for line in last_lines.iter().rev() {
                            info!(" {line}");
                        }
                    }
                }
            }
        }

        attempts += 1;
        // Louder reassurance every 20 attempts (~40 s).
        if attempts % 20 == 0 {
            warn!(
                "Still waiting for servers... (attempt {attempts}/{max_attempts}) - this may take a while for large models"
            );
        }
    }

    if llm_ready && embedding_ready {
        info!("All llama.cpp servers are ready and responding!");
        // NOTE(review): this client is constructed and immediately dropped;
        // it looks like a leftover warm-up/validation step — confirm whether
        // it can be removed.
        let _llm_provider1 = Arc::new(crate::llm::OpenAIClient::new(
            llm_model.clone(),
            Some(llm_url.clone()),
        ));
        Ok(())
    } else {
        // Build a multi-line error naming exactly which servers timed out.
        let mut error_msg = "Servers failed to start within timeout:".to_string();
        if !llm_ready && !llm_model.is_empty() {
            let _ = write!(error_msg, "\n - LLM server at {llm_url}");
        }
        if !embedding_ready && !embedding_model.is_empty() {
            let _ = write!(error_msg, "\n - Embedding server at {embedding_url}");
        }
        Err(error_msg.into())
    }
}
|
|
|
|
|
pub async fn is_server_running(url: &str) -> bool {
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
let client = reqwest::Client::builder()
|
|
|
|
|
.timeout(std::time::Duration::from_secs(5))
|
|
|
|
|
.build()
|
|
|
|
|
.unwrap_or_default();
|
|
|
|
|
|
2025-12-26 08:59:25 -03:00
|
|
|
match client.get(format!("{url}/health")).send().await {
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
Ok(response) => {
|
|
|
|
|
if response.status().is_success() {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2025-12-23 18:40:58 -03:00
|
|
|
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
info!("Health check returned status: {}", response.status());
|
|
|
|
|
false
|
|
|
|
|
}
|
2025-12-26 08:59:25 -03:00
|
|
|
Err(e) => match client.get(url).send().await {
|
|
|
|
|
Ok(response) => response.status().is_success(),
|
|
|
|
|
Err(_) => {
|
|
|
|
|
if !e.is_connect() {
|
|
|
|
|
warn!("Health check error for {url}: {e}");
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
}
|
2025-12-26 08:59:25 -03:00
|
|
|
false
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
}
|
2025-12-26 08:59:25 -03:00
|
|
|
},
|
2025-11-02 18:36:21 -03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
pub async fn start_llm_server(
|
|
|
|
|
app_state: Arc<AppState>,
|
|
|
|
|
llama_cpp_path: String,
|
|
|
|
|
model_path: String,
|
|
|
|
|
url: String,
|
|
|
|
|
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
2025-12-26 08:59:25 -03:00
|
|
|
let port = extract_port(&url);
|
2025-11-02 18:36:21 -03:00
|
|
|
std::env::set_var("OMP_NUM_THREADS", "20");
|
|
|
|
|
std::env::set_var("OMP_PLACES", "cores");
|
|
|
|
|
std::env::set_var("OMP_PROC_BIND", "close");
|
|
|
|
|
let conn = app_state.conn.clone();
|
|
|
|
|
let config_manager = ConfigManager::new(conn.clone());
|
2025-11-12 08:19:21 -03:00
|
|
|
let mut conn = conn.get().unwrap();
|
|
|
|
|
let default_bot_id = bots
|
|
|
|
|
.filter(name.eq("default"))
|
|
|
|
|
.select(id)
|
|
|
|
|
.first::<uuid::Uuid>(&mut *conn)
|
|
|
|
|
.unwrap_or_else(|_| uuid::Uuid::nil());
|
|
|
|
|
let n_moe = config_manager
|
|
|
|
|
.get_config(&default_bot_id, "llm-server-n-moe", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| "4".to_string());
|
2025-11-12 08:19:21 -03:00
|
|
|
let parallel = config_manager
|
|
|
|
|
.get_config(&default_bot_id, "llm-server-parallel", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| "1".to_string());
|
2025-11-12 08:19:21 -03:00
|
|
|
let cont_batching = config_manager
|
|
|
|
|
.get_config(&default_bot_id, "llm-server-cont-batching", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| "true".to_string());
|
2025-11-12 08:19:21 -03:00
|
|
|
let mlock = config_manager
|
|
|
|
|
.get_config(&default_bot_id, "llm-server-mlock", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| "true".to_string());
|
2025-11-12 08:19:21 -03:00
|
|
|
let no_mmap = config_manager
|
|
|
|
|
.get_config(&default_bot_id, "llm-server-no-mmap", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| "true".to_string());
|
2025-11-12 08:19:21 -03:00
|
|
|
let gpu_layers = config_manager
|
|
|
|
|
.get_config(&default_bot_id, "llm-server-gpu-layers", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| "20".to_string());
|
2025-11-12 08:19:21 -03:00
|
|
|
let reasoning_format = config_manager
|
|
|
|
|
.get_config(&default_bot_id, "llm-server-reasoning-format", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| String::new());
|
2025-11-12 08:19:21 -03:00
|
|
|
let n_predict = config_manager
|
|
|
|
|
.get_config(&default_bot_id, "llm-server-n-predict", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| "50".to_string());
|
2025-11-12 17:22:33 -03:00
|
|
|
|
2025-11-20 13:28:35 -03:00
|
|
|
let n_ctx_size = config_manager
|
2025-11-12 17:27:32 -03:00
|
|
|
.get_config(&default_bot_id, "llm-server-ctx-size", None)
|
2025-12-26 08:59:25 -03:00
|
|
|
.unwrap_or_else(|_| "4096".to_string());
|
2025-11-20 13:28:35 -03:00
|
|
|
|
|
|
|
|
let mut args = format!(
|
2025-12-26 08:59:25 -03:00
|
|
|
"-m {model_path} --host 0.0.0.0 --port {port} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {gpu_layers}"
|
2025-11-02 18:36:21 -03:00
|
|
|
);
|
2025-11-07 17:04:33 -03:00
|
|
|
if !reasoning_format.is_empty() {
|
2025-12-26 08:59:25 -03:00
|
|
|
let _ = write!(args, " --reasoning-format {reasoning_format}");
|
2025-11-07 17:04:33 -03:00
|
|
|
}
|
2025-11-12 18:37:01 -03:00
|
|
|
|
2025-11-15 19:08:26 -03:00
|
|
|
if n_moe != "0" {
|
2025-12-26 08:59:25 -03:00
|
|
|
let _ = write!(args, " --n-cpu-moe {n_moe}");
|
2025-11-15 19:08:26 -03:00
|
|
|
}
|
|
|
|
|
if parallel != "1" {
|
2025-12-26 08:59:25 -03:00
|
|
|
let _ = write!(args, " --parallel {parallel}");
|
2025-11-15 19:08:26 -03:00
|
|
|
}
|
|
|
|
|
if cont_batching == "true" {
|
|
|
|
|
args.push_str(" --cont-batching");
|
|
|
|
|
}
|
|
|
|
|
if mlock == "true" {
|
|
|
|
|
args.push_str(" --mlock");
|
|
|
|
|
}
|
|
|
|
|
if no_mmap == "true" {
|
|
|
|
|
args.push_str(" --no-mmap");
|
|
|
|
|
}
|
|
|
|
|
if n_predict != "0" {
|
2025-12-26 08:59:25 -03:00
|
|
|
let _ = write!(args, " --n-predict {n_predict}");
|
2025-11-15 19:08:26 -03:00
|
|
|
}
|
2025-12-26 08:59:25 -03:00
|
|
|
let _ = write!(args, " --ctx-size {n_ctx_size}");
|
2025-11-20 13:28:35 -03:00
|
|
|
|
2025-11-02 18:36:21 -03:00
|
|
|
if cfg!(windows) {
|
|
|
|
|
let mut cmd = tokio::process::Command::new("cmd");
|
2025-12-26 08:59:25 -03:00
|
|
|
cmd.arg("/C")
|
|
|
|
|
.arg(format!("cd {llama_cpp_path} && .\\llama-server.exe {args}"));
|
2025-11-12 08:19:21 -03:00
|
|
|
info!(
|
2025-12-26 08:59:25 -03:00
|
|
|
"Executing LLM server command: cd {llama_cpp_path} && .\\llama-server.exe {args} --verbose"
|
2025-11-12 08:19:21 -03:00
|
|
|
);
|
2025-11-02 18:36:21 -03:00
|
|
|
cmd.spawn()?;
|
|
|
|
|
} else {
|
|
|
|
|
let mut cmd = tokio::process::Command::new("sh");
|
|
|
|
|
cmd.arg("-c").arg(format!(
|
2025-12-26 08:59:25 -03:00
|
|
|
"cd {llama_cpp_path} && ./llama-server {args} --verbose >llm-stdout.log 2>&1 &"
|
2025-11-02 18:36:21 -03:00
|
|
|
));
|
2025-11-12 08:19:21 -03:00
|
|
|
info!(
|
2025-12-26 08:59:25 -03:00
|
|
|
"Executing LLM server command: cd {llama_cpp_path} && ./llama-server {args} --verbose"
|
2025-11-12 08:19:21 -03:00
|
|
|
);
|
2025-11-02 18:36:21 -03:00
|
|
|
cmd.spawn()?;
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
pub async fn start_embedding_server(
|
|
|
|
|
llama_cpp_path: String,
|
|
|
|
|
model_path: String,
|
|
|
|
|
url: String,
|
|
|
|
|
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
2025-12-26 08:59:25 -03:00
|
|
|
let port = extract_port(&url);
|
2025-12-23 18:40:58 -03:00
|
|
|
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
let full_model_path = if model_path.starts_with('/') {
|
|
|
|
|
model_path.clone()
|
|
|
|
|
} else {
|
2025-12-26 08:59:25 -03:00
|
|
|
format!("{llama_cpp_path}/{model_path}")
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if !std::path::Path::new(&full_model_path).exists() {
|
2025-12-26 08:59:25 -03:00
|
|
|
error!("Embedding model file not found: {full_model_path}");
|
|
|
|
|
return Err(format!("Embedding model file not found: {full_model_path}").into());
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
}
|
|
|
|
|
|
2025-12-26 08:59:25 -03:00
|
|
|
info!("Starting embedding server on port {port} with model: {model_path}");
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
|
2025-11-02 18:36:21 -03:00
|
|
|
if cfg!(windows) {
|
|
|
|
|
let mut cmd = tokio::process::Command::new("cmd");
|
|
|
|
|
cmd.arg("/c").arg(format!(
|
2025-12-26 08:59:25 -03:00
|
|
|
"cd {llama_cpp_path} && .\\llama-server.exe -m {model_path} --verbose --host 0.0.0.0 --port {port} --embedding --n-gpu-layers 99 >stdout.log 2>&1"
|
2025-11-02 18:36:21 -03:00
|
|
|
));
|
|
|
|
|
cmd.spawn()?;
|
|
|
|
|
} else {
|
|
|
|
|
let mut cmd = tokio::process::Command::new("sh");
|
|
|
|
|
cmd.arg("-c").arg(format!(
|
2025-12-26 08:59:25 -03:00
|
|
|
"cd {llama_cpp_path} && ./llama-server -m {model_path} --verbose --host 0.0.0.0 --port {port} --embedding --n-gpu-layers 99 >llmembd-stdout.log 2>&1 &"
|
2025-11-02 18:36:21 -03:00
|
|
|
));
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
info!(
|
2025-12-26 08:59:25 -03:00
|
|
|
"Executing embedding server command: cd {llama_cpp_path} && ./llama-server -m {model_path} --host 0.0.0.0 --port {port} --embedding"
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
);
|
2025-11-02 18:36:21 -03:00
|
|
|
cmd.spawn()?;
|
|
|
|
|
}
|
Bootstrap started! 6.1.0
- Add rss and scraper dependencies for web data keywords
- Add SMS keyword with priority support (low, normal, high, urgent)
- Add web_data keywords: RSS, SCRAPE, SCRAPE_ALL, SCRAPE_TABLE, SCRAPE_LINKS, SCRAPE_IMAGES
- Add ai_tools keywords: TRANSLATE, OCR, SENTIMENT, CLASSIFY
- Improve LLM server health check with better diagnostics and increased timeout
- Fix compilation errors and warnings
- Register SMS keywords in BASIC engine
2025-12-10 18:22:02 -03:00
|
|
|
|
|
|
|
|
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
|
|
|
|
|
|
2025-11-02 18:36:21 -03:00
|
|
|
Ok(())
|
|
|
|
|
}
|
2025-12-26 08:59:25 -03:00
|
|
|
|
|
|
|
|
/// Extracts the numeric port from a URL such as `http://host:8082/path`,
/// falling back to `"8081"` when no port is present.
///
/// The previous `rsplit(':').next()` could never hit its fallback (rsplit
/// always yields at least one element), so a port-less URL returned
/// `"//host"` and a URL with a path returned `"8082/path"`. This version
/// validates that the candidate is all digits and strips any trailing path.
fn extract_port(url: &str) -> &str {
    // Everything after the last ':' — for "http://host" this is "//host".
    let tail = match url.rsplit_once(':') {
        Some((_, rest)) => rest,
        None => return "8081",
    };
    // Drop a trailing path segment, e.g. "8082/health" -> "8082".
    let candidate = tail.split('/').next().unwrap_or(tail);
    if !candidate.is_empty() && candidate.bytes().all(|b| b.is_ascii_digit()) {
        candidate
    } else {
        // The ':' belonged to the scheme (no explicit port); use the default.
        "8081"
    }
}
|