feat(drive_monitor): restart LLaMA servers on llm- config changes

Add logic to detect changes in llm-related configuration properties when syncing gbot configs. If any llm- key is new or differs from its stored value, automatically restart the LLaMA servers to apply the update; otherwise skip the restart. This keeps the model servers in sync with configuration changes without restarting them unnecessarily.
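
The gbot config consumed here is a plain name,value CSV, and the llm- prefix is what marks a property as belonging to the model servers. A representative fragment (values illustrative, drawn from the config diff below):

    llm-server-port,8081
    llm-server-n-moe,23
    email-from,from@domain.com

Only the llm- rows feed the restart decision; rows such as email-from sync as usual without touching the servers.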
Rodrigo Rodriguez (Pragmatismo) 2025-11-01 14:23:40 -03:00
parent 6d68585c71
commit 4e781b1815
3 changed files with 63 additions and 12 deletions


@@ -232,9 +232,13 @@ impl DriveMonitor {
     }
 
     async fn check_gbot(&self, client: &Client) -> Result<(), Box<dyn Error + Send + Sync>> {
+        let config_manager = ConfigManager::new(Arc::clone(&self.state.conn));
         let mut continuation_token = None;
         loop {
             let list_objects = client
                 .list_objects_v2()
                 .bucket(&self.bucket_name.to_lowercase())
@@ -272,28 +276,61 @@ impl DriveMonitor {
                     .bucket(&self.bucket_name)
                     .key(&path)
                     .send()
                     .await?;
 
                 debug!(
                     "GetObject successful for {}, content length: {}",
                     path,
                     response.content_length().unwrap_or(0)
                 );
                 let bytes = response.body.collect().await?.into_bytes();
                 debug!("Collected {} bytes for {}", bytes.len(), path);
                 let csv_content = String::from_utf8(bytes.to_vec())
                     .map_err(|e| format!("UTF-8 error in {}: {}", path, e))?;
                 debug!("Found {}: {} bytes", path, csv_content.len());
-                let config_manager = ConfigManager::new(Arc::clone(&self.state.conn));
-                if let Err(e) = config_manager.sync_gbot_config(&self.bot_id, &csv_content)
-                {
-                    error!(
-                        "Failed to sync config for bot {} {}: {}",
-                        path, self.bot_id, e
-                    );
-                } else {
-                    info!("Successfully synced config for bot {}", self.bot_id);
-                }
+                // Restart LLaMA servers only if llm- properties changed
+                let llm_lines: Vec<_> = csv_content
+                    .lines()
+                    .filter(|line| line.trim_start().starts_with("llm-"))
+                    .collect();
+
+                if !llm_lines.is_empty() {
+                    use crate::llm_legacy::llm_local::ensure_llama_servers_running;
+
+                    let mut restart_needed = false;
+                    for line in llm_lines {
+                        // Rows are `name,value`: compare the incoming value
+                        // against the stored one for each llm- key.
+                        let parts: Vec<&str> = line.split(',').collect();
+                        if parts.len() >= 2 {
+                            let key = parts[0].trim();
+                            let new_value = parts[1].trim();
+                            match config_manager.get_config(&self.bot_id, key, None) {
+                                Ok(old_value) => {
+                                    if old_value != new_value {
+                                        info!(
+                                            "Detected change in {} (old: {}, new: {})",
+                                            key, old_value, new_value
+                                        );
+                                        restart_needed = true;
+                                    }
+                                }
+                                Err(_) => {
+                                    info!("New llm- property detected: {}", key);
+                                    restart_needed = true;
+                                }
+                            }
+                        }
+                    }
+
+                    if restart_needed {
+                        info!("Detected llm- configuration change, restarting LLaMA servers...");
+                        if let Err(e) = ensure_llama_servers_running(&self.state).await {
+                            error!("Failed to restart LLaMA servers after llm- config change: {}", e);
+                        }
+                    } else {
+                        info!("No llm- property changes detected; skipping LLaMA server restart.");
+                    }
+                }
+
+                // Sync the full config regardless of whether any llm- key changed.
+                if let Err(e) = config_manager.sync_gbot_config(&self.bot_id, &csv_content) {
+                    error!(
+                        "Failed to sync config for bot {} {}: {}",
+                        path, self.bot_id, e
+                    );
+                }
             }
             Err(e) => {
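
The check above reduces to a pure predicate over the CSV text and the stored values. A minimal standalone sketch of that predicate, assuming a HashMap as a stand-in for ConfigManager::get_config (llm_restart_needed is a hypothetical name, not part of the codebase):

    use std::collections::HashMap;

    /// True if any `llm-` row in `csv` is new or differs from `stored`.
    /// Mirrors the loop above: split each `name,value` row and compare
    /// the trimmed pair against the previously stored configuration.
    fn llm_restart_needed(stored: &HashMap<String, String>, csv: &str) -> bool {
        csv.lines()
            .filter(|line| line.trim_start().starts_with("llm-"))
            .filter_map(|line| {
                let mut parts = line.splitn(2, ',');
                Some((parts.next()?.trim(), parts.next()?.trim()))
            })
            .any(|(key, new_value)| match stored.get(key) {
                Some(old_value) => old_value.as_str() != new_value, // value changed
                None => true, // new llm- property
            })
    }

    fn main() {
        let stored = HashMap::from([
            ("llm-server-n-moe".to_string(), "16".to_string()),
            ("llm-server-port".to_string(), "8081".to_string()),
        ]);
        let csv = "llm-server-n-moe,23\nllm-server-port,8081\nemail-from,from@domain.com";
        assert!(llm_restart_needed(&stored, csv)); // n-moe changed: 16 -> 23
    }

One difference from the committed loop: splitn(2, ',') keeps any commas inside the value intact, whereas split(',') would truncate the value at the first comma.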


@@ -90,6 +90,20 @@ pub async fn ensure_llama_servers_running(
     info!(" Embedding Model: {}", embedding_model);
     info!(" LLM Server Path: {}", llm_server_path);
 
+    // Restart any existing llama-server processes before starting new ones
+    info!("🔁 Restarting any existing llama-server processes...");
+    if let Err(e) = tokio::process::Command::new("sh")
+        .arg("-c")
+        .arg("pkill -f llama-server || true")
+        .spawn()
+    {
+        error!("Failed to execute pkill for llama-server: {}", e);
+    } else {
+        sleep(Duration::from_secs(2)).await;
+        info!("✅ Existing llama-server processes terminated (if any)");
+    }
+
     // Check if servers are already running
     let llm_running = is_server_running(&llm_url).await;
     let embedding_running = is_server_running(&embedding_url).await;
@@ -99,6 +113,7 @@ pub async fn ensure_llama_servers_running(
         return Ok(());
     }
 
+
     // Start servers that aren't running
     let mut tasks = vec![];
@@ -620,4 +635,3 @@ pub async fn embeddings_local(
 
     Ok(HttpResponse::Ok().json(openai_response))
 }
-
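
One caveat in the restart block above: spawn() returns as soon as the shell is forked, so the fixed two-second sleep does all the waiting. If blocking on the kill command itself is preferred, tokio's Command also exposes status(); a minimal sketch under that assumption (kill_llama_servers is a hypothetical helper, not the committed code; requires tokio with the process, time, rt and macros features):

    use std::time::Duration;
    use tokio::time::sleep;

    // Wait for the pkill wrapper to exit instead of spawn-and-sleep only.
    // `|| true` keeps "no process matched" from surfacing as an error.
    async fn kill_llama_servers() -> std::io::Result<()> {
        let status = tokio::process::Command::new("sh")
            .arg("-c")
            .arg("pkill -f llama-server || true")
            .status()
            .await?;
        if !status.success() {
            eprintln!("pkill wrapper exited with {}", status);
        }
        // Give the killed llama-server processes a moment to release ports.
        sleep(Duration::from_secs(2)).await;
        Ok(())
    }

    #[tokio::main]
    async fn main() -> std::io::Result<()> {
        kill_llama_servers().await
    }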


@@ -16,7 +16,7 @@ llm-server-path,botserver-stack/bin/llm/build/bin
 llm-server-host,0.0.0.0
 llm-server-port,8081
 llm-server-gpu-layers,35
-llm-server-n-moe,16
+llm-server-n-moe,23
 llm-server-ctx-size,16000
 llm-server-parallel,8
 llm-server-cont-batching,true
