diff --git a/src/drive_monitor/mod.rs b/src/drive_monitor/mod.rs
index a1539005..b082621f 100644
--- a/src/drive_monitor/mod.rs
+++ b/src/drive_monitor/mod.rs
@@ -232,9 +232,13 @@ impl DriveMonitor {
     }
 
     async fn check_gbot(&self, client: &Client) -> Result<(), Box> {
+
+        let config_manager = ConfigManager::new(Arc::clone(&self.state.conn));
+
         let mut continuation_token = None;
 
         loop {
+
            let list_objects = client
                .list_objects_v2()
                .bucket(&self.bucket_name.to_lowercase())
@@ -272,28 +276,61 @@ impl DriveMonitor {
                        .bucket(&self.bucket_name)
                        .key(&path)
                        .send()
-                        .await?;
+                        .await?;
                    debug!(
                        "GetObject successful for {}, content length: {}",
                        path,
                        response.content_length().unwrap_or(0)
                    );
+
                    let bytes = response.body.collect().await?.into_bytes();
                    debug!("Collected {} bytes for {}", bytes.len(), path);
 
                    let csv_content = String::from_utf8(bytes.to_vec())
                        .map_err(|e| format!("UTF-8 error in {}: {}", path, e))?;
                    debug!("Found {}: {} bytes", path, csv_content.len());
 
-                    let config_manager = ConfigManager::new(Arc::clone(&self.state.conn));
-                    if let Err(e) = config_manager.sync_gbot_config(&self.bot_id, &csv_content)
-                    {
-                        error!(
-                            "Failed to sync config for bot {} {}: {}",
-                            path, self.bot_id, e
-                        );
-                    } else {
-                        info!("Successfully synced config for bot {}", self.bot_id);
+
+
+                    // Restart LLaMA servers only if llm- properties changed
+                    let llm_lines: Vec<_> = csv_content
+                        .lines()
+                        .filter(|line| line.trim_start().starts_with("llm-"))
+                        .collect();
+
+                    if !llm_lines.is_empty() {
+                        use crate::llm_legacy::llm_local::ensure_llama_servers_running;
+                        let mut restart_needed = false;
+
+                        for line in llm_lines {
+                            let parts: Vec<&str> = line.split(',').collect();
+                            if parts.len() >= 2 {
+                                let key = parts[0].trim();
+                                let new_value = parts[1].trim();
+                                match config_manager.get_config(&self.bot_id, key, None) {
+                                    Ok(old_value) => {
+                                        if old_value != new_value {
+                                            info!("Detected change in {} (old: {}, new: {})", key, old_value, new_value);
+                                            restart_needed = true;
+                                        }
+                                    }
+                                    Err(_) => {
+                                        info!("New llm- property detected: {}", key);
+                                        restart_needed = true;
+                                    }
+                                }
+                            }
+                        }
+
+                        if restart_needed {
+                            info!("Detected llm- configuration change, restarting LLaMA servers...");
+                            if let Err(e) = ensure_llama_servers_running(&self.state).await {
+                                error!("Failed to restart LLaMA servers after llm- config change: {}", e);
+                            }
+                        } else {
+                            info!("No llm- property changes detected; skipping LLaMA server restart.");
+                        }
+                        config_manager.sync_gbot_config(&self.bot_id, &csv_content);
                    }
                }
                Err(e) => {
diff --git a/src/llm_legacy/llm_local.rs b/src/llm_legacy/llm_local.rs
index 4d8f67ec..6b54d91a 100644
--- a/src/llm_legacy/llm_local.rs
+++ b/src/llm_legacy/llm_local.rs
@@ -90,6 +90,20 @@ pub async fn ensure_llama_servers_running(
     info!(" Embedding Model: {}", embedding_model);
     info!(" LLM Server Path: {}", llm_server_path);
 
+
+    // Restart any existing llama-server processes before starting new ones
+    info!("🔁 Restarting any existing llama-server processes...");
+    if let Err(e) = tokio::process::Command::new("sh")
+        .arg("-c")
+        .arg("pkill -f llama-server || true")
+        .spawn()
+    {
+        error!("Failed to execute pkill for llama-server: {}", e);
+    } else {
+        sleep(Duration::from_secs(2)).await;
+        info!("✅ Existing llama-server processes terminated (if any)");
+    }
+
     // Check if servers are already running
     let llm_running = is_server_running(&llm_url).await;
     let embedding_running = is_server_running(&embedding_url).await;
@@ -99,6 +113,7 @@ pub async fn ensure_llama_servers_running(
 
         return Ok(());
     }
 
+    // Start servers that aren't running
     let mut tasks = vec![];
 
@@ -620,4 +635,3 @@ pub async fn embeddings_local(
 
     Ok(HttpResponse::Ok().json(openai_response))
 }
-
diff --git a/templates/default.gbai/default.gbot/config.csv b/templates/default.gbai/default.gbot/config.csv
index ece77065..ecc06a96 100644
--- a/templates/default.gbai/default.gbot/config.csv
+++ b/templates/default.gbai/default.gbot/config.csv
@@ -16,7 +16,7 @@ llm-server-path,botserver-stack/bin/llm/build/bin
 llm-server-host,0.0.0.0
 llm-server-port,8081
 llm-server-gpu-layers,35
-llm-server-n-moe,16
+llm-server-n-moe,23
 llm-server-ctx-size,16000
 llm-server-parallel,8
 llm-server-cont-batching,true
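
The restart rule added to check_gbot amounts to: re-read the llm-* lines from config.csv and restart llama-server if any key is new or carries a value different from what ConfigManager already holds. Below is a minimal sketch (not part of the patch) of that comparison as a standalone function, so the rule can be exercised without S3 or a database; the name llm_restart_needed and the lookup closure are hypothetical stand-ins for the patched loop and for ConfigManager::get_config, and the key,value CSV layout is taken from templates/default.gbai/default.gbot/config.csv.

// Sketch under the assumptions above; `lookup` returns the previously stored
// value for a key, if any, mirroring a successful config_manager.get_config call.
fn llm_restart_needed<F>(csv_content: &str, lookup: F) -> bool
where
    F: Fn(&str) -> Option<String>,
{
    csv_content
        .lines()
        // Only llm-* keys are relevant for a llama-server restart.
        .filter(|line| line.trim_start().starts_with("llm-"))
        .any(|line| {
            let parts: Vec<&str> = line.split(',').collect();
            if parts.len() >= 2 {
                let key = parts[0].trim();
                let new_value = parts[1].trim();
                // Restart if the key is new or its stored value differs.
                lookup(key).map_or(true, |old| old != new_value)
            } else {
                // Malformed lines (no comma) are ignored, as in the patch.
                false
            }
        })
}

fn main() {
    // Pretend the store still holds the old llm-server-n-moe value.
    let stored = |key: &str| match key {
        "llm-server-n-moe" => Some("16".to_string()),
        "llm-server-port" => Some("8081".to_string()),
        _ => None,
    };
    let csv = "llm-server-port,8081\nllm-server-n-moe,23\n";
    // llm-server-n-moe changed from 16 to 23, so a restart is flagged.
    assert!(llm_restart_needed(csv, stored));
    println!("restart needed: {}", llm_restart_needed(csv, stored));
}

With the sample config.csv change in this patch (llm-server-n-moe 16 -> 23), the check reports a change, which is what triggers ensure_llama_servers_running and the pkill-based restart added in llm_local.rs.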