feat(drive_monitor): restart LLaMA servers on llm- config changes
Add logic to detect changes in llm-related configuration properties when syncing gbot configs. If any llm- keys differ from stored values, automatically restart LLaMA servers to apply updates. This ensures model servers stay in sync with configuration changes without unnecessary restarts.
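In outline, the check compares each llm- key,value pair in the incoming CSV against the value already stored for the bot, and flags a restart on any difference or any new key. A minimal sketch of that comparison, with a plain HashMap standing in for the values ConfigManager has stored (the real code below calls get_config per key):

    use std::collections::HashMap;

    // Sketch only: a HashMap stands in for ConfigManager's stored values.
    fn llm_config_changed(csv_content: &str, stored: &HashMap<String, String>) -> bool {
        csv_content
            .lines()
            .filter(|line| line.trim_start().starts_with("llm-"))
            .filter_map(|line| {
                let mut parts = line.splitn(2, ',');
                Some((parts.next()?.trim(), parts.next()?.trim()))
            })
            // A differing value or a brand-new llm- key both warrant a restart.
            .any(|(key, new_value)| stored.get(key).map(String::as_str) != Some(new_value))
    }

    fn main() {
        let stored = HashMap::from([("llm-server-n-moe".to_string(), "16".to_string())]);
        assert!(llm_config_changed("llm-server-n-moe,23", &stored));
        assert!(!llm_config_changed("llm-server-n-moe,16", &stored));
    }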
parent 6d68585c71
commit 4e781b1815

3 changed files with 63 additions and 12 deletions
@@ -232,9 +232,13 @@ impl DriveMonitor {
    }

    async fn check_gbot(&self, client: &Client) -> Result<(), Box<dyn Error + Send + Sync>> {
        let config_manager = ConfigManager::new(Arc::clone(&self.state.conn));
        let mut continuation_token = None;

        loop {
            let list_objects = client
                .list_objects_v2()
                .bucket(&self.bucket_name.to_lowercase())
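For context, check_gbot walks the bucket with the standard aws-sdk-s3 pagination pattern. A self-contained sketch of that loop (assuming a recent aws-sdk-s3; list_all_keys and its error handling are illustrative, not lifted from the codebase):

    use aws_sdk_s3::Client;

    // Illustrative pagination over list_objects_v2, mirroring the
    // continuation_token handling in check_gbot.
    async fn list_all_keys(client: &Client, bucket: &str) -> Result<Vec<String>, aws_sdk_s3::Error> {
        let mut keys = Vec::new();
        let mut continuation_token: Option<String> = None;
        loop {
            let resp = client
                .list_objects_v2()
                .bucket(bucket)
                .set_continuation_token(continuation_token.take())
                .send()
                .await?;
            for obj in resp.contents() {
                keys.extend(obj.key().map(String::from));
            }
            // S3 returns a next token only when the listing was truncated.
            match resp.next_continuation_token() {
                Some(token) => continuation_token = Some(token.to_string()),
                None => break,
            }
        }
        Ok(keys)
    }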
@@ -279,21 +283,54 @@ impl DriveMonitor {
                        response.content_length().unwrap_or(0)
                    );

                    let bytes = response.body.collect().await?.into_bytes();
                    debug!("Collected {} bytes for {}", bytes.len(), path);
                    let csv_content = String::from_utf8(bytes.to_vec())
                        .map_err(|e| format!("UTF-8 error in {}: {}", path, e))?;
                    debug!("Found {}: {} bytes", path, csv_content.len());

                    let config_manager = ConfigManager::new(Arc::clone(&self.state.conn));
                    if let Err(e) = config_manager.sync_gbot_config(&self.bot_id, &csv_content) {
                        error!(
                            "Failed to sync config {} for bot {}: {}",
                            path, self.bot_id, e
                        );
                    } else {
                        info!("Successfully synced config for bot {}", self.bot_id);

                        // Restart LLaMA servers only if llm- properties changed
                        let llm_lines: Vec<_> = csv_content
                            .lines()
                            .filter(|line| line.trim_start().starts_with("llm-"))
                            .collect();

                        if !llm_lines.is_empty() {
                            use crate::llm_legacy::llm_local::ensure_llama_servers_running;
                            let mut restart_needed = false;

                            for line in llm_lines {
                                let parts: Vec<&str> = line.split(',').collect();
                                if parts.len() >= 2 {
                                    let key = parts[0].trim();
                                    let new_value = parts[1].trim();
                                    match config_manager.get_config(&self.bot_id, key, None) {
                                        Ok(old_value) => {
                                            if old_value != new_value {
                                                info!(
                                                    "Detected change in {} (old: {}, new: {})",
                                                    key, old_value, new_value
                                                );
                                                restart_needed = true;
                                            }
                                        }
                                        Err(_) => {
                                            info!("New llm- property detected: {}", key);
                                            restart_needed = true;
                                        }
                                    }
                                }
                            }

                            if restart_needed {
                                info!("Detected llm- configuration change, restarting LLaMA servers...");
                                if let Err(e) = ensure_llama_servers_running(&self.state).await {
                                    error!("Failed to restart LLaMA servers after llm- config change: {}", e);
                                }
                            } else {
                                info!("No llm- property changes detected; skipping LLaMA server restart.");
                            }

                            config_manager.sync_gbot_config(&self.bot_id, &csv_content);
                        }
                    }
                }
                Err(e) => {
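One detail worth noting in the loop above: split(',') followed by parts[0]/parts[1] keeps only the text between the first and second comma, so a value that itself contained a comma would be silently truncated. None of the current llm- values contain commas, but splitting once on the first comma is the safer pattern. A hypothetical helper (parse_kv and llm-extra-args are invented for illustration):

    // Split a "key,value" line on the first comma only, preserving commas
    // inside the value (split(',') + parts[1] would drop everything after
    // the second comma).
    fn parse_kv(line: &str) -> Option<(&str, &str)> {
        let mut parts = line.splitn(2, ',');
        Some((parts.next()?.trim(), parts.next()?.trim()))
    }

    fn main() {
        assert_eq!(parse_kv("llm-server-port,8081"), Some(("llm-server-port", "8081")));
        assert_eq!(
            parse_kv("llm-extra-args,--ctx 16000,--parallel 8"),
            Some(("llm-extra-args", "--ctx 16000,--parallel 8"))
        );
    }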
@@ -90,6 +90,20 @@ pub async fn ensure_llama_servers_running(
    info!("  Embedding Model: {}", embedding_model);
    info!("  LLM Server Path: {}", llm_server_path);

    // Restart any existing llama-server processes before starting new ones
    info!("🔁 Restarting any existing llama-server processes...");
    if let Err(e) = tokio::process::Command::new("sh")
        .arg("-c")
        .arg("pkill -f llama-server || true")
        .spawn()
    {
        error!("Failed to execute pkill for llama-server: {}", e);
    } else {
        sleep(Duration::from_secs(2)).await;
        info!("✅ Existing llama-server processes terminated (if any)");
    }

    // Check if servers are already running
    let llm_running = is_server_running(&llm_url).await;
    let embedding_running = is_server_running(&embedding_url).await;
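A side note on the shutdown step above: .spawn() returns as soon as the shell process has started, so the fixed two-second sleep is doing all of the waiting. If the intent is to block until pkill itself has finished, tokio's Command also provides .status(); a sketch of that variant (kill_existing_llama_servers is an invented name, not a function in this codebase):

    use std::time::Duration;
    use tokio::time::sleep;

    // Wait for pkill to exit before sleeping, instead of racing it.
    async fn kill_existing_llama_servers() -> std::io::Result<()> {
        let status = tokio::process::Command::new("sh")
            .arg("-c")
            .arg("pkill -f llama-server || true")
            .status()
            .await?;
        if status.success() {
            // The "|| true" makes this branch the normal path; the short
            // sleep gives the killed servers time to release their ports.
            sleep(Duration::from_secs(2)).await;
        }
        Ok(())
    }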
@@ -99,6 +113,7 @@ pub async fn ensure_llama_servers_running(
        return Ok(());
    }

    // Start servers that aren't running
    let mut tasks = vec![];
@@ -620,4 +635,3 @@ pub async fn embeddings_local(

    Ok(HttpResponse::Ok().json(openai_response))
}
@@ -16,7 +16,7 @@ llm-server-path,botserver-stack/bin/llm/build/bin
llm-server-host,0.0.0.0
llm-server-port,8081
llm-server-gpu-layers,35
-llm-server-n-moe,16
+llm-server-n-moe,23
llm-server-ctx-size,16000
llm-server-parallel,8
llm-server-cont-batching,true
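This last hunk is exactly the kind of edit the new detection targets: llm-server-n-moe changes from 16 to 23, so the next gbot config sync sees a differing llm- key and restarts the LLaMA servers, while edits to non-llm- keys leave the running servers alone.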