feat(drive_monitor): restart LLaMA servers on llm- config changes
Add logic to detect changes in llm-related configuration properties when syncing gbot configs. If any llm- keys differ from stored values, automatically restart LLaMA servers to apply updates. This ensures model servers stay in sync with configuration changes without unnecessary restarts.
parent 6d68585c71
commit 4e781b1815

3 changed files with 63 additions and 12 deletions
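The core of the change is a predicate over the synced CSV: does any llm- row differ from what ConfigManager already holds? A minimal, self-contained sketch of that predicate — the name llm_restart_needed and the stored map (standing in for ConfigManager::get_config) are illustrative, not part of the patch:

    use std::collections::HashMap;

    /// True if any "llm-" row in the CSV differs from, or is absent in, the
    /// stored configuration. `stored` stands in for the ConfigManager lookup.
    fn llm_restart_needed(csv: &str, stored: &HashMap<String, String>) -> bool {
        csv.lines()
            .filter(|line| line.trim_start().starts_with("llm-"))
            .filter_map(|line| {
                let mut parts = line.splitn(2, ',');
                Some((parts.next()?.trim(), parts.next()?.trim()))
            })
            .any(|(key, new_value)| match stored.get(key) {
                Some(old_value) => old_value != new_value, // changed value
                None => true,                              // new llm- property
            })
    }

    fn main() {
        let stored: HashMap<_, _> =
            [("llm-server-n-moe".to_string(), "16".to_string())]
                .into_iter()
                .collect();
        assert!(llm_restart_needed("llm-server-n-moe,23", &stored)); // changed
        assert!(!llm_restart_needed("llm-server-n-moe,16", &stored)); // same
    }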
@@ -232,9 +232,13 @@ impl DriveMonitor {
     }
 
     async fn check_gbot(&self, client: &Client) -> Result<(), Box<dyn Error + Send + Sync>> {
+        let config_manager = ConfigManager::new(Arc::clone(&self.state.conn));
+
         let mut continuation_token = None;
 
         loop {
             let list_objects = client
                 .list_objects_v2()
                 .bucket(&self.bucket_name.to_lowercase())
@@ -279,21 +283,54 @@ impl DriveMonitor {
                         response.content_length().unwrap_or(0)
                     );
 
                     let bytes = response.body.collect().await?.into_bytes();
                     debug!("Collected {} bytes for {}", bytes.len(), path);
                     let csv_content = String::from_utf8(bytes.to_vec())
                         .map_err(|e| format!("UTF-8 error in {}: {}", path, e))?;
                     debug!("Found {}: {} bytes", path, csv_content.len());
 
-                    let config_manager = ConfigManager::new(Arc::clone(&self.state.conn));
-                    if let Err(e) = config_manager.sync_gbot_config(&self.bot_id, &csv_content)
-                    {
-                        error!(
-                            "Failed to sync config for bot {} {}: {}",
-                            path, self.bot_id, e
-                        );
-                    } else {
-                        info!("Successfully synced config for bot {}", self.bot_id);
-                    }
+                    // Restart LLaMA servers only if llm- properties changed
+                    let llm_lines: Vec<_> = csv_content
+                        .lines()
+                        .filter(|line| line.trim_start().starts_with("llm-"))
+                        .collect();
+
+                    if !llm_lines.is_empty() {
+                        use crate::llm_legacy::llm_local::ensure_llama_servers_running;
+                        let mut restart_needed = false;
+
+                        for line in llm_lines {
+                            let parts: Vec<&str> = line.split(',').collect();
+                            if parts.len() >= 2 {
+                                let key = parts[0].trim();
+                                let new_value = parts[1].trim();
+                                match config_manager.get_config(&self.bot_id, key, None) {
+                                    Ok(old_value) => {
+                                        if old_value != new_value {
+                                            info!("Detected change in {} (old: {}, new: {})", key, old_value, new_value);
+                                            restart_needed = true;
+                                        }
+                                    }
+                                    Err(_) => {
+                                        info!("New llm- property detected: {}", key);
+                                        restart_needed = true;
+                                    }
+                                }
+                            }
+                        }
+
+                        if restart_needed {
+                            info!("Detected llm- configuration change, restarting LLaMA servers...");
+                            if let Err(e) = ensure_llama_servers_running(&self.state).await {
+                                error!("Failed to restart LLaMA servers after llm- config change: {}", e);
+                            }
+                        } else {
+                            info!("No llm- property changes detected; skipping LLaMA server restart.");
+                        }
+                    }
+
+                    config_manager.sync_gbot_config(&self.bot_id, &csv_content);
                 }
             }
             Err(e) => {
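One detail worth noting in the detection loop above: line.split(',') followed by parts[1] keeps only the text before the second comma, so a hypothetical llm- value that itself contains commas would be compared truncated. A sketch of the difference, using a made-up llm-server-args key that is not in this repo's config:

    fn main() {
        // Hypothetical llm- row whose value itself contains commas.
        let line = "llm-server-args,--flash-attn,--mlock";

        // split(',') + parts[1]: only the first comma-separated piece of the value.
        let parts: Vec<&str> = line.split(',').collect();
        assert_eq!(parts[1], "--flash-attn");

        // splitn(2, ','): the whole value survives intact.
        let mut kv = line.splitn(2, ',');
        let (key, value) = (kv.next().unwrap(), kv.next().unwrap());
        assert_eq!(key, "llm-server-args");
        assert_eq!(value, "--flash-attn,--mlock");
    }

For the single-valued keys the config currently uses, both approaches agree, so the patch's split(',') is sufficient as written.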
@@ -90,6 +90,20 @@ pub async fn ensure_llama_servers_running(
     info!(" Embedding Model: {}", embedding_model);
     info!(" LLM Server Path: {}", llm_server_path);
 
+    // Restart any existing llama-server processes before starting new ones
+    info!("🔁 Restarting any existing llama-server processes...");
+    if let Err(e) = tokio::process::Command::new("sh")
+        .arg("-c")
+        .arg("pkill -f llama-server || true")
+        .spawn()
+    {
+        error!("Failed to execute pkill for llama-server: {}", e);
+    } else {
+        sleep(Duration::from_secs(2)).await;
+        info!("✅ Existing llama-server processes terminated (if any)");
+    }
+
     // Check if servers are already running
     let llm_running = is_server_running(&llm_url).await;
     let embedding_running = is_server_running(&embedding_url).await;
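Note that .spawn() only launches pkill; it does not wait for it to finish, so the two-second sleep is doing all of the synchronization here. A variant sketch that awaits the exit status before sleeping — an alternative under the same assumptions, not what the patch does (requires tokio with the process, time, rt, and macros features):

    use tokio::process::Command;
    use tokio::time::{sleep, Duration};

    // Sketch only: run pkill to completion, then start the grace period.
    async fn kill_existing_llama_servers() {
        match Command::new("sh")
            .arg("-c")
            .arg("pkill -f llama-server || true")
            .status() // runs the command to completion, unlike spawn()
            .await
        {
            Ok(_) => {
                // Give the killed servers a moment to release their ports.
                sleep(Duration::from_secs(2)).await;
            }
            Err(e) => eprintln!("failed to run pkill: {e}"),
        }
    }

    #[tokio::main]
    async fn main() {
        kill_existing_llama_servers().await;
    }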
@@ -99,6 +113,7 @@ pub async fn ensure_llama_servers_running(
         return Ok(());
     }
 
+
     // Start servers that aren't running
     let mut tasks = vec![];
 
@@ -620,4 +635,3 @@ pub async fn embeddings_local(
 
     Ok(HttpResponse::Ok().json(openai_response))
 }
-
@@ -16,7 +16,7 @@ llm-server-path,botserver-stack/bin/llm/build/bin
 llm-server-host,0.0.0.0
 llm-server-port,8081
 llm-server-gpu-layers,35
-llm-server-n-moe,16
+llm-server-n-moe,23
 llm-server-ctx-size,16000
 llm-server-parallel,8
 llm-server-cont-batching,true
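This config row is exactly the kind of edit the new drive_monitor logic reacts to: llm-server-n-moe passes the llm- filter, the stored value 16 differs from the incoming 23, so restart_needed flips and the servers are relaunched. A toy trace of that path, assuming get_config behaves as a plain key lookup:

    fn main() {
        let stored = "16";               // value ConfigManager currently holds
        let row = "llm-server-n-moe,23"; // row arriving in the synced CSV

        let mut kv = row.splitn(2, ',');
        let (key, new_value) = (kv.next().unwrap().trim(), kv.next().unwrap().trim());

        assert!(key.starts_with("llm-")); // survives the llm- filter
        assert_ne!(stored, new_value);    // differs: restart_needed = true
        println!("{key} changed {stored} -> {new_value}: restarting LLaMA servers");
    }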