diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
index 83ab03d..7ba198b 100644
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@@ -178,6 +178,102 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error>> {
+async fn start_llm_server(
+    llama_cpp_path: String,
+    model_path: String,
+    url: String,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let port = url.split(':').last().unwrap_or("8081");
+
+    std::env::set_var("OMP_NUM_THREADS", "20");
+    std::env::set_var("OMP_PLACES", "cores");
+    std::env::set_var("OMP_PROC_BIND", "close");
+
+    // Verify paths exist
+    let llama_path = Path::new(&llama_cpp_path);
+    let model_path = Path::new(&model_path);
+
+    if !llama_path.exists() {
+        return Err(format!("Llama path does not exist: {}", llama_cpp_path).into());
+    }
+
+    if !model_path.exists() {
+        return Err(format!("Model path does not exist: {}", model_path.display()).into());
+    }
+
+    #[cfg(target_os = "linux")]
+    {
+        let executable_path = llama_path.join("llama-server");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        info!("Starting LLM server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
+        // Use absolute paths and proper process management
+        let mut cmd = Command::new("numactl");
+        cmd.arg("--interleave=all")
+            .arg(executable_path)
+            .arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .arg("--threads")
+            .arg("20")
+            .arg("--threads-batch")
+            .arg("40")
+            .current_dir(llama_path) // Set working directory
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        // Spawn and don't wait for completion
+        let child = cmd.spawn()?;
+
+        // Store the child process if you need to manage it later
+        // You might want to add this to a process manager
+        info!("LLM server started with PID: {}", child.id());
+    }
+
+    #[cfg(target_os = "windows")]
+    {
+        let executable_path = llama_path.join("llama-server.exe");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        info!("Starting LLM server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
+        let mut cmd = Command::new(executable_path);
+        cmd.arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .current_dir(llama_path)
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        let child = cmd.spawn()?;
+        info!("LLM server started with PID: {}", child.id());
+    }
+
+    Ok(())
+}
+
 async fn start_embedding_server(
     llama_cpp_path: String,
     model_path: String,
@@ -274,6 +370,7 @@ async fn start_embedding_server(
     Ok(())
 }
 
+
 async fn is_server_running(url: &str) -> bool {
     let client = reqwest::Client::new();
     match client.get(&format!("{}/health", url)).send().await {