diff --git a/src/scripts/utils/cleaner.sh b/src/scripts/utils/cleaner.sh
old mode 100644
new mode 100755
diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
index 023a5f6..d47e8fa 100644
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@@ -3,7 +3,9 @@ use dotenv::dotenv;
 use log::{error, info};
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
-use std::{env, process::Command};
+use std::env;
+use std::path::Path;
+use std::process::{Command, Stdio};
 use tokio::time::{sleep, Duration};
 
 // OpenAI-compatible request/response structures
@@ -188,24 +190,77 @@ async fn start_llm_server(
     std::env::set_var("OMP_PLACES", "cores");
     std::env::set_var("OMP_PROC_BIND", "close");
 
-    let mut cmd = Command::new("sh");
-    // "cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 8192 --batch-size 8192 -n 4096 --mlock --no-mmap --flash-attn --no-kv-offload --no-mmap &",
+    // Verify paths exist
+    let llama_path = Path::new(&llama_cpp_path);
+    let model_path = Path::new(&model_path);
 
-    #[cfg(target_os = "linux")]
+    if !llama_path.exists() {
+        return Err(format!("Llama path does not exist: {}", llama_cpp_path).into());
+    }
+
+    if !model_path.exists() {
+        return Err(format!("Model path does not exist: {}", model_path.display()).into());
+    }
+
+    #[cfg(target_os = "linux")]
     {
-        cmd.arg("-c").arg(format!(
-            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99 &",
-            llama_cpp_path, model_path, port
-        ));
-
+        let executable_path = llama_path.join("llama-server");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        // Use absolute paths and proper process management
+        let mut cmd = Command::new("numactl");
+        cmd.arg("--interleave=all")
+            .arg(executable_path)
+            .arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .arg("--threads")
+            .arg("20")
+            .arg("--threads-batch")
+            .arg("40")
+            .current_dir(llama_path) // Set working directory
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        // Spawn and don't wait for completion
+        let child = cmd.spawn()?;
+
+        // Store the child process if you need to manage it later
+        // You might want to add this to a process manager
+        println!("LLM server started with PID: {}", child.id());
     }
 
     #[cfg(target_os = "windows")]
     {
-        cmd.arg("/C").arg(format!(
-            "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99",
-            llama_cpp_path, model_path, port
-        ));
+        let executable_path = llama_path.join("llama-server.exe");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        let mut cmd = Command::new(executable_path);
+        cmd.arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .current_dir(llama_path)
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        let child = cmd.spawn()?;
+        println!("LLM server started with PID: {}", child.id());
     }
 
     Ok(())
@@ -225,7 +280,6 @@ async fn start_embedding_server(
             "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99",
             llama_cpp_path, model_path, port
         ));
-
     }
 
     #[cfg(any(target_os = "linux", target_os = "macos"))]
@@ -235,7 +289,7 @@
     {
         cmd.arg("-c").arg(format!(
            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99 &",
            llama_cpp_path, model_path, port
        ));
    }
-
+
     cmd.spawn()?;
     Ok(())
 }
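
Note on the TODO comments in the Linux and Windows branches: the spawned `Child` handle is currently dropped right after its PID is printed, so the server process can never be stopped or reaped from this code. A minimal sketch of the kind of process manager the comments allude to is shown below, assuming the handle would be kept somewhere with the lifetime of the service; `ServerHandle` and its methods are hypothetical names, not part of this diff.

// Illustrative sketch only: retain the spawned llama-server Child so it can be
// shut down later, instead of printing the PID and dropping the handle.
use std::process::{Child, Command, Stdio};

/// Wraps a spawned server process so it can be terminated on shutdown.
pub struct ServerHandle {
    child: Child,
}

impl ServerHandle {
    /// Spawn the prepared command and keep the Child handle instead of discarding it.
    pub fn spawn(mut cmd: Command) -> std::io::Result<Self> {
        let child = cmd.stdout(Stdio::piped()).stderr(Stdio::piped()).spawn()?;
        Ok(Self { child })
    }

    /// Kill the child process and wait on it so no zombie is left behind.
    pub fn shutdown(&mut self) -> std::io::Result<()> {
        self.child.kill()?;
        self.child.wait()?;
        Ok(())
    }
}

impl Drop for ServerHandle {
    fn drop(&mut self) {
        // Best-effort cleanup if shutdown() was never called explicitly.
        let _ = self.child.kill();
        let _ = self.child.wait();
    }
}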