diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
index d47e8fa..83ab03d 100644
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@@ -178,13 +178,12 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Error>> {
 ) -> Result<(), Box<dyn std::error::Error>> {
-    let port = url.split(':').last().unwrap_or("8081");
+    let port = url.split(':').last().unwrap_or("8082");
 
     std::env::set_var("OMP_NUM_THREADS", "20");
     std::env::set_var("OMP_PLACES", "cores");
@@ -210,6 +209,10 @@ async fn start_llm_server(
             return Err(format!("Executable not found: {}", executable_path.display()).into());
         }
 
+        info!("Starting embedding server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
         // Use absolute paths and proper process management
         let mut cmd = Command::new("numactl");
         cmd.arg("--interleave=all")
@@ -220,6 +223,7 @@ async fn start_llm_server(
             .arg("0.0.0.0")
             .arg("--port")
             .arg(port)
+            .arg("--embedding")
             .arg("--n-gpu-layers")
             .arg("99")
             .arg("--threads")
@@ -235,7 +239,7 @@ async fn start_llm_server(
 
         // Store the child process if you need to manage it later
        // You might want to add this to a process manager
-        println!("LLM server started with PID: {}", child.id());
+        info!("Embedding server started with PID: {}", child.id());
    }
 
    #[cfg(target_os = "windows")]
@@ -246,6 +250,10 @@ async fn start_llm_server(
             return Err(format!("Executable not found: {}", executable_path.display()).into());
         }
 
+        info!("Starting embedding server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
         let mut cmd = Command::new(executable_path);
         cmd.arg("-m")
             .arg(model_path)
@@ -253,6 +261,7 @@ async fn start_llm_server(
             .arg("0.0.0.0")
             .arg("--port")
             .arg(port)
+            .arg("--embedding")
             .arg("--n-gpu-layers")
             .arg("99")
             .current_dir(llama_path)
@@ -260,40 +269,11 @@ async fn start_llm_server(
             .stderr(Stdio::piped());
 
         let child = cmd.spawn()?;
-        println!("LLM server started with PID: {}", child.id());
+        info!("Embedding server started with PID: {}", child.id());
    }
 
    Ok(())
 }
-
-async fn start_embedding_server(
-    llama_cpp_path: String,
-    model_path: String,
-    url: String,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let port = url.split(':').last().unwrap_or("8082");
-    let mut cmd = Command::new("cmd");
-
-    #[cfg(target_os = "windows")]
-    {
-        cmd.arg("/C").arg(format!(
-            "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99",
-            llama_cpp_path, model_path, port
-        ));
-    }
-
-    #[cfg(any(target_os = "linux", target_os = "macos"))]
-    {
-        cmd.arg("-c").arg(format!(
-            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99 &",
-            llama_cpp_path, model_path, port
-        ));
-    }
-
-    cmd.spawn()?;
-    Ok(())
-}
-
 async fn is_server_running(url: &str) -> bool {
     let client = reqwest::Client::new();
     match client.get(&format!("{}/health", url)).send().await {
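
Reviewer note: below is a minimal sketch (not part of this patch) of how a caller might wait for the newly spawned embedding server to come up, by polling the same `/health` endpoint that `is_server_running` already checks. The `wait_for_health` helper, the `127.0.0.1:8082` URL, and the timeout values are illustrative assumptions; the only thing taken from the patch is that the server listens on the port parsed from the URL (default now `8082`) and exposes `{url}/health`.

```rust
use std::time::Duration;

/// Hypothetical helper: poll `{url}/health` until llama-server responds
/// successfully or the deadline passes. Mirrors the check in `is_server_running`.
async fn wait_for_health(url: &str, timeout: Duration) -> bool {
    let client = reqwest::Client::new();
    let deadline = tokio::time::Instant::now() + timeout;
    while tokio::time::Instant::now() < deadline {
        // A 2xx response from /health means the server is up and the model is loaded.
        if let Ok(resp) = client.get(format!("{}/health", url)).send().await {
            if resp.status().is_success() {
                return true;
            }
        }
        tokio::time::sleep(Duration::from_millis(500)).await;
    }
    false
}

#[tokio::main]
async fn main() {
    // Assumes the new default embedding port 8082 from this patch.
    if wait_for_health("http://127.0.0.1:8082", Duration::from_secs(120)).await {
        println!("embedding server is ready");
    } else {
        eprintln!("embedding server did not become healthy in time");
    }
}
```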