Make cleaner executable and add LLM path checks
All checks were successful
GBCI / build (push) Successful in 7m33s

Adds existence checks for the llama path and model path, uses numactl on
Linux to spawn the child process, and handles Windows paths.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-10-01 07:44:34 -03:00
parent 4d60f2f208
commit 1aa79a30c4
2 changed files with 69 additions and 15 deletions

src/scripts/utils/cleaner.sh Normal file → Executable file
@@ -3,7 +3,9 @@ use dotenv::dotenv;
use log::{error, info};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::{env, process::Command};
use std::env;
use std::path::Path;
use std::process::{Command, Stdio};
use tokio::time::{sleep, Duration};
// OpenAI-compatible request/response structures
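
For context, a minimal sketch of what the OpenAI-compatible request/response structures referenced above might look like; the struct and field names here are assumptions for illustration and may differ from the definitions actually used in this file.

use serde::{Deserialize, Serialize};

// Hypothetical shapes; the real definitions live elsewhere in this file.
#[derive(Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

#[derive(Serialize, Deserialize)]
struct ChatCompletionRequest {
    model: String,
    messages: Vec<ChatMessage>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
}

#[derive(Deserialize)]
struct ChatCompletionChoice {
    message: ChatMessage,
}

#[derive(Deserialize)]
struct ChatCompletionResponse {
    choices: Vec<ChatCompletionChoice>,
}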
@@ -188,24 +190,77 @@ async fn start_llm_server(
std::env::set_var("OMP_PLACES", "cores");
std::env::set_var("OMP_PROC_BIND", "close");
let mut cmd = Command::new("sh");
// "cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 8192 --batch-size 8192 -n 4096 --mlock --no-mmap --flash-attn --no-kv-offload --no-mmap &",
// Verify paths exist
let llama_path = Path::new(&llama_cpp_path);
let model_path = Path::new(&model_path);
#[cfg(target_os = "linux")]
if !llama_path.exists() {
return Err(format!("Llama path does not exist: {}", llama_cpp_path).into());
}
if !model_path.exists() {
return Err(format!("Model path does not exist: {}", model_path.display()).into());
}
#[cfg(target_os = "linux")]
{
cmd.arg("-c").arg(format!(
"cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99 &",
llama_cpp_path, model_path, port
));
let executable_path = llama_path.join("llama-server");
if !executable_path.exists() {
return Err(format!("Executable not found: {}", executable_path.display()).into());
}
// Use absolute paths and proper process management
let mut cmd = Command::new("numactl");
cmd.arg("--interleave=all")
.arg(executable_path)
.arg("-m")
.arg(model_path)
.arg("--host")
.arg("0.0.0.0")
.arg("--port")
.arg(port)
.arg("--n-gpu-layers")
.arg("99")
.arg("--threads")
.arg("20")
.arg("--threads-batch")
.arg("40")
.current_dir(llama_path) // Set working directory
.stdout(Stdio::piped())
.stderr(Stdio::piped());
// Spawn and don't wait for completion
let child = cmd.spawn()?;
// Store the child process if you need to manage it later
// You might want to add this to a process manager
println!("LLM server started with PID: {}", child.id());
}
#[cfg(target_os = "windows")]
{
cmd.arg("/C").arg(format!(
"cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99",
llama_cpp_path, model_path, port
));
let executable_path = llama_path.join("llama-server.exe");
if !executable_path.exists() {
return Err(format!("Executable not found: {}", executable_path.display()).into());
}
let mut cmd = Command::new(executable_path);
cmd.arg("-m")
.arg(model_path)
.arg("--host")
.arg("0.0.0.0")
.arg("--port")
.arg(port)
.arg("--n-gpu-layers")
.arg("99")
.current_dir(llama_path)
.stdout(Stdio::piped())
.stderr(Stdio::piped());
let child = cmd.spawn()?;
println!("LLM server started with PID: {}", child.id());
}
Ok(())
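
The comment above suggests storing the spawned child in a process manager instead of dropping the handle. A minimal sketch of what that could look like, assuming a simple wrapper type (the name LlmProcessManager is hypothetical and not part of the commit):

use std::process::Child;

// Hypothetical wrapper; holds the llama-server child so it can be stopped later.
struct LlmProcessManager {
    child: Option<Child>,
}

impl LlmProcessManager {
    fn new() -> Self {
        Self { child: None }
    }

    // Take ownership of the handle returned by cmd.spawn()?.
    fn register(&mut self, child: Child) {
        self.child = Some(child);
    }

    // Kill the server and reap it so it does not linger as a zombie process.
    fn shutdown(&mut self) {
        if let Some(mut child) = self.child.take() {
            let _ = child.kill();
            let _ = child.wait();
        }
    }
}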
@@ -225,7 +280,6 @@ async fn start_embedding_server(
"cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99",
llama_cpp_path, model_path, port
));
}
#[cfg(any(target_os = "linux", target_os = "macos"))]
@@ -235,7 +289,7 @@ async fn start_embedding_server(
llama_cpp_path, model_path, port
));
}
cmd.spawn()?;
Ok(())
}
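
Because both servers are spawned without waiting, callers have no guarantee the HTTP endpoint is up when these functions return. A hedged sketch of a readiness poll a caller could run before issuing requests; it assumes the bundled llama-server exposes a /health route (recent llama.cpp builds do), and the function name is illustrative only:

// Hypothetical helper; polls the local server until it answers or times out.
async fn wait_until_ready(port: u16) -> Result<(), Box<dyn std::error::Error>> {
    let client = reqwest::Client::new();
    let url = format!("http://127.0.0.1:{}/health", port);
    for _ in 0..60 {
        if let Ok(resp) = client.get(&url).send().await {
            if resp.status().is_success() {
                return Ok(());
            }
        }
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
    }
    Err(format!("llama-server on port {} did not become ready", port).into())
}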