Make cleaner executable and add LLM path checks
All checks were successful
GBCI / build (push) Successful in 7m33s
Adds existence checks for the llama path and the model path, uses numactl on Linux to spawn the child process, and handles Windows paths.
This commit is contained in:
parent 4d60f2f208
commit 1aa79a30c4
2 changed files with 69 additions and 15 deletions
src/scripts/utils/cleaner.sh (0 changes) Normal file → Executable file
@@ -3,7 +3,9 @@ use dotenv::dotenv;
 use log::{error, info};
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
-use std::{env, process::Command};
+use std::env;
+use std::path::Path;
+use std::process::{Command, Stdio};
 use tokio::time::{sleep, Duration};
 
 // OpenAI-compatible request/response structures
@@ -188,24 +190,77 @@ async fn start_llm_server(
     std::env::set_var("OMP_PLACES", "cores");
     std::env::set_var("OMP_PROC_BIND", "close");
 
-    let mut cmd = Command::new("sh");
-    // "cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 8192 --batch-size 8192 -n 4096 --mlock --no-mmap --flash-attn --no-kv-offload --no-mmap &",
+    // Verify paths exist
+    let llama_path = Path::new(&llama_cpp_path);
+    let model_path = Path::new(&model_path);
+
+    if !llama_path.exists() {
+        return Err(format!("Llama path does not exist: {}", llama_cpp_path).into());
+    }
+
+    if !model_path.exists() {
+        return Err(format!("Model path does not exist: {}", model_path.display()).into());
+    }
 
     #[cfg(target_os = "linux")]
     {
-        cmd.arg("-c").arg(format!(
-            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99 &",
-            llama_cpp_path, model_path, port
-        ));
+        let executable_path = llama_path.join("llama-server");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        // Use absolute paths and proper process management
+        let mut cmd = Command::new("numactl");
+        cmd.arg("--interleave=all")
+            .arg(executable_path)
+            .arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .arg("--threads")
+            .arg("20")
+            .arg("--threads-batch")
+            .arg("40")
+            .current_dir(llama_path) // Set working directory
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        // Spawn and don't wait for completion
+        let child = cmd.spawn()?;
+
+        // Store the child process if you need to manage it later
+        // You might want to add this to a process manager
+        println!("LLM server started with PID: {}", child.id());
     }
 
     #[cfg(target_os = "windows")]
     {
-        cmd.arg("/C").arg(format!(
-            "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99",
-            llama_cpp_path, model_path, port
-        ));
+        let executable_path = llama_path.join("llama-server.exe");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        let mut cmd = Command::new(executable_path);
+        cmd.arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .current_dir(llama_path)
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        let child = cmd.spawn()?;
+        println!("LLM server started with PID: {}", child.id());
     }
 
     Ok(())
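Review note on the spawn code above: both branches set .stdout(Stdio::piped()) and .stderr(Stdio::piped()) but never read the handles, so llama-server can block once an OS pipe buffer fills. A minimal sketch of draining both streams on background threads (drain_output and the log prefix are illustrative, not part of this commit):

use std::io::{BufRead, BufReader};
use std::process::Child;
use std::thread;

// Read both pipes to EOF on background threads so the child never
// stalls on a full pipe buffer.
fn drain_output(child: &mut Child) {
    if let Some(stdout) = child.stdout.take() {
        thread::spawn(move || {
            for line in BufReader::new(stdout).lines().flatten() {
                println!("llama-server: {}", line);
            }
        });
    }
    if let Some(stderr) = child.stderr.take() {
        thread::spawn(move || {
            for line in BufReader::new(stderr).lines().flatten() {
                eprintln!("llama-server: {}", line);
            }
        });
    }
}

If the output does not need to be captured at all, Stdio::inherit() sidesteps the problem entirely.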
@@ -225,7 +280,6 @@ async fn start_embedding_server(
             "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99",
             llama_cpp_path, model_path, port
         ));
-
     }
 
     #[cfg(any(target_os = "linux", target_os = "macos"))]
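A second note: the new Linux branch assumes numactl is installed; if it is not, Command::new("numactl") fails at spawn time with a NotFound error rather than at the path checks. A hedged preflight sketch (numactl_available is a hypothetical helper, not in this codebase):

use std::process::Command;

// True only if numactl runs and exits 0; `--hardware` just prints the
// NUMA topology, so it is a cheap probe.
fn numactl_available() -> bool {
    Command::new("numactl")
        .arg("--hardware")
        .output()
        .map(|out| out.status.success())
        .unwrap_or(false)
}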
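Finally, the commit's own comment ("You might want to add this to a process manager") leaves the Child handle dropped after printing the PID. One possible shape for keeping it around, purely illustrative (LlmProcess does not exist in this codebase):

use std::process::Child;

// Illustrative holder so the spawned server can be stopped later
// instead of only having its PID printed.
struct LlmProcess {
    child: Child,
}

impl LlmProcess {
    fn stop(&mut self) -> std::io::Result<()> {
        self.child.kill()?; // SIGKILL on Unix, TerminateProcess on Windows
        self.child.wait()?; // reap the process to avoid a zombie
        Ok(())
    }
}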