Make cleaner executable and add LLM path checks
All checks were successful
GBCI / build (push) Successful in 7m33s
Adds existence checks for the llama path and the model path, uses numactl on Linux to spawn the child process, and handles Windows paths.
This commit is contained in:
parent 4d60f2f208
commit 1aa79a30c4
2 changed files with 69 additions and 15 deletions
src/scripts/utils/cleaner.sh (0 changes, Normal file → Executable file)
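
The change below replaces the old "sh -c \"cd ... && ./llama-server ... &\"" invocation with explicit existence checks and a direct spawn (through numactl on Linux). As a reading aid, here is a condensed, self-contained sketch of the resulting Linux flow; it is not part of the commit, and the function name spawn_llama_server and the u16 port type are assumptions.

    // Condensed sketch of the Linux start-up flow this commit introduces
    // (illustrative only; names and types here are not taken from the diff).
    use std::path::Path;
    use std::process::{Command, Stdio};

    fn spawn_llama_server(
        llama_cpp_path: &str,
        model_path: &str,
        port: u16,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let llama_dir = Path::new(llama_cpp_path);
        let model = Path::new(model_path);

        // Fail early if either path is missing, as the commit does.
        if !llama_dir.exists() {
            return Err(format!("Llama path does not exist: {}", llama_cpp_path).into());
        }
        if !model.exists() {
            return Err(format!("Model path does not exist: {}", model.display()).into());
        }

        // The server binary is expected inside the llama.cpp directory.
        let server_bin = llama_dir.join("llama-server");
        if !server_bin.exists() {
            return Err(format!("Executable not found: {}", server_bin.display()).into());
        }

        // Spawn through numactl instead of a backgrounded shell command.
        let child = Command::new("numactl")
            .arg("--interleave=all")
            .arg(&server_bin)
            .arg("-m")
            .arg(model)
            .arg("--host")
            .arg("0.0.0.0")
            .arg("--port")
            .arg(port.to_string())
            .current_dir(llama_dir)
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()?;

        println!("LLM server started with PID: {}", child.id());
        Ok(())
    }
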
@@ -3,7 +3,9 @@ use dotenv::dotenv;
 use log::{error, info};
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
-use std::{env, process::Command};
+use std::env;
+use std::path::Path;
+use std::process::{Command, Stdio};
 use tokio::time::{sleep, Duration};
 
 // OpenAI-compatible request/response structures
@@ -188,24 +190,77 @@ async fn start_llm_server(
     std::env::set_var("OMP_PLACES", "cores");
     std::env::set_var("OMP_PROC_BIND", "close");
 
-    let mut cmd = Command::new("sh");
-    // "cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 8192 --batch-size 8192 -n 4096 --mlock --no-mmap --flash-attn --no-kv-offload --no-mmap &",
-
+    // Verify paths exist
+    let llama_path = Path::new(&llama_cpp_path);
+    let model_path = Path::new(&model_path);
 
-    #[cfg(target_os = "linux")]
+    if !llama_path.exists() {
+        return Err(format!("Llama path does not exist: {}", llama_cpp_path).into());
+    }
+
+    if !model_path.exists() {
+        return Err(format!("Model path does not exist: {}", model_path.display()).into());
+    }
+
+    #[cfg(target_os = "linux")]
     {
-        cmd.arg("-c").arg(format!(
-            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99 &",
-            llama_cpp_path, model_path, port
-        ));
+        let executable_path = llama_path.join("llama-server");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        // Use absolute paths and proper process management
+        let mut cmd = Command::new("numactl");
+        cmd.arg("--interleave=all")
+            .arg(executable_path)
+            .arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .arg("--threads")
+            .arg("20")
+            .arg("--threads-batch")
+            .arg("40")
+            .current_dir(llama_path) // Set working directory
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        // Spawn and don't wait for completion
+        let child = cmd.spawn()?;
+
+        // Store the child process if you need to manage it later
+        // You might want to add this to a process manager
+        println!("LLM server started with PID: {}", child.id());
     }
 
     #[cfg(target_os = "windows")]
     {
-        cmd.arg("/C").arg(format!(
-            "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99",
-            llama_cpp_path, model_path, port
-        ));
+        let executable_path = llama_path.join("llama-server.exe");
+
+        if !executable_path.exists() {
+            return Err(format!("Executable not found: {}", executable_path.display()).into());
+        }
+
+        let mut cmd = Command::new(executable_path);
+        cmd.arg("-m")
+            .arg(model_path)
+            .arg("--host")
+            .arg("0.0.0.0")
+            .arg("--port")
+            .arg(port)
+            .arg("--n-gpu-layers")
+            .arg("99")
+            .current_dir(llama_path)
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped());
+
+        let child = cmd.spawn()?;
+        println!("LLM server started with PID: {}", child.id());
     }
 
     Ok(())
@@ -225,7 +280,6 @@ async fn start_embedding_server(
             "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99",
             llama_cpp_path, model_path, port
         ));
-
     }
 
     #[cfg(any(target_os = "linux", target_os = "macos"))]
@@ -235,7 +289,7 @@ async fn start_embedding_server(
             llama_cpp_path, model_path, port
         ));
     }
 
     cmd.spawn()?;
     Ok(())
 }
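
The new Linux block prints the child's PID and leaves a comment about handing the process to a manager ("You might want to add this to a process manager"). A sketch of what that follow-up could look like, under assumptions: LLM_SERVER, remember_child, and stop_llm_server are invented names and are not part of this commit.

    // Hypothetical follow-up (not in this commit): keep the spawned Child handle
    // so the server can be stopped and reaped later.
    use std::process::Child;
    use std::sync::{Mutex, OnceLock};

    static LLM_SERVER: OnceLock<Mutex<Option<Child>>> = OnceLock::new();

    // Record the child returned by cmd.spawn()? so it can be managed later.
    fn remember_child(child: Child) {
        let slot = LLM_SERVER.get_or_init(|| Mutex::new(None));
        *slot.lock().unwrap() = Some(child);
    }

    // Kill and reap the server if one was started.
    fn stop_llm_server() {
        if let Some(slot) = LLM_SERVER.get() {
            if let Some(mut child) = slot.lock().unwrap().take() {
                let _ = child.kill(); // ignore errors if it already exited
                let _ = child.wait(); // reap to avoid a zombie on Unix
            }
        }
    }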