Add LLM server starter with validation
All checks were successful
GBCI / build (push) Successful in 7m15s
All checks were successful
GBCI / build (push) Successful in 7m15s
This commit is contained in:
parent
dc22618dd2
commit
164f1db961
1 changed file with 97 additions and 0 deletions
|
|
@ -178,6 +178,102 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Er
|
||||||
Err(error_msg.into())
|
Err(error_msg.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn start_llm_server(
|
||||||
|
llama_cpp_path: String,
|
||||||
|
model_path: String,
|
||||||
|
url: String,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
|
let port = url.split(':').last().unwrap_or("8081");
|
||||||
|
|
||||||
|
std::env::set_var("OMP_NUM_THREADS", "20");
|
||||||
|
std::env::set_var("OMP_PLACES", "cores");
|
||||||
|
std::env::set_var("OMP_PROC_BIND", "close");
|
||||||
|
|
||||||
|
// Verify paths exist
|
||||||
|
let llama_path = Path::new(&llama_cpp_path);
|
||||||
|
let model_path = Path::new(&model_path);
|
||||||
|
|
||||||
|
if !llama_path.exists() {
|
||||||
|
return Err(format!("Llama path does not exist: {}", llama_cpp_path).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
if !model_path.exists() {
|
||||||
|
return Err(format!("Model path does not exist: {}", model_path.display()).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
{
|
||||||
|
let executable_path = llama_path.join("llama-server");
|
||||||
|
|
||||||
|
if !executable_path.exists() {
|
||||||
|
return Err(format!("Executable not found: {}", executable_path.display()).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Starting LLM server on port: {}", port);
|
||||||
|
info!("Llama path: {}", llama_cpp_path);
|
||||||
|
info!("Model path: {}", model_path.display());
|
||||||
|
|
||||||
|
// Use absolute paths and proper process management
|
||||||
|
let mut cmd = Command::new("numactl");
|
||||||
|
cmd.arg("--interleave=all")
|
||||||
|
.arg(executable_path)
|
||||||
|
.arg("-m")
|
||||||
|
.arg(model_path)
|
||||||
|
.arg("--host")
|
||||||
|
.arg("0.0.0.0")
|
||||||
|
.arg("--port")
|
||||||
|
.arg(port)
|
||||||
|
.arg("--n-gpu-layers")
|
||||||
|
.arg("99")
|
||||||
|
.arg("--threads")
|
||||||
|
.arg("20")
|
||||||
|
.arg("--threads-batch")
|
||||||
|
.arg("40")
|
||||||
|
.current_dir(llama_path) // Set working directory
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.stderr(Stdio::piped());
|
||||||
|
|
||||||
|
// Spawn and don't wait for completion
|
||||||
|
let child = cmd.spawn()?;
|
||||||
|
|
||||||
|
// Store the child process if you need to manage it later
|
||||||
|
// You might want to add this to a process manager
|
||||||
|
info!("LLM server started with PID: {}", child.id());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
{
|
||||||
|
let executable_path = llama_path.join("llama-server.exe");
|
||||||
|
|
||||||
|
if !executable_path.exists() {
|
||||||
|
return Err(format!("Executable not found: {}", executable_path.display()).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Starting LLM server on port: {}", port);
|
||||||
|
info!("Llama path: {}", llama_cpp_path);
|
||||||
|
info!("Model path: {}", model_path.display());
|
||||||
|
|
||||||
|
let mut cmd = Command::new(executable_path);
|
||||||
|
cmd.arg("-m")
|
||||||
|
.arg(model_path)
|
||||||
|
.arg("--host")
|
||||||
|
.arg("0.0.0.0")
|
||||||
|
.arg("--port")
|
||||||
|
.arg(port)
|
||||||
|
.arg("--n-gpu-layers")
|
||||||
|
.arg("99")
|
||||||
|
.current_dir(llama_path)
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.stderr(Stdio::piped());
|
||||||
|
|
||||||
|
let child = cmd.spawn()?;
|
||||||
|
info!("LLM server started with PID: {}", child.id());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
async fn start_embedding_server(
|
async fn start_embedding_server(
|
||||||
llama_cpp_path: String,
|
llama_cpp_path: String,
|
||||||
model_path: String,
|
model_path: String,
|
||||||
|
|
@ -274,6 +370,7 @@ async fn start_embedding_server(
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn is_server_running(url: &str) -> bool {
|
async fn is_server_running(url: &str) -> bool {
|
||||||
let client = reqwest::Client::new();
|
let client = reqwest::Client::new();
|
||||||
match client.get(&format!("{}/health", url)).send().await {
|
match client.get(&format!("{}/health", url)).send().await {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue