Rename start_llm_server to start_embedding_server
Some checks failed
GBCI / build (push) Has been cancelled

Update server startup to use port 8082 and include --embedding flag.
Add info logging and remove the duplicate start_embedding_server.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-10-01 08:28:27 -03:00
parent 1aa79a30c4
commit dc22618dd2

View file

@ -178,13 +178,12 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Er
Err(error_msg.into()) Err(error_msg.into())
} }
} }
async fn start_embedding_server(
async fn start_llm_server(
llama_cpp_path: String, llama_cpp_path: String,
model_path: String, model_path: String,
url: String, url: String,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let port = url.split(':').last().unwrap_or("8081"); let port = url.split(':').last().unwrap_or("8082");
std::env::set_var("OMP_NUM_THREADS", "20"); std::env::set_var("OMP_NUM_THREADS", "20");
std::env::set_var("OMP_PLACES", "cores"); std::env::set_var("OMP_PLACES", "cores");
@ -210,6 +209,10 @@ async fn start_llm_server(
return Err(format!("Executable not found: {}", executable_path.display()).into()); return Err(format!("Executable not found: {}", executable_path.display()).into());
} }
info!("Starting embedding server on port: {}", port);
info!("Llama path: {}", llama_cpp_path);
info!("Model path: {}", model_path);
// Use absolute paths and proper process management // Use absolute paths and proper process management
let mut cmd = Command::new("numactl"); let mut cmd = Command::new("numactl");
cmd.arg("--interleave=all") cmd.arg("--interleave=all")
@ -220,6 +223,7 @@ async fn start_llm_server(
.arg("0.0.0.0") .arg("0.0.0.0")
.arg("--port") .arg("--port")
.arg(port) .arg(port)
.arg("--embedding")
.arg("--n-gpu-layers") .arg("--n-gpu-layers")
.arg("99") .arg("99")
.arg("--threads") .arg("--threads")
@ -235,7 +239,7 @@ async fn start_llm_server(
// Store the child process if you need to manage it later // Store the child process if you need to manage it later
// You might want to add this to a process manager // You might want to add this to a process manager
println!("LLM server started with PID: {}", child.id()); info!("Embedding server started with PID: {}", child.id());
} }
#[cfg(target_os = "windows")] #[cfg(target_os = "windows")]
@ -246,6 +250,10 @@ async fn start_llm_server(
return Err(format!("Executable not found: {}", executable_path.display()).into()); return Err(format!("Executable not found: {}", executable_path.display()).into());
} }
info!("Starting embedding server on port: {}", port);
info!("Llama path: {}", llama_cpp_path);
info!("Model path: {}", model_path);
let mut cmd = Command::new(executable_path); let mut cmd = Command::new(executable_path);
cmd.arg("-m") cmd.arg("-m")
.arg(model_path) .arg(model_path)
@ -253,6 +261,7 @@ async fn start_llm_server(
.arg("0.0.0.0") .arg("0.0.0.0")
.arg("--port") .arg("--port")
.arg(port) .arg(port)
.arg("--embedding")
.arg("--n-gpu-layers") .arg("--n-gpu-layers")
.arg("99") .arg("99")
.current_dir(llama_path) .current_dir(llama_path)
@ -260,40 +269,11 @@ async fn start_llm_server(
.stderr(Stdio::piped()); .stderr(Stdio::piped());
let child = cmd.spawn()?; let child = cmd.spawn()?;
println!("LLM server started with PID: {}", child.id()); info!("Embedding server started with PID: {}", child.id());
} }
Ok(()) Ok(())
} }
async fn start_embedding_server(
llama_cpp_path: String,
model_path: String,
url: String,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let port = url.split(':').last().unwrap_or("8082");
let mut cmd = Command::new("cmd");
#[cfg(target_os = "windows")]
{
cmd.arg("/C").arg(format!(
"cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99",
llama_cpp_path, model_path, port
));
}
#[cfg(any(target_os = "linux", target_os = "macos"))]
{
cmd.arg("-c").arg(format!(
"cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99 &",
llama_cpp_path, model_path, port
));
}
cmd.spawn()?;
Ok(())
}
async fn is_server_running(url: &str) -> bool { async fn is_server_running(url: &str) -> bool {
let client = reqwest::Client::new(); let client = reqwest::Client::new();
match client.get(&format!("{}/health", url)).send().await { match client.get(&format!("{}/health", url)).send().await {