Rename start_llm_server to start_embedding_server
Some checks failed
GBCI / build (push) Has been cancelled
Update server startup to use port 8082 and include the --embedding flag. Add info logging and remove the duplicate start_embedding_server.
This commit is contained in:
parent 1aa79a30c4
commit dc22618dd2
1 changed file with 14 additions and 34 deletions
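
As a quick orientation before the diff, here is a minimal, self-contained Rust sketch of the kind of llama-server invocation the renamed start_embedding_server builds after this change. It is not the file touched by the commit: the flags (--host 0.0.0.0, --port 8082, --embedding, --n-gpu-layers 99) are taken from the diff below, while the ./llama-server binary location and the model path are placeholder assumptions.

// Sketch only: assembles the same flags the diff below adds to the server command.
use std::process::{Command, Stdio};

fn main() -> std::io::Result<()> {
    let model_path = "/models/embedding-model.gguf"; // placeholder path, not from the commit
    let port = "8082";                               // new default port introduced by this commit

    let child = Command::new("./llama-server")       // assumed llama.cpp server binary location
        .arg("-m").arg(model_path)
        .arg("--host").arg("0.0.0.0")
        .arg("--port").arg(port)
        .arg("--embedding")                          // flag added by this commit
        .arg("--n-gpu-layers").arg("99")
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;

    println!("embedding server started with PID: {}", child.id());
    Ok(())
}
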
@@ -178,13 +178,12 @@ pub async fn ensure_llama_servers_running() -> Result<(), Box<dyn std::error::Er
         Err(error_msg.into())
     }
 }
 
-async fn start_llm_server(
+async fn start_embedding_server(
     llama_cpp_path: String,
     model_path: String,
     url: String,
 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-    let port = url.split(':').last().unwrap_or("8081");
+    let port = url.split(':').last().unwrap_or("8082");
 
     std::env::set_var("OMP_NUM_THREADS", "20");
     std::env::set_var("OMP_PLACES", "cores");
@@ -210,6 +209,10 @@ async fn start_llm_server(
             return Err(format!("Executable not found: {}", executable_path.display()).into());
         }
 
+        info!("Starting embedding server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
         // Use absolute paths and proper process management
         let mut cmd = Command::new("numactl");
         cmd.arg("--interleave=all")
@@ -220,6 +223,7 @@ async fn start_llm_server(
             .arg("0.0.0.0")
             .arg("--port")
             .arg(port)
+            .arg("--embedding")
             .arg("--n-gpu-layers")
             .arg("99")
             .arg("--threads")
@@ -235,7 +239,7 @@ async fn start_llm_server(
 
         // Store the child process if you need to manage it later
         // You might want to add this to a process manager
-        println!("LLM server started with PID: {}", child.id());
+        info!("Embedding server started with PID: {}", child.id());
     }
 
     #[cfg(target_os = "windows")]
@@ -246,6 +250,10 @@ async fn start_llm_server(
             return Err(format!("Executable not found: {}", executable_path.display()).into());
         }
 
+        info!("Starting embedding server on port: {}", port);
+        info!("Llama path: {}", llama_cpp_path);
+        info!("Model path: {}", model_path.display());
+
         let mut cmd = Command::new(executable_path);
         cmd.arg("-m")
             .arg(model_path)
@@ -253,6 +261,7 @@ async fn start_llm_server(
             .arg("0.0.0.0")
            .arg("--port")
             .arg(port)
+            .arg("--embedding")
             .arg("--n-gpu-layers")
             .arg("99")
             .current_dir(llama_path)
@@ -260,40 +269,11 @@ async fn start_llm_server(
             .stderr(Stdio::piped());
 
         let child = cmd.spawn()?;
-        println!("LLM server started with PID: {}", child.id());
+        info!("Embedding server started with PID: {}", child.id());
     }
 
     Ok(())
 }
 
-async fn start_embedding_server(
-    llama_cpp_path: String,
-    model_path: String,
-    url: String,
-) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-    let port = url.split(':').last().unwrap_or("8082");
-    let mut cmd = Command::new("cmd");
-
-    #[cfg(target_os = "windows")]
-    {
-        cmd.arg("/C").arg(format!(
-            "cd {} && llama-server.exe -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99",
-            llama_cpp_path, model_path, port
-        ));
-    }
-
-    #[cfg(any(target_os = "linux", target_os = "macos"))]
-    {
-        cmd.arg("-c").arg(format!(
-            "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --embedding --n-gpu-layers 99 &",
-            llama_cpp_path, model_path, port
-        ));
-    }
-
-    cmd.spawn()?;
-    Ok(())
-}
-
 async fn is_server_running(url: &str) -> bool {
     let client = reqwest::Client::new();
     match client.get(&format!("{}/health", url)).send().await {
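
For reference, below is a rough, self-contained sketch of the /health probe that the is_server_running function at the end of the diff performs, pointed at the embedding server's new port. It assumes reqwest and tokio are available as dependencies; the localhost URL and the 2-second timeout are illustrative assumptions, not part of the commit.

// Sketch of a /health probe against the embedding server on port 8082.
use std::time::Duration;

async fn is_server_running(url: &str) -> bool {
    let client = reqwest::Client::new();
    match client
        .get(format!("{}/health", url))
        .timeout(Duration::from_secs(2)) // illustrative timeout, not in the commit
        .send()
        .await
    {
        Ok(resp) => resp.status().is_success(),
        Err(_) => false,
    }
}

#[tokio::main]
async fn main() {
    // Placeholder URL; the real code derives the port from the configured URL.
    let up = is_server_running("http://127.0.0.1:8082").await;
    println!("embedding server healthy: {}", up);
}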