This commit is contained in:
parent
d8b40f73ff
commit
dabc8171e0
1 changed file with 5 additions and 1 deletion
|
|
@ -185,9 +185,13 @@ async fn start_llm_server(
|
|||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let port = url.split(':').last().unwrap_or("8081");
|
||||
|
||||
std::env::set_var("OMP_NUM_THREADS", "20");
|
||||
std::env::set_var("OMP_PLACES", "cores");
|
||||
std::env::set_var("OMP_PROC_BIND", "close");
|
||||
|
||||
let mut cmd = tokio::process::Command::new("sh");
|
||||
cmd.arg("-c").arg(format!(
|
||||
"cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --threads 8 --threads-batch 18 --temp 0.7 --parallel 3 --repeat-penalty 1.1 --ctx-size 1024 --batch-size 4096 -n 255 --mlock --no-mmap &",
|
||||
"cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 10000 --batch-size 10000 -n 4096 --mlock --no-mmap --prompt-cache-all --flash-attn --no-kv-offload --tensor-split 100 --no-mmap &",
|
||||
llama_cpp_path, model_path, port
|
||||
));
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue