This commit is contained in:
parent
8936d0ce58
commit
b682eead39
1 changed file with 3 additions and 1 deletion
|
|
@@ -189,9 +189,11 @@ async fn start_llm_server(
|
||||||
std::env::set_var("OMP_PLACES", "cores");
|
std::env::set_var("OMP_PLACES", "cores");
|
||||||
std::env::set_var("OMP_PROC_BIND", "close");
|
std::env::set_var("OMP_PROC_BIND", "close");
|
||||||
|
|
||||||
|
// "cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 8192 --batch-size 8192 -n 4096 --mlock --no-mmap --flash-attn --no-kv-offload --no-mmap &",
|
||||||
|
|
||||||
let mut cmd = tokio::process::Command::new("sh");
|
let mut cmd = tokio::process::Command::new("sh");
|
||||||
cmd.arg("-c").arg(format!(
|
cmd.arg("-c").arg(format!(
|
||||||
"cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 8192 --batch-size 8192 -n 4096 --mlock --no-mmap --flash-attn --no-kv-offload --no-mmap &",
|
"cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99 --mlock --no-mmap --threads 20 --threads-batch 40 &",
|
||||||
llama_cpp_path, model_path, port
|
llama_cpp_path, model_path, port
|
||||||
));
|
));
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue