From dabc8171e070a91df7c6fab38979d4d4e77b1dbc Mon Sep 17 00:00:00 2001
From: "Rodrigo Rodriguez (Pragmatismo)"
Date: Thu, 11 Sep 2025 16:04:46 -0300
Subject: [PATCH] llama.cpp hardcoded config tests.

---
 src/services/llm_local.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/services/llm_local.rs b/src/services/llm_local.rs
index 349b3a4..4fd144f 100644
--- a/src/services/llm_local.rs
+++ b/src/services/llm_local.rs
@@ -185,9 +185,13 @@ async fn start_llm_server(
 ) -> Result<(), Box<dyn std::error::Error>> {
     let port = url.split(':').last().unwrap_or("8081");
 
+    std::env::set_var("OMP_NUM_THREADS", "20");
+    std::env::set_var("OMP_PLACES", "cores");
+    std::env::set_var("OMP_PROC_BIND", "close");
+
     let mut cmd = tokio::process::Command::new("sh");
     cmd.arg("-c").arg(format!(
-        "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --threads 8 --threads-batch 18 --temp 0.7 --parallel 3 --repeat-penalty 1.1 --ctx-size 1024 --batch-size 4096 -n 255 --mlock --no-mmap &",
+        "cd {} && numactl --interleave=all ./llama-server -m {} --host 0.0.0.0 --port {} --threads 20 --threads-batch 40 --temp 0.7 --parallel 1 --repeat-penalty 1.1 --ctx-size 10000 --batch-size 10000 -n 4096 --mlock --no-mmap --prompt-cache-all --flash-attn --no-kv-offload --tensor-split 100 &",
         llama_cpp_path, model_path, port
     ));
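
Note (reviewer sketch, not part of the patch): --threads 20, --threads-batch 40,
and OMP_NUM_THREADS=20 are hardcoded for one specific machine. If these test
settings stick, the counts could instead be derived from the host at startup.
A minimal sketch using std::thread::available_parallelism() from the Rust
standard library; the 1:2 generation-to-batch ratio is an assumption carried
over from the 20/40 pair above, not something llama.cpp requires:

    use std::thread;

    /// Returns (generation threads, batch threads) based on visible cores.
    fn thread_config() -> (usize, usize) {
        // available_parallelism() can fail (e.g. in restricted sandboxes);
        // fall back to a single thread in that case.
        let cores = thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(1);
        // Keep the 1:2 generation-to-batch ratio used in this patch.
        (cores, cores * 2)
    }

The two values could then be spliced into the format! string and into the
OMP_NUM_THREADS set_var call in place of the literal 20 and 40.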