From 8c8b3b84685a930bc226b8571f89c548a50f92e6 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Fri, 14 Nov 2025 14:14:21 -0300 Subject: [PATCH] feat(llm): remove deprecated args and clean up server startup Commented out the code that passed deprecated LLM server arguments (n_moe, parallel, cont_batching, mlock, no_mmap, n_predict, ctx-size), since these are no longer used; the code is kept as comments rather than deleted. Also cleaned up the model arguments string by removing the --jinja and --flash-attn flags, which are now recorded in a comment next to the existing TODOs for a future config implementation. The change simplifies the server startup code while maintaining core functionality. --- src/llm/local.rs | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/llm/local.rs b/src/llm/local.rs index cf5269dd..28729ba4 100644 --- a/src/llm/local.rs +++ b/src/llm/local.rs @@ -235,9 +235,10 @@ pub async fn start_llm_server( // TODO: Move flash-attn, temp, top_p, repeat-penalty to config as well. // TODO: Create --jinja. + // --jinja --flash-attn on let mut args = format!( - "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --jinja --flash-attn on --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}", + "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}", model_path, port, gpu_layers ); if !reasoning_format.is_empty() { @@ -246,25 +247,25 @@ pub async fn start_llm_server( - if n_moe != "0" { - args.push_str(&format!(" --n-cpu-moe {}", n_moe)); - } - if parallel != "1" { - args.push_str(&format!(" --parallel {}", parallel)); - } - if cont_batching == "true" { - args.push_str(" --cont-batching"); - } - if mlock == "true" { - args.push_str(" --mlock"); - } - if no_mmap == "true" { - args.push_str(" --no-mmap"); - } - if n_predict != "0" { - args.push_str(&format!(" --n-predict {}", n_predict)); - } - args.push_str(&format!(" --ctx-size {}", n_ctx_size)); + // if n_moe != "0" { + // args.push_str(&format!(" --n-cpu-moe {}", n_moe)); + // } + // if parallel != "1" { + //
args.push_str(&format!(" --parallel {}", parallel)); + // } + // if cont_batching == "true" { + // args.push_str(" --cont-batching"); + // } + // if mlock == "true" { + // args.push_str(" --mlock"); + // } + // if no_mmap == "true" { + // args.push_str(" --no-mmap"); + // } + // if n_predict != "0" { + // args.push_str(&format!(" --n-predict {}", n_predict)); + // } + // args.push_str(&format!(" --ctx-size {}", n_ctx_size)); if cfg!(windows) { let mut cmd = tokio::process::Command::new("cmd");