From 8c8b3b84685a930bc226b8571f89c548a50f92e6 Mon Sep 17 00:00:00 2001
From: "Rodrigo Rodriguez (Pragmatismo)" <me@rodrigorodriguez.com>
Date: Fri, 14 Nov 2025 14:14:21 -0300
Subject: [PATCH] feat(llm): remove deprecated args and clean up server startup

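Commented out the code that passed the deprecated LLM server arguments (n_moe, parallel, cont_batching, mlock, no_mmap, n_predict) since these are no longer used; the previously unconditional --ctx-size argument is commented out along with them, so none of these options are passed to the server at startup anymore. Also cleaned up the model arguments string by removing the --jinja and --flash-attn flags, which are now recorded in a comment next to the existing TODOs for a future config-based implementation. The change simplifies the server startup code while maintaining core functionality.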
---
 src/llm/local.rs | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/llm/local.rs b/src/llm/local.rs
index cf5269dd..28729ba4 100644
--- a/src/llm/local.rs
+++ b/src/llm/local.rs
@@ -235,9 +235,10 @@ pub async fn start_llm_server(
 
         // TODO: Move flash-attn, temp, top_p, repeat-penalty to config as well.
         // TODO: Create --jinja.
+        // --jinja --flash-attn on 
         
         let mut args = format!(
-        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --jinja --flash-attn on --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}",
+        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}",
         model_path, port,  gpu_layers
     );
     if !reasoning_format.is_empty() {
@@ -246,25 +247,25 @@ pub async fn start_llm_server(
 
 
 
-    if n_moe != "0" {
-        args.push_str(&format!(" --n-cpu-moe {}", n_moe));
-    }
-    if parallel != "1" {
-        args.push_str(&format!(" --parallel {}", parallel));
-    }
-    if cont_batching == "true" {
-        args.push_str(" --cont-batching");
-    }
-    if mlock == "true" {
-        args.push_str(" --mlock");
-    }
-    if no_mmap == "true" {
-        args.push_str(" --no-mmap");
-    }
-    if n_predict != "0" {
-        args.push_str(&format!(" --n-predict {}", n_predict));
-    }
-        args.push_str(&format!(" --ctx-size {}", n_ctx_size));
+    // if n_moe != "0" {
+    //     args.push_str(&format!(" --n-cpu-moe {}", n_moe));
+    // }
+    // if parallel != "1" {
+    //     args.push_str(&format!(" --parallel {}", parallel));
+    // }
+    // if cont_batching == "true" {
+    //     args.push_str(" --cont-batching");
+    // }
+    // if mlock == "true" {
+    //     args.push_str(" --mlock");
+    // }
+    // if no_mmap == "true" {
+    //     args.push_str(" --no-mmap");
+    // }
+    // if n_predict != "0" {
+    //     args.push_str(&format!(" --n-predict {}", n_predict));
+    // }
+    //     args.push_str(&format!(" --ctx-size {}", n_ctx_size));
     
     if cfg!(windows) {
         let mut cmd = tokio::process::Command::new("cmd");