feat(llm): remove deprecated args and clean up server startup

Commented out the handling of deprecated LLM server arguments (n_moe, parallel, cont_batching, mlock, no_mmap, n_predict, and ctx-size), since these are no longer used. Also cleaned up the model arguments string by removing the --jinja and --flash-attn flags, which are now recorded in a comment next to the TODO notes pending a future config implementation. The change simplifies the server startup code while maintaining core functionality.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-11-14 14:14:21 -03:00
parent d917722b00
commit 8c8b3b8468

View file

@@ -235,9 +235,10 @@ pub async fn start_llm_server(
// TODO: Move flash-attn, temp, top_p, repeat-penalty to config as well. // TODO: Move flash-attn, temp, top_p, repeat-penalty to config as well.
// TODO: Create --jinja. // TODO: Create --jinja.
// --jinja --flash-attn on
let mut args = format!( let mut args = format!(
"-m {} --host 0.0.0.0 --port {} --top_p 0.95 --jinja --flash-attn on --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}", "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}",
model_path, port, gpu_layers model_path, port, gpu_layers
); );
if !reasoning_format.is_empty() { if !reasoning_format.is_empty() {
@@ -246,25 +247,25 @@ pub async fn start_llm_server(
if n_moe != "0" { // if n_moe != "0" {
args.push_str(&format!(" --n-cpu-moe {}", n_moe)); // args.push_str(&format!(" --n-cpu-moe {}", n_moe));
} // }
if parallel != "1" { // if parallel != "1" {
args.push_str(&format!(" --parallel {}", parallel)); // args.push_str(&format!(" --parallel {}", parallel));
} // }
if cont_batching == "true" { // if cont_batching == "true" {
args.push_str(" --cont-batching"); // args.push_str(" --cont-batching");
} // }
if mlock == "true" { // if mlock == "true" {
args.push_str(" --mlock"); // args.push_str(" --mlock");
} // }
if no_mmap == "true" { // if no_mmap == "true" {
args.push_str(" --no-mmap"); // args.push_str(" --no-mmap");
} // }
if n_predict != "0" { // if n_predict != "0" {
args.push_str(&format!(" --n-predict {}", n_predict)); // args.push_str(&format!(" --n-predict {}", n_predict));
} // }
args.push_str(&format!(" --ctx-size {}", n_ctx_size)); // args.push_str(&format!(" --ctx-size {}", n_ctx_size));
if cfg!(windows) { if cfg!(windows) {
let mut cmd = tokio::process::Command::new("cmd"); let mut cmd = tokio::process::Command::new("cmd");