feat(llm): add n_ctx_size parameter to LLM server startup

Added support for configuring the context window size (n_ctx_size) when starting the local LLM server. The parameter is read from the configuration and defaults to 4096 when not specified. This allows finer control over the model's memory usage and performance characteristics.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-11-12 17:22:33 -03:00
parent f93bb3576c
commit d7387b09b7

View file

@@ -228,7 +228,13 @@ pub async fn start_llm_server(
let n_predict = config_manager
.get_config(&default_bot_id, "llm-server-n-predict", None)
.unwrap_or("50".to_string());
let mut args = format!(
let n_ctx_size = config_manager
.get_config(&default_bot_id, "llm-server-n_ctx_size", None)
.unwrap_or("4096".to_string());
let mut args = format!(
"-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}",
model_path, port, gpu_layers
);
@@ -253,6 +259,8 @@ pub async fn start_llm_server(
if n_predict != "0" {
args.push_str(&format!(" --n-predict {}", n_predict));
}
args.push_str(&format!(" --n-ctx-size {}", n_ctx_size));
if cfg!(windows) {
let mut cmd = tokio::process::Command::new("cmd");
cmd.arg("/C").arg(format!(