feat(llm): add n_ctx_size parameter to LLM server startup

Added support for configuring the context window size (n_ctx_size) when starting the local LLM server. The parameter is read from the configuration and defaults to 4096 when not specified. This allows finer control over the model's memory usage and performance characteristics.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-11-12 17:22:33 -03:00
parent f93bb3576c
commit d7387b09b7

View file

@@ -228,7 +228,13 @@ pub async fn start_llm_server(
let n_predict = config_manager
.get_config(&default_bot_id, "llm-server-n-predict", None)
.unwrap_or("50".to_string());
let mut args = format!(
let n_ctx_size = config_manager
.get_config(&default_bot_id, "llm-server-n_ctx_size", None)
.unwrap_or("4096".to_string());
let mut args = format!(
"-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}",
model_path, port, gpu_layers
);
@@ -253,6 +259,8 @@ pub async fn start_llm_server(
if n_predict != "0" {
args.push_str(&format!(" --n-predict {}", n_predict));
}
args.push_str(&format!(" --n-ctx-size {}", n_ctx_size));
if cfg!(windows) {
let mut cmd = tokio::process::Command::new("cmd");
cmd.arg("/C").arg(format!(