From c4c0a1d693acf9687f47769e53ad8848d8606ce4 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Fri, 7 Nov 2025 17:04:33 -0300 Subject: [PATCH] feat(llm): add configurable reasoning-format flag support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added retrieval of `llm-server-reasoning-format` configuration in `src/llm/local.rs`. - When the config value is non-empty, the server start command now includes `--reasoning-format <value>`. - Updated argument construction to conditionally append the new flag. - Cleaned up `src/automation/mod.rs` by removing an unused `std::sync::Arc` import, simplifying the module and eliminating a dead dependency. - Removed the unused `diesel::Connection` import from `src/shared/state.rs`. --- src/automation/mod.rs | 1 - src/llm/local.rs | 5 +++++ src/shared/state.rs | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/automation/mod.rs b/src/automation/mod.rs index 7d5148e7..bd62d4b9 100644 --- a/src/automation/mod.rs +++ b/src/automation/mod.rs @@ -186,7 +186,6 @@ pub async fn execute_compact_prompt(state: Arc) use crate::shared::models::system_automations::dsl::{is_active, system_automations}; use diesel::prelude::*; use log::info; - use std::sync::Arc; let state_clone = state.clone(); let service = AutomationService::new(state_clone); diff --git a/src/llm/local.rs b/src/llm/local.rs index 9134ba02..bedcd475 100644 --- a/src/llm/local.rs +++ b/src/llm/local.rs @@ -199,6 +199,7 @@ pub async fn start_llm_server( let mlock = config_manager.get_config(&default_bot_id, "llm-server-mlock", None).unwrap_or("true".to_string()); let no_mmap = config_manager.get_config(&default_bot_id, "llm-server-no-mmap", None).unwrap_or("true".to_string()); let gpu_layers = config_manager.get_config(&default_bot_id, "llm-server-gpu-layers", None).unwrap_or("20".to_string()); + let reasoning_format = config_manager.get_config(&default_bot_id, "llm-server-reasoning-format", None).unwrap_or("".to_string()); let n_predict = 
config_manager.get_config(&default_bot_id, "llm-server-n-predict", None).unwrap_or("50".to_string()); // Build command arguments dynamically @@ -206,6 +207,9 @@ pub async fn start_llm_server( "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 -ngl {}", model_path, port, gpu_layers ); + if !reasoning_format.is_empty() { + args.push_str(&format!(" --reasoning-format {}", reasoning_format)); + } if n_moe != "0" { args.push_str(&format!(" --n-cpu-moe {}", n_moe)); @@ -219,6 +223,7 @@ pub async fn start_llm_server( if mlock == "true" { args.push_str(" --mlock"); } + if no_mmap == "true" { args.push_str(" --no-mmap"); } diff --git a/src/shared/state.rs b/src/shared/state.rs index eb8ff400..4f16187c 100644 --- a/src/shared/state.rs +++ b/src/shared/state.rs @@ -2,7 +2,7 @@ use crate::channels::{ChannelAdapter, VoiceAdapter, WebChannelAdapter}; use crate::config::AppConfig; use crate::llm::LLMProvider; use crate::session::SessionManager; -use diesel::{Connection, PgConnection}; +use diesel::PgConnection; use aws_sdk_s3::Client as S3Client; use redis::Client as RedisClient; use std::collections::HashMap;