feat(llm): add configurable reasoning-format flag support
- Added retrieval of the `llm-server-reasoning-format` configuration value in `src/llm/local.rs`.
- When the config value is non-empty, the server start command now includes `--reasoning-format <value>`; when it is empty, the flag is omitted.
- Updated argument construction to conditionally append the new flag.
- Cleaned up `src/automation/mod.rs` by removing an unused `std::sync::Arc` import, simplifying the module and eliminating a dead dependency.
- Dropped `Connection` from the `diesel` import in the third changed file, leaving only `PgConnection`.
This commit is contained in:
parent
892572843c
commit
c4c0a1d693
3 changed files with 6 additions and 2 deletions
|
|
@@ -186,7 +186,6 @@ pub async fn execute_compact_prompt(state: Arc<crate::shared::state::AppState>)
|
|||
use crate::shared::models::system_automations::dsl::{is_active, system_automations};
|
||||
use diesel::prelude::*;
|
||||
use log::info;
|
||||
use std::sync::Arc;
|
||||
|
||||
let state_clone = state.clone();
|
||||
let service = AutomationService::new(state_clone);
|
||||
|
|
|
|||
|
|
@@ -199,6 +199,7 @@ pub async fn start_llm_server(
|
|||
let mlock = config_manager.get_config(&default_bot_id, "llm-server-mlock", None).unwrap_or("true".to_string());
|
||||
let no_mmap = config_manager.get_config(&default_bot_id, "llm-server-no-mmap", None).unwrap_or("true".to_string());
|
||||
let gpu_layers = config_manager.get_config(&default_bot_id, "llm-server-gpu-layers", None).unwrap_or("20".to_string());
|
||||
let reasoning_format = config_manager.get_config(&default_bot_id, "llm-server-reasoning-format", None).unwrap_or("".to_string());
|
||||
let n_predict = config_manager.get_config(&default_bot_id, "llm-server-n-predict", None).unwrap_or("50".to_string());
|
||||
|
||||
// Build command arguments dynamically
|
||||
|
|
@@ -206,6 +207,9 @@ pub async fn start_llm_server(
|
|||
"-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 -ngl {}",
|
||||
model_path, port, gpu_layers
|
||||
);
|
||||
if !reasoning_format.is_empty() {
|
||||
args.push_str(&format!(" --reasoning-format {}", reasoning_format));
|
||||
}
|
||||
|
||||
if n_moe != "0" {
|
||||
args.push_str(&format!(" --n-cpu-moe {}", n_moe));
|
||||
|
|
@@ -219,6 +223,7 @@ pub async fn start_llm_server(
|
|||
if mlock == "true" {
|
||||
args.push_str(" --mlock");
|
||||
}
|
||||
|
||||
if no_mmap == "true" {
|
||||
args.push_str(" --no-mmap");
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -2,7 +2,7 @@ use crate::channels::{ChannelAdapter, VoiceAdapter, WebChannelAdapter};
|
|||
use crate::config::AppConfig;
|
||||
use crate::llm::LLMProvider;
|
||||
use crate::session::SessionManager;
|
||||
use diesel::{Connection, PgConnection};
|
||||
use diesel::{ PgConnection};
|
||||
use aws_sdk_s3::Client as S3Client;
|
||||
use redis::Client as RedisClient;
|
||||
use std::collections::HashMap;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue