diff --git a/src/bot/mod.rs b/src/bot/mod.rs
index ef8fa382..51c3531c 100644
--- a/src/bot/mod.rs
+++ b/src/bot/mod.rs
@@ -408,23 +408,25 @@ impl BotOrchestrator {
 };
 
 // Acquire lock briefly for history retrieval with configurable limit
-let history = {
-    let mut sm = self.state.session_manager.lock().await;
-    let mut history = sm.get_conversation_history(session.id, user_id)?;
+let history = "".to_string();
 
-    // Skip all messages before the most recent compacted message (type 9)
-    if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
-        role == "COMPACTED" || content.starts_with("SUMMARY:")
-    }) {
-        history = history.split_off(last_compacted_index);
-    }
+// {
+//     let mut sm = self.state.session_manager.lock().await;
+//     let mut history = sm.get_conversation_history(session.id, user_id)?;
 
-    if history_limit > 0 && history.len() > history_limit as usize {
-        let start = history.len() - history_limit as usize;
-        history.drain(0..start);
-    }
-    history
-};
+//     // Skip all messages before the most recent compacted message (type 9)
+//     if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
+//         role == "COMPACTED" || content.starts_with("SUMMARY:")
+//     }) {
+//         history = history.split_off(last_compacted_index);
+//     }
+
+//     if history_limit > 0 && history.len() > history_limit as usize {
+//         let start = history.len() - history_limit as usize;
+//         history.drain(0..start);
+//     }
+//     history
+// };
 
 let mut prompt = String::new();
 if !system_prompt.is_empty() {
@@ -433,15 +435,15 @@ let history = {
 if !context_data.is_empty() {
     prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
 }
-for (role, content) in &history {
-    prompt.push_str(&format!("{}:{}\n", role, content));
-}
+// for (role, content) in &history {
+//     prompt.push_str(&format!("{}:{}\n", role, content));
+// }
 prompt.push_str(&format!("Human: {}\nBot:", message.content));
 
-trace!(
-    "Stream prompt constructed with {} history entries",
-    history.len()
-);
+// trace!(
+//     "Stream prompt constructed with {} history entries",
+//     history.len()
+// );
 
 let (stream_tx, mut stream_rx) = mpsc::channel::(100);
 let llm = self.state.llm_provider.clone();
diff --git a/src/llm/local.rs b/src/llm/local.rs
index 9bc53f44..9134ba02 100644
--- a/src/llm/local.rs
+++ b/src/llm/local.rs
@@ -194,7 +194,6 @@ pub async fn start_llm_server(
     };
 
     let n_moe = config_manager.get_config(&default_bot_id, "llm-server-n-moe", None).unwrap_or("4".to_string());
-    let ctx_size = config_manager.get_config(&default_bot_id, "llm-server-ctx-size", None).unwrap_or("4096".to_string());
    let parallel = config_manager.get_config(&default_bot_id, "llm-server-parallel", None).unwrap_or("1".to_string());
     let cont_batching = config_manager.get_config(&default_bot_id, "llm-server-cont-batching", None).unwrap_or("true".to_string());
     let mlock = config_manager.get_config(&default_bot_id, "llm-server-mlock", None).unwrap_or("true".to_string());
@@ -204,8 +203,8 @@ pub async fn start_llm_server(
 
     // Build command arguments dynamically
     let mut args = format!(
-        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --ctx-size {} --repeat-penalty 1.2 -ngl {}",
-        model_path, port, ctx_size, gpu_layers
+        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 -ngl {}",
+        model_path, port, gpu_layers
     );
 
     if n_moe != "0" {
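
Note on the `src/bot/mod.rs` hunks: the disabled windowing logic (cut at the most recent compacted message, then trim to `history_limit`) could be kept alive as a pure helper while the call site is stubbed with an empty string. A minimal sketch, assuming history entries are `(role, content)` pairs of `String` as in the original closure and that the limit arrives as a `usize`; the name `window_history` is hypothetical:

```rust
/// Sketch of the commented-out block as a standalone function: drop every
/// entry before the most recent compacted marker (role "COMPACTED" or
/// content starting with "SUMMARY:"), then keep at most `limit` of the
/// newest entries (0 = unlimited).
fn window_history(
    mut history: Vec<(String, String)>, // assumed (role, content) pairs
    limit: usize,                       // assumed equivalent of `history_limit`
) -> Vec<(String, String)> {
    if let Some(last_compacted_index) = history
        .iter()
        .rposition(|(role, content)| role == "COMPACTED" || content.starts_with("SUMMARY:"))
    {
        // Keep the compacted marker and everything after it.
        history = history.split_off(last_compacted_index);
    }
    if limit > 0 && history.len() > limit {
        // Trim from the front so only the newest `limit` entries remain.
        let start = history.len() - limit;
        history.drain(0..start);
    }
    history
}
```

Keeping the logic in a free function would let the commented-out prompt loop and `trace!` call be re-enabled later without re-threading the `session_manager` lock through the stream path.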
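
Note on the `src/llm/local.rs` hunks: with the `llm-server-ctx-size` lookup and the `--ctx-size {}` flag removed, the context size now falls back to the server binary's built-in default. A minimal sketch of the same flag set assembled as discrete arguments instead of one `format!` string, which avoids whitespace and quoting concerns as conditional flags (such as the `n_moe` branch) accumulate; the function and its `&str` parameters are illustrative only, with flag values taken from the diff:

```rust
// Illustrative sketch: the post-change flag set as a Vec<String> rather
// than a single formatted string. Parameters are assumed to be the
// string-typed config values used in the surrounding function.
fn build_server_args(model_path: &str, port: &str, gpu_layers: &str) -> Vec<String> {
    [
        "-m", model_path,
        "--host", "0.0.0.0",
        "--port", port,
        "--top_p", "0.95",
        "--temp", "0.6",
        "--repeat-penalty", "1.2",
        "-ngl", gpu_layers,
    ]
    .iter()
    .map(|s| s.to_string())
    .collect()
}
```

Since no explicit `--ctx-size` is passed anymore, prompts assembled upstream should be kept within whatever context window the server defaults to.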