refactor(bot): disable history retrieval and simplify LLM args
Removed the conversation history loading logic in `BotOrchestrator` and replaced it with a placeholder string, commenting out the related prompt construction and tracing. This streamlines prompt generation while debugging and avoids unnecessary history processing. In the local LLM server setup, removed the `llm-server-ctx-size` configuration lookup and its corresponding command-line argument, since the context-size parameter is no longer required; this simplifies server initialization and avoids passing an unused flag.
parent dca836a429
commit 21355df0c8
2 changed files with 26 additions and 25 deletions
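The first file keeps the history-dependent code in place but commented out, so the change is easy to revert. Below is a minimal sketch of the simplified flow, written as a free-standing function with made-up parameter names; the real code is a method on `BotOrchestrator`, and the exact system-prompt formatting sits above the first hunk and is not shown in this diff.

// Sketch only: not the actual method signature in BotOrchestrator.
fn build_stream_prompt(system_prompt: &str, context_data: &str, user_message: &str) -> String {
    // History retrieval is disabled; the placeholder keeps later restoration simple.
    let _history: String = "".to_string();

    let mut prompt = String::new();
    if !system_prompt.is_empty() {
        // Assumption: the system prompt is prepended first; its exact format string
        // is outside the hunks shown below.
        prompt.push_str(system_prompt);
        prompt.push('\n');
    }
    if !context_data.is_empty() {
        prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
    }
    prompt.push_str(&format!("Human: {}\nBot:", user_message));
    prompt
}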
@@ -408,23 +408,25 @@ impl BotOrchestrator {
         };
 
-        // Acquire lock briefly for history retrieval with configurable limit
-        let history = {
-            let mut sm = self.state.session_manager.lock().await;
-            let mut history = sm.get_conversation_history(session.id, user_id)?;
-
-            // Skip all messages before the most recent compacted message (type 9)
-            if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
-                role == "COMPACTED" || content.starts_with("SUMMARY:")
-            }) {
-                history = history.split_off(last_compacted_index);
-            }
-
-            if history_limit > 0 && history.len() > history_limit as usize {
-                let start = history.len() - history_limit as usize;
-                history.drain(0..start);
-            }
-            history
-        };
+        let history = "".to_string();
+
+        // {
+        //     let mut sm = self.state.session_manager.lock().await;
+        //     let mut history = sm.get_conversation_history(session.id, user_id)?;
+
+        //     // Skip all messages before the most recent compacted message (type 9)
+        //     if let Some(last_compacted_index) = history.iter().rposition(|(role, content)| {
+        //         role == "COMPACTED" || content.starts_with("SUMMARY:")
+        //     }) {
+        //         history = history.split_off(last_compacted_index);
+        //     }
+
+        //     if history_limit > 0 && history.len() > history_limit as usize {
+        //         let start = history.len() - history_limit as usize;
+        //         history.drain(0..start);
+        //     }
+        //     history
+        // };
+
 
         let mut prompt = String::new();
         if !system_prompt.is_empty() {
@@ -433,15 +435,15 @@ let history = {
         if !context_data.is_empty() {
             prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
         }
-        for (role, content) in &history {
-            prompt.push_str(&format!("{}:{}\n", role, content));
-        }
+        // for (role, content) in &history {
+        //     prompt.push_str(&format!("{}:{}\n", role, content));
+        // }
         prompt.push_str(&format!("Human: {}\nBot:", message.content));
 
-        trace!(
-            "Stream prompt constructed with {} history entries",
-            history.len()
-        );
+        // trace!(
+        //     "Stream prompt constructed with {} history entries",
+        //     history.len()
+        // );
 
         let (stream_tx, mut stream_rx) = mpsc::channel::<String>(100);
         let llm = self.state.llm_provider.clone();
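With history disabled, the streamed prompt is now just the system prompt (if any), the optional context block, and the current turn. A worked example of the context-plus-turn portion, using invented values (nothing below comes from the repository):

fn main() {
    // Hypothetical inputs, for illustration only.
    let context_data = "user prefers short answers";
    let user_message = "What is the capital of France?";

    let mut prompt = String::new();
    if !context_data.is_empty() {
        prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
    }
    // No history entries are appended any more.
    prompt.push_str(&format!("Human: {}\nBot:", user_message));

    assert_eq!(
        prompt,
        "CONTEXT: *** user prefers short answers *** \nHuman: What is the capital of France?\nBot:"
    );
}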
@@ -194,7 +194,6 @@ pub async fn start_llm_server(
     };
 
     let n_moe = config_manager.get_config(&default_bot_id, "llm-server-n-moe", None).unwrap_or("4".to_string());
-    let ctx_size = config_manager.get_config(&default_bot_id, "llm-server-ctx-size", None).unwrap_or("4096".to_string());
     let parallel = config_manager.get_config(&default_bot_id, "llm-server-parallel", None).unwrap_or("1".to_string());
     let cont_batching = config_manager.get_config(&default_bot_id, "llm-server-cont-batching", None).unwrap_or("true".to_string());
     let mlock = config_manager.get_config(&default_bot_id, "llm-server-mlock", None).unwrap_or("true".to_string());
@@ -204,8 +203,8 @@ pub async fn start_llm_server(
 
     // Build command arguments dynamically
     let mut args = format!(
-        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --ctx-size {} --repeat-penalty 1.2 -ngl {}",
-        model_path, port, ctx_size, gpu_layers
+        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 -ngl {}",
+        model_path, port, gpu_layers
     );
 
     if n_moe != "0" {
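Without `--ctx-size`, the composed argument string presumably falls back to whatever default context size the server binary uses when the flag is absent. A worked example with invented values for the configured model path, port, and GPU layer count:

fn main() {
    // Hypothetical values; the real ones are resolved via config_manager in start_llm_server.
    let model_path = "models/example-model.gguf";
    let port = 8080;
    let gpu_layers = 35;

    let args = format!(
        "-m {} --host 0.0.0.0 --port {} --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 -ngl {}",
        model_path, port, gpu_layers
    );

    assert_eq!(
        args,
        "-m models/example-model.gguf --host 0.0.0.0 --port 8080 --top_p 0.95 --temp 0.6 --repeat-penalty 1.2 -ngl 35"
    );
}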