feat: implement message deduplication and LLM config improvements
- Deduplicate consecutive messages with the same role in conversation history
- Add n_predict configuration option for the LLM server
- Prevent duplicate message storage in the session manager
- Update the announcement schedule from minute 37 to minute 55 of each hour
- Add a default n_predict value to the default bot config
This commit is contained in:
parent 53b49ba616
commit 41d7377ab7

5 changed files with 37 additions and 5 deletions

@@ -553,10 +553,21 @@ impl BotOrchestrator {
             session_manager.get_conversation_history(session.id, session.user_id)?
         };
 
-        let recent_history = if history.len() > 10 {
-            &history[history.len() - 10..]
+        // Deduplicate consecutive messages from same role
+        let mut deduped_history: Vec<(String, String)> = Vec::new();
+        let mut last_role = None;
+        for (role, content) in history.iter() {
+            if last_role != Some(role) || !deduped_history.is_empty() &&
+                content != &deduped_history.last().unwrap().1 {
+                deduped_history.push((role.clone(), content.clone()));
+                last_role = Some(role);
+            }
+        }
+
+        let recent_history = if deduped_history.len() > 10 {
+            &deduped_history[deduped_history.len() - 10..]
         } else {
-            &history[..]
+            &deduped_history[..]
         };
 
         for (role, content) in recent_history {
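
The deduplication pass above is easiest to reason about in isolation. Below is a minimal standalone sketch of the same rule, where a message is dropped only when it exactly repeats the previous kept message in both role and content, assuming history is the Vec<(String, String)> of (role, content) pairs returned by get_conversation_history; the dedup_consecutive helper name is hypothetical and not part of this commit.

    /// Hypothetical standalone version of the dedup pass introduced above:
    /// a message is dropped only when both its role and its content match
    /// the previously kept message, mirroring the orchestrator's condition.
    fn dedup_consecutive(history: &[(String, String)]) -> Vec<(String, String)> {
        let mut deduped: Vec<(String, String)> = Vec::new();
        for (role, content) in history {
            // Keep the message unless it exactly repeats the last kept one.
            let is_dup = deduped
                .last()
                .map_or(false, |(r, c)| r == role && c == content);
            if !is_dup {
                deduped.push((role.clone(), content.clone()));
            }
        }
        deduped
    }

    fn main() {
        let history = vec![
            ("user".to_string(), "hi".to_string()),
            ("user".to_string(), "hi".to_string()), // exact repeat: dropped
            ("assistant".to_string(), "hello".to_string()),
        ];
        assert_eq!(dedup_consecutive(&history).len(), 2);
    }
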
@@ -1331,7 +1342,7 @@ async fn websocket_handler(
     match orchestrator.process_message(user_message.clone()).await {
         Ok(_) => (),
         Err(e) => {
             error!("Failed to process message: {}", e);
             // Fall back to streaming if processing fails
             if let Err(e) = orchestrator.stream_response(user_message, tx.clone()).await {
                 error!("Failed to stream response: {}", e);
@@ -200,6 +200,7 @@ pub async fn start_llm_server(
     let mlock = config_manager.get_config(&default_bot_id, "llm-server-mlock", None).unwrap_or("true".to_string());
     let no_mmap = config_manager.get_config(&default_bot_id, "llm-server-no-mmap", None).unwrap_or("true".to_string());
     let gpu_layers = config_manager.get_config(&default_bot_id, "llm-server-gpu-layers", None).unwrap_or("20".to_string());
+    let n_predict = config_manager.get_config(&default_bot_id, "llm-server-n-predict", None).unwrap_or("50".to_string());
 
     // Build command arguments dynamically
     let mut args = format!(
@@ -222,6 +223,9 @@ pub async fn start_llm_server(
     if no_mmap == "true" {
         args.push_str(" --no-mmap");
     }
+    if n_predict != "0" {
+        args.push_str(&format!(" --n-predict {}", n_predict));
+    }
 
     if cfg!(windows) {
         let mut cmd = tokio::process::Command::new("cmd");
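
To see how the new option lands in the final invocation, here is a small sketch of the argument assembly, with illustrative values standing in for the config_manager lookups (the flag name --n-predict is exactly what the code above emits):

    fn main() {
        // Illustrative values; the real ones come from config_manager.
        let n_predict = "50".to_string();
        let mut args = String::from("--port 8081 --ctx-size 512");

        // Mirrors the guard added above: a setting of "0" means
        // "do not append the flag", leaving the server default in force.
        if n_predict != "0" {
            args.push_str(&format!(" --n-predict {}", n_predict));
        }

        assert_eq!(args, "--port 8081 --ctx-size 512 --n-predict 50");
    }
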
@@ -189,6 +189,22 @@ impl SessionManager {
     ) -> Result<(), Box<dyn Error + Send + Sync>> {
         use crate::shared::models::message_history::dsl::*;
 
+        // Check if this exact message already exists
+        let exists = message_history
+            .filter(session_id.eq(sess_id))
+            .filter(user_id.eq(uid))
+            .filter(role.eq(ro))
+            .filter(content_encrypted.eq(content))
+            .filter(message_type.eq(msg_type))
+            .select(id)
+            .first::<Uuid>(&mut self.conn)
+            .optional()?;
+
+        if exists.is_some() {
+            debug!("Duplicate message detected, skipping save");
+            return Ok(());
+        }
+
         let next_index = message_history
             .filter(session_id.eq(sess_id))
             .count()
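
The duplicate probe above fetches an id and checks for Some. An equivalent phrasing uses Diesel's exists helper, shown here as a sketch under the same assumptions (the dsl imports and the sess_id, uid, ro, content, and msg_type bindings in scope in the method above):

    use diesel::dsl::exists;
    use diesel::prelude::*;

    // Sketch only: issues SELECT EXISTS (...) with the same five filters.
    let already_saved: bool = diesel::select(exists(
        message_history
            .filter(session_id.eq(sess_id))
            .filter(user_id.eq(uid))
            .filter(role.eq(ro))
            .filter(content_encrypted.eq(content))
            .filter(message_type.eq(msg_type)),
    ))
    .get_result(&mut self.conn)?;

    if already_saved {
        debug!("Duplicate message detected, skipping save");
        return Ok(());
    }

Either form is a read-before-write check and can race with concurrent writers; if strict uniqueness matters, a unique index over these columns plus an ON CONFLICT DO NOTHING insert would enforce it atomically.
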
@@ -1,4 +1,4 @@
-SET_SCHEDULE "37 * * * *"
+SET_SCHEDULE "55 * * * *"
 
 let text = GET "announcements.gbkb/news/news.pdf"
 let resume = LLM "In a few words, resume this: " + text
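
For clarity on the schedule change: assuming standard five-field cron syntax (minute, hour, day of month, month, day of week), "55 * * * *" fires at minute 55 of every hour, so the announcement job still runs hourly, only at a later point in the hour than the old "37 * * * *" slot.
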
@@ -18,6 +18,7 @@ llm-server-port,8081
 llm-server-gpu-layers,0
 llm-server-n-moe,0
 llm-server-ctx-size,512
+llm-server-n-predict,50
 llm-server-parallel,6
 llm-server-cont-batching,true
 llm-server-mlock,false
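
The CSV default of 50 matches the unwrap_or("50") fallback added in start_llm_server above, so behavior is identical whether or not the key is present; setting the value to 0 suppresses the --n-predict flag entirely, leaving the LLM server's own generation-length default in effect.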