feat(automation): improve prompt compaction with async LLM summarization

- Added initial 30s delay to compact prompt scheduler
- Implemented async LLM summarization for conversation history
- Reduced lock contention by minimizing critical sections
- Added fallback to original text if summarization fails
- Updated README with guidance for failed requirements
- Added new `summarize` method to LLMProvider trait
- Improved session manager query with proper DSL usage

The changes optimize the prompt compaction process by:
1. Reducing lock contention by scoping mutex guards tightly (see the sketch below)
2. Adding LLM-based summarization for better conversation compression
3. Making the system more resilient with proper error handling
4. Improving documentation for development practices
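
Point 1 is the standard scoped-guard pattern with tokio's async mutex: copy what you need out of the guarded state inside a small block so the guard drops before any slow work starts. A minimal, self-contained sketch of the idea - the `SessionManager` below is a hypothetical stand-in, not the project's real type:

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

// Hypothetical stand-in for the real session manager.
struct SessionManager {
    sessions: Vec<String>,
}

#[tokio::main]
async fn main() {
    let manager = Arc::new(Mutex::new(SessionManager {
        sessions: vec!["a".into(), "b".into()],
    }));

    // Copy the data out inside a small scope so the MutexGuard is
    // dropped before any slow async work (e.g. an LLM call) begins.
    let sessions = {
        let guard = manager.lock().await;
        guard.sessions.clone()
    }; // guard dropped here; other tasks can take the lock again

    for s in sessions {
        // Slow work runs without holding the lock.
        println!("processing {}", s);
    }
}
```
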
Rodrigo Rodriguez (Pragmatismo) 2025-11-06 17:07:12 -03:00
parent df0536f739
commit 4ce06daf75
4 changed files with 58 additions and 10 deletions

View file

@@ -13,5 +13,6 @@ When initial attempts fail, sequentially try these LLMs:
 - **One requirement at a time** with sequential commits
 - **On error**: Stop and consult Claude for guidance
 - **Change progression**: Start with DeepSeek, conclude with gpt-oss-120b
+- If a big requirement fails, point to a @code file with a similar pattern or a sample from the official docs.
 - **Final validation**: Use prompt "cargo check" with gpt-oss-120b
 - Be humble: one requirement, one commit. But sometimes the freedom of chaos is welcome - when no deadlines are set.

View file

@@ -5,9 +5,12 @@ use diesel::prelude::*;
 use log::{error, info};
 use std::sync::Arc;
 use tokio::time::{interval, Duration};
+use uuid::Uuid;
 
 pub fn start_compact_prompt_scheduler(state: Arc<AppState>) {
     tokio::spawn(async move {
+        // Initial 30 second delay before first run
+        tokio::time::sleep(Duration::from_secs(30)).await;
         let mut interval = interval(Duration::from_secs(60));
         loop {
             interval.tick().await;
@@ -60,15 +63,22 @@ async fn compact_prompt_for_bot(
         return Ok(());
     }
 
-    let mut session_manager = state.session_manager.lock().await;
-    let sessions = session_manager.get_user_sessions(uuid::Uuid::nil())?;
+    // Get sessions without holding lock
+    let sessions = {
+        let mut session_manager = state.session_manager.lock().await;
+        session_manager.get_user_sessions(Uuid::nil())?
+    };
 
     for session in sessions {
         if session.bot_id != automation.bot_id {
             continue;
         }
 
-        let history = session_manager.get_conversation_history(session.id, session.user_id)?;
+        // Get history without holding lock
+        let history = {
+            let mut session_manager = state.session_manager.lock().await;
+            session_manager.get_conversation_history(session.id, session.user_id)?
+        };
 
         if history.len() > compact_threshold {
             info!(
@@ -82,8 +92,29 @@ async fn compact_prompt_for_bot(
                 compacted.push_str(&format!("{}: {}\n", role, content));
             }
 
-            let summarized = format!("SUMMARY: {}", compacted);
-            session_manager.save_message(session.id, session.user_id, 3, &summarized, 1)?;
+            // Clone needed references for async task
+            let llm_provider = state.llm_provider.clone();
+            let compacted_clone = compacted.clone();
+
+            // Run LLM summarization
+            let summarized = match llm_provider.generate(&compacted_clone, &serde_json::Value::Null).await {
+                Ok(summary) => format!("SUMMARY: {}", summary),
+                Err(e) => {
+                    error!("Failed to summarize conversation: {}", e);
+                    format!("SUMMARY: {}", compacted) // Fallback to original text
+                }
+            };
+            info!(
+                "Prompt compacted {}: {} messages",
+                session.id,
+                history.len()
+            );
+
+            // Save with minimal lock time
+            {
+                let mut session_manager = state.session_manager.lock().await;
+                session_manager.save_message(session.id, session.user_id, 3, &summarized, 1)?;
+            }
         }
     }
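
One subtlety behind the new 30-second delay: `tokio::time::interval` completes its first `tick()` immediately, so without the explicit sleep the first compaction pass would run right at startup. A standalone sketch of the delayed-start loop, assuming nothing beyond the tokio runtime:

```rust
use tokio::time::{interval, sleep, Duration};

#[tokio::main]
async fn main() {
    // Without this sleep, the first tick below completes immediately
    // and the loop body runs at startup.
    sleep(Duration::from_secs(30)).await;

    let mut ticker = interval(Duration::from_secs(60));
    loop {
        ticker.tick().await; // first call returns at once, then every 60s
        println!("running compaction pass");
    }
}
```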

View file

@@ -21,6 +21,14 @@ pub trait LLMProvider: Send + Sync {
         tx: mpsc::Sender<String>,
     ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;
 
+    async fn summarize(
+        &self,
+        text: &str,
+    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
+        let prompt = format!("Summarize the following conversation while preserving key details:\n\n{}", text);
+        self.generate(&prompt, &serde_json::json!({"max_tokens": 500}))
+            .await
+    }
+
     async fn cancel_job(
         &self,
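
Because `summarize` has a default body, providers only have to implement `generate` to get it. A hedged usage sketch: it assumes the trait is made object-safe with the async_trait crate, and both `EchoProvider` and the exact `generate` signature are inferred from this diff rather than taken from the real codebase:

```rust
use async_trait::async_trait;

#[async_trait]
pub trait LLMProvider: Send + Sync {
    // Signature inferred from the scheduler's call site above.
    async fn generate(
        &self,
        prompt: &str,
        options: &serde_json::Value,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>>;

    // Default method mirroring the one added in this commit.
    async fn summarize(
        &self,
        text: &str,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
        let prompt = format!(
            "Summarize the following conversation while preserving key details:\n\n{}",
            text
        );
        self.generate(&prompt, &serde_json::json!({"max_tokens": 500})).await
    }
}

// Toy provider: implements only `generate`, inherits `summarize` for free.
struct EchoProvider;

#[async_trait]
impl LLMProvider for EchoProvider {
    async fn generate(
        &self,
        prompt: &str,
        _options: &serde_json::Value,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
        Ok(prompt.chars().take(40).collect())
    }
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    println!("{}", EchoProvider.summarize("user: hi\nassistant: hello").await?);
    Ok(())
}
```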

View file

@@ -331,11 +331,19 @@ impl SessionManager {
         &mut self,
         uid: Uuid,
     ) -> Result<Vec<UserSession>, Box<dyn Error + Send + Sync>> {
-        use crate::shared::models::user_sessions;
-        let sessions = user_sessions::table
-            .filter(user_sessions::user_id.eq(uid))
-            .order(user_sessions::created_at.desc())
-            .load::<UserSession>(&mut self.conn)?;
+        use crate::shared::models::user_sessions::dsl::*;
+        // Uuid::nil() is treated as a wildcard: load sessions for all users
+        let sessions = if uid == Uuid::nil() {
+            user_sessions
+                .order(created_at.desc())
+                .load::<UserSession>(&mut self.conn)?
+        } else {
+            user_sessions
+                .filter(user_id.eq(uid))
+                .order(created_at.desc())
+                .load::<UserSession>(&mut self.conn)?
+        };
+
         Ok(sessions)
     }
 
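
A possible follow-up for the duplicated query pipeline: Diesel's `into_boxed()` erases the concrete query type, so the filter can be applied conditionally while the `order`/`load` tail is written once. A sketch against a hypothetical schema - the table and column names are inferred from the diff, and it assumes diesel's postgres, uuid, and chrono features:

```rust
use diesel::prelude::*;
use uuid::Uuid;

// Hypothetical schema inferred from the diff; the real one lives in
// crate::shared::models.
diesel::table! {
    user_sessions (id) {
        id -> Uuid,
        user_id -> Uuid,
        created_at -> Timestamp,
    }
}

#[derive(Queryable)]
struct UserSession {
    id: Uuid,
    user_id: Uuid,
    created_at: chrono::NaiveDateTime,
}

fn get_user_sessions(
    conn: &mut PgConnection,
    uid: Uuid,
) -> QueryResult<Vec<UserSession>> {
    use user_sessions::dsl::*;

    // Box the query so the filter can be added conditionally.
    let mut query = user_sessions.into_boxed();
    if uid != Uuid::nil() {
        // As in the commit, Uuid::nil() doubles as an "all users" wildcard.
        query = query.filter(user_id.eq(uid));
    }
    query.order(created_at.desc()).load(conn)
}
```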