feat(automation): improve prompt compaction with async LLM summarization
- Added initial 30s delay to compact prompt scheduler
- Implemented async LLM summarization for conversation history
- Reduced lock contention by minimizing critical sections
- Added fallback to original text if summarization fails
- Updated README with guidance for failed requirements
- Added new `summarize` method to LLMProvider trait
- Improved session manager query with proper DSL usage

The changes optimize the prompt compaction process by:

1. Reducing lock contention through better resource management
2. Adding LLM-based summarization for better conversation compression
3. Making the system more resilient with proper error handling
4. Improving documentation for development practices
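The lock-contention fix follows one pattern throughout the hunks below: take the session-manager mutex inside a narrow block, copy out what is needed, and let the guard drop before any slow `.await`. A minimal self-contained sketch of that pattern (a hypothetical `Vec<String>` stands in for the real `SessionManager`):

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

async fn compact(state: Arc<Mutex<Vec<String>>>) {
    // Critical section kept minimal: clone the data out and let the
    // guard drop at the end of the block, before any slow work.
    let sessions = {
        let guard = state.lock().await;
        guard.clone()
    };

    for session in sessions {
        // Slow work (e.g. an LLM call) runs with the lock released,
        // so other tasks can use the shared state concurrently.
        println!("compacting {session}");
    }
}
```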
This commit is contained in:
parent df0536f739
commit 4ce06daf75
4 changed files with 58 additions and 10 deletions
@@ -13,5 +13,6 @@ When initial attempts fail, sequentially try these LLMs:
 - **One requirement at a time** with sequential commits
 - **On error**: Stop and consult Claude for guidance
 - **Change progression**: Start with DeepSeek, conclude with gpt-oss-120b
+- If a big requirement fails, specify a @code file that has a similar pattern, or a sample from the official docs.
 - **Final validation**: Use prompt "cargo check" with gpt-oss-120b
 - Be humble, one requirement, one commit. But sometimes, freedom of chaos is welcome when no deadlines are set.
@@ -5,9 +5,12 @@ use diesel::prelude::*;
 use log::{error, info};
 use std::sync::Arc;
 use tokio::time::{interval, Duration};
+use uuid::Uuid;

 pub fn start_compact_prompt_scheduler(state: Arc<AppState>) {
     tokio::spawn(async move {
+        // Initial 30 second delay before first run
+        tokio::time::sleep(Duration::from_secs(30)).await;
         let mut interval = interval(Duration::from_secs(60));
         loop {
             interval.tick().await;
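Note on scheduling: tokio's `interval` completes its first `tick()` immediately, so the explicit `sleep` above is what actually provides the 30-second warm-up. An equivalent formulation (a sketch, not what this commit does) uses `tokio::time::interval_at`:

```rust
use tokio::time::{interval_at, Duration, Instant};

// First tick fires after 30s, then every 60s thereafter,
// replacing the separate sleep + interval pair.
let mut interval = interval_at(
    Instant::now() + Duration::from_secs(30),
    Duration::from_secs(60),
);
```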
@@ -60,15 +63,22 @@ async fn compact_prompt_for_bot(
         return Ok(());
     }

-    let sessions = session_manager.get_user_sessions(uuid::Uuid::nil())?;
+    // Get sessions without holding lock
+    let sessions = {
+        let mut session_manager = state.session_manager.lock().await;
+        session_manager.get_user_sessions(Uuid::nil())?
+    };
+
     for session in sessions {
         if session.bot_id != automation.bot_id {
             continue;
         }

-        let history = session_manager.get_conversation_history(session.id, session.user_id)?;
+        // Get history without holding lock
+        let history = {
+            let mut session_manager = state.session_manager.lock().await;
+            session_manager.get_conversation_history(session.id, session.user_id)?
+        };
+
         if history.len() > compact_threshold {
             info!(
@@ -82,10 +92,31 @@ async fn compact_prompt_for_bot(
                 compacted.push_str(&format!("{}: {}\n", role, content));
             }

-            let summarized = format!("SUMMARY: {}", compacted);
+            // Clone needed references for async task
+            let llm_provider = state.llm_provider.clone();
+            let compacted_clone = compacted.clone();
+
+            // Run LLM summarization
+            let summarized = match llm_provider.generate(&compacted_clone, &serde_json::Value::Null).await {
+                Ok(summary) => format!("SUMMARY: {}", summary),
+                Err(e) => {
+                    error!("Failed to summarize conversation: {}", e);
+                    format!("SUMMARY: {}", compacted) // Fallback
+                }
+            };
+
             info!(
                 "Prompt compacted {}: {} messages",
                 session.id,
                 history.len()
             );

+            // Save with minimal lock time
+            {
+                let mut session_manager = state.session_manager.lock().await;
                 session_manager.save_message(session.id, session.user_id, 3, &summarized, 1)?;
+            }
         }
     }

     Ok(())
 }
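One inconsistency worth flagging: this loop calls `generate` directly with `serde_json::Value::Null`, even though the commit also adds a `summarize` default method to `LLMProvider` (next file). A sketch of routing through that method instead, assuming nothing else at the call site depends on the raw `generate` options:

```rust
// Hypothetical alternative: reuse the trait's summarize() so the prompt
// template and token budget are defined in one place.
let summarized = match llm_provider.summarize(&compacted_clone).await {
    Ok(summary) => format!("SUMMARY: {}", summary),
    Err(e) => {
        error!("Failed to summarize conversation: {}", e);
        format!("SUMMARY: {}", compacted) // Fallback
    }
};
```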
@@ -21,6 +21,14 @@ pub trait LLMProvider: Send + Sync {
         tx: mpsc::Sender<String>,
     ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;

+    async fn summarize(
+        &self,
+        text: &str,
+    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
+        let prompt = format!("Summarize the following conversation while preserving key details:\n\n{}", text);
+        self.generate(&prompt, &serde_json::json!({"max_tokens": 500}))
+            .await
+    }
+
     async fn cancel_job(
         &self,
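Because `summarize` ships with a default body, every existing provider that already implements `generate` gets it without changes. A minimal hypothetical caller:

```rust
// `provider` is assumed to be any value implementing LLMProvider,
// e.g. an Arc<dyn LLMProvider>.
let summary = provider
    .summarize("user: hi\nassistant: hello! how can I help?")
    .await?;
println!("{}", summary);
```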
@@ -331,11 +331,19 @@ impl SessionManager
         &mut self,
         uid: Uuid,
     ) -> Result<Vec<UserSession>, Box<dyn Error + Send + Sync>> {
-        use crate::shared::models::user_sessions;
-        let sessions = user_sessions::table
-            .filter(user_sessions::user_id.eq(uid))
-            .order(user_sessions::created_at.desc())
-            .load::<UserSession>(&mut self.conn)?;
+        use crate::shared::models::user_sessions::dsl::*;
+
+        let sessions = if uid == Uuid::nil() {
+            user_sessions
+                .order(created_at.desc())
+                .load::<UserSession>(&mut self.conn)?
+        } else {
+            user_sessions
+                .filter(user_id.eq(uid))
+                .order(created_at.desc())
+                .load::<UserSession>(&mut self.conn)?
+        };

         Ok(sessions)
     }
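Here the nil UUID acts as a sentinel meaning "all users", which is what lets the compaction scheduler scan every session. An alternative that avoids duplicating the order/load chain is Diesel's boxed queries; a sketch (not what this commit does):

```rust
use crate::shared::models::user_sessions::dsl::*;

// into_boxed() erases the query type so the filter can be
// attached conditionally before a single order/load chain.
let mut query = user_sessions.into_boxed();
if uid != Uuid::nil() {
    query = query.filter(user_id.eq(uid));
}
let sessions = query
    .order(created_at.desc())
    .load::<UserSession>(&mut self.conn)?;
```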