feat(automation): improve prompt compaction with async LLM summarization

- Added initial 30s delay to compact prompt scheduler
- Implemented async LLM summarization for conversation history
- Reduced lock contention by minimizing critical sections
- Added fallback to original text if summarization fails
- Updated README with guidance for failed requirements
- Added new `summarize` method to LLMProvider trait
- Improved session manager query with proper DSL usage

The changes optimize the prompt compaction process by:
1. Reducing lock contention by scoping mutex guards tightly (see the sketch below)
2. Adding LLM-based summarization for better conversation compression
3. Making the system more resilient with proper error handling
4. Improving documentation for development practices
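
Point 1 is the standard scoped-guard pattern with tokio's async mutex: copy what you need out of the guarded state inside a small block so the guard drops before any slow work starts. A minimal, self-contained sketch of the idea - the `SessionManager` below is a hypothetical stand-in, not the project's real type:

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

// Hypothetical stand-in for the real session manager.
struct SessionManager {
    sessions: Vec<String>,
}

#[tokio::main]
async fn main() {
    let manager = Arc::new(Mutex::new(SessionManager {
        sessions: vec!["a".into(), "b".into()],
    }));

    // Copy the data out inside a small scope so the MutexGuard is
    // dropped before any slow async work (e.g. an LLM call) begins.
    let sessions = {
        let guard = manager.lock().await;
        guard.sessions.clone()
    }; // guard dropped here; other tasks can take the lock again

    for s in sessions {
        // Slow work runs without holding the lock.
        println!("processing {}", s);
    }
}
```
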
Rodrigo Rodriguez (Pragmatismo) 2025-11-06 17:07:12 -03:00
parent df0536f739
commit 4ce06daf75
4 changed files with 58 additions and 10 deletions

View file

@@ -13,5 +13,6 @@ When initial attempts fail, sequentially try these LLMs:
 - **One requirement at a time** with sequential commits
 - **On error**: Stop and consult Claude for guidance
 - **Change progression**: Start with DeepSeek, conclude with gpt-oss-120b
+- If a big requirement fails, point to a @code file with a similar pattern or a sample from the official docs.
 - **Final validation**: Use prompt "cargo check" with gpt-oss-120b
 - Be humble: one requirement, one commit. But sometimes the freedom of chaos is welcome - when no deadlines are set.

View file

@@ -5,9 +5,12 @@ use diesel::prelude::*;
 use log::{error, info};
 use std::sync::Arc;
 use tokio::time::{interval, Duration};
+use uuid::Uuid;
 
 pub fn start_compact_prompt_scheduler(state: Arc<AppState>) {
     tokio::spawn(async move {
+        // Initial 30 second delay before first run
+        tokio::time::sleep(Duration::from_secs(30)).await;
         let mut interval = interval(Duration::from_secs(60));
         loop {
             interval.tick().await;
@@ -60,15 +63,22 @@ async fn compact_prompt_for_bot(
         return Ok(());
     }
 
-    let mut session_manager = state.session_manager.lock().await;
-    let sessions = session_manager.get_user_sessions(uuid::Uuid::nil())?;
+    // Get sessions without holding lock
+    let sessions = {
+        let mut session_manager = state.session_manager.lock().await;
+        session_manager.get_user_sessions(Uuid::nil())?
+    };
 
     for session in sessions {
         if session.bot_id != automation.bot_id {
             continue;
         }
 
-        let history = session_manager.get_conversation_history(session.id, session.user_id)?;
+        // Get history without holding lock
+        let history = {
+            let mut session_manager = state.session_manager.lock().await;
+            session_manager.get_conversation_history(session.id, session.user_id)?
+        };
 
         if history.len() > compact_threshold {
             info!(
@@ -82,8 +92,29 @@ async fn compact_prompt_for_bot(
                 compacted.push_str(&format!("{}: {}\n", role, content));
             }
 
-            let summarized = format!("SUMMARY: {}", compacted);
-            session_manager.save_message(session.id, session.user_id, 3, &summarized, 1)?;
+            // Clone needed references for async task
+            let llm_provider = state.llm_provider.clone();
+            let compacted_clone = compacted.clone();
+
+            // Run LLM summarization
+            let summarized = match llm_provider.generate(&compacted_clone, &serde_json::Value::Null).await {
+                Ok(summary) => format!("SUMMARY: {}", summary),
+                Err(e) => {
+                    error!("Failed to summarize conversation: {}", e);
+                    format!("SUMMARY: {}", compacted) // Fallback to original text
+                }
+            };
+            info!(
+                "Prompt compacted {}: {} messages",
+                session.id,
+                history.len()
+            );
+
+            // Save with minimal lock time
+            {
+                let mut session_manager = state.session_manager.lock().await;
+                session_manager.save_message(session.id, session.user_id, 3, &summarized, 1)?;
+            }
         }
     }
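
One subtlety behind the new 30-second delay: `tokio::time::interval` completes its first `tick()` immediately, so without the explicit sleep the first compaction pass would run right at startup. A standalone sketch of the delayed-start loop, assuming nothing beyond the tokio runtime:

```rust
use tokio::time::{interval, sleep, Duration};

#[tokio::main]
async fn main() {
    // Without this sleep, the first tick below completes immediately
    // and the loop body runs at startup.
    sleep(Duration::from_secs(30)).await;

    let mut ticker = interval(Duration::from_secs(60));
    loop {
        ticker.tick().await; // first call returns at once, then every 60s
        println!("running compaction pass");
    }
}
```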

View file

@@ -21,6 +21,14 @@ pub trait LLMProvider: Send + Sync {
         tx: mpsc::Sender<String>,
     ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;
 
+    async fn summarize(
+        &self,
+        text: &str,
+    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
+        let prompt = format!("Summarize the following conversation while preserving key details:\n\n{}", text);
+        self.generate(&prompt, &serde_json::json!({"max_tokens": 500}))
+            .await
+    }
+
     async fn cancel_job(
         &self,
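
Because `summarize` has a default body, providers only have to implement `generate` to get it. A hedged usage sketch: it assumes the trait is made object-safe with the async_trait crate, and both `EchoProvider` and the exact `generate` signature are inferred from this diff rather than taken from the real codebase:

```rust
use async_trait::async_trait;

#[async_trait]
pub trait LLMProvider: Send + Sync {
    // Signature inferred from the scheduler's call site above.
    async fn generate(
        &self,
        prompt: &str,
        options: &serde_json::Value,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>>;

    // Default method mirroring the one added in this commit.
    async fn summarize(
        &self,
        text: &str,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
        let prompt = format!(
            "Summarize the following conversation while preserving key details:\n\n{}",
            text
        );
        self.generate(&prompt, &serde_json::json!({"max_tokens": 500})).await
    }
}

// Toy provider: implements only `generate`, inherits `summarize` for free.
struct EchoProvider;

#[async_trait]
impl LLMProvider for EchoProvider {
    async fn generate(
        &self,
        prompt: &str,
        _options: &serde_json::Value,
    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
        Ok(prompt.chars().take(40).collect())
    }
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    println!("{}", EchoProvider.summarize("user: hi\nassistant: hello").await?);
    Ok(())
}
```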

View file

@@ -331,11 +331,19 @@ impl SessionManager {
         &mut self,
         uid: Uuid,
     ) -> Result<Vec<UserSession>, Box<dyn Error + Send + Sync>> {
-        use crate::shared::models::user_sessions;
-        let sessions = user_sessions::table
-            .filter(user_sessions::user_id.eq(uid))
-            .order(user_sessions::created_at.desc())
-            .load::<UserSession>(&mut self.conn)?;
+        use crate::shared::models::user_sessions::dsl::*;
+        // Uuid::nil() is treated as a wildcard: load sessions for all users
+        let sessions = if uid == Uuid::nil() {
+            user_sessions
+                .order(created_at.desc())
+                .load::<UserSession>(&mut self.conn)?
+        } else {
+            user_sessions
+                .filter(user_id.eq(uid))
+                .order(created_at.desc())
+                .load::<UserSession>(&mut self.conn)?
+        };
+
         Ok(sessions)
     }
 
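
A possible follow-up for the duplicated query pipeline: Diesel's `into_boxed()` erases the concrete query type, so the filter can be applied conditionally while the `order`/`load` tail is written once. A sketch against a hypothetical schema - the table and column names are inferred from the diff, and it assumes diesel's postgres, uuid, and chrono features:

```rust
use diesel::prelude::*;
use uuid::Uuid;

// Hypothetical schema inferred from the diff; the real one lives in
// crate::shared::models.
diesel::table! {
    user_sessions (id) {
        id -> Uuid,
        user_id -> Uuid,
        created_at -> Timestamp,
    }
}

#[derive(Queryable)]
struct UserSession {
    id: Uuid,
    user_id: Uuid,
    created_at: chrono::NaiveDateTime,
}

fn get_user_sessions(
    conn: &mut PgConnection,
    uid: Uuid,
) -> QueryResult<Vec<UserSession>> {
    use user_sessions::dsl::*;

    // Box the query so the filter can be added conditionally.
    let mut query = user_sessions.into_boxed();
    if uid != Uuid::nil() {
        // As in the commit, Uuid::nil() doubles as an "all users" wildcard.
        query = query.filter(user_id.eq(uid));
    }
    query.order(created_at.desc()).load(conn)
}
```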