feat(automation): improve prompt compaction with async LLM summarization
- Added initial 30s delay to compact prompt scheduler
- Implemented async LLM summarization for conversation history
- Reduced lock contention by minimizing critical sections
- Added fallback to original text if summarization fails
- Updated README with guidance for failed requirements
- Added new `summarize` method to LLMProvider trait
- Improved session manager query with proper DSL usage

The changes optimize the prompt compaction process by:

1. Reducing lock contention through better resource management
2. Adding LLM-based summarization for better conversation compression
3. Making the system more resilient with proper error handling
4. Improving documentation for development practices
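The lock-contention fix follows one pattern throughout the hunks below: take the session-manager mutex inside a narrow block, copy out what is needed, and let the guard drop before any slow `.await`. A minimal self-contained sketch of that pattern (a hypothetical `Vec<String>` stands in for the real `SessionManager`):

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

async fn compact(state: Arc<Mutex<Vec<String>>>) {
    // Critical section kept minimal: clone the data out and let the
    // guard drop at the end of the block, before any slow work.
    let sessions = {
        let guard = state.lock().await;
        guard.clone()
    };

    for session in sessions {
        // Slow work (e.g. an LLM call) runs with the lock released,
        // so other tasks can use the shared state concurrently.
        println!("compacting {session}");
    }
}
```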
This commit is contained in:
parent df0536f739
commit 4ce06daf75
4 changed files with 58 additions and 10 deletions
@@ -13,5 +13,6 @@ When initial attempts fail, sequentially try these LLMs:
 - **One requirement at a time** with sequential commits
 - **On error**: Stop and consult Claude for guidance
 - **Change progression**: Start with DeepSeek, conclude with gpt-oss-120b
+- If a big requirement fails, specify a @code file that has a similar pattern, or a sample from the official docs.
 - **Final validation**: Use prompt "cargo check" with gpt-oss-120b
 - Be humble, one requirement, one commit. But sometimes, freedom of chaos is welcome when no deadlines are set.
@@ -5,9 +5,12 @@ use diesel::prelude::*;
 use log::{error, info};
 use std::sync::Arc;
 use tokio::time::{interval, Duration};
+use uuid::Uuid;

 pub fn start_compact_prompt_scheduler(state: Arc<AppState>) {
     tokio::spawn(async move {
+        // Initial 30 second delay before first run
+        tokio::time::sleep(Duration::from_secs(30)).await;
         let mut interval = interval(Duration::from_secs(60));
         loop {
             interval.tick().await;
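Note on scheduling: tokio's `interval` completes its first `tick()` immediately, so the explicit `sleep` above is what actually provides the 30-second warm-up. An equivalent formulation (a sketch, not what this commit does) uses `tokio::time::interval_at`:

```rust
use tokio::time::{interval_at, Duration, Instant};

// First tick fires after 30s, then every 60s thereafter,
// replacing the separate sleep + interval pair.
let mut interval = interval_at(
    Instant::now() + Duration::from_secs(30),
    Duration::from_secs(60),
);
```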
@@ -60,15 +63,22 @@ async fn compact_prompt_for_bot(
         return Ok(());
     }

-    let sessions = session_manager.get_user_sessions(uuid::Uuid::nil())?;
+    // Get sessions without holding lock
+    let sessions = {
+        let mut session_manager = state.session_manager.lock().await;
+        session_manager.get_user_sessions(Uuid::nil())?
+    };
+
     for session in sessions {
         if session.bot_id != automation.bot_id {
             continue;
         }

-        let history = session_manager.get_conversation_history(session.id, session.user_id)?;
+        // Get history without holding lock
+        let history = {
+            let mut session_manager = state.session_manager.lock().await;
+            session_manager.get_conversation_history(session.id, session.user_id)?
+        };
+
         if history.len() > compact_threshold {
             info!(
@@ -82,10 +92,31 @@ async fn compact_prompt_for_bot(
                 compacted.push_str(&format!("{}: {}\n", role, content));
             }

-            let summarized = format!("SUMMARY: {}", compacted);
+            // Clone needed references for async task
+            let llm_provider = state.llm_provider.clone();
+            let compacted_clone = compacted.clone();
+
+            // Run LLM summarization
+            let summarized = match llm_provider.generate(&compacted_clone, &serde_json::Value::Null).await {
+                Ok(summary) => format!("SUMMARY: {}", summary),
+                Err(e) => {
+                    error!("Failed to summarize conversation: {}", e);
+                    format!("SUMMARY: {}", compacted) // Fallback
+                }
+            };
+
             info!(
                 "Prompt compacted {}: {} messages",
                 session.id,
                 history.len()
             );

+            // Save with minimal lock time
+            {
+                let mut session_manager = state.session_manager.lock().await;
                 session_manager.save_message(session.id, session.user_id, 3, &summarized, 1)?;
+            }
         }
     }

     Ok(())
 }
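One inconsistency worth flagging: this loop calls `generate` directly with `serde_json::Value::Null`, even though the commit also adds a `summarize` default method to `LLMProvider` (next file). A sketch of routing through that method instead, assuming nothing else at the call site depends on the raw `generate` options:

```rust
// Hypothetical alternative: reuse the trait's summarize() so the prompt
// template and token budget are defined in one place.
let summarized = match llm_provider.summarize(&compacted_clone).await {
    Ok(summary) => format!("SUMMARY: {}", summary),
    Err(e) => {
        error!("Failed to summarize conversation: {}", e);
        format!("SUMMARY: {}", compacted) // Fallback
    }
};
```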
@@ -21,6 +21,14 @@ pub trait LLMProvider: Send + Sync {
         tx: mpsc::Sender<String>,
     ) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;

+    async fn summarize(
+        &self,
+        text: &str,
+    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
+        let prompt = format!("Summarize the following conversation while preserving key details:\n\n{}", text);
+        self.generate(&prompt, &serde_json::json!({"max_tokens": 500}))
+            .await
+    }
+
     async fn cancel_job(
         &self,
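Because `summarize` ships with a default body, every existing provider that already implements `generate` gets it without changes. A minimal hypothetical caller:

```rust
// `provider` is assumed to be any value implementing LLMProvider,
// e.g. an Arc<dyn LLMProvider>.
let summary = provider
    .summarize("user: hi\nassistant: hello! how can I help?")
    .await?;
println!("{}", summary);
```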
@@ -331,11 +331,19 @@ impl SessionManager
         &mut self,
         uid: Uuid,
     ) -> Result<Vec<UserSession>, Box<dyn Error + Send + Sync>> {
-        use crate::shared::models::user_sessions;
-        let sessions = user_sessions::table
-            .filter(user_sessions::user_id.eq(uid))
-            .order(user_sessions::created_at.desc())
-            .load::<UserSession>(&mut self.conn)?;
+        use crate::shared::models::user_sessions::dsl::*;
+
+        let sessions = if uid == Uuid::nil() {
+            user_sessions
+                .order(created_at.desc())
+                .load::<UserSession>(&mut self.conn)?
+        } else {
+            user_sessions
+                .filter(user_id.eq(uid))
+                .order(created_at.desc())
+                .load::<UserSession>(&mut self.conn)?
+        };

         Ok(sessions)
     }
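Here the nil UUID acts as a sentinel meaning "all users", which is what lets the compaction scheduler scan every session. An alternative that avoids duplicating the order/load chain is Diesel's boxed queries; a sketch (not what this commit does):

```rust
use crate::shared::models::user_sessions::dsl::*;

// into_boxed() erases the query type so the filter can be
// attached conditionally before a single order/load chain.
let mut query = user_sessions.into_boxed();
if uid != Uuid::nil() {
    query = query.filter(user_id.eq(uid));
}
let sessions = query
    .order(created_at.desc())
    .load::<UserSession>(&mut self.conn)?;
```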