From 4d9d38ffdae2fe297d2f1e22c23399e337082d70 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Mon, 13 Apr 2026 19:23:19 -0300 Subject: [PATCH] fix: enable chat_template_kwargs for GLM thinking mode, add stream traces, fix config_manager scope --- src/core/bot/mod.rs | 13 +++++++++---- src/core/session/mod.rs | 11 ++++++++++- src/llm/episodic_memory.rs | 2 +- src/llm/glm.rs | 39 ++++++++++++++++++++++++++++++-------- 4 files changed, 51 insertions(+), 14 deletions(-) diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs index d8de1f23..f4662186 100644 --- a/src/core/bot/mod.rs +++ b/src/core/bot/mod.rs @@ -507,13 +507,18 @@ impl BotOrchestrator { sm.get_session_context_data(&session.id, &session.user_id)? }; + let config_manager = ConfigManager::new(state_clone.conn.clone()); + + let history_limit = config_manager + .get_bot_config_value(&session.bot_id, "history-limit") + .ok() + .and_then(|v| v.parse::<i64>().ok()); + let history = { let mut sm = state_clone.session_manager.blocking_lock(); - sm.get_conversation_history(session.id, user_id)? + sm.get_conversation_history(session.id, user_id, history_limit)? 
}; - let config_manager = ConfigManager::new(state_clone.conn.clone()); - // For local LLM server, use the actual model name // Default to DeepSeek model if not configured let model = config_manager @@ -1234,7 +1239,7 @@ impl BotOrchestrator { user_id: Uuid, ) -> Result<Vec<(String, String)>, Box<dyn std::error::Error + Send + Sync>> { let mut session_manager = self.state.session_manager.lock().await; - let history = session_manager.get_conversation_history(session_id, user_id)?; + let history = session_manager.get_conversation_history(session_id, user_id, None)?; Ok(history) } } diff --git a/src/core/session/mod.rs b/src/core/session/mod.rs index e70a890a..e847bd59 100644 --- a/src/core/session/mod.rs +++ b/src/core/session/mod.rs @@ -336,13 +336,22 @@ impl SessionManager { &mut self, sess_id: Uuid, _uid: Uuid, + history_limit: Option<i64>, ) -> Result<Vec<(String, String)>, Box<dyn std::error::Error + Send + Sync>> { use crate::core::shared::models::message_history::dsl::*; + let limit_val = history_limit.unwrap_or(50); + let messages = message_history .filter(session_id.eq(sess_id)) - .order(message_index.asc()) + .order(message_index.desc()) + .limit(limit_val) .select((role, content_encrypted)) .load::<(i32, String)>(&mut self.conn)?; + + // Reverse to get chronological order (oldest first) + let mut messages: Vec<(i32, String)> = messages; + messages.reverse(); + let mut history: Vec<(String, String)> = Vec::new(); for (other_role, content) in messages { let role_str = match other_role { diff --git a/src/llm/episodic_memory.rs b/src/llm/episodic_memory.rs index 8299ee30..77b4f8b1 100644 --- a/src/llm/episodic_memory.rs +++ b/src/llm/episodic_memory.rs @@ -59,7 +59,7 @@ async fn process_episodic_memory( let session_id = session.id; let history = { let mut session_manager = state.session_manager.lock().await; - session_manager.get_conversation_history(session.id, session.user_id)? + session_manager.get_conversation_history(session.id, session.user_id, None)? 
}; let mut messages_since_summary = 0; diff --git a/src/llm/glm.rs b/src/llm/glm.rs index 7bbdbe2d..c1530093 100644 --- a/src/llm/glm.rs +++ b/src/llm/glm.rs @@ -158,11 +158,14 @@ impl LLMProvider for GLMClient { top_p: Some(1.0), tools: None, tool_choice: None, - chat_template_kwargs: None, + chat_template_kwargs: Some(GLMChatTemplateKwargs { + enable_thinking: true, + clear_thinking: false, + }), }; let url = self.build_url(); - info!("[GLM] Non-streaming request to: {}", url); + info!("[GLM] Non-streaming request to: {} model={}", url, model_name); let response = self .client @@ -244,11 +247,14 @@ impl LLMProvider for GLMClient { top_p: Some(1.0), tools: tools.cloned(), tool_choice, - chat_template_kwargs: None, +chat_template_kwargs: Some(GLMChatTemplateKwargs { + enable_thinking: true, + clear_thinking: false, + }), }; let url = self.build_url(); - info!("[GLM] Streaming request to: {}", url); + info!("[GLM] Streaming request to: {} model={} max_tokens=131072", url, model_name); let response = self .client @@ -265,9 +271,14 @@ impl LLMProvider for GLMClient { return Err(format!("GLM streaming error: {}", error_text).into()); } + info!("[GLM] Connection established, starting stream processing"); + let mut stream = response.bytes_stream(); let mut in_reasoning = false; let mut has_sent_thinking = false; + let mut total_content_chars: usize = 0; + let mut total_reasoning_chars: usize = 0; + let mut chunk_count: usize = 0; let mut buffer = Vec::new(); while let Some(chunk_result) = stream.next().await { @@ -282,6 +293,7 @@ impl LLMProvider for GLMClient { } if line == "data: [DONE]" { + info!("[GLM] Stream done: {} chunks, {} reasoning chars, {} content chars sent", chunk_count, total_reasoning_chars, total_content_chars); let _ = tx.send(String::new()).await; return Ok(()); } @@ -292,6 +304,8 @@ impl LLMProvider for GLMClient { if let Some(choices) = chunk_data.get("choices").and_then(|c| c.as_array()) { for choice in choices { if let Some(delta) = 
choice.get("delta") { + chunk_count += 1; + // Handle tool_calls if let Some(tool_calls) = delta.get("tool_calls").and_then(|t| t.as_array()) { for tool_call in tool_calls { @@ -313,13 +327,16 @@ impl LLMProvider for GLMClient { // Enter reasoning mode if reasoning.is_some() && content.is_none() { if !in_reasoning { - trace!("[GLM] Entering reasoning/thinking mode"); + info!("[GLM] Entering reasoning mode"); in_reasoning = true; } + if let Some(r) = reasoning { + total_reasoning_chars += r.len(); + } if !has_sent_thinking { let thinking = serde_json::json!({ "type": "thinking", - "content": "\u{1f914} Pensando..." + "content": "🤔 Pensando..." }).to_string(); let _ = tx.send(thinking).await; has_sent_thinking = true; @@ -329,7 +346,7 @@ impl LLMProvider for GLMClient { // Exited reasoning — content is now real response if in_reasoning && content.is_some() { - trace!("[GLM] Exited reasoning mode"); + info!("[GLM] Exited reasoning mode, {} reasoning chars discarded, content starting", total_reasoning_chars); in_reasoning = false; let clear = serde_json::json!({ "type": "thinking_clear", @@ -341,14 +358,18 @@ impl LLMProvider for GLMClient { // Send actual content to user if let Some(text) = content { if !text.is_empty() { + total_content_chars += text.len(); let _ = tx.send(text.to_string()).await; } } + } else { + // No delta in choice + trace!("[GLM] Chunk has no delta"); } if let Some(reason) = choice.get("finish_reason").and_then(|r| r.as_str()) { if !reason.is_empty() { - info!("[GLM] Stream finished: {}", reason); + info!("[GLM] Stream finished: {}, reasoning={} content={}", reason, total_reasoning_chars, total_content_chars); let _ = tx.send(String::new()).await; return Ok(()); } @@ -359,11 +380,13 @@ impl LLMProvider for GLMClient { } } + // Keep unprocessed data in buffer if let Some(last_newline) = data.rfind('\n') { buffer = buffer[last_newline + 1..].to_vec(); } } + info!("[GLM] Stream ended (no [DONE]), reasoning={} content={}", total_reasoning_chars, 
total_content_chars); let _ = tx.send(String::new()).await; Ok(()) }