From 4d9d38ffdae2fe297d2f1e22c23399e337082d70 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Mon, 13 Apr 2026 19:23:19 -0300 Subject: [PATCH] fix: enable chat_template_kwargs for GLM thinking mode, add stream traces, fix config_manager scope --- src/core/bot/mod.rs | 13 +++++++++---- src/core/session/mod.rs | 11 ++++++++++- src/llm/episodic_memory.rs | 2 +- src/llm/glm.rs | 39 ++++++++++++++++++++++++++++++-------- 4 files changed, 51 insertions(+), 14 deletions(-) diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs index d8de1f23..f4662186 100644 --- a/src/core/bot/mod.rs +++ b/src/core/bot/mod.rs @@ -507,13 +507,18 @@ impl BotOrchestrator { sm.get_session_context_data(&session.id, &session.user_id)? }; + let config_manager = ConfigManager::new(state_clone.conn.clone()); + + let history_limit = config_manager + .get_bot_config_value(&session.bot_id, "history-limit") + .ok() + .and_then(|v| v.parse::<i64>().ok()); + let history = { let mut sm = state_clone.session_manager.blocking_lock(); - sm.get_conversation_history(session.id, user_id)? + sm.get_conversation_history(session.id, user_id, history_limit)? 
}; - let config_manager = ConfigManager::new(state_clone.conn.clone()); - // For local LLM server, use the actual model name // Default to DeepSeek model if not configured let model = config_manager @@ -1234,7 +1239,7 @@ impl BotOrchestrator { user_id: Uuid, ) -> Result<Vec<(String, String)>, Box<dyn std::error::Error + Send + Sync>> { let mut session_manager = self.state.session_manager.lock().await; - let history = session_manager.get_conversation_history(session_id, user_id)?; + let history = session_manager.get_conversation_history(session_id, user_id, None)?; Ok(history) } } diff --git a/src/core/session/mod.rs b/src/core/session/mod.rs index e70a890a..e847bd59 100644 --- a/src/core/session/mod.rs +++ b/src/core/session/mod.rs @@ -336,13 +336,22 @@ impl SessionManager { &mut self, sess_id: Uuid, _uid: Uuid, + history_limit: Option<i64>, ) -> Result<Vec<(String, String)>, Box<dyn std::error::Error + Send + Sync>> { use crate::core::shared::models::message_history::dsl::*; + let limit_val = history_limit.unwrap_or(50); + let messages = message_history .filter(session_id.eq(sess_id)) - .order(message_index.asc()) + .order(message_index.desc()) + .limit(limit_val) .select((role, content_encrypted)) .load::<(i32, String)>(&mut self.conn)?; + + // Reverse to get chronological order (oldest first) + let mut messages: Vec<(i32, String)> = messages; + messages.reverse(); + let mut history: Vec<(String, String)> = Vec::new(); for (other_role, content) in messages { let role_str = match other_role { diff --git a/src/llm/episodic_memory.rs b/src/llm/episodic_memory.rs index 8299ee30..77b4f8b1 100644 --- a/src/llm/episodic_memory.rs +++ b/src/llm/episodic_memory.rs @@ -59,7 +59,7 @@ async fn process_episodic_memory( let session_id = session.id; let history = { let mut session_manager = state.session_manager.lock().await; - session_manager.get_conversation_history(session.id, session.user_id)? + session_manager.get_conversation_history(session.id, session.user_id, None)? 
}; let mut messages_since_summary = 0; diff --git a/src/llm/glm.rs b/src/llm/glm.rs index 7bbdbe2d..c1530093 100644 --- a/src/llm/glm.rs +++ b/src/llm/glm.rs @@ -158,11 +158,14 @@ impl LLMProvider for GLMClient { top_p: Some(1.0), tools: None, tool_choice: None, - chat_template_kwargs: None, + chat_template_kwargs: Some(GLMChatTemplateKwargs { + enable_thinking: true, + clear_thinking: false, + }), }; let url = self.build_url(); - info!("[GLM] Non-streaming request to: {}", url); + info!("[GLM] Non-streaming request to: {} model={}", url, model_name); let response = self .client @@ -244,11 +247,14 @@ impl LLMProvider for GLMClient { top_p: Some(1.0), tools: tools.cloned(), tool_choice, - chat_template_kwargs: None, +chat_template_kwargs: Some(GLMChatTemplateKwargs { + enable_thinking: true, + clear_thinking: false, + }), }; let url = self.build_url(); - info!("[GLM] Streaming request to: {}", url); + info!("[GLM] Streaming request to: {} model={} max_tokens=131072", url, model_name); let response = self .client @@ -265,9 +271,14 @@ impl LLMProvider for GLMClient { return Err(format!("GLM streaming error: {}", error_text).into()); } + info!("[GLM] Connection established, starting stream processing"); + let mut stream = response.bytes_stream(); let mut in_reasoning = false; let mut has_sent_thinking = false; + let mut total_content_chars: usize = 0; + let mut total_reasoning_chars: usize = 0; + let mut chunk_count: usize = 0; let mut buffer = Vec::new(); while let Some(chunk_result) = stream.next().await { @@ -282,6 +293,7 @@ impl LLMProvider for GLMClient { } if line == "data: [DONE]" { + info!("[GLM] Stream done: {} chunks, {} reasoning chars, {} content chars sent", chunk_count, total_reasoning_chars, total_content_chars); let _ = tx.send(String::new()).await; return Ok(()); } @@ -292,6 +304,8 @@ impl LLMProvider for GLMClient { if let Some(choices) = chunk_data.get("choices").and_then(|c| c.as_array()) { for choice in choices { if let Some(delta) = 
choice.get("delta") { + chunk_count += 1; + // Handle tool_calls if let Some(tool_calls) = delta.get("tool_calls").and_then(|t| t.as_array()) { for tool_call in tool_calls { @@ -313,13 +327,16 @@ impl LLMProvider for GLMClient { // Enter reasoning mode if reasoning.is_some() && content.is_none() { if !in_reasoning { - trace!("[GLM] Entering reasoning/thinking mode"); + info!("[GLM] Entering reasoning mode"); in_reasoning = true; } + if let Some(r) = reasoning { + total_reasoning_chars += r.len(); + } if !has_sent_thinking { let thinking = serde_json::json!({ "type": "thinking", - "content": "\u{1f914} Pensando..." + "content": "🤔 Pensando..." }).to_string(); let _ = tx.send(thinking).await; has_sent_thinking = true; @@ -329,7 +346,7 @@ impl LLMProvider for GLMClient { // Exited reasoning — content is now real response if in_reasoning && content.is_some() { - trace!("[GLM] Exited reasoning mode"); + info!("[GLM] Exited reasoning mode, {} reasoning chars discarded, content starting", total_reasoning_chars); in_reasoning = false; let clear = serde_json::json!({ "type": "thinking_clear", @@ -341,14 +358,18 @@ impl LLMProvider for GLMClient { // Send actual content to user if let Some(text) = content { if !text.is_empty() { + total_content_chars += text.len(); let _ = tx.send(text.to_string()).await; } } + } else { + // No delta in choice + trace!("[GLM] Chunk has no delta"); } if let Some(reason) = choice.get("finish_reason").and_then(|r| r.as_str()) { if !reason.is_empty() { - info!("[GLM] Stream finished: {}", reason); + info!("[GLM] Stream finished: {}, reasoning={} content={}", reason, total_reasoning_chars, total_content_chars); let _ = tx.send(String::new()).await; return Ok(()); } @@ -359,11 +380,13 @@ impl LLMProvider for GLMClient { } } + // Keep unprocessed data in buffer if let Some(last_newline) = data.rfind('\n') { buffer = buffer[last_newline + 1..].to_vec(); } } + info!("[GLM] Stream ended (no [DONE]), reasoning={} content={}", total_reasoning_chars, 
total_content_chars); let _ = tx.send(String::new()).await; Ok(()) }