From 679bf05504854f0161e3582ffabf36f823f81b3b Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Tue, 14 Apr 2026 10:20:02 -0300 Subject: [PATCH] fix: Kimi K2.5 factory + LLM chunk traces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Kimi factory: add max_tokens=16384, temperature=1.0, top_p=1.0, and chat_template_kwargs.thinking=true for kimi models - Add chunk count traces in stream_response so we see LLM progress immediately in logs: 'LLM chunk #N received (len=X)' - Keep generic stream parser clean — model-specific logic lives in the request builder (Kimi factory pattern) Co-authored-by: Qwen-Coder --- src/core/bot/mod.rs | 6 ++++++ src/llm/mod.rs | 11 ++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs index 23b3bad2..6e08b60a 100644 --- a/src/core/bot/mod.rs +++ b/src/core/bot/mod.rs @@ -836,7 +836,9 @@ impl BotOrchestrator { let _handler = llm_models::get_handler(&model); trace!("Using model handler for {}", model); + info!("LLM streaming started for session {}", session.id); trace!("Receiving LLM stream chunks..."); + let mut chunk_count: usize = 0; #[cfg(feature = "nvidia")] { @@ -860,6 +862,10 @@ impl BotOrchestrator { } while let Some(chunk) = stream_rx.recv().await { + chunk_count += 1; + if chunk_count <= 3 || chunk_count % 50 == 0 { + info!("LLM chunk #{chunk_count} received for session {} (len={})", session.id, chunk.len()); + } // ===== GENERIC TOOL EXECUTION ===== // Add chunk to tool_call_buffer and try to parse diff --git a/src/llm/mod.rs b/src/llm/mod.rs index 6fd3ba29..00629f6e 100644 --- a/src/llm/mod.rs +++ b/src/llm/mod.rs @@ -382,9 +382,18 @@ impl LLMProvider for OpenAIClient { let mut request_body = serde_json::json!({ "model": model, "messages": messages, - "stream": true + "stream": true, + "max_tokens": 16384, + "temperature": 1.0, + "top_p": 1.0 }); + // Kimi K2.5 factory: enable thinking mode via chat_template_kwargs + if model.contains("kimi") { + request_body["chat_template_kwargs"] = serde_json::json!({"thinking": true}); + info!("Kimi factory: enabled thinking mode (chat_template_kwargs)"); + } + // Add tools to the request if provided if let Some(tools_value) = tools { if !tools_value.is_empty() {