diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs
index 23b3bad2..6e08b60a 100644
--- a/src/core/bot/mod.rs
+++ b/src/core/bot/mod.rs
@@ -836,7 +836,9 @@ impl BotOrchestrator {
         let _handler = llm_models::get_handler(&model);
 
         trace!("Using model handler for {}", model);
+        info!("LLM streaming started for session {}", session.id);
         trace!("Receiving LLM stream chunks...");
+        let mut chunk_count: usize = 0;
 
         #[cfg(feature = "nvidia")]
         {
@@ -860,6 +862,10 @@ impl BotOrchestrator {
         }
 
         while let Some(chunk) = stream_rx.recv().await {
+            chunk_count += 1;
+            if chunk_count <= 3 || chunk_count % 50 == 0 {
+                info!("LLM chunk #{chunk_count} received for session {} (len={})", session.id, chunk.len());
+            }
 
             // ===== GENERIC TOOL EXECUTION =====
             // Add chunk to tool_call_buffer and try to parse
diff --git a/src/llm/mod.rs b/src/llm/mod.rs
index 6fd3ba29..00629f6e 100644
--- a/src/llm/mod.rs
+++ b/src/llm/mod.rs
@@ -382,9 +382,18 @@ impl LLMProvider for OpenAIClient {
         let mut request_body = serde_json::json!({
             "model": model,
             "messages": messages,
-            "stream": true
+            "stream": true,
+            "max_tokens": 16384,
+            "temperature": 1.0,
+            "top_p": 1.0
         });
 
+        // Kimi K2.5 factory: enable thinking mode via chat_template_kwargs
+        if model.contains("kimi") {
+            request_body["chat_template_kwargs"] = serde_json::json!({"thinking": true});
+            info!("Kimi factory: enabled thinking mode (chat_template_kwargs)");
+        }
+
         // Add tools to the request if provided
         if let Some(tools_value) = tools {
             if !tools_value.is_empty() {