From 679bf05504854f0161e3582ffabf36f823f81b3b Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Tue, 14 Apr 2026 10:20:02 -0300 Subject: [PATCH] fix: Kimi K2.5 factory + LLM chunk traces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Kimi factory: add max_tokens=16384, temperature=1.0, top_p=1.0, and chat_template_kwargs.thinking=true for kimi models - Add chunk count traces in stream_response so we see LLM progress immediately in logs: 'LLM chunk #N received (len=X)' - Keep generic stream parser clean — model-specific logic lives in the request builder (Kimi factory pattern) Co-authored-by: Qwen-Coder --- src/core/bot/mod.rs | 6 ++++++ src/llm/mod.rs | 11 ++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs index 23b3bad2..6e08b60a 100644 --- a/src/core/bot/mod.rs +++ b/src/core/bot/mod.rs @@ -836,7 +836,9 @@ impl BotOrchestrator { let _handler = llm_models::get_handler(&model); trace!("Using model handler for {}", model); + info!("LLM streaming started for session {}", session.id); trace!("Receiving LLM stream chunks..."); + let mut chunk_count: usize = 0; #[cfg(feature = "nvidia")] { @@ -860,6 +862,10 @@ impl BotOrchestrator { } while let Some(chunk) = stream_rx.recv().await { + chunk_count += 1; + if chunk_count <= 3 || chunk_count % 50 == 0 { + info!("LLM chunk #{chunk_count} received for session {} (len={})", session.id, chunk.len()); + } // ===== GENERIC TOOL EXECUTION ===== // Add chunk to tool_call_buffer and try to parse diff --git a/src/llm/mod.rs b/src/llm/mod.rs index 6fd3ba29..00629f6e 100644 --- a/src/llm/mod.rs +++ b/src/llm/mod.rs @@ -382,9 +382,18 @@ impl LLMProvider for OpenAIClient { let mut request_body = serde_json::json!({ "model": model, "messages": messages, - "stream": true + "stream": true, + "max_tokens": 16384, + "temperature": 1.0, + "top_p": 1.0 }); + // Kimi K2.5 factory: enable thinking mode via chat_template_kwargs + if model.contains("kimi") { + request_body["chat_template_kwargs"] = serde_json::json!({"thinking": true}); + info!("Kimi factory: enabled thinking mode (chat_template_kwargs)"); + } + // Add tools to the request if provided if let Some(tools_value) = tools { if !tools_value.is_empty() {