diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs index 23b3bad2..6e08b60a 100644 --- a/src/core/bot/mod.rs +++ b/src/core/bot/mod.rs @@ -836,7 +836,9 @@ impl BotOrchestrator { let _handler = llm_models::get_handler(&model); trace!("Using model handler for {}", model); + info!("LLM streaming started for session {}", session.id); trace!("Receiving LLM stream chunks..."); + let mut chunk_count: usize = 0; #[cfg(feature = "nvidia")] { @@ -860,6 +862,10 @@ impl BotOrchestrator { } while let Some(chunk) = stream_rx.recv().await { + chunk_count += 1; + if chunk_count <= 3 || chunk_count % 50 == 0 { + info!("LLM chunk #{chunk_count} received for session {} (len={})", session.id, chunk.len()); + } // ===== GENERIC TOOL EXECUTION ===== // Add chunk to tool_call_buffer and try to parse diff --git a/src/llm/mod.rs b/src/llm/mod.rs index 6fd3ba29..00629f6e 100644 --- a/src/llm/mod.rs +++ b/src/llm/mod.rs @@ -382,9 +382,18 @@ impl LLMProvider for OpenAIClient { let mut request_body = serde_json::json!({ "model": model, "messages": messages, - "stream": true + "stream": true, + "max_tokens": 16384, + "temperature": 1.0, + "top_p": 1.0 }); + // Kimi K2.5 factory: enable thinking mode via chat_template_kwargs + if model.contains("kimi") { + request_body["chat_template_kwargs"] = serde_json::json!({"thinking": true}); + info!("Kimi factory: enabled thinking mode (chat_template_kwargs)"); + } + // Add tools to the request if provided if let Some(tools_value) = tools { if !tools_value.is_empty() {