fix: Kimi K2.5 factory + LLM chunk traces
All checks were successful
BotServer CI/CD / build (push) Successful in 4m35s
All checks were successful
BotServer CI/CD / build (push) Successful in 4m35s
- Request builder: set max_tokens=16384, temperature=1.0, top_p=1.0 on all streaming requests; for kimi models additionally set chat_template_kwargs.thinking=true (Kimi K2.5 factory) - Add chunk count traces in stream_response so we see LLM progress immediately in logs: 'LLM chunk #N received (len=X)' - Keep the generic stream parser clean — model-specific logic lives in the request builder (Kimi factory pattern) Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
parent
03f060680e
commit
679bf05504
2 changed files with 16 additions and 1 deletion
|
|
@@ -836,7 +836,9 @@ impl BotOrchestrator {
|
|||
let _handler = llm_models::get_handler(&model);
|
||||
|
||||
trace!("Using model handler for {}", model);
|
||||
info!("LLM streaming started for session {}", session.id);
|
||||
trace!("Receiving LLM stream chunks...");
|
||||
let mut chunk_count: usize = 0;
|
||||
|
||||
#[cfg(feature = "nvidia")]
|
||||
{
|
||||
|
|
@@ -860,6 +862,10 @@ impl BotOrchestrator {
|
|||
}
|
||||
|
||||
while let Some(chunk) = stream_rx.recv().await {
|
||||
chunk_count += 1;
|
||||
if chunk_count <= 3 || chunk_count % 50 == 0 {
|
||||
info!("LLM chunk #{chunk_count} received for session {} (len={})", session.id, chunk.len());
|
||||
}
|
||||
|
||||
// ===== GENERIC TOOL EXECUTION =====
|
||||
// Add chunk to tool_call_buffer and try to parse
|
||||
|
|
|
|||
|
|
@@ -382,9 +382,18 @@ impl LLMProvider for OpenAIClient {
|
|||
let mut request_body = serde_json::json!({
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"stream": true
|
||||
"stream": true,
|
||||
"max_tokens": 16384,
|
||||
"temperature": 1.0,
|
||||
"top_p": 1.0
|
||||
});
|
||||
|
||||
// Kimi K2.5 factory: enable thinking mode via chat_template_kwargs
|
||||
if model.contains("kimi") {
|
||||
request_body["chat_template_kwargs"] = serde_json::json!({"thinking": true});
|
||||
info!("Kimi factory: enabled thinking mode (chat_template_kwargs)");
|
||||
}
|
||||
|
||||
// Add tools to the request if provided
|
||||
if let Some(tools_value) = tools {
|
||||
if !tools_value.is_empty() {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue