feat: add thinking indicator for reasoning models (GLM4.7, Kimi K2.5)

- Show a thinking indicator while the LLM is in reasoning mode
- Skip reasoning content (thinking text) in the user response
- Only show the actual HTML content after thinking ends

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-04-13 15:35:22 -03:00 · 2026-04-13 15:35:22 -03:00 · 498c771d7b
commit 498c771d7b
parent 3e99235a49
1 changed file with 42 additions and 7 deletions
--- a/src/llm/mod.rs
+++ b/src/llm/mod.rs
@@ -1,6 +1,6 @@
 use async_trait::async_trait;
 use futures::StreamExt;
-use log::{error, info};
+use log::{error, info, trace};
 use serde_json::Value;
 use std::sync::Arc;
 use tokio::sync::{mpsc, RwLock};
@@ -447,25 +447,60 @@ impl LLMProvider for OpenAIClient {

        let handler = get_handler(model);
        let mut stream = response.bytes_stream();
-        
+
        // Accumulate tool calls here because OpenAI streams them in fragments
        let mut active_tool_calls: Vec<serde_json::Value> = Vec::new();

+        // Track reasoning state for thinking indicator
+        let mut in_reasoning = false;
+        let mut has_sent_thinking = false;
+        let mut reasoning_buffer = String::new();
+
        while let Some(chunk_result) = stream.next().await {
            let chunk = chunk_result?;
            let chunk_str = String::from_utf8_lossy(&chunk);
            for line in chunk_str.lines() {
                if line.starts_with("data: ") && !line.contains("[DONE]") {
                    if let Ok(data) = serde_json::from_str::<Value>(&line[6..]) {
-                        // Handle reasoning models (GLM4.7, Kimi K2.5): content is null,
-                        // reasoning_content has the actual response
                        let content = data["choices"][0]["delta"]["content"].as_str();
                        let reasoning = data["choices"][0]["delta"]["reasoning_content"].as_str();

-                        // Prefer content field (normal models), fallback to reasoning_content
-                        let text_to_use = content.or(reasoning);
+                        // Detect reasoning phase (GLM4.7, Kimi K2.5)
+                        if reasoning.is_some() && content.is_none() {
+                            if !in_reasoning {
+                                trace!("[LLM] Entering reasoning/thinking mode");
+                                in_reasoning = true;
+                            }
+                            // Accumulate reasoning text but don't send to user
+                            if let Some(r) = reasoning {
+                                reasoning_buffer.push_str(r);
+                            }
+                            // Send thinking indicator only once
+                            if !has_sent_thinking {
+                                let thinking = serde_json::json!({
+                                    "type": "thinking",
+                                    "content": "🤔 Pensando..."
+                                }).to_string();
+                                let _ = tx.send(thinking).await;
+                                has_sent_thinking = true;
+                                trace!("[LLM] Sent thinking indicator");
+                            }
+                            continue; // Don't send reasoning content to user
+                        }

-                        if let Some(text) = text_to_use {
+                        // Exited reasoning mode - content is now real response
+                        if in_reasoning && content.is_some() {
+                            trace!("[LLM] Exited reasoning mode, {} chars of reasoning discarded", reasoning_buffer.len());
+                            in_reasoning = false;
+                            // Clear the thinking indicator
+                            let clear_thinking = serde_json::json!({
+                                "type": "thinking_clear",
+                                "content": ""
+                            }).to_string();
+                            let _ = tx.send(clear_thinking).await;
+                        }
+
+                        if let Some(text) = content {
                            let processed = handler.process_content(text);
                            if !processed.is_empty() {
                                let _ = tx.send(processed).await;