From d6ffe265ef2e32556bdc374991ce2c3c37747918 Mon Sep 17 00:00:00 2001
From: "Rodrigo Rodriguez (Pragmatismo)" <me@rodrigorodriguez.com>
Date: Mon, 13 Apr 2026 18:52:02 -0300
Subject: [PATCH] fix: raise GLM max_tokens to 131072, stop sending thinking
 kwargs - model sends content after reasoning naturally

---
 src/llm/glm.rs | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/llm/glm.rs b/src/llm/glm.rs
index 321c8b1d..7bbdbe2d 100644
--- a/src/llm/glm.rs
+++ b/src/llm/glm.rs
@@ -153,15 +153,12 @@ impl LLMProvider for GLMClient {
             model: model_name.to_string(),
             messages,
             stream: Some(false),
-            max_tokens: Some(16384),
+            max_tokens: Some(131072),
             temperature: Some(1.0),
             top_p: Some(1.0),
             tools: None,
             tool_choice: None,
-            chat_template_kwargs: Some(GLMChatTemplateKwargs {
-                enable_thinking: true,
-                clear_thinking: false,
-            }),
+            chat_template_kwargs: None,
         };
 
         let url = self.build_url();
@@ -242,15 +239,12 @@ impl LLMProvider for GLMClient {
             model: model_name.to_string(),
             messages,
             stream: Some(true),
-            max_tokens: Some(16384),
+            max_tokens: Some(131072),
             temperature: Some(1.0),
             top_p: Some(1.0),
             tools: tools.cloned(),
             tool_choice,
-            chat_template_kwargs: Some(GLMChatTemplateKwargs {
-                enable_thinking: true,
-                clear_thinking: false,
-            }),
+            chat_template_kwargs: None,
         };
 
         let url = self.build_url();