From d6ffe265ef2e32556bdc374991ce2c3c37747918 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Mon, 13 Apr 2026 18:52:02 -0300 Subject: [PATCH] fix: GLM max_tokens 131072, disable thinking kwargs - model sends content after reasoning naturally --- src/llm/glm.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/llm/glm.rs b/src/llm/glm.rs index 321c8b1d..7bbdbe2d 100644 --- a/src/llm/glm.rs +++ b/src/llm/glm.rs @@ -153,15 +153,12 @@ impl LLMProvider for GLMClient { model: model_name.to_string(), messages, stream: Some(false), - max_tokens: Some(16384), + max_tokens: Some(131072), temperature: Some(1.0), top_p: Some(1.0), tools: None, tool_choice: None, - chat_template_kwargs: Some(GLMChatTemplateKwargs { - enable_thinking: true, - clear_thinking: false, - }), + chat_template_kwargs: None, }; let url = self.build_url(); @@ -242,15 +239,12 @@ impl LLMProvider for GLMClient { model: model_name.to_string(), messages, stream: Some(true), - max_tokens: Some(16384), + max_tokens: Some(131072), temperature: Some(1.0), top_p: Some(1.0), tools: tools.cloned(), tool_choice, - chat_template_kwargs: Some(GLMChatTemplateKwargs { - enable_thinking: true, - clear_thinking: false, - }), + chat_template_kwargs: None, }; let url = self.build_url();