fix: GLM max_tokens 131072, disable thinking kwargs - model sends content after reasoning naturally
All checks were successful
BotServer CI/CD / build (push) Successful in 3m11s
All checks were successful
BotServer CI/CD / build (push) Successful in 3m11s
This commit is contained in:
parent
c9fa057203
commit
d6ffe265ef
1 changed file with 4 additions and 10 deletions
|
|
@@ -153,15 +153,12 @@ impl LLMProvider for GLMClient {
|
|||
model: model_name.to_string(),
|
||||
messages,
|
||||
stream: Some(false),
|
||||
-max_tokens: Some(16384),
|
||||
+max_tokens: Some(131072),
|
||||
temperature: Some(1.0),
|
||||
top_p: Some(1.0),
|
||||
tools: None,
|
||||
tool_choice: None,
|
||||
-chat_template_kwargs: Some(GLMChatTemplateKwargs {
|
||||
-enable_thinking: true,
|
||||
-clear_thinking: false,
|
||||
-}),
|
||||
+chat_template_kwargs: None,
|
||||
};
|
||||
|
||||
let url = self.build_url();
|
||||
|
|
@@ -242,15 +239,12 @@ impl LLMProvider for GLMClient {
|
|||
model: model_name.to_string(),
|
||||
messages,
|
||||
stream: Some(true),
|
||||
-max_tokens: Some(16384),
|
||||
+max_tokens: Some(131072),
|
||||
temperature: Some(1.0),
|
||||
top_p: Some(1.0),
|
||||
tools: tools.cloned(),
|
||||
tool_choice,
|
||||
-chat_template_kwargs: Some(GLMChatTemplateKwargs {
|
||||
-enable_thinking: true,
|
||||
-clear_thinking: false,
|
||||
-}),
|
||||
+chat_template_kwargs: None,
|
||||
};
|
||||
|
||||
let url = self.build_url();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue