feat(llm): add jinja flag to local server args
Added the --jinja flag to the LLM server startup arguments to enable Jinja template support. This allows for more flexible prompt formatting when using the local LLM server. The change maintains all existing functionality while adding the new feature.
This commit is contained in:
parent
dbe19867ac
commit
a80da2e182
1 changed file with 6 additions and 2 deletions
|
|
@@ -234,14 +234,18 @@ pub async fn start_llm_server(
|
||||||
.unwrap_or("4096".to_string());
|
.unwrap_or("4096".to_string());
|
||||||
|
|
||||||
// TODO: Move flash-attn, temp, top_p, repeat-penalty to config as well.
|
// TODO: Move flash-attn, temp, top_p, repeat-penalty to config as well.
|
||||||
|
// TODO: Create --jinja.
|
||||||
|
|
||||||
let mut args = format!(
|
let mut args = format!(
|
||||||
"-m {} --host 0.0.0.0 --port {} --top_p 0.95 --flash-attn on --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}",
|
"-m {} --host 0.0.0.0 --port {} --top_p 0.95 --jinja --flash-attn on --temp 0.6 --repeat-penalty 1.2 --n-gpu-layers {}",
|
||||||
model_path, port, gpu_layers
|
model_path, port, gpu_layers
|
||||||
);
|
);
|
||||||
if !reasoning_format.is_empty() {
|
if !reasoning_format.is_empty() {
|
||||||
args.push_str(&format!(" --reasoning-format {}", reasoning_format));
|
args.push_str(&format!(" --reasoning-format {}", reasoning_format));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if n_moe != "0" {
|
if n_moe != "0" {
|
||||||
args.push_str(&format!(" --n-cpu-moe {}", n_moe));
|
args.push_str(&format!(" --n-cpu-moe {}", n_moe));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue