diff --git a/scripts/containers/system.sh b/scripts/containers/system.sh
index d009a8263..66973d4d2 100644
--- a/scripts/containers/system.sh
+++ b/scripts/containers/system.sh
@@ -33,6 +33,7 @@
 mv build/bin/* .
 rm build/bin -r
 rm llama-b6148-bin-ubuntu-x64.zip
+sudo apt-get install -y libpq-dev
 wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
 wget https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf
diff --git a/src/llm_legacy/llm_local.rs b/src/llm_legacy/llm_local.rs
index d9840506d..6f55399a7 100644
--- a/src/llm_legacy/llm_local.rs
+++ b/src/llm_legacy/llm_local.rs
@@ -192,7 +192,7 @@ async fn start_llm_server(
     let mut cmd = tokio::process::Command::new("sh");
     cmd.arg("-c").arg(format!(
-        "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --n-gpu-layers 99 &",
+        "cd {} && ./llama-server -m {} --host 0.0.0.0 --port {} --top-p 0.95 --temp 0.6 --flash-attn on --ctx-size 4096 --repeat-penalty 1.2 -ngl 22 &",
         llama_cpp_path, model_path, port
     ));