feat: improve prompt formatting and system metrics
- Update prompt formatting in BotOrchestrator to use clearer labels (SYSTEM/CONTEXT) with emphasis markers
- Remove unused token_ratio field from SystemMetrics struct
- Increase default context size (2048->4096) and prediction length (512->1024) in config
- Clean up metrics calculation by removing redundant token ratio computation

The changes improve readability of system prompts and simplify metrics collection while increasing default model capacity.
This commit is contained in:
parent
dc0e0a9c51
commit
45e4a5e735
3 changed files with 4 additions and 12 deletions
|
|
@ -393,10 +393,10 @@ impl BotOrchestrator {
|
|||
|
||||
let mut prompt = String::new();
|
||||
if !system_prompt.is_empty() {
|
||||
prompt.push_str(&format!("AI:{}\n", system_prompt));
|
||||
prompt.push_str(&format!("SYSTEM: *** {} *** \n", system_prompt));
|
||||
}
|
||||
if !context_data.is_empty() {
|
||||
prompt.push_str(&format!("CTX:{}\n", context_data));
|
||||
prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
|
||||
}
|
||||
for (role, content) in &history {
|
||||
prompt.push_str(&format!("{}:{}\n", role, content));
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ use sysinfo::{System};
|
|||
pub struct SystemMetrics {
|
||||
pub gpu_usage: Option<f32>,
|
||||
pub cpu_usage: f32,
|
||||
pub token_ratio: f32,
|
||||
}
|
||||
|
||||
/// Gets current system metrics
|
||||
|
|
@ -25,17 +24,10 @@ pub fn get_system_metrics(current_tokens: usize, max_tokens: usize) -> Result<Sy
|
|||
None
|
||||
};
|
||||
|
||||
// Calculate token ratio
|
||||
let token_ratio = if max_tokens > 0 {
|
||||
current_tokens as f32 / max_tokens as f32 * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
Ok(SystemMetrics {
|
||||
gpu_usage,
|
||||
cpu_usage,
|
||||
token_ratio,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ llm-server-host,0.0.0.0
|
|||
llm-server-port,8081
|
||||
llm-server-gpu-layers,0
|
||||
llm-server-n-moe,0
|
||||
llm-server-ctx-size,2048
|
||||
llm-server-n-predict,512
|
||||
llm-server-ctx-size,4096
|
||||
llm-server-n-predict,1024
|
||||
llm-server-parallel,6
|
||||
llm-server-cont-batching,true
|
||||
llm-server-mlock,false
|
||||
|
|
|
|||
|
Can't render this file because it has a wrong number of fields in line 28.
|
Loading…
Add table
Reference in a new issue