feat: improve prompt formatting and system metrics

- Update prompt formatting in BotOrchestrator to use clearer labels (SYSTEM/CONTEXT) with emphasis markers
- Remove unused token_ratio field from SystemMetrics struct
- Increase default context size (2048->4096) and prediction length (512->1024) in config
- Clean up metrics calculation by removing redundant token ratio computation

The changes improve the readability of system prompts and simplify metrics collection, while raising the default context window and prediction length.
Rodrigo Rodriguez (Pragmatismo) 2025-11-05 12:46:08 -03:00
parent dc0e0a9c51
commit 45e4a5e735
3 changed files with 4 additions and 12 deletions


@@ -393,10 +393,10 @@ impl BotOrchestrator {
         let mut prompt = String::new();
         if !system_prompt.is_empty() {
-            prompt.push_str(&format!("AI:{}\n", system_prompt));
+            prompt.push_str(&format!("SYSTEM: *** {} *** \n", system_prompt));
         }
         if !context_data.is_empty() {
-            prompt.push_str(&format!("CTX:{}\n", context_data));
+            prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
         }
         for (role, content) in &history {
             prompt.push_str(&format!("{}:{}\n", role, content));
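For context, the surrounding prompt assembly now reads roughly as the sketch below. Only the push_str lines are verbatim from the diff; the function name build_prompt and its signature are assumptions for illustration, since the hunk shows just the body.

    // Sketch of the prompt builder after this commit. Assumed signature;
    // the push_str calls match the "+" lines in the hunk above.
    fn build_prompt(
        system_prompt: &str,
        context_data: &str,
        history: &[(String, String)],
    ) -> String {
        let mut prompt = String::new();
        if !system_prompt.is_empty() {
            // Clearer label plus *** emphasis markers replace the terse "AI:" prefix.
            prompt.push_str(&format!("SYSTEM: *** {} *** \n", system_prompt));
        }
        if !context_data.is_empty() {
            prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
        }
        for (role, content) in history {
            prompt.push_str(&format!("{}:{}\n", role, content));
        }
        prompt
    }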


@@ -7,7 +7,6 @@ use sysinfo::{System};
 pub struct SystemMetrics {
     pub gpu_usage: Option<f32>,
     pub cpu_usage: f32,
-    pub token_ratio: f32,
 }
 
 /// Gets current system metrics
@@ -25,17 +24,10 @@ pub fn get_system_metrics(current_tokens: usize, max_tokens: usize) -> Result<Sy
         None
     };
 
-    // Calculate token ratio
-    let token_ratio = if max_tokens > 0 {
-        current_tokens as f32 / max_tokens as f32 * 100.0
-    } else {
-        0.0
-    };
 
     Ok(SystemMetrics {
         gpu_usage,
         cpu_usage,
-        token_ratio,
     })
 }
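After the removal, get_system_metrics reduces to roughly the sketch below. Per the hunk header, the token parameters remain in the signature even though nothing reads them anymore; the sysinfo calls shown here and the error type are assumptions (the actual return type is truncated in the header, and sysinfo spellings vary by version), since the hunk only shows the tail of the function.

    use std::error::Error;
    use sysinfo::System;

    // Struct as it stands after this commit (verbatim from the hunk above).
    pub struct SystemMetrics {
        pub gpu_usage: Option<f32>,
        pub cpu_usage: f32,
    }

    // Sketch of the trimmed function; error type assumed for illustration.
    pub fn get_system_metrics(
        _current_tokens: usize, // now unused; kept to match the real signature
        _max_tokens: usize,     // now unused; kept to match the real signature
    ) -> Result<SystemMetrics, Box<dyn Error>> {
        let mut sys = System::new_all();
        // sysinfo 0.31+ naming; older releases use refresh_cpu() and
        // global_cpu_info().cpu_usage() instead.
        sys.refresh_cpu_usage();
        let cpu_usage = sys.global_cpu_usage();
        // GPU probing elided; the real code yields None when no reading is
        // available (the `None` branch visible in the hunk above).
        let gpu_usage: Option<f32> = None;
        Ok(SystemMetrics { gpu_usage, cpu_usage })
    }

A natural follow-up would be dropping the now-unused token parameters from the signature and its call sites.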


@@ -19,8 +19,8 @@ llm-server-host,0.0.0.0
 llm-server-port,8081
 llm-server-gpu-layers,0
 llm-server-n-moe,0
-llm-server-ctx-size,2048
-llm-server-n-predict,512
+llm-server-ctx-size,4096
+llm-server-n-predict,1024
 llm-server-parallel,6
 llm-server-cont-batching,true
 llm-server-mlock,false
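These keys mirror llama.cpp's llama-server options, so a launcher might translate them as sketched below. The flags themselves (-c, -n, -ngl, --parallel, --cont-batching) are real llama-server options, but the key-to-flag mapping and the spawn_llm_server helper are assumptions for illustration, not code from this repository.

    use std::io;
    use std::process::{Child, Command};

    // Hypothetical launcher mapping the CSV settings above onto llama-server
    // flags; mlock is false in the config, so no --mlock flag is passed.
    fn spawn_llm_server() -> io::Result<Child> {
        Command::new("llama-server")
            .args(["--host", "0.0.0.0"]) // llm-server-host
            .args(["--port", "8081"])    // llm-server-port
            .args(["-ngl", "0"])         // llm-server-gpu-layers
            .args(["-c", "4096"])        // llm-server-ctx-size (was 2048)
            .args(["-n", "1024"])        // llm-server-n-predict (was 512)
            .args(["--parallel", "6"])   // llm-server-parallel
            .arg("--cont-batching")      // llm-server-cont-batching,true
            .spawn()
    }

One sizing note: llama-server typically divides the total context across parallel slots, so with --parallel 6 the new 4096-token context still works out to roughly 680 tokens per concurrent slot.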
