feat: improve prompt formatting and system metrics

- Update prompt formatting in BotOrchestrator to use clearer labels (SYSTEM/CONTEXT) with emphasis markers
- Remove unused token_ratio field from SystemMetrics struct
- Increase default context size (2048->4096) and prediction length (512->1024) in config
- Clean up metrics calculation by removing redundant token ratio computation

These changes improve the readability of system prompts and simplify metrics collection, while raising the default context window and prediction length.
Author: Rodrigo Rodriguez (Pragmatismo)
Date:   2025-11-05 12:46:08 -03:00
Commit: 45e4a5e735
Parent: dc0e0a9c51
3 changed files with 4 additions and 12 deletions


@@ -393,10 +393,10 @@ impl BotOrchestrator {
         let mut prompt = String::new();
         if !system_prompt.is_empty() {
-            prompt.push_str(&format!("AI:{}\n", system_prompt));
+            prompt.push_str(&format!("SYSTEM: *** {} *** \n", system_prompt));
         }
         if !context_data.is_empty() {
-            prompt.push_str(&format!("CTX:{}\n", context_data));
+            prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
         }
         for (role, content) in &history {
             prompt.push_str(&format!("{}:{}\n", role, content));

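For illustration, the snippet below reproduces the new prompt layout outside the orchestrator. It is a standalone sketch with made-up placeholder values, not the orchestrator's actual API:

// Standalone sketch: mirrors the format strings introduced above,
// with hypothetical inputs to show the assembled prompt.
fn main() {
    let system_prompt = "You are a helpful assistant.";  // hypothetical value
    let context_data = "Order #1234 shipped yesterday."; // hypothetical value
    let history = [("user", "Where is my order?"), ("assistant", "Let me check.")];

    let mut prompt = String::new();
    if !system_prompt.is_empty() {
        prompt.push_str(&format!("SYSTEM: *** {} *** \n", system_prompt));
    }
    if !context_data.is_empty() {
        prompt.push_str(&format!("CONTEXT: *** {} *** \n", context_data));
    }
    for (role, content) in &history {
        prompt.push_str(&format!("{}:{}\n", role, content));
    }
    print!("{}", prompt);
}

With these inputs the model sees "SYSTEM: *** You are a helpful assistant. ***" and "CONTEXT: *** Order #1234 shipped yesterday. ***" on their own lines, followed by the role-prefixed history.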

@@ -7,7 +7,6 @@ use sysinfo::{System};
 pub struct SystemMetrics {
     pub gpu_usage: Option<f32>,
     pub cpu_usage: f32,
-    pub token_ratio: f32,
 }
 
 /// Gets current system metrics
@@ -25,17 +24,10 @@ pub fn get_system_metrics(current_tokens: usize, max_tokens: usize) -> Result<Sy
         None
     };
 
-    // Calculate token ratio
-    let token_ratio = if max_tokens > 0 {
-        current_tokens as f32 / max_tokens as f32 * 100.0
-    } else {
-        0.0
-    };
     Ok(SystemMetrics {
         gpu_usage,
         cpu_usage,
-        token_ratio,
     })
 }

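Since SystemMetrics no longer carries token_ratio, a caller that still wants the utilization percentage can derive it from the same inputs it already passes to get_system_metrics. A minimal sketch; the helper name is hypothetical and not part of the repo:

// Hypothetical helper reproducing the removed calculation at the call site.
fn token_utilization_pct(current_tokens: usize, max_tokens: usize) -> f32 {
    if max_tokens > 0 {
        current_tokens as f32 / max_tokens as f32 * 100.0
    } else {
        0.0 // guard against division by zero when no limit is set
    }
}

fn main() {
    // e.g. 1536 tokens used of the new 4096-token default context
    println!("{:.1}%", token_utilization_pct(1536, 4096)); // prints "37.5%"
}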

@@ -19,8 +19,8 @@ llm-server-host,0.0.0.0
 llm-server-port,8081
 llm-server-gpu-layers,0
 llm-server-n-moe,0
-llm-server-ctx-size,2048
-llm-server-n-predict,512
+llm-server-ctx-size,4096
+llm-server-n-predict,1024
 llm-server-parallel,6
 llm-server-cont-batching,true
 llm-server-mlock,false

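The config file is a plain key,value CSV, so the new defaults take effect wherever it is parsed. A minimal sketch of reading and sanity-checking the two updated entries; the loader shown is an assumption for illustration, not the repo's actual config reader:

use std::collections::HashMap;

// Parse "key,value" lines into a map (illustrative, not the real loader).
fn load_config(text: &str) -> HashMap<String, String> {
    text.lines()
        .filter_map(|line| {
            let (key, value) = line.split_once(',')?;
            Some((key.trim().to_string(), value.trim().to_string()))
        })
        .collect()
}

fn main() {
    let cfg = load_config("llm-server-ctx-size,4096\nllm-server-n-predict,1024");
    let ctx: usize = cfg["llm-server-ctx-size"].parse().unwrap();
    let n_predict: usize = cfg["llm-server-n-predict"].parse().unwrap();
    // Generated tokens have to fit inside the context window.
    assert!(n_predict <= ctx);
    println!("ctx={ctx}, n_predict={n_predict}");
}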