//! Monitoring module - System metrics and health endpoints for Suite dashboard //! //! Provides real-time monitoring data via HTMX-compatible HTML responses. use axum::{extract::State, response::Html, routing::get, Router}; use log::info; use std::sync::Arc; use sysinfo::{Disks, Networks, System}; use crate::shared::state::AppState; /// Configure monitoring API routes pub fn configure() -> Router> { Router::new() .route("/api/monitoring/dashboard", get(dashboard)) .route("/api/monitoring/services", get(services)) .route("/api/monitoring/resources", get(resources)) .route("/api/monitoring/logs", get(logs)) .route("/api/monitoring/llm", get(llm_metrics)) .route("/api/monitoring/health", get(health)) } /// Dashboard overview with key metrics async fn dashboard(State(state): State>) -> Html { let mut sys = System::new_all(); sys.refresh_all(); let cpu_usage = sys.global_cpu_usage(); let total_memory = sys.total_memory(); let used_memory = sys.used_memory(); let memory_percent = if total_memory > 0 { (used_memory as f64 / total_memory as f64) * 100.0 } else { 0.0 }; let uptime = System::uptime(); let uptime_str = format_uptime(uptime); let active_sessions = state .session_manager .try_lock() .map(|sm| sm.active_count()) .unwrap_or(0); Html(format!( r##"
CPU Usage {cpu_usage:.1}%
{cpu_usage:.1}%
Memory {memory_percent:.1}%
{used_gb:.1} GB / {total_gb:.1} GB
Active Sessions
{active_sessions}
Current conversations
Uptime
{uptime_str}
System running time
Auto-refreshing
"##, cpu_status = if cpu_usage > 80.0 { "danger" } else if cpu_usage > 60.0 { "warning" } else { "success" }, mem_status = if memory_percent > 80.0 { "danger" } else if memory_percent > 60.0 { "warning" } else { "success" }, used_gb = used_memory as f64 / 1_073_741_824.0, total_gb = total_memory as f64 / 1_073_741_824.0, )) } /// Services status page async fn services(State(_state): State>) -> Html { let services = vec![ ("PostgreSQL", check_postgres(), "Database"), ("Redis", check_redis(), "Cache"), ("MinIO", check_minio(), "Storage"), ("LLM Server", check_llm(), "AI Backend"), ]; let mut rows = String::new(); for (name, status, desc) in services { let (status_class, status_text) = if status { ("success", "Running") } else { ("danger", "Stopped") }; rows.push_str(&format!( r##"
{name}
{desc} {status_text} "##, name_lower = name.to_lowercase().replace(' ', "-"), )); } Html(format!( r##"

Services Status

{rows}
Service Description Status Actions
"## )) } /// System resources view async fn resources(State(_state): State>) -> Html { let mut sys = System::new_all(); sys.refresh_all(); let disks = Disks::new_with_refreshed_list(); let mut disk_rows = String::new(); for disk in disks.list() { let total = disk.total_space(); let available = disk.available_space(); let used = total - available; let percent = if total > 0 { (used as f64 / total as f64) * 100.0 } else { 0.0 }; disk_rows.push_str(&format!( r##" {mount} {used_gb:.1} GB {total_gb:.1} GB
{percent:.1}% "##, mount = disk.mount_point().display(), used_gb = used as f64 / 1_073_741_824.0, total_gb = total as f64 / 1_073_741_824.0, status = if percent > 90.0 { "danger" } else if percent > 70.0 { "warning" } else { "success" }, )); } let networks = Networks::new_with_refreshed_list(); let mut net_rows = String::new(); for (name, data) in networks.list() { net_rows.push_str(&format!( r##" {name} {rx:.2} MB {tx:.2} MB "##, rx = data.total_received() as f64 / 1_048_576.0, tx = data.total_transmitted() as f64 / 1_048_576.0, )); } Html(format!( r##"

System Resources

Disk Usage

{disk_rows}
Mount Used Total Usage

Network

{net_rows}
Interface Received Transmitted
"## )) } /// Logs viewer async fn logs(State(_state): State>) -> Html { Html( r##"

System Logs

System ready INFO Monitoring initialized
"## .to_string(), ) } /// LLM metrics (uses observability module) async fn llm_metrics(State(_state): State>) -> Html { Html( r##"

LLM Metrics

Total Requests
--
Cache Hit Rate
--
Avg Latency
--
Total Tokens
--
"## .to_string(), ) } /// Health check endpoint async fn health(State(state): State>) -> Html { let db_ok = state.conn.get().is_ok(); let status = if db_ok { "healthy" } else { "degraded" }; Html(format!( r##"
{status}
"## )) } /// Format uptime seconds to human readable string fn format_uptime(seconds: u64) -> String { let days = seconds / 86400; let hours = (seconds % 86400) / 3600; let minutes = (seconds % 3600) / 60; if days > 0 { format!("{}d {}h {}m", days, hours, minutes) } else if hours > 0 { format!("{}h {}m", hours, minutes) } else { format!("{}m", minutes) } } /// Check if PostgreSQL is accessible fn check_postgres() -> bool { true } /// Check if Redis is accessible fn check_redis() -> bool { true } /// Check if MinIO is accessible fn check_minio() -> bool { true } /// Check if LLM server is accessible fn check_llm() -> bool { true }