From 061c14b4a2f3618214664a2efd7341736664af07 Mon Sep 17 00:00:00 2001
From: "Rodrigo Rodriguez (Pragmatismo)" <me@rodrigorodriguez.com>
Date: Tue, 30 Dec 2025 22:42:32 -0300
Subject: [PATCH] Fix tasks UI, WebSocket progress, memory monitoring, and app
 generator

Tasks UI fixes:
- Fix task list to query auto_tasks table instead of tasks table
- Fix task detail endpoint to use UUID binding for auto_tasks query
- Add proper filter handling: complete, active, awaiting, paused, blocked
- Add TaskStats fields: awaiting, paused, blocked, time_saved
- Add /api/tasks/time-saved endpoint
- Add count-all to stats HTML response

App generator improvements:
- Add AgentActivity struct for detailed terminal-style progress
- Add emit_activity method for rich progress events
- Add detailed logging for LLM calls with timing
- Track files_written, tables_synced, bytes_generated

Memory and performance:
- Add memory_monitor module for tracking RSS and thread activity
- Skip 0-byte files in drive monitor and document processor
- Change DRIVE_MONITOR checking logs from info to trace
- Remove unused profile_section macro

WebSocket progress:
- Ensure TaskProgressEvent includes activity field
- Add with_activity builder method
---
 Cargo.toml                            |   5 +
 config/directory_config.json          |   8 +-
 src/auto_task/app_generator.rs        | 283 +++++++++++++-
 src/auto_task/autotask_api.rs         |   4 +-
 src/auto_task/intent_classifier.rs    | 107 ++++--
 src/auto_task/mod.rs                  | 147 ++++++++
 src/console/status_panel.rs           | 234 ++++++++----
 src/core/automation/mod.rs            |   7 +-
 src/core/bootstrap/mod.rs             | 180 ++++++---
 src/core/kb/document_processor.rs     |  10 +-
 src/core/kb/embedding_generator.rs    | 262 ++++++++++---
 src/core/kb/kb_indexer.rs             |  60 ++-
 src/core/package_manager/facade.rs    |   4 +-
 src/core/package_manager/installer.rs |  46 ++-
 src/core/secrets/mod.rs               |   6 +-
 src/core/shared/memory_monitor.rs     | 506 ++++++++++++++++++++++++++
 src/core/shared/mod.rs                |   1 +
 src/core/shared/state.rs              | 276 ++++++++++++++
 src/core/shared/test_utils.rs         |   6 +-
 src/core/shared/utils.rs              |  27 ++
 src/drive/drive_monitor/mod.rs        | 259 +++++++++++--
 src/drive/mod.rs                      | 205 ++++++++---
 src/llm/claude.rs                     | 121 ++++--
 src/llm/local.rs                      |  53 ++-
 src/main.rs                           |  51 ++-
 src/security/integration.rs           |   6 +-
 src/tasks/mod.rs                      | 293 ++++++++++++---
 27 files changed, 2717 insertions(+), 450 deletions(-)
 create mode 100644 src/core/shared/memory_monitor.rs

diff --git a/Cargo.toml b/Cargo.toml
index 3cc1d5c2e..b24b72a56 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -81,6 +81,7 @@ monitoring = ["dep:sysinfo"]
 automation = ["dep:rhai"]
 grpc = ["dep:tonic"]
 progress-bars = ["dep:indicatif"]
+jemalloc = ["dep:tikv-jemallocator", "dep:tikv-jemalloc-ctl"]
 
 # ===== META FEATURES (BUNDLES) =====
 full = [
@@ -216,6 +217,10 @@ tonic = { version = "0.14.2", features = ["transport"], optional = true }
 # UI Enhancement (progress-bars feature)
 indicatif = { version = "0.18.0", optional = true }
 smartstring = "1.0.1"
+
+# Memory allocator (jemalloc feature)
+tikv-jemallocator = { version = "0.6", optional = true }
+tikv-jemalloc-ctl = { version = "0.6", features = ["stats"], optional = true }
 scopeguard = "1.2.0"
 
 # Vault secrets management
diff --git a/config/directory_config.json b/config/directory_config.json
index d12ac678e..85dbd5b30 100644
--- a/config/directory_config.json
+++ b/config/directory_config.json
@@ -1,7 +1,7 @@
 {
   "base_url": "http://localhost:8300",
   "default_org": {
-    "id": "353229789043097614",
+    "id": "353379211173429262",
     "name": "default",
     "domain": "default.localhost"
   },
@@ -13,8 +13,8 @@
     "first_name": "Admin",
     "last_name": "User"
   },
-  "admin_token": "7QNrwws4y1X5iIuTUCtXpQj9RoQf4fYi144yEY87tNAbLMZOOD57t3YDAqCtIyIkBS1EZ5k",
+  "admin_token": "pY9ruIghlJlAVn-a-vpbtM0L9yQ3WtweXkXJKEk2aVBL4HEeIppxCA8MPx60ZjQJRghq9zU",
   "project_id": "",
-  "client_id": "353229789848469518",
-  "client_secret": "COd3gKdMO43jkUztTckCNtHrjxa5RtcIlBn7Cbp4GFoXs6mw6iZalB3m4Vv3FK5Y"
+  "client_id": "353379211962023950",
+  "client_secret": "vD8uaGZubV4pOMqxfFkGc0YOfmzYII8W7L25V7cGWieQlw0UHuvDQkSuJbQ3Rhgp"
 }
\ No newline at end of file
diff --git a/src/auto_task/app_generator.rs b/src/auto_task/app_generator.rs
index 0e96262b5..3d63e59f2 100644
--- a/src/auto_task/app_generator.rs
+++ b/src/auto_task/app_generator.rs
@@ -5,7 +5,7 @@ use crate::basic::keywords::table_definition::{
 use crate::core::config::ConfigManager;
 use crate::core::shared::get_content_type;
 use crate::core::shared::models::UserSession;
-use crate::core::shared::state::AppState;
+use crate::core::shared::state::{AgentActivity, AppState};
 use aws_sdk_s3::primitives::ByteStream;
 use chrono::{DateTime, Utc};
 use diesel::prelude::*;
@@ -143,18 +143,90 @@ struct LlmFile {
 
 pub struct AppGenerator {
     state: Arc<AppState>,
+    task_id: Option<String>,
+    generation_start: Option<std::time::Instant>,
+    files_written: Vec<String>,
+    tables_synced: Vec<String>,
+    bytes_generated: u64,
 }
 
 impl AppGenerator {
     pub fn new(state: Arc<AppState>) -> Self {
-        Self { state }
+        Self {
+            state,
+            task_id: None,
+            generation_start: None,
+            files_written: Vec::new(),
+            tables_synced: Vec::new(),
+            bytes_generated: 0,
+        }
+    }
+
+    pub fn with_task_id(state: Arc<AppState>, task_id: impl Into<String>) -> Self {
+        Self {
+            state,
+            task_id: Some(task_id.into()),
+            generation_start: None,
+            files_written: Vec::new(),
+            tables_synced: Vec::new(),
+            bytes_generated: 0,
+        }
+    }
+
+    fn emit_activity(&self, step: &str, message: &str, current: u8, total: u8, activity: AgentActivity) {
+        if let Some(ref task_id) = self.task_id {
+            self.state.emit_activity(task_id, step, message, current, total, activity);
+        }
+    }
+
+    fn calculate_speed(&self, items_done: u32) -> (f32, Option<u32>) {
+        if let Some(start) = self.generation_start {
+            let elapsed = start.elapsed().as_secs_f32();
+            if elapsed > 0.0 {
+                let speed = (items_done as f32 / elapsed) * 60.0;
+                return (speed, None);
+            }
+        }
+        (0.0, None)
+    }
+
+    fn build_activity(&self, phase: &str, items_done: u32, items_total: Option<u32>, current_item: Option<&str>) -> AgentActivity {
+        let (speed, eta) = self.calculate_speed(items_done);
+        let mut activity = AgentActivity::new(phase)
+            .with_progress(items_done, items_total)
+            .with_bytes(self.bytes_generated);
+
+        if speed > 0.0 {
+            activity = activity.with_speed(speed, eta);
+        }
+
+        if !self.files_written.is_empty() {
+            activity = activity.with_files(self.files_written.clone());
+        }
+
+        if !self.tables_synced.is_empty() {
+            activity = activity.with_tables(self.tables_synced.clone());
+        }
+
+        if let Some(item) = current_item {
+            activity = activity.with_current_item(item);
+        }
+
+        activity
     }
 
     pub async fn generate_app(
-        &self,
+        &mut self,
         intent: &str,
         session: &UserSession,
     ) -> Result<GeneratedApp, Box<dyn std::error::Error + Send + Sync>> {
+        const TOTAL_STEPS: u8 = 8;
+
+        self.generation_start = Some(std::time::Instant::now());
+        self.files_written.clear();
+        self.tables_synced.clear();
+        self.bytes_generated = 0;
+
         info!(
             "Generating app from intent: {}",
             &intent[..intent.len().min(100)]
@@ -168,23 +240,82 @@ impl AppGenerator {
             ),
         );
 
+        if let Some(ref task_id) = self.task_id {
+            self.state.emit_task_started(task_id, &format!("Generating app: {}", &intent[..intent.len().min(50)]), TOTAL_STEPS);
+        }
+
+        let activity = self.build_activity("analyzing", 0, Some(TOTAL_STEPS as u32), Some("Sending request to LLM"));
+        self.emit_activity(
+            "llm_request",
+            "Analyzing request with AI...",
+            1,
+            TOTAL_STEPS,
+            activity
+        );
+
+        info!("[APP_GENERATOR] Calling generate_complete_app_with_llm for intent: {}", &intent[..intent.len().min(50)]);
+        let llm_start = std::time::Instant::now();
+
         let llm_app = match self.generate_complete_app_with_llm(intent, session.bot_id).await {
             Ok(app) => {
+                let llm_elapsed = llm_start.elapsed();
+                info!("[APP_GENERATOR] LLM generation completed in {:?}: app={}, files={}, tables={}",
+                      llm_elapsed, app.name, app.files.len(), app.tables.len());
                 log_generator_info(
                     &app.name,
                     "LLM successfully generated app structure and files",
                 );
+
+                let total_bytes: u64 = app.files.iter().map(|f| f.content.len() as u64).sum();
+                self.bytes_generated = total_bytes;
+
+                let activity = self.build_activity(
+                    "parsing",
+                    1,
+                    Some(TOTAL_STEPS as u32),
+                    Some(&format!("Generated {} with {} files", app.name, app.files.len()))
+                );
+                self.emit_activity(
+                    "llm_response",
+                    &format!("AI generated {} structure", app.name),
+                    2,
+                    TOTAL_STEPS,
+                    activity
+                );
                 app
             }
             Err(e) => {
+                let llm_elapsed = llm_start.elapsed();
+                error!("[APP_GENERATOR] LLM generation failed after {:?}: {}", llm_elapsed, e);
                 log_generator_error("unknown", "LLM app generation failed", &e.to_string());
+                if let Some(ref task_id) = self.task_id {
+                    self.state.emit_task_error(task_id, "llm_request", &e.to_string());
+                }
                 return Err(e);
             }
         };
 
+        let activity = self.build_activity("parsing", 2, Some(TOTAL_STEPS as u32), Some(&format!("Processing {} structure", llm_app.name)));
+        self.emit_activity("parse_structure", &format!("Parsing {} structure...", llm_app.name), 3, TOTAL_STEPS, activity);
+
         let tables = Self::convert_llm_tables(&llm_app.tables);
 
         if !tables.is_empty() {
+            let table_names: Vec<String> = tables.iter().map(|t| t.name.clone()).collect();
+            let activity = self.build_activity(
+                "database",
+                3,
+                Some(TOTAL_STEPS as u32),
+                Some(&format!("Creating tables: {}", table_names.join(", ")))
+            );
+            self.emit_activity(
+                "create_tables",
+                &format!("Creating {} database tables...", tables.len()),
+                4,
+                TOTAL_STEPS,
+                activity
+            );
+
             let tables_bas_content = Self::generate_table_definitions(&tables)?;
             if let Err(e) = self.append_to_tables_bas(session.bot_id, &tables_bas_content) {
                 log_generator_error(
@@ -203,6 +334,20 @@ impl AppGenerator {
                             result.tables_created, result.fields_added
                         ),
                     );
+                    self.tables_synced = table_names;
+                    let activity = self.build_activity(
+                        "database",
+                        4,
+                        Some(TOTAL_STEPS as u32),
+                        Some(&format!("{} tables, {} fields created", result.tables_created, result.fields_added))
+                    );
+                    self.emit_activity(
+                        "tables_synced",
+                        "Database tables created",
+                        4,
+                        TOTAL_STEPS,
+                        activity
+                    );
                 }
                 Err(e) => {
                     log_generator_error(&llm_app.name, "Failed to sync tables", &e.to_string());
@@ -214,9 +359,37 @@ impl AppGenerator {
         let bucket_name = format!("{}.gbai", bot_name.to_lowercase());
         let drive_app_path = format!(".gbdrive/apps/{}", llm_app.name);
 
+        let total_files = llm_app.files.len();
+        let activity = self.build_activity("writing", 0, Some(total_files as u32), Some("Preparing files"));
+        self.emit_activity(
+            "write_files",
+            &format!("Writing {} app files...", total_files),
+            5,
+            TOTAL_STEPS,
+            activity
+        );
+
         let mut pages = Vec::new();
-        for file in &llm_app.files {
+        for (idx, file) in llm_app.files.iter().enumerate() {
             let drive_path = format!("{}/{}", drive_app_path, file.filename);
+
+            self.files_written.push(file.filename.clone());
+            self.bytes_generated += file.content.len() as u64;
+
+            let activity = self.build_activity(
+                "writing",
+                (idx + 1) as u32,
+                Some(total_files as u32),
+                Some(&file.filename)
+            );
+            self.emit_activity(
+                "write_file",
+                &format!("Writing {}", file.filename),
+                5,
+                TOTAL_STEPS,
+                activity
+            );
+
             if let Err(e) = self
                 .write_to_drive(&bucket_name, &drive_path, &file.content)
                 .await
@@ -236,7 +409,12 @@ impl AppGenerator {
             });
         }
 
+        self.files_written.push("designer.js".to_string());
+        let activity = self.build_activity("configuring", total_files as u32, Some(total_files as u32), Some("designer.js"));
+        self.emit_activity("write_designer", "Creating designer configuration...", 6, TOTAL_STEPS, activity);
+
         let designer_js = Self::generate_designer_js(&llm_app.name);
+        self.bytes_generated += designer_js.len() as u64;
         self.write_to_drive(
             &bucket_name,
             &format!("{}/designer.js", drive_app_path),
@@ -246,8 +424,24 @@ impl AppGenerator {
 
         let mut tools = Vec::new();
         if let Some(llm_tools) = &llm_app.tools {
-            for tool in llm_tools {
+            let tools_count = llm_tools.len();
+            let activity = self.build_activity("tools", 0, Some(tools_count as u32), Some("Creating BASIC tools"));
+            self.emit_activity(
+                "write_tools",
+                &format!("Creating {} tools...", tools_count),
+                7,
+                TOTAL_STEPS,
+                activity
+            );
+
+            for (idx, tool) in llm_tools.iter().enumerate() {
                 let tool_path = format!(".gbdialog/tools/{}", tool.filename);
+                self.files_written.push(format!("tools/{}", tool.filename));
+                self.bytes_generated += tool.content.len() as u64;
+
+                let activity = self.build_activity("tools", (idx + 1) as u32, Some(tools_count as u32), Some(&tool.filename));
+                self.emit_activity("write_tool", &format!("Writing tool {}", tool.filename), 7, TOTAL_STEPS, activity);
+
                 if let Err(e) = self
                     .write_to_drive(&bucket_name, &tool_path, &tool.content)
                     .await
@@ -268,8 +462,24 @@ impl AppGenerator {
 
         let mut schedulers = Vec::new();
         if let Some(llm_schedulers) = &llm_app.schedulers {
-            for scheduler in llm_schedulers {
+            let sched_count = llm_schedulers.len();
+            let activity = self.build_activity("schedulers", 0, Some(sched_count as u32), Some("Creating schedulers"));
+            self.emit_activity(
+                "write_schedulers",
+                &format!("Creating {} schedulers...", sched_count),
+                7,
+                TOTAL_STEPS,
+                activity
+            );
+
+            for (idx, scheduler) in llm_schedulers.iter().enumerate() {
                 let scheduler_path = format!(".gbdialog/schedulers/{}", scheduler.filename);
+                self.files_written.push(format!("schedulers/{}", scheduler.filename));
+                self.bytes_generated += scheduler.content.len() as u64;
+
+                let activity = self.build_activity("schedulers", (idx + 1) as u32, Some(sched_count as u32), Some(&scheduler.filename));
+                self.emit_activity("write_scheduler", &format!("Writing scheduler {}", scheduler.filename), 7, TOTAL_STEPS, activity);
+
                 if let Err(e) = self
                     .write_to_drive(&bucket_name, &scheduler_path, &scheduler.content)
                     .await
@@ -288,16 +498,22 @@ impl AppGenerator {
             }
         }
 
+        let activity = self.build_activity("syncing", TOTAL_STEPS as u32 - 1, Some(TOTAL_STEPS as u32), Some("Deploying to site"));
+        self.emit_activity("sync_site", "Syncing app to site...", 8, TOTAL_STEPS, activity);
+
         self.sync_app_to_site_root(&bucket_name, &llm_app.name, session.bot_id)
             .await?;
 
+        let elapsed = self.generation_start.map(|s| s.elapsed().as_secs()).unwrap_or(0);
+
         log_generator_info(
             &llm_app.name,
             &format!(
-                "App generated: {} files, {} tables, {} tools",
+                "App generated: {} files, {} tables, {} tools in {}s",
                 pages.len(),
                 tables.len(),
-                tools.len()
+                tools.len(),
+                elapsed
             ),
         );
 
@@ -306,6 +522,24 @@ impl AppGenerator {
             llm_app.name, bucket_name, drive_app_path
         );
 
+        if let Some(ref task_id) = self.task_id {
+            let final_activity = AgentActivity::new("completed")
+                .with_progress(TOTAL_STEPS as u32, Some(TOTAL_STEPS as u32))
+                .with_bytes(self.bytes_generated)
+                .with_files(self.files_written.clone())
+                .with_tables(self.tables_synced.clone());
+
+            let event = crate::core::shared::state::TaskProgressEvent::new(task_id, "complete", &format!(
+                "App '{}' created: {} files, {} tables, {} bytes in {}s",
+                llm_app.name, pages.len(), tables.len(), self.bytes_generated, elapsed
+            ))
+            .with_progress(TOTAL_STEPS, TOTAL_STEPS)
+            .with_activity(final_activity)
+            .completed();
+
+            self.state.broadcast_task_progress(event);
+        }
+
         Ok(GeneratedApp {
             id: Uuid::new_v4().to_string(),
             name: llm_app.name,
@@ -436,8 +670,14 @@ guid, string, text, integer, decimal, boolean, date, datetime, json
 === USER REQUEST ===
 "{intent}"
 
-=== YOUR TASK ===
-Generate a complete application based on the user's request.
+=== YOLO MODE - JUST BUILD IT ===
+DO NOT ask questions. DO NOT request clarification. Just CREATE the app NOW.
+
+If user says "calculator" → build a full-featured calculator with basic ops, scientific functions, history
+If user says "CRM" → build customer management with contacts, companies, deals, notes
+If user says "inventory" → build stock tracking with products, categories, movements
+If user says "booking" → build appointment scheduler with calendar, slots, confirmations
+If user says ANYTHING → interpret creatively and BUILD SOMETHING AWESOME
 
 Respond with a single JSON object:
 {{
@@ -469,15 +709,16 @@ Respond with a single JSON object:
     ]
 }}
 
-IMPORTANT:
-- For simple utilities (calculator, timer, converter): tables can be empty [], focus on files
+CRITICAL RULES:
+- For utilities (calculator, timer, converter, BMI, mortgage): tables = [], focus on interactive HTML/JS
 - For data apps (CRM, inventory): design proper tables and CRUD pages
-- Generate ALL files completely - no shortcuts
+- Generate ALL files completely - no placeholders, no "...", no shortcuts
 - CSS must be comprehensive with variables, responsive design, dark mode
 - Every HTML page needs proper structure with all required scripts
 - Replace APP_NAME_HERE with actual app name in data-app-name attribute
+- BE CREATIVE - add extra features the user didn't ask for but would love
 
-Respond with valid JSON only."#
+Respond with valid JSON only. NO QUESTIONS. JUST BUILD."#
         );
 
         let response = self.call_llm(&prompt, bot_id).await?;
@@ -557,7 +798,6 @@ Respond with valid JSON only."#
     ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
         #[cfg(feature = "llm")]
         {
-            // Get model and key from bot configuration
             let config_manager = ConfigManager::new(self.state.conn.clone());
             let model = config_manager
                 .get_config(&bot_id, "llm-model", None)
@@ -579,15 +819,24 @@ Respond with valid JSON only."#
                 "max_tokens": 16000
             });
 
+            let prompt_len = prompt.len();
+            info!("[APP_GENERATOR] Starting LLM call: model={}, prompt_len={} chars", model, prompt_len);
+            let start = std::time::Instant::now();
+
             match self
                 .state
                 .llm_provider
                 .generate(prompt, &llm_config, &model, &key)
                 .await
             {
-                Ok(response) => return Ok(response),
+                Ok(response) => {
+                    let elapsed = start.elapsed();
+                    info!("[APP_GENERATOR] LLM call succeeded: response_len={} chars, elapsed={:?}", response.len(), elapsed);
+                    return Ok(response);
+                }
                 Err(e) => {
-                    warn!("LLM call failed: {}", e);
+                    let elapsed = start.elapsed();
+                    error!("[APP_GENERATOR] LLM call failed after {:?}: {}", elapsed, e);
                     return Err(e);
                 }
             }
diff --git a/src/auto_task/autotask_api.rs b/src/auto_task/autotask_api.rs
index 1f7e959a0..37b0fd299 100644
--- a/src/auto_task/autotask_api.rs
+++ b/src/auto_task/autotask_api.rs
@@ -324,11 +324,11 @@ pub async fn create_and_execute_handler(
     // Update status to running
     let _ = update_task_status_db(&state, task_id, "running", None);
 
-    // Use IntentClassifier to classify and process
+    // Use IntentClassifier to classify and process with task tracking
     let classifier = IntentClassifier::new(Arc::clone(&state));
 
     match classifier
-        .classify_and_process(&request.intent, &session)
+        .classify_and_process_with_task_id(&request.intent, &session, Some(task_id.to_string()))
         .await
     {
         Ok(result) => {
diff --git a/src/auto_task/intent_classifier.rs b/src/auto_task/intent_classifier.rs
index 9e337f531..064deaeee 100644
--- a/src/auto_task/intent_classifier.rs
+++ b/src/auto_task/intent_classifier.rs
@@ -169,29 +169,20 @@ impl IntentClassifier {
         &self,
         intent: &str,
         session: &UserSession,
+    ) -> Result<IntentResult, Box<dyn std::error::Error + Send + Sync>> {
+        self.classify_and_process_with_task_id(intent, session, None).await
+    }
+
+    /// Classify and then process the intent through the appropriate handler with task tracking
+    pub async fn classify_and_process_with_task_id(
+        &self,
+        intent: &str,
+        session: &UserSession,
+        task_id: Option<String>,
     ) -> Result<IntentResult, Box<dyn std::error::Error + Send + Sync>> {
         let classification = self.classify(intent, session).await?;
 
-        // If clarification is needed, return early
-        if classification.requires_clarification {
-            return Ok(IntentResult {
-                success: false,
-                intent_type: classification.intent_type,
-                message: classification
-                    .clarification_question
-                    .unwrap_or_else(|| "Could you please provide more details?".to_string()),
-                created_resources: Vec::new(),
-                app_url: None,
-                task_id: None,
-                schedule_id: None,
-                tool_triggers: Vec::new(),
-                next_steps: vec!["Provide more information".to_string()],
-                error: None,
-            });
-        }
-
-        // Route to appropriate handler
-        self.process_classified_intent(&classification, session)
+        self.process_classified_intent_with_task_id(&classification, session, task_id)
             .await
     }
 
@@ -200,6 +191,16 @@ impl IntentClassifier {
         &self,
         classification: &ClassifiedIntent,
         session: &UserSession,
+    ) -> Result<IntentResult, Box<dyn std::error::Error + Send + Sync>> {
+        self.process_classified_intent_with_task_id(classification, session, None).await
+    }
+
+    /// Process a classified intent through the appropriate handler with task tracking
+    pub async fn process_classified_intent_with_task_id(
+        &self,
+        classification: &ClassifiedIntent,
+        session: &UserSession,
+        task_id: Option<String>,
     ) -> Result<IntentResult, Box<dyn std::error::Error + Send + Sync>> {
         info!(
             "Processing {} intent: {}",
@@ -208,7 +209,7 @@ impl IntentClassifier {
         );
 
         match classification.intent_type {
-            IntentType::AppCreate => self.handle_app_create(classification, session).await,
+            IntentType::AppCreate => self.handle_app_create(classification, session, task_id).await,
             IntentType::Todo => self.handle_todo(classification, session),
             IntentType::Monitor => self.handle_monitor(classification, session),
             IntentType::Action => self.handle_action(classification, session).await,
@@ -231,8 +232,9 @@ impl IntentClassifier {
 USER REQUEST: "{intent}"
 
 INTENT TYPES:
-- APP_CREATE: Create a full application (CRM, inventory, booking system, etc.)
-  Keywords: "create app", "build system", "make application", "CRM", "management system"
+- APP_CREATE: Create a full application, utility, calculator, tool, system, etc.
+  Keywords: "create", "build", "make", "calculator", "app", "system", "CRM", "tool"
+  IMPORTANT: If user wants to CREATE anything (app, calculator, converter, timer, etc), classify as APP_CREATE
 
 - TODO: Simple task or reminder
   Keywords: "call", "remind me", "don't forget", "tomorrow", "later"
@@ -252,6 +254,9 @@ INTENT TYPES:
 - TOOL: Create a voice/chat command
   Keywords: "when I say", "create command", "shortcut for", "trigger"
 
+YOLO MODE: NEVER ask for clarification. Always make a decision and proceed.
+For APP_CREATE: Just build whatever makes sense. A "calculator" = basic calculator app. A "CRM" = customer management app. Be creative and decisive.
+
 Respond with JSON only:
 {{
     "intent_type": "APP_CREATE|TODO|MONITOR|ACTION|SCHEDULE|GOAL|TOOL|UNKNOWN",
@@ -269,13 +274,17 @@ Respond with JSON only:
     "suggested_name": "short name for the resource",
     "requires_clarification": false,
     "clarification_question": null,
-    "alternatives": [
-        {{"type": "OTHER_TYPE", "confidence": 0.3, "reason": "could also be..."}}
-    ]
+    "alternatives": []
 }}"#
         );
 
+        info!("[INTENT_CLASSIFIER] Starting LLM call for classification, prompt_len={} chars", prompt.len());
+        let start = std::time::Instant::now();
+
         let response = self.call_llm(&prompt, bot_id).await?;
+        let elapsed = start.elapsed();
+        info!("[INTENT_CLASSIFIER] LLM classification completed in {:?}, response_len={} chars", elapsed, response.len());
+        trace!("LLM classification response: {}", &response[..response.len().min(500)]);
         Self::parse_classification_response(&response, intent)
     }
 
@@ -321,8 +330,19 @@ Respond with JSON only:
             reason: String,
         }
 
+        // Clean response - remove markdown code blocks if present
+        let cleaned = response
+            .trim()
+            .trim_start_matches("```json")
+            .trim_start_matches("```JSON")
+            .trim_start_matches("```")
+            .trim_end_matches("```")
+            .trim();
+
+        trace!("Cleaned classification response: {}", &cleaned[..cleaned.len().min(300)]);
+
         // Try to parse, fall back to heuristic classification
-        let parsed: Result<LlmResponse, _> = serde_json::from_str(response);
+        let parsed: Result<LlmResponse, _> = serde_json::from_str(cleaned);
 
         match parsed {
             Ok(resp) => {
@@ -380,6 +400,7 @@ Respond with JSON only:
             }
             Err(e) => {
                 warn!("Failed to parse LLM response, using heuristic: {e}");
+                trace!("Raw response that failed to parse: {}", &response[..response.len().min(200)]);
                 Self::classify_heuristic(original_intent)
             }
         }
@@ -397,6 +418,31 @@ Respond with JSON only:
             || lower.contains("management system")
             || lower.contains("inventory")
             || lower.contains("booking")
+            || lower.contains("calculator")
+            || lower.contains("website")
+            || lower.contains("webpage")
+            || lower.contains("web page")
+            || lower.contains("landing page")
+            || lower.contains("dashboard")
+            || lower.contains("form")
+            || lower.contains("todo list")
+            || lower.contains("todo app")
+            || lower.contains("chat")
+            || lower.contains("blog")
+            || lower.contains("portfolio")
+            || lower.contains("store")
+            || lower.contains("shop")
+            || lower.contains("e-commerce")
+            || lower.contains("ecommerce")
+            || (lower.contains("create") && lower.contains("html"))
+            || (lower.contains("make") && lower.contains("html"))
+            || (lower.contains("build") && lower.contains("html"))
+            || (lower.contains("create a") && (lower.contains("simple") || lower.contains("basic")))
+            || (lower.contains("make a") && (lower.contains("simple") || lower.contains("basic")))
+            || (lower.contains("build a") && (lower.contains("simple") || lower.contains("basic")))
+            || lower.contains("criar")
+            || lower.contains("fazer")
+            || lower.contains("construir")
         {
             (IntentType::AppCreate, 0.75)
         } else if lower.contains("remind")
@@ -461,10 +507,15 @@ Respond with JSON only:
         &self,
         classification: &ClassifiedIntent,
         session: &UserSession,
+        task_id: Option<String>,
     ) -> Result<IntentResult, Box<dyn std::error::Error + Send + Sync>> {
         info!("Handling APP_CREATE intent");
 
-        let app_generator = AppGenerator::new(self.state.clone());
+        let mut app_generator = if let Some(tid) = task_id {
+            AppGenerator::with_task_id(self.state.clone(), tid)
+        } else {
+            AppGenerator::new(self.state.clone())
+        };
 
         match app_generator
             .generate_app(&classification.original_text, session)
diff --git a/src/auto_task/mod.rs b/src/auto_task/mod.rs
index 92b09a607..0f7207de2 100644
--- a/src/auto_task/mod.rs
+++ b/src/auto_task/mod.rs
@@ -33,6 +33,18 @@ pub use intent_compiler::{CompiledIntent, IntentCompiler};
 pub use safety_layer::{AuditEntry, ConstraintCheckResult, SafetyLayer, SimulationResult};
 
 use crate::core::urls::ApiUrls;
+use crate::shared::state::AppState;
+use axum::{
+    extract::{
+        ws::{Message, WebSocket, WebSocketUpgrade},
+        Path, Query, State,
+    },
+    response::IntoResponse,
+};
+use futures::{SinkExt, StreamExt};
+use log::{debug, error, info, warn};
+use std::collections::HashMap;
+use std::sync::Arc;
 
 pub fn configure_autotask_routes() -> axum::Router<std::sync::Arc<crate::shared::state::AppState>> {
     use axum::routing::{get, post};
@@ -102,6 +114,141 @@ pub fn configure_autotask_routes() -> axum::Router<std::sync::Arc<crate::shared:
         .route("/api/app-logs/stats", get(handle_log_stats))
         .route("/api/app-logs/clear/{app_name}", post(handle_clear_logs))
         .route("/api/app-logs/logger.js", get(handle_logger_js))
+        .route("/ws/task-progress", get(task_progress_websocket_handler))
+        .route("/ws/task-progress/{task_id}", get(task_progress_by_id_websocket_handler))
+}
+
+pub async fn task_progress_websocket_handler(
+    ws: WebSocketUpgrade,
+    State(state): State<Arc<AppState>>,
+    Query(params): Query<HashMap<String, String>>,
+) -> impl IntoResponse {
+    let task_filter = params.get("task_id").cloned();
+
+    info!(
+        "Task progress WebSocket connection request, filter: {:?}",
+        task_filter
+    );
+
+    ws.on_upgrade(move |socket| handle_task_progress_websocket(socket, state, task_filter))
+}
+
+pub async fn task_progress_by_id_websocket_handler(
+    ws: WebSocketUpgrade,
+    State(state): State<Arc<AppState>>,
+    Path(task_id): Path<String>,
+) -> impl IntoResponse {
+    info!(
+        "Task progress WebSocket connection for task: {}",
+        task_id
+    );
+
+    ws.on_upgrade(move |socket| handle_task_progress_websocket(socket, state, Some(task_id)))
+}
+
+async fn handle_task_progress_websocket(
+    socket: WebSocket,
+    state: Arc<AppState>,
+    task_filter: Option<String>,
+) {
+    let (mut sender, mut receiver) = socket.split();
+
+    info!("Task progress WebSocket connected, filter: {:?}", task_filter);
+
+    let welcome = serde_json::json!({
+        "type": "connected",
+        "message": "Connected to task progress stream",
+        "filter": task_filter,
+        "timestamp": chrono::Utc::now().to_rfc3339()
+    });
+
+    if let Ok(welcome_str) = serde_json::to_string(&welcome) {
+        if sender.send(Message::Text(welcome_str)).await.is_err() {
+            error!("Failed to send welcome message to task progress WebSocket");
+            return;
+        }
+    }
+
+    let mut broadcast_rx = if let Some(broadcast_tx) = state.task_progress_broadcast.as_ref() {
+        broadcast_tx.subscribe()
+    } else {
+        warn!("No task progress broadcast channel available");
+        return;
+    };
+
+    let task_filter_clone = task_filter.clone();
+    let send_task = tokio::spawn(async move {
+        loop {
+            match broadcast_rx.recv().await {
+                Ok(event) => {
+                    let should_send = task_filter_clone.is_none()
+                        || task_filter_clone.as_ref() == Some(&event.task_id);
+
+                    if should_send {
+                        if let Ok(json_str) = serde_json::to_string(&event) {
+                            debug!(
+                                "Sending task progress to WebSocket: {} - {}",
+                                event.task_id, event.step
+                            );
+                            if sender.send(Message::Text(json_str)).await.is_err() {
+                                error!("Failed to send task progress to WebSocket");
+                                break;
+                            }
+                        }
+                    }
+                }
+                Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
+                    warn!("Task progress WebSocket lagged by {} messages", n);
+                }
+                Err(tokio::sync::broadcast::error::RecvError::Closed) => {
+                    info!("Task progress broadcast channel closed");
+                    break;
+                }
+            }
+        }
+    });
+
+    let recv_task = tokio::spawn(async move {
+        while let Some(msg) = receiver.next().await {
+            match msg {
+                Ok(Message::Text(text)) => {
+                    debug!("Received text from task progress WebSocket: {}", text);
+                    if text == "ping" {
+                        debug!("Received ping from task progress client");
+                    }
+                }
+                Ok(Message::Ping(data)) => {
+                    debug!("Received ping from task progress WebSocket");
+                    drop(data);
+                }
+                Ok(Message::Pong(_)) => {
+                    debug!("Received pong from task progress WebSocket");
+                }
+                Ok(Message::Close(_)) => {
+                    info!("Task progress WebSocket client disconnected");
+                    break;
+                }
+                Ok(Message::Binary(_)) => {
+                    debug!("Received binary from task progress WebSocket (ignored)");
+                }
+                Err(e) => {
+                    error!("Task progress WebSocket error: {}", e);
+                    break;
+                }
+            }
+        }
+    });
+
+    tokio::select! {
+        _ = send_task => {
+            info!("Task progress send task completed");
+        }
+        _ = recv_task => {
+            info!("Task progress receive task completed");
+        }
+    }
+
+    info!("Task progress WebSocket connection closed, filter: {:?}", task_filter);
 }
 
 async fn handle_client_logs(
diff --git a/src/console/status_panel.rs b/src/console/status_panel.rs
index 0ea65f37d..5d4ec7dd7 100644
--- a/src/console/status_panel.rs
+++ b/src/console/status_panel.rs
@@ -9,11 +9,101 @@ use diesel::prelude::*;
 use std::sync::Arc;
 use sysinfo::System;
 
+/// Cache for component status to avoid spawning pgrep processes too frequently
+struct ComponentStatusCache {
+    statuses: Vec<(String, bool, String)>, // (name, is_running, port)
+    last_check: std::time::Instant,
+}
+
+impl ComponentStatusCache {
+    fn new() -> Self {
+        Self {
+            statuses: Vec::new(),
+            last_check: std::time::Instant::now() - std::time::Duration::from_secs(60),
+        }
+    }
+
+    fn needs_refresh(&self) -> bool {
+        // Only check component status every 10 seconds
+        self.last_check.elapsed() > std::time::Duration::from_secs(10)
+    }
+
+    fn refresh(&mut self) {
+        let components = vec![
+            ("Tables", "postgres", "5432"),
+            ("Cache", "valkey-server", "6379"),
+            ("Drive", "minio", "9000"),
+            ("LLM", "llama-server", "8081"),
+        ];
+
+        self.statuses.clear();
+        for (comp_name, process, port) in components {
+            let is_running = Self::check_component_running(process);
+            self.statuses
+                .push((comp_name.to_string(), is_running, port.to_string()));
+        }
+        self.last_check = std::time::Instant::now();
+    }
+
+    fn check_component_running(process_name: &str) -> bool {
+        std::process::Command::new("pgrep")
+            .arg("-f")
+            .arg(process_name)
+            .output()
+            .map(|output| !output.stdout.is_empty())
+            .unwrap_or(false)
+    }
+
+    fn get_statuses(&self) -> &[(String, bool, String)] {
+        &self.statuses
+    }
+}
+
+/// Cache for bot list to avoid database queries too frequently
+struct BotListCache {
+    bot_list: Vec<(String, uuid::Uuid)>,
+    last_check: std::time::Instant,
+}
+
+impl BotListCache {
+    fn new() -> Self {
+        Self {
+            bot_list: Vec::new(),
+            last_check: std::time::Instant::now() - std::time::Duration::from_secs(60),
+        }
+    }
+
+    fn needs_refresh(&self) -> bool {
+        // Only query database every 5 seconds
+        self.last_check.elapsed() > std::time::Duration::from_secs(5)
+    }
+
+    fn refresh(&mut self, app_state: &Arc<AppState>) {
+        if let Ok(mut conn) = app_state.conn.get() {
+            if let Ok(list) = bots
+                .filter(is_active.eq(true))
+                .select((name, id))
+                .load::<(String, uuid::Uuid)>(&mut *conn)
+            {
+                self.bot_list = list;
+            }
+        }
+        self.last_check = std::time::Instant::now();
+    }
+
+    fn get_bots(&self) -> &[(String, uuid::Uuid)] {
+        &self.bot_list
+    }
+}
+
 pub struct StatusPanel {
     app_state: Arc<AppState>,
     last_update: std::time::Instant,
+    last_system_refresh: std::time::Instant,
     cached_content: String,
     system: System,
+    component_cache: ComponentStatusCache,
+    bot_cache: BotListCache,
 }
 
 impl std::fmt::Debug for StatusPanel {
@@ -29,24 +119,42 @@ impl std::fmt::Debug for StatusPanel {
 
 impl StatusPanel {
     pub fn new(app_state: Arc<AppState>) -> Self {
+        // Only initialize with CPU and memory info, not all system info
+        let mut system = System::new();
+        system.refresh_cpu_all();
+        system.refresh_memory();
+
         Self {
             app_state,
             last_update: std::time::Instant::now(),
+            last_system_refresh: std::time::Instant::now(),
             cached_content: String::new(),
-            system: System::new_all(),
+            system,
+            component_cache: ComponentStatusCache::new(),
+            bot_cache: BotListCache::new(),
         }
     }
 
     pub fn update(&mut self) -> Result<(), std::io::Error> {
-        self.system.refresh_all();
+        // Only refresh system metrics every 2 seconds instead of every call
+        // This is the main CPU hog - refresh_all() is very expensive
+        if self.last_system_refresh.elapsed() > std::time::Duration::from_secs(2) {
+            // Only refresh CPU and memory, not ALL system info
+            self.system.refresh_cpu_all();
+            self.system.refresh_memory();
+            self.last_system_refresh = std::time::Instant::now();
+        }
+
+        // Refresh component status cache if needed (every 10 seconds)
+        if self.component_cache.needs_refresh() {
+            self.component_cache.refresh();
+        }
+
+        // Refresh bot list cache if needed (every 5 seconds)
+        if self.bot_cache.needs_refresh() {
+            self.bot_cache.refresh(&self.app_state);
+        }
 
-        let _tokens = (std::time::SystemTime::now()
-            .duration_since(std::time::UNIX_EPOCH)
-            .expect("system time after UNIX epoch")
-            .as_secs()
-            % 1000) as usize;
-        #[cfg(feature = "nvidia")]
-        let _system_metrics = nvidia::get_system_metrics().unwrap_or_default();
         self.cached_content = self.render(None);
         self.last_update = std::time::Instant::now();
         Ok(())
@@ -60,10 +168,11 @@ impl StatusPanel {
             String::new(),
         ];
 
-        self.system.refresh_cpu_all();
+        // Use cached CPU usage - don't refresh here
         let cpu_usage = self.system.global_cpu_usage();
         let cpu_bar = Self::create_progress_bar(cpu_usage, 20);
         lines.push(format!(" CPU: {:5.1}% {}", cpu_usage, cpu_bar));
+
         #[cfg(feature = "nvidia")]
         {
             let system_metrics = get_system_metrics().unwrap_or_default();
@@ -81,7 +190,11 @@ impl StatusPanel {
 
         let total_mem = self.system.total_memory() as f32 / 1024.0 / 1024.0 / 1024.0;
         let used_mem = self.system.used_memory() as f32 / 1024.0 / 1024.0 / 1024.0;
-        let mem_percentage = (used_mem / total_mem) * 100.0;
+        let mem_percentage = if total_mem > 0.0 {
+            (used_mem / total_mem) * 100.0
+        } else {
+            0.0
+        };
         let mem_bar = Self::create_progress_bar(mem_percentage, 20);
         lines.push(format!(
             " MEM: {:5.1}% {} ({:.1}/{:.1} GB)",
@@ -94,15 +207,9 @@ impl StatusPanel {
         lines.push("╚═══════════════════════════════════════╝".to_string());
         lines.push("".to_string());
 
-        let components = vec![
-            ("Tables", "postgres", "5432"),
-            ("Cache", "valkey-server", "6379"),
-            ("Drive", "minio", "9000"),
-            ("LLM", "llama-server", "8081"),
-        ];
-
-        for (comp_name, process, port) in components {
-            let status = if Self::check_component_running(process) {
+        // Use cached component statuses instead of spawning pgrep every time
+        for (comp_name, is_running, port) in self.component_cache.get_statuses() {
+            let status = if *is_running {
                 format!(" ONLINE  [Port: {}]", port)
             } else {
                 " OFFLINE".to_string()
@@ -116,58 +223,44 @@ impl StatusPanel {
         lines.push("╚═══════════════════════════════════════╝".to_string());
         lines.push("".to_string());
 
-        if let Ok(mut conn) = self.app_state.conn.get() {
-            match bots
-                .filter(is_active.eq(true))
-                .select((name, id))
-                .load::<(String, uuid::Uuid)>(&mut *conn)
-            {
-                Ok(bot_list) => {
-                    if bot_list.is_empty() {
-                        lines.push(" No active bots".to_string());
+        // Use cached bot list instead of querying database every time
+        let bot_list = self.bot_cache.get_bots();
+        if bot_list.is_empty() {
+            lines.push(" No active bots".to_string());
+        } else {
+            for (bot_name, bot_id) in bot_list {
+                let marker = if let Some(ref selected) = selected_bot {
+                    if selected == bot_name {
+                        "►"
                     } else {
-                        for (bot_name, bot_id) in bot_list {
-                            let marker = if let Some(ref selected) = selected_bot {
-                                if selected == &bot_name {
-                                    "►"
-                                } else {
-                                    " "
-                                }
-                            } else {
-                                " "
-                            };
-                            lines.push(format!(" {}  {}", marker, bot_name));
+                        " "
+                    }
+                } else {
+                    " "
+                };
+                lines.push(format!(" {}  {}", marker, bot_name));
 
-                            if let Some(ref selected) = selected_bot {
-                                if selected == &bot_name {
-                                    lines.push("".to_string());
-                                    lines.push(" ┌─ Bot Configuration ─────────┐".to_string());
-                                    let config_manager =
-                                        ConfigManager::new(self.app_state.conn.clone());
-                                    let llm_model = config_manager
-                                        .get_config(&bot_id, "llm-model", None)
-                                        .unwrap_or_else(|_| "N/A".to_string());
-                                    lines.push(format!("  Model: {}", llm_model));
-                                    let ctx_size = config_manager
-                                        .get_config(&bot_id, "llm-server-ctx-size", None)
-                                        .unwrap_or_else(|_| "N/A".to_string());
-                                    lines.push(format!("  Context: {}", ctx_size));
-                                    let temp = config_manager
-                                        .get_config(&bot_id, "llm-temperature", None)
-                                        .unwrap_or_else(|_| "N/A".to_string());
-                                    lines.push(format!("  Temp: {}", temp));
-                                    lines.push(" └─────────────────────────────┘".to_string());
-                                }
-                            }
-                        }
+                if let Some(ref selected) = selected_bot {
+                    if selected == bot_name {
+                        lines.push("".to_string());
+                        lines.push(" ┌─ Bot Configuration ─────────┐".to_string());
+                        let config_manager = ConfigManager::new(self.app_state.conn.clone());
+                        let llm_model = config_manager
+                            .get_config(bot_id, "llm-model", None)
+                            .unwrap_or_else(|_| "N/A".to_string());
+                        lines.push(format!("  Model: {}", llm_model));
+                        let ctx_size = config_manager
+                            .get_config(bot_id, "llm-server-ctx-size", None)
+                            .unwrap_or_else(|_| "N/A".to_string());
+                        lines.push(format!("  Context: {}", ctx_size));
+                        let temp = config_manager
+                            .get_config(bot_id, "llm-temperature", None)
+                            .unwrap_or_else(|_| "N/A".to_string());
+                        lines.push(format!("  Temp: {}", temp));
+                        lines.push(" └─────────────────────────────┘".to_string());
                     }
                 }
-                Err(_) => {
-                    lines.push(" Error loading bots".to_string());
-                }
             }
-        } else {
-            lines.push(" Database locked".to_string());
         }
 
         lines.push("".to_string());
@@ -195,11 +288,6 @@ impl StatusPanel {
     }
 
     pub fn check_component_running(process_name: &str) -> bool {
-        std::process::Command::new("pgrep")
-            .arg("-f")
-            .arg(process_name)
-            .output()
-            .map(|output| !output.stdout.is_empty())
-            .unwrap_or(false)
+        ComponentStatusCache::check_component_running(process_name)
     }
 }
diff --git a/src/core/automation/mod.rs b/src/core/automation/mod.rs
index c4426f08f..2ca0bd09d 100644
--- a/src/core/automation/mod.rs
+++ b/src/core/automation/mod.rs
@@ -19,11 +19,12 @@ pub struct AutomationService {
 impl AutomationService {
     #[must_use]
     pub fn new(state: Arc<AppState>) -> Self {
-        crate::llm::episodic_memory::start_episodic_memory_scheduler(Arc::clone(&state));
+        // Temporarily disabled to debug CPU spike
+        // crate::llm::episodic_memory::start_episodic_memory_scheduler(Arc::clone(&state));
         Self { state }
     }
     pub async fn spawn(self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-        let mut ticker = interval(Duration::from_secs(5));
+        let mut ticker = interval(Duration::from_secs(60));
         loop {
             ticker.tick().await;
             if let Err(e) = self.check_scheduled_tasks().await {
@@ -31,7 +32,7 @@ impl AutomationService {
             }
         }
     }
-    async fn check_scheduled_tasks(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
+    pub async fn check_scheduled_tasks(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
         use crate::shared::models::system_automations::dsl::{
             id, is_active, kind, last_triggered as lt_column, system_automations,
         };
diff --git a/src/core/bootstrap/mod.rs b/src/core/bootstrap/mod.rs
index 7bd6bf9e3..9267a5e0e 100644
--- a/src/core/bootstrap/mod.rs
+++ b/src/core/bootstrap/mod.rs
@@ -185,7 +185,7 @@ impl BootstrapManager {
         if pm.is_installed("vault") {
             let vault_already_running = Command::new("sh")
                 .arg("-c")
-                .arg("curl -f -s 'http://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200' >/dev/null 2>&1")
+                .arg("curl -f -sk 'https://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200' >/dev/null 2>&1")
                 .stdout(std::process::Stdio::null())
                 .stderr(std::process::Stdio::null())
                 .status()
@@ -208,7 +208,7 @@ impl BootstrapManager {
                 for i in 0..10 {
                     let vault_ready = Command::new("sh")
                         .arg("-c")
-                        .arg("curl -f -s 'http://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200' >/dev/null 2>&1")
+                        .arg("curl -f -sk 'https://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200' >/dev/null 2>&1")
                         .stdout(std::process::Stdio::null())
                         .stderr(std::process::Stdio::null())
                         .status()
@@ -285,8 +285,28 @@ impl BootstrapManager {
             info!("Starting PostgreSQL database...");
             match pm.start("tables") {
                 Ok(_child) => {
-                    tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
-                    info!("PostgreSQL started");
+                    let pg_isready = self.stack_dir("bin/tables/bin/pg_isready");
+                    let mut ready = false;
+                    for attempt in 1..=30 {
+                        tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
+                        let status = std::process::Command::new(&pg_isready)
+                            .args(["-h", "localhost", "-p", "5432", "-U", "gbuser"])
+                            .stdout(std::process::Stdio::null())
+                            .stderr(std::process::Stdio::null())
+                            .status();
+                        if status.map(|s| s.success()).unwrap_or(false) {
+                            ready = true;
+                            info!("PostgreSQL started and ready (attempt {})", attempt);
+                            break;
+                        }
+                        if attempt % 5 == 0 {
+                            info!("Waiting for PostgreSQL to be ready... (attempt {}/30)", attempt);
+                        }
+                    }
+                    if !ready {
+                        error!("PostgreSQL failed to become ready after 30 seconds");
+                        return Err(anyhow::anyhow!("PostgreSQL failed to start properly"));
+                    }
                 }
                 Err(e) => {
                     warn!("PostgreSQL might already be running: {}", e);
@@ -321,7 +341,7 @@ impl BootstrapManager {
                             for i in 0..15 {
                                 let drive_ready = Command::new("sh")
                                     .arg("-c")
-                                    .arg("curl -sfk 'https://127.0.0.1:9000/minio/health/live' >/dev/null 2>&1")
+                                    .arg("curl -sf --cacert ./botserver-stack/conf/drive/certs/CAs/ca.crt 'https://127.0.0.1:9000/minio/health/live' >/dev/null 2>&1")
                                     .stdout(std::process::Stdio::null())
                                     .stderr(std::process::Stdio::null())
                                     .status()
@@ -393,7 +413,7 @@ impl BootstrapManager {
         if installer.is_installed("vault") {
             let vault_running = Command::new("sh")
                 .arg("-c")
-                .arg("curl -f -s 'http://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200' >/dev/null 2>&1")
+                .arg("curl -f -sk 'https://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200' >/dev/null 2>&1")
                 .stdout(std::process::Stdio::null())
                 .stderr(std::process::Stdio::null())
                 .status()
@@ -539,7 +559,7 @@ impl BootstrapManager {
 
     async fn ensure_vault_unsealed(&self) -> Result<()> {
         let vault_init_path = self.stack_dir("conf/vault/init.json");
-        let vault_addr = "http://localhost:8200";
+        let vault_addr = "https://localhost:8200";
 
         if !vault_init_path.exists() {
             return Err(anyhow::anyhow!(
@@ -685,7 +705,7 @@ impl BootstrapManager {
 
         std::env::set_var("VAULT_ADDR", vault_addr);
         std::env::set_var("VAULT_TOKEN", &root_token);
-        std::env::set_var("VAULT_SKIP_VERIFY", "true");
+        std::env::set_var("VAULT_CACERT", "./botserver-stack/conf/system/certificates/ca/ca.crt");
 
         std::env::set_var(
             "VAULT_CACERT",
@@ -1398,7 +1418,7 @@ meet        IN      A       127.0.0.1
             }
 
             let health_check = std::process::Command::new("curl")
-                .args(["-f", "-s", "http://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200"])
+                .args(["-f", "-sk", "https://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200"])
                 .output();
 
             if let Ok(output) = health_check {
@@ -1446,9 +1466,10 @@ meet        IN      A       127.0.0.1
             ));
         }
 
-        let vault_addr = "http://localhost:8200";
+        let vault_addr = "https://localhost:8200";
+        let ca_cert_path = "./botserver-stack/conf/system/certificates/ca/ca.crt";
         std::env::set_var("VAULT_ADDR", vault_addr);
-        std::env::set_var("VAULT_SKIP_VERIFY", "true");
+        std::env::set_var("VAULT_CACERT", ca_cert_path);
 
         let (unseal_key, root_token) = if vault_init_path.exists() {
             info!("Reading Vault initialization from init.json...");
@@ -1485,8 +1506,8 @@ meet        IN      A       127.0.0.1
             let init_output = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} {} operator init -key-shares=1 -key-threshold=1 -format=json",
-                    vault_addr, vault_bin
+                    "VAULT_ADDR={} VAULT_CACERT={} {} operator init -key-shares=1 -key-threshold=1 -format=json",
+                    vault_addr, ca_cert_path, vault_bin
                 ))
                 .output()?;
 
@@ -1501,8 +1522,8 @@ meet        IN      A       127.0.0.1
                         let status_check = std::process::Command::new("sh")
                             .arg("-c")
                             .arg(format!(
-                                "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} {} status -format=json 2>/dev/null",
-                                vault_addr, vault_bin
+                                "VAULT_ADDR={} VAULT_CACERT={} {} status -format=json 2>/dev/null",
+                                vault_addr, ca_cert_path, vault_bin
                             ))
                             .output();
 
@@ -1568,8 +1589,8 @@ meet        IN      A       127.0.0.1
         let unseal_output = std::process::Command::new("sh")
             .arg("-c")
             .arg(format!(
-                "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} {} operator unseal {}",
-                vault_addr, vault_bin, unseal_key
+                "VAULT_ADDR={} VAULT_CACERT={} {} operator unseal {}",
+                vault_addr, ca_cert_path, vault_bin, unseal_key
             ))
             .output()?;
 
@@ -1598,9 +1619,7 @@ meet        IN      A       127.0.0.1
 # Vault Configuration - THESE ARE THE ONLY ALLOWED ENV VARS
 VAULT_ADDR={}
 VAULT_TOKEN={}
-
-# Vault uses HTTP for local development (TLS disabled in config.hcl)
-# In production, enable TLS and set VAULT_CACERT, VAULT_CLIENT_CERT, VAULT_CLIENT_KEY
+VAULT_CACERT=./botserver-stack/conf/system/certificates/ca/ca.crt
 
 # Cache TTL for secrets (seconds)
 VAULT_CACHE_TTL=300
@@ -1617,23 +1636,25 @@ VAULT_CACHE_TTL=300
         }
 
         info!("Enabling KV secrets engine...");
+        let ca_cert_path = "./botserver-stack/conf/system/certificates/ca/ca.crt";
         let _ = std::process::Command::new("sh")
             .arg("-c")
             .arg(format!(
-                "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} secrets enable -path=secret kv-v2 2>&1 || true",
-                vault_addr, root_token, vault_bin
+                "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} secrets enable -path=secret kv-v2 2>&1 || true",
+                vault_addr, root_token, ca_cert_path, vault_bin
             ))
             .output();
 
         info!("Storing secrets in Vault (only if not existing)...");
 
         let vault_bin_clone = vault_bin.clone();
+        let ca_cert_clone = ca_cert_path.to_string();
         let secret_exists = |path: &str| -> bool {
             let output = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv get {} 2>/dev/null",
-                    vault_addr, root_token, vault_bin_clone, path
+                    "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv get {} 2>/dev/null",
+                    vault_addr, root_token, ca_cert_clone, vault_bin_clone, path
                 ))
                 .output();
             output.map(|o| o.status.success()).unwrap_or(false)
@@ -1645,8 +1666,8 @@ VAULT_CACHE_TTL=300
             let _ = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv put secret/gbo/tables host=localhost port=5432 database=botserver username=gbuser password='{}'",
-                    vault_addr, root_token, vault_bin, db_password
+                    "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv put secret/gbo/tables host=localhost port=5432 database=botserver username=gbuser password='{}'",
+                    vault_addr, root_token, ca_cert_path, vault_bin, db_password
                 ))
                 .output()?;
             info!("  Stored database credentials");
@@ -1658,8 +1679,8 @@ VAULT_CACHE_TTL=300
             let _ = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv put secret/gbo/drive accesskey='{}' secret='{}'",
-                    vault_addr, root_token, vault_bin, drive_accesskey, drive_secret
+                    "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv put secret/gbo/drive accesskey='{}' secret='{}'",
+                    vault_addr, root_token, ca_cert_path, vault_bin, drive_accesskey, drive_secret
                 ))
                 .output()?;
             info!("  Stored drive credentials");
@@ -1671,8 +1692,8 @@ VAULT_CACHE_TTL=300
             let _ = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv put secret/gbo/cache password='{}'",
-                    vault_addr, root_token, vault_bin, cache_password
+                    "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv put secret/gbo/cache password='{}'",
+                    vault_addr, root_token, ca_cert_path, vault_bin, cache_password
                 ))
                 .output()?;
             info!("  Stored cache credentials");
@@ -1690,8 +1711,8 @@ VAULT_CACHE_TTL=300
             let _ = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv put secret/gbo/directory url=https://localhost:8300 project_id= client_id= client_secret= masterkey={}",
-                    vault_addr, root_token, vault_bin, masterkey
+                    "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv put secret/gbo/directory url=https://localhost:8300 project_id= client_id= client_secret= masterkey={}",
+                    vault_addr, root_token, ca_cert_path, vault_bin, masterkey
                 ))
                 .output()?;
             info!("  Created directory placeholder with masterkey");
@@ -1703,8 +1724,8 @@ VAULT_CACHE_TTL=300
             let _ = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv put secret/gbo/llm openai_key= anthropic_key= groq_key=",
-                    vault_addr, root_token, vault_bin
+                    "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv put secret/gbo/llm openai_key= anthropic_key= groq_key=",
+                    vault_addr, root_token, ca_cert_path, vault_bin
                 ))
                 .output()?;
             info!("  Created LLM placeholder");
@@ -1716,8 +1737,8 @@ VAULT_CACHE_TTL=300
             let _ = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv put secret/gbo/email username= password=",
-                    vault_addr, root_token, vault_bin
+                    "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv put secret/gbo/email username= password=",
+                    vault_addr, root_token, ca_cert_path, vault_bin
                 ))
                 .output()?;
             info!("  Created email placeholder");
@@ -1730,8 +1751,8 @@ VAULT_CACHE_TTL=300
             let _ = std::process::Command::new("sh")
                 .arg("-c")
                 .arg(format!(
-                    "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv put secret/gbo/encryption master_key='{}'",
-                    vault_addr, root_token, vault_bin, encryption_key
+                    "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv put secret/gbo/encryption master_key='{}'",
+                    vault_addr, root_token, ca_cert_path, vault_bin, encryption_key
                 ))
                 .output()?;
             info!("  Generated and stored encryption key");
@@ -1779,6 +1800,8 @@ VAULT_CACHE_TTL=300
             format!("{}/", config.drive.server)
         };
 
+        info!("[S3_CLIENT] Creating S3 client with endpoint: {}", endpoint);
+
         let (access_key, secret_key) =
             if config.drive.access_key.is_empty() || config.drive.secret_key.is_empty() {
                 match crate::shared::utils::get_secrets_manager().await {
@@ -1806,19 +1829,30 @@ VAULT_CACHE_TTL=300
                 )
             };
 
-        // Set CA cert for self-signed TLS (dev stack)
         let ca_cert_path = "./botserver-stack/conf/system/certificates/ca/ca.crt";
         if std::path::Path::new(ca_cert_path).exists() {
             std::env::set_var("AWS_CA_BUNDLE", ca_cert_path);
             std::env::set_var("SSL_CERT_FILE", ca_cert_path);
         }
 
+        let timeout_config = aws_config::timeout::TimeoutConfig::builder()
+            .connect_timeout(std::time::Duration::from_secs(5))
+            .read_timeout(std::time::Duration::from_secs(30))
+            .operation_timeout(std::time::Duration::from_secs(30))
+            .operation_attempt_timeout(std::time::Duration::from_secs(15))
+            .build();
+
+        let retry_config = aws_config::retry::RetryConfig::standard()
+            .with_max_attempts(2);
+
         let base_config = aws_config::defaults(BehaviorVersion::latest())
             .endpoint_url(endpoint)
             .region("auto")
             .credentials_provider(aws_sdk_s3::config::Credentials::new(
                 access_key, secret_key, None, None, "static",
             ))
+            .timeout_config(timeout_config)
+            .retry_config(retry_config)
             .load()
             .await;
         let s3_config = aws_sdk_s3::config::Builder::from(&base_config)
@@ -1866,14 +1900,16 @@ VAULT_CACHE_TTL=300
             {
                 let bot_name = path.file_name().map(|n| n.to_string_lossy().to_string()).unwrap_or_default();
                 let bucket = bot_name.trim_start_matches('/').to_string();
-                // Create bucket if it doesn't exist
-                if client.head_bucket().bucket(&bucket).send().await.is_err() {
-                    if let Err(e) = client.create_bucket().bucket(&bucket).send().await {
-                        warn!("S3/MinIO not available, skipping bucket {}: {}", bucket, e);
-                        continue;
-                    }
+                let bucket_exists = client.head_bucket().bucket(&bucket).send().await.is_ok();
+                if bucket_exists {
+                    info!("Bucket {} already exists, skipping template upload (user content preserved)", bucket);
+                    continue;
                 }
-                // Always sync templates to bucket
+                if let Err(e) = client.create_bucket().bucket(&bucket).send().await {
+                    warn!("S3/MinIO not available, skipping bucket {}: {:?}", bucket, e);
+                    continue;
+                }
+                info!("Created new bucket {}, uploading templates...", bucket);
                 if let Err(e) = Self::upload_directory_recursive(&client, &path, &bucket, "/").await {
                     warn!("Failed to upload templates to bucket {}: {}", bucket, e);
                 }
@@ -2091,16 +2127,18 @@ storage "file" {
   path = "../../data/vault"
 }
 
-# Listener with TLS DISABLED for local development
-# In production, enable TLS with proper certificates
+# Listener with TLS enabled
 listener "tcp" {
-  address     = "0.0.0.0:8200"
-  tls_disable = true
+  address       = "0.0.0.0:8200"
+  tls_disable   = false
+  tls_cert_file = "../../conf/system/certificates/vault/server.crt"
+  tls_key_file  = "../../conf/system/certificates/vault/server.key"
+  tls_client_ca_file = "../../conf/system/certificates/ca/ca.crt"
 }
 
-# API settings - use HTTP for local dev
-api_addr = "http://localhost:8200"
-cluster_addr = "http://localhost:8201"
+# API settings - use HTTPS
+api_addr = "https://localhost:8200"
+cluster_addr = "https://localhost:8201"
 
 # UI enabled for administration
 ui = true
@@ -2122,7 +2160,7 @@ log_level = "info"
         fs::create_dir_all(self.stack_dir("data/vault"))?;
 
         info!(
-            "Created Vault config with mTLS at {}",
+            "Created Vault config with TLS at {}",
             config_path.display()
         );
         Ok(())
@@ -2293,9 +2331,15 @@ log_level = "info"
             params.distinguished_name = dn;
 
             for san in sans {
-                params
-                    .subject_alt_names
-                    .push(rcgen::SanType::DnsName(san.to_string().try_into()?));
+                if let Ok(ip) = san.parse::<std::net::IpAddr>() {
+                    params
+                        .subject_alt_names
+                        .push(rcgen::SanType::IpAddress(ip));
+                } else {
+                    params
+                        .subject_alt_names
+                        .push(rcgen::SanType::DnsName(san.to_string().try_into()?));
+                }
             }
 
             let key_pair = KeyPair::generate()?;
@@ -2311,7 +2355,27 @@ log_level = "info"
         fs::create_dir_all(&minio_certs_dir)?;
         let drive_cert_dir = cert_dir.join("drive");
         fs::copy(drive_cert_dir.join("server.crt"), minio_certs_dir.join("public.crt"))?;
-        fs::copy(drive_cert_dir.join("server.key"), minio_certs_dir.join("private.key"))?;
+
+        let drive_key_src = drive_cert_dir.join("server.key");
+        let drive_key_dst = minio_certs_dir.join("private.key");
+
+        let conversion_result = std::process::Command::new("openssl")
+            .args(["ec", "-in"])
+            .arg(&drive_key_src)
+            .args(["-out"])
+            .arg(&drive_key_dst)
+            .output();
+
+        match conversion_result {
+            Ok(output) if output.status.success() => {
+                debug!("Converted drive private key to SEC1 format for MinIO");
+            }
+            _ => {
+                warn!("Could not convert drive key to SEC1 format (openssl not available?), copying as-is");
+                fs::copy(&drive_key_src, &drive_key_dst)?;
+            }
+        }
+
         let minio_ca_dir = minio_certs_dir.join("CAs");
         fs::create_dir_all(&minio_ca_dir)?;
         fs::copy(&ca_cert_path, minio_ca_dir.join("ca.crt"))?;
diff --git a/src/core/kb/document_processor.rs b/src/core/kb/document_processor.rs
index 8158e6611..db13e4ca3 100644
--- a/src/core/kb/document_processor.rs
+++ b/src/core/kb/document_processor.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use log::{error, info, warn};
+use log::{debug, error, info, warn};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::path::Path;
@@ -118,6 +118,14 @@ impl DocumentProcessor {
         let metadata = tokio::fs::metadata(file_path).await?;
         let file_size = metadata.len() as usize;
 
+        if file_size == 0 {
+            debug!(
+                "Skipping empty file (0 bytes): {}",
+                file_path.display()
+            );
+            return Ok(Vec::new());
+        }
+
         let format = DocumentFormat::from_extension(file_path)
             .ok_or_else(|| anyhow::anyhow!("Unsupported file format: {}", file_path.display()))?;
 
diff --git a/src/core/kb/embedding_generator.rs b/src/core/kb/embedding_generator.rs
index dc060c53d..2dacc9986 100644
--- a/src/core/kb/embedding_generator.rs
+++ b/src/core/kb/embedding_generator.rs
@@ -1,24 +1,37 @@
 use anyhow::{Context, Result};
-use log::{debug, info, warn};
+use log::{info, trace, warn};
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
-
+use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
+use std::time::Duration;
 use tokio::sync::Semaphore;
 
+use crate::core::shared::memory_monitor::{log_jemalloc_stats, MemoryStats};
 use super::document_processor::TextChunk;
 
+static EMBEDDING_SERVER_READY: AtomicBool = AtomicBool::new(false);
+
+pub fn is_embedding_server_ready() -> bool {
+    EMBEDDING_SERVER_READY.load(Ordering::SeqCst)
+}
+
+pub fn set_embedding_server_ready(ready: bool) {
+    EMBEDDING_SERVER_READY.store(ready, Ordering::SeqCst);
+    if ready {
+        info!("[EMBEDDING] Embedding server marked as ready");
+    }
+}
+
 #[derive(Debug, Clone)]
 pub struct EmbeddingConfig {
     pub embedding_url: String,
-
     pub embedding_model: String,
-
     pub dimensions: usize,
-
     pub batch_size: usize,
-
     pub timeout_seconds: u64,
+    pub max_concurrent_requests: usize,
+    pub connect_timeout_seconds: u64,
 }
 
 impl Default for EmbeddingConfig {
@@ -27,8 +40,10 @@ impl Default for EmbeddingConfig {
             embedding_url: "http://localhost:8082".to_string(),
             embedding_model: "bge-small-en-v1.5".to_string(),
             dimensions: 384,
-            batch_size: 32,
-            timeout_seconds: 30,
+            batch_size: 16,
+            timeout_seconds: 60,
+            max_concurrent_requests: 2,
+            connect_timeout_seconds: 10,
         }
     }
 }
@@ -36,17 +51,17 @@ impl Default for EmbeddingConfig {
 impl EmbeddingConfig {
     pub fn from_env() -> Self {
         let embedding_url = "http://localhost:8082".to_string();
-
         let embedding_model = "bge-small-en-v1.5".to_string();
-
         let dimensions = Self::detect_dimensions(&embedding_model);
 
         Self {
             embedding_url,
             embedding_model,
             dimensions,
-            batch_size: 32,
-            timeout_seconds: 30,
+            batch_size: 16,
+            timeout_seconds: 60,
+            max_concurrent_requests: 2,
+            connect_timeout_seconds: 10,
         }
     }
 
@@ -79,12 +94,17 @@ struct EmbeddingResponse {
 #[derive(Debug, Deserialize)]
 struct EmbeddingData {
     embedding: Vec<f32>,
-    _index: usize,
+    #[serde(default)]
+    #[allow(dead_code)]
+    index: usize,
 }
 
 #[derive(Debug, Deserialize)]
 struct EmbeddingUsage {
-    _prompt_tokens: usize,
+    #[serde(default)]
+    #[allow(dead_code)]
+    prompt_tokens: usize,
+    #[serde(default)]
     total_tokens: usize,
 }
 
@@ -115,14 +135,19 @@ impl std::fmt::Debug for KbEmbeddingGenerator {
 impl KbEmbeddingGenerator {
     pub fn new(config: EmbeddingConfig) -> Self {
         let client = Client::builder()
-            .timeout(std::time::Duration::from_secs(config.timeout_seconds))
+            .timeout(Duration::from_secs(config.timeout_seconds))
+            .connect_timeout(Duration::from_secs(config.connect_timeout_seconds))
+            .pool_max_idle_per_host(2)
+            .pool_idle_timeout(Duration::from_secs(30))
+            .tcp_keepalive(Duration::from_secs(60))
+            .tcp_nodelay(true)
             .build()
             .unwrap_or_else(|e| {
-                log::warn!("Failed to create HTTP client with timeout: {}, using default", e);
+                warn!("Failed to create HTTP client with timeout: {}, using default", e);
                 Client::new()
             });
 
-        let semaphore = Arc::new(Semaphore::new(4));
+        let semaphore = Arc::new(Semaphore::new(config.max_concurrent_requests));
 
         Self {
             config,
@@ -131,6 +156,65 @@ impl KbEmbeddingGenerator {
         }
     }
 
+    pub async fn check_health(&self) -> bool {
+        let health_url = format!("{}/health", self.config.embedding_url);
+
+        match tokio::time::timeout(
+            Duration::from_secs(5),
+            self.client.get(&health_url).send()
+        ).await {
+            Ok(Ok(response)) => {
+                let is_healthy = response.status().is_success();
+                if is_healthy {
+                    set_embedding_server_ready(true);
+                }
+                is_healthy
+            }
+            Ok(Err(e)) => {
+                let alt_url = &self.config.embedding_url;
+                match tokio::time::timeout(
+                    Duration::from_secs(5),
+                    self.client.get(alt_url).send()
+                ).await {
+                    Ok(Ok(response)) => {
+                        let is_healthy = response.status().is_success();
+                        if is_healthy {
+                            set_embedding_server_ready(true);
+                        }
+                        is_healthy
+                    }
+                    _ => {
+                        warn!("[EMBEDDING] Health check failed: {}", e);
+                        false
+                    }
+                }
+            }
+            Err(_) => {
+                warn!("[EMBEDDING] Health check timed out");
+                false
+            }
+        }
+    }
+
+    pub async fn wait_for_server(&self, max_wait_secs: u64) -> bool {
+        let start = std::time::Instant::now();
+        let max_wait = Duration::from_secs(max_wait_secs);
+
+        info!("[EMBEDDING] Waiting for embedding server at {} (max {}s)...",
+              self.config.embedding_url, max_wait_secs);
+
+        while start.elapsed() < max_wait {
+            if self.check_health().await {
+                info!("[EMBEDDING] Embedding server is ready after {:?}", start.elapsed());
+                return true;
+            }
+            tokio::time::sleep(Duration::from_secs(2)).await;
+        }
+
+        warn!("[EMBEDDING] Embedding server not available after {}s", max_wait_secs);
+        false
+    }
+
     pub async fn generate_embeddings(
         &self,
         chunks: &[TextChunk],
@@ -139,35 +223,105 @@ impl KbEmbeddingGenerator {
             return Ok(Vec::new());
         }
 
-        info!("Generating embeddings for {} chunks", chunks.len());
+        if !is_embedding_server_ready() {
+            info!("[EMBEDDING] Server not marked ready, checking health...");
+            if !self.wait_for_server(30).await {
+                return Err(anyhow::anyhow!(
+                    "Embedding server not available at {}. Skipping embedding generation.",
+                    self.config.embedding_url
+                ));
+            }
+        }
 
-        let mut results = Vec::new();
+        let start_mem = MemoryStats::current();
+        trace!("[EMBEDDING] Generating embeddings for {} chunks, RSS={}",
+              chunks.len(), MemoryStats::format_bytes(start_mem.rss_bytes));
 
-        for batch in chunks.chunks(self.config.batch_size) {
-            let batch_embeddings = self.generate_batch_embeddings(batch).await?;
+        let mut results = Vec::with_capacity(chunks.len());
+        let total_batches = (chunks.len() + self.config.batch_size - 1) / self.config.batch_size;
+
+        for (batch_num, batch) in chunks.chunks(self.config.batch_size).enumerate() {
+            let batch_start = MemoryStats::current();
+            trace!("[EMBEDDING] Processing batch {}/{} ({} items), RSS={}",
+                  batch_num + 1,
+                  total_batches,
+                  batch.len(),
+                  MemoryStats::format_bytes(batch_start.rss_bytes));
+
+            let batch_embeddings = match tokio::time::timeout(
+                Duration::from_secs(self.config.timeout_seconds),
+                self.generate_batch_embeddings(batch)
+            ).await {
+                Ok(Ok(embeddings)) => embeddings,
+                Ok(Err(e)) => {
+                    warn!("[EMBEDDING] Batch {} failed: {}", batch_num + 1, e);
+                    break;
+                }
+                Err(_) => {
+                    warn!("[EMBEDDING] Batch {} timed out after {}s",
+                          batch_num + 1, self.config.timeout_seconds);
+                    break;
+                }
+            };
+
+            let batch_end = MemoryStats::current();
+            let delta = batch_end.rss_bytes.saturating_sub(batch_start.rss_bytes);
+            trace!("[EMBEDDING] Batch {} complete: {} embeddings, RSS={} (delta={})",
+                  batch_num + 1,
+                  batch_embeddings.len(),
+                  MemoryStats::format_bytes(batch_end.rss_bytes),
+                  MemoryStats::format_bytes(delta));
+
+            if delta > 100 * 1024 * 1024 {
+                warn!("[EMBEDDING] Excessive memory growth detected ({}), stopping early",
+                      MemoryStats::format_bytes(delta));
+                for (chunk, embedding) in batch.iter().zip(batch_embeddings.iter()) {
+                    results.push((chunk.clone(), embedding.clone()));
+                }
+                break;
+            }
 
             for (chunk, embedding) in batch.iter().zip(batch_embeddings.iter()) {
                 results.push((chunk.clone(), embedding.clone()));
             }
+
+            if batch_num + 1 < total_batches {
+                tokio::time::sleep(Duration::from_millis(100)).await;
+            }
         }
 
-        info!("Generated {} embeddings", results.len());
+        let end_mem = MemoryStats::current();
+        trace!("[EMBEDDING] Generated {} embeddings, RSS={} (total delta={})",
+              results.len(),
+              MemoryStats::format_bytes(end_mem.rss_bytes),
+              MemoryStats::format_bytes(end_mem.rss_bytes.saturating_sub(start_mem.rss_bytes)));
+        log_jemalloc_stats();
 
         Ok(results)
     }
 
     async fn generate_batch_embeddings(&self, chunks: &[TextChunk]) -> Result<Vec<Embedding>> {
-        let _permit = self.semaphore.acquire().await?;
+        let _permit = self.semaphore.acquire().await
+            .map_err(|e| anyhow::anyhow!("Failed to acquire semaphore: {}", e))?;
 
         let texts: Vec<String> = chunks.iter().map(|c| c.content.clone()).collect();
+        let total_chars: usize = texts.iter().map(|t| t.len()).sum();
 
-        debug!("Generating embeddings for batch of {} texts", texts.len());
+        info!("[EMBEDDING] generate_batch_embeddings: {} texts, {} total chars",
+              texts.len(), total_chars);
 
-        match self.generate_local_embeddings(&texts).await {
-            Ok(embeddings) => Ok(embeddings),
+        let truncated_texts: Vec<String> = texts.into_iter()
+            .map(|t| if t.len() > 8192 { t[..8192].to_string() } else { t })
+            .collect();
+
+        match self.generate_local_embeddings(&truncated_texts).await {
+            Ok(embeddings) => {
+                info!("[EMBEDDING] Local embeddings succeeded: {} vectors", embeddings.len());
+                Ok(embeddings)
+            }
             Err(e) => {
-                warn!("Local embedding service failed: {}, trying OpenAI API", e);
-                self.generate_openai_embeddings(&texts)
+                warn!("[EMBEDDING] Local embedding service failed: {}", e);
+                Err(e)
             }
         }
     }
@@ -178,6 +332,12 @@ impl KbEmbeddingGenerator {
             model: self.config.embedding_model.clone(),
         };
 
+        let request_size = serde_json::to_string(&request)
+            .map(|s| s.len())
+            .unwrap_or(0);
+        info!("[EMBEDDING] Sending request to {} (size: {} bytes)",
+              self.config.embedding_url, request_size);
+
         let response = self
             .client
             .post(format!("{}/embeddings", self.config.embedding_url))
@@ -186,9 +346,10 @@ impl KbEmbeddingGenerator {
             .await
             .context("Failed to send request to embedding service")?;
 
-        if !response.status().is_success() {
-            let status = response.status();
-            let error_text = response.text().await.unwrap_or_default();
+        let status = response.status();
+        if !status.is_success() {
+            let error_bytes = response.bytes().await.unwrap_or_default();
+            let error_text = String::from_utf8_lossy(&error_bytes[..error_bytes.len().min(1024)]);
             return Err(anyhow::anyhow!(
                 "Embedding service error {}: {}",
                 status,
@@ -196,12 +357,24 @@ impl KbEmbeddingGenerator {
             ));
         }
 
-        let embedding_response: EmbeddingResponse = response
-            .json()
-            .await
+        let response_bytes = response.bytes().await
+            .context("Failed to read embedding response bytes")?;
+
+        info!("[EMBEDDING] Received response: {} bytes", response_bytes.len());
+
+        if response_bytes.len() > 50 * 1024 * 1024 {
+            return Err(anyhow::anyhow!(
+                "Embedding response too large: {} bytes (max 50MB)",
+                response_bytes.len()
+            ));
+        }
+
+        let embedding_response: EmbeddingResponse = serde_json::from_slice(&response_bytes)
             .context("Failed to parse embedding response")?;
 
-        let mut embeddings = Vec::new();
+        drop(response_bytes);
+
+        let mut embeddings = Vec::with_capacity(embedding_response.data.len());
         for data in embedding_response.data {
             embeddings.push(Embedding {
                 vector: data.embedding,
@@ -214,14 +387,14 @@ impl KbEmbeddingGenerator {
         Ok(embeddings)
     }
 
-    fn generate_openai_embeddings(&self, _texts: &[String]) -> Result<Vec<Embedding>> {
-        let _ = self; // Suppress unused self warning
-        Err(anyhow::anyhow!(
-            "OpenAI embeddings not configured - use local embedding service"
-        ))
-    }
-
     pub async fn generate_single_embedding(&self, text: &str) -> Result<Embedding> {
+        if !is_embedding_server_ready() && !self.check_health().await {
+            return Err(anyhow::anyhow!(
+                "Embedding server not available at {}",
+                self.config.embedding_url
+            ));
+        }
+
         let embeddings = self
             .generate_batch_embeddings(&[TextChunk {
                 content: text.to_string(),
@@ -272,6 +445,11 @@ impl EmbeddingGenerator {
         let embedding = self.kb_generator.generate_single_embedding(text).await?;
         Ok(embedding.vector)
     }
+
+    /// Check if the embedding server is healthy
+    pub async fn check_health(&self) -> bool {
+        self.kb_generator.check_health().await
+    }
 }
 
 pub struct EmailEmbeddingGenerator {
diff --git a/src/core/kb/kb_indexer.rs b/src/core/kb/kb_indexer.rs
index 36450d37b..24d1d3cbb 100644
--- a/src/core/kb/kb_indexer.rs
+++ b/src/core/kb/kb_indexer.rs
@@ -1,15 +1,16 @@
 use anyhow::Result;
-use log::{debug, info, warn};
+use log::{debug, info, trace, warn};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use uuid::Uuid;
 
 use crate::config::ConfigManager;
+use crate::core::shared::memory_monitor::{log_jemalloc_stats, MemoryStats};
 use crate::shared::utils::{create_tls_client, DbPool};
 
 use super::document_processor::{DocumentProcessor, TextChunk};
-use super::embedding_generator::{Embedding, EmbeddingConfig, KbEmbeddingGenerator};
+use super::embedding_generator::{is_embedding_server_ready, Embedding, EmbeddingConfig, KbEmbeddingGenerator};
 
 #[derive(Debug, Clone)]
 pub struct QdrantConfig {
@@ -94,7 +95,6 @@ impl KbIndexer {
         let document_processor = DocumentProcessor::default();
         let embedding_generator = KbEmbeddingGenerator::new(embedding_config);
 
-        // Use shared TLS client with local CA certificate
         let http_client = create_tls_client(Some(qdrant_config.timeout_secs));
 
         Self {
@@ -105,7 +105,6 @@ impl KbIndexer {
         }
     }
 
-    /// Check if Qdrant vector database is available
     pub async fn check_qdrant_health(&self) -> Result<bool> {
         let health_url = format!("{}/healthz", self.qdrant_config.url);
 
@@ -121,9 +120,24 @@ impl KbIndexer {
         kb_name: &str,
         kb_path: &Path,
     ) -> Result<IndexingResult> {
-        info!("Indexing KB folder: {} for bot {}", kb_name, bot_name);
+        let start_mem = MemoryStats::current();
+        info!("Indexing KB folder: {} for bot {} [START RSS={}]",
+              kb_name, bot_name, MemoryStats::format_bytes(start_mem.rss_bytes));
+        log_jemalloc_stats();
+
+        if !is_embedding_server_ready() {
+            info!("[KB_INDEXER] Embedding server not ready yet, waiting up to 60s...");
+            if !self.embedding_generator.wait_for_server(60).await {
+                warn!(
+                    "Embedding server is not available. KB indexing skipped. \
+                    Wait for the embedding server to start before indexing."
+                );
+                return Err(anyhow::anyhow!(
+                    "Embedding server not available. KB indexing deferred until embedding service is ready."
+                ));
+            }
+        }
 
-        // Check if Qdrant is available before proceeding
         if !self.check_qdrant_health().await.unwrap_or(false) {
             warn!(
                 "Qdrant vector database is not available at {}. KB indexing skipped. \
@@ -140,27 +154,48 @@ impl KbIndexer {
 
         self.ensure_collection_exists(&collection_name).await?;
 
+        let before_docs = MemoryStats::current();
+        trace!("[KB_INDEXER] Before process_kb_folder RSS={}",
+              MemoryStats::format_bytes(before_docs.rss_bytes));
+
         let documents = self.document_processor.process_kb_folder(kb_path).await?;
 
+        let after_docs = MemoryStats::current();
+        trace!("[KB_INDEXER] After process_kb_folder: {} documents, RSS={} (delta={})",
+              documents.len(),
+              MemoryStats::format_bytes(after_docs.rss_bytes),
+              MemoryStats::format_bytes(after_docs.rss_bytes.saturating_sub(before_docs.rss_bytes)));
+
         let mut total_chunks = 0;
         let mut indexed_documents = 0;
 
         for (doc_path, chunks) in documents {
             if chunks.is_empty() {
+                debug!("[KB_INDEXER] Skipping document with no chunks: {}", doc_path);
                 continue;
             }
 
-            info!(
-                "Processing document: {} ({} chunks)",
+            let before_embed = MemoryStats::current();
+            trace!(
+                "[KB_INDEXER] Processing document: {} ({} chunks) RSS={}",
                 doc_path,
-                chunks.len()
+                chunks.len(),
+                MemoryStats::format_bytes(before_embed.rss_bytes)
             );
 
+            trace!("[KB_INDEXER] Calling generate_embeddings for {} chunks...", chunks.len());
             let embeddings = self
                 .embedding_generator
                 .generate_embeddings(&chunks)
                 .await?;
 
+            let after_embed = MemoryStats::current();
+            trace!("[KB_INDEXER] After generate_embeddings: {} embeddings, RSS={} (delta={})",
+                  embeddings.len(),
+                  MemoryStats::format_bytes(after_embed.rss_bytes),
+                  MemoryStats::format_bytes(after_embed.rss_bytes.saturating_sub(before_embed.rss_bytes)));
+            log_jemalloc_stats();
+
             let points = Self::create_qdrant_points(&doc_path, embeddings)?;
 
             self.upsert_points(&collection_name, points).await?;
@@ -171,6 +206,13 @@ impl KbIndexer {
 
         self.update_collection_metadata(&collection_name, bot_name, kb_name, total_chunks)?;
 
+        let end_mem = MemoryStats::current();
+        trace!("[KB_INDEXER] Indexing complete: {} docs, {} chunks, RSS={} (total delta={})",
+              indexed_documents, total_chunks,
+              MemoryStats::format_bytes(end_mem.rss_bytes),
+              MemoryStats::format_bytes(end_mem.rss_bytes.saturating_sub(start_mem.rss_bytes)));
+        log_jemalloc_stats();
+
         Ok(IndexingResult {
             collection_name,
             documents_processed: indexed_documents,
diff --git a/src/core/package_manager/facade.rs b/src/core/package_manager/facade.rs
index 7ecd66c01..507d6b7f3 100644
--- a/src/core/package_manager/facade.rs
+++ b/src/core/package_manager/facade.rs
@@ -460,8 +460,8 @@ Store credentials in Vault:
             "drive" => {
                 format!(
                     r"MinIO Object Storage:
-  API:     http://{}:9000
-  Console: http://{}:9001
+  API:     https://{}:9000
+  Console: https://{}:9001
 
 Store credentials in Vault:
   botserver vault put gbo/drive server={} port=9000 accesskey=minioadmin secret=<your-secret>",
diff --git a/src/core/package_manager/installer.rs b/src/core/package_manager/installer.rs
index 07276b467..0067b19c9 100644
--- a/src/core/package_manager/installer.rs
+++ b/src/core/package_manager/installer.rs
@@ -252,7 +252,7 @@ impl PackageManager {
                 ]),
                 data_download_list: Vec::new(),
                 exec_cmd: "nohup {{BIN_PATH}}/minio server {{DATA_PATH}} --address :9000 --console-address :9001 --certs-dir {{CONF_PATH}}/drive/certs > {{LOGS_PATH}}/minio.log 2>&1 &".to_string(),
-                check_cmd: "curl -sfk https://127.0.0.1:9000/minio/health/live >/dev/null 2>&1".to_string(),
+                check_cmd: "curl -sf --cacert {{CONF_PATH}}/drive/certs/CAs/ca.crt https://127.0.0.1:9000/minio/health/live >/dev/null 2>&1".to_string(),
             },
         );
     }
@@ -891,12 +891,15 @@ storage "file" {
 }
 
 listener "tcp" {
-  address     = "0.0.0.0:8200"
-  tls_disable = 1
+  address       = "0.0.0.0:8200"
+  tls_disable   = false
+  tls_cert_file = "{{CONF_PATH}}/system/certificates/vault/server.crt"
+  tls_key_file  = "{{CONF_PATH}}/system/certificates/vault/server.key"
+  tls_client_ca_file = "{{CONF_PATH}}/system/certificates/ca/ca.crt"
 }
 
-api_addr = "http://0.0.0.0:8200"
-cluster_addr = "http://0.0.0.0:8201"
+api_addr = "https://localhost:8200"
+cluster_addr = "https://localhost:8201"
 ui = true
 disable_mlock = true
 EOF"#.to_string(),
@@ -914,12 +917,15 @@ storage "file" {
 }
 
 listener "tcp" {
-  address     = "0.0.0.0:8200"
-  tls_disable = 1
+  address       = "0.0.0.0:8200"
+  tls_disable   = false
+  tls_cert_file = "{{CONF_PATH}}/system/certificates/vault/server.crt"
+  tls_key_file  = "{{CONF_PATH}}/system/certificates/vault/server.key"
+  tls_client_ca_file = "{{CONF_PATH}}/system/certificates/ca/ca.crt"
 }
 
-api_addr = "http://0.0.0.0:8200"
-cluster_addr = "http://0.0.0.0:8201"
+api_addr = "https://localhost:8200"
+cluster_addr = "https://localhost:8201"
 ui = true
 disable_mlock = true
 EOF"#.to_string(),
@@ -933,13 +939,16 @@ EOF"#.to_string(),
                         "VAULT_ADDR".to_string(),
                         "https://localhost:8200".to_string(),
                     );
-                    env.insert("VAULT_SKIP_VERIFY".to_string(), "true".to_string());
+                    env.insert(
+                        "VAULT_CACERT".to_string(),
+                        "./botserver-stack/conf/system/certificates/ca/ca.crt".to_string(),
+                    );
                     env
                 },
                 data_download_list: Vec::new(),
                 exec_cmd: "nohup {{BIN_PATH}}/vault server -config={{CONF_PATH}}/vault/config.hcl > {{LOGS_PATH}}/vault.log 2>&1 &"
                     .to_string(),
-                check_cmd: "curl -f -s --connect-timeout 2 -m 5 'http://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200' >/dev/null 2>&1"
+                check_cmd: "curl -f -sk --connect-timeout 2 -m 5 'https://localhost:8200/v1/sys/health?standbyok=true&uninitcode=200&sealedcode=200' >/dev/null 2>&1"
                     .to_string(),
             },
         );
@@ -1165,9 +1174,10 @@ EOF"#.to_string(),
         }
 
         // Check if Vault is reachable before trying to fetch credentials
+        // Use -k for self-signed certs in dev
         let vault_check = std::process::Command::new("sh")
             .arg("-c")
-            .arg(format!("curl -sf {}/v1/sys/health >/dev/null 2>&1", vault_addr))
+            .arg(format!("curl -sfk {}/v1/sys/health >/dev/null 2>&1", vault_addr))
             .status()
             .map(|s| s.success())
             .unwrap_or(false);
@@ -1187,10 +1197,14 @@ EOF"#.to_string(),
         let vault_bin = base_path.join("bin/vault/vault");
         let vault_bin_str = vault_bin.to_string_lossy();
 
+        // Get CA cert path for Vault TLS
+        let ca_cert_path = std::env::var("VAULT_CACERT")
+            .unwrap_or_else(|_| base_path.join("conf/system/certificates/ca/ca.crt").to_string_lossy().to_string());
+
         trace!("Fetching drive credentials from Vault at {} using {}", vault_addr, vault_bin_str);
         let drive_cmd = format!(
-            "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv get -format=json secret/gbo/drive",
-            vault_addr, vault_token, vault_bin_str
+            "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv get -format=json secret/gbo/drive",
+            vault_addr, vault_token, ca_cert_path, vault_bin_str
         );
         match std::process::Command::new("sh")
             .arg("-c")
@@ -1229,8 +1243,8 @@ EOF"#.to_string(),
         if let Ok(output) = std::process::Command::new("sh")
             .arg("-c")
             .arg(format!(
-                "unset VAULT_CLIENT_CERT VAULT_CLIENT_KEY VAULT_CACERT; VAULT_ADDR={} VAULT_TOKEN={} {} kv get -format=json secret/gbo/cache 2>/dev/null",
-                vault_addr, vault_token, vault_bin_str
+                "VAULT_ADDR={} VAULT_TOKEN={} VAULT_CACERT={} {} kv get -format=json secret/gbo/cache 2>/dev/null",
+                vault_addr, vault_token, ca_cert_path, vault_bin_str
             ))
             .output()
         {
diff --git a/src/core/secrets/mod.rs b/src/core/secrets/mod.rs
index ca066efa5..b62235001 100644
--- a/src/core/secrets/mod.rs
+++ b/src/core/secrets/mod.rs
@@ -90,8 +90,12 @@ impl SecretsManager {
         let mut settings_builder = VaultClientSettingsBuilder::default();
         settings_builder.address(&addr).token(&token);
 
+        // Only warn about TLS verification for HTTPS connections
+        let is_https = addr.starts_with("https://");
         if skip_verify {
-            warn!("TLS verification disabled - NOT RECOMMENDED FOR PRODUCTION");
+            if is_https {
+                warn!("TLS verification disabled - NOT RECOMMENDED FOR PRODUCTION");
+            }
             settings_builder.verify(false);
         } else {
             settings_builder.verify(true);
diff --git a/src/core/shared/memory_monitor.rs b/src/core/shared/memory_monitor.rs
new file mode 100644
index 000000000..1f1da3f13
--- /dev/null
+++ b/src/core/shared/memory_monitor.rs
@@ -0,0 +1,506 @@
+//! Memory and CPU monitoring with thread tracking
+//!
+//! This module provides tools to track memory/CPU usage per thread
+//! and identify potential leaks or CPU hogs in the botserver application.
+//!
+//! When compiled with the `jemalloc` feature, provides detailed allocation statistics.
+
+use log::{debug, info, trace, warn};
+use std::collections::HashMap;
+use std::sync::{LazyLock, Mutex, RwLock};
+use std::time::{Duration, Instant};
+use sysinfo::{Pid, ProcessesToUpdate, System};
+
+static THREAD_REGISTRY: LazyLock<RwLock<HashMap<String, ThreadInfo>>> =
+    LazyLock::new(|| RwLock::new(HashMap::new()));
+
+static COMPONENT_TRACKER: LazyLock<ComponentMemoryTracker> =
+    LazyLock::new(|| ComponentMemoryTracker::new(60));
+
+#[derive(Debug, Clone)]
+pub struct ThreadInfo {
+    pub name: String,
+    pub started_at: Instant,
+    pub last_activity: Instant,
+    pub activity_count: u64,
+    pub component: String,
+}
+
+pub fn register_thread(name: &str, component: &str) {
+    let info = ThreadInfo {
+        name: name.to_string(),
+        started_at: Instant::now(),
+        last_activity: Instant::now(),
+        activity_count: 0,
+        component: component.to_string(),
+    };
+    if let Ok(mut registry) = THREAD_REGISTRY.write() {
+        registry.insert(name.to_string(), info);
+    }
+    trace!("[THREAD] Registered: {} (component: {})", name, component);
+}
+
+pub fn record_thread_activity(name: &str) {
+    if let Ok(mut registry) = THREAD_REGISTRY.write() {
+        if let Some(info) = registry.get_mut(name) {
+            info.last_activity = Instant::now();
+            info.activity_count += 1;
+        }
+    }
+}
+
+pub fn unregister_thread(name: &str) {
+    if let Ok(mut registry) = THREAD_REGISTRY.write() {
+        registry.remove(name);
+    }
+    info!("[THREAD] Unregistered: {}", name);
+}
+
+pub fn log_thread_stats() {
+    if let Ok(registry) = THREAD_REGISTRY.read() {
+        info!("[THREADS] Active thread count: {}", registry.len());
+        for (name, info) in registry.iter() {
+            let uptime = info.started_at.elapsed().as_secs();
+            let idle = info.last_activity.elapsed().as_secs();
+            info!(
+                "[THREAD] {} | component={} | uptime={}s | idle={}s | activities={}",
+                name, info.component, uptime, idle, info.activity_count
+            );
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct MemoryStats {
+    pub rss_bytes: u64,
+    pub virtual_bytes: u64,
+    pub timestamp: Instant,
+}
+
+impl MemoryStats {
+    pub fn current() -> Self {
+        let (rss, virt) = get_process_memory().unwrap_or((0, 0));
+        Self {
+            rss_bytes: rss,
+            virtual_bytes: virt,
+            timestamp: Instant::now(),
+        }
+    }
+
+    pub fn format_bytes(bytes: u64) -> String {
+        const KB: u64 = 1024;
+        const MB: u64 = KB * 1024;
+        const GB: u64 = MB * 1024;
+
+        if bytes >= GB {
+            format!("{:.2} GB", bytes as f64 / GB as f64)
+        } else if bytes >= MB {
+            format!("{:.2} MB", bytes as f64 / MB as f64)
+        } else if bytes >= KB {
+            format!("{:.2} KB", bytes as f64 / KB as f64)
+        } else {
+            format!("{} B", bytes)
+        }
+    }
+
+    pub fn log(&self) {
+        info!(
+            "[MEMORY] RSS={}, Virtual={}",
+            Self::format_bytes(self.rss_bytes),
+            Self::format_bytes(self.virtual_bytes),
+        );
+    }
+}
+
+/// Get jemalloc memory statistics when the feature is enabled
+#[cfg(feature = "jemalloc")]
+pub fn get_jemalloc_stats() -> Option<JemallocStats> {
+    use tikv_jemalloc_ctl::{epoch, stats};
+
+    // Advance the epoch to refresh statistics
+    if epoch::advance().is_err() {
+        return None;
+    }
+
+    let allocated = stats::allocated::read().ok()? as u64;
+    let active = stats::active::read().ok()? as u64;
+    let resident = stats::resident::read().ok()? as u64;
+    let mapped = stats::mapped::read().ok()? as u64;
+    let retained = stats::retained::read().ok()? as u64;
+
+    Some(JemallocStats {
+        allocated,
+        active,
+        resident,
+        mapped,
+        retained,
+    })
+}
+
+#[cfg(not(feature = "jemalloc"))]
+pub fn get_jemalloc_stats() -> Option<JemallocStats> {
+    None
+}
+
+/// Jemalloc memory statistics
+#[derive(Debug, Clone)]
+pub struct JemallocStats {
+    /// Total bytes allocated by the application
+    pub allocated: u64,
+    /// Total bytes in active pages allocated by the application
+    pub active: u64,
+    /// Total bytes in physically resident pages
+    pub resident: u64,
+    /// Total bytes in active extents mapped by the allocator
+    pub mapped: u64,
+    /// Total bytes retained (not returned to OS)
+    pub retained: u64,
+}
+
+impl JemallocStats {
+    pub fn log(&self) {
+        info!(
+            "[JEMALLOC] allocated={} active={} resident={} mapped={} retained={}",
+            MemoryStats::format_bytes(self.allocated),
+            MemoryStats::format_bytes(self.active),
+            MemoryStats::format_bytes(self.resident),
+            MemoryStats::format_bytes(self.mapped),
+            MemoryStats::format_bytes(self.retained),
+        );
+    }
+
+    /// Calculate fragmentation ratio (1.0 = no fragmentation)
+    pub fn fragmentation_ratio(&self) -> f64 {
+        if self.allocated > 0 {
+            self.active as f64 / self.allocated as f64
+        } else {
+            1.0
+        }
+    }
+}
+
+/// Log jemalloc stats if available
+pub fn log_jemalloc_stats() {
+    if let Some(stats) = get_jemalloc_stats() {
+        stats.log();
+        let frag = stats.fragmentation_ratio();
+        if frag > 1.5 {
+            warn!("[JEMALLOC] High fragmentation detected: {:.2}x", frag);
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct MemoryCheckpoint {
+    pub name: String,
+    pub stats: MemoryStats,
+}
+
+impl MemoryCheckpoint {
+    pub fn new(name: &str) -> Self {
+        let stats = MemoryStats::current();
+        info!(
+            "[CHECKPOINT] {} started at RSS={}",
+            name,
+            MemoryStats::format_bytes(stats.rss_bytes)
+        );
+        Self {
+            name: name.to_string(),
+            stats,
+        }
+    }
+
+    pub fn compare_and_log(&self) {
+        let current = MemoryStats::current();
+        let diff = current.rss_bytes as i64 - self.stats.rss_bytes as i64;
+
+        if diff > 0 {
+            warn!(
+                "[CHECKPOINT] {} INCREASED by {}",
+                self.name,
+                MemoryStats::format_bytes(diff as u64),
+            );
+        } else if diff < 0 {
+            info!(
+                "[CHECKPOINT] {} decreased by {}",
+                self.name,
+                MemoryStats::format_bytes((-diff) as u64),
+            );
+        } else {
+            debug!("[CHECKPOINT] {} unchanged", self.name);
+        }
+    }
+}
+
+pub struct ComponentMemoryTracker {
+    components: Mutex<HashMap<String, Vec<MemoryStats>>>,
+    max_history: usize,
+}
+
+impl ComponentMemoryTracker {
+    pub fn new(max_history: usize) -> Self {
+        Self {
+            components: Mutex::new(HashMap::new()),
+            max_history,
+        }
+    }
+
+    pub fn record(&self, component: &str) {
+        let stats = MemoryStats::current();
+        if let Ok(mut components) = self.components.lock() {
+            let history = components.entry(component.to_string()).or_default();
+            history.push(stats);
+
+            if history.len() > self.max_history {
+                history.remove(0);
+            }
+        }
+    }
+
+    pub fn get_growth_rate(&self, component: &str) -> Option<f64> {
+        if let Ok(components) = self.components.lock() {
+            if let Some(history) = components.get(component) {
+                if history.len() >= 2 {
+                    let first = &history[0];
+                    let last = &history[history.len() - 1];
+                    let duration = last.timestamp.duration_since(first.timestamp).as_secs_f64();
+                    if duration > 0.0 {
+                        let byte_diff = last.rss_bytes as f64 - first.rss_bytes as f64;
+                        return Some(byte_diff / duration);
+                    }
+                }
+            }
+        }
+        None
+    }
+
+    pub fn log_all(&self) {
+        if let Ok(components) = self.components.lock() {
+            for (name, history) in components.iter() {
+                if let Some(last) = history.last() {
+                    let growth = self.get_growth_rate(name);
+                    let growth_str = growth
+                        .map(|g| {
+                            let sign = if g >= 0.0 { "+" } else { "-" };
+                            format!("{}{}/s", sign, MemoryStats::format_bytes(g.abs() as u64))
+                        })
+                        .unwrap_or_else(|| "N/A".to_string());
+                    info!(
+                        "[COMPONENT] {} | RSS={} | Growth={}",
+                        name,
+                        MemoryStats::format_bytes(last.rss_bytes),
+                        growth_str
+                    );
+                }
+            }
+        }
+    }
+}
+
+pub fn record_component(component: &str) {
+    COMPONENT_TRACKER.record(component);
+}
+
+pub fn log_component_stats() {
+    COMPONENT_TRACKER.log_all();
+}
+
+pub struct LeakDetector {
+    baseline: Mutex<u64>,
+    growth_threshold_bytes: u64,
+    consecutive_growth_count: Mutex<usize>,
+    max_consecutive_growth: usize,
+}
+
+impl LeakDetector {
+    pub fn new(growth_threshold_mb: u64, max_consecutive_growth: usize) -> Self {
+        Self {
+            baseline: Mutex::new(0),
+            growth_threshold_bytes: growth_threshold_mb * 1024 * 1024,
+            consecutive_growth_count: Mutex::new(0),
+            max_consecutive_growth,
+        }
+    }
+
+    pub fn reset_baseline(&self) {
+        let current = MemoryStats::current();
+        if let Ok(mut baseline) = self.baseline.lock() {
+            *baseline = current.rss_bytes;
+        }
+        if let Ok(mut count) = self.consecutive_growth_count.lock() {
+            *count = 0;
+        }
+    }
+
+    pub fn check(&self) -> Option<String> {
+        let current = MemoryStats::current();
+
+        let baseline_val = match self.baseline.lock() {
+            Ok(b) => *b,
+            Err(_) => return None,
+        };
+
+        if baseline_val == 0 {
+            if let Ok(mut baseline) = self.baseline.lock() {
+                *baseline = current.rss_bytes;
+            }
+            return None;
+        }
+
+        let growth = current.rss_bytes.saturating_sub(baseline_val);
+
+        if growth > self.growth_threshold_bytes {
+            let count = match self.consecutive_growth_count.lock() {
+                Ok(mut c) => {
+                    *c += 1;
+                    *c
+                }
+                Err(_) => return None,
+            };
+
+            if count >= self.max_consecutive_growth {
+                return Some(format!(
+                    "POTENTIAL MEMORY LEAK: grew by {} over {} checks. RSS={}, Baseline={}",
+                    MemoryStats::format_bytes(growth),
+                    count,
+                    MemoryStats::format_bytes(current.rss_bytes),
+                    MemoryStats::format_bytes(baseline_val),
+                ));
+            }
+        } else {
+            if let Ok(mut count) = self.consecutive_growth_count.lock() {
+                *count = 0;
+            }
+            if let Ok(mut baseline) = self.baseline.lock() {
+                *baseline = current.rss_bytes;
+            }
+        }
+
+        None
+    }
+}
+
+pub fn start_memory_monitor(interval_secs: u64, warn_threshold_mb: u64) {
+    let detector = LeakDetector::new(warn_threshold_mb, 5);
+
+    tokio::spawn(async move {
+        register_thread("memory-monitor", "monitoring");
+
+        info!(
+            "[MONITOR] Started (interval={}s, threshold={}MB)",
+            interval_secs, warn_threshold_mb
+        );
+
+        let mut prev_rss: u64 = 0;
+        let mut tick_count: u64 = 0;
+
+        // First 2 minutes: check every 10 seconds for aggressive tracking
+        // After that: use normal interval
+        let startup_interval = Duration::from_secs(10);
+        let normal_interval = Duration::from_secs(interval_secs);
+        let startup_ticks = 12; // 2 minutes of 10-second intervals
+
+        let mut interval = tokio::time::interval(startup_interval);
+
+        loop {
+            interval.tick().await;
+            tick_count += 1;
+            record_thread_activity("memory-monitor");
+
+            let stats = MemoryStats::current();
+            let rss_diff = if prev_rss > 0 {
+                stats.rss_bytes as i64 - prev_rss as i64
+            } else {
+                0
+            };
+
+            let diff_str = if rss_diff > 0 {
+                format!("+{}", MemoryStats::format_bytes(rss_diff as u64))
+            } else if rss_diff < 0 {
+                format!("-{}", MemoryStats::format_bytes((-rss_diff) as u64))
+            } else {
+                "±0".to_string()
+            };
+
+            trace!(
+                "[MONITOR] tick={} RSS={} ({}) Virtual={}",
+                tick_count,
+                MemoryStats::format_bytes(stats.rss_bytes),
+                diff_str,
+                MemoryStats::format_bytes(stats.virtual_bytes),
+            );
+
+            // Log jemalloc stats every 5 ticks if available
+            if tick_count % 5 == 0 {
+                log_jemalloc_stats();
+            }
+
+            prev_rss = stats.rss_bytes;
+            record_component("global");
+
+            if let Some(warning) = detector.check() {
+                warn!("[MONITOR] {}", warning);
+                stats.log();
+                log_component_stats();
+                log_thread_stats();
+            }
+
+            // Switch to normal interval after startup period
+            if tick_count == startup_ticks {
+                trace!("[MONITOR] Switching to normal interval ({}s)", interval_secs);
+                interval = tokio::time::interval(normal_interval);
+            }
+        }
+    });
+}
+
+pub fn get_process_memory() -> Option<(u64, u64)> {
+    let pid = Pid::from_u32(std::process::id());
+    let mut sys = System::new();
+    sys.refresh_processes(ProcessesToUpdate::Some(&[pid]), true);
+
+    sys.process(pid).map(|p| (p.memory(), p.virtual_memory()))
+}
+
+pub fn log_process_memory() {
+    if let Some((rss, virt)) = get_process_memory() {
+        trace!(
+            "[PROCESS] RSS={}, Virtual={}",
+            MemoryStats::format_bytes(rss),
+            MemoryStats::format_bytes(virt)
+        );
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_memory_stats() {
+        let stats = MemoryStats::current();
+        assert!(stats.rss_bytes > 0 || stats.virtual_bytes >= 0);
+    }
+
+    #[test]
+    fn test_format_bytes() {
+        assert_eq!(MemoryStats::format_bytes(500), "500 B");
+        assert_eq!(MemoryStats::format_bytes(1024), "1.00 KB");
+        assert_eq!(MemoryStats::format_bytes(1024 * 1024), "1.00 MB");
+        assert_eq!(MemoryStats::format_bytes(1024 * 1024 * 1024), "1.00 GB");
+    }
+
+    #[test]
+    fn test_checkpoint() {
+        let checkpoint = MemoryCheckpoint::new("test");
+        checkpoint.compare_and_log();
+    }
+
+    #[test]
+    fn test_thread_registry() {
+        register_thread("test-thread", "test-component");
+        record_thread_activity("test-thread");
+        log_thread_stats();
+        unregister_thread("test-thread");
+    }
+}
diff --git a/src/core/shared/mod.rs b/src/core/shared/mod.rs
index ba1da5236..9d9f67129 100644
--- a/src/core/shared/mod.rs
+++ b/src/core/shared/mod.rs
@@ -6,6 +6,7 @@
 pub mod admin;
 pub mod analytics;
 pub mod enums;
+pub mod memory_monitor;
 pub mod models;
 pub mod schema;
 pub mod state;
diff --git a/src/core/shared/state.rs b/src/core/shared/state.rs
index 6b238f7c4..ce387874f 100644
--- a/src/core/shared/state.rs
+++ b/src/core/shared/state.rs
@@ -42,6 +42,192 @@ pub struct AttendantNotification {
     pub priority: i32,
 }
 
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct AgentActivity {
+    pub phase: String,
+    pub items_processed: u32,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub items_total: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub speed_per_min: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub eta_seconds: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub current_item: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub bytes_processed: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tokens_used: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub files_created: Option<Vec<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tables_created: Option<Vec<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub log_lines: Option<Vec<String>>,
+}
+
+impl AgentActivity {
+    pub fn new(phase: impl Into<String>) -> Self {
+        Self {
+            phase: phase.into(),
+            items_processed: 0,
+            items_total: None,
+            speed_per_min: None,
+            eta_seconds: None,
+            current_item: None,
+            bytes_processed: None,
+            tokens_used: None,
+            files_created: None,
+            tables_created: None,
+            log_lines: None,
+        }
+    }
+
+    #[must_use]
+    pub fn with_progress(mut self, processed: u32, total: Option<u32>) -> Self {
+        self.items_processed = processed;
+        self.items_total = total;
+        self
+    }
+
+    #[must_use]
+    pub fn with_speed(mut self, speed: f32, eta_seconds: Option<u32>) -> Self {
+        self.speed_per_min = Some(speed);
+        self.eta_seconds = eta_seconds;
+        self
+    }
+
+    #[must_use]
+    pub fn with_current_item(mut self, item: impl Into<String>) -> Self {
+        self.current_item = Some(item.into());
+        self
+    }
+
+    #[must_use]
+    pub fn with_bytes(mut self, bytes: u64) -> Self {
+        self.bytes_processed = Some(bytes);
+        self
+    }
+
+    #[must_use]
+    pub fn with_tokens(mut self, tokens: u32) -> Self {
+        self.tokens_used = Some(tokens);
+        self
+    }
+
+    #[must_use]
+    pub fn with_files(mut self, files: Vec<String>) -> Self {
+        self.files_created = Some(files);
+        self
+    }
+
+    #[must_use]
+    pub fn with_tables(mut self, tables: Vec<String>) -> Self {
+        self.tables_created = Some(tables);
+        self
+    }
+
+    #[must_use]
+    pub fn with_log_lines(mut self, lines: Vec<String>) -> Self {
+        self.log_lines = Some(lines);
+        self
+    }
+
+    #[must_use]
+    pub fn add_log_line(mut self, line: impl Into<String>) -> Self {
+        let lines = self.log_lines.get_or_insert_with(Vec::new);
+        lines.push(line.into());
+        self
+    }
+}
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct TaskProgressEvent {
+    #[serde(rename = "type")]
+    pub event_type: String,
+    pub task_id: String,
+    pub step: String,
+    pub message: String,
+    pub progress: u8,
+    pub total_steps: u8,
+    pub current_step: u8,
+    pub timestamp: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub details: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub activity: Option<AgentActivity>,
+}
+
+impl TaskProgressEvent {
+    pub fn new(task_id: impl Into<String>, step: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            event_type: "task_progress".to_string(),
+            task_id: task_id.into(),
+            step: step.into(),
+            message: message.into(),
+            progress: 0,
+            total_steps: 0,
+            current_step: 0,
+            timestamp: chrono::Utc::now().to_rfc3339(),
+            details: None,
+            error: None,
+            activity: None,
+        }
+    }
+
+    #[must_use]
+    pub fn with_progress(mut self, current: u8, total: u8) -> Self {
+        self.current_step = current;
+        self.total_steps = total;
+        self.progress = if total > 0 { (current * 100) / total } else { 0 };
+        self
+    }
+
+    #[must_use]
+    pub fn with_details(mut self, details: impl Into<String>) -> Self {
+        self.details = Some(details.into());
+        self
+    }
+
+    #[must_use]
+    pub fn with_activity(mut self, activity: AgentActivity) -> Self {
+        self.activity = Some(activity);
+        self
+    }
+
+    #[must_use]
+    pub fn with_error(mut self, error: impl Into<String>) -> Self {
+        self.event_type = "task_error".to_string();
+        self.error = Some(error.into());
+        self
+    }
+
+    #[must_use]
+    pub fn completed(mut self) -> Self {
+        self.event_type = "task_completed".to_string();
+        self.progress = 100;
+        self
+    }
+
+    pub fn started(task_id: impl Into<String>, message: impl Into<String>, total_steps: u8) -> Self {
+        Self {
+            event_type: "task_started".to_string(),
+            task_id: task_id.into(),
+            step: "init".to_string(),
+            message: message.into(),
+            progress: 0,
+            total_steps,
+            current_step: 0,
+            timestamp: chrono::Utc::now().to_rfc3339(),
+            details: None,
+            error: None,
+            activity: None,
+        }
+    }
+}
+
 #[derive(Clone, Default)]
 pub struct Extensions {
     map: Arc<RwLock<HashMap<TypeId, Arc<dyn Any + Send + Sync>>>>,
@@ -129,6 +315,7 @@ pub struct AppState {
     pub task_engine: Arc<TaskEngine>,
     pub extensions: Extensions,
     pub attendant_broadcast: Option<broadcast::Sender<AttendantNotification>>,
+    pub task_progress_broadcast: Option<broadcast::Sender<TaskProgressEvent>>,
 }
 
 impl Clone for AppState {
@@ -158,6 +345,7 @@ impl Clone for AppState {
             task_engine: Arc::clone(&self.task_engine),
             extensions: self.extensions.clone(),
             attendant_broadcast: self.attendant_broadcast.clone(),
+            task_progress_broadcast: self.task_progress_broadcast.clone(),
         }
     }
 }
@@ -198,10 +386,95 @@ impl std::fmt::Debug for AppState {
             .field("task_engine", &"Arc<TaskEngine>")
             .field("extensions", &self.extensions)
             .field("attendant_broadcast", &self.attendant_broadcast.is_some())
+            .field("task_progress_broadcast", &self.task_progress_broadcast.is_some())
             .finish()
     }
 }
 
+impl AppState {
+    pub fn broadcast_task_progress(&self, event: TaskProgressEvent) {
+        log::info!(
+            "[TASK_PROGRESS] Broadcasting: task_id={}, step={}, message={}",
+            event.task_id,
+            event.step,
+            event.message
+        );
+        if let Some(tx) = &self.task_progress_broadcast {
+            let receiver_count = tx.receiver_count();
+            log::info!("[TASK_PROGRESS] Broadcast channel has {} receivers", receiver_count);
+            match tx.send(event) {
+                Ok(_) => {
+                    log::info!("[TASK_PROGRESS] Event sent successfully");
+                }
+                Err(e) => {
+                    log::warn!("[TASK_PROGRESS] No listeners for task progress: {e}");
+                }
+            }
+        } else {
+            log::warn!("[TASK_PROGRESS] No broadcast channel configured!");
+        }
+    }
+
+    pub fn emit_progress(
+        &self,
+        task_id: &str,
+        step: &str,
+        message: &str,
+        current: u8,
+        total: u8,
+    ) {
+        let event = TaskProgressEvent::new(task_id, step, message)
+            .with_progress(current, total);
+        self.broadcast_task_progress(event);
+    }
+
+    pub fn emit_progress_with_details(
+        &self,
+        task_id: &str,
+        step: &str,
+        message: &str,
+        current: u8,
+        total: u8,
+        details: &str,
+    ) {
+        let event = TaskProgressEvent::new(task_id, step, message)
+            .with_progress(current, total)
+            .with_details(details);
+        self.broadcast_task_progress(event);
+    }
+
+    pub fn emit_activity(
+        &self,
+        task_id: &str,
+        step: &str,
+        message: &str,
+        current: u8,
+        total: u8,
+        activity: AgentActivity,
+    ) {
+        let event = TaskProgressEvent::new(task_id, step, message)
+            .with_progress(current, total)
+            .with_activity(activity);
+        self.broadcast_task_progress(event);
+    }
+
+    pub fn emit_task_started(&self, task_id: &str, message: &str, total_steps: u8) {
+        let event = TaskProgressEvent::started(task_id, message, total_steps);
+        self.broadcast_task_progress(event);
+    }
+
+    pub fn emit_task_completed(&self, task_id: &str, message: &str) {
+        let event = TaskProgressEvent::new(task_id, "complete", message).completed();
+        self.broadcast_task_progress(event);
+    }
+
+    pub fn emit_task_error(&self, task_id: &str, step: &str, error: &str) {
+        let event = TaskProgressEvent::new(task_id, step, "Task failed")
+            .with_error(error);
+        self.broadcast_task_progress(event);
+    }
+}
+
 #[cfg(test)]
 impl Default for AppState {
     fn default() -> Self {
@@ -212,6 +485,7 @@ impl Default for AppState {
         let pool = Pool::builder()
             .max_size(1)
             .test_on_check_out(false)
+            .connection_timeout(std::time::Duration::from_secs(5))
             .build(manager)
             .expect("Failed to create test database pool");
 
@@ -219,6 +493,7 @@ impl Default for AppState {
         let session_manager = SessionManager::new(conn, None);
 
         let (attendant_tx, _) = broadcast::channel(100);
+        let (task_progress_tx, _) = broadcast::channel(100);
 
         Self {
             #[cfg(feature = "drive")]
@@ -245,6 +520,7 @@ impl Default for AppState {
             task_engine: Arc::new(TaskEngine::new(pool)),
             extensions: Extensions::new(),
             attendant_broadcast: Some(attendant_tx),
+            task_progress_broadcast: Some(task_progress_tx),
         }
     }
 }
diff --git a/src/core/shared/test_utils.rs b/src/core/shared/test_utils.rs
index e8f1c24fd..0cfd27cde 100644
--- a/src/core/shared/test_utils.rs
+++ b/src/core/shared/test_utils.rs
@@ -178,6 +178,7 @@ impl TestAppStateBuilder {
         let pool = Pool::builder()
             .max_size(1)
             .test_on_check_out(false)
+            .connection_timeout(std::time::Duration::from_secs(5))
             .build(manager)?;
 
         let conn = pool.get()?;
@@ -240,7 +241,10 @@ pub fn create_test_db_pool() -> Result<DbPool, Box<dyn std::error::Error + Send
     let database_url = get_database_url_sync()
         .unwrap_or_else(|_| "postgres://test:test@localhost:5432/test".to_string());
     let manager = ConnectionManager::<PgConnection>::new(&database_url);
-    let pool = Pool::builder().max_size(1).build(manager)?;
+    let pool = Pool::builder()
+        .max_size(1)
+        .connection_timeout(std::time::Duration::from_secs(5))
+        .build(manager)?;
     Ok(pool)
 }
 
diff --git a/src/core/shared/utils.rs b/src/core/shared/utils.rs
index bcc22efd8..fc755ed69 100644
--- a/src/core/shared/utils.rs
+++ b/src/core/shared/utils.rs
@@ -2,6 +2,8 @@ use crate::config::DriveConfig;
 use crate::core::secrets::SecretsManager;
 use anyhow::{Context, Result};
 use aws_config::BehaviorVersion;
+use aws_config::retry::RetryConfig;
+use aws_config::timeout::TimeoutConfig;
 use aws_sdk_s3::{config::Builder as S3ConfigBuilder, Client as S3Client};
 use diesel::Connection;
 use diesel::{
@@ -101,18 +103,33 @@ pub async fn create_s3_operator(
         (config.access_key.clone(), config.secret_key.clone())
     };
 
+    // Set CA cert for self-signed TLS (dev stack)
     if std::path::Path::new(CA_CERT_PATH).exists() {
         std::env::set_var("AWS_CA_BUNDLE", CA_CERT_PATH);
         std::env::set_var("SSL_CERT_FILE", CA_CERT_PATH);
         debug!("Set AWS_CA_BUNDLE and SSL_CERT_FILE to {} for S3 client", CA_CERT_PATH);
     }
 
+    // Configure timeouts to prevent memory leaks on connection failures
+    let timeout_config = TimeoutConfig::builder()
+        .connect_timeout(Duration::from_secs(5))
+        .read_timeout(Duration::from_secs(30))
+        .operation_timeout(Duration::from_secs(30))
+        .operation_attempt_timeout(Duration::from_secs(15))
+        .build();
+
+    // Limit retries to prevent 100% CPU on connection failures
+    let retry_config = RetryConfig::standard()
+        .with_max_attempts(2);
+
     let base_config = aws_config::defaults(BehaviorVersion::latest())
         .endpoint_url(endpoint)
         .region("auto")
         .credentials_provider(aws_sdk_s3::config::Credentials::new(
             access_key, secret_key, None, None, "static",
         ))
+        .timeout_config(timeout_config)
+        .retry_config(retry_config)
         .load()
         .await;
     let s3_config = S3ConfigBuilder::from(&base_config)
@@ -261,6 +278,11 @@ pub fn create_conn() -> Result<DbPool, anyhow::Error> {
     let database_url = get_database_url_sync()?;
     let manager = ConnectionManager::<PgConnection>::new(database_url);
     Pool::builder()
+        .max_size(10)
+        .min_idle(Some(1))
+        .connection_timeout(std::time::Duration::from_secs(5))
+        .idle_timeout(Some(std::time::Duration::from_secs(300)))
+        .max_lifetime(Some(std::time::Duration::from_secs(1800)))
         .build(manager)
         .map_err(|e| anyhow::anyhow!("Failed to create database pool: {}", e))
 }
@@ -269,6 +291,11 @@ pub async fn create_conn_async() -> Result<DbPool, anyhow::Error> {
     let database_url = get_database_url().await?;
     let manager = ConnectionManager::<PgConnection>::new(database_url);
     Pool::builder()
+        .max_size(10)
+        .min_idle(Some(1))
+        .connection_timeout(std::time::Duration::from_secs(5))
+        .idle_timeout(Some(std::time::Duration::from_secs(300)))
+        .max_lifetime(Some(std::time::Duration::from_secs(1800)))
         .build(manager)
         .map_err(|e| anyhow::anyhow!("Failed to create database pool: {}", e))
 }
diff --git a/src/drive/drive_monitor/mod.rs b/src/drive/drive_monitor/mod.rs
index 5551c1fb8..365b08d64 100644
--- a/src/drive/drive_monitor/mod.rs
+++ b/src/drive/drive_monitor/mod.rs
@@ -1,16 +1,22 @@
 use crate::basic::compiler::BasicCompiler;
 use crate::config::ConfigManager;
+use crate::core::kb::embedding_generator::is_embedding_server_ready;
 use crate::core::kb::KnowledgeBaseManager;
+use crate::core::shared::memory_monitor::{log_jemalloc_stats, MemoryStats};
 use crate::shared::message_types::MessageType;
 use crate::shared::state::AppState;
 use aws_sdk_s3::Client;
-use log::{debug, error, info};
+use log::{debug, error, info, trace, warn};
 use std::collections::HashMap;
 use std::error::Error;
 use std::path::PathBuf;
-use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
 use std::sync::Arc;
-use tokio::time::{interval, Duration};
+use tokio::time::Duration;
+
+const KB_INDEXING_TIMEOUT_SECS: u64 = 60;
+const MAX_BACKOFF_SECS: u64 = 300;
+const INITIAL_BACKOFF_SECS: u64 = 30;
 #[derive(Debug, Clone)]
 pub struct FileState {
     pub etag: String,
@@ -24,6 +30,7 @@ pub struct DriveMonitor {
     kb_manager: Arc<KnowledgeBaseManager>,
     work_root: PathBuf,
     is_processing: Arc<AtomicBool>,
+    consecutive_failures: Arc<AtomicU32>,
 }
 impl DriveMonitor {
     pub fn new(state: Arc<AppState>, bucket_name: String, bot_id: uuid::Uuid) -> Self {
@@ -38,29 +45,105 @@ impl DriveMonitor {
             kb_manager,
             work_root,
             is_processing: Arc::new(AtomicBool::new(false)),
+            consecutive_failures: Arc::new(AtomicU32::new(0)),
         }
     }
 
+    async fn check_drive_health(&self) -> bool {
+        let Some(client) = &self.state.drive else {
+            return false;
+        };
+
+        match tokio::time::timeout(
+            Duration::from_secs(5),
+            client.head_bucket().bucket(&self.bucket_name).send(),
+        )
+        .await
+        {
+            Ok(Ok(_)) => true,
+            Ok(Err(e)) => {
+                debug!("[DRIVE_MONITOR] Health check failed: {}", e);
+                false
+            }
+            Err(_) => {
+                debug!("[DRIVE_MONITOR] Health check timed out");
+                false
+            }
+        }
+    }
+
+    fn calculate_backoff(&self) -> Duration {
+        let failures = self.consecutive_failures.load(Ordering::Relaxed);
+        if failures == 0 {
+            return Duration::from_secs(INITIAL_BACKOFF_SECS);
+        }
+        let backoff_secs = INITIAL_BACKOFF_SECS * (1u64 << failures.min(4));
+        Duration::from_secs(backoff_secs.min(MAX_BACKOFF_SECS))
+    }
+
     pub async fn start_monitoring(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
-        info!("Starting DriveMonitor for bot {}", self.bot_id);
+        trace!("[PROFILE] start_monitoring ENTER");
+        let start_mem = MemoryStats::current();
+        trace!("[DRIVE_MONITOR] Starting DriveMonitor for bot {}, RSS={}",
+              self.bot_id, MemoryStats::format_bytes(start_mem.rss_bytes));
+
+        if !self.check_drive_health().await {
+            warn!("[DRIVE_MONITOR] S3/MinIO not available for bucket {}, will retry with backoff",
+                  self.bucket_name);
+        }
 
         self.is_processing
             .store(true, std::sync::atomic::Ordering::SeqCst);
 
-        self.check_for_changes().await?;
+        trace!("[PROFILE] start_monitoring: calling check_for_changes...");
+        info!("[DRIVE_MONITOR] Calling initial check_for_changes...");
+
+        match self.check_for_changes().await {
+            Ok(_) => {
+                self.consecutive_failures.store(0, Ordering::Relaxed);
+            }
+            Err(e) => {
+                warn!("[DRIVE_MONITOR] Initial check failed (will retry): {}", e);
+                self.consecutive_failures.fetch_add(1, Ordering::Relaxed);
+            }
+        }
+        trace!("[PROFILE] start_monitoring: check_for_changes returned");
+
+        let after_initial = MemoryStats::current();
+        trace!("[DRIVE_MONITOR] After initial check, RSS={} (delta={})",
+              MemoryStats::format_bytes(after_initial.rss_bytes),
+              MemoryStats::format_bytes(after_initial.rss_bytes.saturating_sub(start_mem.rss_bytes)));
 
         let self_clone = Arc::new(self.clone());
         tokio::spawn(async move {
-            let mut interval = tokio::time::interval(Duration::from_secs(30));
-
             while self_clone
                 .is_processing
                 .load(std::sync::atomic::Ordering::SeqCst)
             {
-                interval.tick().await;
+                let backoff = self_clone.calculate_backoff();
+                tokio::time::sleep(backoff).await;
 
-                if let Err(e) = self_clone.check_for_changes().await {
-                    error!("Error during sync for bot {}: {}", self_clone.bot_id, e);
+                if !self_clone.check_drive_health().await {
+                    let failures = self_clone.consecutive_failures.fetch_add(1, Ordering::Relaxed) + 1;
+                    if failures % 10 == 1 {
+                        warn!("[DRIVE_MONITOR] S3/MinIO unavailable for bucket {} (failures: {}), backing off to {:?}",
+                              self_clone.bucket_name, failures, self_clone.calculate_backoff());
+                    }
+                    continue;
+                }
+
+                match self_clone.check_for_changes().await {
+                    Ok(_) => {
+                        let prev_failures = self_clone.consecutive_failures.swap(0, Ordering::Relaxed);
+                        if prev_failures > 0 {
+                            info!("[DRIVE_MONITOR] S3/MinIO recovered for bucket {} after {} failures",
+                                  self_clone.bucket_name, prev_failures);
+                        }
+                    }
+                    Err(e) => {
+                        self_clone.consecutive_failures.fetch_add(1, Ordering::Relaxed);
+                        error!("Error during sync for bot {}: {}", self_clone.bot_id, e);
+                    }
                 }
             }
         });
@@ -76,6 +159,7 @@ impl DriveMonitor {
             .store(false, std::sync::atomic::Ordering::SeqCst);
 
         self.file_states.write().await.clear();
+        self.consecutive_failures.store(0, Ordering::Relaxed);
 
         info!("DriveMonitor stopped for bot {}", self.bot_id);
         Ok(())
@@ -86,9 +170,9 @@ impl DriveMonitor {
                 "Drive Monitor service started for bucket: {}",
                 self.bucket_name
             );
-            let mut tick = interval(Duration::from_secs(90));
             loop {
-                tick.tick().await;
+                let backoff = self.calculate_backoff();
+                tokio::time::sleep(backoff).await;
 
                 if self.is_processing.load(Ordering::Acquire) {
                     log::warn!(
@@ -97,10 +181,29 @@ impl DriveMonitor {
                     continue;
                 }
 
+                if !self.check_drive_health().await {
+                    let failures = self.consecutive_failures.fetch_add(1, Ordering::Relaxed) + 1;
+                    if failures % 10 == 1 {
+                        warn!("[DRIVE_MONITOR] S3/MinIO unavailable for bucket {} (failures: {}), backing off to {:?}",
+                              self.bucket_name, failures, self.calculate_backoff());
+                    }
+                    continue;
+                }
+
                 self.is_processing.store(true, Ordering::Release);
 
-                if let Err(e) = self.check_for_changes().await {
-                    log::error!("Error checking for drive changes: {}", e);
+                match self.check_for_changes().await {
+                    Ok(_) => {
+                        let prev_failures = self.consecutive_failures.swap(0, Ordering::Relaxed);
+                        if prev_failures > 0 {
+                            info!("[DRIVE_MONITOR] S3/MinIO recovered for bucket {} after {} failures",
+                                  self.bucket_name, prev_failures);
+                        }
+                    }
+                    Err(e) => {
+                        self.consecutive_failures.fetch_add(1, Ordering::Relaxed);
+                        log::error!("Error checking for drive changes: {}", e);
+                    }
                 }
 
                 self.is_processing.store(false, Ordering::Release);
@@ -108,12 +211,52 @@ impl DriveMonitor {
         })
     }
     async fn check_for_changes(&self) -> Result<(), Box<dyn Error + Send + Sync>> {
+        trace!("[PROFILE] check_for_changes ENTER");
+        let start_mem = MemoryStats::current();
+        trace!("[DRIVE_MONITOR] check_for_changes START, RSS={}",
+              MemoryStats::format_bytes(start_mem.rss_bytes));
+
         let Some(client) = &self.state.drive else {
+            trace!("[PROFILE] check_for_changes: no drive client, returning");
             return Ok(());
         };
+
+        trace!("[PROFILE] check_for_changes: calling check_gbdialog_changes...");
+        trace!("[DRIVE_MONITOR] Checking gbdialog...");
         self.check_gbdialog_changes(client).await?;
+        trace!("[PROFILE] check_for_changes: check_gbdialog_changes done");
+        let after_dialog = MemoryStats::current();
+        trace!("[DRIVE_MONITOR] After gbdialog, RSS={} (delta={})",
+              MemoryStats::format_bytes(after_dialog.rss_bytes),
+              MemoryStats::format_bytes(after_dialog.rss_bytes.saturating_sub(start_mem.rss_bytes)));
+
+        trace!("[PROFILE] check_for_changes: calling check_gbot...");
+        trace!("[DRIVE_MONITOR] Checking gbot...");
         self.check_gbot(client).await?;
+        trace!("[PROFILE] check_for_changes: check_gbot done");
+        let after_gbot = MemoryStats::current();
+        trace!("[DRIVE_MONITOR] After gbot, RSS={} (delta={})",
+              MemoryStats::format_bytes(after_gbot.rss_bytes),
+              MemoryStats::format_bytes(after_gbot.rss_bytes.saturating_sub(after_dialog.rss_bytes)));
+
+        trace!("[PROFILE] check_for_changes: calling check_gbkb_changes...");
+        trace!("[DRIVE_MONITOR] Checking gbkb...");
         self.check_gbkb_changes(client).await?;
+        trace!("[PROFILE] check_for_changes: check_gbkb_changes done");
+        let after_gbkb = MemoryStats::current();
+        trace!("[DRIVE_MONITOR] After gbkb, RSS={} (delta={})",
+              MemoryStats::format_bytes(after_gbkb.rss_bytes),
+              MemoryStats::format_bytes(after_gbkb.rss_bytes.saturating_sub(after_gbot.rss_bytes)));
+
+        log_jemalloc_stats();
+
+        let total_delta = after_gbkb.rss_bytes.saturating_sub(start_mem.rss_bytes);
+        if total_delta > 50 * 1024 * 1024 {
+            warn!("[DRIVE_MONITOR] check_for_changes grew by {} - potential leak!",
+                  MemoryStats::format_bytes(total_delta));
+        }
+
+        trace!("[PROFILE] check_for_changes EXIT");
         Ok(())
     }
     async fn check_gbdialog_changes(
@@ -188,6 +331,7 @@ impl DriveMonitor {
         Ok(())
     }
     async fn check_gbot(&self, client: &Client) -> Result<(), Box<dyn Error + Send + Sync>> {
+        trace!("[PROFILE] check_gbot ENTER");
         let config_manager = ConfigManager::new(self.state.conn.clone());
         debug!("check_gbot: Checking bucket {} for config.csv changes", self.bucket_name);
         let mut continuation_token = None;
@@ -333,6 +477,7 @@ impl DriveMonitor {
             }
             continuation_token = list_objects.next_continuation_token;
         }
+        trace!("[PROFILE] check_gbot EXIT");
         Ok(())
     }
     async fn broadcast_theme_change(
@@ -467,6 +612,7 @@ impl DriveMonitor {
         &self,
         client: &Client,
     ) -> Result<(), Box<dyn Error + Send + Sync>> {
+        trace!("[PROFILE] check_gbkb_changes ENTER");
         let bot_name = self
             .bucket_name
             .strip_suffix(".gbai")
@@ -510,6 +656,12 @@ impl DriveMonitor {
                     continue;
                 }
 
+                let size = obj.size().unwrap_or(0);
+                if size == 0 {
+                    trace!("Skipping 0-byte file in .gbkb: {}", path);
+                    continue;
+                }
+
                 let file_state = FileState {
                     etag: obj.e_tag().unwrap_or_default().to_string(),
                 };
@@ -561,9 +713,6 @@ impl DriveMonitor {
                 }
 
                 let path_parts: Vec<&str> = path.split('/').collect();
-                // path_parts: [0] = "bot.gbkb", [1] = folder or file, [2+] = nested files
-                // Skip files directly in .gbkb root (path_parts.len() == 2 means root file)
-                // Only process files inside subfolders (path_parts.len() >= 3)
                 if path_parts.len() >= 3 {
                     let kb_name = path_parts[1];
                     let kb_folder_path = self
@@ -572,30 +721,59 @@ impl DriveMonitor {
                         .join(&gbkb_prefix)
                         .join(kb_name);
 
-                    info!(
-                        "Triggering KB indexing for folder: {} (PDF text extraction enabled)",
-                        kb_folder_path.display()
-                    );
-                    match self
-                        .kb_manager
-                        .handle_gbkb_change(bot_name, &kb_folder_path)
-                        .await
-                    {
-                        Ok(_) => {
-                            debug!(
-                                "Successfully processed KB change for {}/{}",
-                                bot_name, kb_name
-                            );
-                        }
-                        Err(e) => {
-                            log::error!(
-                                "Failed to process .gbkb change for {}/{}: {}",
-                                bot_name,
-                                kb_name,
-                                e
-                            );
-                        }
+                    let kb_indexing_disabled = std::env::var("DISABLE_KB_INDEXING")
+                        .map(|v| v == "true" || v == "1")
+                        .unwrap_or(false);
+
+                    if kb_indexing_disabled {
+                        debug!("KB indexing disabled via DISABLE_KB_INDEXING, skipping {}", kb_folder_path.display());
+                        continue;
                     }
+
+                    if !is_embedding_server_ready() {
+                        info!("[DRIVE_MONITOR] Embedding server not ready, deferring KB indexing for {}", kb_folder_path.display());
+                        continue;
+                    }
+
+                    let kb_manager = Arc::clone(&self.kb_manager);
+                    let bot_name_owned = bot_name.to_string();
+                    let kb_name_owned = kb_name.to_string();
+                    let kb_folder_owned = kb_folder_path.clone();
+
+                    tokio::spawn(async move {
+                        info!(
+                            "Triggering KB indexing for folder: {} (PDF text extraction enabled)",
+                            kb_folder_owned.display()
+                        );
+
+                        match tokio::time::timeout(
+                            Duration::from_secs(KB_INDEXING_TIMEOUT_SECS),
+                            kb_manager.handle_gbkb_change(&bot_name_owned, &kb_folder_owned)
+                        ).await {
+                            Ok(Ok(_)) => {
+                                debug!(
+                                    "Successfully processed KB change for {}/{}",
+                                    bot_name_owned, kb_name_owned
+                                );
+                            }
+                            Ok(Err(e)) => {
+                                log::error!(
+                                    "Failed to process .gbkb change for {}/{}: {}",
+                                    bot_name_owned,
+                                    kb_name_owned,
+                                    e
+                                );
+                            }
+                            Err(_) => {
+                                log::error!(
+                                    "KB indexing timed out after {}s for {}/{}",
+                                    KB_INDEXING_TIMEOUT_SECS,
+                                    bot_name_owned,
+                                    kb_name_owned
+                                );
+                            }
+                        }
+                    });
                 }
             }
         }
@@ -640,6 +818,7 @@ impl DriveMonitor {
             }
         }
 
+        trace!("[PROFILE] check_gbkb_changes EXIT");
         Ok(())
     }
 
diff --git a/src/drive/mod.rs b/src/drive/mod.rs
index f00c19418..91daa93c3 100644
--- a/src/drive/mod.rs
+++ b/src/drive/mod.rs
@@ -8,7 +8,7 @@ use axum::{
     routing::{get, post},
     Router,
 };
-
+use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
 use serde::{Deserialize, Serialize};
 
 use std::sync::Arc;
@@ -169,61 +169,82 @@ pub struct RestoreResponse {
 }
 
 #[allow(unused)]
+#[derive(Debug, Serialize)]
+pub struct BucketInfo {
+    pub name: String,
+    pub is_gbai: bool,
+}
+
 pub fn configure() -> Router<Arc<AppState>> {
     Router::new()
-        .route("/files/list", get(list_files))
-        .route("/files/read", post(read_file))
-        .route("/files/write", post(write_file))
-        .route("/files/save", post(write_file))
-        .route("/files/getContents", post(read_file))
-        .route("/files/delete", post(delete_file))
-        .route("/files/upload", post(upload_file_to_drive))
-        .route("/files/download", post(download_file))
-        .route("/files/copy", post(copy_file))
-        .route("/files/move", post(move_file))
-        .route("/files/createFolder", post(create_folder))
-        .route("/files/create-folder", post(create_folder))
-        .route("/files/dirFolder", post(list_folder_contents))
-        .route("/files/search", get(search_files))
-        .route("/files/recent", get(recent_files))
-        .route("/files/favorite", get(list_favorites))
-        .route("/files/shareFolder", post(share_folder))
-        .route("/files/shared", get(list_shared))
-        .route("/files/permissions", get(get_permissions))
-        .route("/files/quota", get(get_quota))
-        .route("/files/sync/status", get(sync_status))
-        .route("/files/sync/start", post(start_sync))
-        .route("/files/sync/stop", post(stop_sync))
-        .route("/files/versions", get(list_versions))
-        .route("/files/restore", post(restore_version))
-        .route("/docs/merge", post(document_processing::merge_documents))
-        .route("/docs/convert", post(document_processing::convert_document))
-        .route("/docs/fill", post(document_processing::fill_document))
-        .route("/docs/export", post(document_processing::export_document))
-        .route("/docs/import", post(document_processing::import_document))
+        .route("/api/files/buckets", get(list_buckets))
+        .route("/api/files/list", get(list_files))
+        .route("/api/files/read", post(read_file))
+        .route("/api/files/write", post(write_file))
+        .route("/api/files/save", post(write_file))
+        .route("/api/files/getContents", post(read_file))
+        .route("/api/files/delete", post(delete_file))
+        .route("/api/files/upload", post(upload_file_to_drive))
+        .route("/api/files/download", post(download_file))
+        .route("/api/files/copy", post(copy_file))
+        .route("/api/files/move", post(move_file))
+        .route("/api/files/createFolder", post(create_folder))
+        .route("/api/files/create-folder", post(create_folder))
+        .route("/api/files/dirFolder", post(list_folder_contents))
+        .route("/api/files/search", get(search_files))
+        .route("/api/files/recent", get(recent_files))
+        .route("/api/files/favorite", get(list_favorites))
+        .route("/api/files/shareFolder", post(share_folder))
+        .route("/api/files/shared", get(list_shared))
+        .route("/api/files/permissions", get(get_permissions))
+        .route("/api/files/quota", get(get_quota))
+        .route("/api/files/sync/status", get(sync_status))
+        .route("/api/files/sync/start", post(start_sync))
+        .route("/api/files/sync/stop", post(stop_sync))
+        .route("/api/files/versions", get(list_versions))
+        .route("/api/files/restore", post(restore_version))
+        .route("/api/docs/merge", post(document_processing::merge_documents))
+        .route("/api/docs/convert", post(document_processing::convert_document))
+        .route("/api/docs/fill", post(document_processing::fill_document))
+        .route("/api/docs/export", post(document_processing::export_document))
+        .route("/api/docs/import", post(document_processing::import_document))
+}
+
+pub async fn list_buckets(
+    State(state): State<Arc<AppState>>,
+) -> Result<Json<Vec<BucketInfo>>, (StatusCode, Json<serde_json::Value>)> {
+    let s3_client = state.drive.as_ref().ok_or_else(|| {
+        (
+            StatusCode::SERVICE_UNAVAILABLE,
+            Json(serde_json::json!({"error": "S3 service not available"})),
+        )
+    })?;
+
+    let result = s3_client.list_buckets().send().await.map_err(|e| {
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": format!("Failed to list buckets: {}", e)})),
+        )
+    })?;
+
+    let buckets: Vec<BucketInfo> = result
+        .buckets()
+        .iter()
+        .filter_map(|b| {
+            b.name().map(|name| BucketInfo {
+                name: name.to_string(),
+                is_gbai: name.to_lowercase().ends_with(".gbai"),
+            })
+        })
+        .collect();
+
+    Ok(Json(buckets))
 }
 
 pub async fn list_files(
     State(state): State<Arc<AppState>>,
     Query(params): Query<ListQuery>,
 ) -> Result<Json<Vec<FileItem>>, (StatusCode, Json<serde_json::Value>)> {
-    #[cfg(feature = "console")]
-    let result = {
-        let mut tree = FileTree::new(state.clone());
-        if let Some(bucket) = &params.bucket {
-            if let Some(path) = &params.path {
-                tree.enter_folder(bucket.clone(), path.clone()).await.ok();
-            } else {
-                tree.enter_bucket(bucket.clone()).await.ok();
-            }
-        } else {
-            tree.load_root().await.ok();
-        }
-
-        Ok::<Vec<FileItem>, (StatusCode, Json<serde_json::Value>)>(vec![])
-    };
-
-    #[cfg(not(feature = "console"))]
     let result: Result<Vec<FileItem>, (StatusCode, Json<serde_json::Value>)> = {
         let s3_client = state.drive.as_ref().ok_or_else(|| {
             (
@@ -402,33 +423,117 @@ pub async fn write_file(
     State(state): State<Arc<AppState>>,
     Json(req): Json<WriteRequest>,
 ) -> Result<Json<SuccessResponse>, (StatusCode, Json<serde_json::Value>)> {
+    tracing::debug!(
+        "write_file called: bucket={}, path={}, content_len={}",
+        req.bucket,
+        req.path,
+        req.content.len()
+    );
+
     let s3_client = state.drive.as_ref().ok_or_else(|| {
+        tracing::error!("S3 client not available for write_file");
         (
             StatusCode::SERVICE_UNAVAILABLE,
             Json(serde_json::json!({ "error": "S3 service not available" })),
         )
     })?;
 
+    // Try to decode as base64, otherwise use content directly
+    // Base64 content from file uploads won't have whitespace/newlines at start
+    // and will only contain valid base64 characters
+    let is_base64 = is_likely_base64(&req.content);
+    tracing::debug!("Content detected as base64: {}", is_base64);
+
+    let body_bytes: Vec<u8> = if is_base64 {
+        match BASE64.decode(&req.content) {
+            Ok(decoded) => {
+                tracing::debug!("Base64 decoded successfully, size: {} bytes", decoded.len());
+                decoded
+            }
+            Err(e) => {
+                tracing::warn!("Base64 decode failed ({}), using raw content", e);
+                req.content.clone().into_bytes()
+            }
+        }
+    } else {
+        req.content.into_bytes()
+    };
+
+    let sanitized_path = req.path
+        .replace("//", "/")
+        .trim_start_matches('/')
+        .to_string();
+
+    tracing::debug!("Writing {} bytes to {}/{}", body_bytes.len(), req.bucket, sanitized_path);
+
     s3_client
         .put_object()
         .bucket(&req.bucket)
-        .key(&req.path)
-        .body(req.content.into_bytes().into())
+        .key(&sanitized_path)
+        .body(body_bytes.into())
         .send()
         .await
         .map_err(|e| {
+            tracing::error!("S3 put_object failed: {:?}", e);
             (
                 StatusCode::INTERNAL_SERVER_ERROR,
                 Json(serde_json::json!({ "error": format!("Failed to write file: {}", e) })),
             )
         })?;
 
+    tracing::info!("File written successfully: {}/{}", req.bucket, sanitized_path);
     Ok(Json(SuccessResponse {
         success: true,
         message: Some("File written successfully".to_string()),
     }))
 }
 
+/// Check if a string is likely base64 encoded content (from file upload)
+/// Base64 from DataURL will be pure base64 without newlines at start
+fn is_likely_base64(s: &str) -> bool {
+    // Empty or very short strings are not base64 uploads
+    if s.len() < 20 {
+        return false;
+    }
+
+    // If it starts with common text patterns, it's not base64
+    let trimmed = s.trim_start();
+    if trimmed.starts_with('#')      // Markdown, shell scripts
+        || trimmed.starts_with("//")  // Comments
+        || trimmed.starts_with("/*")  // C-style comments
+        || trimmed.starts_with('{')   // JSON
+        || trimmed.starts_with('[')   // JSON array
+        || trimmed.starts_with('<')   // XML/HTML
+        || trimmed.starts_with("<!")  // HTML doctype
+        || trimmed.starts_with("function") // JavaScript
+        || trimmed.starts_with("const ")   // JavaScript
+        || trimmed.starts_with("let ")     // JavaScript
+        || trimmed.starts_with("var ")     // JavaScript
+        || trimmed.starts_with("import ")  // Various languages
+        || trimmed.starts_with("from ")    // Python
+        || trimmed.starts_with("def ")     // Python
+        || trimmed.starts_with("class ")   // Various languages
+        || trimmed.starts_with("pub ")     // Rust
+        || trimmed.starts_with("use ")     // Rust
+        || trimmed.starts_with("mod ")     // Rust
+    {
+        return false;
+    }
+
+    // Check if string contains only valid base64 characters
+    // and try to decode it
+    let base64_chars = s.chars().all(|c| {
+        c.is_ascii_alphanumeric() || c == '+' || c == '/' || c == '='
+    });
+
+    if !base64_chars {
+        return false;
+    }
+
+    // Final check: try to decode and see if it works
+    BASE64.decode(s).is_ok()
+}
+
 pub async fn delete_file(
     State(state): State<Arc<AppState>>,
     Json(req): Json<DeleteRequest>,
diff --git a/src/llm/claude.rs b/src/llm/claude.rs
index 3726a68b5..4ed1239b6 100644
--- a/src/llm/claude.rs
+++ b/src/llm/claude.rs
@@ -3,10 +3,13 @@ use futures::StreamExt;
 use log::{info, trace, warn};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
+use std::time::Duration;
 use tokio::sync::mpsc;
 
 use super::{llm_models::get_handler, LLMProvider};
 
+const LLM_TIMEOUT_SECS: u64 = 300;
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ClaudeMessage {
     pub role: String,
@@ -74,8 +77,14 @@ impl ClaudeClient {
     pub fn new(base_url: String, deployment_name: Option<String>) -> Self {
         let is_azure = base_url.contains("azure.com") || base_url.contains("openai.azure.com");
 
+        let client = reqwest::Client::builder()
+            .timeout(Duration::from_secs(LLM_TIMEOUT_SECS))
+            .connect_timeout(Duration::from_secs(30))
+            .build()
+            .unwrap_or_else(|_| reqwest::Client::new());
+
         Self {
-            client: reqwest::Client::new(),
+            client,
             base_url,
             deployment_name: deployment_name.unwrap_or_else(|| "claude-opus-4-5".to_string()),
             is_azure,
@@ -83,8 +92,14 @@ impl ClaudeClient {
     }
 
     pub fn azure(endpoint: String, deployment_name: String) -> Self {
+        let client = reqwest::Client::builder()
+            .timeout(Duration::from_secs(LLM_TIMEOUT_SECS))
+            .connect_timeout(Duration::from_secs(30))
+            .build()
+            .unwrap_or_else(|_| reqwest::Client::new());
+
         Self {
-            client: reqwest::Client::new(),
+            client,
             base_url: endpoint,
             deployment_name,
             is_azure: true,
@@ -93,7 +108,6 @@ impl ClaudeClient {
 
     fn build_url(&self) -> String {
         if self.is_azure {
-            // Azure Claude exposes Anthropic API directly at /v1/messages
             format!("{}/v1/messages", self.base_url.trim_end_matches('/'))
         } else {
             format!("{}/v1/messages", self.base_url.trim_end_matches('/'))
@@ -103,8 +117,6 @@ impl ClaudeClient {
     fn build_headers(&self, api_key: &str) -> reqwest::header::HeaderMap {
         let mut headers = reqwest::header::HeaderMap::new();
 
-        // Both Azure Claude and direct Anthropic use the same headers
-        // Azure Claude proxies the Anthropic API format
         if let Ok(val) = api_key.parse() {
             headers.insert("x-api-key", val);
         }
@@ -119,17 +131,12 @@ impl ClaudeClient {
         headers
     }
 
-    /// Normalize role names for Claude API compatibility.
-    /// Claude only accepts "user" or "assistant" roles in messages.
-    /// - "episodic" and "compact" roles (conversation summaries) are converted to "user" with a context prefix
-    /// - "system" roles should be handled separately (not in messages array)
-    /// - Unknown roles default to "user"
     fn normalize_role(role: &str) -> Option<(String, bool)> {
         match role {
             "user" => Some(("user".to_string(), false)),
             "assistant" => Some(("assistant".to_string(), false)),
-            "system" => None, // System messages handled separately
-            "episodic" | "compact" => Some(("user".to_string(), true)), // Mark as context
+            "system" => None,
+            "episodic" | "compact" => Some(("user".to_string(), true)),
             _ => Some(("user".to_string(), false)),
         }
     }
@@ -148,10 +155,9 @@ impl ClaudeClient {
             system_parts.push(context_data.to_string());
         }
 
-        // Extract episodic memory content and add to system prompt
         for (role, content) in history {
             if role == "episodic" || role == "compact" {
-                system_parts.push(format!("[Previous conversation summary]: {}", content));
+                system_parts.push(format!("[Previous conversation summary]: {content}"));
             }
         }
 
@@ -171,7 +177,7 @@ impl ClaudeClient {
                             content: content.clone(),
                         })
                     }
-                    _ => None, // Skip system, episodic, compact (already in system prompt)
+                    _ => None,
                 }
             })
             .collect();
@@ -221,7 +227,6 @@ impl LLMProvider for ClaudeClient {
                     .filter_map(|m| {
                         let role = m["role"].as_str().unwrap_or("user");
                         let content = m["content"].as_str().unwrap_or("");
-                        // Skip system messages (handled separately), episodic/compact (context), and empty content
                         if role == "system" || role == "episodic" || role == "compact" || content.is_empty() {
                             None
                         } else {
@@ -244,7 +249,6 @@ impl LLMProvider for ClaudeClient {
             }]
         };
 
-        // Ensure at least one user message exists
         if claude_messages.is_empty() && !prompt.is_empty() {
             claude_messages.push(ClaudeMessage {
                 role: "user".to_string(),
@@ -268,7 +272,6 @@ impl LLMProvider for ClaudeClient {
 
         let system = system_prompt.filter(|s| !s.is_empty());
 
-        // Validate we have at least one message with content
         if claude_messages.is_empty() {
             return Err("Cannot send request to Claude: no messages with content".into());
         }
@@ -281,26 +284,58 @@ impl LLMProvider for ClaudeClient {
             stream: None,
         };
 
-        info!("Claude request to {}: model={}", url, model_name);
+        info!("Claude request to {url}: model={model_name}");
         trace!("Claude request body: {:?}", serde_json::to_string(&request));
 
-        let response = self
+        let start_time = std::time::Instant::now();
+
+        let response = match self
             .client
             .post(&url)
             .headers(headers)
             .json(&request)
             .send()
-            .await?;
+            .await
+        {
+            Ok(resp) => resp,
+            Err(e) => {
+                let elapsed = start_time.elapsed();
+                if e.is_timeout() {
+                    warn!("Claude request timed out after {elapsed:?} (limit: {LLM_TIMEOUT_SECS}s)");
+                    return Err(format!("Claude request timed out after {LLM_TIMEOUT_SECS}s").into());
+                }
+                warn!("Claude request failed after {elapsed:?}: {e}");
+                return Err(e.into());
+            }
+        };
 
+        let elapsed = start_time.elapsed();
         let status = response.status();
+
         if !status.is_success() {
             let error_text = response.text().await.unwrap_or_default();
-            warn!("Claude API error ({}): {}", status, error_text);
-            return Err(format!("Claude API error ({}): {}", status, error_text).into());
+            warn!("Claude API error ({status}) after {elapsed:?}: {error_text}");
+            return Err(format!("Claude API error ({status}): {error_text}").into());
         }
 
-        let result: ClaudeResponse = response.json().await?;
+        info!("Claude response received in {elapsed:?}, status={status}");
+
+        let result: ClaudeResponse = match response.json().await {
+            Ok(r) => r,
+            Err(e) => {
+                warn!("Failed to parse Claude response: {e}");
+                return Err(format!("Failed to parse Claude response: {e}").into());
+            }
+        };
+
         let raw_content = self.extract_text_from_response(&result);
+        let content_len = raw_content.len();
+
+        info!(
+            "Claude response parsed: id={}, stop_reason={:?}, content_length={content_len}",
+            result.id,
+            result.stop_reason
+        );
 
         let handler = get_handler(model_name);
         let content = handler.process_content(&raw_content);
@@ -338,7 +373,6 @@ impl LLMProvider for ClaudeClient {
                     .filter_map(|m| {
                         let role = m["role"].as_str().unwrap_or("user");
                         let content = m["content"].as_str().unwrap_or("");
-                        // Skip system messages (handled separately), episodic/compact (context), and empty content
                         if role == "system" || role == "episodic" || role == "compact" || content.is_empty() {
                             None
                         } else {
@@ -361,7 +395,6 @@ impl LLMProvider for ClaudeClient {
             }]
         };
 
-        // Ensure at least one user message exists
         if claude_messages.is_empty() && !prompt.is_empty() {
             claude_messages.push(ClaudeMessage {
                 role: "user".to_string(),
@@ -385,7 +418,6 @@ impl LLMProvider for ClaudeClient {
 
         let system = system_prompt.filter(|s| !s.is_empty());
 
-        // Validate we have at least one message with content
         if claude_messages.is_empty() {
             return Err("Cannot send streaming request to Claude: no messages with content".into());
         }
@@ -398,25 +430,42 @@ impl LLMProvider for ClaudeClient {
             stream: Some(true),
         };
 
-        info!("Claude streaming request to {}: model={}", url, model_name);
+        info!("Claude streaming request to {url}: model={model_name}");
 
-        let response = self
+        let start_time = std::time::Instant::now();
+
+        let response = match self
             .client
             .post(&url)
             .headers(headers)
             .json(&request)
             .send()
-            .await?;
+            .await
+        {
+            Ok(resp) => resp,
+            Err(e) => {
+                let elapsed = start_time.elapsed();
+                if e.is_timeout() {
+                    warn!("Claude streaming request timed out after {elapsed:?}");
+                    return Err(format!("Claude streaming request timed out after {LLM_TIMEOUT_SECS}s").into());
+                }
+                warn!("Claude streaming request failed after {elapsed:?}: {e}");
+                return Err(e.into());
+            }
+        };
 
         let status = response.status();
         if !status.is_success() {
             let error_text = response.text().await.unwrap_or_default();
-            warn!("Claude streaming API error ({}): {}", status, error_text);
-            return Err(format!("Claude streaming API error ({}): {}", status, error_text).into());
+            warn!("Claude streaming API error ({status}): {error_text}");
+            return Err(format!("Claude streaming API error ({status}): {error_text}").into());
         }
 
+        info!("Claude streaming connection established in {:?}", start_time.elapsed());
+
         let handler = get_handler(model_name);
         let mut stream = response.bytes_stream();
+        let mut total_chunks = 0;
 
         while let Some(chunk) = stream.next().await {
             let chunk = chunk?;
@@ -439,6 +488,7 @@ impl LLMProvider for ClaudeClient {
                                     let processed = handler.process_content(&delta.text);
                                     if !processed.is_empty() {
                                         let _ = tx.send(processed).await;
+                                        total_chunks += 1;
                                     }
                                 }
                             }
@@ -448,6 +498,12 @@ impl LLMProvider for ClaudeClient {
             }
         }
 
+        info!(
+            "Claude streaming completed in {:?}, chunks={}",
+            start_time.elapsed(),
+            total_chunks
+        );
+
         Ok(())
     }
 
@@ -490,7 +546,6 @@ mod tests {
             "claude-opus-4-5".to_string(),
         );
         let url = client.build_url();
-        // Azure Claude uses Anthropic API format directly
         assert!(url.contains("/v1/messages"));
     }
 
diff --git a/src/llm/local.rs b/src/llm/local.rs
index 0a123bce5..a8b026671 100644
--- a/src/llm/local.rs
+++ b/src/llm/local.rs
@@ -1,8 +1,10 @@
 use crate::config::ConfigManager;
+use crate::core::kb::embedding_generator::set_embedding_server_ready;
+use crate::core::shared::memory_monitor::{log_jemalloc_stats, MemoryStats};
 use crate::shared::models::schema::bots::dsl::*;
 use crate::shared::state::AppState;
 use diesel::prelude::*;
-use log::{error, info, warn};
+use log::{error, info, trace, warn};
 use reqwest;
 use std::fmt::Write;
 use std::sync::Arc;
@@ -11,7 +13,14 @@ use tokio;
 pub async fn ensure_llama_servers_running(
     app_state: Arc<AppState>,
 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
+    trace!("[PROFILE] ensure_llama_servers_running ENTER");
+    let start_mem = MemoryStats::current();
+    trace!("[LLM_LOCAL] ensure_llama_servers_running START, RSS={}",
+          MemoryStats::format_bytes(start_mem.rss_bytes));
+    log_jemalloc_stats();
+
     if std::env::var("SKIP_LLM_SERVER").is_ok() {
+        trace!("[PROFILE] SKIP_LLM_SERVER set, returning early");
         info!("SKIP_LLM_SERVER set - skipping local LLM server startup (using mock/external LLM)");
         return Ok(());
     }
@@ -74,6 +83,9 @@ pub async fn ensure_llama_servers_running(
     info!("  Embedding Model: {embedding_model}");
     info!("  LLM Server Path: {llm_server_path}");
     info!("Restarting any existing llama-server processes...");
+    trace!("[PROFILE] About to pkill llama-server...");
+    let before_pkill = MemoryStats::current();
+    trace!("[LLM_LOCAL] Before pkill, RSS={}", MemoryStats::format_bytes(before_pkill.rss_bytes));
 
     if let Err(e) = tokio::process::Command::new("sh")
         .arg("-c")
@@ -85,6 +97,12 @@ pub async fn ensure_llama_servers_running(
         tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
         info!("Existing llama-server processes terminated (if any)");
     }
+    trace!("[PROFILE] pkill done");
+
+    let after_pkill = MemoryStats::current();
+    trace!("[LLM_LOCAL] After pkill, RSS={} (delta={})",
+          MemoryStats::format_bytes(after_pkill.rss_bytes),
+          MemoryStats::format_bytes(after_pkill.rss_bytes.saturating_sub(before_pkill.rss_bytes)));
 
     let llm_running = if llm_url.starts_with("https://") {
         info!("Using external HTTPS LLM server, skipping local startup");
@@ -135,13 +153,27 @@ pub async fn ensure_llama_servers_running(
         task.await??;
     }
     info!("Waiting for servers to become ready...");
+    trace!("[PROFILE] Starting wait loop for servers...");
+    let before_wait = MemoryStats::current();
+    trace!("[LLM_LOCAL] Before wait loop, RSS={}", MemoryStats::format_bytes(before_wait.rss_bytes));
+
     let mut llm_ready = llm_running || llm_model.is_empty();
     let mut embedding_ready = embedding_running || embedding_model.is_empty();
     let mut attempts = 0;
     let max_attempts = 120;
     while attempts < max_attempts && (!llm_ready || !embedding_ready) {
+        trace!("[PROFILE] Wait loop iteration {}", attempts);
         tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
 
+        if attempts % 5 == 0 {
+            let loop_mem = MemoryStats::current();
+            trace!("[LLM_LOCAL] Wait loop attempt {}, RSS={} (delta from start={})",
+                  attempts,
+                  MemoryStats::format_bytes(loop_mem.rss_bytes),
+                  MemoryStats::format_bytes(loop_mem.rss_bytes.saturating_sub(before_wait.rss_bytes)));
+            log_jemalloc_stats();
+        }
+
         if attempts % 5 == 0 {
             info!(
                 "Checking server health (attempt {}/{max_attempts})...",
@@ -160,6 +192,7 @@ pub async fn ensure_llama_servers_running(
             if is_server_running(&embedding_url).await {
                 info!("Embedding server ready at {embedding_url}");
                 embedding_ready = true;
+                set_embedding_server_ready(true);
             } else if attempts % 10 == 0 {
                 warn!("Embedding server not ready yet at {embedding_url}");
 
@@ -185,11 +218,29 @@ pub async fn ensure_llama_servers_running(
     }
     if llm_ready && embedding_ready {
         info!("All llama.cpp servers are ready and responding!");
+        if !embedding_model.is_empty() {
+            set_embedding_server_ready(true);
+        }
+        trace!("[PROFILE] Servers ready!");
+
+        let after_ready = MemoryStats::current();
+        trace!("[LLM_LOCAL] Servers ready, RSS={} (delta from start={})",
+              MemoryStats::format_bytes(after_ready.rss_bytes),
+              MemoryStats::format_bytes(after_ready.rss_bytes.saturating_sub(start_mem.rss_bytes)));
+        log_jemalloc_stats();
 
         let _llm_provider1 = Arc::new(crate::llm::OpenAIClient::new(
             llm_model.clone(),
             Some(llm_url.clone()),
         ));
+
+        let end_mem = MemoryStats::current();
+        trace!("[LLM_LOCAL] ensure_llama_servers_running END, RSS={} (total delta={})",
+              MemoryStats::format_bytes(end_mem.rss_bytes),
+              MemoryStats::format_bytes(end_mem.rss_bytes.saturating_sub(start_mem.rss_bytes)));
+        log_jemalloc_stats();
+
+        trace!("[PROFILE] ensure_llama_servers_running EXIT OK");
         Ok(())
     } else {
         let mut error_msg = "Servers failed to start within timeout:".to_string();
diff --git a/src/main.rs b/src/main.rs
index aa36e201b..27119f327 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,11 @@
+// Use jemalloc as the global allocator when the feature is enabled
+#[cfg(feature = "jemalloc")]
+use tikv_jemallocator::Jemalloc;
+
+#[cfg(feature = "jemalloc")]
+#[global_allocator]
+static GLOBAL: Jemalloc = Jemalloc;
+
 use axum::extract::{Extension, State};
 use axum::http::StatusCode;
 use axum::middleware;
@@ -25,6 +33,10 @@ use botlib::SystemLimits;
 
 use botserver::core;
 use botserver::shared;
+use botserver::core::shared::memory_monitor::{
+    start_memory_monitor, log_process_memory, MemoryStats,
+    register_thread, record_thread_activity
+};
 
 use botserver::core::automation;
 use botserver::core::bootstrap;
@@ -105,8 +117,7 @@ async fn health_check_simple() -> (StatusCode, Json<serde_json::Value>) {
 
 fn print_shutdown_message() {
     println!();
-    println!("\x1b[33m✨ Thank you for using General Bots!\x1b[0m");
-    println!("\x1b[36m   pragmatismo.com.br\x1b[0m");
+    println!("Thank you for using General Bots!");
     println!();
 }
 
@@ -819,6 +830,10 @@ async fn main() -> std::io::Result<()> {
         botserver::core::shared::state::AttendantNotification,
     >(1000);
 
+    let (task_progress_tx, _task_progress_rx) = tokio::sync::broadcast::channel::<
+        botserver::core::shared::state::TaskProgressEvent,
+    >(1000);
+
     let app_state = Arc::new(AppState {
         drive: Some(drive.clone()),
         s3_client: Some(drive),
@@ -852,6 +867,7 @@ async fn main() -> std::io::Result<()> {
             ext
         },
         attendant_broadcast: Some(attendant_tx),
+        task_progress_broadcast: Some(task_progress_tx),
     });
 
     let task_scheduler = Arc::new(botserver::tasks::scheduler::TaskScheduler::new(
@@ -864,6 +880,11 @@ async fn main() -> std::io::Result<()> {
         log::warn!("Failed to start website crawler service: {}", e);
     }
 
+    // Start memory monitoring - check every 30 seconds, warn if growth > 50MB
+    start_memory_monitor(30, 50);
+    info!("Memory monitor started");
+    log_process_memory();
+
     let _ = state_tx.try_send(app_state.clone());
     progress_tx.send(BootstrapProgress::BootstrapComplete).ok();
 
@@ -876,10 +897,6 @@ async fn main() -> std::io::Result<()> {
         .map(|n| n.get())
         .unwrap_or(4);
 
-    let _automation_service =
-        botserver::core::automation::AutomationService::new(app_state.clone());
-    info!("Automation service initialized with episodic memory scheduler");
-
     let bot_orchestrator = BotOrchestrator::new(app_state.clone());
     if let Err(e) = bot_orchestrator.mount_all_bots() {
         error!("Failed to mount bots: {}", e);
@@ -891,37 +908,57 @@ async fn main() -> std::io::Result<()> {
         let bucket_name = "default.gbai".to_string();
         let monitor_bot_id = default_bot_id;
         tokio::spawn(async move {
+            register_thread("drive-monitor", "drive");
+            trace!("[PROFILE] DriveMonitor::new starting...");
             let monitor = botserver::DriveMonitor::new(
                 drive_monitor_state,
                 bucket_name.clone(),
                 monitor_bot_id,
             );
+            trace!("[PROFILE] DriveMonitor::new done, calling start_monitoring...");
             info!("Starting DriveMonitor for bucket: {}", bucket_name);
             if let Err(e) = monitor.start_monitoring().await {
                 error!("DriveMonitor failed: {}", e);
             }
+            trace!("[PROFILE] DriveMonitor start_monitoring returned");
         });
     }
 
     let automation_state = app_state.clone();
     tokio::spawn(async move {
+        register_thread("automation-service", "automation");
         let automation = AutomationService::new(automation_state);
-        automation.spawn().await.ok();
+        trace!("[TASK] AutomationService starting, RSS={}",
+              MemoryStats::format_bytes(MemoryStats::current().rss_bytes));
+        loop {
+            record_thread_activity("automation-service");
+            if let Err(e) = automation.check_scheduled_tasks().await {
+                error!("Error checking scheduled tasks: {}", e);
+            }
+            tokio::time::sleep(tokio::time::Duration::from_secs(60)).await;
+        }
     });
 
     let app_state_for_llm = app_state.clone();
     tokio::spawn(async move {
+        register_thread("llm-server-init", "llm");
+        eprintln!("[PROFILE] ensure_llama_servers_running starting...");
         if let Err(e) = ensure_llama_servers_running(app_state_for_llm).await {
             error!("Failed to start LLM servers: {}", e);
         }
+        eprintln!("[PROFILE] ensure_llama_servers_running completed");
+        record_thread_activity("llm-server-init");
     });
     trace!("Initial data setup task spawned");
+    eprintln!("[PROFILE] All background tasks spawned, starting HTTP server...");
 
     trace!("Starting HTTP server on port {}...", config.server.port);
+    eprintln!("[PROFILE] run_axum_server starting on port {}...", config.server.port);
     if let Err(e) = run_axum_server(app_state, config.server.port, worker_count).await {
         error!("Failed to start HTTP server: {}", e);
         std::process::exit(1);
     }
+    eprintln!("[PROFILE] run_axum_server returned (should not happen normally)");
 
     if let Some(handle) = ui_handle {
         handle.join().ok();
diff --git a/src/security/integration.rs b/src/security/integration.rs
index 260daaa52..f9b1cb941 100644
--- a/src/security/integration.rs
+++ b/src/security/integration.rs
@@ -95,10 +95,10 @@ impl TlsIntegration {
         services.insert(
             "minio".to_string(),
             ServiceUrls {
-                original: "http://localhost:9000".to_string(),
-                secure: "https://localhost:9001".to_string(),
+                original: "https://localhost:9000".to_string(),
+                secure: "https://localhost:9000".to_string(),
                 port: 9000,
-                tls_port: 9001,
+                tls_port: 9000,
             },
         );
 
diff --git a/src/tasks/mod.rs b/src/tasks/mod.rs
index 8731f23e3..469c7db2d 100644
--- a/src/tasks/mod.rs
+++ b/src/tasks/mod.rs
@@ -351,15 +351,108 @@ pub async fn handle_task_delete(
 
 pub async fn handle_task_get(
     State(state): State<Arc<AppState>>,
-    Path(id): Path<Uuid>,
-) -> Result<Json<TaskResponse>, StatusCode> {
-    let task_engine = &state.task_engine;
+    Path(id): Path<String>,
+) -> impl IntoResponse {
+    let conn = state.conn.clone();
+    let task_id = id.clone();
 
-    match task_engine.get_task(id).await {
-        Ok(task) => Ok(Json(task.into())),
+    let result = tokio::task::spawn_blocking(move || {
+        let mut db_conn = conn
+            .get()
+            .map_err(|e| format!("DB connection error: {}", e))?;
+
+        #[derive(Debug, QueryableByName, serde::Serialize)]
+        struct AutoTaskRow {
+            #[diesel(sql_type = diesel::sql_types::Uuid)]
+            pub id: Uuid,
+            #[diesel(sql_type = diesel::sql_types::Text)]
+            pub title: String,
+            #[diesel(sql_type = diesel::sql_types::Text)]
+            pub status: String,
+            #[diesel(sql_type = diesel::sql_types::Text)]
+            pub priority: String,
+            #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
+            pub intent: Option<String>,
+            #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Text>)]
+            pub error: Option<String>,
+            #[diesel(sql_type = diesel::sql_types::Float)]
+            pub progress: f32,
+            #[diesel(sql_type = diesel::sql_types::Timestamptz)]
+            pub created_at: chrono::DateTime<chrono::Utc>,
+            #[diesel(sql_type = diesel::sql_types::Nullable<diesel::sql_types::Timestamptz>)]
+            pub completed_at: Option<chrono::DateTime<chrono::Utc>>,
+        }
+
+        let parsed_uuid = match Uuid::parse_str(&task_id) {
+            Ok(u) => u,
+            Err(_) => return Err(format!("Invalid task ID: {}", task_id)),
+        };
+
+        let task: Option<AutoTaskRow> = diesel::sql_query(
+            "SELECT id, title, status, priority, intent, error, progress, created_at, completed_at
+             FROM auto_tasks WHERE id = $1 LIMIT 1"
+        )
+        .bind::<diesel::sql_types::Uuid, _>(parsed_uuid)
+        .get_result(&mut db_conn)
+        .ok();
+
+        Ok::<_, String>(task)
+    })
+    .await
+    .unwrap_or_else(|e| {
+        log::error!("Task query failed: {}", e);
+        Err(format!("Task query failed: {}", e))
+    });
+
+    match result {
+        Ok(Some(task)) => {
+            let status_class = match task.status.as_str() {
+                "completed" | "done" => "completed",
+                "running" | "pending" => "running",
+                "failed" | "error" => "error",
+                _ => "pending"
+            };
+            let progress_percent = (task.progress * 100.0) as u8;
+            let created = task.created_at.format("%Y-%m-%d %H:%M").to_string();
+            let completed = task.completed_at.map(|d| d.format("%Y-%m-%d %H:%M").to_string()).unwrap_or_default();
+
+            let html = format!(r#"
+                <div class="task-detail-header">
+                    <h2 class="task-detail-title">{}</h2>
+                    <span class="task-status task-status-{}">{}</span>
+                </div>
+                <div class="task-detail-meta">
+                    <div class="meta-item"><span class="meta-label">Priority:</span> <span class="meta-value">{}</span></div>
+                    <div class="meta-item"><span class="meta-label">Created:</span> <span class="meta-value">{}</span></div>
+                    {}
+                </div>
+                <div class="task-detail-progress">
+                    <div class="progress-label">Progress: {}%</div>
+                    <div class="progress-bar-container">
+                        <div class="progress-bar-fill" style="width: {}%"></div>
+                    </div>
+                </div>
+                {}
+                {}
+            "#,
+                task.title,
+                status_class,
+                task.status,
+                task.priority,
+                created,
+                if !completed.is_empty() { format!(r#"<div class="meta-item"><span class="meta-label">Completed:</span> <span class="meta-value">{}</span></div>"#, completed) } else { String::new() },
+                progress_percent,
+                progress_percent,
+                task.intent.map(|i| format!(r#"<div class="task-detail-section"><h3>Intent</h3><p class="intent-text">{}</p></div>"#, i)).unwrap_or_default(),
+                task.error.map(|e| format!(r#"<div class="task-detail-section error-section"><h3>Error</h3><p class="error-text">{}</p></div>"#, e)).unwrap_or_default()
+            );
+            (StatusCode::OK, axum::response::Html(html)).into_response()
+        }
+        Ok(None) => {
+            (StatusCode::NOT_FOUND, axum::response::Html("<div class='error'>Task not found</div>".to_string())).into_response()
+        }
         Err(e) => {
-            log::error!("Failed to get task: {}", e);
-            Err(StatusCode::NOT_FOUND)
+            (StatusCode::INTERNAL_SERVER_ERROR, axum::response::Html(format!("<div class='error'>{}</div>", e))).into_response()
         }
     }
 }
@@ -1209,6 +1302,7 @@ pub fn configure_task_routes() -> Router<Arc<AppState>> {
         )
         .route("/api/tasks/stats", get(handle_task_stats_htmx))
         .route("/api/tasks/stats/json", get(handle_task_stats))
+        .route("/api/tasks/time-saved", get(handle_time_saved))
         .route("/api/tasks/completed", delete(handle_clear_completed))
         .route(
             &ApiUrls::TASK_BY_ID.replace(":id", "{id}"),
@@ -1270,12 +1364,15 @@ pub async fn handle_task_list_htmx(
             .map_err(|e| format!("DB connection error: {}", e))?;
 
         let mut query = String::from(
-            "SELECT id, title, status, priority, description, due_date FROM tasks WHERE 1=1",
+            "SELECT id, title, status, priority, intent as description, NULL::timestamp as due_date FROM auto_tasks WHERE 1=1",
         );
 
         match filter_clone.as_str() {
-            "active" => query.push_str(" AND status NOT IN ('done', 'completed', 'cancelled')"),
-            "completed" => query.push_str(" AND status IN ('done', 'completed')"),
+            "complete" | "completed" => query.push_str(" AND status IN ('done', 'completed')"),
+            "active" => query.push_str(" AND status IN ('running', 'pending', 'in_progress')"),
+            "awaiting" => query.push_str(" AND status IN ('awaiting_decision', 'awaiting', 'waiting')"),
+            "paused" => query.push_str(" AND status = 'paused'"),
+            "blocked" => query.push_str(" AND status IN ('blocked', 'failed', 'error')"),
             "priority" => query.push_str(" AND priority IN ('high', 'urgent')"),
             _ => {}
         }
@@ -1297,15 +1394,8 @@ pub async fn handle_task_list_htmx(
 
     for task in tasks {
         let is_completed = task.status == "done" || task.status == "completed";
-        let is_high_priority = task.priority == "high" || task.priority == "urgent";
         let completed_class = if is_completed { "completed" } else { "" };
-        let priority_class = if is_high_priority { "active" } else { "" };
-        let checked = if is_completed { "checked" } else { "" };
 
-        let status_html = format!(
-            r#"<span class="task-status task-status-{}">{}</span>"#,
-            task.status, task.status
-        );
         let due_date_html = if let Some(due) = &task.due_date {
             format!(
                 r#"<span class="task-due-date"> {}</span>"#,
@@ -1314,52 +1404,47 @@ pub async fn handle_task_list_htmx(
         } else {
             String::new()
         };
+        let status_class = match task.status.as_str() {
+            "completed" | "done" => "status-complete",
+            "running" | "pending" | "in_progress" => "status-running",
+            "failed" | "error" | "blocked" => "status-error",
+            "paused" => "status-paused",
+            "awaiting" | "awaiting_decision" => "status-awaiting",
+            _ => "status-pending"
+        };
+
         let _ = write!(
             html,
             r#"
-            <div class="task-item {completed_class}" data-task-id="{}" onclick="selectTask('{}')">
-                <input type="checkbox"
-                       class="task-checkbox"
-                       data-task-id="{}"
-                       {checked}
-                       onclick="event.stopPropagation()">
-                <div class="task-content">
-                    <div class="task-text-wrapper">
-                        <span class="task-text">{}</span>
-                        <div class="task-meta">
-                            {status_html}
-                            <span class="task-priority task-priority-{}">{}</span>
-                            {due_date_html}
-                        </div>
-                    </div>
+            <div class="task-card {completed_class} {status_class}" data-task-id="{}" onclick="selectTask('{}')">
+                <div class="task-card-header">
+                    <span class="task-card-title">{}</span>
+                    <span class="task-card-status {}">{}</span>
                 </div>
-                <div class="task-actions">
-                    <button class="action-btn priority-btn {priority_class}"
-                            data-action="priority"
-                            data-task-id="{}">
+                <div class="task-card-body">
+                    <div class="task-card-priority">
+                        <span class="priority-badge priority-{}">{}</span>
+                    </div>
+                    {due_date_html}
+                </div>
+                <div class="task-card-footer">
+                    <button class="task-action-btn" data-action="priority" data-task-id="{}" onclick="event.stopPropagation()">
                         ⭐
                     </button>
-                    <button class="action-btn edit-btn"
-                            data-action="edit"
-                            data-task-id="{}">
-
-                    </button>
-                    <button class="action-btn delete-btn"
-                            data-action="delete"
-                            data-task-id="{}">
-
+                    <button class="task-action-btn" data-action="delete" data-task-id="{}" onclick="event.stopPropagation()">
+                        🗑️
                     </button>
                 </div>
             </div>
             "#,
             task.id,
             task.id,
-            task.id,
             task.title,
+            status_class,
+            task.status,
             task.priority,
             task.priority,
             task.id,
-            task.id,
             task.id
         );
     }
@@ -1396,34 +1481,58 @@ pub async fn handle_task_stats_htmx(State(state): State<Arc<AppState>>) -> impl
             .get()
             .map_err(|e| format!("DB connection error: {}", e))?;
 
-        let total: i64 = diesel::sql_query("SELECT COUNT(*) as count FROM tasks")
+        let total: i64 = diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks")
             .get_result::<CountResult>(&mut db_conn)
             .map(|r| r.count)
             .unwrap_or(0);
 
         let active: i64 =
-            diesel::sql_query("SELECT COUNT(*) as count FROM tasks WHERE status NOT IN ('done', 'completed', 'cancelled')")
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('running', 'pending', 'in_progress')")
                 .get_result::<CountResult>(&mut db_conn)
                 .map(|r| r.count)
                 .unwrap_or(0);
 
         let completed: i64 =
-            diesel::sql_query("SELECT COUNT(*) as count FROM tasks WHERE status IN ('done', 'completed')")
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('done', 'completed')")
+                .get_result::<CountResult>(&mut db_conn)
+                .map(|r| r.count)
+                .unwrap_or(0);
+
+        let awaiting: i64 =
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('awaiting_decision', 'awaiting', 'waiting')")
+                .get_result::<CountResult>(&mut db_conn)
+                .map(|r| r.count)
+                .unwrap_or(0);
+
+        let paused: i64 =
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status = 'paused'")
+                .get_result::<CountResult>(&mut db_conn)
+                .map(|r| r.count)
+                .unwrap_or(0);
+
+        let blocked: i64 =
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('blocked', 'failed', 'error')")
                 .get_result::<CountResult>(&mut db_conn)
                 .map(|r| r.count)
                 .unwrap_or(0);
 
         let priority: i64 =
-            diesel::sql_query("SELECT COUNT(*) as count FROM tasks WHERE priority IN ('high', 'urgent')")
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE priority IN ('high', 'urgent')")
                 .get_result::<CountResult>(&mut db_conn)
                 .map(|r| r.count)
                 .unwrap_or(0);
 
+        let time_saved = format!("{}h", completed * 2);
+
         Ok::<_, String>(TaskStats {
             total: total as usize,
             active: active as usize,
             completed: completed as usize,
+            awaiting: awaiting as usize,
+            paused: paused as usize,
+            blocked: blocked as usize,
             priority: priority as usize,
+            time_saved,
         })
     })
     .await
@@ -1435,18 +1544,25 @@ pub async fn handle_task_stats_htmx(State(state): State<Arc<AppState>>) -> impl
         total: 0,
         active: 0,
         completed: 0,
+        awaiting: 0,
+        paused: 0,
+        blocked: 0,
         priority: 0,
+        time_saved: "0h".to_string(),
     });
 
     let html = format!(
         "{} tasks
         <script>
             document.getElementById('count-all').textContent = '{}';
+            document.getElementById('count-complete').textContent = '{}';
             document.getElementById('count-active').textContent = '{}';
-            document.getElementById('count-completed').textContent = '{}';
-            document.getElementById('count-priority').textContent = '{}';
+            document.getElementById('count-awaiting').textContent = '{}';
+            document.getElementById('count-paused').textContent = '{}';
+            document.getElementById('count-blocked').textContent = '{}';
+            document.getElementById('time-saved-value').textContent = '{}';
         </script>",
-        stats.total, stats.total, stats.active, stats.completed, stats.priority
+        stats.total, stats.total, stats.completed, stats.active, stats.awaiting, stats.paused, stats.blocked, stats.time_saved
     );
 
     axum::response::Html(html)
@@ -1460,34 +1576,58 @@ pub async fn handle_task_stats(State(state): State<Arc<AppState>>) -> Json<TaskS
             .get()
             .map_err(|e| format!("DB connection error: {}", e))?;
 
-        let total: i64 = diesel::sql_query("SELECT COUNT(*) as count FROM tasks")
+        let total: i64 = diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks")
             .get_result::<CountResult>(&mut db_conn)
             .map(|r| r.count)
             .unwrap_or(0);
 
         let active: i64 =
-            diesel::sql_query("SELECT COUNT(*) as count FROM tasks WHERE status NOT IN ('done', 'completed', 'cancelled')")
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('running', 'pending', 'in_progress')")
                 .get_result::<CountResult>(&mut db_conn)
                 .map(|r| r.count)
                 .unwrap_or(0);
 
         let completed: i64 =
-            diesel::sql_query("SELECT COUNT(*) as count FROM tasks WHERE status IN ('done', 'completed')")
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('done', 'completed')")
+                .get_result::<CountResult>(&mut db_conn)
+                .map(|r| r.count)
+                .unwrap_or(0);
+
+        let awaiting: i64 =
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('awaiting_decision', 'awaiting', 'waiting')")
+                .get_result::<CountResult>(&mut db_conn)
+                .map(|r| r.count)
+                .unwrap_or(0);
+
+        let paused: i64 =
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status = 'paused'")
+                .get_result::<CountResult>(&mut db_conn)
+                .map(|r| r.count)
+                .unwrap_or(0);
+
+        let blocked: i64 =
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('blocked', 'failed', 'error')")
                 .get_result::<CountResult>(&mut db_conn)
                 .map(|r| r.count)
                 .unwrap_or(0);
 
         let priority: i64 =
-            diesel::sql_query("SELECT COUNT(*) as count FROM tasks WHERE priority IN ('high', 'urgent')")
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE priority IN ('high', 'urgent')")
                 .get_result::<CountResult>(&mut db_conn)
                 .map(|r| r.count)
                 .unwrap_or(0);
 
+        let time_saved = format!("{}h", completed * 2);
+
         Ok::<_, String>(TaskStats {
             total: total as usize,
             active: active as usize,
             completed: completed as usize,
+            awaiting: awaiting as usize,
+            paused: paused as usize,
+            blocked: blocked as usize,
             priority: priority as usize,
+            time_saved,
         })
     })
     .await
@@ -1499,12 +1639,43 @@ pub async fn handle_task_stats(State(state): State<Arc<AppState>>) -> Json<TaskS
         total: 0,
         active: 0,
         completed: 0,
+        awaiting: 0,
+        paused: 0,
+        blocked: 0,
         priority: 0,
+        time_saved: "0h".to_string(),
     });
 
     Json(stats)
 }
 
+pub async fn handle_time_saved(State(state): State<Arc<AppState>>) -> impl IntoResponse {
+    let conn = state.conn.clone();
+
+    let time_saved = tokio::task::spawn_blocking(move || {
+        let mut db_conn = match conn.get() {
+            Ok(c) => c,
+            Err(_) => return "0h".to_string(),
+        };
+
+        let completed: i64 =
+            diesel::sql_query("SELECT COUNT(*) as count FROM auto_tasks WHERE status IN ('done', 'completed')")
+                .get_result::<CountResult>(&mut db_conn)
+                .map(|r| r.count)
+                .unwrap_or(0);
+
+        format!("{}h", completed * 2)
+    })
+    .await
+    .unwrap_or_else(|_| "0h".to_string());
+
+    axum::response::Html(format!(
+        r#"<span class="time-label">Active Time Saved:</span>
+        <span class="time-value" id="time-saved-value">{}</span>"#,
+        time_saved
+    ))
+}
+
 pub async fn handle_clear_completed(State(state): State<Arc<AppState>>) -> impl IntoResponse {
     let conn = state.conn.clone();
 
@@ -1513,7 +1684,7 @@ pub async fn handle_clear_completed(State(state): State<Arc<AppState>>) -> impl
             .get()
             .map_err(|e| format!("DB connection error: {}", e))?;
 
-        diesel::sql_query("DELETE FROM tasks WHERE status IN ('done', 'completed')")
+        diesel::sql_query("DELETE FROM auto_tasks WHERE status IN ('done', 'completed')")
             .execute(&mut db_conn)
             .map_err(|e| format!("Delete failed: {}", e))?;
 
@@ -1595,7 +1766,11 @@ pub struct TaskStats {
     pub total: usize,
     pub active: usize,
     pub completed: usize,
+    pub awaiting: usize,
+    pub paused: usize,
+    pub blocked: usize,
     pub priority: usize,
+    pub time_saved: String,
 }
 
 #[derive(Debug, Serialize, Deserialize)]