From 330229553818d0ba2f3925a7ef8114525683ac38 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Fri, 31 Oct 2025 07:30:37 -0300 Subject: [PATCH] feat: simplify bot name handling and improve logging - Added RUST_LOG=trace to VSCode debug configurations for better debugging - Removed unused imports (uuid, Selectable, SelectableHelper) from bootstrap module - Simplified bot name handling by removing name formatting logic, using raw bot folder names - Renamed check_default_gbot to check_gbot for consistency - Improved logging format in drive monitor initialization - Fixed S3 bucket reference in bootstrap manager (removed 'templates/' prefix) --- .vscode/launch.json | 6 ++ src/bootstrap/mod.rs | 25 ++---- src/drive_monitor/mod.rs | 159 +++++++++++++++++++++++++++------------ 3 files changed, 123 insertions(+), 67 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bf24a3b20..741b23a5c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -16,6 +16,9 @@ } }, "args": [], + "env": { + "RUST_LOG": "trace" + }, "cwd": "${workspaceFolder}" }, { @@ -30,6 +33,9 @@ } }, "args": [], + "env": { + "RUST_LOG": "trace" + }, "cwd": "${workspaceFolder}" } ] diff --git a/src/bootstrap/mod.rs b/src/bootstrap/mod.rs index 1670fd1b5..942d5ee66 100644 --- a/src/bootstrap/mod.rs +++ b/src/bootstrap/mod.rs @@ -1,7 +1,7 @@ use crate::config::AppConfig; use crate::package_manager::{InstallMode, PackageManager}; use anyhow::Result; -use diesel::{connection::SimpleConnection, RunQueryDsl, Connection, QueryableByName, Selectable}; +use diesel::{connection::SimpleConnection, RunQueryDsl, Connection, QueryableByName}; use dotenvy::dotenv; use log::{debug, error, info, trace}; use aws_sdk_s3::Client; @@ -13,8 +13,6 @@ use std::io::{self, Write}; use std::path::Path; use std::process::Command; use std::sync::{Arc, Mutex}; -use uuid::Uuid; -use diesel::SelectableHelper; use diesel::Queryable; @@ -455,22 +453,9 @@ impl BootstrapManager { if path.is_dir() && path.extension().map(|e| e == "gbai").unwrap_or(false) { let bot_folder = path.file_name().unwrap().to_string_lossy().to_string(); let bot_name = bot_folder.trim_end_matches(".gbai"); - let formatted_name = bot_name - .split('_') - .map(|word| { - let mut chars = word.chars(); - match chars.next() { - None => String::new(), - Some(first) => { - first.to_uppercase().collect::() + chars.as_str() - } - } - }) - .collect::>() - .join(" "); let existing: Option = bots::table - .filter(bots::name.eq(&formatted_name)) + .filter(bots::name.eq(&bot_name)) .select(bots::name) .first(conn) .optional()?; @@ -480,11 +465,11 @@ impl BootstrapManager { "INSERT INTO bots (id, name, description, llm_provider, llm_config, context_provider, context_config, is_active) \ VALUES (gen_random_uuid(), $1, $2, 'openai', '{\"model\": \"gpt-4\", \"temperature\": 0.7}', 'database', '{}', true)" ) - .bind::(&formatted_name) + .bind::(&bot_name) .bind::(format!("Bot for {} template", bot_name)) .execute(conn)?; } else { - log::trace!("Bot {} already exists", formatted_name); + log::trace!("Bot {} already exists", bot_name); } } } @@ -541,7 +526,7 @@ impl BootstrapManager { use uuid::Uuid; let client = &self.s3_client; - let bucket = "templates/default.gbai"; + let bucket = "default.gbai"; let config_key = "default.gbot/config.csv"; match client.get_object() diff --git a/src/drive_monitor/mod.rs b/src/drive_monitor/mod.rs index 6dee47c65..bf61fbbbd 100644 --- a/src/drive_monitor/mod.rs +++ b/src/drive_monitor/mod.rs @@ -2,8 +2,8 @@ use crate::basic::compiler::BasicCompiler; use crate::kb::embeddings; use crate::kb::qdrant_client; use crate::shared::state::AppState; -use log::{debug, error, info, warn}; use aws_sdk_s3::Client; +use log::{debug, error, info, warn}; use std::collections::HashMap; use std::error::Error; use std::sync::Arc; @@ -34,7 +34,10 @@ impl DriveMonitor { pub fn spawn(self: Arc) -> tokio::task::JoinHandle<()> { tokio::spawn(async move { - info!("Drive Monitor service started for bucket: {}", self.bucket_name); + info!( + "Drive Monitor service started for bucket: {}", + self.bucket_name + ); let mut tick = interval(Duration::from_secs(30)); loop { tick.tick().await; @@ -55,8 +58,8 @@ impl DriveMonitor { self.check_gbdialog_changes(client).await?; self.check_gbkb_changes(client).await?; - - if let Err(e) = self.check_default_gbot(client).await { + + if let Err(e) = self.check_gbot(client).await { error!("Error checking default bot config: {}", e); } @@ -68,21 +71,25 @@ impl DriveMonitor { client: &Client, ) -> Result<(), Box> { let prefix = ".gbdialog/"; - + let mut current_files = HashMap::new(); - + let mut continuation_token = None; loop { - let list_objects = client.list_objects_v2() - .bucket(&self.bucket_name) - .prefix(prefix) + let list_objects = client + .list_objects_v2() + .bucket(&self.bucket_name.to_lowercase()) .set_continuation_token(continuation_token) .send() .await?; + debug!("List objects result: {:?}", list_objects); for obj in list_objects.contents.unwrap_or_default() { let path = obj.key().unwrap_or_default().to_string(); - + let path_parts: Vec<&str> = path.split('/').collect(); + if path_parts.len() < 2 || !path_parts[0].ends_with(".gbdialog") { + continue; + } if path.ends_with('/') || !path.ends_with(".bas") { continue; } @@ -141,21 +148,30 @@ impl DriveMonitor { client: &Client, ) -> Result<(), Box> { let prefix = ".gbkb/"; - + let mut current_files = HashMap::new(); - + let mut continuation_token = None; loop { - let list_objects = client.list_objects_v2() - .bucket(&self.bucket_name) + let list_objects = client + .list_objects_v2() + .bucket(&self.bucket_name.to_lowercase()) .prefix(prefix) .set_continuation_token(continuation_token) .send() .await?; + debug!("List objects result: {:?}", list_objects); for obj in list_objects.contents.unwrap_or_default() { let path = obj.key().unwrap_or_default().to_string(); - + + let path_parts: Vec<&str> = path.split('/').collect(); + if path_parts.len() < 2 || !path_parts[0].ends_with(".gbkb") { + continue; + } + + + if path.ends_with('/') { continue; } @@ -214,37 +230,71 @@ impl DriveMonitor { Ok(()) } - async fn check_default_gbot( + async fn check_gbot( &self, client: &Client, ) -> Result<(), Box> { - let prefix = format!("{}default.gbot/", self.bucket_name); - let config_key = format!("{}config.csv", prefix); - - match client.head_object() - .bucket(&self.bucket_name) - .key(&config_key) - .send() - .await - { - Ok(_) => { - let response = client.get_object() - .bucket(&self.bucket_name) - .key(&config_key) - .send() - .await?; + let prefix = ".gbot/"; + let mut continuation_token = None; + + loop { + let list_objects = client + .list_objects_v2() + .bucket(&self.bucket_name.to_lowercase()) + .prefix(prefix) + .set_continuation_token(continuation_token) + .send() + .await?; + + for obj in list_objects.contents.unwrap_or_default() { + let path = obj.key().unwrap_or_default().to_string(); + let path_parts: Vec<&str> = path.split('/').collect(); - let bytes = response.body.collect().await?.into_bytes(); - let csv_content = String::from_utf8(bytes.to_vec()) - .map_err(|e| format!("UTF-8 error in config.csv: {}", e))?; - debug!("Found config.csv: {} bytes", csv_content.len()); - Ok(()) + if path_parts.len() < 2 || !path_parts[0].ends_with(".gbot") { + continue; + } + + if !path.ends_with("config.csv") { + continue; + } + + debug!("Checking config file at path: {}", path); + match client + .head_object() + .bucket(&self.bucket_name) + .key(&path) + .send() + .await + { + Ok(head_res) => { + debug!("HeadObject successful for {}, metadata: {:?}", path, head_res); + let response = client + .get_object() + .bucket(&self.bucket_name) + .key(&path) + .send() + .await?; + debug!("GetObject successful for {}, content length: {}", path, response.content_length().unwrap_or(0)); + + let bytes = response.body.collect().await?.into_bytes(); + debug!("Collected {} bytes for {}", bytes.len(), path); + let csv_content = String::from_utf8(bytes.to_vec()) + .map_err(|e| format!("UTF-8 error in {}: {}", path, e))?; + debug!("Found {}: {} bytes", path, csv_content.len()); + } + Err(e) => { + debug!("Config file {} not found or inaccessible: {}", path, e); + } + } } - Err(e) => { - debug!("Config file not found or inaccessible: {}", e); - Ok(()) + + if !list_objects.is_truncated.unwrap_or(false) { + break; } + continuation_token = list_objects.next_continuation_token; } + + Ok(()) } async fn compile_tool( @@ -252,17 +302,31 @@ impl DriveMonitor { client: &Client, file_path: &str, ) -> Result<(), Box> { - let response = client.get_object() + debug!("Fetching object from S3: bucket={}, key={}", &self.bucket_name, file_path); + let response = match client + .get_object() .bucket(&self.bucket_name) .key(file_path) .send() - .await?; - + .await { + Ok(res) => { + debug!("Successfully fetched object from S3: bucket={}, key={}, size={}", + &self.bucket_name, file_path, res.content_length().unwrap_or(0)); + res + } + Err(e) => { + error!("Failed to fetch object from S3: bucket={}, key={}, error={:?}", + &self.bucket_name, file_path, e); + return Err(e.into()); + } + }; + let bytes = response.body.collect().await?.into_bytes(); let source_content = String::from_utf8(bytes.to_vec())?; let tool_name = file_path - .strip_prefix(".gbdialog/") + .split('/') + .last() .unwrap_or(file_path) .strip_suffix(".bas") .unwrap_or(file_path) @@ -272,7 +336,7 @@ impl DriveMonitor { .bucket_name .strip_suffix(".gbai") .unwrap_or(&self.bucket_name); - let work_dir = format!("./work/{}.gbai/.gbdialog", bot_name); + let work_dir = format!("./work/{}.gbai/{}.gbdialog", bot_name, bot_name); std::fs::create_dir_all(&work_dir)?; let local_source_path = format!("{}/{}.bas", work_dir, tool_name); @@ -307,13 +371,14 @@ impl DriveMonitor { } let collection_name = parts[1]; - let response = client.get_object() + let response = client + .get_object() .bucket(&self.bucket_name) .key(file_path) .send() .await?; let bytes = response.body.collect().await?.into_bytes(); - + let text_content = self.extract_text(file_path, &bytes)?; if text_content.trim().is_empty() { warn!("No text extracted from: {}", file_path); @@ -328,7 +393,7 @@ impl DriveMonitor { let qdrant_collection = format!("kb_default_{}", collection_name); qdrant_client::ensure_collection_exists(&self.state, &qdrant_collection).await?; - + embeddings::index_document(&self.state, &qdrant_collection, file_path, &text_content) .await?;