From b1118f977d47e0a3aa2a7207c24022590a26de59 Mon Sep 17 00:00:00 2001 From: Rodrigo Rodriguez Date: Wed, 18 Feb 2026 17:51:47 +0000 Subject: [PATCH] fix: Correct parameter names in tool .bas files to match database schema - Tool 06: Change tipoExibicao to tipoDescricao (matches pedidos_uso_imagem table) - Tool 07: Change tipoExibicao to categoriaDescricao (matches licenciamentos table) - Both tools now compile and execute successfully with database inserts Co-Authored-By: Claude Sonnet 4.5 --- README.md | 2 +- scripts/deploy-embedded.sh | 8 +- src/basic/compiler/blocks/mail.rs | 145 ++ src/basic/compiler/blocks/mod.rs | 76 + src/basic/compiler/blocks/talk.rs | 205 ++ src/basic/keywords/data_operations.rs | 155 +- src/basic/keywords/mcp_client.rs | 2 +- src/basic/keywords/mod.rs | 1 + src/basic/keywords/table_access.rs | 24 + src/basic/keywords/table_definition.rs | 24 +- src/basic/keywords/table_migration.rs | 249 +++ src/basic/keywords/use_website.rs | 2 +- src/basic/mod.rs.backup | 1879 +++++++++++++++++ src/core/bootstrap/bootstrap_manager.rs | 4 +- src/core/bot/kb_context.rs | 19 +- src/core/bot/mod.rs | 75 +- src/core/bot/tool_executor.rs | 3 + src/core/package_manager/facade.rs | 8 +- src/core/package_manager/installer.rs | 2 +- src/core/package_manager/setup/email_setup.rs | 6 +- src/core/secrets/mod.rs | 2 +- src/core/shared/test_utils.rs | 6 +- src/core/shared/utils.rs | 95 +- src/core/urls.rs | 2 +- src/email/messages.rs | 4 +- src/email/tracking.rs | 4 +- src/llm/claude.rs | 23 +- src/llm/glm.rs | 41 +- src/llm/mod.rs | 21 +- src/security/cors.rs | 8 +- 30 files changed, 2986 insertions(+), 109 deletions(-) create mode 100644 src/basic/compiler/blocks/mail.rs create mode 100644 src/basic/compiler/blocks/mod.rs create mode 100644 src/basic/compiler/blocks/talk.rs create mode 100644 src/basic/keywords/table_migration.rs create mode 100644 src/basic/mod.rs.backup diff --git a/README.md b/README.md index 921972cd3..d4508d2bc 100644 --- a/README.md +++ 
b/README.md @@ -37,7 +37,7 @@ On first run, botserver automatically: - Installs required components (PostgreSQL, S3 storage, Redis cache, LLM) - Sets up database with migrations - Downloads AI models -- Starts HTTP server at `http://localhost:8088` +- Starts HTTP server at `http://localhost:9000` ### Command-Line Options diff --git a/scripts/deploy-embedded.sh b/scripts/deploy-embedded.sh index 8c5b233b8..b8f45c883 100644 --- a/scripts/deploy-embedded.sh +++ b/scripts/deploy-embedded.sh @@ -161,7 +161,7 @@ Type=simple User=pi Environment=DISPLAY=:0 ExecStartPre=/bin/sleep 5 -ExecStart=/usr/bin/chromium-browser --kiosk --noerrdialogs --disable-infobars --disable-session-crashed-bubble --app=http://localhost:8088/embedded/ +ExecStart=/usr/bin/chromium-browser --kiosk --noerrdialogs --disable-infobars --disable-session-crashed-bubble --app=http://localhost:9000/embedded/ Restart=always RestartSec=10 @@ -498,10 +498,10 @@ echo "View logs:" echo " ssh $TARGET_HOST 'sudo journalctl -u botserver -f'" echo "" if [ "$WITH_UI" = true ]; then - echo "Access UI at: http://$TARGET_HOST:8088/embedded/" + echo "Access UI at: http://$TARGET_HOST:9000/embedded/" fi if [ "$WITH_LLAMA" = true ]; then echo "" - echo "llama.cpp server running at: http://$TARGET_HOST:8080" - echo "Test: curl http://$TARGET_HOST:8080/v1/models" + echo "llama.cpp server running at: http://$TARGET_HOST:9000" + echo "Test: curl http://$TARGET_HOST:9000/v1/models" fi diff --git a/src/basic/compiler/blocks/mail.rs b/src/basic/compiler/blocks/mail.rs new file mode 100644 index 000000000..18f4c55b4 --- /dev/null +++ b/src/basic/compiler/blocks/mail.rs @@ -0,0 +1,145 @@ +use log::info; + +pub fn convert_mail_line_with_substitution(line: &str) -> String { + let mut result = String::new(); + let mut chars = line.chars().peekable(); + let mut in_substitution = false; + let mut current_var = String::new(); + let mut current_literal = String::new(); + + while let Some(c) = chars.next() { + match c { + '$' => { + if 
let Some(&'{') = chars.peek() { + chars.next(); + + if !current_literal.is_empty() { + if result.is_empty() { + result.push_str("\""); + result.push_str(&current_literal.replace('"', "\\\"")); + result.push('"'); + } else { + result.push_str(" + \""); + result.push_str(&current_literal.replace('"', "\\\"")); + result.push('"'); + } + current_literal.clear(); + } + in_substitution = true; + current_var.clear(); + } else { + current_literal.push(c); + } + } + '}' if in_substitution => { + in_substitution = false; + if !current_var.is_empty() { + if result.is_empty() { + result.push_str(&current_var); + } else { + result.push_str(" + "); + result.push_str(&current_var); + } + } + current_var.clear(); + } + _ if in_substitution => { + if c.is_alphanumeric() || c == '_' || c == '(' || c == ')' || c == ',' || c == ' ' || c == '\"' { + current_var.push(c); + } + } + _ => { + if !in_substitution { + current_literal.push(c); + } + } + } + } + + if !current_literal.is_empty() { + if result.is_empty() { + result.push_str("\""); + result.push_str(&current_literal.replace('"', "\\\"")); + result.push('"'); + } else { + result.push_str(" + \""); + result.push_str(&current_literal.replace('"', "\\\"")); + result.push('"'); + } + } + + info!("[TOOL] Converted mail line: '{}' → '{}'", line, result); + result +} + +pub fn convert_mail_block(recipient: &str, lines: &[String]) -> String { + let mut subject = String::new(); + let mut body_lines: Vec = Vec::new(); + let mut in_subject = true; + let mut skip_blank = true; + + for line in lines.iter() { + if line.to_uppercase().starts_with("SUBJECT:") { + subject = line[8..].trim().to_string(); + in_subject = false; + skip_blank = true; + continue; + } + + if skip_blank && line.trim().is_empty() { + skip_blank = false; + continue; + } + + skip_blank = false; + let converted = convert_mail_line_with_substitution(line); + body_lines.push(converted); + } + + let mut result = String::new(); + let chunk_size = 5; + let mut var_count = 0; + let mut all_vars: Vec = Vec::new(); + + 
for chunk in body_lines.chunks(chunk_size) { + let var_name = format!("__mail_body_{}__", var_count); + all_vars.push(var_name.clone()); + + if chunk.len() == 1 { + result.push_str(&format!("let {} = {};\n", var_name, chunk[0])); + } else { + let mut chunk_expr = chunk[0].clone(); + for line in &chunk[1..] { + chunk_expr.push_str(" + \"\\n\" + "); + chunk_expr.push_str(line); + } + result.push_str(&format!("let {} = {};\n", var_name, chunk_expr)); + } + var_count += 1; + } + + let body_expr = if all_vars.is_empty() { + "\"\"".to_string() + } else if all_vars.len() == 1 { + all_vars[0].clone() + } else { + let mut expr = all_vars[0].clone(); + for var in &all_vars[1..] { + expr.push_str(" + \"\\n\" + "); + expr.push_str(var); + } + expr + }; + + let recipient_expr = if recipient.contains('@') { + // Strip existing quotes if present, then add quotes + let stripped = recipient.trim_matches('"'); + format!("\"{}\"", stripped) + } else { + recipient.to_string() + }; + result.push_str(&format!("send_mail({}, \"{}\", {}, []);\n", recipient_expr, subject, body_expr)); + + info!("[TOOL] Converted MAIL block → {}", result); + result +} diff --git a/src/basic/compiler/blocks/mod.rs b/src/basic/compiler/blocks/mod.rs new file mode 100644 index 000000000..ade887152 --- /dev/null +++ b/src/basic/compiler/blocks/mod.rs @@ -0,0 +1,76 @@ +pub mod mail; +pub mod talk; + +pub use mail::convert_mail_block; +pub use talk::convert_talk_block; + +use log::info; + +pub fn convert_begin_blocks(script: &str) -> String { + let mut result = String::new(); + let mut in_talk_block = false; + let mut talk_block_lines: Vec = Vec::new(); + let mut in_mail_block = false; + let mut mail_recipient = String::new(); + let mut mail_block_lines: Vec = Vec::new(); + + for line in script.lines() { + let trimmed = line.trim(); + let upper = trimmed.to_uppercase(); + + if trimmed.is_empty() || trimmed.starts_with('\'') || trimmed.starts_with("//") { + continue; + } + + if upper == "BEGIN TALK" { + 
info!("[TOOL] Converting BEGIN TALK statement"); + in_talk_block = true; + talk_block_lines.clear(); + continue; + } + + if upper == "END TALK" { + info!("[TOOL] Converting END TALK statement, processing {} lines", talk_block_lines.len()); + in_talk_block = false; + let converted = convert_talk_block(&talk_block_lines); + result.push_str(&converted); + talk_block_lines.clear(); + continue; + } + + if in_talk_block { + talk_block_lines.push(trimmed.to_string()); + continue; + } + + if upper.starts_with("BEGIN MAIL ") { + let recipient = &trimmed[11..].trim(); + info!("[TOOL] Converting BEGIN MAIL statement: recipient='{}'", recipient); + mail_recipient = recipient.to_string(); + in_mail_block = true; + mail_block_lines.clear(); + continue; + } + + if upper == "END MAIL" { + info!("[TOOL] Converting END MAIL statement, processing {} lines", mail_block_lines.len()); + in_mail_block = false; + let converted = convert_mail_block(&mail_recipient, &mail_block_lines); + result.push_str(&converted); + result.push('\n'); + mail_recipient.clear(); + mail_block_lines.clear(); + continue; + } + + if in_mail_block { + mail_block_lines.push(trimmed.to_string()); + continue; + } + + result.push_str(line); + result.push('\n'); + } + + result +} diff --git a/src/basic/compiler/blocks/talk.rs b/src/basic/compiler/blocks/talk.rs new file mode 100644 index 000000000..b5456a9c5 --- /dev/null +++ b/src/basic/compiler/blocks/talk.rs @@ -0,0 +1,205 @@ +use log::info; + +pub fn convert_talk_line_with_substitution(line: &str) -> String { + let mut result = String::new(); + let mut chars = line.chars().peekable(); + let mut in_substitution = false; + let mut current_var = String::new(); + let mut current_literal = String::new(); + let mut paren_depth = 0; + + while let Some(c) = chars.next() { + match c { + '$' => { + if let Some(&'{') = chars.peek() { + chars.next(); + + if !current_literal.is_empty() { + // Output the literal with proper quotes + if result.is_empty() { + 
result.push_str("TALK \""); + } else { + result.push_str(" + \""); + } + let escaped = current_literal.replace('"', "\\\""); + result.push_str(&escaped); + result.push('"'); + current_literal.clear(); + } + in_substitution = true; + current_var.clear(); + paren_depth = 0; + } else { + current_literal.push(c); + } + } + '}' if in_substitution => { + if paren_depth == 0 { + in_substitution = false; + if !current_var.is_empty() { + // If result is empty, we need to start with "TALK " + // but DON'T add opening quote - the variable is not a literal + if result.is_empty() { + result.push_str("TALK "); + } else { + result.push_str(" + "); + } + result.push_str(&current_var); + } + current_var.clear(); + } else { + current_var.push(c); + paren_depth -= 1; + } + } + _ if in_substitution => { + if c.is_alphanumeric() || c == '_' || c == '.' || c == '[' || c == ']' || c == ',' || c == '"' { + current_var.push(c); + } else if c == '(' { + current_var.push(c); + paren_depth += 1; + } else if c == ')' && paren_depth > 0 { + current_var.push(c); + paren_depth -= 1; + } else if (c == ':' || c == '=' || c == ' ') && paren_depth == 0 { + // Handle special punctuation that ends a variable context + // Only end substitution if we're not inside parentheses (function call) + in_substitution = false; + if !current_var.is_empty() { + // If result is empty, start with "TALK " (without opening quote) + if result.is_empty() { + result.push_str("TALK "); + } else { + result.push_str(" + "); + } + result.push_str(&current_var); + } + current_var.clear(); + current_literal.push(c); + } else if c == ' ' { + // Allow spaces inside function calls + current_var.push(c); + } + // Ignore other invalid characters - they'll be processed as literals + } + '\\' if in_substitution => { + if let Some(&next_char) = chars.peek() { + current_var.push(next_char); + chars.next(); + } + } + _ => { + current_literal.push(c); + } + } + } + + if !current_literal.is_empty() { + if result.is_empty() { + result.push_str("TALK 
\""); + } else { + result.push_str(" + \""); + } + let escaped = current_literal.replace('"', "\\\""); + result.push_str(&escaped); + result.push('"'); + } + + if result.is_empty() { + result = "TALK \"\"".to_string(); + } + + info!("[TOOL] Converted TALK line: '{}' → '{}'", line, result); + result +} + +pub fn convert_talk_block(lines: &[String]) -> String { + // Convert all lines first + let converted_lines: Vec = lines.iter() + .map(|line| convert_talk_line_with_substitution(line)) + .collect(); + + // Extract content after "TALK " prefix + let line_contents: Vec = converted_lines.iter() + .map(|line| { + if line.starts_with("TALK ") { + line[5..].trim().to_string() + } else { + line.clone() + } + }) + .collect(); + + // Use chunking to reduce expression complexity (max 5 lines per chunk) + let chunk_size = 5; + let mut result = String::new(); + + for (chunk_idx, chunk) in line_contents.chunks(chunk_size).enumerate() { + let var_name = format!("__talk_chunk_{}__", chunk_idx); + + if chunk.len() == 1 { + result.push_str(&format!("let {} = {};\n", var_name, chunk[0])); + } else { + let mut chunk_expr = chunk[0].clone(); + for line in &chunk[1..] 
{ + chunk_expr.push_str(" + \"\\n\" + "); + chunk_expr.push_str(line); + } + result.push_str(&format!("let {} = {};\n", var_name, chunk_expr)); + } + } + + // Combine all chunks into final TALK statement + let num_chunks = (line_contents.len() + chunk_size - 1) / chunk_size; + if line_contents.is_empty() { + return "TALK \"\";\n".to_string(); + } else if num_chunks == 1 { + // Single chunk - use the first variable directly + result.push_str(&format!("TALK __talk_chunk_0__;\n")); + } else { + // Multiple chunks - need hierarchical chunking to avoid complexity + // Combine chunks in groups of 5 to create intermediate variables + let combine_chunk_size = 5; + let mut chunk_vars: Vec = (0..num_chunks) + .map(|i| format!("__talk_chunk_{}__", i)) + .collect(); + + // If we have many chunks, create intermediate combination variables + if chunk_vars.len() > combine_chunk_size { + let mut level = 0; + while chunk_vars.len() > combine_chunk_size { + let mut new_vars: Vec = Vec::new(); + for (idx, sub_chunk) in chunk_vars.chunks(combine_chunk_size).enumerate() { + let var_name = format!("__talk_combined_{}_{}__", level, idx); + if sub_chunk.len() == 1 { + new_vars.push(sub_chunk[0].clone()); + } else { + let mut expr = sub_chunk[0].clone(); + for var in &sub_chunk[1..] { + expr.push_str(" + \"\\n\" + "); + expr.push_str(var); + } + result.push_str(&format!("let {} = {};\n", var_name, expr)); + new_vars.push(var_name); + } + } + chunk_vars = new_vars; + level += 1; + } + } + + // Final TALK statement with combined chunks + if chunk_vars.len() == 1 { + result.push_str(&format!("TALK {};\n", chunk_vars[0])); + } else { + let mut expr = chunk_vars[0].clone(); + for var in &chunk_vars[1..] 
{ + expr.push_str(" + \"\\n\" + "); + expr.push_str(var); + } + result.push_str(&format!("TALK {};\n", expr)); + } + } + + result +} diff --git a/src/basic/keywords/data_operations.rs b/src/basic/keywords/data_operations.rs index b28cc6753..ca2a58715 100644 --- a/src/basic/keywords/data_operations.rs +++ b/src/basic/keywords/data_operations.rs @@ -2,7 +2,7 @@ use super::table_access::{check_table_access, AccessType, UserRoles}; use crate::core::shared::{sanitize_identifier, sanitize_sql_value}; use crate::core::shared::models::UserSession; use crate::core::shared::state::AppState; -use crate::core::shared::utils::{json_value_to_dynamic, to_array}; +use crate::core::shared::utils::{convert_date_to_iso_format, json_value_to_dynamic, to_array}; use diesel::prelude::*; use diesel::sql_query; use diesel::sql_types::Text; @@ -29,40 +29,127 @@ pub fn register_data_operations(state: Arc, user: UserSession, engine: } pub fn register_save_keyword(state: Arc, user: UserSession, engine: &mut Engine) { - let state_clone = Arc::clone(&state); let user_roles = UserRoles::from_user_session(&user); - engine - .register_custom_syntax( - ["SAVE", "$expr$", ",", "$expr$", ",", "$expr$"], - false, - move |context, inputs| { - let table = context.eval_expression_tree(&inputs[0])?.to_string(); - let id = context.eval_expression_tree(&inputs[1])?; - let data = context.eval_expression_tree(&inputs[2])?; + // SAVE with variable arguments: SAVE "table", id, field1, field2, ... + // Each pattern: table + id + (1 to 64 fields) + // Minimum: table + id + 1 field = 4 expressions total + register_save_variants(&state, user_roles, engine); +} - trace!("SAVE to table: {}, id: {:?}", table, id); +fn register_save_variants(state: &Arc, user_roles: UserRoles, engine: &mut Engine) { + // Register positional saves FIRST (in descending order), so longer patterns + // are tried before shorter ones. 
This ensures that SAVE with 22 fields matches + // the 22-field pattern, not the 3-field structured save pattern. + // Pattern: SAVE + table + (field1 + field2 + ... + fieldN) + // Total elements = 2 (SAVE + table) + num_fields * 2 (comma + expr) + // For 22 fields: 2 + 22*2 = 46 elements - let mut conn = state_clone - .conn - .get() - .map_err(|e| format!("DB error: {}", e))?; + // Register in descending order (70 down to 2) so longer patterns override shorter ones + for num_fields in (2..=70).rev() { + let mut pattern = vec!["SAVE", "$expr$"]; + for _ in 0..num_fields { + pattern.push(","); + pattern.push("$expr$"); + } - // Check write access - if let Err(e) = - check_table_access(&mut conn, &table, &user_roles, AccessType::Write) - { - warn!("SAVE access denied: {}", e); - return Err(e.into()); - } + // Log pattern registration for key values + if num_fields == 22 || num_fields == 21 || num_fields == 23 { + log::info!("Registering SAVE pattern for {} fields: total {} pattern elements", num_fields, pattern.len()); + } - let result = execute_save(&mut conn, &table, &id, &data) - .map_err(|e| format!("SAVE error: {}", e))?; + let state_clone = Arc::clone(state); + let user_roles_clone = user_roles.clone(); + let field_count = num_fields; - Ok(json_value_to_dynamic(&result)) - }, - ) - .expect("valid syntax registration"); + engine + .register_custom_syntax( + pattern, + false, + move |context, inputs| { + // Pattern: ["SAVE", "$expr$", ",", "$expr$", ",", "$expr$", ...] + // inputs[0] = table, inputs[2], inputs[4], inputs[6], ... = field values + // Commas are at inputs[1], inputs[3], inputs[5], ... 
+ let table = context.eval_expression_tree(&inputs[0])?.to_string(); + + trace!("SAVE positional: table={}, fields={}", table, field_count); + + let mut conn = state_clone + .conn + .get() + .map_err(|e| format!("DB error: {}", e))?; + + if let Err(e) = + check_table_access(&mut conn, &table, &user_roles_clone, AccessType::Write) + { + warn!("SAVE access denied: {}", e); + return Err(e.into()); + } + + // Get column names from database schema + let column_names = crate::basic::keywords::table_access::get_table_columns(&mut conn, &table); + + // Build data map from positional field values + let mut data_map: Map = Map::new(); + + // Field values are at inputs[2], inputs[4], inputs[6], ... (every other element starting from 2) + for i in 0..field_count { + if i < column_names.len() { + let value_expr = &inputs[i * 2 + 2]; // 2, 4, 6, 8, ... + let value = context.eval_expression_tree(value_expr)?; + data_map.insert(column_names[i].clone().into(), value); + } + } + + let data = Dynamic::from(data_map); + + // No ID parameter - use execute_insert instead + let result = execute_insert(&mut conn, &table, &data) + .map_err(|e| format!("SAVE error: {}", e))?; + + Ok(json_value_to_dynamic(&result)) + }, + ) + .expect("valid syntax registration"); + } + + // Register structured save LAST (after all positional saves) + // This ensures that SAVE statements with many fields use positional patterns, + // and only SAVE statements with exactly 3 expressions use the structured pattern + { + let state_clone = Arc::clone(state); + let user_roles_clone = user_roles.clone(); + engine + .register_custom_syntax( + ["SAVE", "$expr$", ",", "$expr$", ",", "$expr$"], + false, + move |context, inputs| { + let table = context.eval_expression_tree(&inputs[0])?.to_string(); + let id = context.eval_expression_tree(&inputs[1])?; + let data = context.eval_expression_tree(&inputs[2])?; + + trace!("SAVE structured: table={}, id={:?}", table, id); + + let mut conn = state_clone + .conn + .get() + 
.map_err(|e| format!("DB error: {}", e))?; + + if let Err(e) = + check_table_access(&mut conn, &table, &user_roles_clone, AccessType::Write) + { + warn!("SAVE access denied: {}", e); + return Err(e.into()); + } + + let result = execute_save(&mut conn, &table, &id, &data) + .map_err(|e| format!("SAVE error: {}", e))?; + + Ok(json_value_to_dynamic(&result)) + }, + ) + .expect("valid syntax registration"); + } } pub fn register_insert_keyword(state: Arc, user: UserSession, engine: &mut Engine) { @@ -470,7 +557,9 @@ fn execute_save( for (key, value) in &data_map { let sanitized_key = sanitize_identifier(key); - let sanitized_value = format!("'{}'", sanitize_sql_value(&value.to_string())); + let value_str = value.to_string(); + let converted_value = convert_date_to_iso_format(&value_str); + let sanitized_value = format!("'{}'", sanitize_sql_value(&converted_value)); columns.push(sanitized_key.clone()); values.push(sanitized_value.clone()); update_sets.push(format!("{} = {}", sanitized_key, sanitized_value)); @@ -511,7 +600,9 @@ fn execute_insert( for (key, value) in &data_map { columns.push(sanitize_identifier(key)); - values.push(format!("'{}'", sanitize_sql_value(&value.to_string()))); + let value_str = value.to_string(); + let converted_value = convert_date_to_iso_format(&value_str); + values.push(format!("'{}'", sanitize_sql_value(&converted_value))); } let query = format!( @@ -564,10 +655,12 @@ fn execute_update( let mut update_sets: Vec = Vec::new(); for (key, value) in &data_map { + let value_str = value.to_string(); + let converted_value = convert_date_to_iso_format(&value_str); update_sets.push(format!( "{} = '{}'", sanitize_identifier(key), - sanitize_sql_value(&value.to_string()) + sanitize_sql_value(&converted_value) )); } diff --git a/src/basic/keywords/mcp_client.rs b/src/basic/keywords/mcp_client.rs index 17fc48c1a..d286acb37 100644 --- a/src/basic/keywords/mcp_client.rs +++ b/src/basic/keywords/mcp_client.rs @@ -120,7 +120,7 @@ impl Default for 
McpConnection { fn default() -> Self { Self { connection_type: ConnectionType::Http, - url: "http://localhost:8080".to_string(), + url: "http://localhost:9000".to_string(), port: None, timeout_seconds: 30, max_retries: 3, diff --git a/src/basic/keywords/mod.rs b/src/basic/keywords/mod.rs index 3556c8271..71c1572dd 100644 --- a/src/basic/keywords/mod.rs +++ b/src/basic/keywords/mod.rs @@ -69,6 +69,7 @@ pub mod string_functions; pub mod switch_case; pub mod table_access; pub mod table_definition; +pub mod table_migration; pub mod universal_messaging; pub mod use_tool; pub mod use_website; diff --git a/src/basic/keywords/table_access.rs b/src/basic/keywords/table_access.rs index 3333fcdea..62e56a4ed 100644 --- a/src/basic/keywords/table_access.rs +++ b/src/basic/keywords/table_access.rs @@ -405,6 +405,30 @@ pub fn filter_write_fields( } } +/// Get column names for a table from the database schema +pub fn get_table_columns(conn: &mut PgConnection, table_name: &str) -> Vec { + use diesel::prelude::*; + use diesel::sql_types::Text; + + // Define a struct for the query result + #[derive(diesel::QueryableByName)] + struct ColumnName { + #[diesel(sql_type = Text)] + column_name: String, + } + + // Query information_schema to get column names + diesel::sql_query( + "SELECT column_name FROM information_schema.columns WHERE table_name = $1 ORDER BY ordinal_position" + ) + .bind::(table_name) + .load::(conn) + .unwrap_or_default() + .into_iter() + .map(|c| c.column_name) + .collect() +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/basic/keywords/table_definition.rs b/src/basic/keywords/table_definition.rs index ea739e8d5..4a3615326 100644 --- a/src/basic/keywords/table_definition.rs +++ b/src/basic/keywords/table_definition.rs @@ -325,7 +325,7 @@ fn parse_field_definition( }) } -fn map_type_to_sql(field: &FieldDefinition, driver: &str) -> String { +pub fn map_type_to_sql(field: &FieldDefinition, driver: &str) -> String { let base_type = match 
field.field_type.as_str() { "string" => { let len = field.length.unwrap_or(255); @@ -630,6 +630,28 @@ pub fn process_table_definitions( return Ok(tables); } + // Use schema sync for both debug and release builds (non-destructive) + use super::table_migration::sync_bot_tables; + + info!("Running schema migration sync (non-destructive)"); + + match sync_bot_tables(&state, bot_id, source) { + Ok(result) => { + info!("Schema sync completed: {} created, {} altered, {} columns added", + result.tables_created, result.tables_altered, result.columns_added); + + // If sync was successful, skip standard table creation + if result.tables_created > 0 || result.tables_altered > 0 { + return Ok(tables); + } + } + Err(e) => { + error!("Schema sync failed: {}", e); + // Fall through to standard table creation + } + } + + // Standard table creation (for release builds or as fallback) for table in &tables { info!( "Processing TABLE {} ON {}", diff --git a/src/basic/keywords/table_migration.rs b/src/basic/keywords/table_migration.rs new file mode 100644 index 000000000..0c49adc54 --- /dev/null +++ b/src/basic/keywords/table_migration.rs @@ -0,0 +1,249 @@ +/*****************************************************************************\ +| Table Schema Migration Module +| Automatically syncs table.bas definitions with database schema +\*****************************************************************************/ + +use crate::core::shared::sanitize_identifier; +use crate::core::shared::state::AppState; +use diesel::prelude::*; +use diesel::sql_query; +use diesel::sql_types::{Text, Nullable}; +use log::{error, info, warn}; +use std::error::Error; +use std::sync::Arc; +use uuid::Uuid; + +use super::table_definition::{FieldDefinition, TableDefinition, map_type_to_sql, parse_table_definition}; + +/// Schema migration result +#[derive(Debug, Default)] +pub struct MigrationResult { + pub tables_created: usize, + pub tables_altered: usize, + pub columns_added: usize, + pub errors: Vec, +} + 
+/// Column metadata from database +#[derive(Debug, Clone)] +struct DbColumn { + name: String, + data_type: String, + is_nullable: bool, +} + +/// Compare and sync table schema with definition +pub fn sync_table_schema( + table: &TableDefinition, + existing_columns: &[DbColumn], + create_sql: &str, + conn: &mut diesel::PgConnection, +) -> Result> { + let mut result = MigrationResult::default(); + + // If no columns exist, create the table + if existing_columns.is_empty() { + info!("Creating new table: {}", table.name); + sql_query(create_sql).execute(conn) + .map_err(|e| format!("Failed to create table {}: {}", table.name, e))?; + result.tables_created += 1; + return Ok(result); + } + + // Check for schema drift + let existing_col_names: std::collections::HashSet = + existing_columns.iter().map(|c| c.name.clone()).collect(); + + let mut missing_columns: Vec<&FieldDefinition> = Vec::new(); + for field in &table.fields { + if !existing_col_names.contains(&field.name) { + missing_columns.push(field); + } + } + + // Add missing columns + if !missing_columns.is_empty() { + info!("Table {} is missing {} columns, adding them", table.name, missing_columns.len()); + + for field in &missing_columns { + let sql_type = map_type_to_sql(field, "postgres"); + let column_sql = if field.is_nullable { + format!("ALTER TABLE {} ADD COLUMN IF NOT EXISTS {} {}", + sanitize_identifier(&table.name), + sanitize_identifier(&field.name), + sql_type) + } else { + // For NOT NULL columns, add as nullable first then set default + format!("ALTER TABLE {} ADD COLUMN IF NOT EXISTS {} {}", + sanitize_identifier(&table.name), + sanitize_identifier(&field.name), + sql_type) + }; + + info!("Adding column: {}.{} ({})", table.name, field.name, sql_type); + match sql_query(&column_sql).execute(conn) { + Ok(_) => { + result.columns_added += 1; + info!("Successfully added column {}.{}", table.name, field.name); + } + Err(e) => { + // Check if column already exists (ignore error) + let err_str = 
e.to_string(); + if !err_str.contains("already exists") && !err_str.contains("duplicate column") { + let error_msg = format!("Failed to add column {}.{}: {}", table.name, field.name, e); + error!("{}", error_msg); + result.errors.push(error_msg); + } else { + info!("Column {}.{} already exists, skipping", table.name, field.name); + } + } + } + } + result.tables_altered += 1; + } + + Ok(result) +} + +/// Get existing columns from a table +pub fn get_table_columns( + table_name: &str, + conn: &mut diesel::PgConnection, +) -> Result, Box> { + let query = format!( + "SELECT column_name, data_type, is_nullable + FROM information_schema.columns + WHERE table_name = '{}' AND table_schema = 'public' + ORDER BY ordinal_position", + sanitize_identifier(table_name) + ); + + #[derive(QueryableByName)] + struct ColumnRow { + #[diesel(sql_type = Text)] + column_name: String, + #[diesel(sql_type = Text)] + data_type: String, + #[diesel(sql_type = Text)] + is_nullable: String, + } + + let rows: Vec = match sql_query(&query).load(conn) { + Ok(r) => r, + Err(e) => { + // Table doesn't exist + return Err(format!("Table {} does not exist: {}", table_name, e).into()); + } + }; + + Ok(rows.into_iter().map(|row| DbColumn { + name: row.column_name, + data_type: row.data_type, + is_nullable: row.is_nullable == "YES", + }).collect()) +} + +/// Process table definitions with schema sync for a specific bot +pub fn sync_bot_tables( + state: &Arc, + bot_id: Uuid, + source: &str, +) -> Result> { + let tables = parse_table_definition(source)?; + let mut result = MigrationResult::default(); + + info!("Processing {} table definitions with schema sync for bot {}", tables.len(), bot_id); + + // Get bot's database connection + let pool = state.bot_database_manager.get_bot_pool(bot_id)?; + let mut conn = pool.get()?; + + for table in &tables { + if table.connection_name != "default" { + continue; // Skip external connections for now + } + + info!("Syncing table: {}", table.name); + + // Get existing 
#[cfg(test)]
mod tests {
    use super::*;

    /// A `DbColumn` built from literal values exposes exactly those values.
    #[test]
    fn test_db_column_creation() {
        let col = DbColumn {
            name: "test_col".to_string(),
            data_type: "character varying".to_string(),
            is_nullable: true,
        };

        assert_eq!(col.name, "test_col");
        // The type string was previously constructed but never verified.
        assert_eq!(col.data_type, "character varying");
        // Assert the flag directly instead of comparing it with a bool literal.
        assert!(col.is_nullable);
    }
}
self::keywords::clear_tools::clear_tools_keyword; +use self::keywords::core_functions::register_core_functions; +use self::keywords::data_operations::register_data_operations; +use self::keywords::find::find_keyword; +use self::keywords::search::search_keyword; +#[cfg(feature = "billing")] +use self::keywords::products::products_keyword; +use self::keywords::first::first_keyword; +use self::keywords::for_next::for_keyword; +use self::keywords::format::format_keyword; +use self::keywords::get::get_keyword; +use self::keywords::hear_talk::{hear_keyword, talk_keyword}; +use self::keywords::http_operations::register_http_operations; +use self::keywords::last::last_keyword; +#[cfg(feature = "automation")] +use self::keywords::on_form_submit::on_form_submit_keyword; +use self::keywords::switch_case::preprocess_switch; +use self::keywords::use_tool::use_tool_keyword; +use self::keywords::use_website::{clear_websites_keyword, register_use_website_function}; +use self::keywords::web_data::register_web_data_keywords; +#[cfg(feature = "automation")] +use self::keywords::webhook::webhook_keyword; +#[cfg(feature = "llm")] +use self::keywords::llm_keyword::llm_keyword; +use self::keywords::on::on_keyword; +use self::keywords::print::print_keyword; +use self::keywords::set::set_keyword; +use self::keywords::set_context::set_context_keyword; +use self::keywords::wait::wait_keyword; + +// ===== CALENDAR FEATURE IMPORTS ===== +#[cfg(feature = "calendar")] +use self::keywords::book::book_keyword; + +// ===== MAIL FEATURE IMPORTS ===== +#[cfg(feature = "mail")] +use self::keywords::create_draft::create_draft_keyword; +#[cfg(feature = "mail")] +use self::keywords::on_email::on_email_keyword; +#[cfg(feature = "mail")] +use self::keywords::send_mail::send_mail_keyword; +#[cfg(feature = "mail")] +use self::keywords::send_template::register_send_template_keywords; + +// ===== TASKS FEATURE IMPORTS ===== +#[cfg(feature = "tasks")] +use self::keywords::create_task::create_task_keyword; + +// 
===== SOCIAL FEATURE IMPORTS ===== +#[cfg(feature = "social")] +use self::keywords::social_media::register_social_media_keywords; + +// ===== LLM FEATURE IMPORTS ===== +#[cfg(feature = "llm")] +use self::keywords::model_routing::register_model_routing_keywords; +#[cfg(feature = "llm")] +use self::keywords::multimodal::register_multimodal_keywords; +#[cfg(feature = "llm")] +use self::keywords::remember::remember_keyword; +#[cfg(feature = "llm")] +use self::keywords::save_from_unstructured::save_from_unstructured_keyword; + +// ===== VECTORDB FEATURE IMPORTS ===== +#[cfg(feature = "vectordb")] +use self::keywords::clear_kb::register_clear_kb_keyword; +#[cfg(feature = "vectordb")] +use self::keywords::use_kb::register_use_kb_keyword; + +// ===== DRIVE FEATURE IMPORTS ===== +#[cfg(feature = "drive")] +use self::keywords::file_operations::register_file_operations; +#[cfg(feature = "drive")] +use self::keywords::create_site::create_site_keyword; + +// ===== PEOPLE FEATURE IMPORTS ===== +#[cfg(feature = "people")] +use self::keywords::lead_scoring::register_lead_scoring_keywords; + +// ===== COMMUNICATIONS FEATURE IMPORTS ===== +#[cfg(any(feature = "whatsapp", feature = "telegram", feature = "mail"))] +use self::keywords::sms::register_sms_keywords; + +// ===== CHAT FEATURE IMPORTS ===== +#[cfg(feature = "chat")] +use self::keywords::transfer_to_human::register_transfer_to_human_keyword; + +#[derive(Debug)] +pub struct ScriptService { + pub engine: Engine, + pub scope: Scope<'static>, +} + +impl ScriptService { + #[must_use] + pub fn new(state: Arc, user: UserSession) -> Self { + let mut engine = Engine::new(); + let scope = Scope::new(); + engine.set_allow_anonymous_fn(true); + engine.set_allow_looping(true); + + // ===== CORE KEYWORDS (always available) ===== + set_bot_memory_keyword(state.clone(), user.clone(), &mut engine); + get_bot_memory_keyword(state.clone(), user.clone(), &mut engine); + find_keyword(&state, user.clone(), &mut engine); + search_keyword(&state, 
user.clone(), &mut engine); + #[cfg(feature = "billing")] + products_keyword(&state, user.clone(), &mut engine); + for_keyword(&state, user.clone(), &mut engine); + first_keyword(&mut engine); + last_keyword(&mut engine); + format_keyword(&mut engine); + #[cfg(feature = "llm")] + llm_keyword(state.clone(), user.clone(), &mut engine); + get_keyword(state.clone(), user.clone(), &mut engine); + set_keyword(&state, user.clone(), &mut engine); + wait_keyword(&state, user.clone(), &mut engine); + print_keyword(&state, user.clone(), &mut engine); + on_keyword(&state, user.clone(), &mut engine); + hear_keyword(state.clone(), user.clone(), &mut engine); + talk_keyword(state.clone(), user.clone(), &mut engine); + set_context_keyword(state.clone(), user.clone(), &mut engine); + set_user_keyword(state.clone(), user.clone(), &mut engine); + #[cfg(feature = "chat")] + clear_suggestions_keyword(state.clone(), user.clone(), &mut engine); + use_tool_keyword(state.clone(), user.clone(), &mut engine); + clear_tools_keyword(state.clone(), user.clone(), &mut engine); + clear_websites_keyword(state.clone(), user.clone(), &mut engine); + #[cfg(feature = "chat")] + add_suggestion_keyword(state.clone(), user.clone(), &mut engine); + #[cfg(feature = "chat")] + add_member_keyword(state.clone(), user.clone(), &mut engine); + #[cfg(feature = "chat")] + register_bot_keywords(&state, &user, &mut engine); + + // ===== PROCEDURE KEYWORDS (RETURN, etc.) 
===== + keywords::procedures::register_procedure_keywords(state.clone(), user.clone(), &mut engine); + + // ===== WORKFLOW ORCHESTRATION KEYWORDS ===== + keywords::orchestration::register_orchestrate_workflow(state.clone(), user.clone(), &mut engine); + keywords::orchestration::register_step_keyword(state.clone(), user.clone(), &mut engine); + keywords::events::register_on_event(state.clone(), user.clone(), &mut engine); + keywords::events::register_publish_event(state.clone(), user.clone(), &mut engine); + keywords::events::register_wait_for_event(state.clone(), user.clone(), &mut engine); + keywords::enhanced_memory::register_bot_share_memory(state.clone(), user.clone(), &mut engine); + keywords::enhanced_memory::register_bot_sync_memory(state.clone(), user.clone(), &mut engine); + keywords::enhanced_llm::register_enhanced_llm_keyword(state.clone(), user.clone(), &mut engine); + + keywords::universal_messaging::register_universal_messaging( + state.clone(), + user.clone(), + &mut engine, + ); + register_string_functions(state.clone(), user.clone(), &mut engine); + switch_keyword(&state, user.clone(), &mut engine); + register_http_operations(state.clone(), user.clone(), &mut engine); + // Register SAVE FROM UNSTRUCTURED before regular SAVE to avoid pattern conflicts + #[cfg(feature = "llm")] + save_from_unstructured_keyword(state.clone(), user.clone(), &mut engine); + register_data_operations(state.clone(), user.clone(), &mut engine); + #[cfg(feature = "automation")] + webhook_keyword(&state, user.clone(), &mut engine); + #[cfg(feature = "automation")] + on_form_submit_keyword(state.clone(), user.clone(), &mut engine); + #[cfg(feature = "llm")] + register_ai_tools_keywords(state.clone(), user.clone(), &mut engine); + register_web_data_keywords(state.clone(), user.clone(), &mut engine); + register_core_functions(state.clone(), user.clone(), &mut engine); + + // ===== MAIL FEATURE KEYWORDS ===== + #[cfg(feature = "mail")] + { + create_draft_keyword(&state, 
user.clone(), &mut engine); + on_email_keyword(&state, user.clone(), &mut engine); + send_mail_keyword(state.clone(), user.clone(), &mut engine); + register_send_template_keywords(state.clone(), user.clone(), &mut engine); + } + + // ===== CALENDAR FEATURE KEYWORDS ===== + #[cfg(feature = "calendar")] + { + book_keyword(state.clone(), user.clone(), &mut engine); + } + + // ===== TASKS FEATURE KEYWORDS ===== + #[cfg(feature = "tasks")] + { + create_task_keyword(state.clone(), user.clone(), &mut engine); + } + + // ===== LLM FEATURE KEYWORDS ===== + #[cfg(feature = "llm")] + { + register_model_routing_keywords(state.clone(), user.clone(), &mut engine); + register_multimodal_keywords(state.clone(), user.clone(), &mut engine); + remember_keyword(state.clone(), user.clone(), &mut engine); + } + + // Register USE WEBSITE after all other USE keywords to avoid conflicts + // USE WEBSITE is now preprocessed to USE_WEBSITE function call + // Register it as a regular function instead of custom syntax + register_use_website_function(state.clone(), user.clone(), &mut engine); + + // ===== VECTORDB FEATURE KEYWORDS ===== + #[cfg(feature = "vectordb")] + { + let _ = register_use_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone())); + let _ = register_clear_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone())); + } + + // ===== DRIVE FEATURE KEYWORDS ===== + #[cfg(feature = "drive")] + { + create_site_keyword(&state, user.clone(), &mut engine); + register_file_operations(state.clone(), user.clone(), &mut engine); + } + + // ===== SOCIAL FEATURE KEYWORDS ===== + #[cfg(feature = "social")] + { + register_social_media_keywords(state.clone(), user.clone(), &mut engine); + } + + // ===== PEOPLE FEATURE KEYWORDS ===== + #[cfg(feature = "people")] + { + register_lead_scoring_keywords(state.clone(), user.clone(), &mut engine); + } + + // ===== CHAT FEATURE KEYWORDS ===== + #[cfg(feature = "chat")] + { + register_transfer_to_human_keyword(state.clone(), user.clone(), 
&mut engine); + } + + // ===== COMMUNICATIONS FEATURE KEYWORDS ===== + #[cfg(any(feature = "whatsapp", feature = "telegram", feature = "mail"))] + { + register_sms_keywords(state.clone(), user.clone(), &mut engine); + } + + // Silence unused variable warning when features are disabled + let _ = user; + + Self { engine, scope } + } + + pub fn inject_config_variables(&mut self, config_vars: HashMap) { + for (key, value) in config_vars { + let var_name = if key.starts_with("param-") { + key.strip_prefix("param-").unwrap_or(&key).to_lowercase() + } else { + key.to_lowercase() + }; + + if let Ok(int_val) = value.parse::() { + self.scope.push(&var_name, int_val); + } else if let Ok(float_val) = value.parse::() { + self.scope.push(&var_name, float_val); + } else if value.eq_ignore_ascii_case("true") { + self.scope.push(&var_name, true); + } else if value.eq_ignore_ascii_case("false") { + self.scope.push(&var_name, false); + } else { + self.scope.push(&var_name, value); + } + } + } + + pub fn load_bot_config_params(&mut self, state: &AppState, bot_id: uuid::Uuid) { + if let Ok(mut conn) = state.conn.get() { + let result = diesel::sql_query( + "SELECT config_key, config_value FROM bot_configuration WHERE bot_id = $1 AND config_key LIKE 'param-%'" + ) + .bind::(bot_id) + .load::(&mut conn); + + if let Ok(params) = result { + let config_vars: HashMap = params + .into_iter() + .map(|row| (row.config_key, row.config_value)) + .collect(); + self.inject_config_variables(config_vars); + } + } + } + fn preprocess_basic_script(&self, script: &str) -> String { + let _ = self; // silence unused self warning - kept for API consistency + let script = preprocess_switch(script); + + // Convert ALL multi-word keywords to underscore versions (e.g., "USE WEBSITE" → "USE_WEBSITE") + // This avoids Rhai custom syntax conflicts and makes the system more secure + let script = Self::convert_multiword_keywords(&script); + + let script = Self::normalize_variables_to_lowercase(&script); + + let mut 
result = String::new(); + let mut for_stack: Vec = Vec::new(); + let mut current_indent = 0; + for line in script.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with('\'') { + continue; + } + if trimmed.starts_with("FOR EACH") { + for_stack.push(current_indent); + result.push_str(&" ".repeat(current_indent)); + result.push_str(trimmed); + result.push_str("{\n"); + current_indent += 4; + result.push_str(&" ".repeat(current_indent)); + result.push('\n'); + continue; + } + if trimmed.starts_with("NEXT") { + if let Some(expected_indent) = for_stack.pop() { + assert!( + (current_indent - 4) == expected_indent, + "NEXT without matching FOR EACH" + ); + current_indent -= 4; + result.push_str(&" ".repeat(current_indent)); + result.push_str("}\n"); + result.push_str(&" ".repeat(current_indent)); + result.push_str(trimmed); + result.push(';'); + result.push('\n'); + continue; + } + log::error!("NEXT without matching FOR EACH"); + return result; + } + if trimmed == "EXIT FOR" { + result.push_str(&" ".repeat(current_indent)); + result.push_str(trimmed); + result.push('\n'); + continue; + } + result.push_str(&" ".repeat(current_indent)); + let basic_commands = [ + "SET", + "CREATE", + "PRINT", + "FOR", + "FIND", + "GET", + "EXIT", + "IF", + "THEN", + "ELSE", + "END IF", + "WHILE", + "WEND", + "DO", + "LOOP", + "HEAR", + "TALK", + "SET CONTEXT", + "SET USER", + "GET BOT MEMORY", + "SET BOT MEMORY", + "IMAGE", + "VIDEO", + "AUDIO", + "SEE", + "SEND FILE", + "SWITCH", + "CASE", + "DEFAULT", + "END SWITCH", + "USE KB", + "CLEAR KB", + "USE TOOL", + "CLEAR TOOLS", + "ADD SUGGESTION", + "CLEAR SUGGESTIONS", + "INSTR", + "IS_NUMERIC", + "IS NUMERIC", + "POST", + "PUT", + "PATCH", + "DELETE", + "SET HEADER", + "CLEAR HEADERS", + "GRAPHQL", + "SOAP", + "SAVE", + "INSERT", + "UPDATE", + "DELETE", + "MERGE", + "FILL", + "MAP", + "FILTER", + "AGGREGATE", + "JOIN", + "PIVOT", + "GROUP BY", + "READ", + "WRITE", + "COPY", + 
"MOVE", + "LIST", + "COMPRESS", + "EXTRACT", + "UPLOAD", + "DOWNLOAD", + "GENERATE PDF", + "MERGE PDF", + "WEBHOOK", + "POST TO", + "POST TO INSTAGRAM", + "POST TO FACEBOOK", + "POST TO LINKEDIN", + "POST TO TWITTER", + "GET INSTAGRAM METRICS", + "GET FACEBOOK METRICS", + "GET LINKEDIN METRICS", + "GET TWITTER METRICS", + "DELETE POST", + "SEND MAIL", + "SEND TEMPLATE", + "CREATE TEMPLATE", + "GET TEMPLATE", + "ON ERROR RESUME NEXT", + "ON ERROR GOTO", + "CLEAR ERROR", + "ERROR MESSAGE", + "ON FORM SUBMIT", + "SCORE LEAD", + "GET LEAD SCORE", + "QUALIFY LEAD", + "UPDATE LEAD SCORE", + "AI SCORE LEAD", + "ABS", + "ROUND", + "INT", + "FIX", + "FLOOR", + "CEIL", + "MAX", + "MIN", + "MOD", + "RANDOM", + "RND", + "SGN", + "SQR", + "SQRT", + "LOG", + "EXP", + "POW", + "SIN", + "COS", + "TAN", + "SUM", + "AVG", + "NOW", + "TODAY", + "DATE", + "TIME", + "YEAR", + "MONTH", + "DAY", + "HOUR", + "MINUTE", + "SECOND", + "WEEKDAY", + "DATEADD", + "DATEDIFF", + "FORMAT_DATE", + "ISDATE", + "VAL", + "STR", + "CINT", + "CDBL", + "CSTR", + "ISNULL", + "ISEMPTY", + "TYPEOF", + "ISARRAY", + "ISOBJECT", + "ISSTRING", + "ISNUMBER", + "NVL", + "IIF", + "ARRAY", + "UBOUND", + "LBOUND", + "COUNT", + "SORT", + "UNIQUE", + "CONTAINS", + "INDEX_OF", + "PUSH", + "POP", + "SHIFT", + "REVERSE", + "SLICE", + "SPLIT", + "CONCAT", + "FLATTEN", + "RANGE", + "THROW", + "ERROR", + "IS_ERROR", + "ASSERT", + "LOG_ERROR", + "LOG_WARN", + "LOG_INFO", + ]; + let is_basic_command = basic_commands.iter().any(|&cmd| trimmed.starts_with(cmd)); + let is_control_flow = trimmed.starts_with("IF") + || trimmed.starts_with("ELSE") + || trimmed.starts_with("END IF"); + result.push_str(trimmed); + let needs_semicolon = is_basic_command + || !for_stack.is_empty() + || is_control_flow + || (!trimmed.ends_with(';') && !trimmed.ends_with('{') && !trimmed.ends_with('}')); + if needs_semicolon { + result.push(';'); + } + result.push('\n'); + } + assert!(for_stack.is_empty(), "Unclosed FOR EACH loop"); + result + } + pub fn 
compile(&self, script: &str) -> Result> { + let processed_script = self.preprocess_basic_script(script); + info!("Processed Script:\n{}", processed_script); + match self.engine.compile(&processed_script) { + Ok(ast) => Ok(ast), + Err(parse_error) => Err(Box::new(parse_error.into())), + } + } + + /// Compile a tool script (.bas file with PARAM/DESCRIPTION metadata lines) + /// Filters out tool metadata before compiling + pub fn compile_tool_script(&self, script: &str) -> Result> { + // Filter out PARAM, DESCRIPTION, comment, and empty lines (tool metadata) + let executable_script: String = script + .lines() + .filter(|line| { + let trimmed = line.trim(); + // Keep lines that are NOT PARAM, DESCRIPTION, comments, or empty + !(trimmed.starts_with("PARAM ") || + trimmed.starts_with("PARAM\t") || + trimmed.starts_with("DESCRIPTION ") || + trimmed.starts_with("DESCRIPTION\t") || + trimmed.starts_with('\'') || // BASIC comment lines + trimmed.is_empty()) + }) + .collect::>() + .join("\n"); + + info!("[TOOL] Filtered tool metadata: {} -> {} chars", script.len(), executable_script.len()); + + // Apply minimal preprocessing for tools (skip variable normalization to avoid breaking multi-line strings) + let script = preprocess_switch(&executable_script); + let script = Self::convert_multiword_keywords(&script); + // Convert FORMAT(expr, pattern) to FORMAT expr pattern for Rhai space-separated function syntax + let script = Self::convert_format_syntax(&script); + // Skip normalize_variables_to_lowercase for tools - it breaks multi-line strings + + info!("[TOOL] Preprocessed tool script for Rhai compilation"); + // Convert IF ... THEN / END IF to if ... { } + let script = Self::convert_if_then_syntax(&script); + // Convert SELECT ... 
CASE / END SELECT to match expressions + let script = Self::convert_select_case_syntax(&script); + // Convert BASIC keywords to lowercase (but preserve variable casing) + let script = Self::convert_keywords_to_lowercase(&script); + // Save to file for debugging + if let Err(e) = std::fs::write("/tmp/tool_preprocessed.bas", &script) { + log::warn!("Failed to write preprocessed script: {}", e); + } + match self.engine.compile(&script) { + Ok(ast) => Ok(ast), + Err(parse_error) => Err(Box::new(parse_error.into())), + } + } + pub fn run(&mut self, ast: &rhai::AST) -> Result> { + self.engine.eval_ast_with_scope(&mut self.scope, ast) + } + + /// Set a variable in the script scope (for tool parameters) + pub fn set_variable(&mut self, name: &str, value: &str) -> Result<(), Box> { + use rhai::Dynamic; + self.scope.set_or_push(name, Dynamic::from(value.to_string())); + Ok(()) + } + + /// Convert FORMAT(expr, pattern) to FORMAT expr pattern (custom syntax format) + /// Also handles RANDOM and other functions that need space-separated arguments + /// This properly handles nested function calls by counting parentheses + fn convert_format_syntax(script: &str) -> String { + let mut result = String::new(); + let mut chars = script.chars().peekable(); + let mut i = 0; + let bytes = script.as_bytes(); + + while i < bytes.len() { + // Check if this is the start of FORMAT( + if i + 6 <= bytes.len() + && bytes[i..i+6].eq_ignore_ascii_case(b"FORMAT") + && i + 7 < bytes.len() + && bytes[i + 6] == b'(' + { + // Found FORMAT( - now parse the arguments + let mut paren_depth = 1; + let mut j = i + 7; // Start after FORMAT( + let mut comma_pos = None; + + // Find the arguments by tracking parentheses + while j < bytes.len() && paren_depth > 0 { + match bytes[j] { + b'(' => paren_depth += 1, + b')' => { + paren_depth -= 1; + if paren_depth == 0 { + break; + } + } + b',' => { + if paren_depth == 1 { + // This is the comma separating FORMAT's arguments + comma_pos = Some(j); + } + } + _ => {} + 
} + j += 1; + } + + if let Some(comma) = comma_pos { + // Extract the two arguments + let expr = &script[i + 7..comma].trim(); + let pattern = &script[comma + 1..j].trim(); + + // Convert to Rhai space-separated syntax + // Remove quotes from pattern if present, then add them back in the right format + let pattern_clean = pattern.trim_matches('"').trim_matches('\''); + result.push_str(&format!("FORMAT ({expr}) (\"{pattern_clean}\")")); + + i = j + 1; + continue; + } + } + + // Copy the character as-is + if let Some(c) = chars.next() { + result.push(c); + } + i += 1; + } + + result + } + + /// Convert a single TALK line with ${variable} substitution to proper TALK syntax + /// Handles: "Hello ${name}" → TALK "Hello " + name + /// Also handles: "Plain text" → TALK "Plain text" + /// Also handles function calls: "Value: ${FORMAT(x, "n")}" → TALK "Value: " + FORMAT(x, "n") + fn convert_talk_line_with_substitution(line: &str) -> String { + let mut result = String::new(); + let mut chars = line.chars().peekable(); + let mut in_substitution = false; + let mut current_expr = String::new(); + let mut current_literal = String::new(); + + while let Some(c) = chars.next() { + match c { + '$' => { + if let Some(&'{') = chars.peek() { + // Start of ${...} substitution + chars.next(); // consume '{' + + // Add accumulated literal as a string if non-empty + if !current_literal.is_empty() { + if result.is_empty() { + result.push_str("TALK \""); + } else { + result.push_str(" + \""); + } + // Escape any quotes in the literal + let escaped = current_literal.replace('"', "\\\""); + result.push_str(&escaped); + result.push('"'); + current_literal.clear(); + } + + in_substitution = true; + current_expr.clear(); + } else { + // Regular $ character, add to literal + current_literal.push(c); + } + } + '}' if in_substitution => { + // End of ${...} substitution + in_substitution = false; + + // Add the expression (variable or function call) + if !current_expr.is_empty() { + if 
result.is_empty() { + result.push_str(¤t_expr); + } else { + result.push_str(" + "); + result.push_str(¤t_expr); + } + } + current_expr.clear(); + } + _ if in_substitution => { + // Collect expression content, tracking parentheses and quotes + // This handles function calls like FORMAT(x, "pattern") + current_expr.push(c); + + // Track nested parentheses and quoted strings + let mut paren_depth: i32 = 0; + let mut in_string = false; + let mut escape_next = false; + + for ch in current_expr.chars() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' => { + escape_next = true; + } + '"' if !in_string => { + in_string = true; + } + '"' if in_string => { + in_string = false; + } + '(' if !in_string => { + paren_depth += 1; + } + ')' if !in_string => { + paren_depth = paren_depth.saturating_sub(1); + } + _ => {} + } + } + + // Continue collecting expression until we're back at depth 0 + // The closing '}' will handle the end of substitution + } + _ => { + // Regular character, add to literal + current_literal.push(c); + } + } + } + + // Add any remaining literal + if !current_literal.is_empty() { + if result.is_empty() { + result.push_str("TALK \""); + } else { + result.push_str(" + \""); + } + let escaped = current_literal.replace('"', "\\\""); + result.push_str(&escaped); + result.push('"'); + } + + // If result is empty (shouldn't happen), just return a TALK with empty string + if result.is_empty() { + result = "TALK \"\"".to_string(); + } + + log::debug!("[TOOL] Converted TALK line: '{}' → '{}'", line, result); + result + } + + /// Convert a BEGIN MAIL ... 
END MAIL block to SEND EMAIL call + /// Handles multi-line emails with ${variable} substitution + /// Format: + /// BEGIN MAIL recipient + /// Subject: Email subject here + /// + /// Body line 1 with ${variable} + /// Body line 2 with ${anotherVariable} + /// END MAIL + fn convert_mail_block(recipient: &str, lines: &[String]) -> String { + let mut subject = String::new(); + let mut body_lines: Vec = Vec::new(); + let mut in_subject = true; + let mut skip_blank = true; + + for (i, line) in lines.iter().enumerate() { + // Check if this line is a subject line + if line.to_uppercase().starts_with("SUBJECT:") { + subject = line[8..].trim().to_string(); + in_subject = false; + skip_blank = true; + continue; + } + + // Skip blank lines after subject + if skip_blank && line.trim().is_empty() { + skip_blank = false; + continue; + } + + skip_blank = false; + + // Process body line with ${} substitution + let converted = Self::convert_mail_line_with_substitution(line); + body_lines.push(converted); + } + + // Build the body string by concatenating all lines with newlines + let body = if body_lines.is_empty() { + "\"\"".to_string() + } else if body_lines.len() == 1 { + body_lines[0].clone() + } else { + let mut result = body_lines[0].clone(); + for line in &body_lines[1..] 
{ + result.push_str(" + \"\\n\" + "); + result.push_str(line); + } + result + }; + + // Generate the SEND EMAIL call + let result = format!("SEND EMAIL {}, \"{}\", {}", recipient, subject, body); + log::info!("[TOOL] Converted MAIL block → {}", result); + result + } + + /// Convert a single mail line with ${variable} substitution to string concatenation + /// Similar to TALK substitution but doesn't add "TALK" prefix + fn convert_mail_line_with_substitution(line: &str) -> String { + let mut result = String::new(); + let mut chars = line.chars().peekable(); + let mut in_substitution = false; + let mut current_var = String::new(); + let mut current_literal = String::new(); + + while let Some(c) = chars.next() { + match c { + '$' => { + if let Some(&'{') = chars.peek() { + // Start of ${...} substitution + chars.next(); // consume '{' + + // Add accumulated literal as a string if non-empty + if !current_literal.is_empty() { + if result.is_empty() { + result.push_str("\""); + result.push_str(¤t_literal.replace('"', "\\\"")); + result.push('"'); + } else { + result.push_str(" + \""); + result.push_str(¤t_literal.replace('"', "\\\"")); + result.push('"'); + } + current_literal.clear(); + } + + in_substitution = true; + current_var.clear(); + } else { + // Regular $ character, add to literal + current_literal.push(c); + } + } + '}' if in_substitution => { + // End of ${...} substitution + in_substitution = false; + + // Add the variable name + if !current_var.is_empty() { + if result.is_empty() { + result.push_str(¤t_var); + } else { + result.push_str(" + "); + result.push_str(¤t_var); + } + } + current_var.clear(); + } + _ if in_substitution => { + // Collect variable name (allow alphanumeric, underscore, and function call syntax) + if c.is_alphanumeric() || c == '_' || c == '(' || c == ')' || c == ',' || c == ' ' || c == '\"' { + current_var.push(c); + } + } + _ => { + // Regular character, add to literal + if !in_substitution { + current_literal.push(c); + } + } + } + 
} + + // Add any remaining literal + if !current_literal.is_empty() { + if result.is_empty() { + result.push_str("\""); + result.push_str(¤t_literal.replace('"', "\\\"")); + result.push('"'); + } else { + result.push_str(" + \""); + result.push_str(¤t_literal.replace('"', "\\\"")); + result.push('"'); + } + } + + log::debug!("[TOOL] Converted mail line: '{}' → '{}'", line, result); + result + } + + /// Convert BASIC IF ... THEN / END IF syntax to Rhai's if ... { } syntax + fn convert_if_then_syntax(script: &str) -> String { + let mut result = String::new(); + let mut if_stack: Vec = Vec::new(); + let mut in_with_block = false; + let mut in_talk_block = false; + let mut talk_block_lines: Vec = Vec::new(); + let mut in_mail_block = false; + let mut mail_recipient = String::new(); + let mut mail_block_lines: Vec = Vec::new(); + let mut in_line_continuation = false; + + log::info!("[TOOL] Converting IF/THEN syntax, input has {} lines", script.lines().count()); + + for line in script.lines() { + let trimmed = line.trim(); + let upper = trimmed.to_uppercase(); + + // Skip empty lines and comments + if trimmed.is_empty() || trimmed.starts_with('\'') || trimmed.starts_with("//") { + continue; + } + + // Handle IF ... THEN + if upper.starts_with("IF ") && upper.contains(" THEN") { + let then_pos = match upper.find(" THEN") { + Some(pos) => pos, + None => continue, // Skip invalid IF statement + }; + let condition = &trimmed[3..then_pos].trim(); + // Convert BASIC "NOT IN" to Rhai "!in" + let condition = condition.replace(" NOT IN ", " !in ").replace(" not in ", " !in "); + // Convert BASIC "AND" to Rhai "&&" and "OR" to Rhai "||" + let condition = condition.replace(" AND ", " && ").replace(" and ", " && ") + .replace(" OR ", " || ").replace(" or ", " || "); + // Convert BASIC "=" to Rhai "==" for comparisons in IF conditions + // Skip if it's already a comparison operator (==, !=, <=, >=) or assignment (+=, -=, etc.) 
+ let condition = if !condition.contains("==") && !condition.contains("!=") + && !condition.contains("<=") && !condition.contains(">=") + && !condition.contains("+=") && !condition.contains("-=") + && !condition.contains("*=") && !condition.contains("/=") { + condition.replace("=", "==") + } else { + condition.to_string() + }; + log::info!("[TOOL] Converting IF statement: condition='{}'", condition); + result.push_str("if "); + result.push_str(&condition); + result.push_str(" {\n"); + if_stack.push(true); + continue; + } + + // Handle ELSE + if upper == "ELSE" { + log::info!("[TOOL] Converting ELSE statement"); + result.push_str("} else {\n"); + continue; + } + + // Handle END IF + if upper == "END IF" { + log::info!("[TOOL] Converting END IF statement"); + if let Some(_) = if_stack.pop() { + result.push_str("}\n"); + } + continue; + } + + // Handle WITH ... END WITH (BASIC object creation) + if upper.starts_with("WITH ") { + let object_name = &trimmed[5..].trim(); + log::info!("[TOOL] Converting WITH statement: object='{}'", object_name); + // Convert WITH obj → let obj = #{ (start object literal) + result.push_str("let "); + result.push_str(object_name); + result.push_str(" = #{\n"); + in_with_block = true; + continue; + } + + if upper == "END WITH" { + log::info!("[TOOL] Converting END WITH statement"); + result.push_str("};\n"); + in_with_block = false; + continue; + } + + // Handle BEGIN TALK ... 
END TALK (multi-line TALK with ${} substitution) + if upper == "BEGIN TALK" { + log::info!("[TOOL] Converting BEGIN TALK statement"); + in_talk_block = true; + talk_block_lines.clear(); + continue; + } + + if upper == "END TALK" { + log::info!("[TOOL] Converting END TALK statement, processing {} lines", talk_block_lines.len()); + in_talk_block = false; + + // Process each collected line and convert to TALK statement + for talk_line in &talk_block_lines { + let converted = Self::convert_talk_line_with_substitution(talk_line); + result.push_str(&converted); + result.push('\n'); + } + talk_block_lines.clear(); + continue; + } + + // If we're in a TALK block, collect lines + if in_talk_block { + // Skip empty lines but preserve them as blank TALK statements if needed + talk_block_lines.push(trimmed.to_string()); + continue; + } + + // Handle BEGIN MAIL ... END MAIL (multi-line email with ${} substitution) + if upper.starts_with("BEGIN MAIL ") { + let recipient = &trimmed[11..].trim(); // Skip "BEGIN MAIL " + log::info!("[TOOL] Converting BEGIN MAIL statement: recipient='{}'", recipient); + mail_recipient = recipient.to_string(); + in_mail_block = true; + mail_block_lines.clear(); + continue; + } + + if upper == "END MAIL" { + log::info!("[TOOL] Converting END MAIL statement, processing {} lines", mail_block_lines.len()); + in_mail_block = false; + + // Process the mail block and convert to SEND EMAIL + let converted = Self::convert_mail_block(&mail_recipient, &mail_block_lines); + result.push_str(&converted); + result.push('\n'); + + mail_recipient.clear(); + mail_block_lines.clear(); + continue; + } + + // If we're in a MAIL block, collect lines + if in_mail_block { + mail_block_lines.push(trimmed.to_string()); + continue; + } + + // Inside a WITH block - convert property assignments (key = value → key: value) + if in_with_block { + // Check if this is a property assignment (identifier = value) + if trimmed.contains('=') && !trimmed.contains("==") && 
!trimmed.contains("!=") && !trimmed.contains("+=") && !trimmed.contains("-=") { + // Convert assignment to object property syntax + let parts: Vec<&str> = trimmed.splitn(2, '=').collect(); + if parts.len() == 2 { + let property_name = parts[0].trim(); + let property_value = parts[1].trim(); + // Remove trailing semicolon if present + let property_value = property_value.trim_end_matches(';'); + result.push_str(&format!(" {}: {},\n", property_name, property_value)); + continue; + } + } + // Regular line in WITH block - add indentation + result.push_str(" "); + } + + // Handle SAVE table, object → INSERT table, object + // BASIC SAVE uses 2 parameters but Rhai SAVE needs 3 + // INSERT uses 2 parameters which matches the BASIC syntax + if upper.starts_with("SAVE") && upper.contains(',') { + log::info!("[TOOL] Processing SAVE line: '{}'", trimmed); + // Extract table and object name + let after_save = &trimmed[4..].trim(); // Skip "SAVE" + let parts: Vec<&str> = after_save.split(',').collect(); + log::info!("[TOOL] SAVE parts: {:?}", parts); + if parts.len() == 2 { + let table = parts[0].trim().trim_matches('"'); + let object_name = parts[1].trim().trim_end_matches(';'); + // Convert to INSERT table, object + let converted = format!("INSERT \"{}\", {};\n", table, object_name); + log::info!("[TOOL] Converted SAVE to INSERT: '{}'", converted); + result.push_str(&converted); + continue; + } + } + + // Handle SEND EMAIL → send_mail (function call style) + // Syntax: SEND EMAIL to, subject, body → send_mail(to, subject, body, []) + if upper.starts_with("SEND EMAIL") { + log::info!("[TOOL] Processing SEND EMAIL line: '{}'", trimmed); + let after_send = &trimmed[11..].trim(); // Skip "SEND EMAIL " (10 chars + space = 11) + let parts: Vec<&str> = after_send.split(',').collect(); + log::info!("[TOOL] SEND EMAIL parts: {:?}", parts); + if parts.len() == 3 { + let to = parts[0].trim(); + let subject = parts[1].trim(); + let body = parts[2].trim().trim_end_matches(';'); + // 
Convert to send_mail(to, subject, body, []) function call + let converted = format!("send_mail({}, {}, {}, []);\n", to, subject, body); + log::info!("[TOOL] Converted SEND EMAIL to: '{}'", converted); + result.push_str(&converted); + continue; + } + } + + // Regular line - add indentation if inside IF block + if !if_stack.is_empty() { + result.push_str(" "); + } + + // Check if line is a simple statement (not containing THEN or other control flow) + if !upper.starts_with("IF ") && !upper.starts_with("ELSE") && !upper.starts_with("END IF") { + // Check if this is a variable assignment (identifier = expression) + // Pattern: starts with letter/underscore, contains = but not ==, !=, <=, >=, +=, -= + let is_var_assignment = trimmed.chars().next().map_or(false, |c| c.is_alphabetic() || c == '_') + && trimmed.contains('=') + && !trimmed.contains("==") + && !trimmed.contains("!=") + && !trimmed.contains("<=") + && !trimmed.contains(">=") + && !trimmed.contains("+=") + && !trimmed.contains("-=") + && !trimmed.contains("*=") + && !trimmed.contains("/="); + + // Check for line continuation (BASIC uses comma at end of line) + let ends_with_comma = trimmed.ends_with(','); + + // If we're in a line continuation and this is not a variable assignment or statement, + // it's likely a string literal continuation - quote it + let line_to_process = if in_line_continuation && !is_var_assignment + && !trimmed.contains('=') && !trimmed.starts_with('"') && !upper.starts_with("IF ") { + // This is a string literal continuation - quote it and escape any inner quotes + let escaped = trimmed.replace('"', "\\\""); + format!("\"{}\\n\"", escaped) + } else { + trimmed.to_string() + }; + + if is_var_assignment { + // Add 'let' for variable declarations, but only if line doesn't already start with let/LET + let trimmed_lower = trimmed.to_lowercase(); + if !trimmed_lower.starts_with("let ") { + result.push_str("let "); + } + } + result.push_str(&line_to_process); + // Add semicolon if line doesn't 
have one and doesn't end with { or } + // Skip adding semicolons to: + // - SELECT/CASE/END SELECT statements (they're converted to if-else later) + // - Lines ending with comma (BASIC line continuation) + // - Lines that are part of a continuation block (in_line_continuation is true) + if !trimmed.ends_with(';') && !trimmed.ends_with('{') && !trimmed.ends_with('}') + && !upper.starts_with("SELECT ") && !upper.starts_with("CASE ") && upper != "END SELECT" + && !ends_with_comma && !in_line_continuation { + result.push(';'); + } + result.push('\n'); + + // Update line continuation state + in_line_continuation = ends_with_comma; + } else { + result.push_str(trimmed); + result.push('\n'); + } + } + + log::info!("[TOOL] IF/THEN conversion complete, output has {} lines", result.lines().count()); + + // Convert BASIC <> (not equal) to Rhai != globally + let result = result.replace(" <> ", " != "); + + result + } + + /// Convert BASIC SELECT ... CASE / END SELECT to if-else chains + /// Transforms: SELECT var ... CASE "value" ... END SELECT + /// Into: if var == "value" { ... } else if var == "value2" { ... 
} + /// Note: We use if-else instead of match because 'match' is a reserved keyword in Rhai + fn convert_select_case_syntax(script: &str) -> String { + let mut result = String::new(); + let mut lines: Vec<&str> = script.lines().collect(); + let mut i = 0; + + log::info!("[TOOL] Converting SELECT/CASE syntax to if-else chains"); + + while i < lines.len() { + let trimmed = lines[i].trim(); + let upper = trimmed.to_uppercase(); + + // Detect SELECT statement (e.g., "SELECT tipoMissa") + if upper.starts_with("SELECT ") && !upper.contains(" THEN") { + // Extract the variable being selected + let select_var = trimmed[7..].trim(); // Skip "SELECT " + log::info!("[TOOL] Converting SELECT statement for variable: '{}'", select_var); + + // Skip the SELECT line + i += 1; + + // Process CASE statements until END SELECT + let mut current_case_body: Vec = Vec::new(); + let mut in_case = false; + let mut is_first_case = true; + + while i < lines.len() { + let case_trimmed = lines[i].trim(); + let case_upper = case_trimmed.to_uppercase(); + + // Skip empty lines and comment lines within SELECT/CASE blocks + if case_trimmed.is_empty() || case_trimmed.starts_with('\'') { + i += 1; + continue; + } + + if case_upper == "END SELECT" { + // Close any open case + if in_case { + for body_line in ¤t_case_body { + result.push_str(" "); + result.push_str(body_line); + result.push('\n'); + } + // Close the last case arm (no else if, so we need the closing brace) + result.push_str(" }\n"); + current_case_body.clear(); + in_case = false; + } + // No else block needed - just end the chain + i += 1; + break; + } else if case_upper.starts_with("SELECT ") { + // Encountered another SELECT statement while processing this SELECT block + // Close the current if-else chain and break to let the outer loop handle the new SELECT + if in_case { + for body_line in ¤t_case_body { + result.push_str(" "); + result.push_str(body_line); + result.push('\n'); + } + // Close the current case arm (no else if, so we 
need the closing brace) + result.push_str(" }\n"); + current_case_body.clear(); + in_case = false; + } + break; + } else if case_upper.starts_with("CASE ") { + // Close previous case if any (but NOT if we're about to start else if) + if in_case { + for body_line in ¤t_case_body { + result.push_str(" "); + result.push_str(body_line); + result.push('\n'); + } + // NOTE: Don't close the case arm here - the } else if will close it + current_case_body.clear(); + } + + // Extract the case value (handle both CASE "value" and CASE value) + let case_value = if case_trimmed[5..].trim().starts_with('"') { + // CASE "value" format + case_trimmed[5..].trim().to_string() + } else { + // CASE value format (variable/enum) + format!("\"{}\"", case_trimmed[5..].trim()) + }; + + // Start if/else if chain + if is_first_case { + result.push_str(&format!("if {} == {} {{\n", select_var, case_value)); + is_first_case = false; + } else { + result.push_str(&format!("}} else if {} == {} {{\n", select_var, case_value)); + } + in_case = true; + i += 1; + } else if in_case { + // Collect body lines for the current case + current_case_body.push(lines[i].to_string()); + i += 1; + } else { + // We're in the SELECT block but not in a CASE yet + // Skip this line and move to the next + i += 1; + } + } + + continue; + } + + // Not a SELECT statement - just copy the line + if i < lines.len() { + result.push_str(lines[i]); + result.push('\n'); + i += 1; + } + } + + result + } + + /// Convert BASIC keywords to lowercase without touching variables + /// This is a simplified version of normalize_variables_to_lowercase for tools + fn convert_keywords_to_lowercase(script: &str) -> String { + let keywords = [ + "IF", "THEN", "ELSE", "END IF", "FOR", "NEXT", "WHILE", "WEND", + "DO", "LOOP", "RETURN", "EXIT", + "WITH", "END WITH", "AND", "OR", "NOT", "MOD", + "DIM", "AS", "NEW", "FUNCTION", "SUB", "CALL", + ]; + + let mut result = String::new(); + for line in script.lines() { + let mut processed_line = 
line.to_string(); + for keyword in &keywords { + // Use word boundaries to avoid replacing parts of variable names + let pattern = format!(r"\b{}\b", regex::escape(keyword)); + if let Ok(re) = regex::Regex::new(&pattern) { + processed_line = re.replace_all(&processed_line, keyword.to_lowercase()).to_string(); + } + } + result.push_str(&processed_line); + result.push('\n'); + } + result + } + + fn normalize_variables_to_lowercase(script: &str) -> String { + use regex::Regex; + + let mut result = String::new(); + + let keywords = [ + "SET", + "CREATE", + "PRINT", + "FOR", + "FIND", + "GET", + "EXIT", + "IF", + "THEN", + "ELSE", + "END", + "WHILE", + "WEND", + "DO", + "LOOP", + "HEAR", + "TALK", + "NEXT", + "FUNCTION", + "SUB", + "CALL", + "RETURN", + "DIM", + "AS", + "NEW", + "ARRAY", + "OBJECT", + "LET", + "REM", + "AND", + "OR", + "NOT", + "TRUE", + "FALSE", + "NULL", + "SWITCH", + "CASE", + "DEFAULT", + "USE", + "KB", + "TOOL", + "CLEAR", + "ADD", + "SUGGESTION", + "SUGGESTIONS", + "TOOLS", + "CONTEXT", + "USER", + "BOT", + "MEMORY", + "IMAGE", + "VIDEO", + "AUDIO", + "SEE", + "SEND", + "FILE", + "POST", + "PUT", + "PATCH", + "DELETE", + "SAVE", + "INSERT", + "UPDATE", + "MERGE", + "FILL", + "MAP", + "FILTER", + "AGGREGATE", + "JOIN", + "PIVOT", + "GROUP", + "BY", + "READ", + "WRITE", + "COPY", + "MOVE", + "LIST", + "COMPRESS", + "EXTRACT", + "UPLOAD", + "DOWNLOAD", + "GENERATE", + "PDF", + "WEBHOOK", + "TEMPLATE", + "FORM", + "SUBMIT", + "SCORE", + "LEAD", + "QUALIFY", + "AI", + "ABS", + "ROUND", + "INT", + "FIX", + "FLOOR", + "CEIL", + "MAX", + "MIN", + "MOD", + "RANDOM", + "RND", + "SGN", + "SQR", + "SQRT", + "LOG", + "EXP", + "POW", + "SIN", + "COS", + "TAN", + "SUM", + "AVG", + "NOW", + "TODAY", + "DATE", + "TIME", + "YEAR", + "MONTH", + "DAY", + "HOUR", + "MINUTE", + "SECOND", + "WEEKDAY", + "DATEADD", + "DATEDIFF", + "FORMAT", + "ISDATE", + "VAL", + "STR", + "CINT", + "CDBL", + "CSTR", + "ISNULL", + "ISEMPTY", + "TYPEOF", + "ISARRAY", + "ISOBJECT", + 
"ISSTRING", + "ISNUMBER", + "NVL", + "IIF", + "UBOUND", + "LBOUND", + "COUNT", + "SORT", + "UNIQUE", + "CONTAINS", + "INDEX", + "OF", + "PUSH", + "POP", + "SHIFT", + "REVERSE", + "SLICE", + "SPLIT", + "CONCAT", + "FLATTEN", + "RANGE", + "THROW", + "ERROR", + "IS", + "ASSERT", + "WARN", + "INFO", + "EACH", + "WITH", + "TO", + "STEP", + "BEGIN", + "SYSTEM", + "PROMPT", + "SCHEDULE", + "REFRESH", + "ALLOW", + "ROLE", + "ANSWER", + "MODE", + "SYNCHRONIZE", + "TABLE", + "ON", + "EMAIL", + "REPORT", + "RESET", + "WAIT", + "FIRST", + "LAST", + "LLM", + "INSTR", + "NUMERIC", + "LEN", + "LEFT", + "RIGHT", + "MID", + "LOWER", + "UPPER", + "TRIM", + "LTRIM", + "RTRIM", + "REPLACE", + "LIKE", + "DELEGATE", + "PRIORITY", + "BOTS", + "REMOVE", + "MEMBER", + "BOOK", + "REMEMBER", + "TASK", + "SITE", + "DRAFT", + "INSTAGRAM", + "FACEBOOK", + "LINKEDIN", + "TWITTER", + "METRICS", + "HEADER", + "HEADERS", + "GRAPHQL", + "SOAP", + "HTTP", + "DESCRIPTION", + "PARAM", + "REQUIRED", + "WEBSITE", + "MODEL", + ]; + + let _identifier_re = Regex::new(r"([a-zA-Z_][a-zA-Z0-9_]*)").expect("valid regex"); + + for line in script.lines() { + let trimmed = line.trim(); + + if trimmed.starts_with("REM") || trimmed.starts_with('\'') || trimmed.starts_with("//") + { + continue; + } + + // Skip lines with custom syntax that should not be lowercased + // These are registered directly with Rhai in uppercase + let trimmed_upper = trimmed.to_uppercase(); + if trimmed_upper.contains("ADD_SUGGESTION_TOOL") || + trimmed_upper.contains("ADD_SUGGESTION_TEXT") || + trimmed_upper.starts_with("ADD_SUGGESTION_") || + trimmed_upper.starts_with("ADD_MEMBER") { + // Keep original line as-is + result.push_str(line); + result.push('\n'); + continue; + } + + let mut processed_line = String::new(); + let mut chars = line.chars().peekable(); + let mut in_string = false; + let mut string_char = '"'; + let mut current_word = String::new(); + + while let Some(c) = chars.next() { + if in_string { + processed_line.push(c); + 
if c == string_char { + in_string = false; + } else if c == '\\' { + if let Some(&next) = chars.peek() { + processed_line.push(next); + chars.next(); + } + } + } else if c == '"' || c == '\'' { + if !current_word.is_empty() { + processed_line.push_str(&Self::normalize_word(¤t_word, &keywords)); + current_word.clear(); + } + in_string = true; + string_char = c; + processed_line.push(c); + } else if c.is_alphanumeric() || c == '_' { + current_word.push(c); + } else { + if !current_word.is_empty() { + processed_line.push_str(&Self::normalize_word(¤t_word, &keywords)); + current_word.clear(); + } + processed_line.push(c); + } + } + + if !current_word.is_empty() { + processed_line.push_str(&Self::normalize_word(¤t_word, &keywords)); + } + + result.push_str(&processed_line); + result.push('\n'); + } + + result + } + + /// Convert ALL multi-word keywords to underscore versions (function calls) + /// This avoids Rhai custom syntax conflicts and makes the system more secure + /// + /// Examples: + /// - "USE WEBSITE "url"" → "USE_WEBSITE("url")" + /// - "USE WEBSITE "url" REFRESH "interval"" → "USE_WEBSITE("url", "interval")" + /// - "SET BOT MEMORY key AS value" → "SET_BOT_MEMORY(key, value)" + /// - "CLEAR SUGGESTIONS" → "CLEAR_SUGGESTIONS()" + fn convert_multiword_keywords(script: &str) -> String { + use regex::Regex; + + // Known multi-word keywords with their conversion patterns + // Format: (keyword_pattern, min_params, max_params, param_names) + let multiword_patterns = vec![ + // USE family + (r#"USE\s+WEBSITE"#, 1, 2, vec!["url", "refresh"]), + (r#"USE\s+MODEL"#, 1, 1, vec!["model"]), + (r#"USE\s+KB"#, 1, 1, vec!["kb_name"]), + (r#"USE\s+TOOL"#, 1, 1, vec!["tool_path"]), + + // SET family + (r#"SET\s+BOT\s+MEMORY"#, 2, 2, vec!["key", "value"]), + (r#"SET\s+CONTEXT"#, 2, 2, vec!["key", "value"]), + (r#"SET\s+USER"#, 1, 1, vec!["user_id"]), + + // GET family + (r#"GET\s+BOT\s+MEMORY"#, 1, 1, vec!["key"]), + + // CLEAR family + (r#"CLEAR\s+SUGGESTIONS"#, 0, 0, 
vec![]), + (r#"CLEAR\s+TOOLS"#, 0, 0, vec![]), + (r#"CLEAR\s+WEBSITES"#, 0, 0, vec![]), + + // ADD family - ADD_SUGGESTION_TOOL must come before ADD\s+SUGGESTION + (r#"ADD_SUGGESTION_TOOL"#, 2, 2, vec!["tool", "text"]), + (r#"ADD\s+SUGGESTION\s+TEXT"#, 2, 2, vec!["value", "text"]), + (r#"ADD\s+SUGGESTION(?!\s*TEXT|\s*TOOL|_TOOL)"#, 2, 2, vec!["context", "text"]), + (r#"ADD\s+MEMBER"#, 2, 2, vec!["name", "role"]), + + // CREATE family + (r#"CREATE\s+TASK"#, 1, 1, vec!["task"]), + (r#"CREATE\s+DRAFT"#, 4, 4, vec!["to", "subject", "body", "attachments"]), + (r#"CREATE\s+SITE"#, 1, 1, vec!["site"]), + + // ON family + (r#"ON\s+FORM\s+SUBMIT"#, 1, 1, vec!["form"]), + (r#"ON\s+EMAIL"#, 1, 1, vec!["filter"]), + (r#"ON\s+EVENT"#, 1, 1, vec!["event"]), + + // SEND family + (r#"SEND\s+MAIL"#, 4, 4, vec!["to", "subject", "body", "attachments"]), + + // BOOK (calendar) + (r#"BOOK"#, 1, 1, vec!["event"]), + ]; + + let mut result = String::new(); + + for line in script.lines() { + let trimmed = line.trim(); + let mut converted = false; + + // Skip lines that already use underscore-style custom syntax + // These are registered directly with Rhai and should not be converted + let trimmed_upper = trimmed.to_uppercase(); + if trimmed_upper.contains("ADD_SUGGESTION_TOOL") || + trimmed_upper.contains("ADD_SUGGESTION_TEXT") || + trimmed_upper.starts_with("ADD_SUGGESTION_") || + trimmed_upper.starts_with("ADD_MEMBER") || + (trimmed_upper.starts_with("USE_") && trimmed.contains('(')) { + // Keep original line and add semicolon if needed + result.push_str(line); + if !trimmed.ends_with(';') && !trimmed.ends_with('{') && !trimmed.ends_with('}') { + result.push(';'); + } + result.push('\n'); + continue; + } + + // Try each pattern + for (pattern, min_params, max_params, _param_names) in &multiword_patterns { + // Build regex pattern: KEYWORD params... 
+ // Handle quoted strings and unquoted identifiers + let regex_str = format!( + r#"(?i)^\s*{}\s+(.*?)(?:\s*)$"#, + pattern + ); + + if let Ok(re) = Regex::new(®ex_str) { + if let Some(caps) = re.captures(trimmed) { + if let Some(params_str) = caps.get(1) { + let params = Self::parse_parameters(params_str.as_str()); + let param_count = params.len(); + + // Validate parameter count + if param_count >= *min_params && param_count <= *max_params { + // Convert keyword to underscores + let keyword = pattern.replace(r"\s+", "_"); + + // Build function call + let params_str = if params.is_empty() { + String::new() + } else { + params.join(", ") + }; + + result.push_str(&format!("{}({});", keyword, params_str)); + result.push('\n'); + converted = true; + break; + } + } + } + } + } + + // If not converted, keep original line + if !converted { + result.push_str(line); + result.push('\n'); + } + } + + result + } + + /// Parse parameters from a keyword line + /// Handles quoted strings, AS keyword, and comma-separated values + fn parse_parameters(params_str: &str) -> Vec { + let mut params = Vec::new(); + let mut current = String::new(); + let mut in_quotes = false; + let mut quote_char = '"'; + let mut chars = params_str.chars().peekable(); + + while let Some(c) = chars.next() { + match c { + '"' | '\'' if !in_quotes => { + in_quotes = true; + quote_char = c; + current.push(c); + } + '"' | '\'' if in_quotes && c == quote_char => { + in_quotes = false; + current.push(c); + } + ' ' | '\t' if !in_quotes => { + // End of parameter if we have content + if !current.is_empty() { + params.push(current.trim().to_string()); + current = String::new(); + } + } + ',' if !in_quotes => { + // Comma separator + if !current.is_empty() { + params.push(current.trim().to_string()); + current = String::new(); + } + } + _ => { + current.push(c); + } + } + } + + // Don't forget the last parameter + if !current.is_empty() { + params.push(current.trim().to_string()); + } + + params + } + + fn 
normalize_word(word: &str, keywords: &[&str]) -> String { + let upper = word.to_uppercase(); + + if keywords.contains(&upper.as_str()) { + upper + } else if word + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) + { + word.to_string() + } else { + word.to_lowercase() + } + } +} + + + +#[cfg(test)] +pub mod tests; diff --git a/src/core/bootstrap/bootstrap_manager.rs b/src/core/bootstrap/bootstrap_manager.rs index c0cc53121..b2c12e3d0 100644 --- a/src/core/bootstrap/bootstrap_manager.rs +++ b/src/core/bootstrap/bootstrap_manager.rs @@ -106,9 +106,9 @@ impl BootstrapManager { } } - if pm.is_installed("postgres") { + if pm.is_installed("tables") { info!("Starting PostgreSQL..."); - match pm.start("postgres") { + match pm.start("tables") { Ok(_child) => { info!("PostgreSQL started"); } diff --git a/src/core/bot/kb_context.rs b/src/core/bot/kb_context.rs index 3eac6f43b..de64e3d09 100644 --- a/src/core/bot/kb_context.rs +++ b/src/core/bot/kb_context.rs @@ -436,9 +436,22 @@ pub async fn inject_kb_context( return Ok(()); } + // Sanitize context to remove UTF-16 surrogate characters that can't be encoded in UTF-8 + let sanitized_context = context_string + .chars() + .filter(|c| { + let cp = *c as u32; + !(0xD800..=0xDBFF).contains(&cp) && !(0xDC00..=0xDFFF).contains(&cp) + }) + .collect::(); + + if sanitized_context.is_empty() { + return Ok(()); + } + info!( "Injecting {} characters of KB/website context into prompt for session {}", - context_string.len(), + sanitized_context.len(), session_id ); @@ -447,7 +460,7 @@ pub async fn inject_kb_context( if let Some(idx) = system_msg_idx { if let Some(content) = messages_array[idx]["content"].as_str() { - let new_content = format!("{}\n{}", content, context_string); + let new_content = format!("{}\n{}", content, sanitized_context); messages_array[idx]["content"] = serde_json::Value::String(new_content); } } else { @@ -455,7 +468,7 @@ pub async fn inject_kb_context( 0, serde_json::json!({ "role": "system", - 
"content": context_string + "content": sanitized_context }), ); } diff --git a/src/core/bot/mod.rs b/src/core/bot/mod.rs index 32b2341ab..263756bf9 100644 --- a/src/core/bot/mod.rs +++ b/src/core/bot/mod.rs @@ -580,11 +580,20 @@ impl BotOrchestrator { } } + // Sanitize user message to remove any UTF-16 surrogate characters + let sanitized_message_content = message_content + .chars() + .filter(|c| { + let cp = *c as u32; + !(0xD800..=0xDBFF).contains(&cp) && !(0xDC00..=0xDFFF).contains(&cp) + }) + .collect::(); + // Add the current user message to the messages array if let Some(msgs_array) = messages.as_array_mut() { msgs_array.push(serde_json::json!({ "role": "user", - "content": message_content + "content": sanitized_message_content })); } @@ -644,6 +653,7 @@ impl BotOrchestrator { let mut analysis_buffer = String::new(); let mut in_analysis = false; let mut tool_call_buffer = String::new(); // Accumulate potential tool call JSON chunks + let mut accumulating_tool_call = false; // Track if we're currently accumulating a tool call let handler = llm_models::get_handler(&model); info!("[STREAM_START] Entering stream processing loop for model: {}", model); @@ -679,12 +689,62 @@ impl BotOrchestrator { // ===== GENERIC TOOL EXECUTION ===== // Add chunk to tool_call_buffer and try to parse // Tool calls arrive as JSON that can span multiple chunks - let looks_like_json = chunk.trim().starts_with('{') || chunk.trim().starts_with('[') || - tool_call_buffer.contains('{') || tool_call_buffer.contains('['); - let chunk_in_tool_buffer = if looks_like_json { + // Check if this chunk contains JSON (either starts with {/[ or contains {/[) + let chunk_contains_json = chunk.trim().starts_with('{') || chunk.trim().starts_with('[') || + chunk.contains('{') || chunk.contains('['); + + let chunk_in_tool_buffer = if accumulating_tool_call { + // Already accumulating - add entire chunk to buffer tool_call_buffer.push_str(&chunk); true + } else if chunk_contains_json { + // Check if { 
appears in the middle of the chunk (mixed text + JSON) + let json_start = chunk.find('{').or_else(|| chunk.find('[')); + + if let Some(pos) = json_start { + if pos > 0 { + // Send the part before { as regular content + let regular_part = &chunk[..pos]; + if !regular_part.trim().is_empty() { + info!("[STREAM_CONTENT] Sending regular part before JSON: '{}', len: {}", regular_part, regular_part.len()); + full_response.push_str(regular_part); + + let response = BotResponse { + bot_id: message.bot_id.clone(), + user_id: message.user_id.clone(), + session_id: message.session_id.clone(), + channel: message.channel.clone(), + content: regular_part.to_string(), + message_type: MessageType::BOT_RESPONSE, + stream_token: None, + is_complete: false, + suggestions: Vec::new(), + context_name: None, + context_length: 0, + context_max_length: 0, + }; + + if response_tx.send(response).await.is_err() { + warn!("Response channel closed"); + break; + } + } + + // Start accumulating from { onwards + accumulating_tool_call = true; + tool_call_buffer.push_str(&chunk[pos..]); + true + } else { + // Chunk starts with { or [ + accumulating_tool_call = true; + tool_call_buffer.push_str(&chunk); + true + } + } else { + // Contains {/[ but find() failed - shouldn't happen, but send as regular content + false + } } else { false }; @@ -774,13 +834,15 @@ impl BotOrchestrator { // Don't add tool_call JSON to full_response or analysis_buffer // Clear the tool_call_buffer since we found and executed a tool call tool_call_buffer.clear(); + accumulating_tool_call = false; // Reset accumulation flag // Continue to next chunk continue; } // Clear tool_call_buffer if it's getting too large and no tool call was found // This prevents memory issues from accumulating JSON fragments - if tool_call_buffer.len() > 10000 { + // Increased limit to 50000 to handle large tool calls with many parameters + if tool_call_buffer.len() > 50000 { // Flush accumulated content to client since it's too large to be a tool 
call info!("[TOOL_EXEC] Flushing tool_call_buffer (too large, assuming not a tool call)"); full_response.push_str(&tool_call_buffer); @@ -801,6 +863,7 @@ impl BotOrchestrator { }; tool_call_buffer.clear(); + accumulating_tool_call = false; // Reset accumulation flag after flush if response_tx.send(response).await.is_err() { warn!("Response channel closed"); @@ -810,7 +873,7 @@ impl BotOrchestrator { // If this chunk was added to tool_call_buffer and no tool call was found yet, // skip processing (it's part of an incomplete tool call JSON) - if chunk_in_tool_buffer && tool_call_buffer.len() <= 10000 { + if chunk_in_tool_buffer { continue; } // ===== END TOOL EXECUTION ===== diff --git a/src/core/bot/tool_executor.rs b/src/core/bot/tool_executor.rs index 5700f13e3..60a461523 100644 --- a/src/core/bot/tool_executor.rs +++ b/src/core/bot/tool_executor.rs @@ -2,6 +2,7 @@ /// Works across all LLM providers (GLM, OpenAI, Claude, etc.) use log::{error, info, warn}; use serde_json::Value; +use std::collections::HashMap; use std::fs::OpenOptions; use std::io::Write; use std::path::Path; @@ -264,6 +265,8 @@ impl ToolExecutor { script_service.load_bot_config_params(state, bot_id); // Set tool parameters as variables in the engine scope + // Note: DATE parameters are now sent by LLM in ISO 8601 format (YYYY-MM-DD) + // The tool schema with format="date" tells the LLM to use this agnostic format if let Some(obj) = arguments.as_object() { for (key, value) in obj { let value_str = match value { diff --git a/src/core/package_manager/facade.rs b/src/core/package_manager/facade.rs index eb1e18aad..00527f5e4 100644 --- a/src/core/package_manager/facade.rs +++ b/src/core/package_manager/facade.rs @@ -553,7 +553,7 @@ Store credentials in Vault: r"Email Server (Stalwart): SMTP: {}:25 IMAP: {}:143 - Web: http://{}:8080 + Web: http://{}:9000 Store credentials in Vault: botserver vault put gbo/email server={} port=25 username=admin password=", @@ -563,11 +563,11 @@ Store credentials in 
Vault: "directory" => { format!( r"Zitadel Identity Provider: - URL: http://{}:8080 - Console: http://{}:8080/ui/console + URL: http://{}:9000 + Console: http://{}:9000/ui/console Store credentials in Vault: - botserver vault put gbo/directory url=http://{}:8080 client_id= client_secret=", + botserver vault put gbo/directory url=http://{}:9000 client_id= client_secret=", ip, ip, ip ) } diff --git a/src/core/package_manager/installer.rs b/src/core/package_manager/installer.rs index 18afca6da..730448466 100644 --- a/src/core/package_manager/installer.rs +++ b/src/core/package_manager/installer.rs @@ -602,7 +602,7 @@ impl PackageManager { post_install_cmds_windows: vec![], env_vars: HashMap::new(), data_download_list: Vec::new(), - exec_cmd: "php -S 0.0.0.0:8080 -t {{DATA_PATH}}/roundcubemail".to_string(), + exec_cmd: "php -S 0.0.0.0:9000 -t {{DATA_PATH}}/roundcubemail".to_string(), check_cmd: "curl -f -k --connect-timeout 2 -m 5 https://localhost:8300 >/dev/null 2>&1" .to_string(), diff --git a/src/core/package_manager/setup/email_setup.rs b/src/core/package_manager/setup/email_setup.rs index 359077cc7..09b98bb94 100644 --- a/src/core/package_manager/setup/email_setup.rs +++ b/src/core/package_manager/setup/email_setup.rs @@ -203,7 +203,7 @@ impl EmailSetup { let issuer_url = dir_config["base_url"] .as_str() - .unwrap_or("http://localhost:8080"); + .unwrap_or("http://localhost:9000"); log::info!("Setting up OIDC authentication with Directory..."); log::info!("Issuer URL: {}", issuer_url); @@ -289,7 +289,7 @@ protocol = "imap" tls.implicit = true [server.listener."http"] -bind = ["0.0.0.0:8080"] +bind = ["0.0.0.0:9000"] protocol = "http" [storage] @@ -315,7 +315,7 @@ store = "sqlite" r#" [directory."oidc"] type = "oidc" -issuer = "http://localhost:8080" +issuer = "http://localhost:9000" client-id = "{{CLIENT_ID}}" client-secret = "{{CLIENT_SECRET}}" diff --git a/src/core/secrets/mod.rs b/src/core/secrets/mod.rs index a185a1b11..e7e1f7c03 100644 --- 
a/src/core/secrets/mod.rs +++ b/src/core/secrets/mod.rs @@ -381,7 +381,7 @@ impl SecretsManager { secrets.insert("token".into(), String::new()); } SecretPaths::ALM => { - secrets.insert("url".into(), "http://localhost:8080".into()); + secrets.insert("url".into(), "http://localhost:9000".into()); secrets.insert("username".into(), String::new()); secrets.insert("password".into(), String::new()); } diff --git a/src/core/shared/test_utils.rs b/src/core/shared/test_utils.rs index f71e6b8c6..f24445eaa 100644 --- a/src/core/shared/test_utils.rs +++ b/src/core/shared/test_utils.rs @@ -249,13 +249,13 @@ impl Default for TestAppStateBuilder { #[cfg(feature = "directory")] pub fn create_mock_auth_service() -> AuthService { let config = ZitadelConfig { - issuer_url: "http://localhost:8080".to_string(), - issuer: "http://localhost:8080".to_string(), + issuer_url: "http://localhost:9000".to_string(), + issuer: "http://localhost:9000".to_string(), client_id: "mock_client_id".to_string(), client_secret: "mock_client_secret".to_string(), redirect_uri: "http://localhost:3000/callback".to_string(), project_id: "mock_project_id".to_string(), - api_url: "http://localhost:8080".to_string(), + api_url: "http://localhost:9000".to_string(), service_account_key: None, }; diff --git a/src/core/shared/utils.rs b/src/core/shared/utils.rs index 5af883372..6d5ac328d 100644 --- a/src/core/shared/utils.rs +++ b/src/core/shared/utils.rs @@ -549,10 +549,103 @@ fn estimate_chars_per_token(model: &str) -> usize { if model.contains("gpt") || model.contains("claude") { 4 // GPT/Claude models: ~4 chars per token } else if model.contains("llama") || model.contains("mistral") { - 3 // Llama/Mistral models: ~3 chars per token + 3 // Llama/Mistral models: ~3 chars per token } else if model.contains("bert") || model.contains("mpnet") { 4 // BERT-based models: ~4 chars per token } else { 4 // Default conservative estimate } } + +/// Convert date string from user locale format to ISO format (YYYY-MM-DD) for 
PostgreSQL. +/// +/// The LLM automatically formats dates according to the user's language/idiom based on: +/// 1. The conversation context (user's language) +/// 2. The PARAM LIKE example (e.g., "15/12/2026" for DD/MM/YYYY) +/// +/// This function handles the most common formats: +/// - ISO: YYYY-MM-DD (already in ISO, returned as-is) +/// - Brazilian/Portuguese: DD/MM/YYYY or DD/MM/YY +/// - US/English: MM/DD/YYYY or MM/DD/YY +/// +/// If the value doesn't match any date pattern, returns it unchanged. +/// +/// NOTE: This function does NOT try to guess ambiguous formats. +/// The LLM is responsible for formatting dates correctly based on user language. +/// The PARAM declaration's LIKE example tells the LLM the expected format. +/// +/// # Arguments +/// * `value` - The date string to convert (as provided by the LLM) +/// +/// # Returns +/// ISO formatted date string (YYYY-MM-DD) or original value if not a recognized date +pub fn convert_date_to_iso_format(value: &str) -> String { + let value = value.trim(); + + // Already in ISO format (YYYY-MM-DD) - return as-is + if value.len() == 10 && value.chars().nth(4) == Some('-') && value.chars().nth(7) == Some('-') { + let parts: Vec<&str> = value.split('-').collect(); + if parts.len() == 3 + && parts[0].len() == 4 + && parts[1].len() == 2 + && parts[2].len() == 2 + && parts[0].chars().all(|c| c.is_ascii_digit()) + && parts[1].chars().all(|c| c.is_ascii_digit()) + && parts[2].chars().all(|c| c.is_ascii_digit()) + { + if let (Ok(year), Ok(month), Ok(day)) = + (parts[0].parse::(), parts[1].parse::(), parts[2].parse::()) + { + if month >= 1 && month <= 12 && day >= 1 && day <= 31 && year >= 1900 && year <= 2100 { + return value.to_string(); + } + } + } + } + + // Handle slash-separated formats: DD/MM/YYYY or MM/DD/YYYY + // We need to detect which format based on the PARAM declaration's LIKE example + // For now, default to DD/MM/YYYY (Brazilian format) as this is the most common for this bot + // TODO: Pass 
language/idiom from session to determine correct format + if value.len() >= 8 && value.len() <= 10 { + let parts: Vec<&str> = value.split('/').collect(); + if parts.len() == 3 { + let all_numeric = parts[0].chars().all(|c| c.is_ascii_digit()) + && parts[1].chars().all(|c| c.is_ascii_digit()) + && parts[2].chars().all(|c| c.is_ascii_digit()); + + if all_numeric { + // Parse the three parts + let a = parts[0].parse::().ok(); + let b = parts[1].parse::().ok(); + let c = if parts[2].len() == 2 { + // Convert 2-digit year to 4-digit + parts[2].parse::().ok().map(|y| { + if y < 50 { + 2000 + y + } else { + 1900 + y + } + }) + } else { + parts[2].parse::().ok() + }; + + if let (Some(first), Some(second), Some(third)) = (a, b, c) { + // Default: DD/MM/YYYY format (Brazilian/Portuguese) + // The LLM should format dates according to the user's language + // and the PARAM LIKE example (e.g., "15/12/2026" for DD/MM/YYYY) + let (year, month, day) = (third, second, first); + + // Validate the determined date + if day >= 1 && day <= 31 && month >= 1 && month <= 12 && year >= 1900 && year <= 2100 { + return format!("{:04}-{:02}-{:02}", year, month, day); + } + } + } + } + } + + // Not a recognized date pattern, return unchanged + value.to_string() +} diff --git a/src/core/urls.rs b/src/core/urls.rs index 33cb9b76c..97153170d 100644 --- a/src/core/urls.rs +++ b/src/core/urls.rs @@ -479,7 +479,7 @@ impl ApiUrls { pub struct InternalUrls; impl InternalUrls { - pub const DIRECTORY_BASE: &'static str = "http://localhost:8080"; + pub const DIRECTORY_BASE: &'static str = "http://localhost:9000"; pub const DATABASE: &'static str = "postgres://localhost:5432"; pub const CACHE: &'static str = "redis://localhost:6379"; pub const DRIVE: &'static str = "https://localhost:9000"; diff --git a/src/email/messages.rs b/src/email/messages.rs index 8de2aa160..1de918d87 100644 --- a/src/email/messages.rs +++ b/src/email/messages.rs @@ -65,8 +65,8 @@ fn is_tracking_pixel_enabled(state: &Arc, bot_id: 
Option) -> boo fn inject_tracking_pixel(html_body: &str, tracking_id: &str, state: &Arc) -> String { let config_manager = crate::core::config::ConfigManager::new(state.conn.clone()); let base_url = config_manager - .get_config(&Uuid::nil(), "server-url", Some("http://localhost:8080")) - .unwrap_or_else(|_| "http://localhost:8080".to_string()); + .get_config(&Uuid::nil(), "server-url", Some("http://localhost:9000")) + .unwrap_or_else(|_| "http://localhost:9000".to_string()); let pixel_url = format!("{}/api/email/tracking/pixel/{}", base_url, tracking_id); let pixel_html = format!( diff --git a/src/email/tracking.rs b/src/email/tracking.rs index b10ab6fe7..296c1d4f0 100644 --- a/src/email/tracking.rs +++ b/src/email/tracking.rs @@ -31,8 +31,8 @@ pub fn is_tracking_pixel_enabled(state: &Arc, bot_id: Option) -> pub fn inject_tracking_pixel(html_body: &str, tracking_id: &str, state: &Arc) -> String { let config_manager = crate::core::config::ConfigManager::new(state.conn.clone()); let base_url = config_manager - .get_config(&Uuid::nil(), "server-url", Some("http://localhost:8080")) - .unwrap_or_else(|_| "http://localhost:8080".to_string()); + .get_config(&Uuid::nil(), "server-url", Some("http://localhost:9000")) + .unwrap_or_else(|_| "http://localhost:9000".to_string()); let pixel_url = format!("{}/api/email/tracking/pixel/{}", base_url, tracking_id); let pixel_html = format!( diff --git a/src/llm/claude.rs b/src/llm/claude.rs index 6a117eacf..450bf8633 100644 --- a/src/llm/claude.rs +++ b/src/llm/claude.rs @@ -232,6 +232,16 @@ impl ClaudeClient { (system_prompt, claude_messages) } + /// Sanitizes a string by removing invalid UTF-8 surrogate characters + fn sanitize_utf8(input: &str) -> String { + input.chars() + .filter(|c| { + let cp = *c as u32; + !(0xD800..=0xDBFF).contains(&cp) && !(0xDC00..=0xDFFF).contains(&cp) + }) + .collect() + } + pub fn build_messages( system_prompt: &str, context_data: &str, @@ -241,15 +251,15 @@ impl ClaudeClient { let mut system_parts = 
Vec::new(); if !system_prompt.is_empty() { - system_parts.push(system_prompt.to_string()); + system_parts.push(Self::sanitize_utf8(system_prompt)); } if !context_data.is_empty() { - system_parts.push(context_data.to_string()); + system_parts.push(Self::sanitize_utf8(context_data)); } for (role, content) in history { if role == "episodic" || role == "compact" { - system_parts.push(format!("[Previous conversation summary]: {content}")); + system_parts.push(format!("[Previous conversation summary]: {}", Self::sanitize_utf8(content))); } } @@ -270,7 +280,8 @@ impl ClaudeClient { }; if let Some(norm_role) = normalized_role { - if content.is_empty() { + let sanitized_content = Self::sanitize_utf8(content); + if sanitized_content.is_empty() { continue; } @@ -278,14 +289,14 @@ impl ClaudeClient { if let Some(last_msg) = messages.last_mut() { let last_msg: &mut ClaudeMessage = last_msg; last_msg.content.push_str("\n\n"); - last_msg.content.push_str(content); + last_msg.content.push_str(&sanitized_content); continue; } } messages.push(ClaudeMessage { role: norm_role.clone(), - content: content.clone(), + content: sanitized_content, }); last_role = Some(norm_role); } diff --git a/src/llm/glm.rs b/src/llm/glm.rs index 277709d3c..08463d59f 100644 --- a/src/llm/glm.rs +++ b/src/llm/glm.rs @@ -116,6 +116,16 @@ impl GLMClient { // GLM/z.ai uses /chat/completions (not /v1/chat/completions) format!("{}/chat/completions", self.base_url) } + + /// Sanitizes a string by removing invalid UTF-8 surrogate characters + fn sanitize_utf8(input: &str) -> String { + input.chars() + .filter(|c| { + let cp = *c as u32; + !(0xD800..=0xDBFF).contains(&cp) && !(0xDC00..=0xDFFF).contains(&cp) + }) + .collect() + } } #[async_trait] @@ -183,11 +193,6 @@ impl LLMProvider for GLMClient { key: &str, tools: Option<&Vec>, ) -> Result<(), Box> { - // DEBUG: Log what we received - info!("[GLM_DEBUG] config type: {}", config); - info!("[GLM_DEBUG] prompt: '{}'", prompt); - info!("[GLM_DEBUG] config as JSON: 
{}", serde_json::to_string_pretty(config).unwrap_or_default()); - // config IS the messages array directly, not nested let messages = if let Some(msgs) = config.as_array() { // Convert messages from config format to GLM format @@ -195,25 +200,23 @@ impl LLMProvider for GLMClient { .filter_map(|m| { let role = m.get("role")?.as_str()?; let content = m.get("content")?.as_str()?; - info!("[GLM_DEBUG] Processing message - role: {}, content: '{}'", role, content); - if !content.is_empty() { + let sanitized = Self::sanitize_utf8(content); + if !sanitized.is_empty() { Some(GLMMessage { role: role.to_string(), - content: Some(content.to_string()), + content: Some(sanitized), tool_calls: None, }) } else { - info!("[GLM_DEBUG] Skipping empty content message"); None } }) .collect::>() } else { // Fallback to building from prompt - info!("[GLM_DEBUG] No array found, using prompt: '{}'", prompt); vec![GLMMessage { role: "user".to_string(), - content: Some(prompt.to_string()), + content: Some(Self::sanitize_utf8(prompt)), tool_calls: None, }] }; @@ -223,8 +226,6 @@ impl LLMProvider for GLMClient { return Err("No valid messages in request".into()); } - info!("[GLM_DEBUG] Final GLM messages count: {}", messages.len()); - // Use glm-4.7 for tool calling support // GLM-4.7 supports standard OpenAI-compatible function calling let model_name = if model == "glm-4" { "glm-4.7" } else { model }; @@ -249,10 +250,6 @@ impl LLMProvider for GLMClient { let url = self.build_url(); info!("GLM streaming request to: {}", url); - // Log the exact request being sent - let request_json = serde_json::to_string_pretty(&request).unwrap_or_default(); - info!("GLM request body: {}", request_json); - let response = self .client .post(&url) @@ -292,18 +289,13 @@ impl LLMProvider for GLMClient { if line.starts_with("data: ") { let json_str = line[6..].trim(); - info!("[GLM_SSE] Received SSE line ({} chars): {}", json_str.len(), json_str); if let Ok(chunk_data) = serde_json::from_str::(json_str) { if let 
Some(choices) = chunk_data.get("choices").and_then(|c| c.as_array()) { for choice in choices { - info!("[GLM_SSE] Processing choice"); if let Some(delta) = choice.get("delta") { - info!("[GLM_SSE] Delta: {}", serde_json::to_string(delta).unwrap_or_default()); - // Handle tool_calls (GLM-4.7 standard function calling) if let Some(tool_calls) = delta.get("tool_calls").and_then(|t| t.as_array()) { for tool_call in tool_calls { - info!("[GLM_SSE] Tool call detected: {}", serde_json::to_string(tool_call).unwrap_or_default()); // Send tool_calls as JSON for the calling code to process let tool_call_json = serde_json::json!({ "type": "tool_call", @@ -323,7 +315,6 @@ impl LLMProvider for GLMClient { // This makes GLM behave like OpenAI-compatible APIs if let Some(content) = delta.get("content").and_then(|c| c.as_str()) { if !content.is_empty() { - info!("[GLM_TX] Sending to channel: '{}'", content); match tx.send(content.to_string()).await { Ok(_) => {}, Err(e) => { @@ -331,11 +322,9 @@ impl LLMProvider for GLMClient { } } } - } else { - info!("[GLM_SSE] No content field in delta"); } } else { - info!("[GLM_SSE] No delta in choice"); + // No delta in choice } if let Some(reason) = choice.get("finish_reason").and_then(|r| r.as_str()) { if !reason.is_empty() { diff --git a/src/llm/mod.rs b/src/llm/mod.rs index 50f610e4e..08d1136ca 100644 --- a/src/llm/mod.rs +++ b/src/llm/mod.rs @@ -185,6 +185,17 @@ impl OpenAIClient { } } + /// Sanitizes a string by removing invalid UTF-8 surrogate characters + /// that cannot be encoded in valid UTF-8 (surrogates are only valid in UTF-16) + fn sanitize_utf8(input: &str) -> String { + input.chars() + .filter(|c| { + let cp = *c as u32; + !(0xD800..=0xDBFF).contains(&cp) && !(0xDC00..=0xDFFF).contains(&cp) + }) + .collect() + } + pub fn build_messages( system_prompt: &str, context_data: &str, @@ -194,19 +205,19 @@ impl OpenAIClient { if !system_prompt.is_empty() { messages.push(serde_json::json!({ "role": "system", - "content": system_prompt 
+ "content": Self::sanitize_utf8(system_prompt) })); } if !context_data.is_empty() { messages.push(serde_json::json!({ "role": "system", - "content": context_data + "content": Self::sanitize_utf8(context_data) })); } for (role, content) in history { messages.push(serde_json::json!({ "role": role, - "content": content + "content": Self::sanitize_utf8(content) })); } serde_json::Value::Array(messages) @@ -747,10 +758,10 @@ mod tests { fn test_openai_client_new_custom_url() { let client = OpenAIClient::new( "test_key".to_string(), - Some("http://localhost:8080".to_string()), + Some("http://localhost:9000".to_string()), None, ); - assert_eq!(client.base_url, "http://localhost:8080"); + assert_eq!(client.base_url, "http://localhost:9000"); } #[test] diff --git a/src/security/cors.rs b/src/security/cors.rs index 3d2389e01..226819cd0 100644 --- a/src/security/cors.rs +++ b/src/security/cors.rs @@ -101,13 +101,13 @@ impl CorsConfig { Self { allowed_origins: vec![ "http://localhost:3000".to_string(), - "http://localhost:8080".to_string(), + "http://localhost:9000".to_string(), "http://localhost:8300".to_string(), "http://127.0.0.1:3000".to_string(), - "http://127.0.0.1:8080".to_string(), + "http://127.0.0.1:9000".to_string(), "http://127.0.0.1:8300".to_string(), "https://localhost:3000".to_string(), - "https://localhost:8080".to_string(), + "https://localhost:9000".to_string(), "https://localhost:8300".to_string(), ], allowed_methods: vec![ @@ -576,7 +576,7 @@ mod tests { assert!(is_localhost_origin("http://localhost:3000")); assert!(is_localhost_origin("https://localhost:8443")); assert!(is_localhost_origin("http://127.0.0.1")); - assert!(is_localhost_origin("http://127.0.0.1:8080")); + assert!(is_localhost_origin("http://127.0.0.1:9000")); assert!(!is_localhost_origin("http://example.com")); }