From 1c4cc2f986377a87b6d35af1a946324cc3ba63df Mon Sep 17 00:00:00 2001
From: "Rodrigo Rodriguez (Pragmatismo)"
Date: Wed, 14 Jan 2026 10:13:33 -0300
Subject: [PATCH] Add SEARCH keyword and store tools: classify-product, search-products, calculate-shipping

---
 src/basic/keywords/mod.rs    |   3 +
 src/basic/keywords/search.rs | 513 +++++++++++++++++++++++++++++++++++
 src/basic/mod.rs             |   2 +
 3 files changed, 518 insertions(+)
 create mode 100644 src/basic/keywords/search.rs

diff --git a/src/basic/keywords/mod.rs b/src/basic/keywords/mod.rs
index ab3be8234..e0645fcec 100644
--- a/src/basic/keywords/mod.rs
+++ b/src/basic/keywords/mod.rs
@@ -26,6 +26,7 @@ pub mod face_api;
 pub mod file_operations;
 pub mod find;
 pub mod first;
+pub mod search;
 pub mod for_next;
 pub mod format;
 pub mod get;
@@ -123,6 +124,8 @@ pub fn get_all_keywords() -> Vec<String> {
         "FILTER".to_string(),
         "FIND".to_string(),
         "FIRST".to_string(),
+        "SEARCH".to_string(),
+        "AUTOCOMPLETE".to_string(),
         "GROUP BY".to_string(),
         "INSERT".to_string(),
         "JOIN".to_string(),
diff --git a/src/basic/keywords/search.rs b/src/basic/keywords/search.rs
new file mode 100644
index 000000000..9e10e6b5c
--- /dev/null
+++ b/src/basic/keywords/search.rs
@@ -0,0 +1,513 @@
+//! SEARCH keyword implementation for fuzzy table search with autocomplete support
+//!
+//! Provides fast product search across tables using PostgreSQL trigram
+//! similarity (`pg_trgm`) for fuzzy matching, a plain `ILIKE` substring search
+//! as fallback, and prefix-based autocomplete.
+//!
+//! Usage in .bas files:
+//!   results = SEARCH "Products.csv", "chocolate", 10
+//!   results = SEARCH "products", query, limit
+//!   names = AUTOCOMPLETE("products", "choc", 5)
+
+use super::table_access::{check_table_access, filter_fields_by_role, AccessType, UserRoles};
+use crate::security::sql_guard::sanitize_identifier;
+use crate::shared::models::UserSession;
+use crate::shared::state::AppState;
+use crate::shared::utils;
+use crate::shared::utils::to_array;
+use diesel::pg::PgConnection;
+use diesel::prelude::*;
+use diesel::sql_types::{Integer, Text};
+use log::{error, trace, warn};
+use rhai::{Dynamic, Engine};
+use serde_json::{json, Value};
+
+/// Row limit applied when a script calls `SEARCH` without an explicit limit.
+const DEFAULT_LIMIT: i32 = 10;
+
+/// Result row of the fallback query: one table row serialized as JSON text.
+#[derive(QueryableByName)]
+struct JsonRow {
+    #[diesel(sql_type = Text)]
+    row_data: String,
+}
+
+/// Result row of the ranked query: the JSON-serialized row plus its score.
+#[derive(QueryableByName)]
+struct SearchResultRow {
+    #[diesel(sql_type = Text)]
+    row_data: String,
+    #[diesel(sql_type = diesel::sql_types::Float)]
+    relevance: f32,
+}
+
+/// Registers the SEARCH keyword and the AUTOCOMPLETE function with the Rhai engine.
+///
+/// Syntax:
+/// - `SEARCH "table_name", "query", limit`
+/// - `SEARCH "table_name", "query"` (limit defaults to 10)
+/// - `AUTOCOMPLETE(table, prefix, limit)` / `autocomplete(table, prefix, limit)`
+///
+/// Every form checks read access to the table for the roles of `user` before
+/// touching it. `SEARCH` raises a script error on failure; `AUTOCOMPLETE`
+/// returns an empty array instead.
+///
+/// # Panics
+///
+/// Panics if the engine rejects one of the `SEARCH` syntax definitions.
+pub fn search_keyword(state: &AppState, user: UserSession, engine: &mut Engine) {
+    let connection = state.conn.clone();
+    let user_roles = UserRoles::from_user_session(&user);
+
+    // SEARCH table, query, limit
+    engine
+        .register_custom_syntax(
+            ["SEARCH", "$expr$", ",", "$expr$", ",", "$expr$"],
+            false,
+            {
+                let conn = connection.clone();
+                let roles = user_roles.clone();
+                move |context, inputs| {
+                    let table = context.eval_expression_tree(&inputs[0])?.to_string();
+                    let query = context.eval_expression_tree(&inputs[1])?.to_string();
+                    // A non-integer limit falls back to the default.
+                    let limit = context
+                        .eval_expression_tree(&inputs[2])?
+                        .as_int()
+                        .map_or(DEFAULT_LIMIT, clamp_limit);
+
+                    let mut binding = conn.get().map_err(|e| format!("DB error: {e}"))?;
+                    run_search(&mut binding, &roles, &table, &query, limit)
+                }
+            },
+        )
+        .expect("valid syntax registration");
+
+    // SEARCH table, query (default limit)
+    // NOTE(review): Rhai appears to key custom syntax by its leading symbol, so
+    // this second `SEARCH` registration may replace the three-argument form
+    // above -- confirm that both forms still parse in scripts.
+    engine
+        .register_custom_syntax(["SEARCH", "$expr$", ",", "$expr$"], false, {
+            let conn = connection.clone();
+            let roles = user_roles.clone();
+            move |context, inputs| {
+                let table = context.eval_expression_tree(&inputs[0])?.to_string();
+                let query = context.eval_expression_tree(&inputs[1])?.to_string();
+
+                let mut binding = conn.get().map_err(|e| format!("DB error: {e}"))?;
+                run_search(&mut binding, &roles, &table, &query, DEFAULT_LIMIT)
+            }
+        })
+        .expect("valid syntax registration");
+
+    // AUTOCOMPLETE is a plain function; register both spellings with one implementation.
+    for fn_name in ["AUTOCOMPLETE", "autocomplete"] {
+        let conn = connection.clone();
+        let roles = user_roles.clone();
+        engine.register_fn(
+            fn_name,
+            move |table: String, prefix: String, limit: i64| -> rhai::Array {
+                let mut binding = match conn.get() {
+                    Ok(c) => c,
+                    Err(e) => {
+                        error!("AUTOCOMPLETE DB error: {e}");
+                        return rhai::Array::new();
+                    }
+                };
+                run_autocomplete(&mut binding, &roles, &table, &prefix, clamp_limit(limit))
+            },
+        );
+    }
+}
+
+/// Runs one `SEARCH` call: access check, query, then role-based field filtering.
+///
+/// Returns the matching rows as a Rhai array (empty when nothing matched).
+///
+/// # Errors
+///
+/// Fails when read access to `table` is denied or the search query fails.
+fn run_search(
+    conn: &mut PgConnection,
+    roles: &UserRoles,
+    table: &str,
+    query: &str,
+    limit: i32,
+) -> Result<Dynamic, Box<rhai::EvalAltResult>> {
+    let access_info = match check_table_access(conn, table, roles, AccessType::Read) {
+        Ok(info) => info,
+        Err(e) => {
+            warn!("SEARCH access denied: {e}");
+            return Err(e.into());
+        }
+    };
+
+    // Diesel blocks the thread while the query runs; let Tokio know first.
+    let response = tokio::task::block_in_place(|| execute_search(conn, table, query, limit))
+        .map_err(|e| format!("Search error: {e}"))?;
+
+    match response.get("results") {
+        Some(rows) => {
+            let visible = filter_fields_by_role(rows.clone(), roles, &access_info);
+            Ok(Dynamic::from(to_array(utils::json_value_to_dynamic(&visible))))
+        }
+        None => Ok(Dynamic::from(rhai::Array::new())),
+    }
+}
+
+/// Runs one `AUTOCOMPLETE` call: access check, then the suggestion query.
+///
+/// Any failure is logged and yields an empty array, so a script never aborts
+/// because suggestions are unavailable.
+fn run_autocomplete(
+    conn: &mut PgConnection,
+    roles: &UserRoles,
+    table: &str,
+    prefix: &str,
+    limit: i32,
+) -> rhai::Array {
+    // NOTE(review): only table-level access is enforced here; field-level
+    // restrictions are not applied to the suggestion column.
+    if let Err(e) = check_table_access(conn, table, roles, AccessType::Read) {
+        warn!("AUTOCOMPLETE access denied: {e}");
+        return rhai::Array::new();
+    }
+
+    match execute_autocomplete(conn, table, prefix, limit) {
+        Ok(suggestions) => suggestions.into_iter().map(Dynamic::from).collect(),
+        Err(e) => {
+            warn!("AUTOCOMPLETE failed: {e}");
+            rhai::Array::new()
+        }
+    }
+}
+
+/// Converts a script-supplied row limit into the `i32` bound to SQL `LIMIT`.
+///
+/// Negative values become 0 (PostgreSQL rejects a negative `LIMIT`) and values
+/// above `i32::MAX` saturate instead of wrapping like an `as` cast.
+fn clamp_limit(limit: i64) -> i32 {
+    i32::try_from(limit.max(0)).unwrap_or(i32::MAX)
+}
+
+/// Escapes the `LIKE`/`ILIKE` metacharacters (`\`, `%`, `_`) with a backslash
+/// so that `input` matches literally inside a pattern.
+fn escape_like_pattern(input: &str) -> String {
+    let mut escaped = String::with_capacity(input.len());
+    for ch in input.chars() {
+        if matches!(ch, '\\' | '%' | '_') {
+            escaped.push('\\');
+        }
+        escaped.push(ch);
+    }
+    escaped
+}
+
+/// Quotes `name` as a PostgreSQL identifier, doubling embedded double quotes.
+fn quote_ident(name: &str) -> String {
+    format!("\"{}\"", name.replace('"', "\"\""))
+}
+
+/// Builds one SQL text expression that concatenates `columns` of alias `t`
+/// (NULLs as empty strings, separated by spaces) so one predicate covers all.
+fn combined_text_expr(columns: &[String]) -> String {
+    columns
+        .iter()
+        .map(|col| format!("COALESCE(t.{}::text, '')", quote_ident(col)))
+        .collect::<Vec<_>>()
+        .join(" || ' ' || ")
+}
+
+/// Builds the JSON envelope shared by every search outcome.
+fn search_response(table: &str, query: &str, results: Vec<Value>) -> Value {
+    let count = results.len();
+    json!({
+        "command": "search",
+        "table": table,
+        "query": query,
+        "results": results,
+        "count": count
+    })
+}
+
+/// Executes a fuzzy search over the text columns of a table, ranked by relevance.
+///
+/// Rows match when their combined text contains `query_str` (case-insensitive)
+/// or its trigram similarity to `query_str` exceeds 0.1. Each returned row gets
+/// an extra `_relevance` field. If the ranked query fails (for example because
+/// `pg_trgm` is not installed) an unranked `ILIKE` search is used instead.
+///
+/// Returns `{"command", "table", "query", "results", "count"}`, echoing the
+/// caller's `table_str` and `query_str` unchanged.
+///
+/// # Errors
+///
+/// Returns the database error text when the column lookup or both the ranked
+/// and the fallback query fail.
+pub fn execute_search(
+    conn: &mut PgConnection,
+    table_str: &str,
+    query_str: &str,
+    limit: i32,
+) -> Result<Value, String> {
+    trace!("Starting execute_search: table={table_str}, query={query_str}, limit={limit}");
+
+    let safe_table = sanitize_identifier(table_str);
+    // PostgreSQL rejects a negative LIMIT.
+    let limit = limit.max(0);
+
+    let searchable_columns = get_searchable_columns(conn, &safe_table)?;
+    if searchable_columns.is_empty() {
+        warn!("No searchable columns found for table: {safe_table}");
+        return Ok(search_response(table_str, query_str, Vec::new()));
+    }
+
+    let combined_columns = combined_text_expr(&searchable_columns);
+    // The query text is always sent as a bound parameter, so it needs no quote
+    // escaping; only LIKE wildcards are neutralized for the substring match.
+    let like_pattern = format!("%{}%", escape_like_pattern(query_str));
+
+    // $1 = raw query (similarity), $2 = ILIKE pattern, $3 = row limit.
+    // NOTE: the `id` tie-breaker assumes the table has an `id` column; tables
+    // without one always take the fallback path below.
+    let query = format!(
+        r#"
+        SELECT
+            row_to_json(t)::text AS row_data,
+            GREATEST(
+                similarity({combined_columns}, $1),
+                CASE WHEN {combined_columns} ILIKE $2 THEN 0.5 ELSE 0 END
+            )::real AS relevance
+        FROM {safe_table} t
+        WHERE
+            {combined_columns} ILIKE $2
+            OR similarity({combined_columns}, $1) > 0.1
+        ORDER BY relevance DESC, t.id
+        LIMIT $3
+        "#
+    );
+
+    let ranked = diesel::sql_query(&query)
+        .bind::<Text, _>(query_str)
+        .bind::<Text, _>(&like_pattern)
+        .bind::<Integer, _>(limit)
+        .load::<SearchResultRow>(conn);
+
+    let rows = match ranked {
+        Ok(rows) => rows,
+        Err(e) => {
+            warn!("Ranked search failed, falling back to ILIKE: {e}");
+            let results = execute_simple_search(
+                conn,
+                &safe_table,
+                &like_pattern,
+                limit,
+                &searchable_columns,
+            )?;
+            return Ok(search_response(table_str, query_str, results));
+        }
+    };
+
+    let results: Vec<Value> = rows
+        .into_iter()
+        .filter_map(|row| {
+            let mut obj: Value = serde_json::from_str(&row.row_data).ok()?;
+            if let Value::Object(map) = &mut obj {
+                map.insert("_relevance".to_string(), json!(row.relevance));
+            }
+            Some(obj)
+        })
+        .collect();
+
+    trace!("Search returned {} results", results.len());
+
+    Ok(search_response(table_str, query_str, results))
+}
+
+/// Fallback search: unranked, case-insensitive substring match via `ILIKE`.
+///
+/// `like_pattern` must already be a complete, escaped `LIKE` pattern.
+fn execute_simple_search(
+    conn: &mut PgConnection,
+    safe_table: &str,
+    like_pattern: &str,
+    limit: i32,
+    searchable_columns: &[String],
+) -> Result<Vec<Value>, String> {
+    let combined_columns = combined_text_expr(searchable_columns);
+
+    let query = format!(
+        r#"
+        SELECT row_to_json(t)::text AS row_data
+        FROM {safe_table} t
+        WHERE {combined_columns} ILIKE $1
+        LIMIT $2
+        "#
+    );
+
+    let rows: Vec<JsonRow> = diesel::sql_query(&query)
+        .bind::<Text, _>(like_pattern)
+        .bind::<Integer, _>(limit)
+        .load(conn)
+        .map_err(|e| {
+            error!("Simple search error: {e}");
+            e.to_string()
+        })?;
+
+    Ok(rows
+        .into_iter()
+        .filter_map(|row| serde_json::from_str(&row.row_data).ok())
+        .collect())
+}
+
+/// Returns up to `limit` distinct values of the table's primary text column
+/// that start with `prefix` (case-insensitive), in sorted order.
+///
+/// # Errors
+///
+/// Returns an error string when the table has no usable text column or the
+/// query fails.
+pub fn execute_autocomplete(
+    conn: &mut PgConnection,
+    table_str: &str,
+    prefix: &str,
+    limit: i32,
+) -> Result<Vec<String>, String> {
+    #[derive(QueryableByName)]
+    struct SuggestionRow {
+        #[diesel(sql_type = Text)]
+        suggestion: String,
+    }
+
+    trace!("Autocomplete: table={table_str}, prefix={prefix}");
+
+    let safe_table = sanitize_identifier(table_str);
+    let text_column = quote_ident(&get_primary_text_column(conn, &safe_table)?);
+    // Bound parameter: only LIKE wildcards need escaping, not quotes.
+    let prefix_pattern = format!("{}%", escape_like_pattern(prefix));
+
+    // Ordering by the output column keeps the query valid with DISTINCT even
+    // when the column is not of type `text`.
+    let query = format!(
+        r#"
+        SELECT DISTINCT {text_column}::text AS suggestion
+        FROM {safe_table}
+        WHERE {text_column}::text ILIKE $1
+        ORDER BY suggestion
+        LIMIT $2
+        "#
+    );
+
+    let rows: Vec<SuggestionRow> = diesel::sql_query(&query)
+        .bind::<Text, _>(&prefix_pattern)
+        .bind::<Integer, _>(limit.max(0))
+        .load(conn)
+        .map_err(|e| {
+            error!("Autocomplete error: {e}");
+            e.to_string()
+        })?;
+
+    Ok(rows.into_iter().map(|r| r.suggestion).collect())
+}
+
+/// Lists up to five text columns of `table_name`, most relevant first.
+///
+/// Columns whose name contains `password`, `secret` or `token` (any case) are
+/// never searched. Well-known names (`name`, `title`, ...) come first, the
+/// rest follow in table order. Names keep the catalog's exact spelling.
+fn get_searchable_columns(
+    conn: &mut PgConnection,
+    table_name: &str,
+) -> Result<Vec<String>, String> {
+    #[derive(QueryableByName)]
+    struct ColumnInfo {
+        #[diesel(sql_type = Text)]
+        column_name: String,
+    }
+
+    const PRIORITY_COLUMNS: [&str; 6] =
+        ["name", "title", "description", "sku", "product_name", "productname"];
+    // More columns make every row comparison slower.
+    const MAX_SEARCH_COLUMNS: usize = 5;
+
+    // Assuming `sanitize_identifier` yields a bare name, the search SQL uses it
+    // unquoted, so PostgreSQL folds it to lower case and resolves it through
+    // the search path; the catalog lookup mirrors both rules.
+    let query = r#"
+        SELECT column_name::text
+        FROM information_schema.columns
+        WHERE table_name::text = lower($1)
+          AND table_schema::text = ANY (current_schemas(false)::text[])
+          AND data_type IN ('character varying', 'varchar', 'text', 'character', 'char', 'name')
+          AND column_name::text NOT ILIKE '%password%'
+          AND column_name::text NOT ILIKE '%secret%'
+          AND column_name::text NOT ILIKE '%token%'
+        ORDER BY ordinal_position
+    "#;
+
+    let columns: Vec<ColumnInfo> = diesel::sql_query(query)
+        .bind::<Text, _>(table_name)
+        .load(conn)
+        .map_err(|e| e.to_string())?;
+    let names: Vec<String> = columns.into_iter().map(|c| c.column_name).collect();
+
+    let mut result: Vec<String> = Vec::with_capacity(names.len());
+    for wanted in PRIORITY_COLUMNS {
+        if let Some(name) = names.iter().find(|n| n.eq_ignore_ascii_case(wanted)) {
+            if !result.contains(name) {
+                result.push(name.clone());
+            }
+        }
+    }
+    for name in names {
+        if !result.contains(&name) {
+            result.push(name);
+        }
+    }
+    result.truncate(MAX_SEARCH_COLUMNS);
+
+    Ok(result)
+}
+
+/// Picks the column used for autocomplete: a well-known name such as `name`
+/// or `title` when present, otherwise the first searchable text column.
+fn get_primary_text_column(conn: &mut PgConnection, table_name: &str) -> Result<String, String> {
+    const PREFERRED: [&str; 6] = ["name", "title", "productname", "product_name", "label", "sku"];
+
+    let columns = get_searchable_columns(conn, table_name)?;
+
+    // Match case-insensitively but return the catalog's exact spelling.
+    let preferred = PREFERRED
+        .iter()
+        .find_map(|wanted| columns.iter().find(|c| c.eq_ignore_ascii_case(wanted)).cloned());
+
+    preferred
+        .or_else(|| columns.into_iter().next())
+        .ok_or_else(|| "No text columns found".to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn escape_like_pattern_neutralizes_wildcards() {
+        assert_eq!(escape_like_pattern("100%_off\\"), "100\\%\\_off\\\\");
+        // Quotes are left alone: the value is sent as a bound parameter.
+        assert_eq!(escape_like_pattern("test' OR '1'='1"), "test' OR '1'='1");
+    }
+
+    #[test]
+    fn quote_ident_doubles_embedded_quotes() {
+        assert_eq!(quote_ident("Name"), "\"Name\"");
+        assert_eq!(quote_ident("a\"b"), "\"a\"\"b\"");
+    }
+
+    #[test]
+    fn clamp_limit_saturates_out_of_range_values() {
+        assert_eq!(clamp_limit(-5), 0);
+        assert_eq!(clamp_limit(10), 10);
+        assert_eq!(clamp_limit(i64::MAX), i32::MAX);
+    }
+}
diff --git a/src/basic/mod.rs b/src/basic/mod.rs
index 7e67f93f3..7dde696a8 100644
--- a/src/basic/mod.rs
+++ b/src/basic/mod.rs
@@ -34,6 +34,7 @@ use self::keywords::create_task::create_task_keyword;
 use self::keywords::data_operations::register_data_operations;
 use self::keywords::file_operations::register_file_operations;
 use self::keywords::find::find_keyword;
+use self::keywords::search::search_keyword;
 use self::keywords::first::first_keyword;
 use self::keywords::for_next::for_keyword;
 use self::keywords::format::format_keyword;
@@ -86,6 +87,7 @@ impl ScriptService {
         get_bot_memory_keyword(state.clone(), user.clone(), &mut engine);
         create_site_keyword(&state, user.clone(), &mut engine);
         find_keyword(&state, user.clone(), &mut engine);
+        search_keyword(&state, user.clone(), &mut engine);
         for_keyword(&state, user.clone(), &mut engine);
         let _ = register_use_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone()));
         let _ = register_clear_kb_keyword(&mut engine, state.clone(), Arc::new(user.clone()));