From 7e357d278cdf1bb502dba6d154844caae91feb9d Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Sun, 24 Aug 2025 19:53:58 -0300 Subject: [PATCH] - CRM on-emulator-sent.bas --- .gitignore | 3 +- src/prompts/business/data-enrichment.bas | 1 + src/prompts/business/on-emulator-sent.bas | 12 +++ src/scripts/containers/llm.sh | 3 +- src/scripts/containers/system.sh | 1 + src/scripts/utils/set-size-5GB.sh | 6 +- src/services/keywords/get.rs | 117 +++++++++++++--------- 7 files changed, 88 insertions(+), 55 deletions(-) create mode 100644 src/prompts/business/on-emulator-sent.bas diff --git a/.gitignore b/.gitignore index 5c869ae..5a1fa37 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ target .env -*.env \ No newline at end of file +*.env +work diff --git a/src/prompts/business/data-enrichment.bas b/src/prompts/business/data-enrichment.bas index 124cb59..d752c1f 100644 --- a/src/prompts/business/data-enrichment.bas +++ b/src/prompts/business/data-enrichment.bas @@ -6,6 +6,7 @@ FOR EACH item IN items let website = item.website ?? "" if item.website == "" { website = WEBSITE OF item.company + SET "gb.rob", "id="+ item.id, "website=" + website PRINT website } diff --git a/src/prompts/business/on-emulator-sent.bas b/src/prompts/business/on-emulator-sent.bas new file mode 100644 index 0000000..49107c7 --- /dev/null +++ b/src/prompts/business/on-emulator-sent.bas @@ -0,0 +1,12 @@ +let items = FIND "gb.rob", "ACTION=EMUL_ASK" +FOR EACH item IN items + + let to = item.emailcto + let subject = "Sobre o Simulador de AI enviado" + let name = FIRST(item.contact) + let body = GET "/EMUL-message.html" + + CREATE_DRAFT to, subject, body + SET "gb.rob", "id="+ item.id, "ACTION=EMUL_ASKED" + WAIT 3000 +NEXT item diff --git a/src/scripts/containers/llm.sh b/src/scripts/containers/llm.sh index edd21cb..2d56680 100644 --- a/src/scripts/containers/llm.sh +++ b/src/scripts/containers/llm.sh @@ -2,8 +2,7 @@ wget https://github.com/ggml-org/llama.cpp/releases/download/b6148/llama-b6148-bin-ubuntu-x64.zip wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf?download=true - -# DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf +wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf # Phi-3.5-mini-instruct-IQ2_M.gguf # ./llama-cli -m tinyllama-1.1b-chat-v1.0.Q4_0.gguf --reasoning-budget 0 --reasoning-format none -mli diff --git a/src/scripts/containers/system.sh b/src/scripts/containers/system.sh index 226936d..6c2d724 100644 --- a/src/scripts/containers/system.sh +++ b/src/scripts/containers/system.sh @@ -49,6 +49,7 @@ lxc config device add $CONTAINER_NAME bin disk source="${HOST_BIN}" path=/opt/gb lxc config device add $CONTAINER_NAME data disk source="${HOST_DATA}" path=/opt/gbo/data lxc config device add $CONTAINER_NAME conf disk source="${HOST_CONF}" path=/opt/gbo/conf lxc config device add $CONTAINER_NAME logs disk source="${HOST_LOGS}" path=/opt/gbo/logs +lxc config device add $CONTAINER_NAME system-proxy disk source="/opt/gbo/tenants/$PARAM_TENANT/proxy" path=/opt/gbo/refs/proxy lxc config device remove $CONTAINER_NAME proxy 2>/dev/null || true diff --git a/src/scripts/utils/set-size-5GB.sh b/src/scripts/utils/set-size-5GB.sh index ea3c8b1..e2d5de2 100644 --- a/src/scripts/utils/set-size-5GB.sh +++ b/src/scripts/utils/set-size-5GB.sh @@ -1,8 +1,8 @@ -export container="pragmatismo-system" +export container="pragmatismo-alm-ci" lxc stop "$container" -lxc config device override "$container" root size=10GB -lxc config device set "$container" root size=10GB +lxc config device override "$container" root size=15GB +lxc config device set "$container" root size=15GB lxc start "$container" ROOT_DEV=$(lxc exec "$container" -- df / --output=source | tail -1) diff --git a/src/services/keywords/get.rs b/src/services/keywords/get.rs index 185e736..7bddedb 100644 --- a/src/services/keywords/get.rs +++ b/src/services/keywords/get.rs @@ -5,42 +5,55 @@ use scraper::{Html, Selector}; use std::error::Error; pub fn get_keyword(_state: &AppState, engine: &mut Engine) { - engine - .register_custom_syntax( - &["GET", "$expr$"], - false, // Expression, not statement - move |context, inputs| { - let url = context.eval_expression_tree(&inputs[0])?; - let url_str = url.to_string(); + let _ = engine.register_custom_syntax( + &["GET", "$expr$"], + false, // Expression, not statement + move |context, inputs| { + let url = context.eval_expression_tree(&inputs[0])?; + let url_str = url.to_string(); - if url_str.starts_with("https") { - println!("HTTPS GET request: {}", url_str); + // Prevent path traversal attacks + if url_str.contains("..") { + return Err("URL contains invalid path traversal sequences like '..'.".into()); + } - // Use the same pattern as find_keyword - let fut = execute_get(&url_str); - let result = tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(fut) - }) - .map_err(|e| format!("HTTP request failed: {}", e))?; + let modified_url = if url_str.starts_with("/") { + let work_root = std::env::var("WORK_ROOT").unwrap_or_else(|_| "./work".to_string()); + let full_path = std::path::Path::new(&work_root) + .join(url_str.trim_start_matches('/')) + .to_string_lossy() + .into_owned(); - Ok(Dynamic::from(result)) - } else { - println!("GET executed: {}", url_str); - Ok(Dynamic::from(format!("Content from {}", url_str))) + let base_url = "file://"; + format!("{}{}", base_url, full_path) + } else { + url_str.to_string() + }; + + if modified_url.starts_with("https://") { + println!("HTTPS GET request: {}", modified_url); + + let fut = execute_get(&modified_url); + let result = + tokio::task::block_in_place(|| tokio::runtime::Handle::current().block_on(fut)) + .map_err(|e| format!("HTTP request failed: {}", e))?; + + Ok(Dynamic::from(result)) + } else if modified_url.starts_with("file://") { + // Handle file:// URLs + let file_path = modified_url.trim_start_matches("file://"); + match std::fs::read_to_string(file_path) { + Ok(content) => Ok(Dynamic::from(content)), + Err(e) => Err(format!("Failed to read file: {}", e).into()), } - }, - ) - .unwrap(); -} - -pub async fn _execute_get(url: &str) -> Result> { - println!("Starting execute_get with URL: {}", url); - - let response = reqwest::get(url).await?; - let content = response.text().await?; - - println!("GET request successful, got {} bytes", content.len()); - Ok(format!("Secure content fetched: {}", content)) + } else { + Err( + format!("GET request failed: URL must begin with 'https://' or 'file://'") + .into(), + ) + } + }, + ); } pub async fn execute_get(url: &str) -> Result> { @@ -54,23 +67,29 @@ pub async fn execute_get(url: &str) -> Result>().join(" "); + // Parse HTML and extract text only if it appears to be HTML + if html_content.trim_start().starts_with(">() - .join(" "); + let text_content = document + .select(&selector) + .flat_map(|element| element.text()) + .collect::>() + .join(" "); - println!( - "GET request successful, extracted {} characters of text", - cleaned_text.len() - ); - Ok(cleaned_text) + // Clean up the text + let cleaned_text = text_content + .replace('\n', " ") + .replace('\t', " ") + .split_whitespace() + .collect::>() + .join(" "); + + Ok(cleaned_text) + } else { + Ok(html_content) // Return plain content as is if not HTML + } }