This commit is contained in:
parent
5fc5b8b7b9
commit
7e357d278c
7 changed files with 88 additions and 55 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
target
|
||||
.env
|
||||
*.env
|
||||
*.env
|
||||
work
|
||||
|
|
|
@ -6,6 +6,7 @@ FOR EACH item IN items
|
|||
let website = item.website ?? ""
|
||||
if item.website == "" {
|
||||
website = WEBSITE OF item.company
|
||||
SET "gb.rob", "id="+ item.id, "website=" + website
|
||||
PRINT website
|
||||
}
|
||||
|
||||
|
|
12
src/prompts/business/on-emulator-sent.bas
Normal file
12
src/prompts/business/on-emulator-sent.bas
Normal file
|
@ -0,0 +1,12 @@
|
|||
let items = FIND "gb.rob", "ACTION=EMUL_ASK"
|
||||
FOR EACH item IN items
|
||||
|
||||
let to = item.emailcto
|
||||
let subject = "Sobre o Simulador de AI enviado"
|
||||
let name = FIRST(item.contact)
|
||||
let body = GET "/EMUL-message.html"
|
||||
|
||||
CREATE_DRAFT to, subject, body
|
||||
SET "gb.rob", "id="+ item.id, "ACTION=EMUL_ASKED"
|
||||
WAIT 3000
|
||||
NEXT item
|
|
@ -2,8 +2,7 @@
|
|||
wget https://github.com/ggml-org/llama.cpp/releases/download/b6148/llama-b6148-bin-ubuntu-x64.zip
|
||||
|
||||
wget https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_0.gguf?download=true
|
||||
|
||||
# DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
|
||||
wget https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf
|
||||
# Phi-3.5-mini-instruct-IQ2_M.gguf
|
||||
|
||||
# ./llama-cli -m tinyllama-1.1b-chat-v1.0.Q4_0.gguf --reasoning-budget 0 --reasoning-format none -mli
|
||||
|
|
|
@ -49,6 +49,7 @@ lxc config device add $CONTAINER_NAME bin disk source="${HOST_BIN}" path=/opt/gb
|
|||
lxc config device add $CONTAINER_NAME data disk source="${HOST_DATA}" path=/opt/gbo/data
|
||||
lxc config device add $CONTAINER_NAME conf disk source="${HOST_CONF}" path=/opt/gbo/conf
|
||||
lxc config device add $CONTAINER_NAME logs disk source="${HOST_LOGS}" path=/opt/gbo/logs
|
||||
lxc config device add $CONTAINER_NAME system-proxy disk source="/opt/gbo/tenants/$PARAM_TENANT/proxy" path=/opt/gbo/refs/proxy
|
||||
|
||||
|
||||
lxc config device remove $CONTAINER_NAME proxy 2>/dev/null || true
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
export container="pragmatismo-system"
|
||||
export container="pragmatismo-alm-ci"
|
||||
lxc stop "$container"
|
||||
|
||||
lxc config device override "$container" root size=10GB
|
||||
lxc config device set "$container" root size=10GB
|
||||
lxc config device override "$container" root size=15GB
|
||||
lxc config device set "$container" root size=15GB
|
||||
lxc start "$container"
|
||||
ROOT_DEV=$(lxc exec "$container" -- df / --output=source | tail -1)
|
||||
|
||||
|
|
|
@ -5,42 +5,55 @@ use scraper::{Html, Selector};
|
|||
use std::error::Error;
|
||||
|
||||
pub fn get_keyword(_state: &AppState, engine: &mut Engine) {
|
||||
engine
|
||||
.register_custom_syntax(
|
||||
&["GET", "$expr$"],
|
||||
false, // Expression, not statement
|
||||
move |context, inputs| {
|
||||
let url = context.eval_expression_tree(&inputs[0])?;
|
||||
let url_str = url.to_string();
|
||||
let _ = engine.register_custom_syntax(
|
||||
&["GET", "$expr$"],
|
||||
false, // Expression, not statement
|
||||
move |context, inputs| {
|
||||
let url = context.eval_expression_tree(&inputs[0])?;
|
||||
let url_str = url.to_string();
|
||||
|
||||
if url_str.starts_with("https") {
|
||||
println!("HTTPS GET request: {}", url_str);
|
||||
// Prevent path traversal attacks
|
||||
if url_str.contains("..") {
|
||||
return Err("URL contains invalid path traversal sequences like '..'.".into());
|
||||
}
|
||||
|
||||
// Use the same pattern as find_keyword
|
||||
let fut = execute_get(&url_str);
|
||||
let result = tokio::task::block_in_place(|| {
|
||||
tokio::runtime::Handle::current().block_on(fut)
|
||||
})
|
||||
.map_err(|e| format!("HTTP request failed: {}", e))?;
|
||||
let modified_url = if url_str.starts_with("/") {
|
||||
let work_root = std::env::var("WORK_ROOT").unwrap_or_else(|_| "./work".to_string());
|
||||
let full_path = std::path::Path::new(&work_root)
|
||||
.join(url_str.trim_start_matches('/'))
|
||||
.to_string_lossy()
|
||||
.into_owned();
|
||||
|
||||
Ok(Dynamic::from(result))
|
||||
} else {
|
||||
println!("GET executed: {}", url_str);
|
||||
Ok(Dynamic::from(format!("Content from {}", url_str)))
|
||||
let base_url = "file://";
|
||||
format!("{}{}", base_url, full_path)
|
||||
} else {
|
||||
url_str.to_string()
|
||||
};
|
||||
|
||||
if modified_url.starts_with("https://") {
|
||||
println!("HTTPS GET request: {}", modified_url);
|
||||
|
||||
let fut = execute_get(&modified_url);
|
||||
let result =
|
||||
tokio::task::block_in_place(|| tokio::runtime::Handle::current().block_on(fut))
|
||||
.map_err(|e| format!("HTTP request failed: {}", e))?;
|
||||
|
||||
Ok(Dynamic::from(result))
|
||||
} else if modified_url.starts_with("file://") {
|
||||
// Handle file:// URLs
|
||||
let file_path = modified_url.trim_start_matches("file://");
|
||||
match std::fs::read_to_string(file_path) {
|
||||
Ok(content) => Ok(Dynamic::from(content)),
|
||||
Err(e) => Err(format!("Failed to read file: {}", e).into()),
|
||||
}
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub async fn _execute_get(url: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||
println!("Starting execute_get with URL: {}", url);
|
||||
|
||||
let response = reqwest::get(url).await?;
|
||||
let content = response.text().await?;
|
||||
|
||||
println!("GET request successful, got {} bytes", content.len());
|
||||
Ok(format!("Secure content fetched: {}", content))
|
||||
} else {
|
||||
Err(
|
||||
format!("GET request failed: URL must begin with 'https://' or 'file://'")
|
||||
.into(),
|
||||
)
|
||||
}
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn execute_get(url: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||
|
@ -54,23 +67,29 @@ pub async fn execute_get(url: &str) -> Result<String, Box<dyn Error + Send + Syn
|
|||
let response = client.get(url).send().await?;
|
||||
let html_content = response.text().await?;
|
||||
|
||||
// Parse HTML and extract text
|
||||
let document = Html::parse_document(&html_content);
|
||||
let selector = Selector::parse("body").unwrap(); // Focus on body content
|
||||
let body = document.select(&selector).next().unwrap();
|
||||
let text_content = body.text().collect::<Vec<_>>().join(" ");
|
||||
// Parse HTML and extract text only if it appears to be HTML
|
||||
if html_content.trim_start().starts_with("<!DOCTYPE html")
|
||||
|| html_content.trim_start().starts_with("<html")
|
||||
{
|
||||
let document = Html::parse_document(&html_content);
|
||||
let selector = Selector::parse("body").unwrap_or_else(|_| Selector::parse("*").unwrap());
|
||||
|
||||
// Clean up the text (remove extra whitespace, newlines, etc.)
|
||||
let cleaned_text = text_content
|
||||
.replace('\n', " ")
|
||||
.replace('\t', " ")
|
||||
.split_whitespace()
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
let text_content = document
|
||||
.select(&selector)
|
||||
.flat_map(|element| element.text())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
println!(
|
||||
"GET request successful, extracted {} characters of text",
|
||||
cleaned_text.len()
|
||||
);
|
||||
Ok(cleaned_text)
|
||||
// Clean up the text
|
||||
let cleaned_text = text_content
|
||||
.replace('\n', " ")
|
||||
.replace('\t', " ")
|
||||
.split_whitespace()
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
Ok(cleaned_text)
|
||||
} else {
|
||||
Ok(html_content) // Return plain content as is if not HTML
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue