- Script is running!
Some checks failed
GBCI / build (push) Has been cancelled

This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-07-31 00:36:10 -03:00
parent a3118dcf0a
commit e15da79204
4 changed files with 81 additions and 45 deletions

View file

@ -9,8 +9,8 @@ FOR EACH item IN items
let page = GET website
let prompt = "Create a website for " + item.company + " with the following details: " + page
let alias = LLM "Return a single word for {item.company} like a token, no spaces, no special characters, no numbers, no uppercase letters."
let alias = LLM "Return a single word for " + item.company + " like a token, no spaces, no special characters, no numbers, no uppercase letters."
CREATE SITE item.company + "bot", item.company, website, "site", prompt

View file

@ -1,11 +1,14 @@
use rhai::Dynamic;
use rhai::Engine;
use std::error::Error;
use std::fs;
use std::path::Path;
use crate::services::state::AppState;
use crate::services::utils;
pub fn create_site_keyword(_state: &AppState, engine: &mut Engine) {
pub fn create_site_keyword(state: &AppState, engine: &mut Engine) {
let state_clone = state.clone();
engine
.register_custom_syntax(
&[
@ -19,29 +22,44 @@ pub fn create_site_keyword(_state: &AppState, engine: &mut Engine) {
}
let _name = context.eval_expression_tree(&inputs[0])?;
let _website = context.eval_expression_tree(&inputs[2])?;
let _template = context.eval_expression_tree(&inputs[3])?;
let prompt = context.eval_expression_tree(&inputs[4])?;
let ai_config = state_clone.config.as_ref().expect("Config must be initialized").ai.clone();
// Use the same pattern as find_keyword
let fut = create_site(&ai_config, _name, prompt);
let result =
tokio::task::block_in_place(|| tokio::runtime::Handle::current().block_on(fut))
.map_err(|e| format!("HTTP request failed: {}", e))?;
// Call the LLM to generate the HTML content
let llm_result = context.call_fn::<String>("chat", (prompt.to_string(),))?;
// Create the directory structure
let base_path = "/opt/gbo/tenants/pragmatismo/proxy/data/websites/sites.pragmatismo.com.br";
let site_name = format!("{}", _name.to_string());
let full_path = format!("{}/{}", base_path, site_name);
// Create directory if it doesn't exist
fs::create_dir_all(&full_path).map_err(|e| e.to_string())?;
// Write the HTML file
let index_path = Path::new(&full_path).join("index.html");
fs::write(index_path, llm_result).map_err(|e| e.to_string())?;
println!("Site created at: {}", full_path);
Ok(Dynamic::UNIT)
Ok(Dynamic::from(result))
},
)
.unwrap();
}
}
/// Generates a site with the LLM and stores it as `index.html` in a
/// per-site directory under the fixed sites root.
///
/// # Arguments
/// * `ai_config` - AI settings forwarded unchanged to `utils::call_llm`.
/// * `_name` - Script value whose string form becomes the site's directory
///   name. It must be a single plain path component.
/// * `prompt` - Script value whose string form is sent to the LLM; the LLM
///   response is written verbatim as the page content.
///
/// # Returns
/// The path of the site directory that was written.
///
/// # Errors
/// Returns an error when the site name is empty or is not a single plain
/// directory name (for example `..`, an absolute path, or a name containing
/// `/`), when the LLM call fails, or when the directory or `index.html`
/// cannot be written.
async fn create_site(
    ai_config: &crate::services::config::AIConfig,
    _name: Dynamic,
    prompt: Dynamic,
) -> Result<String, Box<dyn Error + Send + Sync>> {
    let base_path = "/opt/gbo/tenants/pragmatismo/proxy/data/websites/sites.pragmatismo.com.br";

    // The name comes from script data, so it is validated before it is joined
    // to the root. Doing this first also avoids paying for an LLM request
    // whose output could never be stored.
    let site_name = _name.to_string();
    ensure_single_path_component(&site_name)?;

    // Call the LLM to generate the HTML contents.
    let llm_result = utils::call_llm(&prompt.to_string(), ai_config).await?;

    // Create the site directory (and any missing parents).
    // NOTE(review): this uses blocking std::fs inside an async fn; confirm
    // every caller drives this future from a context where blocking is
    // acceptable.
    let site_dir = Path::new(base_path).join(&site_name);
    fs::create_dir_all(&site_dir).map_err(|e| {
        format!(
            "failed to create site directory {}: {}",
            site_dir.display(),
            e
        )
    })?;

    // Write the HTML file.
    let index_path = site_dir.join("index.html");
    fs::write(&index_path, llm_result)
        .map_err(|e| format!("failed to write {}: {}", index_path.display(), e))?;

    let full_path = site_dir.display().to_string();
    println!("Site created at: {}", full_path);
    Ok(full_path)
}

/// Accepts `name` only if it is exactly one plain path component, so that
/// joining it to a root directory cannot resolve outside that root.
fn ensure_single_path_component(name: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
    let mut components = Path::new(name).components();
    match (components.next(), components.next()) {
        (Some(std::path::Component::Normal(_)), None) => Ok(()),
        _ => Err(format!("invalid site name: {:?}", name).into()),
    }
}

View file

@ -57,35 +57,28 @@ pub async fn execute_headless_browser_search(
Ok(result)
}
async fn perform_search(
driver: WebDriver,
search_term: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
// Configure the search query
let query = search_term.to_string();
// Navigate to DuckDuckGo
println!("Navigating to DuckDuckGo...");
driver.goto("https://duckduckgo.com").await?;
// Wait for search box and type query
println!("Searching for: {}", query);
let search_input = driver.find(By::Name("q")).await?;
let search_input = driver.find(By::Id("searchbox_input")).await?;
search_input.click().await?;
search_input.send_keys(&query).await?;
search_input.send_keys(search_term).await?;
// Submit search by pressing Enter
search_input.send_keys("\n").await?;
// Wait for results to load
driver.find(By::Css(".result")).await?;
sleep(Duration::from_millis(2000)).await; // Give extra time for JS
// Wait for results to load - using a modern result selector
driver.find(By::Css("[data-testid='result']")).await?;
sleep(Duration::from_millis(2000)).await;
// Extract first result link
// Extract results
let results = extract_search_results(&driver).await?;
if !results.is_empty() {
println!("Found {} results", results.len());
Ok(results[0].clone())
} else {
Ok("No results found".to_string())
@ -97,20 +90,42 @@ async fn extract_search_results(
) -> Result<Vec<String>, Box<dyn Error + Send + Sync>> {
let mut results = Vec::new();
// Try different selectors for search results
// Try different selectors for search results, ordered by most specific to most general
let selectors = [
"a[data-testid='result-title-a']", // Modern DuckDuckGo
".result__a", // Classic DuckDuckGo
"a.result-link", // Alternative
".result a[href]", // Generic result links
// Modern DuckDuckGo (as seen in the HTML)
"a[data-testid='result-title-a']", // Primary result links
"a[data-testid='result-extras-url-link']", // URL links in results
"a.eVNpHGjtxRBq_gLOfGDr", // Class-based selector for result titles
"a.Rn_JXVtoPVAFyGkcaXyK", // Class-based selector for URL links
".ikg2IXiCD14iVX7AdZo1 a", // Heading container links
".OQ_6vPwNhCeusNiEDcGp a", // URL container links
// Fallback selectors
".result__a", // Classic DuckDuckGo
"a.result-link", // Alternative
".result a[href]", // Generic result links
];
for selector in &selectors {
if let Ok(elements) = driver.find_all(By::Css(selector)).await {
for element in elements {
if let Ok(Some(href)) = element.attr("href").await {
if href.starts_with("http") && !href.contains("duckduckgo.com") {
results.push(href);
// Filter out internal and non-http links
if href.starts_with("http")
&& !href.contains("duckduckgo.com")
&& !href.contains("duck.co")
&& !results.contains(&href) {
// Get the display URL for verification
let display_url = if let Ok(text) = element.text().await {
text.trim().to_string()
} else {
String::new()
};
// Only add if it looks like a real result (not an ad or internal link)
if !display_url.is_empty() && !display_url.contains("Ad") {
results.push(href);
}
}
}
}
@ -120,5 +135,8 @@ async fn extract_search_results(
}
}
// Deduplicate results
results.dedup();
Ok(results)
}
}

View file

@ -47,7 +47,7 @@ impl BrowserPool {
let mut caps = DesiredCapabilities::chrome();
caps.set_binary(&self.brave_path)?;
caps.add_chrome_arg("--headless=new")?;
//caps.add_chrome_arg("--headless=new")?;
caps.add_chrome_arg("--disable-gpu")?;
caps.add_chrome_arg("--no-sandbox")?;
@ -149,7 +149,7 @@ async fn setup_chromedriver() -> Result<String, Box<dyn std::error::Error>> {
// Extract the zip to a temporary directory first
let mut temp_extract_dir = std::env::temp_dir();
temp_extract_dir.push("chromedriver_extract");
let mut temp_extract_dir1 = temp_extract_dir.clone();
let temp_extract_dir1 = temp_extract_dir.clone();
// Clean up any previous extraction
let _ = fs::remove_dir_all(&temp_extract_dir).await;