- Script is running!

2025-07-31 00:36:10 -03:00 · 2025-07-31 00:36:10 -03:00 · e15da79204
commit e15da79204
parent a3118dcf0a
4 changed files with 81 additions and 45 deletions
--- a/src/prompts/business/data-enrichment.bas
+++ b/src/prompts/business/data-enrichment.bas
@ -10,7 +10,7 @@ FOR EACH item IN items
    let prompt = "Create a website for " + item.company + " with the following details: " + page
-    let alias = LLM "Return a single word for {item.company} like a token, no spaces, no special characters, no numbers, no uppercase letters." 
+    let alias = LLM "Return a single word for " + item.company + " like a token, no spaces, no special characters, no numbers, no uppercase letters."
    CREATE SITE item.company + "bot", item.company, website, "site", prompt 
--- a/src/services/keywords/create_site.rs
+++ b/src/services/keywords/create_site.rs
@ -1,11 +1,14 @@
 use rhai::Dynamic;
 use rhai::Engine;
 use std::error::Error;
 use std::fs;
 use std::path::Path;
 use crate::services::state::AppState;
 use crate::services::utils;
-pub fn create_site_keyword(_state: &AppState, engine: &mut Engine) {
+pub fn create_site_keyword(state: &AppState, engine: &mut Engine) {
    let state_clone = state.clone();
    engine
        .register_custom_syntax(
            &[
@ -23,25 +26,40 @@ pub fn create_site_keyword(_state: &AppState, engine: &mut Engine) {
                let _website = context.eval_expression_tree(&inputs[2])?;
                let _template = context.eval_expression_tree(&inputs[3])?;
                let prompt = context.eval_expression_tree(&inputs[4])?;
                let ai_config = state_clone.config.as_ref().expect("Config must be initialized").ai.clone();
                // Use the same pattern as find_keyword
                let fut = create_site(&ai_config, _name, prompt);
                let result =
                    tokio::task::block_in_place(|| tokio::runtime::Handle::current().block_on(fut))
                        .map_err(|e| format!("HTTP request failed: {}", e))?;
-                // Call the LLM to generate the HTML content
+                Ok(Dynamic::from(result))
                let llm_result = context.call_fn::<String>("chat", (prompt.to_string(),))?;
                // Create the directory structure
                let base_path = "/opt/gbo/tenants/pragmatismo/proxy/data/websites/sites.pragmatismo.com.br";
                let site_name = format!("{}", _name.to_string());
                let full_path = format!("{}/{}", base_path, site_name);
                // Create directory if it doesn't exist
                fs::create_dir_all(&full_path).map_err(|e| e.to_string())?;
                // Write the HTML file
                let index_path = Path::new(&full_path).join("index.html");
                fs::write(index_path, llm_result).map_err(|e| e.to_string())?;
                println!("Site created at: {}", full_path);
                Ok(Dynamic::UNIT)
            },
        )
        .unwrap();
 }
 /// Generates a site's HTML with the configured LLM and writes it to disk.
 ///
 /// The text form of `prompt` is sent to `utils::call_llm`, and the response is
 /// written unchanged to `<base>/<name>/index.html`, where `<base>` is the
 /// hard-coded websites directory below and `<name>` is the text form of
 /// `_name`. Missing directories are created on the way.
 ///
 /// Returns the directory the site was written to.
 ///
 /// # Errors
 ///
 /// Fails when `_name` is empty or is not made up solely of plain path
 /// segments (for example it contains `..` or starts with `/`), when the LLM
 /// call fails, or when the directory or `index.html` cannot be written.
 async fn create_site(
    ai_config: &crate::services::config::AIConfig,
    _name: Dynamic,
    prompt: Dynamic,
 ) -> Result<String, Box<dyn Error + Send + Sync>> {
    let base_path = "/opt/gbo/tenants/pragmatismo/proxy/data/websites/sites.pragmatismo.com.br";
    let site_name = _name.to_string();

    // The name comes from script data, so refuse anything that would resolve
    // to the websites root itself (empty, ".") or climb out of it ("..", "/x").
    // Checked first so a bad name never costs an LLM round-trip.
    let only_plain_segments = Path::new(&site_name)
        .components()
        .all(|part| matches!(part, std::path::Component::Normal(_)));
    if site_name.is_empty() || !only_plain_segments {
        return Err(format!("Invalid site name: {:?}", site_name).into());
    }

    // Ask the LLM for the page contents.
    let llm_result = utils::call_llm(&prompt.to_string(), &ai_config).await?;

    // Create the site directory (and any missing parents), then write the page.
    let full_path = format!("{}/{}", base_path, site_name);
    fs::create_dir_all(&full_path)?;
    fs::write(Path::new(&full_path).join("index.html"), llm_result)?;

    println!("Site created at: {}", full_path);
    Ok(full_path)
 }
--- a/src/services/keywords/get_website.rs
+++ b/src/services/keywords/get_website.rs
@ -57,35 +57,28 @@ pub async fn execute_headless_browser_search(
    Ok(result)
 }
 async fn perform_search(
    driver: WebDriver,
    search_term: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
    // Configure the search query
    let query = search_term.to_string();
    // Navigate to DuckDuckGo
    println!("Navigating to DuckDuckGo...");
    driver.goto("https://duckduckgo.com").await?;
    // Wait for search box and type query
-    println!("Searching for: {}", query);
+    let search_input = driver.find(By::Id("searchbox_input")).await?;
    let search_input = driver.find(By::Name("q")).await?;
    search_input.click().await?;
-    search_input.send_keys(&query).await?;
+    search_input.send_keys(search_term).await?;
    // Submit search by pressing Enter
    search_input.send_keys("\n").await?;
-    // Wait for results to load
+    // Wait for results to load - using a modern result selector
-    driver.find(By::Css(".result")).await?;
+    driver.find(By::Css("[data-testid='result']")).await?;
-    sleep(Duration::from_millis(2000)).await; // Give extra time for JS
+    sleep(Duration::from_millis(2000)).await;
-    // Extract first result link
+    // Extract results
    let results = extract_search_results(&driver).await?;
    if !results.is_empty() {
        println!("Found {} results", results.len());
        Ok(results[0].clone())
    } else {
        Ok("No results found".to_string())
@ -97,20 +90,42 @@ async fn extract_search_results(
 ) -> Result<Vec<String>, Box<dyn Error + Send + Sync>> {
    let mut results = Vec::new();
-    // Try different selectors for search results
+    // Try different selectors for search results, ordered by most specific to most general
    let selectors = [
-        "a[data-testid='result-title-a']", // Modern DuckDuckGo
+        // Modern DuckDuckGo (as seen in the HTML)
-        ".result__a",                      // Classic DuckDuckGo
+        "a[data-testid='result-title-a']", // Primary result links
-        "a.result-link",                   // Alternative
+        "a[data-testid='result-extras-url-link']", // URL links in results
-        ".result a[href]",                 // Generic result links
+        "a.eVNpHGjtxRBq_gLOfGDr", // Class-based selector for result titles
        "a.Rn_JXVtoPVAFyGkcaXyK", // Class-based selector for URL links
        ".ikg2IXiCD14iVX7AdZo1 a", // Heading container links
        ".OQ_6vPwNhCeusNiEDcGp a", // URL container links
        // Fallback selectors
        ".result__a", // Classic DuckDuckGo
        "a.result-link", // Alternative
        ".result a[href]", // Generic result links
    ];
    for selector in &selectors {
        if let Ok(elements) = driver.find_all(By::Css(selector)).await {
            for element in elements {
                if let Ok(Some(href)) = element.attr("href").await {
-                    if href.starts_with("http") && !href.contains("duckduckgo.com") {
+                    // Filter out internal and non-http links
-                        results.push(href);
+                    if href.starts_with("http") 
                        && !href.contains("duckduckgo.com")
                        && !href.contains("duck.co")
                        && !results.contains(&href) {
                        // Get the display URL for verification
                        let display_url = if let Ok(text) = element.text().await {
                            text.trim().to_string()
                        } else {
                            String::new()
                        };
                        // Only add if it looks like a real result (not an ad or internal link)
                        if !display_url.is_empty() && !display_url.contains("Ad") {
                            results.push(href);
                        }
                    }
                }
            }
@ -120,5 +135,8 @@ async fn extract_search_results(
        }
    }
    // Deduplicate results
    results.dedup();
    Ok(results)
 }
--- a/src/services/web_automation.rs
+++ b/src/services/web_automation.rs
@ -47,7 +47,7 @@ impl BrowserPool {
        let mut caps = DesiredCapabilities::chrome();
        caps.set_binary(&self.brave_path)?;
-        caps.add_chrome_arg("--headless=new")?;
+        //caps.add_chrome_arg("--headless=new")?;
        caps.add_chrome_arg("--disable-gpu")?;
        caps.add_chrome_arg("--no-sandbox")?;
@ -149,7 +149,7 @@ async fn setup_chromedriver() -> Result<String, Box<dyn std::error::Error>> {
        // Extract the zip to a temporary directory first
        let mut temp_extract_dir = std::env::temp_dir();
        temp_extract_dir.push("chromedriver_extract");
-        let mut temp_extract_dir1 = temp_extract_dir.clone();
+        let temp_extract_dir1 = temp_extract_dir.clone();
        // Clean up any previous extraction
        let _ = fs::remove_dir_all(&temp_extract_dir).await;