This commit is contained in:
parent
a3118dcf0a
commit
e15da79204
4 changed files with 81 additions and 45 deletions
|
@ -9,8 +9,8 @@ FOR EACH item IN items
|
|||
let page = GET website
|
||||
|
||||
let prompt = "Create a website for " + item.company + " with the following details: " + page
|
||||
|
||||
let alias = LLM "Return a single word for {item.company} like a token, no spaces, no special characters, no numbers, no uppercase letters."
|
||||
|
||||
let alias = LLM "Return a single word for " + item.company + " like a token, no spaces, no special characters, no numbers, no uppercase letters."
|
||||
|
||||
CREATE SITE item.company + "bot", item.company, website, "site", prompt
|
||||
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
use rhai::Dynamic;
|
||||
use rhai::Engine;
|
||||
use std::error::Error;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::services::state::AppState;
|
||||
use crate::services::utils;
|
||||
|
||||
pub fn create_site_keyword(_state: &AppState, engine: &mut Engine) {
|
||||
pub fn create_site_keyword(state: &AppState, engine: &mut Engine) {
|
||||
let state_clone = state.clone();
|
||||
engine
|
||||
.register_custom_syntax(
|
||||
&[
|
||||
|
@ -19,29 +22,44 @@ pub fn create_site_keyword(_state: &AppState, engine: &mut Engine) {
|
|||
}
|
||||
|
||||
let _name = context.eval_expression_tree(&inputs[0])?;
|
||||
|
||||
|
||||
let _website = context.eval_expression_tree(&inputs[2])?;
|
||||
let _template = context.eval_expression_tree(&inputs[3])?;
|
||||
let prompt = context.eval_expression_tree(&inputs[4])?;
|
||||
let ai_config = state_clone.config.as_ref().expect("Config must be initialized").ai.clone();
|
||||
// Use the same pattern as find_keyword
|
||||
let fut = create_site(&ai_config, _name, prompt);
|
||||
let result =
|
||||
tokio::task::block_in_place(|| tokio::runtime::Handle::current().block_on(fut))
|
||||
.map_err(|e| format!("HTTP request failed: {}", e))?;
|
||||
|
||||
// Call the LLM to generate the HTML content
|
||||
let llm_result = context.call_fn::<String>("chat", (prompt.to_string(),))?;
|
||||
|
||||
// Create the directory structure
|
||||
let base_path = "/opt/gbo/tenants/pragmatismo/proxy/data/websites/sites.pragmatismo.com.br";
|
||||
let site_name = format!("{}", _name.to_string());
|
||||
let full_path = format!("{}/{}", base_path, site_name);
|
||||
|
||||
// Create directory if it doesn't exist
|
||||
fs::create_dir_all(&full_path).map_err(|e| e.to_string())?;
|
||||
|
||||
// Write the HTML file
|
||||
let index_path = Path::new(&full_path).join("index.html");
|
||||
fs::write(index_path, llm_result).map_err(|e| e.to_string())?;
|
||||
|
||||
println!("Site created at: {}", full_path);
|
||||
Ok(Dynamic::UNIT)
|
||||
Ok(Dynamic::from(result))
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_site(
|
||||
ai_config: &crate::services::config::AIConfig,
|
||||
_name: Dynamic,
|
||||
prompt: Dynamic,
|
||||
) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||
|
||||
// Call the LLM to generate the HTML contents
|
||||
let llm_result = utils::call_llm(&prompt.to_string(), &ai_config).await?;
|
||||
|
||||
// Create the directory structure
|
||||
let base_path = "/opt/gbo/tenants/pragmatismo/proxy/data/websites/sites.pragmatismo.com.br";
|
||||
let site_name = format!("{}", _name.to_string());
|
||||
let full_path = format!("{}/{}", base_path, site_name);
|
||||
|
||||
// Create directory if it doesn't exist
|
||||
fs::create_dir_all(&full_path).map_err(|e| e.to_string())?;
|
||||
|
||||
// Write the HTML file
|
||||
let index_path = Path::new(&full_path).join("index.html");
|
||||
fs::write(index_path, llm_result).map_err(|e| e.to_string())?;
|
||||
|
||||
println!("Site created at: {}", full_path);
|
||||
Ok(full_path)
|
||||
}
|
||||
|
|
|
@ -57,35 +57,28 @@ pub async fn execute_headless_browser_search(
|
|||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn perform_search(
|
||||
driver: WebDriver,
|
||||
search_term: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||
// Configure the search query
|
||||
let query = search_term.to_string();
|
||||
|
||||
// Navigate to DuckDuckGo
|
||||
println!("Navigating to DuckDuckGo...");
|
||||
driver.goto("https://duckduckgo.com").await?;
|
||||
|
||||
// Wait for search box and type query
|
||||
println!("Searching for: {}", query);
|
||||
let search_input = driver.find(By::Name("q")).await?;
|
||||
let search_input = driver.find(By::Id("searchbox_input")).await?;
|
||||
search_input.click().await?;
|
||||
search_input.send_keys(&query).await?;
|
||||
search_input.send_keys(search_term).await?;
|
||||
|
||||
// Submit search by pressing Enter
|
||||
search_input.send_keys("\n").await?;
|
||||
|
||||
// Wait for results to load
|
||||
driver.find(By::Css(".result")).await?;
|
||||
sleep(Duration::from_millis(2000)).await; // Give extra time for JS
|
||||
// Wait for results to load - using a modern result selector
|
||||
driver.find(By::Css("[data-testid='result']")).await?;
|
||||
sleep(Duration::from_millis(2000)).await;
|
||||
|
||||
// Extract first result link
|
||||
// Extract results
|
||||
let results = extract_search_results(&driver).await?;
|
||||
|
||||
if !results.is_empty() {
|
||||
println!("Found {} results", results.len());
|
||||
Ok(results[0].clone())
|
||||
} else {
|
||||
Ok("No results found".to_string())
|
||||
|
@ -97,20 +90,42 @@ async fn extract_search_results(
|
|||
) -> Result<Vec<String>, Box<dyn Error + Send + Sync>> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
// Try different selectors for search results
|
||||
// Try different selectors for search results, ordered by most specific to most general
|
||||
let selectors = [
|
||||
"a[data-testid='result-title-a']", // Modern DuckDuckGo
|
||||
".result__a", // Classic DuckDuckGo
|
||||
"a.result-link", // Alternative
|
||||
".result a[href]", // Generic result links
|
||||
// Modern DuckDuckGo (as seen in the HTML)
|
||||
"a[data-testid='result-title-a']", // Primary result links
|
||||
"a[data-testid='result-extras-url-link']", // URL links in results
|
||||
"a.eVNpHGjtxRBq_gLOfGDr", // Class-based selector for result titles
|
||||
"a.Rn_JXVtoPVAFyGkcaXyK", // Class-based selector for URL links
|
||||
".ikg2IXiCD14iVX7AdZo1 a", // Heading container links
|
||||
".OQ_6vPwNhCeusNiEDcGp a", // URL container links
|
||||
// Fallback selectors
|
||||
".result__a", // Classic DuckDuckGo
|
||||
"a.result-link", // Alternative
|
||||
".result a[href]", // Generic result links
|
||||
];
|
||||
|
||||
for selector in &selectors {
|
||||
if let Ok(elements) = driver.find_all(By::Css(selector)).await {
|
||||
for element in elements {
|
||||
if let Ok(Some(href)) = element.attr("href").await {
|
||||
if href.starts_with("http") && !href.contains("duckduckgo.com") {
|
||||
results.push(href);
|
||||
// Filter out internal and non-http links
|
||||
if href.starts_with("http")
|
||||
&& !href.contains("duckduckgo.com")
|
||||
&& !href.contains("duck.co")
|
||||
&& !results.contains(&href) {
|
||||
|
||||
// Get the display URL for verification
|
||||
let display_url = if let Ok(text) = element.text().await {
|
||||
text.trim().to_string()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
// Only add if it looks like a real result (not an ad or internal link)
|
||||
if !display_url.is_empty() && !display_url.contains("Ad") {
|
||||
results.push(href);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -120,5 +135,8 @@ async fn extract_search_results(
|
|||
}
|
||||
}
|
||||
|
||||
// Deduplicate results
|
||||
results.dedup();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
|
@ -47,7 +47,7 @@ impl BrowserPool {
|
|||
|
||||
let mut caps = DesiredCapabilities::chrome();
|
||||
caps.set_binary(&self.brave_path)?;
|
||||
caps.add_chrome_arg("--headless=new")?;
|
||||
//caps.add_chrome_arg("--headless=new")?;
|
||||
caps.add_chrome_arg("--disable-gpu")?;
|
||||
caps.add_chrome_arg("--no-sandbox")?;
|
||||
|
||||
|
@ -149,7 +149,7 @@ async fn setup_chromedriver() -> Result<String, Box<dyn std::error::Error>> {
|
|||
// Extract the zip to a temporary directory first
|
||||
let mut temp_extract_dir = std::env::temp_dir();
|
||||
temp_extract_dir.push("chromedriver_extract");
|
||||
let mut temp_extract_dir1 = temp_extract_dir.clone();
|
||||
let temp_extract_dir1 = temp_extract_dir.clone();
|
||||
|
||||
// Clean up any previous extraction
|
||||
let _ = fs::remove_dir_all(&temp_extract_dir).await;
|
||||
|
|
Loading…
Add table
Reference in a new issue