This commit is contained in:
parent
a3118dcf0a
commit
e15da79204
4 changed files with 81 additions and 45 deletions
|
@ -10,7 +10,7 @@ FOR EACH item IN items
|
||||||
|
|
||||||
let prompt = "Create a website for " + item.company + " with the following details: " + page
|
let prompt = "Create a website for " + item.company + " with the following details: " + page
|
||||||
|
|
||||||
let alias = LLM "Return a single word for {item.company} like a token, no spaces, no special characters, no numbers, no uppercase letters."
|
let alias = LLM "Return a single word for " + item.company + " like a token, no spaces, no special characters, no numbers, no uppercase letters."
|
||||||
|
|
||||||
CREATE SITE item.company + "bot", item.company, website, "site", prompt
|
CREATE SITE item.company + "bot", item.company, website, "site", prompt
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
use rhai::Dynamic;
|
use rhai::Dynamic;
|
||||||
use rhai::Engine;
|
use rhai::Engine;
|
||||||
|
use std::error::Error;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::services::state::AppState;
|
use crate::services::state::AppState;
|
||||||
|
use crate::services::utils;
|
||||||
|
|
||||||
pub fn create_site_keyword(_state: &AppState, engine: &mut Engine) {
|
pub fn create_site_keyword(state: &AppState, engine: &mut Engine) {
|
||||||
|
let state_clone = state.clone();
|
||||||
engine
|
engine
|
||||||
.register_custom_syntax(
|
.register_custom_syntax(
|
||||||
&[
|
&[
|
||||||
|
@ -23,25 +26,40 @@ pub fn create_site_keyword(_state: &AppState, engine: &mut Engine) {
|
||||||
let _website = context.eval_expression_tree(&inputs[2])?;
|
let _website = context.eval_expression_tree(&inputs[2])?;
|
||||||
let _template = context.eval_expression_tree(&inputs[3])?;
|
let _template = context.eval_expression_tree(&inputs[3])?;
|
||||||
let prompt = context.eval_expression_tree(&inputs[4])?;
|
let prompt = context.eval_expression_tree(&inputs[4])?;
|
||||||
|
let ai_config = state_clone.config.as_ref().expect("Config must be initialized").ai.clone();
|
||||||
|
// Use the same pattern as find_keyword
|
||||||
|
let fut = create_site(&ai_config, _name, prompt);
|
||||||
|
let result =
|
||||||
|
tokio::task::block_in_place(|| tokio::runtime::Handle::current().block_on(fut))
|
||||||
|
.map_err(|e| format!("HTTP request failed: {}", e))?;
|
||||||
|
|
||||||
// Call the LLM to generate the HTML content
|
Ok(Dynamic::from(result))
|
||||||
let llm_result = context.call_fn::<String>("chat", (prompt.to_string(),))?;
|
|
||||||
|
|
||||||
// Create the directory structure
|
|
||||||
let base_path = "/opt/gbo/tenants/pragmatismo/proxy/data/websites/sites.pragmatismo.com.br";
|
|
||||||
let site_name = format!("{}", _name.to_string());
|
|
||||||
let full_path = format!("{}/{}", base_path, site_name);
|
|
||||||
|
|
||||||
// Create directory if it doesn't exist
|
|
||||||
fs::create_dir_all(&full_path).map_err(|e| e.to_string())?;
|
|
||||||
|
|
||||||
// Write the HTML file
|
|
||||||
let index_path = Path::new(&full_path).join("index.html");
|
|
||||||
fs::write(index_path, llm_result).map_err(|e| e.to_string())?;
|
|
||||||
|
|
||||||
println!("Site created at: {}", full_path);
|
|
||||||
Ok(Dynamic::UNIT)
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn create_site(
|
||||||
|
ai_config: &crate::services::config::AIConfig,
|
||||||
|
_name: Dynamic,
|
||||||
|
prompt: Dynamic,
|
||||||
|
) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||||
|
|
||||||
|
// Call the LLM to generate the HTML contents
|
||||||
|
let llm_result = utils::call_llm(&prompt.to_string(), &ai_config).await?;
|
||||||
|
|
||||||
|
// Create the directory structure
|
||||||
|
let base_path = "/opt/gbo/tenants/pragmatismo/proxy/data/websites/sites.pragmatismo.com.br";
|
||||||
|
let site_name = format!("{}", _name.to_string());
|
||||||
|
let full_path = format!("{}/{}", base_path, site_name);
|
||||||
|
|
||||||
|
// Create directory if it doesn't exist
|
||||||
|
fs::create_dir_all(&full_path).map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
// Write the HTML file
|
||||||
|
let index_path = Path::new(&full_path).join("index.html");
|
||||||
|
fs::write(index_path, llm_result).map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
println!("Site created at: {}", full_path);
|
||||||
|
Ok(full_path)
|
||||||
|
}
|
||||||
|
|
|
@ -57,35 +57,28 @@ pub async fn execute_headless_browser_search(
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn perform_search(
|
async fn perform_search(
|
||||||
driver: WebDriver,
|
driver: WebDriver,
|
||||||
search_term: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
|
search_term: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||||
// Configure the search query
|
|
||||||
let query = search_term.to_string();
|
|
||||||
|
|
||||||
// Navigate to DuckDuckGo
|
// Navigate to DuckDuckGo
|
||||||
println!("Navigating to DuckDuckGo...");
|
|
||||||
driver.goto("https://duckduckgo.com").await?;
|
driver.goto("https://duckduckgo.com").await?;
|
||||||
|
|
||||||
// Wait for search box and type query
|
// Wait for search box and type query
|
||||||
println!("Searching for: {}", query);
|
let search_input = driver.find(By::Id("searchbox_input")).await?;
|
||||||
let search_input = driver.find(By::Name("q")).await?;
|
|
||||||
search_input.click().await?;
|
search_input.click().await?;
|
||||||
search_input.send_keys(&query).await?;
|
search_input.send_keys(search_term).await?;
|
||||||
|
|
||||||
// Submit search by pressing Enter
|
// Submit search by pressing Enter
|
||||||
search_input.send_keys("\n").await?;
|
search_input.send_keys("\n").await?;
|
||||||
|
|
||||||
// Wait for results to load
|
// Wait for results to load - using a modern result selector
|
||||||
driver.find(By::Css(".result")).await?;
|
driver.find(By::Css("[data-testid='result']")).await?;
|
||||||
sleep(Duration::from_millis(2000)).await; // Give extra time for JS
|
sleep(Duration::from_millis(2000)).await;
|
||||||
|
|
||||||
// Extract first result link
|
// Extract results
|
||||||
let results = extract_search_results(&driver).await?;
|
let results = extract_search_results(&driver).await?;
|
||||||
|
|
||||||
if !results.is_empty() {
|
if !results.is_empty() {
|
||||||
println!("Found {} results", results.len());
|
|
||||||
Ok(results[0].clone())
|
Ok(results[0].clone())
|
||||||
} else {
|
} else {
|
||||||
Ok("No results found".to_string())
|
Ok("No results found".to_string())
|
||||||
|
@ -97,20 +90,42 @@ async fn extract_search_results(
|
||||||
) -> Result<Vec<String>, Box<dyn Error + Send + Sync>> {
|
) -> Result<Vec<String>, Box<dyn Error + Send + Sync>> {
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
|
|
||||||
// Try different selectors for search results
|
// Try different selectors for search results, ordered by most specific to most general
|
||||||
let selectors = [
|
let selectors = [
|
||||||
"a[data-testid='result-title-a']", // Modern DuckDuckGo
|
// Modern DuckDuckGo (as seen in the HTML)
|
||||||
".result__a", // Classic DuckDuckGo
|
"a[data-testid='result-title-a']", // Primary result links
|
||||||
"a.result-link", // Alternative
|
"a[data-testid='result-extras-url-link']", // URL links in results
|
||||||
".result a[href]", // Generic result links
|
"a.eVNpHGjtxRBq_gLOfGDr", // Class-based selector for result titles
|
||||||
|
"a.Rn_JXVtoPVAFyGkcaXyK", // Class-based selector for URL links
|
||||||
|
".ikg2IXiCD14iVX7AdZo1 a", // Heading container links
|
||||||
|
".OQ_6vPwNhCeusNiEDcGp a", // URL container links
|
||||||
|
// Fallback selectors
|
||||||
|
".result__a", // Classic DuckDuckGo
|
||||||
|
"a.result-link", // Alternative
|
||||||
|
".result a[href]", // Generic result links
|
||||||
];
|
];
|
||||||
|
|
||||||
for selector in &selectors {
|
for selector in &selectors {
|
||||||
if let Ok(elements) = driver.find_all(By::Css(selector)).await {
|
if let Ok(elements) = driver.find_all(By::Css(selector)).await {
|
||||||
for element in elements {
|
for element in elements {
|
||||||
if let Ok(Some(href)) = element.attr("href").await {
|
if let Ok(Some(href)) = element.attr("href").await {
|
||||||
if href.starts_with("http") && !href.contains("duckduckgo.com") {
|
// Filter out internal and non-http links
|
||||||
results.push(href);
|
if href.starts_with("http")
|
||||||
|
&& !href.contains("duckduckgo.com")
|
||||||
|
&& !href.contains("duck.co")
|
||||||
|
&& !results.contains(&href) {
|
||||||
|
|
||||||
|
// Get the display URL for verification
|
||||||
|
let display_url = if let Ok(text) = element.text().await {
|
||||||
|
text.trim().to_string()
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Only add if it looks like a real result (not an ad or internal link)
|
||||||
|
if !display_url.is_empty() && !display_url.contains("Ad") {
|
||||||
|
results.push(href);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -120,5 +135,8 @@ async fn extract_search_results(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Deduplicate results
|
||||||
|
results.dedup();
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
|
@ -47,7 +47,7 @@ impl BrowserPool {
|
||||||
|
|
||||||
let mut caps = DesiredCapabilities::chrome();
|
let mut caps = DesiredCapabilities::chrome();
|
||||||
caps.set_binary(&self.brave_path)?;
|
caps.set_binary(&self.brave_path)?;
|
||||||
caps.add_chrome_arg("--headless=new")?;
|
//caps.add_chrome_arg("--headless=new")?;
|
||||||
caps.add_chrome_arg("--disable-gpu")?;
|
caps.add_chrome_arg("--disable-gpu")?;
|
||||||
caps.add_chrome_arg("--no-sandbox")?;
|
caps.add_chrome_arg("--no-sandbox")?;
|
||||||
|
|
||||||
|
@ -149,7 +149,7 @@ async fn setup_chromedriver() -> Result<String, Box<dyn std::error::Error>> {
|
||||||
// Extract the zip to a temporary directory first
|
// Extract the zip to a temporary directory first
|
||||||
let mut temp_extract_dir = std::env::temp_dir();
|
let mut temp_extract_dir = std::env::temp_dir();
|
||||||
temp_extract_dir.push("chromedriver_extract");
|
temp_extract_dir.push("chromedriver_extract");
|
||||||
let mut temp_extract_dir1 = temp_extract_dir.clone();
|
let temp_extract_dir1 = temp_extract_dir.clone();
|
||||||
|
|
||||||
// Clean up any previous extraction
|
// Clean up any previous extraction
|
||||||
let _ = fs::remove_dir_all(&temp_extract_dir).await;
|
let _ = fs::remove_dir_all(&temp_extract_dir).await;
|
||||||
|
|
Loading…
Add table
Reference in a new issue