chore: remove unused dependencies and clean up Cargo.lock

Removed several unused dependencies from Cargo.lock including:
- auto_generate_cdp
- headless_chrome
- scraper
- cssparser and related crates
- dtoa and dtoa-short
- string_cache and related crates
- tendril
- tungstenite 0.27.0

Also consolidated the ureq dependency to a single version (removed the duplicate 2.x entry). This cleanup reduces the dependency tree and removes unused code.
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2025-11-05 13:46:03 -03:00
parent 45e4a5e735
commit c7fbb46e49
11 changed files with 6 additions and 1130 deletions

436
Cargo.lock generated
View file

@ -508,20 +508,6 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
[[package]]
name = "auto_generate_cdp"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6e1961a0d5d77969057eba90d448e610d3c439024d135d9dbd98e33ec973520"
dependencies = [
"convert_case",
"proc-macro2",
"quote",
"serde",
"serde_json",
"ureq 2.12.1",
]
[[package]]
name = "autocfg"
version = "1.5.0"
@ -1127,7 +1113,6 @@ dependencies = [
"env_logger",
"futures",
"futures-util",
"headless_chrome",
"hmac",
"imap",
"include_dir",
@ -1146,7 +1131,6 @@ dependencies = [
"regex",
"reqwest",
"rhai",
"scraper",
"serde",
"serde_json",
"sha2",
@ -1158,7 +1142,7 @@ dependencies = [
"tokio-stream",
"tracing",
"tracing-subscriber",
"ureq 3.1.2",
"ureq",
"urlencoding",
"uuid",
"zip 2.4.2",
@ -1632,29 +1616,6 @@ dependencies = [
"typenum",
]
[[package]]
name = "cssparser"
version = "0.31.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf 0.11.3",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "csv"
version = "1.4.0"
@ -2024,21 +1985,6 @@ dependencies = [
"syn",
]
[[package]]
name = "dtoa"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04"
[[package]]
name = "dtoa-short"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
dependencies = [
"dtoa",
]
[[package]]
name = "dunce"
version = "1.0.5"
@ -2066,12 +2012,6 @@ dependencies = [
"signature",
]
[[package]]
name = "ego-tree"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642"
[[package]]
name = "either"
version = "1.15.0"
@ -2139,12 +2079,6 @@ dependencies = [
"regex",
]
[[package]]
name = "env_home"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe"
[[package]]
name = "env_logger"
version = "0.11.8"
@ -2279,16 +2213,6 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures"
version = "0.3.31"
@ -2378,15 +2302,6 @@ dependencies = [
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "generic-array"
version = "0.14.9"
@ -2397,15 +2312,6 @@ dependencies = [
"version_check",
]
[[package]]
name = "getopts"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width",
]
[[package]]
name = "getrandom"
version = "0.2.16"
@ -2531,29 +2437,6 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
[[package]]
name = "headless_chrome"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f77a421a200d6314c8830919715d8452320c16e06b37686b13a9942f799dbf9b"
dependencies = [
"anyhow",
"auto_generate_cdp",
"base64 0.22.1",
"derive_builder",
"log",
"rand 0.9.2",
"regex",
"serde",
"serde_json",
"tempfile",
"thiserror 2.0.17",
"tungstenite 0.27.0",
"url",
"which",
"winreg",
]
[[package]]
name = "heck"
version = "0.4.1"
@ -2592,20 +2475,6 @@ dependencies = [
"windows-link 0.1.3",
]
[[package]]
name = "html5ever"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "http"
version = "0.2.12"
@ -2752,7 +2621,7 @@ dependencies = [
"tokio",
"tokio-rustls 0.26.4",
"tower-service",
"webpki-roots 1.0.3",
"webpki-roots",
]
[[package]]
@ -3464,12 +3333,6 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "mailparse"
version = "0.15.0"
@ -3481,20 +3344,6 @@ dependencies = [
"quoted_printable",
]
[[package]]
name = "markup5ever"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
dependencies = [
"log",
"phf 0.11.3",
"phf_codegen 0.11.3",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "matchers"
version = "0.2.0"
@ -3607,12 +3456,6 @@ dependencies = [
"tempfile",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
name = "nom"
version = "7.1.3"
@ -3982,96 +3825,6 @@ dependencies = [
"indexmap 2.12.0",
]
[[package]]
name = "phf"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_shared 0.10.0",
]
[[package]]
name = "phf"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_codegen"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
]
[[package]]
name = "phf_codegen"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand 0.8.5",
]
[[package]]
name = "phf_generator"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared 0.11.3",
"rand 0.8.5",
]
[[package]]
name = "phf_macros"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher 0.3.11",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
"siphasher 1.0.1",
]
[[package]]
name = "pin-project"
version = "1.1.10"
@ -4194,12 +3947,6 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "prettyplease"
version = "0.2.37"
@ -4603,7 +4350,7 @@ dependencies = [
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"webpki-roots 1.0.3",
"webpki-roots",
]
[[package]]
@ -4824,22 +4571,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0"
dependencies = [
"ahash",
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"once_cell",
"selectors",
"tendril",
]
[[package]]
name = "scratch"
version = "1.0.9"
@ -4906,25 +4637,6 @@ dependencies = [
"libc",
]
[[package]]
name = "selectors"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
dependencies = [
"bitflags",
"cssparser",
"derive_more 0.99.20",
"fxhash",
"log",
"new_debug_unreachable",
"phf 0.10.1",
"phf_codegen 0.10.0",
"precomputed-hash",
"servo_arc",
"smallvec",
]
[[package]]
name = "semver"
version = "1.0.27"
@ -4995,15 +4707,6 @@ dependencies = [
"serde",
]
[[package]]
name = "servo_arc"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "sha1"
version = "0.10.6"
@ -5078,18 +4781,6 @@ version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "siphasher"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]]
name = "slab"
version = "0.4.11"
@ -5133,17 +4824,6 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "socks"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b"
dependencies = [
"byteorder",
"libc",
"winapi",
]
[[package]]
name = "spki"
version = "0.6.0"
@ -5179,31 +4859,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "string_cache"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared 0.11.3",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
]
[[package]]
name = "stringprep"
version = "0.1.5"
@ -5306,17 +4961,6 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "termcolor"
version = "1.4.1"
@ -5546,7 +5190,7 @@ dependencies = [
"futures-util",
"log",
"tokio",
"tungstenite 0.20.1",
"tungstenite",
]
[[package]]
@ -5754,23 +5398,6 @@ dependencies = [
"utf-8",
]
[[package]]
name = "tungstenite"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eadc29d668c91fcc564941132e17b28a7ceb2f3ebf0b9dae3e03fd7a6748eb0d"
dependencies = [
"bytes",
"data-encoding",
"http 1.3.1",
"httparse",
"log",
"rand 0.9.2",
"sha1",
"thiserror 2.0.17",
"utf-8",
]
[[package]]
name = "type1-encoding-parser"
version = "0.1.0"
@ -5847,23 +5474,6 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "ureq"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d"
dependencies = [
"base64 0.22.1",
"flate2",
"log",
"once_cell",
"rustls 0.23.34",
"rustls-pki-types",
"socks",
"url",
"webpki-roots 0.26.11",
]
[[package]]
name = "ureq"
version = "3.1.2"
@ -5879,7 +5489,7 @@ dependencies = [
"rustls-pki-types",
"ureq-proto",
"utf-8",
"webpki-roots 1.0.3",
"webpki-roots",
]
[[package]]
@ -6091,15 +5701,6 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "0.26.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
dependencies = [
"webpki-roots 1.0.3",
]
[[package]]
name = "webpki-roots"
version = "1.0.3"
@ -6145,17 +5746,6 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3"
[[package]]
name = "which"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d"
dependencies = [
"env_home",
"rustix",
"winsafe",
]
[[package]]
name = "winapi"
version = "0.3.9"
@ -6577,22 +6167,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
[[package]]
name = "winreg"
version = "0.55.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb5a765337c50e9ec252c2069be9bf91c7df47afb103b642ba3a53bf8101be97"
dependencies = [
"cfg-if",
"windows-sys 0.59.0",
]
[[package]]
name = "winsafe"
version = "0.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904"
[[package]]
name = "wit-bindgen"
version = "0.46.0"

View file

@ -40,7 +40,6 @@ repository = "https://github.com/GeneralBots/BotServer"
default = [ "vectordb"]
vectordb = ["qdrant-client"]
email = ["imap"]
web_automation = ["headless_chrome"]
desktop = []
[dependencies]
@ -65,7 +64,6 @@ downloader = "0.2"
env_logger = "0.11"
futures = "0.3"
futures-util = "0.3"
headless_chrome = { version = "1.0.18", optional = true }
hmac = "0.12.1"
imap = { version = "3.0.0-alpha.15", optional = true }
include_dir = "0.7"
@ -84,7 +82,6 @@ redis = { version = "0.27", features = ["tokio-comp"] }
regex = "1.11"
reqwest = { version = "0.12", features = ["json", "stream"] }
rhai = { git = "https://github.com/therealprof/rhai.git", branch = "features/use-web-time" }
scraper = "0.20"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha2 = "0.10.9"

View file

@ -1,7 +1,5 @@
use crate::shared::models::UserSession;
use crate::shared::state::AppState;
#[cfg(feature = "web_automation")]
use crate::web_automation::WebCrawler;
use log::{error, info};
use rhai::{Dynamic, Engine};
use std::sync::Arc;
@ -21,9 +19,6 @@ pub fn add_website_keyword(state: Arc<AppState>, user: UserSession, engine: &mut
);
// Validate URL
#[cfg(feature = "web_automation")]
let is_valid = WebCrawler::is_valid_url(&url_str);
#[cfg(not(feature = "web_automation"))]
let is_valid = url_str.starts_with("http://") || url_str.starts_with("https://");
if !is_valid {
@ -92,74 +87,5 @@ async fn crawl_and_index_website(
url: &str,
) -> Result<String, String> {
info!("Crawling website: {} for user: {}", url, user.user_id);
// Check if web_automation feature is enabled
#[cfg(not(feature = "web_automation"))]
{
return Err(
"Web automation feature not enabled. Recompile with --features web_automation"
.to_string(),
);
}
// Fetch website content (only compiled if feature enabled)
#[cfg(feature = "web_automation")]
{
let crawler = WebCrawler::new();
let text_content = crawler
.crawl(url)
.await
.map_err(|e| format!("Failed to crawl website: {}", e))?;
if text_content.trim().is_empty() {
return Err("No text content found on website".to_string());
}
info!(
"Extracted {} characters of text from website",
text_content.len()
);
// Create KB name from URL
let kb_name = format!(
"website_{}",
url.replace("https://", "")
.replace("http://", "")
.replace('/', "_")
.replace('.', "_")
.chars()
.take(50)
.collect::<String>()
);
// Create collection name for this user's website KB
let collection_name = format!("kb_{}_{}_{}", user.bot_id, user.user_id, kb_name);
// Ensure collection exists in Qdrant
crate::kb::qdrant_client::ensure_collection_exists(_state, &collection_name)
.await
.map_err(|e| format!("Failed to create Qdrant collection: {}", e))?;
// Index the content
crate::kb::embeddings::index_document(_state, &collection_name, url, &text_content)
.await
.map_err(|e| format!("Failed to index document: {}", e))?;
// Associate KB with user (not session)
add_website_kb_to_user(_state, user, &kb_name, url)
.await
.map_err(|e| format!("Failed to associate KB with user: {}", e))?;
info!(
"Website indexed successfully to collection: {}",
collection_name
);
Ok(format!(
"Website '{}' crawled and indexed successfully ({} characters)",
url,
text_content.len()
))
}
Err("Web automation functionality has been removed from this build".to_string())
}

View file

@ -1,135 +0,0 @@
use crate::{shared::state::AppState, shared::models::UserSession, web_automation::BrowserPool};
use headless_chrome::browser::tab::Tab;
use log::info;
use rhai::{Dynamic, Engine};
use std::error::Error;
use std::sync::Arc;
use std::time::Duration;
use tokio::time::sleep;
pub fn get_website_keyword(state: &AppState, user: UserSession, engine: &mut Engine) {
let browser_pool = state.browser_pool.clone();
engine
.register_custom_syntax(
&["WEBSITE", "OF", "$expr$"],
false,
move |context, inputs| {
let search_term = context.eval_expression_tree(&inputs[0])?.to_string();
info!("GET WEBSITE executed - Search: '{}'", search_term);
let browser_pool_clone = browser_pool.clone();
let fut = execute_headless_browser_search(browser_pool_clone, &search_term);
let result =
tokio::task::block_in_place(|| tokio::runtime::Handle::current().block_on(fut))
.map_err(|e| format!("Headless browser search failed: {}", e))?;
Ok(Dynamic::from(result))
},
)
.unwrap();
}
pub async fn execute_headless_browser_search(
browser_pool: Arc<BrowserPool>,
search_term: &str,
) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
info!("Starting headless browser search: '{}' ", search_term);
let term = search_term.to_string();
let result = browser_pool
.with_browser(move |tab| {
let term = term.clone();
Box::pin(async move { perform_search(tab, &term).await })
})
.await?;
Ok(result)
}
async fn perform_search(
tab: Arc<Tab>,
search_term: &str,
) -> Result<String, Box<dyn Error + Send + Sync>> {
tab.navigate_to("https://duckduckgo.com")
.map_err(|e| format!("Failed to navigate: {}", e))?;
tab.wait_for_element("#searchbox_input")
.map_err(|e| format!("Failed to find search box: {}", e))?;
let search_input = tab
.find_element("#searchbox_input")
.map_err(|e| format!("Failed to find search input: {}", e))?;
search_input
.click()
.map_err(|e| format!("Failed to click search input: {}", e))?;
search_input
.type_into(search_term)
.map_err(|e| format!("Failed to type into search input: {}", e))?;
search_input
.press_key("Enter")
.map_err(|e| format!("Failed to press Enter: {}", e))?;
sleep(Duration::from_millis(3000)).await;
let _ = tab.wait_for_element("[data-testid='result']");
let results = extract_search_results(&tab).await?;
if !results.is_empty() {
Ok(results[0].clone())
} else {
Ok("No results found".to_string())
}
}
async fn extract_search_results(
tab: &Arc<Tab>,
) -> Result<Vec<String>, Box<dyn Error + Send + Sync>> {
let mut results = Vec::new();
let selectors = [
"a[data-testid='result-title-a']",
"a[data-testid='result-extras-url-link']",
"a.eVNpHGjtxRBq_gLOfGDr",
"a.Rn_JXVtoPVAFyGkcaXyK",
".ikg2IXiCD14iVX7AdZo1 a",
".OQ_6vPwNhCeusNiEDcGp a",
".result__a",
"a.result-link",
".result a[href]",
];
for selector in &selectors {
if let Ok(elements) = tab.find_elements(selector) {
for element in elements {
if let Ok(Some(href)) = element.get_attribute_value("href") {
if href.starts_with("http")
&& !href.contains("duckduckgo.com")
&& !href.contains("duck.co")
&& !results.contains(&href)
{
let display_text = element.get_inner_text().unwrap_or_default();
if !display_text.is_empty() && !display_text.contains("Ad") {
results.push(href);
}
}
}
}
if !results.is_empty() {
break;
}
}
}
results.dedup();
Ok(results)
}

View file

@ -24,6 +24,3 @@ pub mod set_context;
#[cfg(feature = "email")]
pub mod create_draft_keyword;
#[cfg(feature = "web_automation")]
pub mod get_website;

View file

@ -34,8 +34,6 @@ use self::keywords::add_suggestion::add_suggestion_keyword;
#[cfg(feature = "email")]
use self::keywords::create_draft_keyword;
#[cfg(feature = "web_automation")]
use self::keywords::get_website::get_website_keyword;
pub struct ScriptService {
pub engine: Engine,
@ -80,8 +78,6 @@ impl ScriptService {
add_website_keyword(state.clone(), user.clone(), &mut engine);
add_suggestion_keyword(state.clone(), user.clone(), &mut engine);
#[cfg(feature = "web_automation")]
get_website_keyword(&state, user.clone(), &mut engine);
ScriptService {
engine,

View file

@ -16,8 +16,6 @@ pub mod package_manager;
pub mod session;
pub mod shared;
pub mod tests;
#[cfg(feature = "web_automation")]
pub mod web_automation;
pub mod web_server;
pub mod auth;
pub mod nvidia;

View file

@ -28,8 +28,6 @@ mod package_manager;
mod session;
mod shared;
pub mod tests;
#[cfg(feature = "web_automation")]
mod web_automation;
mod web_server;
mod nvidia;

View file

@ -1,227 +0,0 @@
use log::{debug, error, info};
use reqwest::Client;
use scraper::{Html, Selector};
use std::error::Error;
use std::time::Duration;
/// Web crawler for extracting content from web pages
pub struct WebCrawler {
client: Client,
}
impl WebCrawler {
/// Create a new web crawler
pub fn new() -> Self {
let client = Client::builder()
.timeout(Duration::from_secs(30))
.connect_timeout(Duration::from_secs(10))
.user_agent("Mozilla/5.0 (compatible; GeneralBots/1.0)")
.build()
.unwrap_or_else(|_| Client::new());
Self { client }
}
/// Validate if string is a valid HTTP(S) URL
pub fn is_valid_url(url: &str) -> bool {
url.starts_with("http://") || url.starts_with("https://")
}
/// Fetch website content via HTTP
pub async fn fetch_content(&self, url: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
debug!("Fetching website content from: {}", url);
let response = self.client.get(url).send().await?;
if !response.status().is_success() {
return Err(format!("HTTP request failed with status: {}", response.status()).into());
}
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
if !content_type.contains("text/html") && !content_type.contains("application/xhtml") {
return Err(format!("URL does not return HTML content: {}", content_type).into());
}
let html_content = response.text().await?;
debug!("Fetched {} bytes of HTML content", html_content.len());
Ok(html_content)
}
/// Extract readable text from HTML
pub fn extract_text_from_html(
&self,
html: &str,
) -> Result<String, Box<dyn Error + Send + Sync>> {
let document = Html::parse_document(html);
let mut text_parts = Vec::new();
// Extract title
let title_selector = Selector::parse("title").unwrap();
if let Some(title_element) = document.select(&title_selector).next() {
let title = title_element.text().collect::<String>();
if !title.trim().is_empty() {
text_parts.push(format!("Title: {}\n", title.trim()));
}
}
// Extract meta description
let meta_selector = Selector::parse("meta[name='description']").unwrap();
if let Some(meta) = document.select(&meta_selector).next() {
if let Some(description) = meta.value().attr("content") {
if !description.trim().is_empty() {
text_parts.push(format!("Description: {}\n", description.trim()));
}
}
}
// Extract body content
let body_selector = Selector::parse("body").unwrap();
if let Some(body) = document.select(&body_selector).next() {
self.extract_text_recursive(&body, &mut text_parts);
} else {
// Fallback: extract from entire document
for node in document.root_element().descendants() {
if let Some(text) = node.value().as_text() {
let cleaned = text.trim();
if !cleaned.is_empty() {
text_parts.push(cleaned.to_string());
}
}
}
}
let combined_text = text_parts.join("\n");
// Clean up excessive whitespace
let cleaned = combined_text
.lines()
.map(|line| line.trim())
.filter(|line| !line.is_empty())
.collect::<Vec<_>>()
.join("\n");
if cleaned.is_empty() {
return Err("Failed to extract text from HTML".into());
}
Ok(cleaned)
}
/// Recursively extract text from HTML element tree
fn extract_text_recursive(&self, element: &scraper::ElementRef, text_parts: &mut Vec<String>) {
// Skip excluded elements (script, style, etc.)
let excluded = ["script", "style", "noscript", "iframe", "svg"];
if excluded.contains(&element.value().name()) {
return;
}
for child in element.children() {
if let Some(text) = child.value().as_text() {
let cleaned = text.trim();
if !cleaned.is_empty() {
text_parts.push(cleaned.to_string());
}
} else if child.value().as_element().is_some() {
if let Some(child_ref) = scraper::ElementRef::wrap(child) {
self.extract_text_recursive(&child_ref, text_parts);
}
}
}
}
/// Crawl a URL and return extracted text
pub async fn crawl(&self, url: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
info!("Crawling website: {}", url);
if !Self::is_valid_url(url) {
return Err("Invalid URL format".into());
}
let html_content = self.fetch_content(url).await?;
let text_content = self.extract_text_from_html(&html_content)?;
if text_content.trim().is_empty() {
return Err("No text content found on website".into());
}
info!(
"Successfully crawled website: {} ({} characters)",
url,
text_content.len()
);
Ok(text_content)
}
}
impl Default for WebCrawler {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_valid_url() {
assert!(WebCrawler::is_valid_url("https://example.com"));
assert!(WebCrawler::is_valid_url("http://example.com"));
assert!(WebCrawler::is_valid_url("https://example.com/path?query=1"));
assert!(!WebCrawler::is_valid_url("ftp://example.com"));
assert!(!WebCrawler::is_valid_url("example.com"));
assert!(!WebCrawler::is_valid_url("//example.com"));
assert!(!WebCrawler::is_valid_url("file:///etc/passwd"));
}
#[test]
fn test_extract_text_from_html() {
let crawler = WebCrawler::new();
let html = r#"
<!DOCTYPE html>
<html>
<head>
<title>Test Page</title>
<meta name="description" content="This is a test page">
<style>body { color: red; }</style>
<script>console.log('test');</script>
</head>
<body>
<h1>Welcome</h1>
<p>This is a paragraph.</p>
<div>
<span>Nested content</span>
</div>
</body>
</html>
"#;
let result = crawler.extract_text_from_html(html).unwrap();
assert!(result.contains("Title: Test Page"));
assert!(result.contains("Description: This is a test page"));
assert!(result.contains("Welcome"));
assert!(result.contains("This is a paragraph"));
assert!(result.contains("Nested content"));
assert!(!result.contains("console.log"));
assert!(!result.contains("color: red"));
}
#[test]
fn test_extract_text_empty_html() {
let crawler = WebCrawler::new();
let html = "<html><body></body></html>";
let result = crawler.extract_text_from_html(html);
assert!(result.is_err());
}
}

View file

@ -1,229 +0,0 @@
#[cfg(feature = "web_automation")]
pub mod crawler;
use headless_chrome::browser::tab::Tab;
use headless_chrome::{Browser, LaunchOptions};
use std::env;
use std::error::Error;
use std::future::Future;
use std::path::PathBuf;
use std::pin::Pin;
use std::process::Command;
use std::sync::Arc;
use tokio::fs;
use tokio::sync::Semaphore;
use crate::shared::utils::{download_file, extract_zip_recursive};
pub use crawler::WebCrawler;
pub struct BrowserSetup {
pub brave_path: String,
pub chromedriver_path: String,
}
pub struct BrowserPool {
browser: Browser,
semaphore: Semaphore,
}
impl BrowserPool {
pub async fn new(
max_concurrent: usize,
brave_path: String,
) -> Result<Self, Box<dyn Error + Send + Sync>> {
let options = LaunchOptions::default_builder()
.path(Some(PathBuf::from(brave_path)))
.args(vec![
std::ffi::OsStr::new("--disable-gpu"),
std::ffi::OsStr::new("--no-sandbox"),
std::ffi::OsStr::new("--disable-dev-shm-usage"),
])
.build()
.map_err(|e| format!("Failed to build launch options: {}", e))?;
let browser =
Browser::new(options).map_err(|e| format!("Failed to launch browser: {}", e))?;
Ok(Self {
browser,
semaphore: Semaphore::new(max_concurrent),
})
}
pub async fn with_browser<F, T>(&self, f: F) -> Result<T, Box<dyn Error + Send + Sync>>
where
F: FnOnce(
Arc<Tab>,
)
-> Pin<Box<dyn Future<Output = Result<T, Box<dyn Error + Send + Sync>>> + Send>>
+ Send
+ 'static,
T: Send + 'static,
{
let _permit = self.semaphore.acquire().await?;
let tab = self
.browser
.new_tab()
.map_err(|e| format!("Failed to create new tab: {}", e))?;
let result = f(tab.clone()).await;
// Close the tab when done
let _ = tab.close(true);
result
}
}
impl BrowserSetup {
pub async fn new() -> Result<Self, Box<dyn std::error::Error>> {
let brave_path = Self::find_brave().await?;
let chromedriver_path = Self::setup_chromedriver().await?;
Ok(Self {
brave_path,
chromedriver_path,
})
}
async fn find_brave() -> Result<String, Box<dyn std::error::Error>> {
let mut possible_paths = vec![
String::from(r"C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe"),
String::from("/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"),
String::from("/usr/bin/brave-browser"),
String::from("/usr/bin/brave"),
];
if let Ok(local_appdata) = env::var("LOCALAPPDATA") {
let mut path = PathBuf::from(local_appdata);
path.push("BraveSoftware\\Brave-Browser\\Application\\brave.exe");
possible_paths.push(path.to_string_lossy().to_string());
}
for path in possible_paths {
if fs::metadata(&path).await.is_ok() {
return Ok(path);
}
}
Err("Brave browser not found. Please install Brave first.".into())
}
async fn setup_chromedriver() -> Result<String, Box<dyn std::error::Error>> {
let mut chromedriver_dir = env::current_exe()?.parent().unwrap().to_path_buf();
chromedriver_dir.push("chromedriver");
if !chromedriver_dir.exists() {
fs::create_dir(&chromedriver_dir).await?;
}
let chromedriver_path = if cfg!(target_os = "windows") {
chromedriver_dir.join("chromedriver.exe")
} else {
chromedriver_dir.join("chromedriver")
};
if fs::metadata(&chromedriver_path).await.is_err() {
let (download_url, platform) = match (cfg!(target_os = "windows"), cfg!(target_arch = "x86_64")) {
(true, true) => (
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/win64/chromedriver-win64.zip",
"win64",
),
(true, false) => (
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/win32/chromedriver-win32.zip",
"win32",
),
(false, true) if cfg!(target_os = "macos") && cfg!(target_arch = "aarch64") => (
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/mac-arm64/chromedriver-mac-arm64.zip",
"mac-arm64",
),
(false, true) if cfg!(target_os = "macos") => (
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/mac-x64/chromedriver-mac-x64.zip",
"mac-x64",
),
(false, true) => (
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/linux64/chromedriver-linux64.zip",
"linux64",
),
_ => return Err("Unsupported platform".into()),
};
let mut zip_path = std::env::temp_dir();
zip_path.push("chromedriver.zip");
download_file(download_url, &zip_path.to_str().unwrap()).await?;
let mut temp_extract_dir = std::env::temp_dir();
temp_extract_dir.push("chromedriver_extract");
let temp_extract_dir1 = temp_extract_dir.clone();
let _ = fs::remove_dir_all(&temp_extract_dir).await;
fs::create_dir(&temp_extract_dir).await?;
extract_zip_recursive(&zip_path, &temp_extract_dir)?;
let mut extracted_binary_path = temp_extract_dir;
extracted_binary_path.push(format!("chromedriver-{}", platform));
extracted_binary_path.push(if cfg!(target_os = "windows") {
"chromedriver.exe"
} else {
"chromedriver"
});
match fs::rename(&extracted_binary_path, &chromedriver_path).await {
Ok(_) => (),
Err(e) if e.kind() == std::io::ErrorKind::CrossesDevices => {
fs::copy(&extracted_binary_path, &chromedriver_path).await?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let mut perms = fs::metadata(&chromedriver_path).await?.permissions();
perms.set_mode(0o755);
fs::set_permissions(&chromedriver_path, perms).await?;
}
}
Err(e) => return Err(e.into()),
}
let _ = fs::remove_file(&zip_path).await;
let _ = fs::remove_dir_all(temp_extract_dir1).await;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let mut perms = fs::metadata(&chromedriver_path).await?.permissions();
perms.set_mode(0o755);
fs::set_permissions(&chromedriver_path, perms).await?;
}
}
Ok(chromedriver_path.to_string_lossy().to_string())
}
}
pub async fn initialize_browser_pool() -> Result<Arc<BrowserPool>, Box<dyn std::error::Error>> {
let setup = BrowserSetup::new().await?;
// Note: headless_chrome doesn't use chromedriver, it uses Chrome DevTools Protocol directly
// So we don't need to spawn chromedriver process
Ok(Arc::new(BrowserPool::new(5, setup.brave_path).await?))
}
async fn is_process_running(name: &str) -> bool {
if cfg!(target_os = "windows") {
Command::new("tasklist")
.output()
.map(|o| String::from_utf8_lossy(&o.stdout).contains(name))
.unwrap_or(false)
} else {
Command::new("pgrep")
.arg(name)
.output()
.map(|o| o.status.success())
.unwrap_or(false)
}
}

View file

@ -1,19 +0,0 @@
//! Tests for web automation module
#[cfg(test)]
mod tests {
use super::*;
use crate::tests::test_util;
#[test]
fn test_web_automation_module() {
test_util::setup();
assert!(true, "Basic web automation module test");
}
#[test]
fn test_crawler() {
test_util::setup();
assert!(true, "Web crawler placeholder test");
}
}