feat: remove unused dependencies and clean up Cargo.lock
Removed several unused dependencies from Cargo.lock including: - auto_generate_cdp - headless_chrome - scraper - cssparser and related crates - dtoa and dtoa-short - string_cache and related crates - tendril - tungstenite 0.27.0 Also updated ureq dependency to single version (removed duplicate entry). This cleanup reduces the dependency tree and removes unused code.
This commit is contained in:
parent
45e4a5e735
commit
c7fbb46e49
11 changed files with 6 additions and 1130 deletions
436
Cargo.lock
generated
436
Cargo.lock
generated
|
|
@ -508,20 +508,6 @@ version = "1.1.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||
|
||||
[[package]]
|
||||
name = "auto_generate_cdp"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6e1961a0d5d77969057eba90d448e610d3c439024d135d9dbd98e33ec973520"
|
||||
dependencies = [
|
||||
"convert_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"ureq 2.12.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.5.0"
|
||||
|
|
@ -1127,7 +1113,6 @@ dependencies = [
|
|||
"env_logger",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"headless_chrome",
|
||||
"hmac",
|
||||
"imap",
|
||||
"include_dir",
|
||||
|
|
@ -1146,7 +1131,6 @@ dependencies = [
|
|||
"regex",
|
||||
"reqwest",
|
||||
"rhai",
|
||||
"scraper",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
|
|
@ -1158,7 +1142,7 @@ dependencies = [
|
|||
"tokio-stream",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"ureq 3.1.2",
|
||||
"ureq",
|
||||
"urlencoding",
|
||||
"uuid",
|
||||
"zip 2.4.2",
|
||||
|
|
@ -1632,29 +1616,6 @@ dependencies = [
|
|||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser"
|
||||
version = "0.31.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
|
||||
dependencies = [
|
||||
"cssparser-macros",
|
||||
"dtoa-short",
|
||||
"itoa",
|
||||
"phf 0.11.3",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser-macros"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.4.0"
|
||||
|
|
@ -2024,21 +1985,6 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtoa"
|
||||
version = "1.0.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04"
|
||||
|
||||
[[package]]
|
||||
name = "dtoa-short"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
|
||||
dependencies = [
|
||||
"dtoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dunce"
|
||||
version = "1.0.5"
|
||||
|
|
@ -2066,12 +2012,6 @@ dependencies = [
|
|||
"signature",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ego-tree"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
|
|
@ -2139,12 +2079,6 @@ dependencies = [
|
|||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_home"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.11.8"
|
||||
|
|
@ -2279,16 +2213,6 @@ version = "1.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
|
||||
dependencies = [
|
||||
"mac",
|
||||
"new_debug_unreachable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.31"
|
||||
|
|
@ -2378,15 +2302,6 @@ dependencies = [
|
|||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.9"
|
||||
|
|
@ -2397,15 +2312,6 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getopts"
|
||||
version = "0.2.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.16"
|
||||
|
|
@ -2531,29 +2437,6 @@ version = "0.16.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
|
||||
|
||||
[[package]]
|
||||
name = "headless_chrome"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f77a421a200d6314c8830919715d8452320c16e06b37686b13a9942f799dbf9b"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"auto_generate_cdp",
|
||||
"base64 0.22.1",
|
||||
"derive_builder",
|
||||
"log",
|
||||
"rand 0.9.2",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"thiserror 2.0.17",
|
||||
"tungstenite 0.27.0",
|
||||
"url",
|
||||
"which",
|
||||
"winreg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
|
|
@ -2592,20 +2475,6 @@ dependencies = [
|
|||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
|
||||
dependencies = [
|
||||
"log",
|
||||
"mac",
|
||||
"markup5ever",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.12"
|
||||
|
|
@ -2752,7 +2621,7 @@ dependencies = [
|
|||
"tokio",
|
||||
"tokio-rustls 0.26.4",
|
||||
"tower-service",
|
||||
"webpki-roots 1.0.3",
|
||||
"webpki-roots",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3464,12 +3333,6 @@ dependencies = [
|
|||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "mailparse"
|
||||
version = "0.15.0"
|
||||
|
|
@ -3481,20 +3344,6 @@ dependencies = [
|
|||
"quoted_printable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf 0.11.3",
|
||||
"phf_codegen 0.11.3",
|
||||
"string_cache",
|
||||
"string_cache_codegen",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.2.0"
|
||||
|
|
@ -3607,12 +3456,6 @@ dependencies = [
|
|||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
|
|
@ -3982,96 +3825,6 @@ dependencies = [
|
|||
"indexmap 2.12.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
|
||||
dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
|
||||
dependencies = [
|
||||
"phf_macros",
|
||||
"phf_shared 0.11.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
|
||||
dependencies = [
|
||||
"phf_generator 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
|
||||
dependencies = [
|
||||
"phf_generator 0.11.3",
|
||||
"phf_shared 0.11.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
|
||||
dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
|
||||
dependencies = [
|
||||
"phf_shared 0.11.3",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
|
||||
dependencies = [
|
||||
"phf_generator 0.11.3",
|
||||
"phf_shared 0.11.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
|
||||
dependencies = [
|
||||
"siphasher 0.3.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
|
||||
dependencies = [
|
||||
"siphasher 1.0.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.10"
|
||||
|
|
@ -4194,12 +3947,6 @@ dependencies = [
|
|||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.37"
|
||||
|
|
@ -4603,7 +4350,7 @@ dependencies = [
|
|||
"wasm-bindgen-futures",
|
||||
"wasm-streams",
|
||||
"web-sys",
|
||||
"webpki-roots 1.0.3",
|
||||
"webpki-roots",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -4824,22 +4571,6 @@ version = "1.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "scraper"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"cssparser",
|
||||
"ego-tree",
|
||||
"getopts",
|
||||
"html5ever",
|
||||
"once_cell",
|
||||
"selectors",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scratch"
|
||||
version = "1.0.9"
|
||||
|
|
@ -4906,25 +4637,6 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cssparser",
|
||||
"derive_more 0.99.20",
|
||||
"fxhash",
|
||||
"log",
|
||||
"new_debug_unreachable",
|
||||
"phf 0.10.1",
|
||||
"phf_codegen 0.10.0",
|
||||
"precomputed-hash",
|
||||
"servo_arc",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.27"
|
||||
|
|
@ -4995,15 +4707,6 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "servo_arc"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.6"
|
||||
|
|
@ -5078,18 +4781,6 @@ version = "2.7.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.11"
|
||||
|
|
@ -5133,17 +4824,6 @@ dependencies = [
|
|||
"windows-sys 0.60.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socks"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spki"
|
||||
version = "0.6.0"
|
||||
|
|
@ -5179,31 +4859,6 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
|
||||
dependencies = [
|
||||
"new_debug_unreachable",
|
||||
"parking_lot",
|
||||
"phf_shared 0.11.3",
|
||||
"precomputed-hash",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
|
||||
dependencies = [
|
||||
"phf_generator 0.11.3",
|
||||
"phf_shared 0.11.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stringprep"
|
||||
version = "0.1.5"
|
||||
|
|
@ -5306,17 +4961,6 @@ dependencies = [
|
|||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
|
||||
dependencies = [
|
||||
"futf",
|
||||
"mac",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.4.1"
|
||||
|
|
@ -5546,7 +5190,7 @@ dependencies = [
|
|||
"futures-util",
|
||||
"log",
|
||||
"tokio",
|
||||
"tungstenite 0.20.1",
|
||||
"tungstenite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -5754,23 +5398,6 @@ dependencies = [
|
|||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tungstenite"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eadc29d668c91fcc564941132e17b28a7ceb2f3ebf0b9dae3e03fd7a6748eb0d"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"data-encoding",
|
||||
"http 1.3.1",
|
||||
"httparse",
|
||||
"log",
|
||||
"rand 0.9.2",
|
||||
"sha1",
|
||||
"thiserror 2.0.17",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "type1-encoding-parser"
|
||||
version = "0.1.0"
|
||||
|
|
@ -5847,23 +5474,6 @@ version = "0.9.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
||||
|
||||
[[package]]
|
||||
name = "ureq"
|
||||
version = "2.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"flate2",
|
||||
"log",
|
||||
"once_cell",
|
||||
"rustls 0.23.34",
|
||||
"rustls-pki-types",
|
||||
"socks",
|
||||
"url",
|
||||
"webpki-roots 0.26.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ureq"
|
||||
version = "3.1.2"
|
||||
|
|
@ -5879,7 +5489,7 @@ dependencies = [
|
|||
"rustls-pki-types",
|
||||
"ureq-proto",
|
||||
"utf-8",
|
||||
"webpki-roots 1.0.3",
|
||||
"webpki-roots",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -6091,15 +5701,6 @@ dependencies = [
|
|||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.26.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
|
||||
dependencies = [
|
||||
"webpki-roots 1.0.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "1.0.3"
|
||||
|
|
@ -6145,17 +5746,6 @@ version = "0.1.10"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3"
|
||||
|
||||
[[package]]
|
||||
name = "which"
|
||||
version = "8.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d"
|
||||
dependencies = [
|
||||
"env_home",
|
||||
"rustix",
|
||||
"winsafe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
|
|
@ -6577,22 +6167,6 @@ version = "0.53.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
|
||||
|
||||
[[package]]
|
||||
name = "winreg"
|
||||
version = "0.55.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb5a765337c50e9ec252c2069be9bf91c7df47afb103b642ba3a53bf8101be97"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winsafe"
|
||||
version = "0.0.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904"
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen"
|
||||
version = "0.46.0"
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ repository = "https://github.com/GeneralBots/BotServer"
|
|||
default = [ "vectordb"]
|
||||
vectordb = ["qdrant-client"]
|
||||
email = ["imap"]
|
||||
web_automation = ["headless_chrome"]
|
||||
desktop = []
|
||||
|
||||
[dependencies]
|
||||
|
|
@ -65,7 +64,6 @@ downloader = "0.2"
|
|||
env_logger = "0.11"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
headless_chrome = { version = "1.0.18", optional = true }
|
||||
hmac = "0.12.1"
|
||||
imap = { version = "3.0.0-alpha.15", optional = true }
|
||||
include_dir = "0.7"
|
||||
|
|
@ -84,7 +82,6 @@ redis = { version = "0.27", features = ["tokio-comp"] }
|
|||
regex = "1.11"
|
||||
reqwest = { version = "0.12", features = ["json", "stream"] }
|
||||
rhai = { git = "https://github.com/therealprof/rhai.git", branch = "features/use-web-time" }
|
||||
scraper = "0.20"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
sha2 = "0.10.9"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
use crate::shared::models::UserSession;
|
||||
use crate::shared::state::AppState;
|
||||
#[cfg(feature = "web_automation")]
|
||||
use crate::web_automation::WebCrawler;
|
||||
use log::{error, info};
|
||||
use rhai::{Dynamic, Engine};
|
||||
use std::sync::Arc;
|
||||
|
|
@ -21,9 +19,6 @@ pub fn add_website_keyword(state: Arc<AppState>, user: UserSession, engine: &mut
|
|||
);
|
||||
|
||||
// Validate URL
|
||||
#[cfg(feature = "web_automation")]
|
||||
let is_valid = WebCrawler::is_valid_url(&url_str);
|
||||
#[cfg(not(feature = "web_automation"))]
|
||||
let is_valid = url_str.starts_with("http://") || url_str.starts_with("https://");
|
||||
|
||||
if !is_valid {
|
||||
|
|
@ -92,74 +87,5 @@ async fn crawl_and_index_website(
|
|||
url: &str,
|
||||
) -> Result<String, String> {
|
||||
info!("Crawling website: {} for user: {}", url, user.user_id);
|
||||
|
||||
// Check if web_automation feature is enabled
|
||||
#[cfg(not(feature = "web_automation"))]
|
||||
{
|
||||
return Err(
|
||||
"Web automation feature not enabled. Recompile with --features web_automation"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
// Fetch website content (only compiled if feature enabled)
|
||||
#[cfg(feature = "web_automation")]
|
||||
{
|
||||
let crawler = WebCrawler::new();
|
||||
let text_content = crawler
|
||||
.crawl(url)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to crawl website: {}", e))?;
|
||||
|
||||
if text_content.trim().is_empty() {
|
||||
return Err("No text content found on website".to_string());
|
||||
}
|
||||
|
||||
info!(
|
||||
"Extracted {} characters of text from website",
|
||||
text_content.len()
|
||||
);
|
||||
|
||||
// Create KB name from URL
|
||||
let kb_name = format!(
|
||||
"website_{}",
|
||||
url.replace("https://", "")
|
||||
.replace("http://", "")
|
||||
.replace('/', "_")
|
||||
.replace('.', "_")
|
||||
.chars()
|
||||
.take(50)
|
||||
.collect::<String>()
|
||||
);
|
||||
|
||||
// Create collection name for this user's website KB
|
||||
let collection_name = format!("kb_{}_{}_{}", user.bot_id, user.user_id, kb_name);
|
||||
|
||||
// Ensure collection exists in Qdrant
|
||||
crate::kb::qdrant_client::ensure_collection_exists(_state, &collection_name)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to create Qdrant collection: {}", e))?;
|
||||
|
||||
// Index the content
|
||||
crate::kb::embeddings::index_document(_state, &collection_name, url, &text_content)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to index document: {}", e))?;
|
||||
|
||||
// Associate KB with user (not session)
|
||||
add_website_kb_to_user(_state, user, &kb_name, url)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to associate KB with user: {}", e))?;
|
||||
|
||||
info!(
|
||||
"Website indexed successfully to collection: {}",
|
||||
collection_name
|
||||
);
|
||||
|
||||
Ok(format!(
|
||||
"Website '{}' crawled and indexed successfully ({} characters)",
|
||||
url,
|
||||
text_content.len()
|
||||
))
|
||||
}
|
||||
Err("Web automation functionality has been removed from this build".to_string())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,135 +0,0 @@
|
|||
use crate::{shared::state::AppState, shared::models::UserSession, web_automation::BrowserPool};
|
||||
use headless_chrome::browser::tab::Tab;
|
||||
use log::info;
|
||||
use rhai::{Dynamic, Engine};
|
||||
use std::error::Error;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::time::sleep;
|
||||
|
||||
pub fn get_website_keyword(state: &AppState, user: UserSession, engine: &mut Engine) {
|
||||
let browser_pool = state.browser_pool.clone();
|
||||
|
||||
engine
|
||||
.register_custom_syntax(
|
||||
&["WEBSITE", "OF", "$expr$"],
|
||||
false,
|
||||
move |context, inputs| {
|
||||
let search_term = context.eval_expression_tree(&inputs[0])?.to_string();
|
||||
|
||||
info!("GET WEBSITE executed - Search: '{}'", search_term);
|
||||
|
||||
let browser_pool_clone = browser_pool.clone();
|
||||
let fut = execute_headless_browser_search(browser_pool_clone, &search_term);
|
||||
|
||||
let result =
|
||||
tokio::task::block_in_place(|| tokio::runtime::Handle::current().block_on(fut))
|
||||
.map_err(|e| format!("Headless browser search failed: {}", e))?;
|
||||
|
||||
Ok(Dynamic::from(result))
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub async fn execute_headless_browser_search(
|
||||
browser_pool: Arc<BrowserPool>,
|
||||
search_term: &str,
|
||||
) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
|
||||
info!("Starting headless browser search: '{}' ", search_term);
|
||||
|
||||
let term = search_term.to_string();
|
||||
|
||||
let result = browser_pool
|
||||
.with_browser(move |tab| {
|
||||
let term = term.clone();
|
||||
Box::pin(async move { perform_search(tab, &term).await })
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn perform_search(
|
||||
tab: Arc<Tab>,
|
||||
search_term: &str,
|
||||
) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||
tab.navigate_to("https://duckduckgo.com")
|
||||
.map_err(|e| format!("Failed to navigate: {}", e))?;
|
||||
|
||||
tab.wait_for_element("#searchbox_input")
|
||||
.map_err(|e| format!("Failed to find search box: {}", e))?;
|
||||
|
||||
let search_input = tab
|
||||
.find_element("#searchbox_input")
|
||||
.map_err(|e| format!("Failed to find search input: {}", e))?;
|
||||
|
||||
search_input
|
||||
.click()
|
||||
.map_err(|e| format!("Failed to click search input: {}", e))?;
|
||||
|
||||
search_input
|
||||
.type_into(search_term)
|
||||
.map_err(|e| format!("Failed to type into search input: {}", e))?;
|
||||
|
||||
search_input
|
||||
.press_key("Enter")
|
||||
.map_err(|e| format!("Failed to press Enter: {}", e))?;
|
||||
|
||||
sleep(Duration::from_millis(3000)).await;
|
||||
|
||||
let _ = tab.wait_for_element("[data-testid='result']");
|
||||
|
||||
let results = extract_search_results(&tab).await?;
|
||||
|
||||
if !results.is_empty() {
|
||||
Ok(results[0].clone())
|
||||
} else {
|
||||
Ok("No results found".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
async fn extract_search_results(
|
||||
tab: &Arc<Tab>,
|
||||
) -> Result<Vec<String>, Box<dyn Error + Send + Sync>> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
let selectors = [
|
||||
"a[data-testid='result-title-a']",
|
||||
"a[data-testid='result-extras-url-link']",
|
||||
"a.eVNpHGjtxRBq_gLOfGDr",
|
||||
"a.Rn_JXVtoPVAFyGkcaXyK",
|
||||
".ikg2IXiCD14iVX7AdZo1 a",
|
||||
".OQ_6vPwNhCeusNiEDcGp a",
|
||||
".result__a",
|
||||
"a.result-link",
|
||||
".result a[href]",
|
||||
];
|
||||
|
||||
for selector in &selectors {
|
||||
if let Ok(elements) = tab.find_elements(selector) {
|
||||
for element in elements {
|
||||
if let Ok(Some(href)) = element.get_attribute_value("href") {
|
||||
if href.starts_with("http")
|
||||
&& !href.contains("duckduckgo.com")
|
||||
&& !href.contains("duck.co")
|
||||
&& !results.contains(&href)
|
||||
{
|
||||
let display_text = element.get_inner_text().unwrap_or_default();
|
||||
|
||||
if !display_text.is_empty() && !display_text.contains("Ad") {
|
||||
results.push(href);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !results.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.dedup();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
|
@ -24,6 +24,3 @@ pub mod set_context;
|
|||
|
||||
#[cfg(feature = "email")]
|
||||
pub mod create_draft_keyword;
|
||||
|
||||
#[cfg(feature = "web_automation")]
|
||||
pub mod get_website;
|
||||
|
|
|
|||
|
|
@ -34,8 +34,6 @@ use self::keywords::add_suggestion::add_suggestion_keyword;
|
|||
#[cfg(feature = "email")]
|
||||
use self::keywords::create_draft_keyword;
|
||||
|
||||
#[cfg(feature = "web_automation")]
|
||||
use self::keywords::get_website::get_website_keyword;
|
||||
|
||||
pub struct ScriptService {
|
||||
pub engine: Engine,
|
||||
|
|
@ -80,8 +78,6 @@ impl ScriptService {
|
|||
add_website_keyword(state.clone(), user.clone(), &mut engine);
|
||||
add_suggestion_keyword(state.clone(), user.clone(), &mut engine);
|
||||
|
||||
#[cfg(feature = "web_automation")]
|
||||
get_website_keyword(&state, user.clone(), &mut engine);
|
||||
|
||||
ScriptService {
|
||||
engine,
|
||||
|
|
|
|||
|
|
@ -16,8 +16,6 @@ pub mod package_manager;
|
|||
pub mod session;
|
||||
pub mod shared;
|
||||
pub mod tests;
|
||||
#[cfg(feature = "web_automation")]
|
||||
pub mod web_automation;
|
||||
pub mod web_server;
|
||||
pub mod auth;
|
||||
pub mod nvidia;
|
||||
|
|
|
|||
|
|
@ -28,8 +28,6 @@ mod package_manager;
|
|||
mod session;
|
||||
mod shared;
|
||||
pub mod tests;
|
||||
#[cfg(feature = "web_automation")]
|
||||
mod web_automation;
|
||||
mod web_server;
|
||||
mod nvidia;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,227 +0,0 @@
|
|||
use log::{debug, error, info};
|
||||
use reqwest::Client;
|
||||
use scraper::{Html, Selector};
|
||||
use std::error::Error;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Web crawler for extracting content from web pages
|
||||
pub struct WebCrawler {
|
||||
client: Client,
|
||||
}
|
||||
|
||||
impl WebCrawler {
|
||||
/// Create a new web crawler
|
||||
pub fn new() -> Self {
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.connect_timeout(Duration::from_secs(10))
|
||||
.user_agent("Mozilla/5.0 (compatible; GeneralBots/1.0)")
|
||||
.build()
|
||||
.unwrap_or_else(|_| Client::new());
|
||||
|
||||
Self { client }
|
||||
}
|
||||
|
||||
/// Validate if string is a valid HTTP(S) URL
|
||||
pub fn is_valid_url(url: &str) -> bool {
|
||||
url.starts_with("http://") || url.starts_with("https://")
|
||||
}
|
||||
|
||||
/// Fetch website content via HTTP
|
||||
pub async fn fetch_content(&self, url: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||
debug!("Fetching website content from: {}", url);
|
||||
|
||||
let response = self.client.get(url).send().await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(format!("HTTP request failed with status: {}", response.status()).into());
|
||||
}
|
||||
|
||||
let content_type = response
|
||||
.headers()
|
||||
.get("content-type")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
if !content_type.contains("text/html") && !content_type.contains("application/xhtml") {
|
||||
return Err(format!("URL does not return HTML content: {}", content_type).into());
|
||||
}
|
||||
|
||||
let html_content = response.text().await?;
|
||||
debug!("Fetched {} bytes of HTML content", html_content.len());
|
||||
|
||||
Ok(html_content)
|
||||
}
|
||||
|
||||
/// Extract readable text from HTML
|
||||
pub fn extract_text_from_html(
|
||||
&self,
|
||||
html: &str,
|
||||
) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||
let document = Html::parse_document(html);
|
||||
|
||||
let mut text_parts = Vec::new();
|
||||
|
||||
// Extract title
|
||||
let title_selector = Selector::parse("title").unwrap();
|
||||
if let Some(title_element) = document.select(&title_selector).next() {
|
||||
let title = title_element.text().collect::<String>();
|
||||
if !title.trim().is_empty() {
|
||||
text_parts.push(format!("Title: {}\n", title.trim()));
|
||||
}
|
||||
}
|
||||
|
||||
// Extract meta description
|
||||
let meta_selector = Selector::parse("meta[name='description']").unwrap();
|
||||
if let Some(meta) = document.select(&meta_selector).next() {
|
||||
if let Some(description) = meta.value().attr("content") {
|
||||
if !description.trim().is_empty() {
|
||||
text_parts.push(format!("Description: {}\n", description.trim()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract body content
|
||||
let body_selector = Selector::parse("body").unwrap();
|
||||
if let Some(body) = document.select(&body_selector).next() {
|
||||
self.extract_text_recursive(&body, &mut text_parts);
|
||||
} else {
|
||||
// Fallback: extract from entire document
|
||||
for node in document.root_element().descendants() {
|
||||
if let Some(text) = node.value().as_text() {
|
||||
let cleaned = text.trim();
|
||||
if !cleaned.is_empty() {
|
||||
text_parts.push(cleaned.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let combined_text = text_parts.join("\n");
|
||||
|
||||
// Clean up excessive whitespace
|
||||
let cleaned = combined_text
|
||||
.lines()
|
||||
.map(|line| line.trim())
|
||||
.filter(|line| !line.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
if cleaned.is_empty() {
|
||||
return Err("Failed to extract text from HTML".into());
|
||||
}
|
||||
|
||||
Ok(cleaned)
|
||||
}
|
||||
|
||||
/// Recursively extract text from HTML element tree
|
||||
fn extract_text_recursive(&self, element: &scraper::ElementRef, text_parts: &mut Vec<String>) {
|
||||
// Skip excluded elements (script, style, etc.)
|
||||
let excluded = ["script", "style", "noscript", "iframe", "svg"];
|
||||
if excluded.contains(&element.value().name()) {
|
||||
return;
|
||||
}
|
||||
|
||||
for child in element.children() {
|
||||
if let Some(text) = child.value().as_text() {
|
||||
let cleaned = text.trim();
|
||||
if !cleaned.is_empty() {
|
||||
text_parts.push(cleaned.to_string());
|
||||
}
|
||||
} else if child.value().as_element().is_some() {
|
||||
if let Some(child_ref) = scraper::ElementRef::wrap(child) {
|
||||
self.extract_text_recursive(&child_ref, text_parts);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Crawl a URL and return extracted text
|
||||
pub async fn crawl(&self, url: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
|
||||
info!("Crawling website: {}", url);
|
||||
|
||||
if !Self::is_valid_url(url) {
|
||||
return Err("Invalid URL format".into());
|
||||
}
|
||||
|
||||
let html_content = self.fetch_content(url).await?;
|
||||
let text_content = self.extract_text_from_html(&html_content)?;
|
||||
|
||||
if text_content.trim().is_empty() {
|
||||
return Err("No text content found on website".into());
|
||||
}
|
||||
|
||||
info!(
|
||||
"Successfully crawled website: {} ({} characters)",
|
||||
url,
|
||||
text_content.len()
|
||||
);
|
||||
|
||||
Ok(text_content)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for WebCrawler {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_is_valid_url() {
|
||||
assert!(WebCrawler::is_valid_url("https://example.com"));
|
||||
assert!(WebCrawler::is_valid_url("http://example.com"));
|
||||
assert!(WebCrawler::is_valid_url("https://example.com/path?query=1"));
|
||||
|
||||
assert!(!WebCrawler::is_valid_url("ftp://example.com"));
|
||||
assert!(!WebCrawler::is_valid_url("example.com"));
|
||||
assert!(!WebCrawler::is_valid_url("//example.com"));
|
||||
assert!(!WebCrawler::is_valid_url("file:///etc/passwd"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_text_from_html() {
|
||||
let crawler = WebCrawler::new();
|
||||
|
||||
let html = r#"
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test Page</title>
|
||||
<meta name="description" content="This is a test page">
|
||||
<style>body { color: red; }</style>
|
||||
<script>console.log('test');</script>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Welcome</h1>
|
||||
<p>This is a paragraph.</p>
|
||||
<div>
|
||||
<span>Nested content</span>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
"#;
|
||||
|
||||
let result = crawler.extract_text_from_html(html).unwrap();
|
||||
|
||||
assert!(result.contains("Title: Test Page"));
|
||||
assert!(result.contains("Description: This is a test page"));
|
||||
assert!(result.contains("Welcome"));
|
||||
assert!(result.contains("This is a paragraph"));
|
||||
assert!(result.contains("Nested content"));
|
||||
assert!(!result.contains("console.log"));
|
||||
assert!(!result.contains("color: red"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_text_empty_html() {
|
||||
let crawler = WebCrawler::new();
|
||||
let html = "<html><body></body></html>";
|
||||
let result = crawler.extract_text_from_html(html);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,229 +0,0 @@
|
|||
#[cfg(feature = "web_automation")]
|
||||
|
||||
pub mod crawler;
|
||||
|
||||
use headless_chrome::browser::tab::Tab;
|
||||
use headless_chrome::{Browser, LaunchOptions};
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::future::Future;
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
use tokio::fs;
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use crate::shared::utils::{download_file, extract_zip_recursive};
|
||||
|
||||
pub use crawler::WebCrawler;
|
||||
|
||||
pub struct BrowserSetup {
|
||||
pub brave_path: String,
|
||||
pub chromedriver_path: String,
|
||||
}
|
||||
|
||||
pub struct BrowserPool {
|
||||
browser: Browser,
|
||||
semaphore: Semaphore,
|
||||
}
|
||||
|
||||
impl BrowserPool {
|
||||
pub async fn new(
|
||||
max_concurrent: usize,
|
||||
brave_path: String,
|
||||
) -> Result<Self, Box<dyn Error + Send + Sync>> {
|
||||
let options = LaunchOptions::default_builder()
|
||||
.path(Some(PathBuf::from(brave_path)))
|
||||
.args(vec![
|
||||
std::ffi::OsStr::new("--disable-gpu"),
|
||||
std::ffi::OsStr::new("--no-sandbox"),
|
||||
std::ffi::OsStr::new("--disable-dev-shm-usage"),
|
||||
])
|
||||
.build()
|
||||
.map_err(|e| format!("Failed to build launch options: {}", e))?;
|
||||
|
||||
let browser =
|
||||
Browser::new(options).map_err(|e| format!("Failed to launch browser: {}", e))?;
|
||||
|
||||
Ok(Self {
|
||||
browser,
|
||||
semaphore: Semaphore::new(max_concurrent),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn with_browser<F, T>(&self, f: F) -> Result<T, Box<dyn Error + Send + Sync>>
|
||||
where
|
||||
F: FnOnce(
|
||||
Arc<Tab>,
|
||||
)
|
||||
-> Pin<Box<dyn Future<Output = Result<T, Box<dyn Error + Send + Sync>>> + Send>>
|
||||
+ Send
|
||||
+ 'static,
|
||||
T: Send + 'static,
|
||||
{
|
||||
let _permit = self.semaphore.acquire().await?;
|
||||
|
||||
let tab = self
|
||||
.browser
|
||||
.new_tab()
|
||||
.map_err(|e| format!("Failed to create new tab: {}", e))?;
|
||||
|
||||
let result = f(tab.clone()).await;
|
||||
|
||||
// Close the tab when done
|
||||
let _ = tab.close(true);
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl BrowserSetup {
|
||||
pub async fn new() -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let brave_path = Self::find_brave().await?;
|
||||
let chromedriver_path = Self::setup_chromedriver().await?;
|
||||
|
||||
Ok(Self {
|
||||
brave_path,
|
||||
chromedriver_path,
|
||||
})
|
||||
}
|
||||
|
||||
async fn find_brave() -> Result<String, Box<dyn std::error::Error>> {
|
||||
let mut possible_paths = vec![
|
||||
String::from(r"C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe"),
|
||||
String::from("/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"),
|
||||
String::from("/usr/bin/brave-browser"),
|
||||
String::from("/usr/bin/brave"),
|
||||
];
|
||||
|
||||
if let Ok(local_appdata) = env::var("LOCALAPPDATA") {
|
||||
let mut path = PathBuf::from(local_appdata);
|
||||
path.push("BraveSoftware\\Brave-Browser\\Application\\brave.exe");
|
||||
possible_paths.push(path.to_string_lossy().to_string());
|
||||
}
|
||||
|
||||
for path in possible_paths {
|
||||
if fs::metadata(&path).await.is_ok() {
|
||||
return Ok(path);
|
||||
}
|
||||
}
|
||||
|
||||
Err("Brave browser not found. Please install Brave first.".into())
|
||||
}
|
||||
|
||||
async fn setup_chromedriver() -> Result<String, Box<dyn std::error::Error>> {
|
||||
let mut chromedriver_dir = env::current_exe()?.parent().unwrap().to_path_buf();
|
||||
chromedriver_dir.push("chromedriver");
|
||||
|
||||
if !chromedriver_dir.exists() {
|
||||
fs::create_dir(&chromedriver_dir).await?;
|
||||
}
|
||||
|
||||
let chromedriver_path = if cfg!(target_os = "windows") {
|
||||
chromedriver_dir.join("chromedriver.exe")
|
||||
} else {
|
||||
chromedriver_dir.join("chromedriver")
|
||||
};
|
||||
|
||||
if fs::metadata(&chromedriver_path).await.is_err() {
|
||||
let (download_url, platform) = match (cfg!(target_os = "windows"), cfg!(target_arch = "x86_64")) {
|
||||
(true, true) => (
|
||||
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/win64/chromedriver-win64.zip",
|
||||
"win64",
|
||||
),
|
||||
(true, false) => (
|
||||
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/win32/chromedriver-win32.zip",
|
||||
"win32",
|
||||
),
|
||||
(false, true) if cfg!(target_os = "macos") && cfg!(target_arch = "aarch64") => (
|
||||
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/mac-arm64/chromedriver-mac-arm64.zip",
|
||||
"mac-arm64",
|
||||
),
|
||||
(false, true) if cfg!(target_os = "macos") => (
|
||||
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/mac-x64/chromedriver-mac-x64.zip",
|
||||
"mac-x64",
|
||||
),
|
||||
(false, true) => (
|
||||
"https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.183/linux64/chromedriver-linux64.zip",
|
||||
"linux64",
|
||||
),
|
||||
_ => return Err("Unsupported platform".into()),
|
||||
};
|
||||
|
||||
let mut zip_path = std::env::temp_dir();
|
||||
zip_path.push("chromedriver.zip");
|
||||
|
||||
download_file(download_url, &zip_path.to_str().unwrap()).await?;
|
||||
|
||||
let mut temp_extract_dir = std::env::temp_dir();
|
||||
temp_extract_dir.push("chromedriver_extract");
|
||||
let temp_extract_dir1 = temp_extract_dir.clone();
|
||||
|
||||
let _ = fs::remove_dir_all(&temp_extract_dir).await;
|
||||
fs::create_dir(&temp_extract_dir).await?;
|
||||
|
||||
extract_zip_recursive(&zip_path, &temp_extract_dir)?;
|
||||
|
||||
let mut extracted_binary_path = temp_extract_dir;
|
||||
extracted_binary_path.push(format!("chromedriver-{}", platform));
|
||||
extracted_binary_path.push(if cfg!(target_os = "windows") {
|
||||
"chromedriver.exe"
|
||||
} else {
|
||||
"chromedriver"
|
||||
});
|
||||
|
||||
match fs::rename(&extracted_binary_path, &chromedriver_path).await {
|
||||
Ok(_) => (),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::CrossesDevices => {
|
||||
fs::copy(&extracted_binary_path, &chromedriver_path).await?;
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let mut perms = fs::metadata(&chromedriver_path).await?.permissions();
|
||||
perms.set_mode(0o755);
|
||||
fs::set_permissions(&chromedriver_path, perms).await?;
|
||||
}
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
|
||||
let _ = fs::remove_file(&zip_path).await;
|
||||
let _ = fs::remove_dir_all(temp_extract_dir1).await;
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let mut perms = fs::metadata(&chromedriver_path).await?.permissions();
|
||||
perms.set_mode(0o755);
|
||||
fs::set_permissions(&chromedriver_path, perms).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(chromedriver_path.to_string_lossy().to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn initialize_browser_pool() -> Result<Arc<BrowserPool>, Box<dyn std::error::Error>> {
|
||||
let setup = BrowserSetup::new().await?;
|
||||
|
||||
// Note: headless_chrome doesn't use chromedriver, it uses Chrome DevTools Protocol directly
|
||||
// So we don't need to spawn chromedriver process
|
||||
|
||||
Ok(Arc::new(BrowserPool::new(5, setup.brave_path).await?))
|
||||
}
|
||||
|
||||
async fn is_process_running(name: &str) -> bool {
|
||||
if cfg!(target_os = "windows") {
|
||||
Command::new("tasklist")
|
||||
.output()
|
||||
.map(|o| String::from_utf8_lossy(&o.stdout).contains(name))
|
||||
.unwrap_or(false)
|
||||
} else {
|
||||
Command::new("pgrep")
|
||||
.arg(name)
|
||||
.output()
|
||||
.map(|o| o.status.success())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
//! Tests for web automation module
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::tests::test_util;
|
||||
|
||||
#[test]
|
||||
fn test_web_automation_module() {
|
||||
test_util::setup();
|
||||
assert!(true, "Basic web automation module test");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_crawler() {
|
||||
test_util::setup();
|
||||
assert!(true, "Web crawler placeholder test");
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue