diff --git a/Cargo.lock b/Cargo.lock index 423892a8..4230015e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1045,8 +1045,10 @@ dependencies = [ "regex", "reqwest 0.12.23", "rhai", + "scraper", "serde", "serde_json", + "sha2", "smartstring", "tempfile", "time", @@ -1505,6 +1507,29 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf 0.11.3", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "ctr" version = "0.9.2" @@ -1854,6 +1879,21 @@ dependencies = [ "syn", ] +[[package]] +name = "dtoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + [[package]] name = "dunce" version = "1.0.5" @@ -1881,6 +1921,12 @@ dependencies = [ "signature", ] +[[package]] +name = "ego-tree" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642" + [[package]] name = "either" version = "1.15.0" @@ -2082,6 +2128,16 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -2171,6 +2227,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -2181,6 +2246,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -2373,6 +2447,20 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "http" version = "0.2.12" @@ -3209,6 +3297,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + [[package]] name = "mailparse" version = "0.15.0" @@ -3220,6 +3314,20 @@ dependencies = [ "quoted_printable", ] +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf 0.11.3", + "phf_codegen 0.11.3", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "matchit" version = "0.7.3" @@ -3299,6 +3407,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nom" version = "7.1.3" @@ -3640,6 +3754,96 @@ dependencies = [ "indexmap 2.11.4", ] +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared 0.10.0", +] + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_macros", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand 0.8.5", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared 0.11.3", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher 0.3.11", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher 1.0.1", +] + [[package]] name = "pin-project" version = "1.1.10" @@ -3762,6 +3966,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "prettyplease" version = "0.2.37" @@ -4430,6 +4640,22 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0" +dependencies = [ + "ahash", + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "once_cell", + "selectors", + "tendril", +] + [[package]] name = "scratch" version = "1.0.9" @@ -4496,6 +4722,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" +dependencies = [ + "bitflags 2.9.4", + "cssparser", + "derive_more 0.99.20", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.10.1", + "phf_codegen 0.10.0", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.27" @@ -4566,6 +4811,15 @@ dependencies = [ "serde", ] +[[package]] +name = "servo_arc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.6" @@ -4634,6 +4888,18 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.11" @@ -4723,6 +4989,31 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared 0.11.3", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro2", + "quote", +] + [[package]] name = "stringprep" version = "0.1.5" @@ -4838,6 +5129,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "termcolor" version = "1.4.1" diff --git a/Cargo.toml b/Cargo.toml index 907551cf..4a3eda43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "botserver" -version = "6.0.1" +version = "6.0.4" edition = "2021" authors = [ "@AlanPerdomo", @@ -89,3 +89,5 @@ aws-sdk-s3 = "1.108.0" headless_chrome = { version = "1.0.18", optional = true } rand = "0.9.2" pdf-extract = "0.10.0" +scraper = "0.20" +sha2 = "0.10.9" diff --git a/scripts/database/6.0.0.sql b/scripts/database/6.0.0.sql deleted file mode 100644 index cd3f4839..00000000 --- a/scripts/database/6.0.0.sql +++ /dev/null @@ -1,241 +0,0 @@ - -CREATE TABLE public.bots ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - "name" varchar(255) NOT NULL, - description text NULL, - llm_provider varchar(100) NOT NULL, - llm_config jsonb DEFAULT '{}'::jsonb NOT NULL, - context_provider varchar(100) NOT NULL, - context_config jsonb DEFAULT '{}'::jsonb NOT NULL, - created_at timestamptz DEFAULT now() NOT NULL, - updated_at timestamptz DEFAULT now() NOT NULL, - is_active bool DEFAULT true NULL, - CONSTRAINT bots_pkey PRIMARY KEY (id) -); - - --- public.clicks definition - --- Drop table - --- DROP TABLE public.clicks; - -CREATE TABLE public.clicks ( - campaign_id text NOT NULL, - email text NOT NULL, - updated_at timestamptz DEFAULT now() NULL, - CONSTRAINT clicks_campaign_id_email_key UNIQUE (campaign_id, email) -); - - --- public.organizations definition - --- Drop table - --- DROP TABLE public.organizations; - -CREATE TABLE public.organizations ( - org_id uuid DEFAULT gen_random_uuid() NOT NULL, - "name" varchar(255) NOT NULL, - slug varchar(255) NOT NULL, - created_at timestamptz DEFAULT now() NOT NULL, - updated_at timestamptz DEFAULT now() NOT NULL, - CONSTRAINT organizations_pkey PRIMARY KEY (org_id), - CONSTRAINT organizations_slug_key UNIQUE (slug) -); -CREATE INDEX idx_organizations_created_at ON public.organizations USING btree (created_at); -CREATE INDEX idx_organizations_slug ON public.organizations USING btree (slug); - - --- public.system_automations definition - --- Drop table - --- DROP TABLE public.system_automations; - -CREATE TABLE public.system_automations ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - kind int4 NOT NULL, - "target" varchar(32) NULL, - schedule bpchar(12) NULL, - param varchar(32) NOT NULL, - is_active bool DEFAULT true NOT NULL, - last_triggered timestamptz NULL, - created_at timestamptz DEFAULT now() NOT NULL, - CONSTRAINT system_automations_pkey PRIMARY KEY (id) -); -CREATE INDEX idx_system_automations_active ON public.system_automations USING btree (kind) WHERE is_active; - - --- public.tools definition - --- Drop table - --- DROP TABLE public.tools; - -CREATE TABLE public.tools ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - "name" varchar(255) NOT NULL, - description text NOT NULL, - parameters jsonb DEFAULT '{}'::jsonb NOT NULL, - script text NOT NULL, - is_active bool DEFAULT true NULL, - created_at timestamptz DEFAULT now() NOT NULL, - CONSTRAINT tools_name_key UNIQUE (name), - CONSTRAINT tools_pkey PRIMARY KEY (id) -); - - --- public.users definition - --- Drop table - --- DROP TABLE public.users; - -CREATE TABLE public.users ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - username varchar(255) NOT NULL, - email varchar(255) NOT NULL, - password_hash varchar(255) NOT NULL, - phone_number varchar(50) NULL, - created_at timestamptz DEFAULT now() NOT NULL, - updated_at timestamptz DEFAULT now() NOT NULL, - is_active bool DEFAULT true NULL, - CONSTRAINT users_email_key UNIQUE (email), - CONSTRAINT users_pkey PRIMARY KEY (id), - CONSTRAINT users_username_key UNIQUE (username) -); - - --- public.bot_channels definition - --- Drop table - --- DROP TABLE public.bot_channels; - -CREATE TABLE public.bot_channels ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - bot_id uuid NOT NULL, - channel_type int4 NOT NULL, - config jsonb DEFAULT '{}'::jsonb NOT NULL, - is_active bool DEFAULT true NULL, - created_at timestamptz DEFAULT now() NOT NULL, - CONSTRAINT bot_channels_bot_id_channel_type_key UNIQUE (bot_id, channel_type), - CONSTRAINT bot_channels_pkey PRIMARY KEY (id), - CONSTRAINT bot_channels_bot_id_fkey FOREIGN KEY (bot_id) REFERENCES public.bots(id) ON DELETE CASCADE -); -CREATE INDEX idx_bot_channels_type ON public.bot_channels USING btree (channel_type) WHERE is_active; - - --- public.user_sessions definition - --- Drop table - --- DROP TABLE public.user_sessions; - -CREATE TABLE public.user_sessions ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - user_id uuid NOT NULL, - bot_id uuid NOT NULL, - title varchar(500) DEFAULT 'New Conversation'::character varying NOT NULL, - answer_mode int4 DEFAULT 0 NOT NULL, - context_data jsonb DEFAULT '{}'::jsonb NOT NULL, - current_tool varchar(255) NULL, - message_count int4 DEFAULT 0 NOT NULL, - total_tokens int4 DEFAULT 0 NOT NULL, - created_at timestamptz DEFAULT now() NOT NULL, - updated_at timestamptz DEFAULT now() NOT NULL, - last_activity timestamptz DEFAULT now() NOT NULL, - CONSTRAINT user_sessions_pkey PRIMARY KEY (id), - CONSTRAINT user_sessions_bot_id_fkey FOREIGN KEY (bot_id) REFERENCES public.bots(id) ON DELETE CASCADE, - CONSTRAINT user_sessions_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE -); -CREATE INDEX idx_user_sessions_updated_at ON public.user_sessions USING btree (updated_at); -CREATE INDEX idx_user_sessions_user_bot ON public.user_sessions USING btree (user_id, bot_id); - - --- public.whatsapp_numbers definition - --- Drop table - --- DROP TABLE public.whatsapp_numbers; - -CREATE TABLE public.whatsapp_numbers ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - bot_id uuid NOT NULL, - phone_number varchar(50) NOT NULL, - is_active bool DEFAULT true NULL, - created_at timestamptz DEFAULT now() NOT NULL, - CONSTRAINT whatsapp_numbers_phone_number_bot_id_key UNIQUE (phone_number, bot_id), - CONSTRAINT whatsapp_numbers_pkey PRIMARY KEY (id), - CONSTRAINT whatsapp_numbers_bot_id_fkey FOREIGN KEY (bot_id) REFERENCES public.bots(id) ON DELETE CASCADE -); - - --- public.context_injections definition - --- Drop table - --- DROP TABLE public.context_injections; - -CREATE TABLE public.context_injections ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - session_id uuid NOT NULL, - injected_by uuid NOT NULL, - context_data jsonb NOT NULL, - reason text NULL, - created_at timestamptz DEFAULT now() NOT NULL, - CONSTRAINT context_injections_pkey PRIMARY KEY (id), - CONSTRAINT context_injections_injected_by_fkey FOREIGN KEY (injected_by) REFERENCES public.users(id) ON DELETE CASCADE, - CONSTRAINT context_injections_session_id_fkey FOREIGN KEY (session_id) REFERENCES public.user_sessions(id) ON DELETE CASCADE -); - - --- public.message_history definition - --- Drop table - --- DROP TABLE public.message_history; - -CREATE TABLE public.message_history ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - session_id uuid NOT NULL, - user_id uuid NOT NULL, - "role" int4 NOT NULL, - content_encrypted text NOT NULL, - message_type int4 DEFAULT 0 NOT NULL, - media_url text NULL, - token_count int4 DEFAULT 0 NOT NULL, - processing_time_ms int4 NULL, - llm_model varchar(100) NULL, - created_at timestamptz DEFAULT now() NOT NULL, - message_index int4 NOT NULL, - CONSTRAINT message_history_pkey PRIMARY KEY (id), - CONSTRAINT message_history_session_id_fkey FOREIGN KEY (session_id) REFERENCES public.user_sessions(id) ON DELETE CASCADE, - CONSTRAINT message_history_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE -); -CREATE INDEX idx_message_history_created_at ON public.message_history USING btree (created_at); -CREATE INDEX idx_message_history_session_id ON public.message_history USING btree (session_id); - - --- public.usage_analytics definition - --- Drop table - --- DROP TABLE public.usage_analytics; - -CREATE TABLE public.usage_analytics ( - id uuid DEFAULT gen_random_uuid() NOT NULL, - user_id uuid NOT NULL, - bot_id uuid NOT NULL, - session_id uuid NOT NULL, - "date" date DEFAULT CURRENT_DATE NOT NULL, - message_count int4 DEFAULT 0 NOT NULL, - total_tokens int4 DEFAULT 0 NOT NULL, - total_processing_time_ms int4 DEFAULT 0 NOT NULL, - CONSTRAINT usage_analytics_pkey PRIMARY KEY (id), - CONSTRAINT usage_analytics_bot_id_fkey FOREIGN KEY (bot_id) REFERENCES public.bots(id) ON DELETE CASCADE, - CONSTRAINT usage_analytics_session_id_fkey FOREIGN KEY (session_id) REFERENCES public.user_sessions(id) ON DELETE CASCADE, - CONSTRAINT usage_analytics_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE -); -CREATE INDEX idx_usage_analytics_date ON public.usage_analytics USING btree (date); diff --git a/scripts/database/6.0.1.sql b/scripts/database/6.0.1.sql deleted file mode 100644 index 20d9d1e1..00000000 --- a/scripts/database/6.0.1.sql +++ /dev/null @@ -1,13 +0,0 @@ - -CREATE TABLE bot_memories ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - bot_id UUID NOT NULL REFERENCES bots(id) ON DELETE CASCADE, - key TEXT NOT NULL, - value TEXT NOT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - UNIQUE(bot_id, key) -); - -CREATE INDEX idx_bot_memories_bot_id ON bot_memories(bot_id); -CREATE INDEX idx_bot_memories_key ON bot_memories(key); diff --git a/src/basic/keywords/mod.rs b/src/basic/keywords/mod.rs index 34dd3fcf..7474d67f 100644 --- a/src/basic/keywords/mod.rs +++ b/src/basic/keywords/mod.rs @@ -1,4 +1,7 @@ +pub mod add_tool; +pub mod add_website; pub mod bot_memory; +pub mod clear_tools; pub mod create_site; pub mod find; pub mod first; @@ -7,10 +10,13 @@ pub mod format; pub mod get; pub mod hear_talk; pub mod last; +pub mod list_tools; pub mod llm_keyword; pub mod on; pub mod print; +pub mod remove_tool; pub mod set; +pub mod set_kb; pub mod set_schedule; pub mod wait; diff --git a/src/basic/mod.rs b/src/basic/mod.rs index bdfe10bf..ffe61b58 100644 --- a/src/basic/mod.rs +++ b/src/basic/mod.rs @@ -4,9 +4,13 @@ use log::info; use rhai::{Dynamic, Engine, EvalAltResult}; use std::sync::Arc; +pub mod compiler; pub mod keywords; +use self::keywords::add_tool::add_tool_keyword; +use self::keywords::add_website::add_website_keyword; use self::keywords::bot_memory::{get_bot_memory_keyword, set_bot_memory_keyword}; +use self::keywords::clear_tools::clear_tools_keyword; use self::keywords::create_site::create_site_keyword; use self::keywords::find::find_keyword; use self::keywords::first::first_keyword; @@ -17,10 +21,13 @@ use self::keywords::hear_talk::{ hear_keyword, set_context_keyword, set_user_keyword, talk_keyword, }; use self::keywords::last::last_keyword; +use self::keywords::list_tools::list_tools_keyword; use self::keywords::llm_keyword::llm_keyword; use self::keywords::on::on_keyword; use self::keywords::print::print_keyword; +use self::keywords::remove_tool::remove_tool_keyword; use self::keywords::set::set_keyword; +use self::keywords::set_kb::{add_kb_keyword, set_kb_keyword}; use self::keywords::set_schedule::set_schedule_keyword; use self::keywords::wait::wait_keyword; @@ -66,6 +73,15 @@ impl ScriptService { set_context_keyword(&state, user.clone(), &mut engine); set_user_keyword(state.clone(), user.clone(), &mut engine); + // KB and Tools keywords + set_kb_keyword(state.clone(), user.clone(), &mut engine); + add_kb_keyword(state.clone(), user.clone(), &mut engine); + add_tool_keyword(state.clone(), user.clone(), &mut engine); + remove_tool_keyword(state.clone(), user.clone(), &mut engine); + clear_tools_keyword(state.clone(), user.clone(), &mut engine); + list_tools_keyword(state.clone(), user.clone(), &mut engine); + add_website_keyword(state.clone(), user.clone(), &mut engine); + #[cfg(feature = "web_automation")] get_website_keyword(&state, user.clone(), &mut engine); diff --git a/src/config/mod.rs b/src/config/mod.rs index 7f6892b6..dd0626e3 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1,5 +1,12 @@ -use std::env; +use diesel::prelude::*; +use diesel::sql_types::Text; +use log::{info, warn}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; +/// Application configuration - reads from database instead of .env #[derive(Clone)] pub struct AppConfig { pub minio: DriveConfig, @@ -10,6 +17,8 @@ pub struct AppConfig { pub ai: AIConfig, pub site_path: String, pub s3_bucket: String, + pub stack_path: PathBuf, + db_conn: Option>>, } #[derive(Clone)] @@ -53,6 +62,20 @@ pub struct AIConfig { pub endpoint: String, } +#[derive(Debug, Clone, Serialize, Deserialize, QueryableByName)] +pub struct ServerConfigRow { + #[diesel(sql_type = Text)] + pub id: String, + #[diesel(sql_type = Text)] + pub config_key: String, + #[diesel(sql_type = Text)] + pub config_value: String, + #[diesel(sql_type = Text)] + pub config_type: String, + #[diesel(sql_type = diesel::sql_types::Bool)] + pub is_encrypted: bool, +} + impl AppConfig { pub fn database_url(&self) -> String { format!( @@ -76,64 +99,210 @@ impl AppConfig { ) } - pub fn from_env() -> Self { + /// Get stack path for a specific component + pub fn component_path(&self, component: &str) -> PathBuf { + self.stack_path.join(component) + } + + /// Get binary path for a component + pub fn bin_path(&self, component: &str) -> PathBuf { + self.stack_path.join("bin").join(component) + } + + /// Get data path for a component + pub fn data_path(&self, component: &str) -> PathBuf { + self.stack_path.join("data").join(component) + } + + /// Get config path for a component + pub fn config_path(&self, component: &str) -> PathBuf { + self.stack_path.join("conf").join(component) + } + + /// Get log path for a component + pub fn log_path(&self, component: &str) -> PathBuf { + self.stack_path.join("logs").join(component) + } + + /// Load configuration from database + /// Falls back to defaults if database is not yet initialized + pub fn from_database(conn: &mut PgConnection) -> Self { + info!("Loading configuration from database..."); + + // Load all configuration from database + let config_map = match Self::load_config_from_db(conn) { + Ok(map) => { + info!( + "Successfully loaded {} config values from database", + map.len() + ); + map + } + Err(e) => { + warn!( + "Failed to load config from database: {}. Using defaults.", + e + ); + HashMap::new() + } + }; + + // Helper to get config value with fallback + let get_str = |key: &str, default: &str| -> String { + config_map + .get(key) + .map(|v| v.config_value.clone()) + .unwrap_or_else(|| default.to_string()) + }; + + let get_u32 = |key: &str, default: u32| -> u32 { + config_map + .get(key) + .and_then(|v| v.config_value.parse().ok()) + .unwrap_or(default) + }; + + let get_u16 = |key: &str, default: u16| -> u16 { + config_map + .get(key) + .and_then(|v| v.config_value.parse().ok()) + .unwrap_or(default) + }; + + let get_bool = |key: &str, default: bool| -> bool { + config_map + .get(key) + .map(|v| v.config_value.to_lowercase() == "true") + .unwrap_or(default) + }; + + let stack_path = PathBuf::from(get_str("STACK_PATH", "./botserver-stack")); + let database = DatabaseConfig { - username: env::var("TABLES_USERNAME").unwrap(), - password: env::var("TABLES_PASSWORD").unwrap_or_else(|_| "pass".to_string()), - server: env::var("TABLES_SERVER").unwrap_or_else(|_| "localhost".to_string()), - port: env::var("TABLES_PORT") - .ok() - .and_then(|p| p.parse().ok()) - .unwrap_or(5432), - database: env::var("TABLES_DATABASE").unwrap_or_else(|_| "db".to_string()), + username: get_str("TABLES_USERNAME", "botserver"), + password: get_str("TABLES_PASSWORD", "botserver"), + server: get_str("TABLES_SERVER", "localhost"), + port: get_u32("TABLES_PORT", 5432), + database: get_str("TABLES_DATABASE", "botserver"), }; let database_custom = DatabaseConfig { - username: env::var("CUSTOM_USERNAME").unwrap_or_else(|_| "user".to_string()), - password: env::var("CUSTOM_PASSWORD").unwrap_or_else(|_| "pass".to_string()), - server: env::var("CUSTOM_SERVER").unwrap_or_else(|_| "localhost".to_string()), - port: env::var("CUSTOM_PORT") - .ok() - .and_then(|p| p.parse().ok()) - .unwrap_or(5432), - database: env::var("CUSTOM_DATABASE").unwrap_or_else(|_| "db".to_string()), + username: get_str("CUSTOM_USERNAME", "user"), + password: get_str("CUSTOM_PASSWORD", "pass"), + server: get_str("CUSTOM_SERVER", "localhost"), + port: get_u32("CUSTOM_PORT", 5432), + database: get_str("CUSTOM_DATABASE", "custom"), }; let minio = DriveConfig { - server: env::var("DRIVE_SERVER").unwrap_or_else(|_| "localhost:9000".to_string()), - access_key: env::var("DRIVE_ACCESSKEY").unwrap_or_else(|_| "minioadmin".to_string()), - secret_key: env::var("DRIVE_SECRET").unwrap_or_else(|_| "minioadmin".to_string()), - use_ssl: env::var("DRIVE_USE_SSL") - .unwrap_or_else(|_| "false".to_string()) - .parse() - .unwrap_or(false), - org_prefix: env::var("DRIVE_ORG_PREFIX").unwrap_or_else(|_| "botserver".to_string()), + server: get_str("DRIVE_SERVER", "localhost:9000"), + access_key: get_str("DRIVE_ACCESSKEY", "minioadmin"), + secret_key: get_str("DRIVE_SECRET", "minioadmin"), + use_ssl: get_bool("DRIVE_USE_SSL", false), + org_prefix: get_str("DRIVE_ORG_PREFIX", "botserver"), }; let email = EmailConfig { - from: env::var("EMAIL_FROM").unwrap_or_else(|_| "noreply@example.com".to_string()), - server: env::var("EMAIL_SERVER").unwrap_or_else(|_| "smtp.example.com".to_string()), - port: env::var("EMAIL_PORT") - .unwrap_or_else(|_| "587".to_string()) - .parse() - .unwrap_or(587), - username: env::var("EMAIL_USER").unwrap_or_else(|_| "user".to_string()), - password: env::var("EMAIL_PASS").unwrap_or_else(|_| "pass".to_string()), + from: get_str("EMAIL_FROM", "noreply@example.com"), + server: get_str("EMAIL_SERVER", "smtp.example.com"), + port: get_u16("EMAIL_PORT", 587), + username: get_str("EMAIL_USER", "user"), + password: get_str("EMAIL_PASS", "pass"), }; let ai = AIConfig { - instance: env::var("AI_INSTANCE").unwrap_or_else(|_| "gpt-4".to_string()), - key: env::var("AI_KEY").unwrap_or_else(|_| "key".to_string()), - version: env::var("AI_VERSION").unwrap_or_else(|_| "2023-12-01-preview".to_string()), - endpoint: env::var("AI_ENDPOINT") + instance: get_str("AI_INSTANCE", "gpt-4"), + key: get_str("AI_KEY", ""), + version: get_str("AI_VERSION", "2023-12-01-preview"), + endpoint: get_str("AI_ENDPOINT", "https://api.openai.com"), + }; + + AppConfig { + minio, + server: ServerConfig { + host: get_str("SERVER_HOST", "127.0.0.1"), + port: get_u16("SERVER_PORT", 8080), + }, + database, + database_custom, + email, + ai, + s3_bucket: get_str("DRIVE_BUCKET", "default"), + site_path: get_str("SITES_ROOT", "./botserver-stack/sites"), + stack_path, + db_conn: None, + } + } + + /// Legacy method - reads from .env for backward compatibility + /// Will be deprecated once database setup is complete + pub fn from_env() -> Self { + warn!("Loading configuration from environment variables (legacy mode)"); + + let stack_path = + std::env::var("STACK_PATH").unwrap_or_else(|_| "./botserver-stack".to_string()); + + let database = DatabaseConfig { + username: std::env::var("TABLES_USERNAME").unwrap_or_else(|_| "botserver".to_string()), + password: std::env::var("TABLES_PASSWORD").unwrap_or_else(|_| "botserver".to_string()), + server: std::env::var("TABLES_SERVER").unwrap_or_else(|_| "localhost".to_string()), + port: std::env::var("TABLES_PORT") + .ok() + .and_then(|p| p.parse().ok()) + .unwrap_or(5432), + database: std::env::var("TABLES_DATABASE").unwrap_or_else(|_| "botserver".to_string()), + }; + + let database_custom = DatabaseConfig { + username: std::env::var("CUSTOM_USERNAME").unwrap_or_else(|_| "user".to_string()), + password: std::env::var("CUSTOM_PASSWORD").unwrap_or_else(|_| "pass".to_string()), + server: std::env::var("CUSTOM_SERVER").unwrap_or_else(|_| "localhost".to_string()), + port: std::env::var("CUSTOM_PORT") + .ok() + .and_then(|p| p.parse().ok()) + .unwrap_or(5432), + database: std::env::var("CUSTOM_DATABASE").unwrap_or_else(|_| "custom".to_string()), + }; + + let minio = DriveConfig { + server: std::env::var("DRIVE_SERVER").unwrap_or_else(|_| "localhost:9000".to_string()), + access_key: std::env::var("DRIVE_ACCESSKEY") + .unwrap_or_else(|_| "minioadmin".to_string()), + secret_key: std::env::var("DRIVE_SECRET").unwrap_or_else(|_| "minioadmin".to_string()), + use_ssl: std::env::var("DRIVE_USE_SSL") + .unwrap_or_else(|_| "false".to_string()) + .parse() + .unwrap_or(false), + org_prefix: std::env::var("DRIVE_ORG_PREFIX") + .unwrap_or_else(|_| "botserver".to_string()), + }; + + let email = EmailConfig { + from: std::env::var("EMAIL_FROM").unwrap_or_else(|_| "noreply@example.com".to_string()), + server: std::env::var("EMAIL_SERVER") + .unwrap_or_else(|_| "smtp.example.com".to_string()), + port: std::env::var("EMAIL_PORT") + .unwrap_or_else(|_| "587".to_string()) + .parse() + .unwrap_or(587), + username: std::env::var("EMAIL_USER").unwrap_or_else(|_| "user".to_string()), + password: std::env::var("EMAIL_PASS").unwrap_or_else(|_| "pass".to_string()), + }; + + let ai = AIConfig { + instance: std::env::var("AI_INSTANCE").unwrap_or_else(|_| "gpt-4".to_string()), + key: std::env::var("AI_KEY").unwrap_or_else(|_| "".to_string()), + version: std::env::var("AI_VERSION") + .unwrap_or_else(|_| "2023-12-01-preview".to_string()), + endpoint: std::env::var("AI_ENDPOINT") .unwrap_or_else(|_| "https://api.openai.com".to_string()), }; AppConfig { minio, server: ServerConfig { - host: env::var("SERVER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string()), - port: env::var("SERVER_PORT") + host: std::env::var("SERVER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string()), + port: std::env::var("SERVER_PORT") .ok() .and_then(|p| p.parse().ok()) .unwrap_or(8080), @@ -142,9 +311,175 @@ impl AppConfig { database_custom, email, ai, - s3_bucket: env::var("DRIVE_BUCKET").unwrap_or_else(|_| "default".to_string()), - - site_path: env::var("SITES_ROOT").unwrap_or_else(|_| "./sites".to_string()), + s3_bucket: std::env::var("DRIVE_BUCKET").unwrap_or_else(|_| "default".to_string()), + site_path: std::env::var("SITES_ROOT") + .unwrap_or_else(|_| "./botserver-stack/sites".to_string()), + stack_path: PathBuf::from(stack_path), + db_conn: None, } } + + /// Load all configuration from database into a HashMap + fn load_config_from_db( + conn: &mut PgConnection, + ) -> Result, diesel::result::Error> { + // Try to query the server_configuration table + let results = diesel::sql_query( + "SELECT id, config_key, config_value, config_type, is_encrypted + FROM server_configuration", + ) + .load::(conn)?; + + let mut map = HashMap::new(); + for row in results { + map.insert(row.config_key.clone(), row); + } + + Ok(map) + } + + /// Update a configuration value in the database + pub fn set_config( + &self, + conn: &mut PgConnection, + key: &str, + value: &str, + ) -> Result<(), diesel::result::Error> { + diesel::sql_query("SELECT set_config($1, $2)") + .bind::(key) + .bind::(value) + .execute(conn)?; + + info!("Updated configuration: {} = {}", key, value); + Ok(()) + } + + /// Get a configuration value from the database + pub fn get_config( + &self, + conn: &mut PgConnection, + key: &str, + fallback: Option<&str>, + ) -> Result { + // Use empty string when no fallback is supplied + let fallback_str = fallback.unwrap_or(""); + + // Define a temporary struct that matches the shape of the query result. + #[derive(Debug, QueryableByName)] + struct ConfigValue { + #[diesel(sql_type = Text)] + value: String, + } + + // Execute the query and map the resulting row to the inner string. + let result = diesel::sql_query("SELECT get_config($1, $2) as value") + .bind::(key) + .bind::(fallback_str) + .get_result::(conn) + .map(|row| row.value)?; + + Ok(result) + } +} + +/// Configuration manager for handling .gbot/config.csv files +pub struct ConfigManager { + conn: Arc>, +} + +impl ConfigManager { + pub fn new(conn: Arc>) -> Self { + Self { conn } + } + + /// Watch and sync .gbot/config.csv file for a bot + pub fn sync_gbot_config( + &self, + bot_id: &uuid::Uuid, + config_path: &str, + ) -> Result { + // Import necessary crates for hashing and file handling + use sha2::{Digest, Sha256}; + use std::fs; + + // Read the config.csv file + let content = fs::read_to_string(config_path) + .map_err(|e| format!("Failed to read config file: {}", e))?; + + // Calculate file hash + let mut hasher = Sha256::new(); + hasher.update(content.as_bytes()); + let file_hash = format!("{:x}", hasher.finalize()); + + let mut conn = self + .conn + .lock() + .map_err(|e| format!("Failed to acquire lock: {}", e))?; + + // Check if file has changed + #[derive(QueryableByName)] + struct SyncHash { + #[diesel(sql_type = Text)] + file_hash: String, + } + + let last_hash: Option = + diesel::sql_query("SELECT file_hash FROM gbot_config_sync WHERE bot_id = $1") + .bind::(bot_id) + .get_result::(&mut *conn) + .optional() + .map_err(|e| format!("Database error: {}", e))? + .map(|row| row.file_hash); + + if last_hash.as_ref() == Some(&file_hash) { + info!("Config file unchanged for bot {}", bot_id); + return Ok(0); + } + + // Parse CSV and update bot configuration + let mut updated = 0; + for line in content.lines().skip(1) { + // Skip header + let parts: Vec<&str> = line.split(',').collect(); + if parts.len() >= 2 { + let key = parts[0].trim(); + let value = parts[1].trim(); + + // Insert or update bot configuration + diesel::sql_query( + "INSERT INTO bot_configuration (id, bot_id, config_key, config_value, config_type) + VALUES (gen_random_uuid()::text, $1, $2, $3, 'string') + ON CONFLICT (bot_id, config_key) + DO UPDATE SET config_value = EXCLUDED.config_value, updated_at = NOW()" + ) + .bind::(bot_id) + .bind::(key) + .bind::(value) + .execute(&mut *conn) + .map_err(|e| format!("Failed to update config: {}", e))?; + + updated += 1; + } + } + + // Update sync record + diesel::sql_query( + "INSERT INTO gbot_config_sync (id, bot_id, config_file_path, file_hash, sync_count) + VALUES (gen_random_uuid()::text, $1, $2, $3, 1) + ON CONFLICT (bot_id) + DO UPDATE SET last_sync_at = NOW(), file_hash = EXCLUDED.file_hash, + sync_count = gbot_config_sync.sync_count + 1", + ) + .bind::(bot_id) + .bind::(config_path) + .bind::(&file_hash) + .execute(&mut *conn) + .map_err(|e| format!("Failed to update sync record: {}", e))?; + + info!( + "Synced {} config values for bot {} from {}", + updated, bot_id, config_path + ); + Ok(updated) + } } diff --git a/src/context/mod.rs b/src/context/mod.rs index 3f7557be..ad33fab5 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -4,6 +4,8 @@ use std::sync::Arc; use crate::shared::models::SearchResult; +pub mod prompt_processor; + #[async_trait] pub trait ContextStore: Send + Sync { async fn store_embedding( diff --git a/src/main.rs b/src/main.rs index 5ed035ee..ddbe9ade 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,9 +13,11 @@ mod bot; mod channels; mod config; mod context; +mod drive_monitor; #[cfg(feature = "email")] mod email; mod file; +mod kb; mod llm; mod llm_legacy; mod meet; @@ -24,6 +26,8 @@ mod package_manager; mod session; mod shared; mod tools; +#[cfg(feature = "web_automation")] +mod web_automation; mod web_server; mod whatsapp; use crate::auth::auth_handler; @@ -31,6 +35,7 @@ use crate::automation::AutomationService; use crate::bot::{start_session, websocket_handler}; use crate::channels::{VoiceAdapter, WebChannelAdapter}; use crate::config::AppConfig; +use crate::drive_monitor::DriveMonitor; #[cfg(feature = "email")] use crate::email::{ get_emails, get_latest_email_from, list_emails, save_click, save_draft, send_email, @@ -243,6 +248,12 @@ async fn main() -> std::io::Result<()> { ); let _automation_handle = automation.spawn(); + // Start Drive Monitor service in background + let drive_state = app_state.clone(); + let bucket_name = format!("{}default.gbai", cfg.minio.org_prefix); + let drive_monitor = Arc::new(DriveMonitor::new(drive_state, bucket_name)); + let _drive_handle = drive_monitor.spawn(); + HttpServer::new(move || { // CORS configuration – allow any origin/method/header (adjust for production). let cors = Cors::default() diff --git a/src/shared/models.rs b/src/shared/models.rs index 550f64b0..42ab7ce0 100644 --- a/src/shared/models.rs +++ b/src/shared/models.rs @@ -147,93 +147,242 @@ pub struct BotMemory { pub updated_at: chrono::DateTime, } -diesel::table! { - organizations (org_id) { - org_id -> Uuid, - name -> Text, - slug -> Text, - created_at -> Timestamptz, +#[derive(Debug, Clone, Serialize, Deserialize, Queryable, Identifiable, Insertable)] +#[diesel(table_name = kb_documents)] +pub struct KBDocument { + pub id: String, + pub bot_id: String, + pub user_id: String, + pub collection_name: String, + pub file_path: String, + pub file_size: i32, + pub file_hash: String, + pub first_published_at: String, + pub last_modified_at: String, + pub indexed_at: Option, + pub metadata: String, + pub created_at: String, + pub updated_at: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Queryable, Identifiable, Insertable)] +#[diesel(table_name = basic_tools)] +pub struct BasicTool { + pub id: String, + pub bot_id: String, + pub tool_name: String, + pub file_path: String, + pub ast_path: String, + pub file_hash: String, + pub mcp_json: Option, + pub tool_json: Option, + pub compiled_at: String, + pub is_active: i32, + pub created_at: String, + pub updated_at: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Queryable, Identifiable, Insertable)] +#[diesel(table_name = kb_collections)] +pub struct KBCollection { + pub id: String, + pub bot_id: String, + pub user_id: String, + pub name: String, + pub folder_path: String, + pub qdrant_collection: String, + pub document_count: i32, + pub is_active: i32, + pub created_at: String, + pub updated_at: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Queryable, Identifiable, Insertable)] +#[diesel(table_name = user_kb_associations)] +pub struct UserKBAssociation { + pub id: String, + pub user_id: String, + pub bot_id: String, + pub kb_name: String, + pub is_website: i32, + pub website_url: Option, + pub created_at: String, + pub updated_at: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Queryable, Identifiable, Insertable)] +#[diesel(table_name = session_tool_associations)] +pub struct SessionToolAssociation { + pub id: String, + pub session_id: String, + pub tool_name: String, + pub added_at: String, +} + +pub mod schema { + diesel::table! { + organizations (org_id) { + org_id -> Uuid, + name -> Text, + slug -> Text, + created_at -> Timestamptz, + } + } + + diesel::table! { + bots (bot_id) { + bot_id -> Uuid, + name -> Text, + status -> Int4, + config -> Jsonb, + created_at -> Timestamptz, + updated_at -> Timestamptz, + } + } + + diesel::table! { + system_automations (id) { + id -> Uuid, + kind -> Int4, + target -> Nullable, + schedule -> Nullable, + param -> Text, + is_active -> Bool, + last_triggered -> Nullable, + } + } + + diesel::table! { + user_sessions (id) { + id -> Uuid, + user_id -> Uuid, + bot_id -> Uuid, + title -> Text, + context_data -> Jsonb, + answer_mode -> Int4, + current_tool -> Nullable, + created_at -> Timestamptz, + updated_at -> Timestamptz, + } + } + + diesel::table! { + message_history (id) { + id -> Uuid, + session_id -> Uuid, + user_id -> Uuid, + role -> Int4, + content_encrypted -> Text, + message_type -> Int4, + message_index -> Int8, + created_at -> Timestamptz, + } + } + + diesel::table! { + users (id) { + id -> Uuid, + username -> Text, + email -> Text, + password_hash -> Text, + is_active -> Bool, + created_at -> Timestamptz, + updated_at -> Timestamptz, + } + } + + diesel::table! { + clicks (id) { + id -> Uuid, + campaign_id -> Text, + email -> Text, + updated_at -> Timestamptz, + } + } + + diesel::table! { + bot_memories (id) { + id -> Uuid, + bot_id -> Uuid, + key -> Text, + value -> Text, + created_at -> Timestamptz, + updated_at -> Timestamptz, + } + } + + diesel::table! { + kb_documents (id) { + id -> Text, + bot_id -> Text, + user_id -> Text, + collection_name -> Text, + file_path -> Text, + file_size -> Integer, + file_hash -> Text, + first_published_at -> Text, + last_modified_at -> Text, + indexed_at -> Nullable, + metadata -> Text, + created_at -> Text, + updated_at -> Text, + } + } + + diesel::table! { + basic_tools (id) { + id -> Text, + bot_id -> Text, + tool_name -> Text, + file_path -> Text, + ast_path -> Text, + file_hash -> Text, + mcp_json -> Nullable, + tool_json -> Nullable, + compiled_at -> Text, + is_active -> Integer, + created_at -> Text, + updated_at -> Text, + } + } + + diesel::table! { + kb_collections (id) { + id -> Text, + bot_id -> Text, + user_id -> Text, + name -> Text, + folder_path -> Text, + qdrant_collection -> Text, + document_count -> Integer, + is_active -> Integer, + created_at -> Text, + updated_at -> Text, + } + } + + diesel::table! { + user_kb_associations (id) { + id -> Text, + user_id -> Text, + bot_id -> Text, + kb_name -> Text, + is_website -> Integer, + website_url -> Nullable, + created_at -> Text, + updated_at -> Text, + } + } + + diesel::table! { + session_tool_associations (id) { + id -> Text, + session_id -> Text, + tool_name -> Text, + added_at -> Text, + } } } -diesel::table! { - bots (bot_id) { - bot_id -> Uuid, - name -> Text, - status -> Int4, - config -> Jsonb, - created_at -> Timestamptz, - updated_at -> Timestamptz, - } -} - -diesel::table! { - system_automations (id) { - id -> Uuid, - kind -> Int4, - target -> Nullable, - schedule -> Nullable, - param -> Text, - is_active -> Bool, - last_triggered -> Nullable, - } -} - -diesel::table! { - user_sessions (id) { - id -> Uuid, - user_id -> Uuid, - bot_id -> Uuid, - title -> Text, - context_data -> Jsonb, - answer_mode -> Int4, - current_tool -> Nullable, - created_at -> Timestamptz, - updated_at -> Timestamptz, - } -} - -diesel::table! { - message_history (id) { - id -> Uuid, - session_id -> Uuid, - user_id -> Uuid, - role -> Int4, - content_encrypted -> Text, - message_type -> Int4, - message_index -> Int8, - created_at -> Timestamptz, - } -} - -diesel::table! { - users (id) { - id -> Uuid, - username -> Text, - email -> Text, - password_hash -> Text, - is_active -> Bool, - created_at -> Timestamptz, - updated_at -> Timestamptz, - } -} - -diesel::table! { - clicks (id) { - id -> Uuid, - campaign_id -> Text, - email -> Text, - updated_at -> Timestamptz, - } -} - -diesel::table! { - bot_memories (id) { - id -> Uuid, - bot_id -> Uuid, - key -> Text, - value -> Text, - created_at -> Timestamptz, - updated_at -> Timestamptz, - } -} +// Re-export all tables at the module level for backward compatibility +pub use schema::*; diff --git a/src/web_automation/mod.rs b/src/web_automation/mod.rs index 6547af17..c6bcc759 100644 --- a/src/web_automation/mod.rs +++ b/src/web_automation/mod.rs @@ -1,3 +1,5 @@ +pub mod crawler; + use headless_chrome::browser::tab::Tab; use headless_chrome::{Browser, LaunchOptions}; use std::env; @@ -12,6 +14,8 @@ use tokio::sync::Semaphore; use crate::shared::utils::{download_file, extract_zip_recursive}; +pub use crawler::WebCrawler; + pub struct BrowserSetup { pub brave_path: String, pub chromedriver_path: String,