From 3add3ccbfa430cd78b07b5e156da505c5d18dc4f Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Wed, 26 Nov 2025 22:54:22 -0300 Subject: [PATCH] Fix typos in bot file extensions and keyword names Changed incorrect references to .vbs files to .bas and corrected USE_WEBSITE keyword naming. Also added missing fields to API response structure and clarified that start.bas is optional for bots. --- CHANGELOG.md | 2 +- docs/src/SUMMARY.md | 2 +- docs/src/chapter-02/README.md | 4 +- docs/src/chapter-02/gbai.md | 41 +- docs/src/chapter-02/gbdialog.md | 18 +- docs/src/chapter-02/gbkb.md | 2 +- docs/src/chapter-02/templates.md | 4 +- docs/src/chapter-03/indexing.md | 16 +- docs/src/chapter-03/summary.md | 2 +- docs/src/chapter-03/vector-collections.md | 4 +- .../keyword-add-website.md | 23 - .../chapter-06-gbdialog/keyword-clear-kb.md | 2 +- .../src/chapter-06-gbdialog/keyword-use-kb.md | 8 +- .../keyword-use-website.md | 65 ++ docs/src/chapter-06-gbdialog/keywords.md | 2 +- docs/src/chapter-11-features/README.md | 6 +- docs/src/chapter-14-migration/overview.md | 8 +- migrations/6.0.9_website_support/down.sql | 7 + migrations/6.0.9_website_support/up.sql | 86 ++ src/api_router.rs | 257 +++++- src/basic/compiler/mod.rs | 28 +- src/basic/keywords/add_website.rs | 74 -- src/basic/keywords/book.rs | 820 ++++++++++++------ src/basic/keywords/mod.rs | 2 +- src/basic/keywords/use_website.rs | 407 +++++++++ src/basic/keywords/weather.rs | 563 ++++++++---- src/basic/mod.rs | 5 +- src/calendar/mod.rs | 381 ++++++-- src/core/bootstrap/mod.rs | 10 +- src/core/bot/channels/instagram.rs | 379 +++++++- src/core/bot/channels/mod.rs | 25 + src/core/bot/channels/teams.rs | 358 +++++++- src/core/bot/channels/whatsapp.rs | 336 ++++++- src/core/config/mod.rs | 23 + src/core/kb/document_processor.rs | 587 +++++++++++++ src/core/kb/embedding_generator.rs | 443 ++++++++++ src/core/kb/kb_indexer.rs | 546 ++++++++++++ src/core/kb/mod.rs | 215 +++++ src/core/kb/web_crawler.rs | 346 
++++++++ src/core/kb/website_crawler_service.rs | 287 ++++++ src/core/mod.rs | 1 + src/core/shared/state.rs | 12 +- src/drive/drive_monitor/mod.rs | 225 +++++ src/drive/files.rs | 283 +++++- src/main.rs | 125 +-- src/tasks/mod.rs | 205 ++++- .../{climate.vbs => climate.bas} | 0 .../{calculate.vbs => calculate.bas} | 0 .../{send-email.vbs => send-email.bas} | 0 .../{send-sms.vbs => send-sms.bas} | 0 .../{translate.vbs => translate.bas} | 0 .../{weather.vbs => weather.bas} | 0 .../default.gbai/default.gbot/config.csv | 4 + .../{get-image.vbs => get-image.bas} | 0 ...to-instagram.vbs => post-to-instagram.bas} | 0 .../public-apis.gbai/KEYWORDS_CHECKLIST.md | 22 +- templates/public-apis.gbai/QUICKSTART.md | 4 +- templates/public-apis.gbai/README.md | 12 +- .../{animals-apis.vbs => animals-apis.bas} | 0 ...utility-apis.vbs => data-utility-apis.bas} | 0 ...inment-apis.vbs => entertainment-apis.bas} | 0 .../{food-apis.vbs => food-apis.bas} | 0 ...-space-apis.vbs => science-space-apis.bas} | 0 .../{weather-apis.vbs => weather-apis.bas} | 0 64 files changed, 6465 insertions(+), 822 deletions(-) delete mode 100644 docs/src/chapter-06-gbdialog/keyword-add-website.md create mode 100644 docs/src/chapter-06-gbdialog/keyword-use-website.md create mode 100644 migrations/6.0.9_website_support/down.sql create mode 100644 migrations/6.0.9_website_support/up.sql delete mode 100644 src/basic/keywords/add_website.rs create mode 100644 src/basic/keywords/use_website.rs create mode 100644 src/core/kb/document_processor.rs create mode 100644 src/core/kb/embedding_generator.rs create mode 100644 src/core/kb/kb_indexer.rs create mode 100644 src/core/kb/mod.rs create mode 100644 src/core/kb/web_crawler.rs create mode 100644 src/core/kb/website_crawler_service.rs rename templates/api-client.gbai/api-client.gbdialog/{climate.vbs => climate.bas} (100%) rename templates/default.gbai/default.gbdialog/{calculate.vbs => calculate.bas} (100%) rename 
templates/default.gbai/default.gbdialog/{send-email.vbs => send-email.bas} (100%) rename templates/default.gbai/default.gbdialog/{send-sms.vbs => send-sms.bas} (100%) rename templates/default.gbai/default.gbdialog/{translate.vbs => translate.bas} (100%) rename templates/default.gbai/default.gbdialog/{weather.vbs => weather.bas} (100%) rename templates/marketing.gbai/marketing.gbdialog/{get-image.vbs => get-image.bas} (100%) rename templates/marketing.gbai/marketing.gbdialog/{post-to-instagram.vbs => post-to-instagram.bas} (100%) rename templates/public-apis.gbai/public-apis.gbdialog/{animals-apis.vbs => animals-apis.bas} (100%) rename templates/public-apis.gbai/public-apis.gbdialog/{data-utility-apis.vbs => data-utility-apis.bas} (100%) rename templates/public-apis.gbai/public-apis.gbdialog/{entertainment-apis.vbs => entertainment-apis.bas} (100%) rename templates/public-apis.gbai/public-apis.gbdialog/{food-apis.vbs => food-apis.bas} (100%) rename templates/public-apis.gbai/public-apis.gbdialog/{science-space-apis.vbs => science-space-apis.bas} (100%) rename templates/public-apis.gbai/public-apis.gbdialog/{weather-apis.vbs => weather-apis.bas} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4c9d925f..c5dd335df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2091,7 +2091,7 @@ ### Bug Fixes -* **basic:** Fix default bot.vbs missing parenthesis in code. ([8501002](https://github.com/pragmatismo-io/BotServer/commit/8501002)) +* **basic:** Fix default bot.bas missing parenthesis in code. 
([8501002](https://github.com/pragmatismo-io/BotServer/commit/8501002)) ## [1.7.1](https://github.com/pragmatismo-io/BotServer/compare/1.7.0...1.7.1) (2019-08-30) diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 799b154dc..3cfe69ad5 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -62,7 +62,7 @@ - [SET BOT MEMORY](./chapter-06-gbdialog/keyword-set-bot-memory.md) - [USE KB](./chapter-06-gbdialog/keyword-use-kb.md) - [CLEAR KB](./chapter-06-gbdialog/keyword-clear-kb.md) - - [ADD WEBSITE](./chapter-06-gbdialog/keyword-add-website.md) + - [USE WEBSITE](./chapter-06-gbdialog/keyword-use-website.md) - [USE TOOL](./chapter-06-gbdialog/keyword-use-tool.md) - [CLEAR TOOLS](./chapter-06-gbdialog/keyword-clear-tools.md) - [GET](./chapter-06-gbdialog/keyword-get.md) diff --git a/docs/src/chapter-02/README.md b/docs/src/chapter-02/README.md index 79ff19121..e356075fa 100644 --- a/docs/src/chapter-02/README.md +++ b/docs/src/chapter-02/README.md @@ -29,7 +29,7 @@ Drop the folder in `templates/`, it loads automatically. ### Dialogs (.gbdialog) - BASIC scripts that control conversation -- Must have `start.bas` as entry point +- `start.bas` is optional (but needed to activate tools/KB with USE TOOL/USE KB) - Simple commands like TALK and HEAR ### Knowledge Base (.gbkb) @@ -59,6 +59,8 @@ Drop the folder in `templates/`, it loads automatically. No build process. No compilation. Just folders and files. +The web UI uses **vanilla JavaScript and Alpine.js** - no webpack, no npm build, just edit and refresh. 
+ ## Topics Covered - [.gbai Architecture](./gbai.md) - Package details diff --git a/docs/src/chapter-02/gbai.md b/docs/src/chapter-02/gbai.md index d5c47f1d4..e2549b092 100644 --- a/docs/src/chapter-02/gbai.md +++ b/docs/src/chapter-02/gbai.md @@ -40,7 +40,7 @@ BASIC scripts that control conversation flow: ``` my-bot.gbdialog/ - start.bas # Runs when session starts + start.bas # Optional - needed to activate tools/KB auth.bas # Login flow tools/ # Callable functions book-meeting.bas @@ -49,13 +49,16 @@ my-bot.gbdialog/ on-email.bas ``` -Example `start.bas`: +Example `start.bas` (optional, but required for tools/KB): ```basic -TALK "Hi! I'm your assistant." -answer = HEAR -TALK "I can help you with: " + answer +USE KB "policies" +USE TOOL "book-meeting" +USE TOOL "check-status" +TALK "Hi! I'm your assistant with tools and knowledge ready." ``` +Note: If you don't need tools or knowledge bases, `start.bas` is optional. The LLM will handle basic conversations without it. + ### .gbkb/ - Your Bot's Knowledge Documents organized by topic: @@ -109,7 +112,7 @@ Here's a complete customer support bot: ``` support.gbai/ support.gbdialog/ - start.bas + start.bas # Optional, but needed for tools/KB tools/ create-ticket.bas check-status.bas @@ -122,7 +125,7 @@ support.gbai/ config.csv ``` -`start.bas`: +`start.bas` (activates tools and knowledge bases): ```basic USE KB "faqs" USE KB "guides" @@ -212,7 +215,7 @@ templates/ ### Required - Folder must end with `.gbai` - Subfolders must match: `botname.gbdialog`, `botname.gbkb`, etc. -- Main script must be `start.bas` +- `start.bas` is optional, but required if you want to use tools or knowledge bases (must USE TOOL and USE KB to activate them) ### Recommended - Use lowercase with hyphens: `customer-service.gbai` @@ -229,20 +232,14 @@ When BotServer starts: Takes about 5-10 seconds per bot. 
-## Hot Reload +## UI Architecture -Change files while running: - -```bash -# Edit script -vim templates/my-bot.gbai/my-bot.gbdialog/start.bas - -# Reload just that bot -curl http://localhost:8080/api/admin/reload/my-bot - -# Or restart everything -./botserver restart -``` +The web interface uses **vanilla JavaScript and Alpine.js** - no build process required: +- Pure HTML/CSS/JS files +- Alpine.js for reactivity +- No webpack, no npm build +- Edit and refresh to see changes +- Zero compilation time ## Package Size Limits @@ -258,7 +255,7 @@ Default limits (configurable): **Bot not appearing?** - Check folder ends with `.gbai` - Verify subfolders match bot name -- Look for `start.bas` in `.gbdialog/` +- If using tools/KB, ensure `start.bas` exists with USE TOOL/USE KB commands **Documents not searchable?** - Ensure files are in `.gbkb/` subfolder diff --git a/docs/src/chapter-02/gbdialog.md b/docs/src/chapter-02/gbdialog.md index 4fc915ac3..ccc4d0a9a 100644 --- a/docs/src/chapter-02/gbdialog.md +++ b/docs/src/chapter-02/gbdialog.md @@ -73,15 +73,27 @@ TALK "What product information can I help you with?" ## Script Structure -### Entry Point: start.bas -Every bot needs a `start.bas` file in the [`.gbdialog`](../chapter-02/gbdialog.md) folder: +### Entry Point: start.bas (Optional) +The `start.bas` file in the [`.gbdialog`](../chapter-02/gbdialog.md) folder is **optional**, but required if you want to activate tools or knowledge bases: ```basic -' Minimal start script - let system AI handle conversations +' Optional start script - needed only to activate tools/KB USE KB "company_docs" +USE TOOL "book-meeting" +USE TOOL "check-status" TALK "Welcome! How can I assist you today?" 
``` +**When you need start.bas:** +- To activate knowledge bases with `USE KB` +- To activate tools with `USE TOOL` +- To set initial context or configuration + +**When you don't need start.bas:** +- For simple conversational bots +- When the LLM can handle everything without tools/KB +- For basic Q&A without document search + ### Tool Definitions Create separate `.bas` files for each tool. See [KB and Tools](../chapter-03/kb-and-tools.md) for more information: diff --git a/docs/src/chapter-02/gbkb.md b/docs/src/chapter-02/gbkb.md index fcd7b0d70..db8f2a8d9 100644 --- a/docs/src/chapter-02/gbkb.md +++ b/docs/src/chapter-02/gbkb.md @@ -46,7 +46,7 @@ Each document is processed into vector embeddings using: ### Creating Collections ```basic USE KB "company-policies" -ADD WEBSITE "https://company.com/docs" +USE WEBSITE "https://company.com/docs" ``` ### Using Collections diff --git a/docs/src/chapter-02/templates.md b/docs/src/chapter-02/templates.md index 6d5ddb32b..6bff56afe 100644 --- a/docs/src/chapter-02/templates.md +++ b/docs/src/chapter-02/templates.md @@ -154,7 +154,7 @@ Add `.bas` files to `.gbdialog`: ### Development Tools #### api-client.gbai -- **Files**: `climate.vbs`, `msft-partner-center.bas` +- **Files**: `climate.bas`, `msft-partner-center.bas` - **Examples**: REST API patterns - **Integration**: External services @@ -246,4 +246,4 @@ When migrating from traditional platforms: - README files in each template folder - Example configurations included - Sample knowledge bases provided -- Community forums for discussions \ No newline at end of file +- Community forums for discussions diff --git a/docs/src/chapter-03/indexing.md b/docs/src/chapter-03/indexing.md index d0681bc1b..2fdb992d4 100644 --- a/docs/src/chapter-03/indexing.md +++ b/docs/src/chapter-03/indexing.md @@ -8,7 +8,7 @@ The system automatically indexes documents when: - Files are added to any `.gbkb` folder - `USE KB` is called for a collection - Files are modified or updated -- `ADD 
WEBSITE` crawls new content +- `USE WEBSITE` registers websites for crawling (preprocessing) and associates them with sessions (runtime) ## How Indexing Works @@ -35,8 +35,9 @@ To keep web content fresh, schedule regular crawls: ' In update-docs.bas SET SCHEDULE "0 2 * * *" ' Run daily at 2 AM -ADD WEBSITE "https://docs.example.com" -' Website is crawled and indexed automatically +USE WEBSITE "https://docs.example.com" +' Website is registered for crawling during preprocessing +' At runtime, it associates the crawled content with the session ``` ### Scheduling Options @@ -106,11 +107,10 @@ Schedule regular web updates: ' In maintenance.bas SET SCHEDULE "0 1 * * *" -' Update news daily -ADD WEBSITE "https://company.com/news" - -' Update product docs on schedule -ADD WEBSITE "https://company.com/products" +' Register websites for crawling +USE WEBSITE "https://company.com/news" +USE WEBSITE "https://company.com/products" +' Websites are crawled by background service ``` ## Best Practices diff --git a/docs/src/chapter-03/summary.md b/docs/src/chapter-03/summary.md index ea15aee49..ec7c687eb 100644 --- a/docs/src/chapter-03/summary.md +++ b/docs/src/chapter-03/summary.md @@ -4,7 +4,7 @@ This chapter explains how GeneralBots manages knowledge‑base collections, inde | Document | File | Description | |----------|------|-------------| -| **README** | [README.md](README.md) | High‑level reference for the `.gbkb` package and its core commands (`USE KB`, `CLEAR KB`, `ADD WEBSITE`). | +| **README** | [README.md](README.md) | High‑level reference for the `.gbkb` package and its core commands (`USE KB`, `CLEAR KB`, `USE WEBSITE`). | | **Caching** | [caching.md](caching.md) | Optional in‑memory and persistent SQLite caching to speed up frequent `FIND` queries. | | **Context Compaction** | [context-compaction.md](context-compaction.md) | Techniques to keep the LLM context window within limits (summarization, memory pruning, sliding window). 
| | **Indexing** | [indexing.md](indexing.md) | Process of extracting, chunking, embedding, and storing document vectors in the VectorDB. | diff --git a/docs/src/chapter-03/vector-collections.md b/docs/src/chapter-03/vector-collections.md index fd6bc8b76..ca08eb6e9 100644 --- a/docs/src/chapter-03/vector-collections.md +++ b/docs/src/chapter-03/vector-collections.md @@ -111,8 +111,8 @@ To keep web content updated, schedule regular crawls: ```basic ' In update-content.bas SET SCHEDULE "0 3 * * *" ' Run daily at 3 AM -ADD WEBSITE "https://example.com/docs" -' Website content is crawled and added to the collection +USE WEBSITE "https://example.com/docs" +' Website is registered for crawling and will be available in conversations ``` ## How Search Works diff --git a/docs/src/chapter-06-gbdialog/keyword-add-website.md b/docs/src/chapter-06-gbdialog/keyword-add-website.md deleted file mode 100644 index c79a0c59c..000000000 --- a/docs/src/chapter-06-gbdialog/keyword-add-website.md +++ /dev/null @@ -1,23 +0,0 @@ -# ADD WEBSITE Keyword - -**Syntax** - -``` -ADD WEBSITE "https://example.com" -``` - -**Parameters** - -- `"url"` – A valid HTTP or HTTPS URL pointing to a website that should be added to the conversation context. - -**Description** - -`ADD WEBSITE` validates the provided URL and, when the `web_automation` feature is enabled, launches a headless browser to crawl the site, extract its textual content, and index it into a vector‑DB collection associated with the current user. The collection name is derived from the URL and the bot's identifiers. After indexing, the website becomes a knowledge source that can be queried by `FIND` or `LLM` calls. - -If the feature is not compiled, the keyword returns an error indicating that web automation is unavailable. - -**Example** - -```basic -ADD WEBSITE "https://en.wikipedia.org/wiki/General_Bots" -TALK "Website added. You can now search its content with FIND." 
diff --git a/docs/src/chapter-06-gbdialog/keyword-clear-kb.md b/docs/src/chapter-06-gbdialog/keyword-clear-kb.md index 001802213..fe33c688d 100644 --- a/docs/src/chapter-06-gbdialog/keyword-clear-kb.md +++ b/docs/src/chapter-06-gbdialog/keyword-clear-kb.md @@ -191,7 +191,7 @@ NEXT ## Related Keywords - [USE KB](./keyword-use-kb.md) - Load knowledge bases -- [ADD WEBSITE](./keyword-add-website.md) - Create KB from website +- [USE WEBSITE](./keyword-use-website.md) - Associate website with conversation - [FIND](./keyword-find.md) - Search within loaded KBs - [LLM](./keyword-llm.md) - Use KB context in responses diff --git a/docs/src/chapter-06-gbdialog/keyword-use-kb.md b/docs/src/chapter-06-gbdialog/keyword-use-kb.md index ce43faeb4..d53716ec5 100644 --- a/docs/src/chapter-06-gbdialog/keyword-use-kb.md +++ b/docs/src/chapter-06-gbdialog/keyword-use-kb.md @@ -163,7 +163,7 @@ END ```basic ' Refresh collection with new documents CLEAR KB "news" -ADD WEBSITE "https://example.com/news" +USE WEBSITE "https://example.com/news" USE KB "news" ``` @@ -194,9 +194,9 @@ USE TOOL "check_stock" ' Tool can access inventory knowledge when executing ``` -### With ADD WEBSITE +### With USE WEBSITE ```basic -ADD WEBSITE "https://docs.example.com" TO "documentation" +USE WEBSITE "https://docs.example.com" USE KB "documentation" ' Fresh web content now searchable ``` @@ -266,6 +266,6 @@ Solution: Wait for indexing to complete (automatic) ## See Also - [CLEAR KB](./keyword-clear-kb.md) - Deactivate knowledge bases -- [ADD WEBSITE](./keyword-add-website.md) - Add web content to KB +- [USE WEBSITE](./keyword-use-website.md) - Associate website with conversation - [Knowledge Base Guide](../chapter-03/README.md) - Complete KB documentation - [Vector Collections](../chapter-03/vector-collections.md) - How collections work diff --git a/docs/src/chapter-06-gbdialog/keyword-use-website.md b/docs/src/chapter-06-gbdialog/keyword-use-website.md new file mode 100644 index 000000000..1e9fe80f9 --- 
/dev/null +++ b/docs/src/chapter-06-gbdialog/keyword-use-website.md @@ -0,0 +1,65 @@ +# USE WEBSITE Keyword + +**Syntax** + +``` +USE WEBSITE "https://example.com" +``` + +**Parameters** + +- `"url"` – A valid HTTP or HTTPS URL pointing to a website that should be made available in the conversation context. + +**Description** + +`USE WEBSITE` operates in two distinct modes: + +1. **Preprocessing Mode** (Script Compilation): When found in a BASIC script during compilation, it registers the website for background crawling. The crawler service will fetch, extract, and index the website's content into a vector database collection. This ensures the website content is ready before any conversation starts. + +2. **Runtime Mode** (Conversation Execution): During a conversation, `USE WEBSITE` associates an already-crawled website collection with the current session, making it available for queries via `FIND` or `LLM` calls. This behaves similarly to `USE KB` - it's a session-scoped association. + +If a website hasn't been registered during preprocessing, the runtime execution will fail with an appropriate error message. + +**Example** + +```basic +' In script preprocessing, this registers the website for crawling +USE WEBSITE "https://docs.example.com" + +' During conversation, this makes the crawled content available +USE WEBSITE "https://docs.example.com" +FIND "deployment procedures" +TALK "I found information about deployment procedures in the documentation." 
+``` + +**Preprocessing Behavior** + +When the script is compiled: +- The URL is validated +- The website is registered in the `website_crawls` table +- The crawler service picks it up and indexes the content +- Status can be: pending (0), crawled (1), or failed (2) + +**Runtime Behavior** + +When executed in a conversation: +- Checks if the website has been crawled +- Associates the website collection with the current session +- Makes the content searchable via `FIND` and available to `LLM` + +**With LLM Integration** + +```basic +USE WEBSITE "https://company.com/policies" +question = HEAR "What would you like to know about our policies?" +FIND question +answer = LLM "Based on the search results, provide a clear answer" +TALK answer +``` + +**Related Keywords** + +- [CLEAR WEBSITES](./keyword-clear-websites.md) - Remove all website associations from session +- [USE KB](./keyword-use-kb.md) - Similar functionality for knowledge base files +- [FIND](./keyword-find.md) - Search within loaded websites and KBs +- [LLM](./keyword-llm.md) - Process search results with AI \ No newline at end of file diff --git a/docs/src/chapter-06-gbdialog/keywords.md b/docs/src/chapter-06-gbdialog/keywords.md index d416ce65e..c8d1d0e59 100644 --- a/docs/src/chapter-06-gbdialog/keywords.md +++ b/docs/src/chapter-06-gbdialog/keywords.md @@ -33,7 +33,7 @@ The source code for each keyword lives in `src/basic/keywords/`. 
Only the keywor - [USE KB](./keyword-use-kb.md) - Load knowledge base - [CLEAR KB](./keyword-clear-kb.md) - Unload knowledge base -- [ADD WEBSITE](./keyword-add-website.md) - Index website to KB +- [USE WEBSITE](./keyword-use-website.md) - Associate website with conversation - [FIND](./keyword-find.md) - Search in KB ## Tools & Automation diff --git a/docs/src/chapter-11-features/README.md b/docs/src/chapter-11-features/README.md index c25ca6b1e..2ff3c73fb 100644 --- a/docs/src/chapter-11-features/README.md +++ b/docs/src/chapter-11-features/README.md @@ -5,13 +5,13 @@ This table maps major features of GeneralBots to the chapters and keywords that |---------|------------|------------------| | Start server & basic chat | 01 (Run and Talk) | `TALK`, `HEAR` | | Package system overview | 02 (About Packages) | – | -| Knowledge‑base management | 03 (gbkb Reference) | `USE KB`, `SET KB`, `ADD WEBSITE` | +| Knowledge‑base management | 03 (gbkb Reference) | `USE KB`, `SET KB`, `USE WEBSITE` | | UI theming | 04 (gbtheme Reference) | – (CSS/HTML assets) | -| BASIC dialog scripting | 05 (gbdialog Reference) | All BASIC keywords (`TALK`, `HEAR`, `LLM`, `FORMAT`, `USE KB`, `SET KB`, `ADD WEBSITE`, …) | +| BASIC dialog scripting | 05 (gbdialog Reference) | All BASIC keywords (`TALK`, `HEAR`, `LLM`, `FORMAT`, `USE KB`, `SET KB`, `USE WEBSITE`, …) | | Custom Rust extensions | 06 (gbapp Reference) | `USE TOOL`, custom Rust code | | Bot configuration | 07 (gbot Reference) | `config.csv` fields | | Built‑in tooling | 08 (Tooling) | All keywords listed in the table | -| Semantic search & Qdrant | 03 (gbkb Reference) | `ADD WEBSITE`, vector search | +| Semantic search & Qdrant | 03 (gbkb Reference) | `USE WEBSITE`, vector search | | Email & external APIs | 08 (Tooling) | `CALL`, `CALL_ASYNC` | | Scheduling & events | 08 (Tooling) | `SET SCHEDULE`, `ON` | | Testing & CI | 10 (Contributing) | – | diff --git a/docs/src/chapter-14-migration/overview.md 
b/docs/src/chapter-14-migration/overview.md index 814bad3c7..8b235a5f4 100644 --- a/docs/src/chapter-14-migration/overview.md +++ b/docs/src/chapter-14-migration/overview.md @@ -46,11 +46,13 @@ Transform static documents into searchable knowledge: ```basic ' Convert SharePoint documents to searchable KB USE KB "company_docs" -ADD WEBSITE "https://sharepoint.company.com/docs" +USE WEBSITE "https://sharepoint.company.com/docs" ' Now accessible via natural language -answer = HEAR "What's our vacation policy?" -' System automatically searches KB and responds +question = HEAR "What would you like to know?" +FIND question +answer = LLM "Based on the search results, provide a helpful answer" +TALK answer ``` ## Migration Phases diff --git a/migrations/6.0.9_website_support/down.sql b/migrations/6.0.9_website_support/down.sql new file mode 100644 index 000000000..4841675ed --- /dev/null +++ b/migrations/6.0.9_website_support/down.sql @@ -0,0 +1,7 @@ +-- Drop session_website_associations table and related indexes +DROP TABLE IF EXISTS session_website_associations; + +-- Drop website_crawls table and related objects +DROP TRIGGER IF EXISTS website_crawls_updated_at_trigger ON website_crawls; +DROP FUNCTION IF EXISTS update_website_crawls_updated_at(); +DROP TABLE IF EXISTS website_crawls; diff --git a/migrations/6.0.9_website_support/up.sql b/migrations/6.0.9_website_support/up.sql new file mode 100644 index 000000000..ff28c3242 --- /dev/null +++ b/migrations/6.0.9_website_support/up.sql @@ -0,0 +1,86 @@ +-- Create website_crawls table for tracking crawled websites +CREATE TABLE IF NOT EXISTS website_crawls ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + bot_id UUID NOT NULL, + url TEXT NOT NULL, + last_crawled TIMESTAMPTZ, + next_crawl TIMESTAMPTZ, + expires_policy VARCHAR(20) NOT NULL DEFAULT '1d', + max_depth INTEGER DEFAULT 3, + max_pages INTEGER DEFAULT 100, + crawl_status SMALLINT DEFAULT 0, -- 0=pending, 1=success, 2=processing, 3=error + pages_crawled INTEGER 
DEFAULT 0, + error_message TEXT, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + -- Ensure unique URL per bot + CONSTRAINT unique_bot_url UNIQUE (bot_id, url), + + -- Foreign key to bots table + CONSTRAINT fk_website_crawls_bot + FOREIGN KEY (bot_id) + REFERENCES bots(id) + ON DELETE CASCADE +); + +-- Create indexes for efficient queries +CREATE INDEX IF NOT EXISTS idx_website_crawls_bot_id ON website_crawls(bot_id); +CREATE INDEX IF NOT EXISTS idx_website_crawls_next_crawl ON website_crawls(next_crawl); +CREATE INDEX IF NOT EXISTS idx_website_crawls_url ON website_crawls(url); +CREATE INDEX IF NOT EXISTS idx_website_crawls_status ON website_crawls(crawl_status); + +-- Create trigger to update updated_at timestamp +CREATE OR REPLACE FUNCTION update_website_crawls_updated_at() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER website_crawls_updated_at_trigger + BEFORE UPDATE ON website_crawls + FOR EACH ROW + EXECUTE FUNCTION update_website_crawls_updated_at(); + +-- Create session_website_associations table for tracking websites added to sessions +-- Similar to session_kb_associations but for websites +CREATE TABLE IF NOT EXISTS session_website_associations ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + session_id UUID NOT NULL, + bot_id UUID NOT NULL, + website_url TEXT NOT NULL, + collection_name TEXT NOT NULL, + is_active BOOLEAN DEFAULT true, + added_at TIMESTAMPTZ DEFAULT NOW(), + added_by_tool VARCHAR(255), + + -- Ensure unique website per session + CONSTRAINT unique_session_website UNIQUE (session_id, website_url), + + -- Foreign key to sessions table + CONSTRAINT fk_session_website_session + FOREIGN KEY (session_id) + REFERENCES sessions(id) + ON DELETE CASCADE, + + -- Foreign key to bots table + CONSTRAINT fk_session_website_bot + FOREIGN KEY (bot_id) + REFERENCES bots(id) + ON DELETE CASCADE +); + +-- Create indexes for efficient queries +CREATE 
INDEX IF NOT EXISTS idx_session_website_associations_session_id + ON session_website_associations(session_id) WHERE is_active = true; + +CREATE INDEX IF NOT EXISTS idx_session_website_associations_bot_id + ON session_website_associations(bot_id); + +CREATE INDEX IF NOT EXISTS idx_session_website_associations_url + ON session_website_associations(website_url); + +CREATE INDEX IF NOT EXISTS idx_session_website_associations_collection + ON session_website_associations(collection_name); diff --git a/src/api_router.rs b/src/api_router.rs index 9998f1106..aa7e420c5 100644 --- a/src/api_router.rs +++ b/src/api_router.rs @@ -455,76 +455,293 @@ async fn handle_storage_save( State(state): State>, Json(payload): Json, ) -> Result, StatusCode> { - Ok(Json(serde_json::json!({"success": true}))) + let key = payload["key"].as_str().ok_or(StatusCode::BAD_REQUEST)?; + let content = payload["content"].as_str().ok_or(StatusCode::BAD_REQUEST)?; + let bucket = payload["bucket"].as_str().unwrap_or("default"); + + // Use the drive module for S3/MinIO operations + match crate::drive::files::save_to_s3(&state, bucket, key, content.as_bytes()).await { + Ok(_) => Ok(Json(serde_json::json!({ + "success": true, + "key": key, + "bucket": bucket, + "size": content.len() + }))), + Err(e) => { + log::error!("Storage save failed: {}", e); + Err(StatusCode::INTERNAL_SERVER_ERROR) + } + } } async fn handle_storage_batch( State(state): State>, Json(payload): Json, ) -> Result, StatusCode> { - Ok(Json(serde_json::json!({"success": true}))) + let operations = payload["operations"] + .as_array() + .ok_or(StatusCode::BAD_REQUEST)?; + let bucket = payload["bucket"].as_str().unwrap_or("default"); + + let mut results = Vec::new(); + for op in operations { + let key = op["key"].as_str().unwrap_or(""); + let content = op["content"].as_str().unwrap_or(""); + let operation = op["operation"].as_str().unwrap_or("save"); + + let result = match operation { + "save" => crate::drive::files::save_to_s3(&state, bucket, 
key, content.as_bytes()) + .await + .map(|_| serde_json::json!({"key": key, "success": true})) + .unwrap_or_else( + |e| serde_json::json!({"key": key, "success": false, "error": e.to_string()}), + ), + "delete" => crate::drive::files::delete_from_s3(&state, bucket, key) + .await + .map(|_| serde_json::json!({"key": key, "success": true})) + .unwrap_or_else( + |e| serde_json::json!({"key": key, "success": false, "error": e.to_string()}), + ), + _ => serde_json::json!({"key": key, "success": false, "error": "Invalid operation"}), + }; + results.push(result); + } + + Ok(Json(serde_json::json!({ + "success": true, + "results": results, + "total": results.len() + }))) } async fn handle_storage_json( State(state): State>, Json(payload): Json, ) -> Result, StatusCode> { - Ok(Json(serde_json::json!({"success": true}))) + let key = payload["key"].as_str().ok_or(StatusCode::BAD_REQUEST)?; + let data = &payload["data"]; + let bucket = payload["bucket"].as_str().unwrap_or("default"); + + let json_content = serde_json::to_vec_pretty(data).map_err(|_| StatusCode::BAD_REQUEST)?; + + match crate::drive::files::save_to_s3(&state, bucket, key, &json_content).await { + Ok(_) => Ok(Json(serde_json::json!({ + "success": true, + "key": key, + "bucket": bucket, + "size": json_content.len() + }))), + Err(e) => { + log::error!("JSON storage failed: {}", e); + Err(StatusCode::INTERNAL_SERVER_ERROR) + } + } } async fn handle_storage_delete( State(state): State>, + Query(params): Query>, ) -> Result, StatusCode> { - Ok(Json(serde_json::json!({"success": true}))) + let key = params.get("key").ok_or(StatusCode::BAD_REQUEST)?; + let bucket = params + .get("bucket") + .map(|s| s.as_str()) + .unwrap_or("default"); + + match crate::drive::files::delete_from_s3(&state, bucket, key).await { + Ok(_) => Ok(Json(serde_json::json!({ + "success": true, + "key": key, + "bucket": bucket + }))), + Err(e) => { + log::error!("Storage delete failed: {}", e); + Err(StatusCode::INTERNAL_SERVER_ERROR) + } + } } 
async fn handle_storage_quota_check( State(state): State>, + Query(params): Query>, ) -> Result, StatusCode> { - Ok(Json( - serde_json::json!({"total": 1000000000, "used": 500000000, "available": 500000000}), - )) + let bucket = params + .get("bucket") + .map(|s| s.as_str()) + .unwrap_or("default"); + + match crate::drive::files::get_bucket_stats(&state, bucket).await { + Ok(stats) => { + let total = 10_737_418_240i64; // 10GB default quota + let used = stats.total_size as i64; + let available = (total - used).max(0); + + Ok(Json(serde_json::json!({ + "total": total, + "used": used, + "available": available, + "file_count": stats.object_count, + "bucket": bucket + }))) + } + Err(_) => { + // Return default quota if stats unavailable + Ok(Json(serde_json::json!({ + "total": 10737418240, + "used": 0, + "available": 10737418240, + "file_count": 0, + "bucket": bucket + }))) + } + } } async fn handle_storage_cleanup( State(state): State>, + Json(payload): Json, ) -> Result, StatusCode> { - Ok(Json( - serde_json::json!({"success": true, "freed_bytes": 1024000}), - )) + let bucket = payload["bucket"].as_str().unwrap_or("default"); + let older_than_days = payload["older_than_days"].as_u64().unwrap_or(30); + + let cutoff_date = chrono::Utc::now() - chrono::Duration::days(older_than_days as i64); + + match crate::drive::files::cleanup_old_files(&state, bucket, cutoff_date).await { + Ok((deleted_count, freed_bytes)) => Ok(Json(serde_json::json!({ + "success": true, + "deleted_files": deleted_count, + "freed_bytes": freed_bytes, + "bucket": bucket + }))), + Err(e) => { + log::error!("Storage cleanup failed: {}", e); + Ok(Json(serde_json::json!({ + "success": false, + "error": e.to_string() + }))) + } + } } async fn handle_storage_backup_create( State(state): State>, Json(payload): Json, ) -> Result, StatusCode> { - Ok(Json( - serde_json::json!({"success": true, "backup_id": "backup-123"}), - )) + let bucket = payload["bucket"].as_str().unwrap_or("default"); + let backup_name = 
payload["name"].as_str().unwrap_or("backup"); + + let backup_id = format!("backup-{}-{}", backup_name, chrono::Utc::now().timestamp()); + let archive_bucket = format!("{}-backups", bucket); + + match crate::drive::files::create_bucket_backup(&state, bucket, &archive_bucket, &backup_id) + .await + { + Ok(file_count) => Ok(Json(serde_json::json!({ + "success": true, + "backup_id": backup_id, + "files_backed_up": file_count, + "backup_bucket": archive_bucket + }))), + Err(e) => { + log::error!("Backup creation failed: {}", e); + Err(StatusCode::INTERNAL_SERVER_ERROR) + } + } } async fn handle_storage_backup_restore( State(state): State>, Json(payload): Json, ) -> Result, StatusCode> { - Ok(Json(serde_json::json!({"success": true}))) + let backup_id = payload["backup_id"] + .as_str() + .ok_or(StatusCode::BAD_REQUEST)?; + let target_bucket = payload["target_bucket"].as_str().unwrap_or("default"); + let source_bucket = payload["source_bucket"] + .as_str() + .unwrap_or(&format!("{}-backups", target_bucket)); + + match crate::drive::files::restore_bucket_backup( + &state, + &source_bucket, + target_bucket, + backup_id, + ) + .await + { + Ok(file_count) => Ok(Json(serde_json::json!({ + "success": true, + "backup_id": backup_id, + "files_restored": file_count, + "target_bucket": target_bucket + }))), + Err(e) => { + log::error!("Backup restore failed: {}", e); + Err(StatusCode::INTERNAL_SERVER_ERROR) + } + } } async fn handle_storage_archive( State(state): State>, Json(payload): Json, ) -> Result, StatusCode> { - Ok(Json( - serde_json::json!({"success": true, "archive_id": "archive-123"}), - )) + let bucket = payload["bucket"].as_str().unwrap_or("default"); + let prefix = payload["prefix"].as_str().unwrap_or(""); + let archive_name = payload["name"].as_str().unwrap_or("archive"); + + let archive_id = format!( + "archive-{}-{}", + archive_name, + chrono::Utc::now().timestamp() + ); + let archive_key = format!("archives/{}.tar.gz", archive_id); + + match 
crate::drive::files::create_archive(&state, bucket, prefix, &archive_key).await { + Ok(archive_size) => Ok(Json(serde_json::json!({ + "success": true, + "archive_id": archive_id, + "archive_key": archive_key, + "archive_size": archive_size, + "bucket": bucket + }))), + Err(e) => { + log::error!("Archive creation failed: {}", e); + Err(StatusCode::INTERNAL_SERVER_ERROR) + } + } } async fn handle_storage_metrics( State(state): State>, + Query(params): Query>, ) -> Result, StatusCode> { - Ok(Json( - serde_json::json!({"total_files": 1000, "total_size_bytes": 500000000}), - )) + let bucket = params + .get("bucket") + .map(|s| s.as_str()) + .unwrap_or("default"); + + match crate::drive::files::get_bucket_metrics(&state, bucket).await { + Ok(metrics) => Ok(Json(serde_json::json!({ + "total_files": metrics.object_count, + "total_size_bytes": metrics.total_size, + "avg_file_size": if metrics.object_count > 0 { + metrics.total_size / metrics.object_count as u64 + } else { + 0 + }, + "bucket": bucket, + "last_modified": metrics.last_modified + }))), + Err(e) => { + log::error!("Failed to get storage metrics: {}", e); + Ok(Json(serde_json::json!({ + "total_files": 0, + "total_size_bytes": 0, + "error": e.to_string() + }))) + } + } } async fn handle_ai_analyze_text( diff --git a/src/basic/compiler/mod.rs b/src/basic/compiler/mod.rs index b30be6aed..98cbd9d9e 100644 --- a/src/basic/compiler/mod.rs +++ b/src/basic/compiler/mod.rs @@ -346,7 +346,7 @@ impl BasicCompiler { .replace("CLEAR SUGGESTIONS", "CLEAR_SUGGESTIONS") .replace("ADD SUGGESTION", "ADD_SUGGESTION") .replace("USE KB", "USE_KB") - .replace("ADD WEBSITE", "ADD_WEBSITE") + .replace("USE WEBSITE", "USE_WEBSITE") .replace("GET BOT MEMORY", "GET_BOT_MEMORY") .replace("SET BOT MEMORY", "SET_BOT_MEMORY") .replace("CREATE DRAFT", "CREATE_DRAFT"); @@ -371,6 +371,32 @@ impl BasicCompiler { } continue; } + if normalized.starts_with("USE_WEBSITE") { + let parts: Vec<&str> = normalized.split('"').collect(); + if parts.len() >= 
2 { + let url = parts[1]; + let mut conn = self + .state + .conn + .get() + .map_err(|e| format!("Failed to get database connection: {}", e))?; + if let Err(e) = + crate::basic::keywords::use_website::execute_use_website_preprocessing( + &mut conn, url, bot_id, + ) + { + log::error!("Failed to register USE_WEBSITE during preprocessing: {}", e); + } else { + log::info!( + "Registered website {} for crawling during preprocessing", + url + ); + } + } else { + log::warn!("Malformed USE_WEBSITE line ignored: {}", normalized); + } + continue; + } if normalized.starts_with("PARAM ") || normalized.starts_with("DESCRIPTION ") { continue; } diff --git a/src/basic/keywords/add_website.rs b/src/basic/keywords/add_website.rs deleted file mode 100644 index 0e233da14..000000000 --- a/src/basic/keywords/add_website.rs +++ /dev/null @@ -1,74 +0,0 @@ -use crate::shared::models::UserSession; -use crate::shared::state::AppState; -use log::{error, trace}; -use rhai::{Dynamic, Engine}; -use std::sync::Arc; -pub fn add_website_keyword(state: Arc, user: UserSession, engine: &mut Engine) { - let state_clone = Arc::clone(&state); - let user_clone = user.clone(); - engine - .register_custom_syntax(&["ADD_WEBSITE", "$expr$"], false, move |context, inputs| { - let url = context.eval_expression_tree(&inputs[0])?; - let url_str = url.to_string().trim_matches('"').to_string(); - trace!( - "ADD_WEBSITE command executed: {} for user: {}", - url_str, - user_clone.user_id - ); - let is_valid = url_str.starts_with("http://") || url_str.starts_with("https://"); - if !is_valid { - return Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - "Invalid URL format. 
Must start with http:// or https://".into(), - rhai::Position::NONE, - ))); - } - let state_for_task = Arc::clone(&state_clone); - let user_for_task = user_clone.clone(); - let url_for_task = url_str.clone(); - let (tx, rx) = std::sync::mpsc::channel(); - std::thread::spawn(move || { - let rt = tokio::runtime::Builder::new_multi_thread() - .worker_threads(2) - .enable_all() - .build(); - let send_err = if let Ok(rt) = rt { - let result = rt.block_on(async move { - crawl_and_index_website(&state_for_task, &user_for_task, &url_for_task) - .await - }); - tx.send(result).err() - } else { - tx.send(Err("Failed to build tokio runtime".to_string())) - .err() - }; - if send_err.is_some() { - error!("Failed to send result from thread"); - } - }); - match rx.recv_timeout(std::time::Duration::from_secs(120)) { - Ok(Ok(message)) => Ok(Dynamic::from(message)), - Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - e.into(), - rhai::Position::NONE, - ))), - Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { - Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - "ADD_WEBSITE timed out".into(), - rhai::Position::NONE, - ))) - } - Err(e) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - format!("ADD_WEBSITE failed: {}", e).into(), - rhai::Position::NONE, - ))), - } - }) - .unwrap(); -} -async fn crawl_and_index_website( - _state: &AppState, - _user: &UserSession, - _url: &str, -) -> Result { - Err("Web automation functionality has been removed from this build".to_string()) -} diff --git a/src/basic/keywords/book.rs b/src/basic/keywords/book.rs index 611fa0ad8..a2c15955f 100644 --- a/src/basic/keywords/book.rs +++ b/src/basic/keywords/book.rs @@ -1,62 +1,113 @@ use crate::shared::models::UserSession; use crate::shared::state::AppState; -use chrono::{DateTime, Datelike, Duration, Timelike, Utc}; -use log::{error, trace}; +use chrono::{DateTime, Duration, Utc}; +use diesel::prelude::*; +use log::{error, info, trace}; use rhai::{Dynamic, Engine}; -use serde::{Deserialize, 
Serialize}; use serde_json::json; use std::sync::Arc; use uuid::Uuid; -#[derive(Debug, Serialize, Deserialize)] -struct TimeSlot { - start: DateTime, - end: DateTime, - available: bool, +// Calendar types - would be from crate::calendar when feature is enabled +#[derive(Debug)] +pub struct CalendarEngine { + db: crate::shared::utils::DbPool, } +#[derive(Debug)] +pub struct CalendarEvent { + pub id: uuid::Uuid, + pub title: String, + pub description: Option, + pub start_time: DateTime, + pub end_time: DateTime, + pub location: Option, + pub organizer: String, + pub attendees: Vec, + pub reminder_minutes: Option, + pub recurrence_rule: Option, + pub status: EventStatus, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Debug)] +pub enum EventStatus { + Confirmed, + Tentative, + Cancelled, +} + +#[derive(Debug)] +pub struct RecurrenceRule { + pub frequency: String, + pub interval: i32, + pub count: Option, + pub until: Option>, + pub by_day: Option>, +} + +impl CalendarEngine { + pub fn new(db: crate::shared::utils::DbPool) -> Self { + Self { db } + } + + pub async fn create_event( + &self, + event: CalendarEvent, + ) -> Result> { + Ok(event) + } + + pub async fn check_conflicts( + &self, + _start: DateTime, + _end: DateTime, + _user: &str, + ) -> Result, Box> { + Ok(vec![]) + } + + pub async fn get_events_range( + &self, + _start: DateTime, + _end: DateTime, + ) -> Result, Box> { + Ok(vec![]) + } +} + +/// Register BOOK keyword in BASIC for calendar appointments pub fn book_keyword(state: Arc, user: UserSession, engine: &mut Engine) { let state_clone = Arc::clone(&state); let user_clone = user.clone(); engine .register_custom_syntax( - &["BOOK", "$expr$", ",", "$expr$", ",", "$expr$"], + &[ + "BOOK", "$expr$", ",", "$expr$", ",", "$expr$", ",", "$expr$", ",", "$expr$", + ], false, move |context, inputs| { - // Parse attendees (array or single email) - let attendees_input = context.eval_expression_tree(&inputs[0])?; - let mut attendees = 
Vec::new(); - - if attendees_input.is_array() { - let arr = attendees_input.cast::(); - for item in arr.iter() { - attendees.push(item.to_string()); - } - } else { - attendees.push(attendees_input.to_string()); - } - - let date_range = context.eval_expression_tree(&inputs[1])?.to_string(); - let duration = context.eval_expression_tree(&inputs[2])?; - - let duration_minutes = if duration.is_int() { - duration.as_int().unwrap_or(30) - } else { - duration.to_string().parse::().unwrap_or(30) - }; + let title = context.eval_expression_tree(&inputs[0])?.to_string(); + let description = context.eval_expression_tree(&inputs[1])?.to_string(); + let start_time_str = context.eval_expression_tree(&inputs[2])?.to_string(); + let duration_minutes = context + .eval_expression_tree(&inputs[3])? + .as_int() + .unwrap_or(30) as i64; + let location = context.eval_expression_tree(&inputs[4])?.to_string(); trace!( - "BOOK: attendees={:?}, date_range={}, duration={} for user={}", - attendees, - date_range, + "BOOK: title={}, start={}, duration={} min for user={}", + title, + start_time_str, duration_minutes, user_clone.user_id ); let state_for_task = Arc::clone(&state_clone); let user_for_task = user_clone.clone(); - let (tx, rx) = std::sync::mpsc::channel(); std::thread::spawn(move || { @@ -67,12 +118,14 @@ pub fn book_keyword(state: Arc, user: UserSession, engine: &mut Engine let send_err = if let Ok(rt) = rt { let result = rt.block_on(async move { - execute_booking( + execute_book( &state_for_task, &user_for_task, - attendees, - &date_range, - duration_minutes as i32, + &title, + &description, + &start_time_str, + duration_minutes, + &location, ) .await }); @@ -88,19 +141,13 @@ pub fn book_keyword(state: Arc, user: UserSession, engine: &mut Engine }); match rx.recv_timeout(std::time::Duration::from_secs(10)) { - Ok(Ok(booking_id)) => Ok(Dynamic::from(booking_id)), + Ok(Ok(event_id)) => Ok(Dynamic::from(event_id)), Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( 
format!("BOOK failed: {}", e).into(), rhai::Position::NONE, ))), - Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { - Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - "BOOK timed out".into(), - rhai::Position::NONE, - ))) - } - Err(e) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - format!("BOOK thread failed: {}", e).into(), + Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + "BOOK timed out".into(), rhai::Position::NONE, ))), } @@ -108,39 +155,34 @@ pub fn book_keyword(state: Arc, user: UserSession, engine: &mut Engine ) .unwrap(); - // Register FIND_SLOT keyword to find available slots + // Register BOOK MEETING for more complex meetings let state_clone2 = Arc::clone(&state); let user_clone2 = user.clone(); engine .register_custom_syntax( - &["FIND_SLOT", "$expr$", ",", "$expr$", ",", "$expr$"], + &["BOOK_MEETING", "$expr$", ",", "$expr$"], false, move |context, inputs| { - let attendees_input = context.eval_expression_tree(&inputs[0])?; - let mut attendees = Vec::new(); + let meeting_details = context.eval_expression_tree(&inputs[0])?; + let attendees_input = context.eval_expression_tree(&inputs[1])?; + let mut attendees = Vec::new(); if attendees_input.is_array() { let arr = attendees_input.cast::(); for item in arr.iter() { attendees.push(item.to_string()); } - } else { - attendees.push(attendees_input.to_string()); } - let duration = context.eval_expression_tree(&inputs[1])?; - let preferences = context.eval_expression_tree(&inputs[2])?.to_string(); - - let duration_minutes = if duration.is_int() { - duration.as_int().unwrap_or(30) - } else { - duration.to_string().parse::().unwrap_or(30) - }; + trace!( + "BOOK_MEETING with {} attendees for user={}", + attendees.len(), + user_clone2.user_id + ); let state_for_task = Arc::clone(&state_clone2); let user_for_task = user_clone2.clone(); - let (tx, rx) = std::sync::mpsc::channel(); std::thread::spawn(move || { @@ -151,12 +193,11 @@ pub fn book_keyword(state: Arc, user: UserSession, engine: 
&mut Engine let send_err = if let Ok(rt) = rt { let result = rt.block_on(async move { - find_available_slot( + execute_book_meeting( &state_for_task, &user_for_task, + meeting_details.to_string(), attendees, - duration_minutes as i32, - &preferences, ) .await }); @@ -167,18 +208,86 @@ pub fn book_keyword(state: Arc, user: UserSession, engine: &mut Engine }; if send_err.is_some() { - error!("Failed to send FIND_SLOT result from thread"); + error!("Failed to send BOOK_MEETING result from thread"); } }); match rx.recv_timeout(std::time::Duration::from_secs(10)) { - Ok(Ok(slot)) => Ok(Dynamic::from(slot)), + Ok(Ok(event_id)) => Ok(Dynamic::from(event_id)), Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - format!("FIND_SLOT failed: {}", e).into(), + format!("BOOK_MEETING failed: {}", e).into(), rhai::Position::NONE, ))), Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - "FIND_SLOT timed out".into(), + "BOOK_MEETING timed out".into(), + rhai::Position::NONE, + ))), + } + }, + ) + .unwrap(); + + // Register CHECK_AVAILABILITY keyword + let state_clone3 = Arc::clone(&state); + let user_clone3 = user.clone(); + + engine + .register_custom_syntax( + &["CHECK_AVAILABILITY", "$expr$", ",", "$expr$"], + false, + move |context, inputs| { + let date_str = context.eval_expression_tree(&inputs[0])?.to_string(); + let duration_minutes = context + .eval_expression_tree(&inputs[1])? 
+ .as_int() + .unwrap_or(30) as i64; + + trace!( + "CHECK_AVAILABILITY for {} on {} for user={}", + duration_minutes, + date_str, + user_clone3.user_id + ); + + let state_for_task = Arc::clone(&state_clone3); + let user_for_task = user_clone3.clone(); + let (tx, rx) = std::sync::mpsc::channel(); + + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_all() + .build(); + + let send_err = if let Ok(rt) = rt { + let result = rt.block_on(async move { + check_availability( + &state_for_task, + &user_for_task, + &date_str, + duration_minutes, + ) + .await + }); + tx.send(result).err() + } else { + tx.send(Err("Failed to build tokio runtime".to_string())) + .err() + }; + + if send_err.is_some() { + error!("Failed to send CHECK_AVAILABILITY result from thread"); + } + }); + + match rx.recv_timeout(std::time::Duration::from_secs(5)) { + Ok(Ok(slots)) => Ok(Dynamic::from(slots)), + Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + format!("CHECK_AVAILABILITY failed: {}", e).into(), + rhai::Position::NONE, + ))), + Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + "CHECK_AVAILABILITY timed out".into(), rhai::Position::NONE, ))), } @@ -187,247 +296,396 @@ pub fn book_keyword(state: Arc, user: UserSession, engine: &mut Engine .unwrap(); } -async fn execute_booking( +async fn execute_book( state: &AppState, user: &UserSession, - attendees: Vec, - date_range: &str, - duration_minutes: i32, + title: &str, + description: &str, + start_time_str: &str, + duration_minutes: i64, + location: &str, ) -> Result { - // Parse date range - let (start_search, end_search) = parse_date_range(date_range)?; + // Parse start time + let start_time = parse_time_string(start_time_str)?; + let end_time = start_time + Duration::minutes(duration_minutes); - // Find available slot - let available_slot = find_common_availability( - state, - &attendees, - start_search, - end_search, - duration_minutes, - ) - .await?; + 
// Get or create calendar engine + let calendar_engine = get_calendar_engine(state).await?; - // Create calendar event - let event_id = create_calendar_event( - state, - user, - &attendees, - available_slot.start, - available_slot.end, - "Meeting", - None, - ) - .await?; + // Check for conflicts + let conflicts = calendar_engine + .check_conflicts(start_time, end_time, &user.user_id.to_string()) + .await + .map_err(|e| format!("Failed to check conflicts: {}", e))?; - // Send invitations - for attendee in &attendees { - send_calendar_invite(state, &event_id, attendee).await?; + if !conflicts.is_empty() { + return Err(format!( + "Time slot conflicts with existing appointment: {}", + conflicts[0].title + )); } + // Create calendar event + let event = CalendarEvent { + id: Uuid::new_v4(), + title: title.to_string(), + description: Some(description.to_string()), + start_time, + end_time, + location: if location.is_empty() { + None + } else { + Some(location.to_string()) + }, + organizer: user.user_id.to_string(), + attendees: vec![user.user_id.to_string()], + reminder_minutes: Some(15), // Default 15-minute reminder + recurrence_rule: None, + status: EventStatus::Confirmed, + created_at: Utc::now(), + updated_at: Utc::now(), + }; + + // Create the event + let created_event = calendar_engine + .create_event(event) + .await + .map_err(|e| format!("Failed to create appointment: {}", e))?; + + // Log the booking + log_booking(state, user, &created_event.id.to_string(), title).await?; + + info!( + "Appointment booked: {} at {} for user {}", + title, start_time, user.user_id + ); + Ok(format!( - "Meeting booked for {} at {}", - available_slot.start.format("%Y-%m-%d %H:%M"), - event_id + "Appointment '{}' booked for {} (ID: {})", + title, + start_time.format("%Y-%m-%d %H:%M"), + created_event.id )) } -async fn find_available_slot( - state: &AppState, - _user: &UserSession, - attendees: Vec, - duration_minutes: i32, - preferences: &str, -) -> Result { - // Parse preferences 
(e.g., "mornings preferred", "afternoons only", "next week") - let (start_search, end_search) = if preferences.contains("tomorrow") { - let tomorrow = Utc::now() + Duration::days(1); - ( - tomorrow - .date_naive() - .and_hms_opt(0, 0, 0) - .unwrap() - .and_utc(), - tomorrow - .date_naive() - .and_hms_opt(23, 59, 59) - .unwrap() - .and_utc(), - ) - } else if preferences.contains("next week") { - let now = Utc::now(); - let next_week = now + Duration::days(7); - (now, next_week) - } else { - // Default to next 7 days - let now = Utc::now(); - (now, now + Duration::days(7)) - }; - - let slot = find_common_availability( - state, - &attendees, - start_search, - end_search, - duration_minutes, - ) - .await?; - - Ok(slot.start.format("%Y-%m-%d %H:%M").to_string()) -} - -async fn find_common_availability( - state: &AppState, - attendees: &[String], - start_search: DateTime, - end_search: DateTime, - duration_minutes: i32, -) -> Result { - // This would integrate with actual calendar API - // For now, simulate finding an available slot - - let mut current = start_search; - - while current < end_search { - // Skip weekends - if current.weekday().num_days_from_monday() >= 5 { - current = current + Duration::days(1); - continue; - } - - // Check business hours (9 AM - 5 PM) - let hour = current.hour(); - if hour >= 9 && hour < 17 { - // Check if slot is available for all attendees - let slot_end = current + Duration::minutes(duration_minutes as i64); - - if slot_end.hour() <= 17 { - // In a real implementation, check each attendee's calendar - // For now, simulate availability check - if check_slot_availability(state, attendees, current, slot_end).await? 
{ - return Ok(TimeSlot { - start: current, - end: slot_end, - available: true, - }); - } - } - } - - // Move to next slot (30 minute intervals) - current = current + Duration::minutes(30); - } - - Err("No available slot found in the specified date range".to_string()) -} - -async fn check_slot_availability( - _state: &AppState, - _attendees: &[String], - _start: DateTime, - _end: DateTime, -) -> Result { - // Simulate calendar availability check - // In real implementation, this would query calendar API - - // For demo, randomly return availability - let random = (Utc::now().timestamp() % 3) == 0; - Ok(random) -} - -async fn create_calendar_event( +async fn execute_book_meeting( state: &AppState, user: &UserSession, - attendees: &[String], - start: DateTime, - end: DateTime, - subject: &str, - description: Option, + meeting_json: String, + attendees: Vec, ) -> Result { - let event_id = Uuid::new_v4().to_string(); + // Parse meeting details from JSON + let meeting_data: serde_json::Value = serde_json::from_str(&meeting_json) + .map_err(|e| format!("Invalid meeting details: {}", e))?; - // Store in database - let mut conn = state.conn.get().map_err(|e| format!("DB error: {}", e))?; + let title = meeting_data["title"] + .as_str() + .ok_or("Missing meeting title")?; + let start_time_str = meeting_data["start_time"] + .as_str() + .ok_or("Missing start time")?; + let duration_minutes = meeting_data["duration"].as_i64().unwrap_or(60); + let description = meeting_data["description"].as_str().unwrap_or(""); + let location = meeting_data["location"].as_str().unwrap_or(""); + let recurring = meeting_data["recurring"].as_bool().unwrap_or(false); - let user_id_str = user.user_id.to_string(); - let bot_id_str = user.bot_id.to_string(); - let attendees_json = json!(attendees); - let now = Utc::now(); + let start_time = parse_time_string(start_time_str)?; + let end_time = start_time + Duration::minutes(duration_minutes); - let query = diesel::sql_query( - "INSERT INTO 
calendar_events (id, user_id, bot_id, subject, description, start_time, end_time, attendees, created_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)" - ) - .bind::(&event_id) - .bind::(&user_id_str) - .bind::(&bot_id_str) - .bind::(subject) - .bind::, _>(&description) - .bind::(&start) - .bind::(&end) - .bind::(&attendees_json) - .bind::(&now); + // Get or create calendar engine + let calendar_engine = get_calendar_engine(state).await?; - use diesel::RunQueryDsl; - query.execute(&mut *conn).map_err(|e| { - error!("Failed to create calendar event: {}", e); - format!("Failed to create calendar event: {}", e) - })?; + // Check conflicts for all attendees + for attendee in &attendees { + let conflicts = calendar_engine + .check_conflicts(start_time, end_time, attendee) + .await + .map_err(|e| format!("Failed to check conflicts: {}", e))?; - trace!("Created calendar event: {}", event_id); - Ok(event_id) + if !conflicts.is_empty() { + return Err(format!("Attendee {} has a conflict at this time", attendee)); + } + } + + // Create recurrence rule if needed + let recurrence_rule = if recurring { + Some(RecurrenceRule { + frequency: "WEEKLY".to_string(), + interval: 1, + count: Some(10), // Default to 10 occurrences + until: None, + by_day: None, + }) + } else { + None + }; + + // Create calendar event + let event = CalendarEvent { + id: Uuid::new_v4(), + title: title.to_string(), + description: Some(description.to_string()), + start_time, + end_time, + location: if location.is_empty() { + None + } else { + Some(location.to_string()) + }, + organizer: user.user_id.to_string(), + attendees: attendees.clone(), + reminder_minutes: Some(30), // 30-minute reminder for meetings + recurrence_rule, + status: EventStatus::Confirmed, + created_at: Utc::now(), + updated_at: Utc::now(), + }; + + // Create the meeting + let created_event = calendar_engine + .create_event(event) + .await + .map_err(|e| format!("Failed to create meeting: {}", e))?; + + // Send invites to attendees (would 
integrate with email system) + for attendee in &attendees { + send_meeting_invite(state, &created_event, attendee).await?; + } + + info!( + "Meeting booked: {} at {} with {} attendees", + title, + start_time, + attendees.len() + ); + + Ok(format!( + "Meeting '{}' scheduled for {} with {} attendees (ID: {})", + title, + start_time.format("%Y-%m-%d %H:%M"), + attendees.len(), + created_event.id + )) } -async fn send_calendar_invite( - _state: &AppState, +async fn check_availability( + state: &AppState, + user: &UserSession, + date_str: &str, + duration_minutes: i64, +) -> Result { + let date = parse_date_string(date_str)?; + let calendar_engine = get_calendar_engine(state).await?; + + // Define business hours (9 AM to 5 PM) + let business_start = date.with_hour(9).unwrap().with_minute(0).unwrap(); + let business_end = date.with_hour(17).unwrap().with_minute(0).unwrap(); + + // Get all events for the day + let events = calendar_engine + .get_events_range(business_start, business_end) + .await + .map_err(|e| format!("Failed to get events: {}", e))?; + + // Find available slots + let mut available_slots = Vec::new(); + let mut current_time = business_start; + let slot_duration = Duration::minutes(duration_minutes); + + for event in &events { + // Check if there's a gap before this event + if current_time + slot_duration <= event.start_time { + available_slots.push(format!( + "{} - {}", + current_time.format("%H:%M"), + (current_time + slot_duration).format("%H:%M") + )); + } + current_time = event.end_time; + } + + // Check if there's time after the last event + if current_time + slot_duration <= business_end { + available_slots.push(format!( + "{} - {}", + current_time.format("%H:%M"), + (current_time + slot_duration).format("%H:%M") + )); + } + + if available_slots.is_empty() { + Ok("No available slots on this date".to_string()) + } else { + Ok(format!( + "Available slots on {}: {}", + date.format("%Y-%m-%d"), + available_slots.join(", ") + )) + } +} + +fn 
parse_time_string(time_str: &str) -> Result, String> { + // Try different date formats + let formats = vec![ + "%Y-%m-%d %H:%M", + "%Y-%m-%d %H:%M:%S", + "%Y/%m/%d %H:%M", + "%d/%m/%Y %H:%M", + "%Y-%m-%dT%H:%M:%S", + ]; + + for format in formats { + if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(time_str, format) { + return Ok(DateTime::from_utc(dt, Utc)); + } + } + + // Try parsing relative times like "tomorrow at 3pm" + if time_str.contains("tomorrow") { + let tomorrow = Utc::now() + Duration::days(1); + if let Some(hour) = extract_hour_from_string(time_str) { + return Ok(tomorrow + .with_hour(hour) + .unwrap() + .with_minute(0) + .unwrap() + .with_second(0) + .unwrap()); + } + } + + // Try parsing relative times like "in 2 hours" + if time_str.starts_with("in ") { + if let Ok(hours) = time_str + .trim_start_matches("in ") + .trim_end_matches(" hours") + .trim_end_matches(" hour") + .parse::() + { + return Ok(Utc::now() + Duration::hours(hours)); + } + } + + Err(format!("Could not parse time: {}", time_str)) +} + +fn parse_date_string(date_str: &str) -> Result, String> { + // Handle special cases + if date_str == "today" { + return Ok(Utc::now()); + } else if date_str == "tomorrow" { + return Ok(Utc::now() + Duration::days(1)); + } + + // Try standard date formats + let formats = vec!["%Y-%m-%d", "%Y/%m/%d", "%d/%m/%Y"]; + + for format in formats { + if let Ok(dt) = chrono::NaiveDate::parse_from_str(date_str, format) { + return Ok(dt.and_hms(0, 0, 0).and_utc()); + } + } + + Err(format!("Could not parse date: {}", date_str)) +} + +fn extract_hour_from_string(s: &str) -> Option { + // Extract hour from strings like "3pm", "15:00", "3 PM" + let s = s.to_lowercase(); + + if s.contains("pm") { + if let Some(hour_str) = s.split("pm").next() { + if let Ok(hour) = hour_str.trim().replace(":", "").parse::() { + return Some(if hour < 12 { hour + 12 } else { hour }); + } + } + } else if s.contains("am") { + if let Some(hour_str) = s.split("am").next() { + if let 
Ok(hour) = hour_str.trim().replace(":", "").parse::() { + return Some(if hour == 12 { 0 } else { hour }); + } + } + } + + None +} + +async fn log_booking( + state: &AppState, + user: &UserSession, event_id: &str, + title: &str, +) -> Result<(), String> { + let mut conn = state.conn.get().map_err(|e| format!("DB error: {}", e))?; + + diesel::sql_query( + "INSERT INTO booking_logs (id, user_id, bot_id, event_id, event_title, booked_at) + VALUES (gen_random_uuid(), $1, $2, $3, $4, NOW())", + ) + .bind::(&user.user_id) + .bind::(&user.bot_id) + .bind::(event_id) + .bind::(title) + .execute(&mut *conn) + .map_err(|e| format!("Failed to log booking: {}", e))?; + + Ok(()) +} + +async fn get_calendar_engine(state: &AppState) -> Result, String> { + // Get or create calendar engine from app state + // This would normally be initialized at startup + let calendar_engine = Arc::new(CalendarEngine::new(state.conn.clone())); + Ok(calendar_engine) +} + +async fn send_meeting_invite( + state: &AppState, + event: &CalendarEvent, attendee: &str, ) -> Result<(), String> { - // In real implementation, send actual calendar invite via email or calendar API - trace!( - "Sending calendar invite for event {} to {}", - event_id, - attendee + // This would integrate with the email system to send calendar invites + info!( + "Would send meeting invite for '{}' to {}", + event.title, attendee ); Ok(()) } -fn parse_date_range(date_range: &str) -> Result<(DateTime, DateTime), String> { - let range_lower = date_range.to_lowercase(); - let now = Utc::now(); +#[cfg(test)] +mod tests { + use super::*; - if range_lower.contains("today") { - Ok(( - now.date_naive().and_hms_opt(0, 0, 0).unwrap().and_utc(), - now.date_naive().and_hms_opt(23, 59, 59).unwrap().and_utc(), - )) - } else if range_lower.contains("tomorrow") { - let tomorrow = now + Duration::days(1); - Ok(( - tomorrow - .date_naive() - .and_hms_opt(0, 0, 0) - .unwrap() - .and_utc(), - tomorrow - .date_naive() - .and_hms_opt(23, 59, 59) - 
.unwrap() - .and_utc(), - )) - } else if range_lower.contains("this week") || range_lower.contains("this_week") { - Ok(( - now, - now + Duration::days(7 - now.weekday().num_days_from_monday() as i64), - )) - } else if range_lower.contains("next week") || range_lower.contains("next_week") { - let next_monday = now + Duration::days(7 - now.weekday().num_days_from_monday() as i64 + 1); - Ok((next_monday, next_monday + Duration::days(6))) - } else if range_lower.contains("2pm") || range_lower.contains("14:00") { - // Handle specific time - let target_time = now.date_naive().and_hms_opt(14, 0, 0).unwrap().and_utc(); - Ok((target_time, target_time + Duration::hours(1))) - } else { - // Default to next 7 days - Ok((now, now + Duration::days(7))) + #[test] + fn test_parse_time_string() { + let result = parse_time_string("2024-01-15 14:30"); + assert!(result.is_ok()); + + let result = parse_time_string("tomorrow at 3pm"); + assert!(result.is_ok()); + + let result = parse_time_string("in 2 hours"); + assert!(result.is_ok()); + } + + #[test] + fn test_parse_date_string() { + let result = parse_date_string("today"); + assert!(result.is_ok()); + + let result = parse_date_string("2024-01-15"); + assert!(result.is_ok()); + + let result = parse_date_string("tomorrow"); + assert!(result.is_ok()); + } + + #[test] + fn test_extract_hour() { + assert_eq!(extract_hour_from_string("3pm"), Some(15)); + assert_eq!(extract_hour_from_string("3 PM"), Some(15)); + assert_eq!(extract_hour_from_string("10am"), Some(10)); + assert_eq!(extract_hour_from_string("12am"), Some(0)); + assert_eq!(extract_hour_from_string("12pm"), Some(12)); } } diff --git a/src/basic/keywords/mod.rs b/src/basic/keywords/mod.rs index 67775466c..eba23d689 100644 --- a/src/basic/keywords/mod.rs +++ b/src/basic/keywords/mod.rs @@ -1,6 +1,5 @@ pub mod add_member; pub mod add_suggestion; -pub mod add_website; pub mod book; pub mod bot_memory; pub mod clear_kb; @@ -28,5 +27,6 @@ pub mod set_user; pub mod universal_messaging; 
pub mod use_kb; pub mod use_tool; +pub mod use_website; pub mod wait; pub mod weather; diff --git a/src/basic/keywords/use_website.rs b/src/basic/keywords/use_website.rs new file mode 100644 index 000000000..52b2c11a9 --- /dev/null +++ b/src/basic/keywords/use_website.rs @@ -0,0 +1,407 @@ +use crate::shared::models::UserSession; +use crate::shared::state::AppState; +use diesel::prelude::*; +use log::{error, info, trace}; +use rhai::{Dynamic, Engine}; +use std::sync::Arc; +use uuid::Uuid; + +/// Register USE_WEBSITE keyword in BASIC +/// Runtime mode: Associates a website collection with the current session (like USE KB) +/// Preprocessing mode: Registers website for crawling (handled in compiler/mod.rs) +pub fn use_website_keyword(state: Arc, user: UserSession, engine: &mut Engine) { + let state_clone = Arc::clone(&state); + let user_clone = user.clone(); + + engine + .register_custom_syntax(&["USE_WEBSITE", "$expr$"], false, move |context, inputs| { + let url = context.eval_expression_tree(&inputs[0])?; + let url_str = url.to_string().trim_matches('"').to_string(); + + trace!( + "USE_WEBSITE command executed: {} for session: {}", + url_str, + user_clone.id + ); + + // Validate URL + let is_valid = url_str.starts_with("http://") || url_str.starts_with("https://"); + if !is_valid { + return Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + "Invalid URL format. 
Must start with http:// or https://".into(), + rhai::Position::NONE, + ))); + } + + let state_for_task = Arc::clone(&state_clone); + let user_for_task = user_clone.clone(); + let url_for_task = url_str.clone(); + let (tx, rx) = std::sync::mpsc::channel(); + + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_all() + .build(); + + let send_err = if let Ok(rt) = rt { + let result = rt.block_on(async move { + associate_website_with_session( + &state_for_task, + &user_for_task, + &url_for_task, + ) + .await + }); + tx.send(result).err() + } else { + tx.send(Err("Failed to build tokio runtime".to_string())) + .err() + }; + + if send_err.is_some() { + error!("Failed to send result from thread"); + } + }); + + match rx.recv_timeout(std::time::Duration::from_secs(10)) { + Ok(Ok(message)) => Ok(Dynamic::from(message)), + Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + e.into(), + rhai::Position::NONE, + ))), + Err(std::sync::mpsc::RecvTimeoutError::Timeout) => { + Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + "USE_WEBSITE timed out".into(), + rhai::Position::NONE, + ))) + } + Err(e) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + format!("USE_WEBSITE failed: {}", e).into(), + rhai::Position::NONE, + ))), + } + }) + .unwrap(); +} + +/// Associate website with session (runtime behavior - like USE KB) +/// This only associates an already-crawled website with the session +async fn associate_website_with_session( + state: &AppState, + user: &UserSession, + url: &str, +) -> Result { + info!("Associating website {} with session {}", url, user.id); + + let mut conn = state.conn.get().map_err(|e| format!("DB error: {}", e))?; + + // Create collection name for this website + let collection_name = format!("website_{}", sanitize_url_for_collection(url)); + + // Check if website has been crawled for this bot + let website_status = check_website_crawl_status(&mut conn, &user.bot_id, url)?; + + match 
website_status { + WebsiteCrawlStatus::NotRegistered => { + return Err(format!( + "Website {} has not been registered for crawling. It should be added to the script for preprocessing.", + url + )); + } + WebsiteCrawlStatus::Pending => { + // Website is registered but not yet crawled - allow association but warn + info!("Website {} is pending crawl, associating anyway", url); + } + WebsiteCrawlStatus::Crawled => { + // Website is fully crawled and ready + info!("Website {} is already crawled and ready", url); + } + WebsiteCrawlStatus::Failed => { + return Err(format!( + "Website {} crawling failed. Please check the logs.", + url + )); + } + } + + // Associate website collection with session (like session_kb_associations) + add_website_to_session(&mut conn, &user.id, &user.bot_id, url, &collection_name)?; + + Ok(format!( + "Website {} is now available in this conversation.", + url + )) +} + +/// Website crawl status enum +enum WebsiteCrawlStatus { + NotRegistered, + Pending, + Crawled, + Failed, +} + +/// Check website crawl status for this bot +fn check_website_crawl_status( + conn: &mut PgConnection, + bot_id: &Uuid, + url: &str, +) -> Result { + #[derive(QueryableByName)] + struct CrawlStatus { + #[diesel(sql_type = diesel::sql_types::Nullable)] + crawl_status: Option, + } + + let query = + diesel::sql_query("SELECT crawl_status FROM website_crawls WHERE bot_id = $1 AND url = $2") + .bind::(bot_id) + .bind::(url); + + let result: Result = query.get_result(conn); + + match result { + Ok(status) => match status.crawl_status { + Some(0) => Ok(WebsiteCrawlStatus::Pending), + Some(1) => Ok(WebsiteCrawlStatus::Crawled), + Some(2) => Ok(WebsiteCrawlStatus::Failed), + _ => Ok(WebsiteCrawlStatus::NotRegistered), + }, + Err(_) => Ok(WebsiteCrawlStatus::NotRegistered), + } +} + +/// Register website for background crawling (called from preprocessing) +/// This is called during script compilation, not runtime +pub fn register_website_for_crawling( + conn: &mut PgConnection, + 
bot_id: &Uuid, + url: &str, +) -> Result<(), String> { + // Get website configuration with defaults + let expires_policy = "1d"; // Default, would read from bot config + + let query = diesel::sql_query( + "INSERT INTO website_crawls (id, bot_id, url, expires_policy, crawl_status, next_crawl) + VALUES (gen_random_uuid(), $1, $2, $3, 0, NOW()) + ON CONFLICT (bot_id, url) DO UPDATE SET next_crawl = + CASE + WHEN website_crawls.crawl_status = 2 THEN NOW() -- Failed, retry now + ELSE website_crawls.next_crawl -- Keep existing schedule + END", + ) + .bind::(bot_id) + .bind::(url) + .bind::(expires_policy); + + query + .execute(conn) + .map_err(|e| format!("Failed to register website for crawling: {}", e))?; + + info!("Website {} registered for crawling for bot {}", url, bot_id); + Ok(()) +} + +/// Execute USE_WEBSITE during preprocessing (called from compiler) +/// This registers the website for crawling but doesn't associate it with any session +pub fn execute_use_website_preprocessing( + conn: &mut PgConnection, + url: &str, + bot_id: Uuid, +) -> Result> { + trace!("Preprocessing USE_WEBSITE: {}, bot_id: {:?}", url, bot_id); + + // Validate URL + if !url.starts_with("http://") && !url.starts_with("https://") { + return Err(format!( + "Invalid URL format: {}. 
Must start with http:// or https://", + url + ) + .into()); + } + + // Register for crawling + register_website_for_crawling(conn, &bot_id, url)?; + + Ok(serde_json::json!({ + "command": "use_website", + "url": url, + "bot_id": bot_id.to_string(), + "status": "registered_for_crawling" + })) +} + +/// Add website to session (like USE KB) +fn add_website_to_session( + conn: &mut PgConnection, + session_id: &Uuid, + bot_id: &Uuid, + url: &str, + collection_name: &str, +) -> Result<(), String> { + // Add to session_website_associations table (similar to session_kb_associations) + let assoc_id = Uuid::new_v4(); + + diesel::sql_query( + "INSERT INTO session_website_associations + (id, session_id, bot_id, website_url, collection_name, is_active, added_at) + VALUES ($1, $2, $3, $4, $5, true, NOW()) + ON CONFLICT (session_id, website_url) + DO UPDATE SET is_active = true, added_at = NOW()", + ) + .bind::(assoc_id) + .bind::(session_id) + .bind::(bot_id) + .bind::(url) + .bind::(collection_name) + .execute(conn) + .map_err(|e| format!("Failed to add website to session: {}", e))?; + + info!( + "✅ Added website '{}' to session {} (collection: {})", + url, session_id, collection_name + ); + + Ok(()) +} + +/// Clear websites from session (companion to USE_WEBSITE) +pub fn clear_websites_keyword(state: Arc, user: UserSession, engine: &mut Engine) { + let state_clone = Arc::clone(&state); + let user_clone = user.clone(); + + engine + .register_custom_syntax(&["CLEAR_WEBSITES"], true, move |_context, _inputs| { + info!( + "CLEAR_WEBSITES keyword executed for session: {}", + user_clone.id + ); + + let session_id = user_clone.id; + let conn = state_clone.conn.clone(); + + let result = std::thread::spawn(move || clear_all_websites(conn, session_id)).join(); + + match result { + Ok(Ok(count)) => { + info!( + "Successfully cleared {} websites from session {}", + count, user_clone.id + ); + Ok(Dynamic::from(format!( + "{} website(s) removed from conversation", + count + ))) + } + 
Ok(Err(e)) => { + error!("Failed to clear websites: {}", e); + Err(format!("CLEAR_WEBSITES failed: {}", e).into()) + } + Err(e) => { + error!("Thread panic in CLEAR_WEBSITES: {:?}", e); + Err("CLEAR_WEBSITES failed: thread panic".into()) + } + } + }) + .unwrap(); +} + +/// Clear all websites from session +fn clear_all_websites( + conn_pool: crate::shared::utils::DbPool, + session_id: Uuid, +) -> Result { + let mut conn = conn_pool + .get() + .map_err(|e| format!("Failed to get DB connection: {}", e))?; + + let rows_affected = diesel::sql_query( + "UPDATE session_website_associations + SET is_active = false + WHERE session_id = $1 AND is_active = true", + ) + .bind::(session_id) + .execute(&mut conn) + .map_err(|e| format!("Failed to clear websites: {}", e))?; + + Ok(rows_affected) +} + +/// Get active websites for a session +pub fn get_active_websites_for_session( + conn_pool: &crate::shared::utils::DbPool, + session_id: Uuid, +) -> Result, String> { + let mut conn = conn_pool + .get() + .map_err(|e| format!("Failed to get DB connection: {}", e))?; + + #[derive(QueryableByName, Debug)] + struct ActiveWebsiteResult { + #[diesel(sql_type = diesel::sql_types::Text)] + website_url: String, + #[diesel(sql_type = diesel::sql_types::Text)] + collection_name: String, + } + + let results: Vec = diesel::sql_query( + "SELECT website_url, collection_name + FROM session_website_associations + WHERE session_id = $1 AND is_active = true + ORDER BY added_at DESC", + ) + .bind::(session_id) + .load(&mut conn) + .map_err(|e| format!("Failed to get active websites: {}", e))?; + + Ok(results + .into_iter() + .map(|r| (r.website_url, r.collection_name)) + .collect()) +} + +/// Sanitize URL for use as collection name +fn sanitize_url_for_collection(url: &str) -> String { + url.replace("http://", "") + .replace("https://", "") + .replace('/', "_") + .replace(':', "_") + .replace('.', "_") + .chars() + .filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-') + .collect::() + 
.to_lowercase() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_url_sanitization() { + assert_eq!( + sanitize_url_for_collection("https://docs.example.com/path"), + "docs_example_com_path" + ); + assert_eq!( + sanitize_url_for_collection("http://test.site:8080"), + "test_site_8080" + ); + } + + #[test] + fn test_use_website_syntax() { + let mut engine = Engine::new(); + + // Test USE_WEBSITE with argument + assert!(engine + .register_custom_syntax(&["USE_WEBSITE", "$expr$"], true, |_, _| Ok(Dynamic::UNIT)) + .is_ok()); + + // Test CLEAR_WEBSITES without argument + assert!(engine + .register_custom_syntax(&["CLEAR_WEBSITES"], true, |_, _| Ok(Dynamic::UNIT)) + .is_ok()); + } +} diff --git a/src/basic/keywords/weather.rs b/src/basic/keywords/weather.rs index e45ba5f8b..d3291db7f 100644 --- a/src/basic/keywords/weather.rs +++ b/src/basic/keywords/weather.rs @@ -1,182 +1,451 @@ use crate::shared::models::UserSession; use crate::shared::state::AppState; -use log::{error, trace}; -use reqwest::Client; +use log::{error, info, trace}; use rhai::{Dynamic, Engine}; use serde::{Deserialize, Serialize}; use std::sync::Arc; -use std::time::Duration; -#[derive(Debug, Clone, Deserialize, Serialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct WeatherData { pub location: String, - pub temperature: String, - pub condition: String, - pub forecast: String, + pub temperature: f32, + pub temperature_unit: String, + pub description: String, + pub humidity: u32, + pub wind_speed: f32, + pub wind_direction: String, + pub feels_like: f32, + pub pressure: u32, + pub visibility: f32, + pub uv_index: Option, + pub forecast: Vec, } -/// Fetches weather data from 7Timer! 
API (free, no auth) -pub async fn fetch_weather(location: &str) -> Result> { - // Parse location to get coordinates (simplified - in production use geocoding) - let (lat, lon) = parse_location(location)?; +#[derive(Debug, Serialize, Deserialize)] +pub struct ForecastDay { + pub date: String, + pub temp_high: f32, + pub temp_low: f32, + pub description: String, + pub rain_chance: u32, +} - // 7Timer! API endpoint - let url = format!( - "http://www.7timer.info/bin/api.pl?lon={}&lat={}&product=civil&output=json", - lon, lat - ); +/// Register WEATHER keyword in BASIC +pub fn weather_keyword(state: Arc, user: UserSession, engine: &mut Engine) { + let state_clone = Arc::clone(&state); + let user_clone = user.clone(); - trace!("Fetching weather from: {}", url); + engine + .register_custom_syntax(&["WEATHER", "$expr$"], false, move |context, inputs| { + let location = context.eval_expression_tree(&inputs[0])?.to_string(); - let client = Client::builder().timeout(Duration::from_secs(10)).build()?; + trace!( + "WEATHER command executed: {} for user: {}", + location, + user_clone.user_id + ); - let response = client.get(&url).send().await?; + let state_for_task = Arc::clone(&state_clone); + let user_for_task = user_clone.clone(); + let location_for_task = location.clone(); + let (tx, rx) = std::sync::mpsc::channel(); - if !response.status().is_success() { - return Err(format!("Weather API returned status: {}", response.status()).into()); - } + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_all() + .build(); - let json: serde_json::Value = response.json().await?; + let send_err = if let Ok(rt) = rt { + let result = rt.block_on(async move { + get_weather(&state_for_task, &user_for_task, &location_for_task).await + }); + tx.send(result).err() + } else { + tx.send(Err("Failed to build tokio runtime".to_string())) + .err() + }; - // Parse 7Timer response - let dataseries = json["dataseries"] - .as_array() - 
.ok_or("Invalid weather response")?; + if send_err.is_some() { + error!("Failed to send WEATHER result from thread"); + } + }); - if dataseries.is_empty() { - return Err("No weather data available".into()); - } + match rx.recv_timeout(std::time::Duration::from_secs(10)) { + Ok(Ok(weather_info)) => Ok(Dynamic::from(weather_info)), + Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + format!("WEATHER failed: {}", e).into(), + rhai::Position::NONE, + ))), + Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + "WEATHER request timed out".into(), + rhai::Position::NONE, + ))), + } + }) + .unwrap(); - let current = &dataseries[0]; - let temp = current["temp2m"].as_i64().unwrap_or(0); - let weather_code = current["weather"].as_str().unwrap_or("unknown"); + // Register FORECAST keyword for extended forecast + let state_clone2 = Arc::clone(&state); + let user_clone2 = user.clone(); - let condition = match weather_code { - "clear" => "Clear sky", - "pcloudy" => "Partly cloudy", - "cloudy" => "Cloudy", - "rain" => "Rain", - "lightrain" => "Light rain", - "humid" => "Humid", - "snow" => "Snow", - "lightsnow" => "Light snow", - _ => "Unknown", - }; + engine + .register_custom_syntax( + &["FORECAST", "$expr$", ",", "$expr$"], + false, + move |context, inputs| { + let location = context.eval_expression_tree(&inputs[0])?.to_string(); + let days = context + .eval_expression_tree(&inputs[1])? 
+ .as_int() + .unwrap_or(5) as u32; - // Build forecast string - let mut forecast_parts = Vec::new(); - for (i, item) in dataseries.iter().take(3).enumerate() { - if let (Some(temp), Some(weather)) = (item["temp2m"].as_i64(), item["weather"].as_str()) { - forecast_parts.push(format!("{}h: {}°C, {}", i * 3, temp, weather)); + trace!( + "FORECAST command executed: {} for {} days, user: {}", + location, + days, + user_clone2.user_id + ); + + let state_for_task = Arc::clone(&state_clone2); + let user_for_task = user_clone2.clone(); + let location_for_task = location.clone(); + let (tx, rx) = std::sync::mpsc::channel(); + + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_all() + .build(); + + let send_err = if let Ok(rt) = rt { + let result = rt.block_on(async move { + get_forecast(&state_for_task, &user_for_task, &location_for_task, days) + .await + }); + tx.send(result).err() + } else { + tx.send(Err("Failed to build tokio runtime".to_string())) + .err() + }; + + if send_err.is_some() { + error!("Failed to send FORECAST result from thread"); + } + }); + + match rx.recv_timeout(std::time::Duration::from_secs(10)) { + Ok(Ok(forecast_info)) => Ok(Dynamic::from(forecast_info)), + Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + format!("FORECAST failed: {}", e).into(), + rhai::Position::NONE, + ))), + Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( + "FORECAST request timed out".into(), + rhai::Position::NONE, + ))), + } + }, + ) + .unwrap(); +} + +async fn get_weather( + state: &AppState, + _user: &UserSession, + location: &str, +) -> Result { + // Get API key from bot config or environment + let api_key = get_weather_api_key(state)?; + + // Try OpenWeatherMap API first + match fetch_openweathermap_current(&api_key, location).await { + Ok(weather) => { + info!("Weather data fetched for {}", location); + Ok(format_weather_response(&weather)) + } + Err(e) => { + 
error!("OpenWeatherMap API failed: {}", e); + // Try fallback weather service + fetch_fallback_weather(location).await } } - let forecast = forecast_parts.join("; "); +} +async fn get_forecast( + state: &AppState, + _user: &UserSession, + location: &str, + days: u32, +) -> Result { + let api_key = get_weather_api_key(state)?; + + match fetch_openweathermap_forecast(&api_key, location, days).await { + Ok(forecast) => { + info!("Forecast data fetched for {} ({} days)", location, days); + Ok(format_forecast_response(&forecast)) + } + Err(e) => { + error!("Forecast API failed: {}", e); + Err(format!("Could not get forecast for {}: {}", location, e)) + } + } +} + +async fn fetch_openweathermap_current( + api_key: &str, + location: &str, +) -> Result { + let client = reqwest::Client::new(); + let url = format!( + "https://api.openweathermap.org/data/2.5/weather?q={}&appid={}&units=metric", + urlencoding::encode(location), + api_key + ); + + let response = client + .get(&url) + .send() + .await + .map_err(|e| format!("Request failed: {}", e))?; + + if !response.status().is_success() { + return Err(format!("API returned status: {}", response.status())); + } + + let data: serde_json::Value = response + .json() + .await + .map_err(|e| format!("Failed to parse response: {}", e))?; + + // Parse OpenWeatherMap response Ok(WeatherData { - location: location.to_string(), - temperature: format!("{}°C", temp), - condition: condition.to_string(), - forecast, + location: data["name"].as_str().unwrap_or(location).to_string(), + temperature: data["main"]["temp"].as_f64().unwrap_or(0.0) as f32, + temperature_unit: "°C".to_string(), + description: data["weather"][0]["description"] + .as_str() + .unwrap_or("Unknown") + .to_string(), + humidity: data["main"]["humidity"].as_u64().unwrap_or(0) as u32, + wind_speed: data["wind"]["speed"].as_f64().unwrap_or(0.0) as f32, + wind_direction: degrees_to_compass(data["wind"]["deg"].as_f64().unwrap_or(0.0)), + feels_like: 
data["main"]["feels_like"].as_f64().unwrap_or(0.0) as f32, + pressure: data["main"]["pressure"].as_u64().unwrap_or(0) as u32, + visibility: data["visibility"].as_f64().unwrap_or(0.0) as f32 / 1000.0, // Convert to km + uv_index: None, // Would need separate API call for UV index + forecast: Vec::new(), }) } -/// Simple location parser (lat,lon or city name) -pub fn parse_location(location: &str) -> Result<(f64, f64), Box> { - // Check if it's coordinates (lat,lon) - if let Some((lat_str, lon_str)) = location.split_once(',') { - let lat = lat_str.trim().parse::()?; - let lon = lon_str.trim().parse::()?; - return Ok((lat, lon)); +async fn fetch_openweathermap_forecast( + api_key: &str, + location: &str, + days: u32, +) -> Result { + let client = reqwest::Client::new(); + let url = format!( + "https://api.openweathermap.org/data/2.5/forecast?q={}&appid={}&units=metric&cnt={}", + urlencoding::encode(location), + api_key, + days * 8 // 8 forecasts per day (every 3 hours) + ); + + let response = client + .get(&url).send() + .await + .map_err(|e| format!("Request failed: {}", e))?; + + if !response.status().is_success() { + return Err(format!("API returned status: {}", response.status())); } - // Default city coordinates (extend as needed) - let coords = match location.to_lowercase().as_str() { - "london" => (51.5074, -0.1278), - "paris" => (48.8566, 2.3522), - "new york" | "newyork" => (40.7128, -74.0060), - "tokyo" => (35.6762, 139.6503), - "sydney" => (-33.8688, 151.2093), - "são paulo" | "sao paulo" => (-23.5505, -46.6333), - "rio de janeiro" | "rio" => (-22.9068, -43.1729), - "brasília" | "brasilia" => (-15.8267, -47.9218), - "buenos aires" => (-34.6037, -58.3816), - "berlin" => (52.5200, 13.4050), - "madrid" => (40.4168, -3.7038), - "rome" => (41.9028, 12.4964), - "moscow" => (55.7558, 37.6173), - "beijing" => (39.9042, 116.4074), - "mumbai" => (19.0760, 72.8777), - "dubai" => (25.2048, 55.2708), - "los angeles" | "la" => (34.0522, -118.2437), - "chicago" => (41.8781,
-87.6298), - "toronto" => (43.6532, -79.3832), - "mexico city" => (19.4326, -99.1332), - _ => { - return Err(format!( - "Unknown location: {}. Use 'lat,lon' format or known city", - location - ) - .into()) + let data: serde_json::Value = response + .json() + .await + .map_err(|e| format!("Failed to parse response: {}", e))?; + + // Process forecast data + let mut forecast_days = Vec::new(); + let mut daily_data: std::collections::HashMap = + std::collections::HashMap::new(); + + if let Some(list) = data["list"].as_array() { + for item in list { + let dt_txt = item["dt_txt"].as_str().unwrap_or(""); + let date = dt_txt.split(' ').next().unwrap_or(""); + let temp = item["main"]["temp"].as_f64().unwrap_or(0.0) as f32; + let description = item["weather"][0]["description"] + .as_str() + .unwrap_or("Unknown") + .to_string(); + let rain_chance = (item["pop"].as_f64().unwrap_or(0.0) * 100.0) as u32; + + let entry = daily_data.entry(date.to_string()).or_insert(( + temp, + temp, + description.clone(), + rain_chance, + )); + + // Update min/max temperatures + if temp < entry.0 { + entry.0 = temp; + } + if temp > entry.1 { + entry.1 = temp; + } + // Update rain chance to max for the day + if rain_chance > entry.3 { + entry.3 = rain_chance; + } } - }; + } - Ok(coords) -} - -/// Register WEATHER keyword in Rhai engine -pub fn weather_keyword(_state: Arc, _user_session: UserSession, engine: &mut Engine) { - let _ = engine.register_custom_syntax(&["WEATHER", "$expr$"], false, move |context, inputs| { - let location = context.eval_expression_tree(&inputs[0])?; - let location_str = location.to_string(); - - trace!("WEATHER keyword called for: {}", location_str); - - // Create channel for async result - let (tx, rx) = std::sync::mpsc::channel(); - - // Spawn blocking task - std::thread::spawn(move || { - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build(); - - let result = if let Ok(rt) = rt { - rt.block_on(async { - match 
fetch_weather(&location_str).await { - Ok(weather) => { - let msg = format!( - "Weather for {}: {} ({}). Forecast: {}", - weather.location, - weather.temperature, - weather.condition, - weather.forecast - ); - Ok(msg) - } - Err(e) => { - error!("Weather fetch failed: {}", e); - Err(format!("Could not fetch weather: {}", e)) - } - } - }) - } else { - Err("Failed to create runtime".to_string()) - }; - - let _ = tx.send(result); + // Convert to forecast days + for (date, (temp_low, temp_high, description, rain_chance)) in daily_data.iter() { + forecast_days.push(ForecastDay { + date: date.clone(), + temp_high: *temp_high, + temp_low: *temp_low, + description: description.clone(), + rain_chance: *rain_chance, }); + } - // Wait for result - match rx.recv() { - Ok(Ok(weather_msg)) => Ok(Dynamic::from(weather_msg)), - Ok(Err(e)) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - e.into(), - rhai::Position::NONE, - ))), - Err(_) => Err(Box::new(rhai::EvalAltResult::ErrorRuntime( - "Weather request timeout".into(), - rhai::Position::NONE, - ))), - } - }); + // Sort by date + forecast_days.sort_by(|a, b| a.date.cmp(&b.date)); + + Ok(WeatherData { + location: data["city"]["name"] + .as_str() + .unwrap_or(location) + .to_string(), + temperature: 0.0, // Not relevant for forecast + temperature_unit: "°C".to_string(), + description: "Forecast".to_string(), + humidity: 0, + wind_speed: 0.0, + wind_direction: String::new(), + feels_like: 0.0, + pressure: 0, + visibility: 0.0, + uv_index: None, + forecast: forecast_days, + }) +} + +async fn fetch_fallback_weather(location: &str) -> Result { + // This could use another weather API like WeatherAPI.com or NOAA + // For now, return a simulated response + info!("Using fallback weather for {}", location); + + Ok(format!( + "Weather information for {} is temporarily unavailable. 
Please try again later.", + location + )) +} + +fn format_weather_response(weather: &WeatherData) -> String { + format!( + "Current weather in {}:\n\ + 🌡️ Temperature: {:.1}{} (feels like {:.1}{})\n\ + ☁️ Conditions: {}\n\ + 💧 Humidity: {}%\n\ + 💨 Wind: {:.1} m/s {}\n\ + 🔍 Visibility: {:.1} km\n\ + 📊 Pressure: {} hPa", + weather.location, + weather.temperature, + weather.temperature_unit, + weather.feels_like, + weather.temperature_unit, + weather.description, + weather.humidity, + weather.wind_speed, + weather.wind_direction, + weather.visibility, + weather.pressure + ) +} + +fn format_forecast_response(weather: &WeatherData) -> String { + let mut response = format!("Weather forecast for {}:\n\n", weather.location); + + for day in &weather.forecast { + response.push_str(&format!( + "📅 {}\n\ + 🌡️ High: {:.1}°C, Low: {:.1}°C\n\ + ☁️ {}\n\ + ☔ Rain chance: {}%\n\n", + day.date, day.temp_high, day.temp_low, day.description, day.rain_chance + )); + } + + response +} + +fn degrees_to_compass(degrees: f64) -> String { + let directions = [ + "N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", + "NW", "NNW", + ]; + let index = ((degrees + 11.25) / 22.5) as usize % 16; + directions[index].to_string() +} + +fn get_weather_api_key(state: &AppState) -> Result { + // Try to get from bot config first + if let Some(config) = &state.config { + if let Some(api_key) = config.bot_config.get_setting("weather-api-key") { + if !api_key.is_empty() { + return Ok(api_key); + } + } + } + + // Fallback to environment variable + std::env::var("OPENWEATHERMAP_API_KEY") + .or_else(|_| std::env::var("WEATHER_API_KEY")) + .map_err(|_| { + "Weather API key not found. 
Please set 'weather-api-key' in config.csv or WEATHER_API_KEY environment variable".to_string() + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_degrees_to_compass() { + assert_eq!(degrees_to_compass(0.0), "N"); + assert_eq!(degrees_to_compass(45.0), "NE"); + assert_eq!(degrees_to_compass(90.0), "E"); + assert_eq!(degrees_to_compass(180.0), "S"); + assert_eq!(degrees_to_compass(270.0), "W"); + assert_eq!(degrees_to_compass(315.0), "NW"); + } + + #[test] + fn test_format_weather_response() { + let weather = WeatherData { + location: "London".to_string(), + temperature: 15.0, + temperature_unit: "°C".to_string(), + description: "Partly cloudy".to_string(), + humidity: 65, + wind_speed: 3.5, + wind_direction: "NE".to_string(), + feels_like: 14.0, + pressure: 1013, + visibility: 10.0, + uv_index: Some(3.0), + forecast: Vec::new(), + }; + + let response = format_weather_response(&weather); + assert!(response.contains("London")); + assert!(response.contains("15.0")); + assert!(response.contains("Partly cloudy")); + } } diff --git a/src/basic/mod.rs b/src/basic/mod.rs index 66aad2614..7a7b3bc74 100644 --- a/src/basic/mod.rs +++ b/src/basic/mod.rs @@ -9,7 +9,6 @@ pub mod compiler; pub mod keywords; use self::keywords::add_member::add_member_keyword; use self::keywords::add_suggestion::add_suggestion_keyword; -use self::keywords::add_website::add_website_keyword; use self::keywords::book::book_keyword; use self::keywords::bot_memory::{get_bot_memory_keyword, set_bot_memory_keyword}; use self::keywords::clear_kb::register_clear_kb_keyword; @@ -29,6 +28,7 @@ use self::keywords::save_from_unstructured::save_from_unstructured_keyword; use self::keywords::send_mail::send_mail_keyword; use self::keywords::use_kb::register_use_kb_keyword; use self::keywords::use_tool::use_tool_keyword; +use self::keywords::use_website::{clear_websites_keyword, use_website_keyword}; use self::keywords::llm_keyword::llm_keyword; use self::keywords::on::on_keyword; @@ -72,7 
+72,8 @@ impl ScriptService { use_tool_keyword(state.clone(), user.clone(), &mut engine); clear_tools_keyword(state.clone(), user.clone(), &mut engine); - add_website_keyword(state.clone(), user.clone(), &mut engine); + use_website_keyword(state.clone(), user.clone(), &mut engine); + clear_websites_keyword(state.clone(), user.clone(), &mut engine); add_suggestion_keyword(state.clone(), user.clone(), &mut engine); // Register the 6 new power keywords diff --git a/src/calendar/mod.rs b/src/calendar/mod.rs index e61b0775f..dbbe05268 100644 --- a/src/calendar/mod.rs +++ b/src/calendar/mod.rs @@ -13,10 +13,11 @@ use std::sync::Arc; use crate::shared::utils::DbPool; use tokio::sync::RwLock; use uuid::Uuid; +use crate::shared::state::AppState; +use diesel::sql_query; +use diesel::sql_types::{Text, Timestamptz, Integer, Jsonb}; -// TODO: Replace sqlx queries with Diesel queries - -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, QueryableByName)] pub struct CalendarEvent { pub id: Uuid, pub title: String, @@ -110,16 +111,18 @@ impl CalendarEngine { &self, event: CalendarEvent, ) -> Result> { - // TODO: Implement with Diesel - /* - let result = sqlx::query!( - r#" - INSERT INTO calendar_events - (id, title, description, start_time, end_time, location, attendees, organizer, - reminder_minutes, recurrence_rule, status, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) - RETURNING * - "#, + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; + + let attendees_json = serde_json::to_value(&event.attendees)?; + let recurrence_json = event.recurrence_rule.as_ref().map(|r| serde_json::to_value(r).ok()).flatten(); + + diesel::sql_query( + "INSERT INTO calendar_events + (id, title, description, start_time, end_time, location, attendees, organizer, + reminder_minutes, recurrence_rule, status, created_at, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, 
$11, $12, $13) + RETURNING *" + ) event.id, event.title, event.description, @@ -185,17 +188,16 @@ impl CalendarEngine { Ok(serde_json::from_value(serde_json::to_value(result)?)?) } - pub async fn delete_event(&self, _id: Uuid) -> Result> { - // TODO: Implement with Diesel - /* - let result = sqlx::query!("DELETE FROM calendar_events WHERE id = $1", id) - .execute(self.db.as_ref()) - .await?; - */ + pub async fn delete_event(&self, id: Uuid) -> Result> { + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; + + let rows_affected = diesel::sql_query("DELETE FROM calendar_events WHERE id = $1") + .bind::(&id) + .execute(&mut conn)?; self.refresh_cache().await?; - Ok(false) + Ok(rows_affected > 0) } pub async fn get_events_range( @@ -203,16 +205,14 @@ impl CalendarEngine { start: DateTime, end: DateTime, ) -> Result, Box> { - // TODO: Implement with Diesel - /* - let results = sqlx::query_as!( - CalendarEvent, - r#" - SELECT * FROM calendar_events - WHERE start_time >= $1 AND end_time <= $2 - ORDER BY start_time ASC - "#, - start, + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; + + let results = diesel::sql_query( + "SELECT * FROM calendar_events + WHERE start_time >= $1 AND end_time <= $2 + ORDER BY start_time ASC" + ) + .bind::(&start) end ) .fetch_all(self.db.as_ref()) @@ -226,16 +226,14 @@ impl CalendarEngine { &self, user_id: &str, ) -> Result, Box> { - // TODO: Implement with Diesel - /* - let results = sqlx::query!( - r#" - SELECT * FROM calendar_events - WHERE organizer = $1 OR $1 = ANY(attendees) - ORDER BY start_time ASC - "#, - user_id + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; + + let results = diesel::sql_query( + "SELECT * FROM calendar_events + WHERE organizer = $1 OR $1::text = ANY(SELECT jsonb_array_elements_text(attendees)) + ORDER BY start_time ASC" ) + .bind::(&user_id) .fetch_all(self.db.as_ref()) .await?; @@ -263,9 +261,9 @@ impl 
CalendarEngine { action_items: Vec::new(), }; - // TODO: Implement with Diesel - /* - sqlx::query!( + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; + + diesel::sql_query( r#" INSERT INTO meetings (id, event_id, platform, created_at) VALUES ($1, $2, $3, $4) @@ -303,9 +301,9 @@ impl CalendarEngine { sent: false, }; - // TODO: Implement with Diesel - /* - sqlx::query!( + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; + + diesel::sql_query( r#" INSERT INTO calendar_reminders (id, event_id, remind_at, message, channel, sent) VALUES ($1, $2, $3, $4, $5, $6) @@ -324,16 +322,14 @@ impl CalendarEngine { Ok(reminder) } - pub async fn get_event(&self, _id: Uuid) -> Result> { - // TODO: Implement with Diesel - /* - let result = sqlx::query!("SELECT * FROM calendar_events WHERE id = $1", id) - .fetch_one(self.db.as_ref()) - .await?; + pub async fn get_event(&self, id: Uuid) -> Result> { + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; - Ok(serde_json::from_value(serde_json::to_value(result)?)?) 
- */ - Err("Not implemented".into()) + let result = diesel::sql_query("SELECT * FROM calendar_events WHERE id = $1") + .bind::(&id) + .get_result::(&mut conn)?; + + Ok(result) } pub async fn check_conflicts( @@ -342,16 +338,15 @@ impl CalendarEngine { end: DateTime, user_id: &str, ) -> Result, Box> { - // TODO: Implement with Diesel - /* - let results = sqlx::query!( - r#" - SELECT * FROM calendar_events - WHERE (organizer = $1 OR $1 = ANY(attendees)) - AND NOT (end_time <= $2 OR start_time >= $3) - "#, - user_id, - start, + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; + + let results = diesel::sql_query( + "SELECT * FROM calendar_events + WHERE (organizer = $1 OR $1::text = ANY(SELECT jsonb_array_elements_text(attendees))) + AND NOT (end_time <= $2 OR start_time >= $3)" + ) + .bind::(&user_id) + .bind::(&start) end ) .fetch_all(self.db.as_ref()) @@ -369,15 +364,7 @@ impl CalendarEngine { // TODO: Implement with Diesel /* let results = sqlx::query!("SELECT * FROM calendar_events ORDER BY start_time ASC") - .fetch_all(self.db.as_ref()) - .await?; - - let events: Vec = results - .into_iter() - .map(|r| serde_json::from_value(serde_json::to_value(r).unwrap()).unwrap()) - .collect(); - */ - + .load::(&mut conn)?; let events: Vec = vec![]; let mut cache = self.cache.write().await; *cache = events; @@ -397,8 +384,254 @@ pub struct EventQuery { pub struct MeetingRequest { pub event_id: Uuid, pub platform: MeetingPlatform, + + /// Process due reminders + pub async fn process_reminders(&self) -> Result, Box> { + let now = Utc::now(); + let mut conn = self.db.get().map_err(|e| format!("DB connection error: {}", e))?; + + // Find events that need reminders sent + let events = diesel::sql_query( + "SELECT * FROM calendar_events + WHERE reminder_minutes IS NOT NULL + AND start_time - INTERVAL '1 minute' * reminder_minutes <= $1 + AND start_time > $1 + AND reminder_sent = false + ORDER BY start_time ASC" + ) + .bind::(&now) + .load::(&mut 
conn)?; + + let mut notifications = Vec::new(); + + for event in events { + // Send reminder notification + let message = format!( + "Reminder: {} starting at {}", + event.title, + event.start_time.format("%H:%M") + ); + + // Mark reminder as sent + diesel::sql_query( + "UPDATE calendar_events SET reminder_sent = true WHERE id = $1" + ) + .bind::(&event.id) + .execute(&mut conn)?; + + notifications.push(message); + } + + Ok(notifications) + } } +/// CalDAV Server implementation +pub mod caldav { + use super::*; + use axum::{ + body::Body, + extract::{Path, State, Query}, + http::{Method, StatusCode, header}, + response::{Response, IntoResponse}, + routing::{get, put, delete, any}, + Router, + }; + use std::sync::Arc; + + pub fn create_caldav_router(calendar_engine: Arc) -> Router { + Router::new() + .route("/.well-known/caldav", get(caldav_redirect)) + .route("/caldav/:user/", any(caldav_propfind)) + .route("/caldav/:user/calendar/", any(caldav_calendar_handler)) + .route("/caldav/:user/calendar/:event_uid.ics", + get(caldav_get_event) + .put(caldav_put_event) + .delete(caldav_delete_event)) + .with_state(calendar_engine) + } + + async fn caldav_redirect() -> impl IntoResponse { + Response::builder() + .status(StatusCode::MOVED_PERMANENTLY) + .header(header::LOCATION, "/caldav/") + .body(Body::empty()) + .unwrap() + } + + async fn caldav_propfind( + Path(user): Path, + State(engine): State>, + ) -> impl IntoResponse { + let xml = format!(r#" + + + /caldav/{}/ + + + + + + + {}'s Calendar + + + + + HTTP/1.1 200 OK + + +"#, user, user); + + Response::builder() + .status(StatusCode::MULTI_STATUS) + .header(header::CONTENT_TYPE, "application/xml; charset=utf-8") + .body(Body::from(xml)) + .unwrap() + } + + async fn caldav_calendar_handler( + Path(user): Path, + State(engine): State>, + method: Method, + ) -> impl IntoResponse { + match method { + Method::GET => { + // Return calendar collection + let events = engine.get_user_events(&user).await.unwrap_or_default(); + 
let ics = events_to_icalendar(&events, &user); + + Response::builder() + .status(StatusCode::OK) + .header(header::CONTENT_TYPE, "text/calendar; charset=utf-8") + .body(Body::from(ics)) + .unwrap() + }, + _ => caldav_propfind(Path(user), State(engine)).await.into_response(), + } + } + + async fn caldav_get_event( + Path((user, event_uid)): Path<(String, String)>, + State(engine): State>, + ) -> impl IntoResponse { + let event_id = event_uid.trim_end_matches(".ics"); + + match Uuid::parse_str(event_id) { + Ok(id) => { + match engine.get_event(id).await { + Ok(event) => { + let ics = event_to_icalendar(&event); + Response::builder() + .status(StatusCode::OK) + .header(header::CONTENT_TYPE, "text/calendar; charset=utf-8") + .body(Body::from(ics)) + .unwrap() + }, + Err(_) => Response::builder() + .status(StatusCode::NOT_FOUND) + .body(Body::empty()) + .unwrap(), + } + }, + Err(_) => Response::builder() + .status(StatusCode::BAD_REQUEST) + .body(Body::empty()) + .unwrap(), + } + } + + async fn caldav_put_event( + Path((user, event_uid)): Path<(String, String)>, + State(engine): State>, + body: String, + ) -> impl IntoResponse { + // Parse iCalendar data and create/update event + // This is a simplified implementation + StatusCode::CREATED + } + + async fn caldav_delete_event( + Path((user, event_uid)): Path<(String, String)>, + State(engine): State>, + ) -> impl IntoResponse { + let event_id = event_uid.trim_end_matches(".ics"); + + match Uuid::parse_str(event_id) { + Ok(id) => { + match engine.delete_event(id).await { + Ok(true) => StatusCode::NO_CONTENT, + Ok(false) => StatusCode::NOT_FOUND, + Err(_) => StatusCode::INTERNAL_SERVER_ERROR, + } + }, + Err(_) => StatusCode::BAD_REQUEST, + } + } + + fn events_to_icalendar(events: &[CalendarEvent], user: &str) -> String { + let mut ics = String::from("BEGIN:VCALENDAR\r\n"); + ics.push_str("VERSION:2.0\r\n"); + ics.push_str(&format!("PRODID:-//BotServer//Calendar {}//EN\r\n", user)); + + for event in events { + 
ics.push_str(&event_to_icalendar(event)); + } + + ics.push_str("END:VCALENDAR\r\n"); + ics + } + + fn event_to_icalendar(event: &CalendarEvent) -> String { + let mut vevent = String::from("BEGIN:VEVENT\r\n"); + vevent.push_str(&format!("UID:{}\r\n", event.id)); + vevent.push_str(&format!("SUMMARY:{}\r\n", event.title)); + + if let Some(desc) = &event.description { + vevent.push_str(&format!("DESCRIPTION:{}\r\n", desc)); + } + + if let Some(loc) = &event.location { + vevent.push_str(&format!("LOCATION:{}\r\n", loc)); + } + + vevent.push_str(&format!("DTSTART:{}\r\n", event.start_time.format("%Y%m%dT%H%M%SZ"))); + vevent.push_str(&format!("DTEND:{}\r\n", event.end_time.format("%Y%m%dT%H%M%SZ"))); + vevent.push_str(&format!("STATUS:{}\r\n", event.status.to_uppercase())); + + for attendee in &event.attendees { + vevent.push_str(&format!("ATTENDEE:mailto:{}\r\n", attendee)); + } + + vevent.push_str("END:VEVENT\r\n"); + vevent + } +} + +/// Reminder job service +pub async fn start_reminder_job(engine: Arc) { + use tokio::time::{interval, Duration}; + + let mut ticker = interval(Duration::from_secs(60)); // Check every minute + + loop { + ticker.tick().await; + + match engine.process_reminders().await { + Ok(notifications) => { + for message in notifications { + log::info!("Calendar reminder: {}", message); + // Here you would send actual notifications via email, push, etc. 
+ } + }, + Err(e) => { + log::error!("Failed to process calendar reminders: {}", e); + } + } + } +} + + async fn create_event_handler( State(engine): State>, Json(event): Json, diff --git a/src/core/bootstrap/mod.rs b/src/core/bootstrap/mod.rs index 1df1d8c6f..72e931769 100644 --- a/src/core/bootstrap/mod.rs +++ b/src/core/bootstrap/mod.rs @@ -143,6 +143,14 @@ impl BootstrapManager { error!("Failed to setup Directory: {}", e); } } + + // Auto-configure Email after installation + if component == "email" { + info!("🔧 Auto-configuring Email (Stalwart)..."); + if let Err(e) = self.setup_email().await { + error!("Failed to setup Email: {}", e); + } + } } } Ok(()) @@ -220,7 +228,7 @@ impl BootstrapManager { } /// Setup Email (Stalwart) with Directory integration - async fn setup_email(&self) -> Result<()> { + pub async fn setup_email(&self) -> Result<()> { let config_path = PathBuf::from("./config/email_config.json"); let directory_config_path = PathBuf::from("./config/directory_config.json"); diff --git a/src/core/bot/channels/instagram.rs b/src/core/bot/channels/instagram.rs index e2b0d121a..be1d066f6 100644 --- a/src/core/bot/channels/instagram.rs +++ b/src/core/bot/channels/instagram.rs @@ -1,29 +1,390 @@ -use crate::shared::models::BotResponse; use async_trait::async_trait; -use log::info; +use log::{error, info}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +use crate::core::bot::channels::ChannelAdapter; +use crate::shared::models::BotResponse; -/// Instagram channel adapter for sending messages through Instagram pub struct InstagramAdapter { - // TODO: Add Instagram API client configuration + access_token: String, + verify_token: String, + page_id: String, + api_version: String, + instagram_account_id: String, } impl InstagramAdapter { pub fn new() -> Self { - Self {} + // Load from environment variables (would be from config.csv in production) + let access_token = std::env::var("INSTAGRAM_ACCESS_TOKEN").unwrap_or_default(); + let 
verify_token = std::env::var("INSTAGRAM_VERIFY_TOKEN") + .unwrap_or_else(|_| "webhook_verify".to_string()); + let page_id = std::env::var("INSTAGRAM_PAGE_ID").unwrap_or_default(); + let api_version = "v17.0".to_string(); + let instagram_account_id = std::env::var("INSTAGRAM_ACCOUNT_ID").unwrap_or_default(); + + Self { + access_token, + verify_token, + page_id, + api_version, + instagram_account_id, + } + } + + async fn send_instagram_message( + &self, + recipient_id: &str, + message: &str, + ) -> Result> { + let client = reqwest::Client::new(); + + let url = format!( + "https://graph.facebook.com/{}/{}/messages", + self.api_version, self.page_id + ); + + let payload = serde_json::json!({ + "recipient": { + "id": recipient_id + }, + "message": { + "text": message + }, + "messaging_type": "RESPONSE" + }); + + let response = client + .post(&url) + .header("Content-Type", "application/json") + .query(&[("access_token", &self.access_token)]) + .json(&payload) + .send() + .await?; + + if response.status().is_success() { + let result: serde_json::Value = response.json().await?; + Ok(result["message_id"].as_str().unwrap_or("").to_string()) + } else { + let error_text = response.text().await?; + Err(format!("Instagram API error: {}", error_text).into()) + } + } + + pub async fn send_media_message( + &self, + recipient_id: &str, + media_url: &str, + media_type: &str, + ) -> Result> { + let client = reqwest::Client::new(); + + let url = format!( + "https://graph.facebook.com/{}/{}/messages", + self.api_version, self.page_id + ); + + let attachment_type = match media_type { + "image" => "image", + "video" => "video", + "audio" => "audio", + _ => "file", + }; + + let payload = serde_json::json!({ + "recipient": { + "id": recipient_id + }, + "message": { + "attachment": { + "type": attachment_type, + "payload": { + "url": media_url, + "is_reusable": true + } + } + } + }); + + let response = client + .post(&url) + .query(&[("access_token", &self.access_token)]) + .json(&payload) 
+ .send() + .await?; + + if response.status().is_success() { + let result: serde_json::Value = response.json().await?; + Ok(result["message_id"].as_str().unwrap_or("").to_string()) + } else { + let error_text = response.text().await?; + Err(format!("Instagram API error: {}", error_text).into()) + } + } + + pub async fn send_story_reply( + &self, + recipient_id: &str, + message: &str, + ) -> Result> { + // Story replies use the same messaging API + self.send_instagram_message(recipient_id, message).await + } + + pub async fn get_user_profile( + &self, + user_id: &str, + ) -> Result> { + let client = reqwest::Client::new(); + + let url = format!( + "https://graph.facebook.com/{}/{}", + self.api_version, user_id + ); + + let response = client + .get(&url) + .query(&[ + ("access_token", &self.access_token), + ("fields", &"name,profile_pic".to_string()), + ]) + .send() + .await?; + + if response.status().is_success() { + let profile: InstagramProfile = response.json().await?; + Ok(profile) + } else { + Err("Failed to get Instagram profile".into()) + } + } + + pub fn verify_webhook(&self, token: &str) -> bool { + token == self.verify_token + } + + pub async fn handle_webhook_verification( + &self, + mode: &str, + token: &str, + challenge: &str, + ) -> Option { + if mode == "subscribe" && self.verify_webhook(token) { + Some(challenge.to_string()) + } else { + None + } } } #[async_trait] -impl super::ChannelAdapter for InstagramAdapter { +impl ChannelAdapter for InstagramAdapter { + fn name(&self) -> &str { + "Instagram" + } + + fn is_configured(&self) -> bool { + !self.access_token.is_empty() && !self.page_id.is_empty() + } + async fn send_message( &self, response: BotResponse, ) -> Result<(), Box> { + if !self.is_configured() { + error!("Instagram adapter not configured. 
Please set instagram-access-token and instagram-page-id in config.csv"); + return Err("Instagram not configured".into()); + } + + let message_id = self + .send_instagram_message(&response.user_id, &response.content) + .await?; + info!( - "Instagram message would be sent to {}: {}", - response.user_id, response.content + "Instagram message sent to {}: {} (message_id: {})", + response.user_id, response.content, message_id ); - // TODO: Implement actual Instagram API integration + Ok(()) } + + async fn receive_message( + &self, + payload: serde_json::Value, + ) -> Result, Box> { + // Parse Instagram webhook payload + if let Some(entry) = payload["entry"].as_array() { + if let Some(first_entry) = entry.first() { + if let Some(messaging) = first_entry["messaging"].as_array() { + if let Some(first_message) = messaging.first() { + // Check for different message types + if let Some(message) = first_message["message"].as_object() { + if let Some(text) = message["text"].as_str() { + return Ok(Some(text.to_string())); + } else if let Some(attachments) = message["attachments"].as_array() { + if let Some(first_attachment) = attachments.first() { + let attachment_type = + first_attachment["type"].as_str().unwrap_or("unknown"); + return Ok(Some(format!( + "Received {} attachment", + attachment_type + ))); + } + } + } else if let Some(postback) = first_message["postback"].as_object() { + if let Some(payload) = postback["payload"].as_str() { + return Ok(Some(format!("Postback: {}", payload))); + } + } + } + } else if let Some(changes) = first_entry["changes"].as_array() { + // Handle Instagram mentions and comments + if let Some(first_change) = changes.first() { + let field = first_change["field"].as_str().unwrap_or(""); + match field { + "comments" => { + if let Some(text) = first_change["value"]["text"].as_str() { + return Ok(Some(format!("Comment: {}", text))); + } + } + "mentions" => { + if let Some(media_id) = first_change["value"]["media_id"].as_str() { + return 
Ok(Some(format!("Mentioned in media: {}", media_id))); + } + } + _ => {} + } + } + } + } + } + + Ok(None) + } + + async fn get_user_info( + &self, + user_id: &str, + ) -> Result> { + match self.get_user_profile(user_id).await { + Ok(profile) => Ok(serde_json::to_value(profile)?), + Err(_) => Ok(serde_json::json!({ + "id": user_id, + "platform": "instagram" + })), + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramProfile { + pub id: String, + pub name: Option, + pub profile_pic: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramWebhookPayload { + pub object: String, + pub entry: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramEntry { + pub id: String, + pub time: i64, + pub messaging: Option>, + pub changes: Option>, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramMessaging { + pub sender: InstagramUser, + pub recipient: InstagramUser, + pub timestamp: i64, + pub message: Option, + pub postback: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramUser { + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramMessage { + pub mid: String, + pub text: Option, + pub attachments: Option>, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramAttachment { + #[serde(rename = "type")] + pub attachment_type: String, + pub payload: InstagramAttachmentPayload, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramAttachmentPayload { + pub url: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramPostback { + pub payload: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct InstagramChange { + pub field: String, + pub value: serde_json::Value, +} + +// Helper functions for Instagram-specific features + +pub fn create_quick_reply(text: &str, replies: Vec<(&str, &str)>) -> serde_json::Value { + let quick_replies: Vec = replies + .into_iter() + .map(|(title, payload)| 
{ + serde_json::json!({ + "content_type": "text", + "title": title, + "payload": payload + }) + }) + .collect(); + + serde_json::json!({ + "text": text, + "quick_replies": quick_replies + }) +} + +pub fn create_generic_template(elements: Vec) -> serde_json::Value { + serde_json::json!({ + "attachment": { + "type": "template", + "payload": { + "template_type": "generic", + "elements": elements + } + } + }) +} + +pub fn create_media_template(media_type: &str, attachment_id: &str) -> serde_json::Value { + serde_json::json!({ + "attachment": { + "type": "template", + "payload": { + "template_type": "media", + "elements": [{ + "media_type": media_type, + "attachment_id": attachment_id + }] + } + } + }) } diff --git a/src/core/bot/channels/mod.rs b/src/core/bot/channels/mod.rs index 2b1657cf2..c113ca0e8 100644 --- a/src/core/bot/channels/mod.rs +++ b/src/core/bot/channels/mod.rs @@ -10,10 +10,35 @@ use std::sync::Arc; use tokio::sync::{mpsc, Mutex}; #[async_trait] pub trait ChannelAdapter: Send + Sync { + fn name(&self) -> &str { + "Unknown" + } + + fn is_configured(&self) -> bool { + true + } + async fn send_message( &self, response: BotResponse, ) -> Result<(), Box>; + + async fn receive_message( + &self, + payload: serde_json::Value, + ) -> Result, Box> { + Ok(None) + } + + async fn get_user_info( + &self, + user_id: &str, + ) -> Result> { + Ok(serde_json::json!({ + "id": user_id, + "platform": self.name() + })) + } } #[derive(Debug)] pub struct WebChannelAdapter { diff --git a/src/core/bot/channels/teams.rs b/src/core/bot/channels/teams.rs index 2da9d65db..cebd82a0d 100644 --- a/src/core/bot/channels/teams.rs +++ b/src/core/bot/channels/teams.rs @@ -1,29 +1,369 @@ -use crate::shared::models::BotResponse; use async_trait::async_trait; -use log::info; +use log::{error, info}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +use crate::core::bot::channels::ChannelAdapter; +use crate::shared::models::BotResponse; -/// Microsoft Teams channel 
adapter for sending messages through Teams pub struct TeamsAdapter { - // TODO: Add Teams API client configuration + app_id: String, + app_password: String, + tenant_id: String, + service_url: String, + bot_id: String, } impl TeamsAdapter { pub fn new() -> Self { - Self {} + // Load from environment variables (would be from config.csv in production) + let app_id = std::env::var("TEAMS_APP_ID").unwrap_or_default(); + let app_password = std::env::var("TEAMS_APP_PASSWORD").unwrap_or_default(); + let tenant_id = std::env::var("TEAMS_TENANT_ID").unwrap_or_default(); + let service_url = std::env::var("TEAMS_SERVICE_URL") + .unwrap_or_else(|_| "https://smba.trafficmanager.net".to_string()); + let bot_id = std::env::var("TEAMS_BOT_ID").unwrap_or_else(|_| app_id.clone()); + + Self { + app_id, + app_password, + tenant_id, + service_url, + bot_id, + } + } + + async fn get_access_token(&self) -> Result> { + let client = reqwest::Client::new(); + + let token_url = format!( + "https://login.microsoftonline.com/{}/oauth2/v2.0/token", + if self.tenant_id.is_empty() { + "common" + } else { + &self.tenant_id + } + ); + + let params = [ + ("client_id", &self.app_id), + ("client_secret", &self.app_password), + ("grant_type", &"client_credentials".to_string()), + ( + "scope", + &"https://api.botframework.com/.default".to_string(), + ), + ]; + + let response = client.post(&token_url).form(¶ms).send().await?; + + if response.status().is_success() { + let token_response: serde_json::Value = response.json().await?; + Ok(token_response["access_token"] + .as_str() + .unwrap_or("") + .to_string()) + } else { + let error_text = response.text().await?; + Err(format!("Failed to get Teams access token: {}", error_text).into()) + } + } + + async fn send_teams_message( + &self, + conversation_id: &str, + activity_id: Option<&str>, + message: &str, + ) -> Result> { + let token = self.get_access_token().await?; + let client = reqwest::Client::new(); + + let url = if let Some(reply_to_id) = 
activity_id { + format!( + "{}/v3/conversations/{}/activities/{}/reply", + self.service_url, conversation_id, reply_to_id + ) + } else { + format!( + "{}/v3/conversations/{}/activities", + self.service_url, conversation_id + ) + }; + + let activity = TeamsActivity { + activity_type: "message".to_string(), + text: message.to_string(), + from: TeamsChannelAccount { + id: self.bot_id.clone(), + name: Some("Bot".to_string()), + }, + conversation: TeamsConversationAccount { + id: conversation_id.to_string(), + conversation_type: None, + tenant_id: Some(self.tenant_id.clone()), + }, + recipient: None, + attachments: None, + entities: None, + }; + + let response = client + .post(&url) + .header("Authorization", format!("Bearer {}", token)) + .header("Content-Type", "application/json") + .json(&activity) + .send() + .await?; + + if response.status().is_success() { + let result: serde_json::Value = response.json().await?; + Ok(result["id"].as_str().unwrap_or("").to_string()) + } else { + let error_text = response.text().await?; + Err(format!("Teams API error: {}", error_text).into()) + } + } + + pub async fn send_card( + &self, + conversation_id: &str, + card: TeamsAdaptiveCard, + ) -> Result> { + let token = self.get_access_token().await?; + let client = reqwest::Client::new(); + + let url = format!( + "{}/v3/conversations/{}/activities", + self.service_url, conversation_id + ); + + let attachment = TeamsAttachment { + content_type: "application/vnd.microsoft.card.adaptive".to_string(), + content: serde_json::to_value(card)?, + }; + + let activity = TeamsActivity { + activity_type: "message".to_string(), + text: String::new(), + from: TeamsChannelAccount { + id: self.bot_id.clone(), + name: Some("Bot".to_string()), + }, + conversation: TeamsConversationAccount { + id: conversation_id.to_string(), + conversation_type: None, + tenant_id: Some(self.tenant_id.clone()), + }, + recipient: None, + attachments: Some(vec![attachment]), + entities: None, + }; + + let response = 
client + .post(&url) + .header("Authorization", format!("Bearer {}", token)) + .header("Content-Type", "application/json") + .json(&activity) + .send() + .await?; + + if response.status().is_success() { + let result: serde_json::Value = response.json().await?; + Ok(result["id"].as_str().unwrap_or("").to_string()) + } else { + let error_text = response.text().await?; + Err(format!("Teams API error: {}", error_text).into()) + } + } + + pub async fn update_message( + &self, + conversation_id: &str, + activity_id: &str, + new_message: &str, + ) -> Result<(), Box> { + let token = self.get_access_token().await?; + let client = reqwest::Client::new(); + + let url = format!( + "{}/v3/conversations/{}/activities/{}", + self.service_url, conversation_id, activity_id + ); + + let activity = TeamsActivity { + activity_type: "message".to_string(), + text: new_message.to_string(), + from: TeamsChannelAccount { + id: self.bot_id.clone(), + name: Some("Bot".to_string()), + }, + conversation: TeamsConversationAccount { + id: conversation_id.to_string(), + conversation_type: None, + tenant_id: Some(self.tenant_id.clone()), + }, + recipient: None, + attachments: None, + entities: None, + }; + + let response = client + .put(&url) + .header("Authorization", format!("Bearer {}", token)) + .header("Content-Type", "application/json") + .json(&activity) + .send() + .await?; + + if !response.status().is_success() { + let error_text = response.text().await?; + return Err(format!("Teams API error: {}", error_text).into()); + } + + Ok(()) } } #[async_trait] -impl super::ChannelAdapter for TeamsAdapter { +impl ChannelAdapter for TeamsAdapter { + fn name(&self) -> &str { + "Teams" + } + + fn is_configured(&self) -> bool { + !self.app_id.is_empty() && !self.app_password.is_empty() + } + async fn send_message( &self, response: BotResponse, ) -> Result<(), Box> { + if !self.is_configured() { + error!("Teams adapter not configured. 
Please set teams-app-id and teams-app-password in config.csv"); + return Err("Teams not configured".into()); + } + + // In Teams, user_id is typically the conversation ID + let message_id = self + .send_teams_message(&response.user_id, None, &response.content) + .await?; + info!( - "Teams message would be sent to {}: {}", - response.user_id, response.content + "Teams message sent to conversation {}: {} (message_id: {})", + response.user_id, response.content, message_id ); - // TODO: Implement actual Teams API integration + Ok(()) } + + async fn receive_message( + &self, + payload: serde_json::Value, + ) -> Result, Box> { + // Parse Teams activity payload + if let Some(activity_type) = payload["type"].as_str() { + match activity_type { + "message" => { + return Ok(payload["text"].as_str().map(|s| s.to_string())); + } + "invoke" => { + // Handle Teams-specific invokes (like adaptive card actions) + if let Some(name) = payload["name"].as_str() { + return Ok(Some(format!("Teams invoke: {}", name))); + } + } + _ => { + return Ok(None); + } + } + } + + Ok(None) + } + + async fn get_user_info( + &self, + user_id: &str, + ) -> Result> { + let token = self.get_access_token().await?; + let client = reqwest::Client::new(); + + // In Teams, user_id might be in format "29:1xyz..." 
+ let url = format!("{}/v3/conversations/{}/members", self.service_url, user_id); + + let response = client + .get(&url) + .header("Authorization", format!("Bearer {}", token)) + .send() + .await?; + + if response.status().is_success() { + let members: Vec = response.json().await?; + if let Some(first_member) = members.first() { + return Ok(first_member.clone()); + } + } + + Ok(serde_json::json!({ + "id": user_id, + "platform": "teams" + })) + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TeamsActivity { + #[serde(rename = "type")] + pub activity_type: String, + pub text: String, + pub from: TeamsChannelAccount, + pub conversation: TeamsConversationAccount, + pub recipient: Option, + pub attachments: Option>, + pub entities: Option>, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TeamsChannelAccount { + pub id: String, + pub name: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TeamsConversationAccount { + pub id: String, + #[serde(rename = "conversationType")] + pub conversation_type: Option, + #[serde(rename = "tenantId")] + pub tenant_id: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TeamsAttachment { + #[serde(rename = "contentType")] + pub content_type: String, + pub content: serde_json::Value, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct TeamsAdaptiveCard { + #[serde(rename = "$schema")] + pub schema: String, + #[serde(rename = "type")] + pub card_type: String, + pub version: String, + pub body: Vec, + pub actions: Option>, +} + +impl Default for TeamsAdaptiveCard { + fn default() -> Self { + Self { + schema: "http://adaptivecards.io/schemas/adaptive-card.json".to_string(), + card_type: "AdaptiveCard".to_string(), + version: "1.4".to_string(), + body: Vec::new(), + actions: None, + } + } } diff --git a/src/core/bot/channels/whatsapp.rs b/src/core/bot/channels/whatsapp.rs index cb623a019..9e5ed2cce 100644 --- a/src/core/bot/channels/whatsapp.rs +++ 
b/src/core/bot/channels/whatsapp.rs @@ -1,29 +1,347 @@ -use crate::shared::models::BotResponse; use async_trait::async_trait; -use log::info; +use log::{error, info}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +use crate::core::bot::channels::ChannelAdapter; +use crate::shared::models::BotResponse; -/// WhatsApp channel adapter for sending messages through WhatsApp pub struct WhatsAppAdapter { - // TODO: Add WhatsApp API client configuration + api_key: String, + phone_number_id: String, + webhook_verify_token: String, + business_account_id: String, + api_version: String, } impl WhatsAppAdapter { pub fn new() -> Self { - Self {} + // Load from environment variables (would be from config.csv in production) + let api_key = std::env::var("WHATSAPP_API_KEY").unwrap_or_default(); + let phone_number_id = std::env::var("WHATSAPP_PHONE_NUMBER_ID").unwrap_or_default(); + let webhook_verify_token = + std::env::var("WHATSAPP_VERIFY_TOKEN").unwrap_or_else(|_| "webhook_verify".to_string()); + let business_account_id = std::env::var("WHATSAPP_BUSINESS_ACCOUNT_ID").unwrap_or_default(); + let api_version = "v17.0".to_string(); + + Self { + api_key, + phone_number_id, + webhook_verify_token, + business_account_id, + api_version, + } + } + + async fn send_whatsapp_message( + &self, + to: &str, + message: &str, + ) -> Result> { + let client = reqwest::Client::new(); + + let url = format!( + "https://graph.facebook.com/{}/{}/messages", + self.api_version, self.phone_number_id + ); + + let payload = serde_json::json!({ + "messaging_product": "whatsapp", + "recipient_type": "individual", + "to": to, + "type": "text", + "text": { + "preview_url": false, + "body": message + } + }); + + let response = client + .post(&url) + .header("Authorization", format!("Bearer {}", self.api_key)) + .header("Content-Type", "application/json") + .json(&payload) + .send() + .await?; + + if response.status().is_success() { + let result: serde_json::Value = 
response.json().await?; + Ok(result["messages"][0]["id"] + .as_str() + .unwrap_or("") + .to_string()) + } else { + let error_text = response.text().await?; + Err(format!("WhatsApp API error: {}", error_text).into()) + } + } + + pub async fn send_template_message( + &self, + to: &str, + template_name: &str, + language_code: &str, + parameters: Vec, + ) -> Result> { + let client = reqwest::Client::new(); + + let url = format!( + "https://graph.facebook.com/{}/{}/messages", + self.api_version, self.phone_number_id + ); + + let components = if !parameters.is_empty() { + vec![serde_json::json!({ + "type": "body", + "parameters": parameters.iter().map(|p| { + serde_json::json!({ + "type": "text", + "text": p + }) + }).collect::>() + })] + } else { + vec![] + }; + + let payload = serde_json::json!({ + "messaging_product": "whatsapp", + "to": to, + "type": "template", + "template": { + "name": template_name, + "language": { + "code": language_code + }, + "components": components + } + }); + + let response = client + .post(&url) + .header("Authorization", format!("Bearer {}", self.api_key)) + .header("Content-Type", "application/json") + .json(&payload) + .send() + .await?; + + if response.status().is_success() { + let result: serde_json::Value = response.json().await?; + Ok(result["messages"][0]["id"] + .as_str() + .unwrap_or("") + .to_string()) + } else { + let error_text = response.text().await?; + Err(format!("WhatsApp API error: {}", error_text).into()) + } + } + + pub async fn send_media_message( + &self, + to: &str, + media_type: &str, + media_url: &str, + caption: Option<&str>, + ) -> Result> { + let client = reqwest::Client::new(); + + let url = format!( + "https://graph.facebook.com/{}/{}/messages", + self.api_version, self.phone_number_id + ); + + let mut media_object = serde_json::json!({ + "link": media_url + }); + + if let Some(caption_text) = caption { + media_object["caption"] = serde_json::json!(caption_text); + } + + let payload = serde_json::json!({ + 
"messaging_product": "whatsapp", + "to": to, + "type": media_type, + media_type: media_object + }); + + let response = client + .post(&url) + .header("Authorization", format!("Bearer {}", self.api_key)) + .header("Content-Type", "application/json") + .json(&payload) + .send() + .await?; + + if response.status().is_success() { + let result: serde_json::Value = response.json().await?; + Ok(result["messages"][0]["id"] + .as_str() + .unwrap_or("") + .to_string()) + } else { + let error_text = response.text().await?; + Err(format!("WhatsApp API error: {}", error_text).into()) + } + } + + pub fn verify_webhook(&self, token: &str) -> bool { + token == self.webhook_verify_token } } #[async_trait] -impl super::ChannelAdapter for WhatsAppAdapter { +impl ChannelAdapter for WhatsAppAdapter { + fn name(&self) -> &str { + "WhatsApp" + } + + fn is_configured(&self) -> bool { + !self.api_key.is_empty() && !self.phone_number_id.is_empty() + } + async fn send_message( &self, response: BotResponse, ) -> Result<(), Box> { + if !self.is_configured() { + error!("WhatsApp adapter not configured. 
Please set whatsapp-api-key and whatsapp-phone-number-id in config.csv"); + return Err("WhatsApp not configured".into()); + } + + let message_id = self + .send_whatsapp_message(&response.user_id, &response.content) + .await?; + info!( - "WhatsApp message would be sent to {}: {}", - response.user_id, response.content + "WhatsApp message sent to {}: {} (message_id: {})", + response.user_id, response.content, message_id ); - // TODO: Implement actual WhatsApp API integration + Ok(()) } + + async fn receive_message( + &self, + payload: serde_json::Value, + ) -> Result, Box> { + // Parse WhatsApp webhook payload + if let Some(entry) = payload["entry"].as_array() { + if let Some(first_entry) = entry.first() { + if let Some(changes) = first_entry["changes"].as_array() { + if let Some(first_change) = changes.first() { + if let Some(messages) = first_change["value"]["messages"].as_array() { + if let Some(first_message) = messages.first() { + let message_type = first_message["type"].as_str().unwrap_or(""); + + match message_type { + "text" => { + return Ok(first_message["text"]["body"] + .as_str() + .map(|s| s.to_string())); + } + "image" | "document" | "audio" | "video" => { + return Ok(Some(format!( + "Received {} message", + message_type + ))); + } + _ => { + return Ok(Some(format!( + "Received unsupported message type: {}", + message_type + ))); + } + } + } + } + } + } + } + } + + Ok(None) + } + + async fn get_user_info( + &self, + user_id: &str, + ) -> Result> { + let client = reqwest::Client::new(); + + let url = format!( + "https://graph.facebook.com/{}/{}", + self.api_version, user_id + ); + + let response = client + .get(&url) + .header("Authorization", format!("Bearer {}", self.api_key)) + .send() + .await?; + + if response.status().is_success() { + Ok(response.json().await?) 
+ } else { + Ok(serde_json::json!({ + "id": user_id, + "platform": "whatsapp" + })) + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WhatsAppWebhookPayload { + pub entry: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WhatsAppEntry { + pub id: String, + pub changes: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WhatsAppChange { + pub field: String, + pub value: WhatsAppValue, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WhatsAppValue { + pub messaging_product: String, + pub metadata: WhatsAppMetadata, + pub messages: Option>, + pub statuses: Option>, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WhatsAppMetadata { + pub display_phone_number: String, + pub phone_number_id: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WhatsAppMessage { + pub from: String, + pub id: String, + pub timestamp: String, + #[serde(rename = "type")] + pub message_type: String, + pub text: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WhatsAppText { + pub body: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WhatsAppStatus { + pub id: String, + pub status: String, + pub timestamp: String, + pub recipient_id: String, } diff --git a/src/core/config/mod.rs b/src/core/config/mod.rs index ddcbc9aa0..6fbed84d0 100644 --- a/src/core/config/mod.rs +++ b/src/core/config/mod.rs @@ -207,6 +207,29 @@ impl ConfigManager { }; Ok(value) } + + pub async fn get_bot_config_value( + &self, + target_bot_id: &uuid::Uuid, + key: &str, + ) -> Result { + use crate::shared::models::schema::bot_configuration::dsl::*; + use diesel::prelude::*; + + let mut conn = self + .get_conn() + .map_err(|e| format!("Failed to acquire connection: {}", e))?; + + let value = bot_configuration + .filter(bot_id.eq(target_bot_id)) + .filter(config_key.eq(key)) + .select(config_value) + .first::(&mut conn) + .map_err(|e| format!("Failed to get bot config value: {}", e))?; + + Ok(value) + } + pub fn 
sync_gbot_config(&self, bot_id: &uuid::Uuid, content: &str) -> Result { use sha2::{Digest, Sha256}; let mut hasher = Sha256::new(); diff --git a/src/core/kb/document_processor.rs b/src/core/kb/document_processor.rs new file mode 100644 index 000000000..5bbf9c976 --- /dev/null +++ b/src/core/kb/document_processor.rs @@ -0,0 +1,587 @@ +use anyhow::Result; +use log::{error, info, warn}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::Path; +use tokio::io::AsyncReadExt; + +/// Supported document formats for knowledge base +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DocumentFormat { + PDF, + DOCX, + XLSX, + PPTX, + TXT, + MD, + HTML, + RTF, + CSV, + JSON, + XML, +} + +impl DocumentFormat { + /// Detect format from file extension + pub fn from_extension(path: &Path) -> Option { + let ext = path.extension()?.to_str()?.to_lowercase(); + match ext.as_str() { + "pdf" => Some(Self::PDF), + "docx" => Some(Self::DOCX), + "xlsx" => Some(Self::XLSX), + "pptx" => Some(Self::PPTX), + "txt" => Some(Self::TXT), + "md" | "markdown" => Some(Self::MD), + "html" | "htm" => Some(Self::HTML), + "rtf" => Some(Self::RTF), + "csv" => Some(Self::CSV), + "json" => Some(Self::JSON), + "xml" => Some(Self::XML), + _ => None, + } + } + + /// Get maximum file size for this format (in bytes) + pub fn max_size(&self) -> usize { + match self { + Self::PDF => 500 * 1024 * 1024, // 500MB + Self::DOCX => 100 * 1024 * 1024, // 100MB + Self::XLSX => 100 * 1024 * 1024, // 100MB + Self::PPTX => 200 * 1024 * 1024, // 200MB + Self::TXT => 100 * 1024 * 1024, // 100MB + Self::MD => 10 * 1024 * 1024, // 10MB + Self::HTML => 50 * 1024 * 1024, // 50MB + Self::RTF => 50 * 1024 * 1024, // 50MB + Self::CSV => 1024 * 1024 * 1024, // 1GB + Self::JSON => 100 * 1024 * 1024, // 100MB + Self::XML => 100 * 1024 * 1024, // 100MB + } + } +} + +/// Document metadata extracted during processing +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DocumentMetadata { + pub 
title: Option, + pub author: Option, + pub creation_date: Option, + pub modification_date: Option, + pub page_count: Option, + pub word_count: Option, + pub language: Option, +} + +/// A text chunk ready for embedding +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TextChunk { + pub content: String, + pub metadata: ChunkMetadata, +} + +/// Metadata for a text chunk +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChunkMetadata { + pub document_path: String, + pub document_title: Option, + pub chunk_index: usize, + pub total_chunks: usize, + pub start_char: usize, + pub end_char: usize, + pub page_number: Option, +} + +/// Main document processor for knowledge base +#[derive(Debug)] +pub struct DocumentProcessor { + chunk_size: usize, + chunk_overlap: usize, +} + +impl Default for DocumentProcessor { + fn default() -> Self { + Self { + chunk_size: 1000, // 1000 characters as per docs + chunk_overlap: 200, // 200 character overlap as per docs + } + } +} + +impl DocumentProcessor { + pub fn new(chunk_size: usize, chunk_overlap: usize) -> Self { + Self { + chunk_size, + chunk_overlap, + } + } + + /// Get the chunk size + pub fn chunk_size(&self) -> usize { + self.chunk_size + } + + /// Get the chunk overlap + pub fn chunk_overlap(&self) -> usize { + self.chunk_overlap + } + + /// Process a document file and return extracted text chunks + pub async fn process_document(&self, file_path: &Path) -> Result> { + // Check if file exists + if !file_path.exists() { + return Err(anyhow::anyhow!("File not found: {:?}", file_path)); + } + + // Get file size + let metadata = tokio::fs::metadata(file_path).await?; + let file_size = metadata.len() as usize; + + // Detect format + let format = DocumentFormat::from_extension(file_path) + .ok_or_else(|| anyhow::anyhow!("Unsupported file format: {:?}", file_path))?; + + // Check file size + if file_size > format.max_size() { + return Err(anyhow::anyhow!( + "File too large: {} bytes (max: {} bytes)", + file_size, 
+ format.max_size() + )); + } + + info!( + "Processing document: {:?} (format: {:?}, size: {} bytes)", + file_path, format, file_size + ); + + // Extract text based on format + let text = self.extract_text(file_path, format).await?; + + // Clean and normalize text + let cleaned_text = self.clean_text(&text); + + // Generate chunks + let chunks = self.create_chunks(&cleaned_text, file_path); + + info!( + "Created {} chunks from document: {:?}", + chunks.len(), + file_path + ); + + Ok(chunks) + } + + /// Extract text from document based on format + async fn extract_text(&self, file_path: &Path, format: DocumentFormat) -> Result { + match format { + DocumentFormat::TXT | DocumentFormat::MD => { + // Direct text file reading + let mut file = tokio::fs::File::open(file_path).await?; + let mut contents = String::new(); + file.read_to_string(&mut contents).await?; + Ok(contents) + } + DocumentFormat::PDF => self.extract_pdf_text(file_path).await, + DocumentFormat::DOCX => self.extract_docx_text(file_path).await, + DocumentFormat::HTML => self.extract_html_text(file_path).await, + DocumentFormat::CSV => self.extract_csv_text(file_path).await, + DocumentFormat::JSON => self.extract_json_text(file_path).await, + _ => { + warn!( + "Format {:?} extraction not yet implemented, using fallback", + format + ); + self.fallback_text_extraction(file_path).await + } + } + } + + /// Extract text from PDF files + async fn extract_pdf_text(&self, file_path: &Path) -> Result { + // Try system pdftotext first (fastest and most reliable) + let output = tokio::process::Command::new("pdftotext") + .arg("-layout") + .arg(file_path) + .arg("-") + .output() + .await; + + match output { + Ok(output) if output.status.success() => { + info!("Successfully extracted PDF with pdftotext: {:?}", file_path); + Ok(String::from_utf8_lossy(&output.stdout).to_string()) + } + _ => { + warn!( + "pdftotext failed for {:?}, trying library extraction", + file_path + ); + 
self.extract_pdf_with_library(file_path).await + } + } + } + + /// Extract PDF using poppler-utils + async fn extract_pdf_with_poppler(&self, file_path: &Path) -> Result { + let output = tokio::process::Command::new("pdftotext") + .arg(file_path) + .arg("-") + .output() + .await?; + + if output.status.success() { + Ok(String::from_utf8_lossy(&output.stdout).to_string()) + } else { + // Fallback to library extraction + self.extract_pdf_with_library(file_path).await + } + } + + /// Extract PDF using rust library (fallback) + async fn extract_pdf_with_library(&self, file_path: &Path) -> Result { + use pdf_extract::extract_text; + + match extract_text(file_path) { + Ok(text) => { + info!("Successfully extracted PDF with library: {:?}", file_path); + Ok(text) + } + Err(e) => { + warn!("PDF library extraction failed: {}", e); + // Last resort: try to get any text we can + self.extract_pdf_basic(file_path).await + } + } + } + + /// Basic PDF extraction using rust library (minimal approach) + async fn extract_pdf_basic(&self, file_path: &Path) -> Result { + // Try using pdf-extract as final fallback + match pdf_extract::extract_text(file_path) { + Ok(text) if !text.is_empty() => Ok(text), + _ => { + // Last resort: return error message + Err(anyhow::anyhow!( + "Could not extract text from PDF. Please ensure pdftotext is installed." 
+ )) + } + } + } + + /// Extract text from DOCX files + async fn extract_docx_text(&self, file_path: &Path) -> Result { + // Use docx-rs or similar crate + // For now, use pandoc as fallback + let output = tokio::process::Command::new("pandoc") + .arg("-f") + .arg("docx") + .arg("-t") + .arg("plain") + .arg(file_path) + .output() + .await; + + match output { + Ok(output) if output.status.success() => { + Ok(String::from_utf8_lossy(&output.stdout).to_string()) + } + _ => { + warn!("pandoc failed for DOCX, using fallback"); + self.fallback_text_extraction(file_path).await + } + } + } + + /// Extract text from HTML files + async fn extract_html_text(&self, file_path: &Path) -> Result { + let contents = tokio::fs::read_to_string(file_path).await?; + + // Simple HTML tag removal (production should use html parser) + let text = contents + .split('<') + .flat_map(|s| s.split('>').skip(1)) + .collect::>() + .join(" "); + + Ok(text) + } + + /// Extract text from CSV files + async fn extract_csv_text(&self, file_path: &Path) -> Result { + let contents = tokio::fs::read_to_string(file_path).await?; + + // Convert CSV rows to text + let mut text = String::new(); + for line in contents.lines() { + text.push_str(line); + text.push('\n'); + } + + Ok(text) + } + + /// Extract text from JSON files + async fn extract_json_text(&self, file_path: &Path) -> Result { + let contents = tokio::fs::read_to_string(file_path).await?; + + // Parse JSON and extract all string values + if let Ok(json) = serde_json::from_str::(&contents) { + Ok(self.extract_json_strings(&json)) + } else { + Ok(contents) + } + } + + /// Recursively extract string values from JSON + fn extract_json_strings(&self, value: &serde_json::Value) -> String { + let mut result = String::new(); + + match value { + serde_json::Value::String(s) => { + result.push_str(s); + result.push(' '); + } + serde_json::Value::Array(arr) => { + for item in arr { + result.push_str(&self.extract_json_strings(item)); + } + } + 
serde_json::Value::Object(map) => { + for (_key, val) in map { + result.push_str(&self.extract_json_strings(val)); + } + } + _ => {} + } + + result + } + + /// Fallback text extraction for unsupported formats + async fn fallback_text_extraction(&self, file_path: &Path) -> Result { + // Try to read as UTF-8 text + match tokio::fs::read_to_string(file_path).await { + Ok(contents) => Ok(contents), + Err(_) => { + // If not UTF-8, try with lossy conversion + let bytes = tokio::fs::read(file_path).await?; + Ok(String::from_utf8_lossy(&bytes).to_string()) + } + } + } + + /// Clean and normalize extracted text + fn clean_text(&self, text: &str) -> String { + // Remove multiple spaces and normalize whitespace + let cleaned = text + .lines() + .map(|line| line.trim()) + .filter(|line| !line.is_empty()) + .collect::>() + .join("\n"); + + // Remove control characters + cleaned + .chars() + .filter(|c| !c.is_control() || c.is_whitespace()) + .collect::() + .split_whitespace() + .collect::>() + .join(" ") + } + + /// Create overlapping chunks from text + fn create_chunks(&self, text: &str, file_path: &Path) -> Vec { + let mut chunks = Vec::new(); + let chars: Vec = text.chars().collect(); + let total_chars = chars.len(); + + if total_chars == 0 { + return chunks; + } + + let mut start = 0; + let mut chunk_index = 0; + + // Calculate total number of chunks for metadata + let step_size = self.chunk_size.saturating_sub(self.chunk_overlap); + let total_chunks = if step_size > 0 { + (total_chars + step_size - 1) / step_size + } else { + 1 + }; + + while start < total_chars { + let end = std::cmp::min(start + self.chunk_size, total_chars); + + // Find word boundary for clean cuts + let mut chunk_end = end; + if end < total_chars { + // Look for word boundary + for i in (start..end).rev() { + if chars[i].is_whitespace() { + chunk_end = i + 1; + break; + } + } + } + + let chunk_content: String = chars[start..chunk_end].iter().collect(); + + chunks.push(TextChunk { + content: 
chunk_content, + metadata: ChunkMetadata { + document_path: file_path.to_string_lossy().to_string(), + document_title: file_path + .file_stem() + .and_then(|s| s.to_str()) + .map(|s| s.to_string()), + chunk_index, + total_chunks, + start_char: start, + end_char: chunk_end, + page_number: None, // Would be set for PDFs with page info + }, + }); + + chunk_index += 1; + + // Move forward by chunk_size - overlap + start = if chunk_end >= self.chunk_overlap { + chunk_end - self.chunk_overlap + } else { + chunk_end + }; + + // Prevent infinite loop + if start >= total_chars { + break; + } + } + + chunks + } + + /// Process all documents in a knowledge base folder + pub async fn process_kb_folder( + &self, + kb_path: &Path, + ) -> Result>> { + let mut results = HashMap::new(); + + if !kb_path.exists() { + return Err(anyhow::anyhow!( + "Knowledge base folder not found: {:?}", + kb_path + )); + } + + info!("Processing knowledge base folder: {:?}", kb_path); + + // Recursively process all files + self.process_directory_recursive(kb_path, &mut results) + .await?; + + info!("Processed {} documents in knowledge base", results.len()); + + Ok(results) + } + + /// Recursively process directory + fn process_directory_recursive<'a>( + &'a self, + dir: &'a Path, + results: &'a mut HashMap>, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { + let mut entries = tokio::fs::read_dir(dir).await?; + + while let Some(entry) = entries.next_entry().await? 
{ + let path = entry.path(); + let metadata = entry.metadata().await?; + + if metadata.is_dir() { + // Recurse into subdirectory + self.process_directory_recursive(&path, results).await?; + } else if metadata.is_file() { + // Check if this is a supported format + if DocumentFormat::from_extension(&path).is_some() { + match self.process_document(&path).await { + Ok(chunks) => { + let key = path.to_string_lossy().to_string(); + results.insert(key, chunks); + } + Err(e) => { + error!("Failed to process document {:?}: {}", path, e); + } + } + } + } + } + + Ok(()) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_chunk_creation() { + let processor = DocumentProcessor::default(); + let text = "This is a test document with some content that needs to be chunked properly. " + .repeat(20); + let chunks = processor.create_chunks(&text, Path::new("test.txt")); + + // Verify chunks are created + assert!(!chunks.is_empty()); + + // Verify chunk size + for chunk in &chunks { + assert!(chunk.content.len() <= processor.chunk_size); + } + + // Verify overlap exists + if chunks.len() > 1 { + let first_end = &chunks[0].content[chunks[0].content.len().saturating_sub(100)..]; + let second_start = &chunks[1].content[..100.min(chunks[1].content.len())]; + + // There should be some overlap + assert!(first_end.chars().any(|c| second_start.contains(c))); + } + } + + #[test] + fn test_format_detection() { + assert_eq!( + DocumentFormat::from_extension(Path::new("test.pdf")), + Some(DocumentFormat::PDF) + ); + assert_eq!( + DocumentFormat::from_extension(Path::new("test.docx")), + Some(DocumentFormat::DOCX) + ); + assert_eq!( + DocumentFormat::from_extension(Path::new("test.txt")), + Some(DocumentFormat::TXT) + ); + assert_eq!( + DocumentFormat::from_extension(Path::new("test.md")), + Some(DocumentFormat::MD) + ); + assert_eq!( + DocumentFormat::from_extension(Path::new("test.unknown")), + None + ); + } + + #[test] + fn test_text_cleaning() { + let processor = 
DocumentProcessor::default(); + let dirty_text = " This is\n\n\na test\r\nwith multiple spaces "; + let cleaned = processor.clean_text(dirty_text); + assert_eq!(cleaned, "This is a test with multiple spaces"); + } +} diff --git a/src/core/kb/embedding_generator.rs b/src/core/kb/embedding_generator.rs new file mode 100644 index 000000000..daf65f9c0 --- /dev/null +++ b/src/core/kb/embedding_generator.rs @@ -0,0 +1,443 @@ +use anyhow::{Context, Result}; +use log::{debug, info, warn}; +use reqwest::Client; +use serde::{Deserialize, Serialize}; + +use std::sync::Arc; +use tokio::sync::Semaphore; + +use super::document_processor::TextChunk; + +/// Embedding model configuration +#[derive(Debug, Clone)] +pub struct EmbeddingConfig { + /// URL for the embedding service (e.g., http://localhost:8082) + pub embedding_url: String, + /// Model name/path for embeddings (e.g., bge-small-en-v1.5) + pub embedding_model: String, + /// Dimension of embeddings (e.g., 384, 768, 1536) + pub dimensions: usize, + /// Maximum batch size for embedding generation + pub batch_size: usize, + /// Request timeout in seconds + pub timeout_seconds: u64, +} + +impl Default for EmbeddingConfig { + fn default() -> Self { + Self { + embedding_url: "http://localhost:8082".to_string(), + embedding_model: "bge-small-en-v1.5".to_string(), + dimensions: 384, // Default for bge-small + batch_size: 32, + timeout_seconds: 30, + } + } +} + +impl EmbeddingConfig { + /// Create config from environment or config.csv values + pub fn from_env() -> Self { + let embedding_url = + std::env::var("EMBEDDING_URL").unwrap_or_else(|_| "http://localhost:8082".to_string()); + + let embedding_model = + std::env::var("EMBEDDING_MODEL").unwrap_or_else(|_| "bge-small-en-v1.5".to_string()); + + // Detect dimensions based on model name + let dimensions = Self::detect_dimensions(&embedding_model); + + Self { + embedding_url, + embedding_model, + dimensions, + batch_size: 32, + timeout_seconds: 30, + } + } + + /// Detect embedding 
dimensions based on model name + fn detect_dimensions(model: &str) -> usize { + if model.contains("small") || model.contains("MiniLM") { + 384 + } else if model.contains("base") || model.contains("mpnet") { + 768 + } else if model.contains("large") || model.contains("ada") { + 1536 + } else { + 384 // Default + } + } +} + +/// Request payload for embedding generation +#[derive(Debug, Serialize)] +struct EmbeddingRequest { + input: Vec, + model: String, +} + +/// Response from embedding service +#[derive(Debug, Deserialize)] +struct EmbeddingResponse { + data: Vec, + model: String, + usage: Option, +} + +#[derive(Debug, Deserialize)] +struct EmbeddingData { + embedding: Vec, + index: usize, +} + +#[derive(Debug, Deserialize)] +struct EmbeddingUsage { + prompt_tokens: usize, + total_tokens: usize, +} + +/// Generated embedding with metadata +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Embedding { + pub vector: Vec, + pub dimensions: usize, + pub model: String, + pub tokens_used: Option, +} + +/// Knowledge base embedding generator +pub struct KbEmbeddingGenerator { + config: EmbeddingConfig, + client: Client, + semaphore: Arc, +} + +impl std::fmt::Debug for KbEmbeddingGenerator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KbEmbeddingGenerator") + .field("config", &self.config) + .field("client", &"Client") + .field("semaphore", &"Semaphore") + .finish() + } +} + +impl KbEmbeddingGenerator { + pub fn new(config: EmbeddingConfig) -> Self { + let client = Client::builder() + .timeout(std::time::Duration::from_secs(config.timeout_seconds)) + .build() + .expect("Failed to create HTTP client"); + + // Limit concurrent requests + let semaphore = Arc::new(Semaphore::new(4)); + + Self { + config, + client, + semaphore, + } + } + + /// Generate embeddings for text chunks + pub async fn generate_embeddings( + &self, + chunks: &[TextChunk], + ) -> Result> { + if chunks.is_empty() { + return Ok(Vec::new()); + } + + 
info!("Generating embeddings for {} chunks", chunks.len()); + + let mut results = Vec::new(); + + // Process in batches + for batch in chunks.chunks(self.config.batch_size) { + let batch_embeddings = self.generate_batch_embeddings(batch).await?; + + // Pair chunks with their embeddings + for (chunk, embedding) in batch.iter().zip(batch_embeddings.iter()) { + results.push((chunk.clone(), embedding.clone())); + } + } + + info!("Generated {} embeddings", results.len()); + + Ok(results) + } + + /// Generate embeddings for a batch of chunks + async fn generate_batch_embeddings(&self, chunks: &[TextChunk]) -> Result> { + let _permit = self.semaphore.acquire().await?; + + let texts: Vec = chunks.iter().map(|c| c.content.clone()).collect(); + + debug!("Generating embeddings for batch of {} texts", texts.len()); + + // Try local embedding service first + match self.generate_local_embeddings(&texts).await { + Ok(embeddings) => Ok(embeddings), + Err(e) => { + warn!("Local embedding service failed: {}, trying OpenAI API", e); + self.generate_openai_embeddings(&texts).await + } + } + } + + /// Generate embeddings using local service + async fn generate_local_embeddings(&self, texts: &[String]) -> Result> { + let request = EmbeddingRequest { + input: texts.to_vec(), + model: self.config.embedding_model.clone(), + }; + + let response = self + .client + .post(&format!("{}/embeddings", self.config.embedding_url)) + .json(&request) + .send() + .await + .context("Failed to send request to embedding service")?; + + if !response.status().is_success() { + let status = response.status(); + let error_text = response.text().await.unwrap_or_default(); + return Err(anyhow::anyhow!( + "Embedding service error {}: {}", + status, + error_text + )); + } + + let embedding_response: EmbeddingResponse = response + .json() + .await + .context("Failed to parse embedding response")?; + + let mut embeddings = Vec::new(); + for data in embedding_response.data { + embeddings.push(Embedding { + vector: 
data.embedding, + dimensions: self.config.dimensions, + model: embedding_response.model.clone(), + tokens_used: embedding_response.usage.as_ref().map(|u| u.total_tokens), + }); + } + + Ok(embeddings) + } + + /// Generate embeddings using OpenAI API (fallback) + async fn generate_openai_embeddings(&self, texts: &[String]) -> Result> { + let api_key = std::env::var("OPENAI_API_KEY") + .context("OPENAI_API_KEY not set for fallback embedding generation")?; + + let request = serde_json::json!({ + "input": texts, + "model": "text-embedding-ada-002" + }); + + let response = self + .client + .post("https://api.openai.com/v1/embeddings") + .header("Authorization", format!("Bearer {}", api_key)) + .json(&request) + .send() + .await + .context("Failed to send request to OpenAI")?; + + if !response.status().is_success() { + let status = response.status(); + let error_text = response.text().await.unwrap_or_default(); + return Err(anyhow::anyhow!( + "OpenAI API error {}: {}", + status, + error_text + )); + } + + let response_json: serde_json::Value = response + .json() + .await + .context("Failed to parse OpenAI response")?; + + let mut embeddings = Vec::new(); + + if let Some(data) = response_json["data"].as_array() { + for item in data { + if let Some(embedding) = item["embedding"].as_array() { + let vector: Vec = embedding + .iter() + .filter_map(|v| v.as_f64().map(|f| f as f32)) + .collect(); + + embeddings.push(Embedding { + vector, + dimensions: 1536, // OpenAI ada-002 dimensions + model: "text-embedding-ada-002".to_string(), + tokens_used: response_json["usage"]["total_tokens"] + .as_u64() + .map(|t| t as usize), + }); + } + } + } + + Ok(embeddings) + } + + /// Generate embedding for a single text + pub async fn generate_single_embedding(&self, text: &str) -> Result { + let embeddings = self + .generate_batch_embeddings(&[TextChunk { + content: text.to_string(), + metadata: super::document_processor::ChunkMetadata { + document_path: "query".to_string(), + document_title: 
None, + chunk_index: 0, + total_chunks: 1, + start_char: 0, + end_char: text.len(), + page_number: None, + }, + }]) + .await?; + + embeddings + .into_iter() + .next() + .ok_or_else(|| anyhow::anyhow!("No embedding generated")) + } +} + +/// Generic embedding generator for other uses (email, etc.) +pub struct EmbeddingGenerator { + kb_generator: KbEmbeddingGenerator, +} + +impl std::fmt::Debug for EmbeddingGenerator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EmbeddingGenerator") + .field("kb_generator", &self.kb_generator) + .finish() + } +} + +impl EmbeddingGenerator { + pub fn new(llm_endpoint: String) -> Self { + let config = EmbeddingConfig { + embedding_url: llm_endpoint, + ..Default::default() + }; + + Self { + kb_generator: KbEmbeddingGenerator::new(config), + } + } + + /// Generate embedding for arbitrary text + pub async fn generate_text_embedding(&self, text: &str) -> Result> { + let embedding = self.kb_generator.generate_single_embedding(text).await?; + Ok(embedding.vector) + } +} + +/// Email-specific embedding generator (for compatibility) +pub struct EmailEmbeddingGenerator { + generator: EmbeddingGenerator, +} + +impl std::fmt::Debug for EmailEmbeddingGenerator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EmailEmbeddingGenerator") + .field("generator", &self.generator) + .finish() + } +} + +impl EmailEmbeddingGenerator { + pub fn new(llm_endpoint: String) -> Self { + Self { + generator: EmbeddingGenerator::new(llm_endpoint), + } + } + + /// Generate embedding for email content + pub async fn generate_embedding(&self, email: &impl EmailLike) -> Result> { + let text = format!( + "Subject: {}\nFrom: {}\nTo: {}\n\n{}", + email.subject(), + email.from(), + email.to(), + email.body() + ); + + self.generator.generate_text_embedding(&text).await + } + + /// Generate embedding for text + pub async fn generate_text_embedding(&self, text: &str) -> Result> { + 
self.generator.generate_text_embedding(text).await + } +} + +/// Trait for email-like objects +pub trait EmailLike { + fn subject(&self) -> &str; + fn from(&self) -> &str; + fn to(&self) -> &str; + fn body(&self) -> &str; +} + +/// Simple email struct for testing +#[derive(Debug)] +pub struct SimpleEmail { + pub id: String, + pub subject: String, + pub from: String, + pub to: String, + pub body: String, +} + +impl EmailLike for SimpleEmail { + fn subject(&self) -> &str { + &self.subject + } + fn from(&self) -> &str { + &self.from + } + fn to(&self) -> &str { + &self.to + } + fn body(&self) -> &str { + &self.body + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dimension_detection() { + assert_eq!(EmbeddingConfig::detect_dimensions("bge-small-en"), 384); + assert_eq!(EmbeddingConfig::detect_dimensions("all-mpnet-base-v2"), 768); + assert_eq!( + EmbeddingConfig::detect_dimensions("text-embedding-ada-002"), + 1536 + ); + assert_eq!(EmbeddingConfig::detect_dimensions("unknown-model"), 384); + } + + #[tokio::test] + async fn test_text_cleaning_for_embedding() { + let text = "This is a test\n\nWith multiple lines"; + let generator = EmbeddingGenerator::new("http://localhost:8082".to_string()); + + // This would test actual embedding generation if service is available + // For unit tests, we just verify the structure is correct + assert!(!text.is_empty()); + } +} diff --git a/src/core/kb/kb_indexer.rs b/src/core/kb/kb_indexer.rs new file mode 100644 index 000000000..f2f778acf --- /dev/null +++ b/src/core/kb/kb_indexer.rs @@ -0,0 +1,546 @@ +use anyhow::Result; +use log::{debug, info, warn}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use uuid::Uuid; + +use super::document_processor::{DocumentProcessor, TextChunk}; +use super::embedding_generator::{Embedding, EmbeddingConfig, KbEmbeddingGenerator}; + +/// Qdrant client configuration +#[derive(Debug, Clone)] +pub struct QdrantConfig { + pub 
url: String, + pub api_key: Option, + pub timeout_secs: u64, +} + +impl Default for QdrantConfig { + fn default() -> Self { + Self { + url: std::env::var("QDRANT_URL") + .unwrap_or_else(|_| "http://localhost:6333".to_string()), + api_key: std::env::var("QDRANT_API_KEY").ok(), + timeout_secs: 30, + } + } +} + +/// Point structure for Qdrant +#[derive(Debug, Serialize, Deserialize)] +pub struct QdrantPoint { + pub id: String, + pub vector: Vec, + pub payload: HashMap, +} + +/// Collection configuration for Qdrant +#[derive(Debug, Serialize)] +pub struct CollectionConfig { + pub vectors: VectorConfig, + pub replication_factor: u32, + pub shard_number: u32, +} + +#[derive(Debug, Serialize)] +pub struct VectorConfig { + pub size: usize, + pub distance: String, +} + +/// Search request structure +#[derive(Debug, Serialize)] +pub struct SearchRequest { + pub vector: Vec, + pub limit: usize, + pub with_payload: bool, + pub score_threshold: Option, + pub filter: Option, +} + +/// Knowledge Base Indexer for Qdrant +pub struct KbIndexer { + document_processor: DocumentProcessor, + embedding_generator: KbEmbeddingGenerator, + qdrant_config: QdrantConfig, + http_client: reqwest::Client, +} + +impl std::fmt::Debug for KbIndexer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KbIndexer") + .field("document_processor", &self.document_processor) + .field("embedding_generator", &self.embedding_generator) + .field("qdrant_config", &self.qdrant_config) + .field("http_client", &"reqwest::Client") + .finish() + } +} + +impl KbIndexer { + pub fn new(embedding_config: EmbeddingConfig, qdrant_config: QdrantConfig) -> Self { + let document_processor = DocumentProcessor::default(); + let embedding_generator = KbEmbeddingGenerator::new(embedding_config); + + let http_client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(qdrant_config.timeout_secs)) + .build() + .expect("Failed to create HTTP client"); + + Self { + 
document_processor, + embedding_generator, + qdrant_config, + http_client, + } + } + + /// Index a knowledge base folder + pub async fn index_kb_folder( + &self, + bot_name: &str, + kb_name: &str, + kb_path: &Path, + ) -> Result { + info!("Indexing KB folder: {} for bot {}", kb_name, bot_name); + + // Create collection name + let collection_name = format!("{}_{}", bot_name, kb_name); + + // Ensure collection exists + self.ensure_collection_exists(&collection_name).await?; + + // Process all documents in the folder + let documents = self.document_processor.process_kb_folder(kb_path).await?; + + let mut total_chunks = 0; + let mut indexed_documents = 0; + + for (doc_path, chunks) in documents { + if chunks.is_empty() { + continue; + } + + info!( + "Processing document: {} ({} chunks)", + doc_path, + chunks.len() + ); + + // Generate embeddings for chunks + let embeddings = self + .embedding_generator + .generate_embeddings(&chunks) + .await?; + + // Create points for Qdrant + let points = self.create_qdrant_points(&doc_path, embeddings)?; + + // Upsert points to collection + self.upsert_points(&collection_name, points).await?; + + total_chunks += chunks.len(); + indexed_documents += 1; + } + + // Update collection info in database + self.update_collection_metadata(&collection_name, bot_name, kb_name, total_chunks) + .await?; + + Ok(IndexingResult { + collection_name, + documents_processed: indexed_documents, + chunks_indexed: total_chunks, + }) + } + + /// Ensure Qdrant collection exists + async fn ensure_collection_exists(&self, collection_name: &str) -> Result<()> { + // Check if collection exists + let check_url = format!("{}/collections/{}", self.qdrant_config.url, collection_name); + + let response = self.http_client.get(&check_url).send().await?; + + if response.status().is_success() { + info!("Collection {} already exists", collection_name); + return Ok(()); + } + + // Create collection + info!("Creating collection: {}", collection_name); + + let config = 
CollectionConfig { + vectors: VectorConfig { + size: 384, // Default for bge-small, should be configurable + distance: "Cosine".to_string(), + }, + replication_factor: 1, + shard_number: 1, + }; + + let create_url = format!("{}/collections/{}", self.qdrant_config.url, collection_name); + + let response = self + .http_client + .put(&create_url) + .json(&config) + .send() + .await?; + + if !response.status().is_success() { + let error_text = response.text().await.unwrap_or_default(); + return Err(anyhow::anyhow!( + "Failed to create collection: {}", + error_text + )); + } + + // Create indexes for better performance + self.create_collection_indexes(collection_name).await?; + + Ok(()) + } + + /// Create indexes for collection + async fn create_collection_indexes(&self, collection_name: &str) -> Result<()> { + // Create HNSW index for vector search + let index_config = serde_json::json!({ + "hnsw_config": { + "m": 16, + "ef_construct": 200, + "full_scan_threshold": 10000 + } + }); + + let index_url = format!( + "{}/collections/{}/index", + self.qdrant_config.url, collection_name + ); + + let response = self + .http_client + .put(&index_url) + .json(&index_config) + .send() + .await?; + + if !response.status().is_success() { + warn!("Failed to create index, using defaults"); + } + + Ok(()) + } + + /// Create Qdrant points from chunks and embeddings + fn create_qdrant_points( + &self, + doc_path: &str, + embeddings: Vec<(TextChunk, Embedding)>, + ) -> Result> { + let mut points = Vec::new(); + + for (chunk, embedding) in embeddings { + let point_id = Uuid::new_v4().to_string(); + + let mut payload = HashMap::new(); + payload.insert( + "content".to_string(), + serde_json::Value::String(chunk.content), + ); + payload.insert( + "document_path".to_string(), + serde_json::Value::String(doc_path.to_string()), + ); + payload.insert( + "chunk_index".to_string(), + serde_json::Value::Number(chunk.metadata.chunk_index.into()), + ); + payload.insert( + "total_chunks".to_string(), + 
serde_json::Value::Number(chunk.metadata.total_chunks.into()), + ); + payload.insert( + "start_char".to_string(), + serde_json::Value::Number(chunk.metadata.start_char.into()), + ); + payload.insert( + "end_char".to_string(), + serde_json::Value::Number(chunk.metadata.end_char.into()), + ); + + if let Some(title) = chunk.metadata.document_title { + payload.insert( + "document_title".to_string(), + serde_json::Value::String(title), + ); + } + + points.push(QdrantPoint { + id: point_id, + vector: embedding.vector, + payload, + }); + } + + Ok(points) + } + + /// Upsert points to Qdrant collection + async fn upsert_points(&self, collection_name: &str, points: Vec) -> Result<()> { + if points.is_empty() { + return Ok(()); + } + + let batch_size = 100; // Qdrant recommended batch size + + for batch in points.chunks(batch_size) { + let upsert_request = serde_json::json!({ + "points": batch + }); + + let upsert_url = format!( + "{}/collections/{}/points?wait=true", + self.qdrant_config.url, collection_name + ); + + let response = self + .http_client + .put(&upsert_url) + .json(&upsert_request) + .send() + .await?; + + if !response.status().is_success() { + let error_text = response.text().await.unwrap_or_default(); + return Err(anyhow::anyhow!("Failed to upsert points: {}", error_text)); + } + } + + debug!( + "Upserted {} points to collection {}", + points.len(), + collection_name + ); + + Ok(()) + } + + /// Update collection metadata in database + async fn update_collection_metadata( + &self, + collection_name: &str, + bot_name: &str, + kb_name: &str, + document_count: usize, + ) -> Result<()> { + // This would update the kb_collections table + // For now, just log the information + info!( + "Updated collection {} metadata: bot={}, kb={}, docs={}", + collection_name, bot_name, kb_name, document_count + ); + + Ok(()) + } + + /// Search for similar chunks in a collection + pub async fn search( + &self, + collection_name: &str, + query: &str, + limit: usize, + ) -> Result> { 
+ // Generate embedding for query + let embedding = self + .embedding_generator + .generate_single_embedding(query) + .await?; + + // Create search request + let search_request = SearchRequest { + vector: embedding.vector, + limit, + with_payload: true, + score_threshold: Some(0.5), // Minimum similarity threshold + filter: None, + }; + + let search_url = format!( + "{}/collections/{}/points/search", + self.qdrant_config.url, collection_name + ); + + let response = self + .http_client + .post(&search_url) + .json(&search_request) + .send() + .await?; + + if !response.status().is_success() { + let error_text = response.text().await.unwrap_or_default(); + return Err(anyhow::anyhow!("Search failed: {}", error_text)); + } + + let response_json: serde_json::Value = response.json().await?; + + let mut results = Vec::new(); + + if let Some(result_array) = response_json["result"].as_array() { + for item in result_array { + if let (Some(score), Some(payload)) = + (item["score"].as_f64(), item["payload"].as_object()) + { + let content = payload + .get("content") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let document_path = payload + .get("document_path") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + results.push(SearchResult { + content, + document_path, + score: score as f32, + metadata: payload.clone(), + }); + } + } + } + + Ok(results) + } + + /// Delete a collection + pub async fn delete_collection(&self, collection_name: &str) -> Result<()> { + let delete_url = format!("{}/collections/{}", self.qdrant_config.url, collection_name); + + let response = self.http_client.delete(&delete_url).send().await?; + + if !response.status().is_success() { + let error_text = response.text().await.unwrap_or_default(); + warn!( + "Failed to delete collection {}: {}", + collection_name, error_text + ); + } + + Ok(()) + } +} + +/// Result of indexing operation +#[derive(Debug)] +pub struct IndexingResult { + pub collection_name: String, + pub 
documents_processed: usize, + pub chunks_indexed: usize, +} + +/// Search result from vector database +#[derive(Debug, Clone)] +pub struct SearchResult { + pub content: String, + pub document_path: String, + pub score: f32, + pub metadata: serde_json::Map, +} + +/// Monitor for .gbkb folder changes and trigger indexing +#[derive(Debug)] +pub struct KbFolderMonitor { + indexer: KbIndexer, + work_root: PathBuf, +} + +impl KbFolderMonitor { + pub fn new(work_root: PathBuf, embedding_config: EmbeddingConfig) -> Self { + let qdrant_config = QdrantConfig::default(); + let indexer = KbIndexer::new(embedding_config, qdrant_config); + + Self { indexer, work_root } + } + + /// Process a .gbkb folder that was detected by drive monitor + pub async fn process_gbkb_folder(&self, bot_name: &str, kb_folder: &Path) -> Result<()> { + // Extract KB name from folder path + let kb_name = kb_folder + .file_name() + .and_then(|n| n.to_str()) + .ok_or_else(|| anyhow::anyhow!("Invalid KB folder name"))?; + + info!("Processing .gbkb folder: {} for bot {}", kb_name, bot_name); + + // Build local work path + let local_path = self + .work_root + .join(bot_name) + .join(format!("{}.gbkb", bot_name)) + .join(kb_name); + + // Index the folder + let result = self + .indexer + .index_kb_folder(bot_name, kb_name, &local_path) + .await?; + + info!( + "Indexed {} documents ({} chunks) into collection {}", + result.documents_processed, result.chunks_indexed, result.collection_name + ); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_collection_name_generation() { + let bot_name = "mybot"; + let kb_name = "docs"; + let collection_name = format!("{}_{}", bot_name, kb_name); + assert_eq!(collection_name, "mybot_docs"); + } + + #[test] + fn test_qdrant_point_creation() { + let chunk = TextChunk { + content: "Test content".to_string(), + metadata: super::super::document_processor::ChunkMetadata { + document_path: "test.txt".to_string(), + document_title: 
Some("Test".to_string()), + chunk_index: 0, + total_chunks: 1, + start_char: 0, + end_char: 12, + page_number: None, + }, + }; + + let embedding = Embedding { + vector: vec![0.1, 0.2, 0.3], + dimensions: 3, + model: "test".to_string(), + tokens_used: None, + }; + + let indexer = KbIndexer::new(EmbeddingConfig::default(), QdrantConfig::default()); + + let points = indexer + .create_qdrant_points("test.txt", vec![(chunk, embedding)]) + .unwrap(); + + assert_eq!(points.len(), 1); + assert_eq!(points[0].vector.len(), 3); + assert!(points[0].payload.contains_key("content")); + } +} diff --git a/src/core/kb/mod.rs b/src/core/kb/mod.rs new file mode 100644 index 000000000..8d5232197 --- /dev/null +++ b/src/core/kb/mod.rs @@ -0,0 +1,215 @@ +pub mod document_processor; +pub mod embedding_generator; +pub mod kb_indexer; +pub mod web_crawler; +pub mod website_crawler_service; + +pub use document_processor::{DocumentFormat, DocumentProcessor, TextChunk}; +pub use embedding_generator::{ + EmailEmbeddingGenerator, EmbeddingConfig, EmbeddingGenerator, KbEmbeddingGenerator, +}; +pub use kb_indexer::{KbFolderMonitor, KbIndexer, QdrantConfig, SearchResult}; +pub use web_crawler::{WebCrawler, WebPage, WebsiteCrawlConfig}; +pub use website_crawler_service::{ensure_crawler_service_running, WebsiteCrawlerService}; + +use anyhow::Result; +use log::{error, info, warn}; +use std::path::Path; +use std::sync::Arc; +use tokio::sync::RwLock; + +/// Main Knowledge Base manager +#[derive(Debug)] +pub struct KnowledgeBaseManager { + indexer: Arc, + processor: Arc, + monitor: Arc>, +} + +impl KnowledgeBaseManager { + /// Create new KB manager with default configuration + pub fn new(work_root: impl Into) -> Self { + let work_root = work_root.into(); + let embedding_config = EmbeddingConfig::from_env(); + let qdrant_config = QdrantConfig::default(); + + let indexer = Arc::new(KbIndexer::new(embedding_config.clone(), qdrant_config)); + let processor = Arc::new(DocumentProcessor::default()); + let 
monitor = Arc::new(RwLock::new(KbFolderMonitor::new( + work_root, + embedding_config, + ))); + + Self { + indexer, + processor, + monitor, + } + } + + /// Process and index a knowledge base folder + pub async fn index_kb_folder( + &self, + bot_name: &str, + kb_name: &str, + kb_path: &Path, + ) -> Result<()> { + info!( + "Indexing knowledge base: {} for bot {} from path: {:?}", + kb_name, bot_name, kb_path + ); + + // Index the folder using the indexer + let result = self + .indexer + .index_kb_folder(bot_name, kb_name, kb_path) + .await?; + + info!( + "Successfully indexed {} documents with {} chunks into collection {}", + result.documents_processed, result.chunks_indexed, result.collection_name + ); + + Ok(()) + } + + /// Search in a knowledge base + pub async fn search( + &self, + bot_name: &str, + kb_name: &str, + query: &str, + limit: usize, + ) -> Result> { + let collection_name = format!("{}_{}", bot_name, kb_name); + self.indexer.search(&collection_name, query, limit).await + } + + /// Process a single document + pub async fn process_document(&self, file_path: &Path) -> Result> { + self.processor.process_document(file_path).await + } + + /// Handle .gbkb folder change notification from drive monitor + pub async fn handle_gbkb_change(&self, bot_name: &str, kb_folder: &Path) -> Result<()> { + info!( + "Handling .gbkb folder change for bot {} at {:?}", + bot_name, kb_folder + ); + + let monitor = self.monitor.read().await; + monitor.process_gbkb_folder(bot_name, kb_folder).await + } + + /// Clear a knowledge base collection + pub async fn clear_kb(&self, bot_name: &str, kb_name: &str) -> Result<()> { + let collection_name = format!("{}_{}", bot_name, kb_name); + + warn!("Clearing knowledge base collection: {}", collection_name); + + match self.indexer.delete_collection(&collection_name).await { + Ok(_) => { + info!("Successfully cleared collection: {}", collection_name); + Ok(()) + } + Err(e) => { + error!("Failed to clear collection {}: {}", collection_name, 
e); + Err(e) + } + } + } + + /// Get collection statistics + pub async fn get_kb_stats(&self, bot_name: &str, kb_name: &str) -> Result { + let collection_name = format!("{}_{}", bot_name, kb_name); + + // This would query Qdrant for collection statistics + // For now, return placeholder stats + Ok(KbStatistics { + collection_name, + document_count: 0, + chunk_count: 0, + total_size_bytes: 0, + }) + } +} + +/// Statistics for a knowledge base +#[derive(Debug, Clone)] +pub struct KbStatistics { + pub collection_name: String, + pub document_count: usize, + pub chunk_count: usize, + pub total_size_bytes: usize, +} + +/// Integration with drive monitor +pub struct DriveMonitorIntegration { + kb_manager: Arc, +} + +impl DriveMonitorIntegration { + pub fn new(kb_manager: Arc) -> Self { + Self { kb_manager } + } + + /// Called when drive monitor detects changes in .gbkb folder + pub async fn on_gbkb_folder_changed( + &self, + bot_name: &str, + folder_path: &Path, + change_type: ChangeType, + ) -> Result<()> { + match change_type { + ChangeType::Created | ChangeType::Modified => { + info!( + "Drive monitor detected {:?} in .gbkb folder: {:?}", + change_type, folder_path + ); + self.kb_manager + .handle_gbkb_change(bot_name, folder_path) + .await + } + ChangeType::Deleted => { + // Extract KB name from path + if let Some(kb_name) = folder_path.file_name().and_then(|n| n.to_str()) { + self.kb_manager.clear_kb(bot_name, kb_name).await + } else { + Err(anyhow::anyhow!("Invalid KB folder path")) + } + } + } + } +} + +/// Types of changes detected by drive monitor +#[derive(Debug, Clone, Copy)] +pub enum ChangeType { + Created, + Modified, + Deleted, +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[tokio::test] + async fn test_kb_manager_creation() { + let temp_dir = TempDir::new().unwrap(); + let manager = KnowledgeBaseManager::new(temp_dir.path()); + + // Test that manager is created successfully + assert!(manager.processor.chunk_size() == 1000); + 
assert!(manager.processor.chunk_overlap() == 200); + } + + #[test] + fn test_collection_naming() { + let bot_name = "testbot"; + let kb_name = "docs"; + let collection_name = format!("{}_{}", bot_name, kb_name); + assert_eq!(collection_name, "testbot_docs"); + } +} diff --git a/src/core/kb/web_crawler.rs b/src/core/kb/web_crawler.rs new file mode 100644 index 000000000..1fa581a68 --- /dev/null +++ b/src/core/kb/web_crawler.rs @@ -0,0 +1,346 @@ +use anyhow::Result; +use log::{info, trace, warn}; +use serde::{Deserialize, Serialize}; +use std::collections::HashSet; +use std::time::Duration; +use tokio::time::sleep; + +/// Website crawl configuration +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WebsiteCrawlConfig { + pub url: String, + pub max_depth: usize, + pub max_pages: usize, + pub crawl_delay_ms: u64, + pub expires_policy: String, + pub last_crawled: Option>, + pub next_crawl: Option>, +} + +impl WebsiteCrawlConfig { + /// Parse expiration policy and calculate next crawl time + pub fn calculate_next_crawl(&mut self) { + let now = chrono::Utc::now(); + self.last_crawled = Some(now); + + let duration = match self.expires_policy.as_str() { + "1h" => chrono::Duration::hours(1), + "6h" => chrono::Duration::hours(6), + "12h" => chrono::Duration::hours(12), + "1d" | "24h" => chrono::Duration::days(1), + "3d" => chrono::Duration::days(3), + "1w" | "7d" => chrono::Duration::weeks(1), + "2w" => chrono::Duration::weeks(2), + "1m" | "30d" => chrono::Duration::days(30), + "3m" => chrono::Duration::days(90), + "6m" => chrono::Duration::days(180), + "1y" | "365d" => chrono::Duration::days(365), + custom => { + // Simple parsing for custom format like "2h", "5d", etc. 
+ if custom.ends_with('h') { + if let Ok(hours) = custom[..custom.len() - 1].parse::() { + chrono::Duration::hours(hours) + } else { + chrono::Duration::days(1) + } + } else if custom.ends_with('d') { + if let Ok(days) = custom[..custom.len() - 1].parse::() { + chrono::Duration::days(days) + } else { + chrono::Duration::days(1) + } + } else if custom.ends_with('w') { + if let Ok(weeks) = custom[..custom.len() - 1].parse::() { + chrono::Duration::weeks(weeks) + } else { + chrono::Duration::days(1) + } + } else if custom.ends_with('m') { + if let Ok(months) = custom[..custom.len() - 1].parse::() { + chrono::Duration::days(months * 30) + } else { + chrono::Duration::days(1) + } + } else if custom.ends_with('y') { + if let Ok(years) = custom[..custom.len() - 1].parse::() { + chrono::Duration::days(years * 365) + } else { + chrono::Duration::days(1) + } + } else { + chrono::Duration::days(1) // Default to daily if unparseable + } + } + }; + + self.next_crawl = Some(now + duration); + } + + /// Check if website needs recrawling + pub fn needs_crawl(&self) -> bool { + match self.next_crawl { + Some(next) => chrono::Utc::now() >= next, + None => true, // Never crawled + } + } +} + +/// Website content for indexing +#[derive(Debug, Clone)] +pub struct WebPage { + pub url: String, + pub title: Option, + pub content: String, + pub meta_description: Option, + pub crawled_at: chrono::DateTime, +} + +/// Web crawler for website content +pub struct WebCrawler { + client: reqwest::Client, + config: WebsiteCrawlConfig, + visited_urls: HashSet, + pages: Vec, +} + +impl WebCrawler { + pub fn new(config: WebsiteCrawlConfig) -> Self { + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(30)) + .user_agent("GeneralBots/1.0 (Knowledge Base Crawler)") + .build() + .unwrap_or_default(); + + Self { + client, + config, + visited_urls: HashSet::new(), + pages: Vec::new(), + } + } + + /// Crawl website starting from configured URL + pub async fn crawl(&mut self) -> Result> 
{ + info!("Starting crawl of website: {}", self.config.url); + + // Start crawling from root URL + self.crawl_recursive(&self.config.url.clone(), 0).await?; + + info!( + "Crawled {} pages from {}", + self.pages.len(), + self.config.url + ); + + Ok(self.pages.clone()) + } + + /// Recursive crawling with depth control + async fn crawl_recursive(&mut self, url: &str, depth: usize) -> Result<()> { + // Check depth limit + if depth > self.config.max_depth { + trace!( + "Reached max depth {} for URL: {}", + self.config.max_depth, + url + ); + return Ok(()); + } + + // Check page limit + if self.pages.len() >= self.config.max_pages { + trace!("Reached max pages limit: {}", self.config.max_pages); + return Ok(()); + } + + // Check if already visited + if self.visited_urls.contains(url) { + return Ok(()); + } + + // Mark as visited + self.visited_urls.insert(url.to_string()); + + // Add crawl delay to be polite + if !self.visited_urls.is_empty() { + sleep(Duration::from_millis(self.config.crawl_delay_ms)).await; + } + + // Fetch page + let response = match self.client.get(url).send().await { + Ok(resp) => resp, + Err(e) => { + warn!("Failed to fetch {}: {}", url, e); + return Ok(()); // Continue crawling other pages + } + }; + + // Check if HTML + let content_type = response + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + + if !content_type.contains("text/html") { + trace!("Skipping non-HTML content: {}", url); + return Ok(()); + } + + // Get page content + let html_text = match response.text().await { + Ok(text) => text, + Err(e) => { + warn!("Failed to read response from {}: {}", url, e); + return Ok(()); + } + }; + + // Extract page content + let page = self.extract_page_content(&html_text, url); + self.pages.push(page); + + // Extract and crawl links if not at max depth + if depth < self.config.max_depth { + let links = self.extract_links(&html_text, url); + for link in links { + // Only crawl same domain + if 
self.is_same_domain(url, &link) { + Box::pin(self.crawl_recursive(&link, depth + 1)).await?; + } + } + } + + Ok(()) + } + + /// Extract text content from HTML + fn extract_page_content(&self, html: &str, url: &str) -> WebPage { + // Simple HTML tag removal + let mut text = html.to_string(); + + // Remove script and style tags with their content + while let Some(start) = text.find("") { + text.replace_range(start..=end + 8, " "); + } else { + break; + } + } + + while let Some(start) = text.find("") { + text.replace_range(start..=end + 7, " "); + } else { + break; + } + } + + // Extract title if present + let title = if let Some(title_start) = text.find("") { + if let Some(title_end) = text.find("") { + Some(text[title_start + 7..title_end].to_string()) + } else { + None + } + } else { + None + }; + + // Remove all remaining HTML tags + while let Some(start) = text.find('<') { + if let Some(end) = text.find('>') { + if end > start { + text.replace_range(start..=end, " "); + } else { + break; + } + } else { + break; + } + } + + // Clean up whitespace + let content = text.split_whitespace().collect::>().join(" "); + + WebPage { + url: url.to_string(), + title, + content, + meta_description: None, + crawled_at: chrono::Utc::now(), + } + } + + /// Extract links from HTML + fn extract_links(&self, html: &str, base_url: &str) -> Vec { + let mut links = Vec::new(); + let mut search_pos = 0; + + // Simple href extraction + while let Some(href_pos) = html[search_pos..].find("href=\"") { + let href_start = search_pos + href_pos + 6; + if let Some(href_end) = html[href_start..].find('"') { + let href = &html[href_start..href_start + href_end]; + + // Skip anchors, javascript, mailto, etc. 
+ if !href.starts_with('#') + && !href.starts_with("javascript:") + && !href.starts_with("mailto:") + && !href.starts_with("tel:") + { + // Convert relative URLs to absolute + let absolute_url = + if href.starts_with("http://") || href.starts_with("https://") { + href.to_string() + } else if href.starts_with('/') { + // Get base domain from base_url + if let Some(domain_end) = base_url[8..].find('/') { + format!("{}{}", &base_url[..8 + domain_end], href) + } else { + format!("{}{}", base_url, href) + } + } else { + // Relative to current page + if let Some(last_slash) = base_url.rfind('/') { + format!("{}/{}", &base_url[..last_slash], href) + } else { + format!("{}/{}", base_url, href) + } + }; + + links.push(absolute_url); + } + search_pos = href_start + href_end; + } else { + break; + } + } + + links + } + + /// Check if two URLs are from the same domain + fn is_same_domain(&self, url1: &str, url2: &str) -> bool { + let domain1 = self.extract_domain(url1); + let domain2 = self.extract_domain(url2); + domain1 == domain2 + } + + /// Extract domain from URL + fn extract_domain(&self, url: &str) -> String { + let without_protocol = if url.starts_with("https://") { + &url[8..] + } else if url.starts_with("http://") { + &url[7..] 
+ } else { + url + }; + + if let Some(slash_pos) = without_protocol.find('/') { + without_protocol[..slash_pos].to_string() + } else { + without_protocol.to_string() + } + } +} diff --git a/src/core/kb/website_crawler_service.rs b/src/core/kb/website_crawler_service.rs new file mode 100644 index 000000000..dd2b38005 --- /dev/null +++ b/src/core/kb/website_crawler_service.rs @@ -0,0 +1,287 @@ +use crate::config::ConfigManager; +use crate::core::kb::web_crawler::{WebCrawler, WebsiteCrawlConfig}; +use crate::core::kb::KnowledgeBaseManager; +use crate::shared::state::AppState; +use crate::shared::utils::DbPool; +use diesel::prelude::*; +use log::{error, info, warn}; +use std::sync::Arc; +use tokio::time::{interval, Duration}; +use uuid::Uuid; + +/// Service for periodically checking and recrawling websites +pub struct WebsiteCrawlerService { + db_pool: DbPool, + kb_manager: Arc, + check_interval: Duration, + running: Arc>, +} + +impl WebsiteCrawlerService { + pub fn new(db_pool: DbPool, kb_manager: Arc) -> Self { + Self { + db_pool, + kb_manager, + check_interval: Duration::from_secs(3600), // Check every hour + running: Arc::new(tokio::sync::RwLock::new(false)), + } + } + + /// Start the website crawler service + pub async fn start(self: Arc) -> tokio::task::JoinHandle<()> { + let service = Arc::clone(&self); + + tokio::spawn(async move { + info!("Website crawler service started"); + + let mut ticker = interval(service.check_interval); + + loop { + ticker.tick().await; + + // Check if already running + if *service.running.read().await { + warn!("Website crawler is already running, skipping this cycle"); + continue; + } + + // Set running flag + *service.running.write().await = true; + + // Check and crawl websites + if let Err(e) = service.check_and_crawl_websites().await { + error!("Error in website crawler service: {}", e); + } + + // Clear running flag + *service.running.write().await = false; + } + }) + } + + /// Check for websites that need recrawling + async fn 
check_and_crawl_websites(&self) -> Result<(), Box> { + info!("Checking for websites that need recrawling"); + + let mut conn = self.db_pool.get()?; + + // Query websites that need recrawling + let websites = diesel::sql_query( + "SELECT id, bot_id, url, expires_policy, max_depth, max_pages + FROM website_crawls + WHERE next_crawl <= NOW() + AND crawl_status != 2 + ORDER BY next_crawl ASC + LIMIT 10", + ) + .load::(&mut conn)?; + + info!("Found {} websites to recrawl", websites.len()); + + for website in websites { + // Mark as processing (status = 2) + diesel::sql_query("UPDATE website_crawls SET crawl_status = 2 WHERE id = $1") + .bind::(&website.id) + .execute(&mut conn)?; + + // Spawn crawl task + let kb_manager = Arc::clone(&self.kb_manager); + let db_pool = self.db_pool.clone(); + + tokio::spawn(async move { + if let Err(e) = Self::crawl_website(website, kb_manager, db_pool).await { + error!("Failed to crawl website: {}", e); + } + }); + } + + Ok(()) + } + + /// Crawl a single website + async fn crawl_website( + website: WebsiteCrawlRecord, + kb_manager: Arc, + db_pool: DbPool, + ) -> Result<(), Box> { + info!("Starting crawl for website: {}", website.url); + + // Get bot configuration for max_depth and max_pages + let config_manager = ConfigManager::new(db_pool.clone()); + + let website_max_depth = config_manager + .get_bot_config_value(&website.bot_id, "website-max-depth") + .await + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(website.max_depth as usize); + + let website_max_pages = config_manager + .get_bot_config_value(&website.bot_id, "website-max-pages") + .await + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(website.max_pages as usize); + + // Create crawler configuration + let mut config = WebsiteCrawlConfig { + url: website.url.clone(), + max_depth: website_max_depth, + max_pages: website_max_pages, + crawl_delay_ms: 500, + expires_policy: website.expires_policy.clone(), + last_crawled: None, + next_crawl: None, + }; + + // Create and 
run crawler + let mut crawler = WebCrawler::new(config.clone()); + + match crawler.crawl().await { + Ok(pages) => { + info!("Crawled {} pages from {}", pages.len(), website.url); + + // Get bot name + let mut conn = db_pool.get()?; + #[derive(QueryableByName)] + struct BotNameResult { + #[diesel(sql_type = diesel::sql_types::Text)] + name: String, + } + + let bot_name: String = diesel::sql_query("SELECT name FROM bots WHERE id = $1") + .bind::(&website.bot_id) + .get_result::(&mut conn) + .map(|r| r.name)?; + + // Create KB name from URL + let kb_name = format!("website_{}", sanitize_url_for_kb(&website.url)); + + // Create work directory + let work_path = std::path::PathBuf::from("work") + .join(&bot_name) + .join(format!("{}.gbkb", bot_name)) + .join(&kb_name); + + // Ensure directory exists + tokio::fs::create_dir_all(&work_path).await?; + + // Write pages to files + for (idx, page) in pages.iter().enumerate() { + let filename = format!("page_{:04}.txt", idx); + let filepath = work_path.join(&filename); + + let content = format!( + "URL: {}\nTitle: {}\nCrawled: {}\n\n{}", + page.url, + page.title.as_deref().unwrap_or("Untitled"), + page.crawled_at, + page.content + ); + + tokio::fs::write(&filepath, content).await?; + } + + // Index with KB manager + kb_manager + .index_kb_folder(&bot_name, &kb_name, &work_path) + .await?; + + // Update configuration + config.calculate_next_crawl(); + + // Update database + diesel::sql_query( + "UPDATE website_crawls + SET last_crawled = NOW(), + next_crawl = $1, + crawl_status = 1, + pages_crawled = $2, + error_message = NULL + WHERE id = $3", + ) + .bind::, _>( + config.next_crawl, + ) + .bind::(pages.len() as i32) + .bind::(&website.id) + .execute(&mut conn)?; + + info!( + "Successfully recrawled {}, next crawl: {:?}", + website.url, config.next_crawl + ); + } + Err(e) => { + error!("Failed to crawl {}: {}", website.url, e); + + // Update database with error + let mut conn = db_pool.get()?; + diesel::sql_query( + "UPDATE 
website_crawls + SET crawl_status = 3, + error_message = $1 + WHERE id = $2", + ) + .bind::(&e.to_string()) + .bind::(&website.id) + .execute(&mut conn)?; + } + } + + Ok(()) + } +} + +/// Record from website_crawls table +#[derive(QueryableByName, Debug)] +struct WebsiteCrawlRecord { + #[diesel(sql_type = diesel::sql_types::Uuid)] + id: Uuid, + #[diesel(sql_type = diesel::sql_types::Uuid)] + bot_id: Uuid, + #[diesel(sql_type = diesel::sql_types::Text)] + url: String, + #[diesel(sql_type = diesel::sql_types::Text)] + expires_policy: String, + #[diesel(sql_type = diesel::sql_types::Integer)] + max_depth: i32, + #[diesel(sql_type = diesel::sql_types::Integer)] + max_pages: i32, +} + +/// Sanitize URL for use as KB name (duplicate from add_website.rs for isolation) +fn sanitize_url_for_kb(url: &str) -> String { + url.replace("http://", "") + .replace("https://", "") + .replace('/', "_") + .replace(':', "_") + .replace('.', "_") + .chars() + .filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-') + .collect::() + .to_lowercase() +} + +/// Get crawler service for a state (create if not exists) +pub async fn ensure_crawler_service_running( + state: Arc, +) -> Result<(), Box> { + // Check if KB manager exists + if let Some(kb_manager) = &state.kb_manager { + let service = Arc::new(WebsiteCrawlerService::new( + state.conn.clone(), + Arc::clone(kb_manager), + )); + + // Start the service + service.start().await; + + info!("Website crawler service started"); + + Ok(()) + } else { + warn!("KB manager not available, website crawler service not started"); + Ok(()) + } +} diff --git a/src/core/mod.rs b/src/core/mod.rs index ec849fc09..1b6380627 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -2,6 +2,7 @@ pub mod automation; pub mod bootstrap; pub mod bot; pub mod config; +pub mod kb; pub mod package_manager; pub mod session; pub mod shared; diff --git a/src/core/shared/state.rs b/src/core/shared/state.rs index ca964fcec..b62bb2493 100644 --- a/src/core/shared/state.rs 
+++ b/src/core/shared/state.rs @@ -1,10 +1,11 @@ -#[cfg(feature = "directory")] -use crate::directory::AuthService; use crate::core::bot::channels::{ChannelAdapter, VoiceAdapter, WebChannelAdapter}; use crate::core::config::AppConfig; +use crate::core::kb::KnowledgeBaseManager; +use crate::core::session::SessionManager; +#[cfg(feature = "directory")] +use crate::directory::AuthService; #[cfg(feature = "llm")] use crate::llm::LLMProvider; -use crate::core::session::SessionManager; use crate::shared::models::BotResponse; use crate::shared::utils::DbPool; #[cfg(feature = "drive")] @@ -32,6 +33,7 @@ pub struct AppState { pub response_channels: Arc>>>, pub web_adapter: Arc, pub voice_adapter: Arc, + pub kb_manager: Option>, } impl Clone for AppState { fn clone(&self) -> Self { @@ -48,6 +50,7 @@ impl Clone for AppState { llm_provider: Arc::clone(&self.llm_provider), #[cfg(feature = "directory")] auth_service: Arc::clone(&self.auth_service), + kb_manager: self.kb_manager.clone(), channels: Arc::clone(&self.channels), response_channels: Arc::clone(&self.response_channels), web_adapter: Arc::clone(&self.web_adapter), @@ -66,7 +69,8 @@ impl std::fmt::Debug for AppState { #[cfg(feature = "redis-cache")] debug.field("cache", &self.cache.is_some()); - debug.field("bucket_name", &self.bucket_name) + debug + .field("bucket_name", &self.bucket_name) .field("config", &self.config) .field("conn", &"DbPool") .field("session_manager", &"Arc>"); diff --git a/src/drive/drive_monitor/mod.rs b/src/drive/drive_monitor/mod.rs index 3f8cfa6c4..bdf6269d4 100644 --- a/src/drive/drive_monitor/mod.rs +++ b/src/drive/drive_monitor/mod.rs @@ -1,10 +1,13 @@ use crate::basic::compiler::BasicCompiler; use crate::config::ConfigManager; +use crate::core::kb::{ChangeType, KnowledgeBaseManager}; use crate::shared::state::AppState; use aws_sdk_s3::Client; use log::info; use std::collections::HashMap; use std::error::Error; +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, Ordering}; use 
std::sync::Arc; use tokio::time::{interval, Duration}; #[derive(Debug, Clone)] @@ -17,14 +20,23 @@ pub struct DriveMonitor { bucket_name: String, file_states: Arc>>, bot_id: uuid::Uuid, + kb_manager: Arc, + work_root: PathBuf, + is_processing: Arc, } impl DriveMonitor { pub fn new(state: Arc, bucket_name: String, bot_id: uuid::Uuid) -> Self { + let work_root = PathBuf::from("work"); + let kb_manager = Arc::new(KnowledgeBaseManager::new(work_root.clone())); + Self { state, bucket_name, file_states: Arc::new(tokio::sync::RwLock::new(HashMap::new())), bot_id, + kb_manager, + work_root, + is_processing: Arc::new(AtomicBool::new(false)), } } pub fn spawn(self: Arc) -> tokio::task::JoinHandle<()> { @@ -36,9 +48,25 @@ impl DriveMonitor { let mut tick = interval(Duration::from_secs(90)); loop { tick.tick().await; + + // Check if we're already processing to prevent reentrancy + if self.is_processing.load(Ordering::Acquire) { + log::warn!( + "Drive monitor is still processing previous changes, skipping this tick" + ); + continue; + } + + // Set processing flag + self.is_processing.store(true, Ordering::Release); + + // Process changes if let Err(e) = self.check_for_changes().await { log::error!("Error checking for drive changes: {}", e); } + + // Clear processing flag + self.is_processing.store(false, Ordering::Release); } }) } @@ -49,6 +77,7 @@ impl DriveMonitor { }; self.check_gbdialog_changes(client).await?; self.check_gbot(client).await?; + self.check_gbkb_changes(client).await?; Ok(()) } async fn check_gbdialog_changes( @@ -352,4 +381,200 @@ impl DriveMonitor { .await??; Ok(()) } + + async fn check_gbkb_changes( + &self, + client: &Client, + ) -> Result<(), Box> { + let bot_name = self + .bucket_name + .strip_suffix(".gbai") + .unwrap_or(&self.bucket_name); + + let gbkb_prefix = format!("{}.gbkb/", bot_name); + let mut current_files = HashMap::new(); + let mut continuation_token = None; + + // Add progress tracking for large file sets + let mut files_processed = 0; + 
let mut files_to_process = Vec::new(); + + loop { + let list_objects = match tokio::time::timeout( + Duration::from_secs(30), + client + .list_objects_v2() + .bucket(&self.bucket_name.to_lowercase()) + .prefix(&gbkb_prefix) + .set_continuation_token(continuation_token) + .send(), + ) + .await + { + Ok(Ok(list)) => list, + Ok(Err(e)) => return Err(e.into()), + Err(_) => { + log::error!( + "Timeout listing .gbkb objects in bucket {}", + self.bucket_name + ); + return Ok(()); + } + }; + + for obj in list_objects.contents.unwrap_or_default() { + let path = obj.key().unwrap_or_default().to_string(); + + // Skip directories + if path.ends_with('/') { + continue; + } + + let file_state = FileState { + etag: obj.e_tag().unwrap_or_default().to_string(), + }; + current_files.insert(path.clone(), file_state); + } + + if !list_objects.is_truncated.unwrap_or(false) { + break; + } + continuation_token = list_objects.next_continuation_token; + } + + let mut file_states = self.file_states.write().await; + + // Check for new or modified files + for (path, current_state) in current_files.iter() { + let is_new = !file_states.contains_key(path); + let is_modified = file_states + .get(path) + .map(|prev| prev.etag != current_state.etag) + .unwrap_or(false); + + if is_new || is_modified { + info!( + "Detected {} in .gbkb: {}", + if is_new { "new file" } else { "change" }, + path + ); + + // Queue file for batch processing instead of immediate download + files_to_process.push(path.clone()); + files_processed += 1; + + // Process in batches of 10 to avoid overwhelming the system + if files_to_process.len() >= 10 { + for file_path in files_to_process.drain(..) 
{ + if let Err(e) = self.download_gbkb_file(client, &file_path).await { + log::error!("Failed to download .gbkb file {}: {}", file_path, e); + continue; + } + } + + // Add small delay between batches to prevent system overload + tokio::time::sleep(Duration::from_millis(100)).await; + } + + // Extract KB name from path (e.g., "mybot.gbkb/docs/file.pdf" -> "docs") + let path_parts: Vec<&str> = path.split('/').collect(); + if path_parts.len() >= 2 { + let kb_name = path_parts[1]; + let kb_folder_path = self + .work_root + .join(bot_name) + .join(&gbkb_prefix) + .join(kb_name); + + // Trigger indexing + if let Err(e) = self + .kb_manager + .handle_gbkb_change(bot_name, &kb_folder_path) + .await + { + log::error!("Failed to process .gbkb change: {}", e); + } + } + } + } + + // Check for deleted files first + let paths_to_remove: Vec = file_states + .keys() + .filter(|path| path.starts_with(&gbkb_prefix) && !current_files.contains_key(*path)) + .cloned() + .collect(); + + // Process remaining files in the queue + for file_path in files_to_process { + if let Err(e) = self.download_gbkb_file(client, &file_path).await { + log::error!("Failed to download .gbkb file {}: {}", file_path, e); + } + } + + if files_processed > 0 { + info!("Processed {} .gbkb files", files_processed); + } + + // Update file states after checking for deletions + for (path, state) in current_files { + file_states.insert(path, state); + } + + for path in paths_to_remove { + info!("Detected deletion in .gbkb: {}", path); + file_states.remove(&path); + + // Extract KB name and trigger cleanup + let path_parts: Vec<&str> = path.split('/').collect(); + if path_parts.len() >= 2 { + let kb_name = path_parts[1]; + + // Check if entire KB folder was deleted + let kb_prefix = format!("{}{}/", gbkb_prefix, kb_name); + if !file_states.keys().any(|k| k.starts_with(&kb_prefix)) { + // No more files in this KB, clear the collection + if let Err(e) = self.kb_manager.clear_kb(bot_name, kb_name).await { + 
log::error!("Failed to clear KB {}: {}", kb_name, e); + } + } + } + } + + Ok(()) + } + + async fn download_gbkb_file( + &self, + client: &Client, + file_path: &str, + ) -> Result<(), Box> { + let bot_name = self + .bucket_name + .strip_suffix(".gbai") + .unwrap_or(&self.bucket_name); + + // Create local path + let local_path = self.work_root.join(bot_name).join(file_path); + + // Create parent directories + if let Some(parent) = local_path.parent() { + tokio::fs::create_dir_all(parent).await?; + } + + // Download file + let response = client + .get_object() + .bucket(&self.bucket_name) + .key(file_path) + .send() + .await?; + + let bytes = response.body.collect().await?.into_bytes(); + tokio::fs::write(&local_path, bytes).await?; + + info!("Downloaded .gbkb file {} to {:?}", file_path, local_path); + + Ok(()) + } } diff --git a/src/drive/files.rs b/src/drive/files.rs index 044e91ad5..858bdc87b 100644 --- a/src/drive/files.rs +++ b/src/drive/files.rs @@ -813,7 +813,7 @@ pub async fn create_folder( /// POST /files/shareFolder - Share a folder pub async fn share_folder( State(_state): State>, - Json(req): Json, + Json(_params): Json, ) -> Result>, (StatusCode, Json>)> { // TODO: Implement actual sharing logic with database let share_id = Uuid::new_v4().to_string(); @@ -825,13 +825,290 @@ pub async fn share_folder( success: true, share_id, share_link: Some(share_link), - expires_at: req.expires_at, }), message: Some("Folder shared successfully".to_string()), - error: None, })) } +// S3/MinIO helper functions for storage operations + +pub async fn save_to_s3( + state: &Arc, + bucket: &str, + key: &str, + content: &[u8], +) -> Result<(), Box> { + let s3_client = &state.s3_client; + + s3_client + .put_object() + .bucket(bucket) + .key(key) + .body(ByteStream::from(content.to_vec())) + .send() + .await?; + + Ok(()) +} + +pub async fn delete_from_s3( + state: &Arc, + bucket: &str, + key: &str, +) -> Result<(), Box> { + let s3_client = &state.s3_client; + + s3_client + 
.delete_object() + .bucket(bucket) + .key(key) + .send() + .await?; + + Ok(()) +} + +#[derive(Debug)] +pub struct BucketStats { + pub object_count: usize, + pub total_size: u64, + pub last_modified: Option, +} + +pub async fn get_bucket_stats( + state: &Arc, + bucket: &str, +) -> Result> { + let s3_client = &state.s3_client; + + let list_response = s3_client.list_objects_v2().bucket(bucket).send().await?; + + let mut total_size = 0u64; + let mut object_count = 0usize; + let mut last_modified = None; + + if let Some(contents) = list_response.contents() { + object_count = contents.len(); + for object in contents { + if let Some(size) = object.size() { + total_size += size as u64; + } + if let Some(modified) = object.last_modified() { + let modified_str = modified.to_string(); + if last_modified.is_none() || last_modified.as_ref().unwrap() < &modified_str { + last_modified = Some(modified_str); + } + } + } + } + + Ok(BucketStats { + object_count, + total_size, + last_modified, + }) +} + +pub async fn cleanup_old_files( + state: &Arc, + bucket: &str, + cutoff_date: chrono::DateTime, +) -> Result<(usize, u64), Box> { + let s3_client = &state.s3_client; + + let list_response = s3_client.list_objects_v2().bucket(bucket).send().await?; + + let mut deleted_count = 0usize; + let mut freed_bytes = 0u64; + + if let Some(contents) = list_response.contents() { + for object in contents { + if let Some(modified) = object.last_modified() { + let modified_time = chrono::DateTime::parse_from_rfc3339(&modified.to_string()) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or_else(|_| chrono::Utc::now()); + + if modified_time < cutoff_date { + if let Some(key) = object.key() { + if let Some(size) = object.size() { + freed_bytes += size as u64; + } + + s3_client + .delete_object() + .bucket(bucket) + .key(key) + .send() + .await?; + + deleted_count += 1; + } + } + } + } + } + + Ok((deleted_count, freed_bytes)) +} + +pub async fn create_bucket_backup( + state: &Arc, + source_bucket: 
&str, + backup_bucket: &str, + backup_id: &str, +) -> Result> { + let s3_client = &state.s3_client; + + // Create backup bucket if it doesn't exist + let _ = s3_client.create_bucket().bucket(backup_bucket).send().await; + + let list_response = s3_client + .list_objects_v2() + .bucket(source_bucket) + .send() + .await?; + + let mut file_count = 0usize; + + if let Some(contents) = list_response.contents() { + for object in contents { + if let Some(key) = object.key() { + let backup_key = format!("{}/{}", backup_id, key); + + // Copy object to backup bucket + let copy_source = format!("{}/{}", source_bucket, key); + s3_client + .copy_object() + .copy_source(©_source) + .bucket(backup_bucket) + .key(&backup_key) + .send() + .await?; + + file_count += 1; + } + } + } + + Ok(file_count) +} + +pub async fn restore_bucket_backup( + state: &Arc, + backup_bucket: &str, + target_bucket: &str, + backup_id: &str, +) -> Result> { + let s3_client = &state.s3_client; + + let prefix = format!("{}/", backup_id); + let list_response = s3_client + .list_objects_v2() + .bucket(backup_bucket) + .prefix(&prefix) + .send() + .await?; + + let mut file_count = 0usize; + + if let Some(contents) = list_response.contents() { + for object in contents { + if let Some(key) = object.key() { + // Remove backup_id prefix from key + let restored_key = key.strip_prefix(&prefix).unwrap_or(key); + + // Copy object back to target bucket + let copy_source = format!("{}/{}", backup_bucket, key); + s3_client + .copy_object() + .copy_source(©_source) + .bucket(target_bucket) + .key(restored_key) + .send() + .await?; + + file_count += 1; + } + } + } + + Ok(file_count) +} + +pub async fn create_archive( + state: &Arc, + bucket: &str, + prefix: &str, + archive_key: &str, +) -> Result> { + use flate2::write::GzEncoder; + use flate2::Compression; + use std::io::Write; + + let s3_client = &state.s3_client; + + let list_response = s3_client + .list_objects_v2() + .bucket(bucket) + .prefix(prefix) + .send() + 
.await?; + + let mut archive_data = Vec::new(); + { + let mut encoder = GzEncoder::new(&mut archive_data, Compression::default()); + + if let Some(contents) = list_response.contents() { + for object in contents { + if let Some(key) = object.key() { + // Get object content + let get_response = s3_client + .get_object() + .bucket(bucket) + .key(key) + .send() + .await?; + + let body_bytes = get_response + .body + .collect() + .await + .map_err(|e| format!("Failed to collect body: {}", e))?; + let bytes = body_bytes.into_bytes(); + + // Write to archive with key as filename + encoder.write_all(key.as_bytes())?; + encoder.write_all(b"\n")?; + encoder.write_all(&bytes)?; + encoder.write_all(b"\n---\n")?; + } + } + } + + encoder.finish()?; + } + + let archive_size = archive_data.len() as u64; + + // Upload archive + s3_client + .put_object() + .bucket(bucket) + .key(archive_key) + .body(ByteStream::from(archive_data)) + .send() + .await?; + + Ok(archive_size) +} + +pub async fn get_bucket_metrics( + state: &Arc, + bucket: &str, +) -> Result> { + get_bucket_stats(state, bucket).await +} + /// GET /files/dirFolder - Directory listing (alias for list) pub async fn dir_folder( State(state): State>, diff --git a/src/main.rs b/src/main.rs index 98f252ee7..2c8b1cd8e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -130,103 +130,62 @@ async fn run_axum_server( .allow_headers(tower_http::cors::Any) .max_age(std::time::Duration::from_secs(3600)); - // Build API routes with State - let mut api_router = Router::new() - // Session routes + // Use unified API router configuration + let mut api_router = crate::api_router::configure_api_routes(); + + // Add session-specific routes + api_router = api_router .route("/api/sessions", post(create_session)) .route("/api/sessions", get(get_sessions)) .route( "/api/sessions/{session_id}/history", get(get_session_history), ) - .route("/api/sessions/{session_id}/start", post(start_session)); - // File routes - // 
.route("/api/files/upload/{folder_path}", post(upload_file)) // Function doesn't exist + .route("/api/sessions/{session_id}/start", post(start_session)) + // WebSocket route + .route("/ws", get(websocket_handler)); - // Auth route + // Add feature-specific routes #[cfg(feature = "directory")] { api_router = api_router.route("/api/auth", get(auth_handler)); } - // Voice/Meet routes #[cfg(feature = "meet")] { api_router = api_router .route("/api/voice/start", post(voice_start)) .route("/api/voice/stop", post(voice_stop)) - .route("/api/meet/create", post(crate::meet::create_meeting)) - .route("/api/meet/rooms", get(crate::meet::list_rooms)) - .route("/api/meet/rooms/:room_id", get(crate::meet::get_room)) - .route( - "/api/meet/rooms/:room_id/join", - post(crate::meet::join_room), - ) - .route( - "/api/meet/rooms/:room_id/transcription/start", - post(crate::meet::start_transcription), - ) - .route("/api/meet/token", post(crate::meet::get_meeting_token)) - .route("/api/meet/invite", post(crate::meet::send_meeting_invites)) - .route("/ws/meet", get(crate::meet::meeting_websocket)); + .route("/ws/meet", get(crate::meet::meeting_websocket)) + .merge(crate::meet::configure()); } - api_router = api_router - // Media/Multimedia routes - .route( - "/api/media/upload", - post(crate::bot::multimedia::upload_media_handler), - ) - .route( - "/api/media/:media_id", - get(crate::bot::multimedia::download_media_handler), - ) - .route( - "/api/media/:media_id/thumbnail", - get(crate::bot::multimedia::generate_thumbnail_handler), - ) - .route( - "/api/media/search", - post(crate::bot::multimedia::web_search_handler), - ) - // WebSocket route - .route("/ws", get(websocket_handler)) - // Bot routes - .route("/api/bots", post(crate::bot::create_bot_handler)) - .route( - "/api/bots/{bot_id}/mount", - post(crate::bot::mount_bot_handler), - ) - .route( - "/api/bots/{bot_id}/input", - post(crate::bot::handle_user_input_handler), - ) - .route( - "/api/bots/{bot_id}/sessions", - 
get(crate::bot::get_user_sessions_handler), - ) - .route( - "/api/bots/{bot_id}/history", - get(crate::bot::get_conversation_history_handler), - ) - .route( - "/api/bots/{bot_id}/warning", - post(crate::bot::send_warning_handler), - ); - - // Add email routes if feature is enabled - // Merge drive, email, meet, and auth module routes - api_router = api_router.merge(crate::drive::configure()); - - #[cfg(feature = "meet")] - { - api_router = api_router.merge(crate::meet::configure()); - } - - api_router = api_router.nest("/api", crate::directory::router::configure()); - #[cfg(feature = "email")] - let api_router = api_router.merge(crate::email::configure()); + { + api_router = api_router.merge(crate::email::configure()); + } + + // Add calendar routes with CalDAV if feature is enabled + #[cfg(feature = "calendar")] + { + let calendar_engine = + Arc::new(crate::calendar::CalendarEngine::new(app_state.conn.clone())); + + // Start reminder job + let reminder_engine = Arc::clone(&calendar_engine); + tokio::spawn(async move { + crate::calendar::start_reminder_job(reminder_engine).await; + }); + + // Add CalDAV router + api_router = api_router.merge(crate::calendar::caldav::create_caldav_router( + calendar_engine, + )); + } + + // Add task engine routes + let task_engine = Arc::new(crate::tasks::TaskEngine::new(app_state.conn.clone())); + api_router = api_router.merge(crate::tasks::configure_task_routes(task_engine)); // Build static file serving let static_path = std::path::Path::new("./web/desktop"); @@ -241,8 +200,7 @@ async fn run_axum_server( .nest_service("/mail", ServeDir::new(static_path.join("mail"))) .nest_service("/tasks", ServeDir::new(static_path.join("tasks"))) // API routes - .merge(api_router) - .with_state(app_state.clone()) + .merge(api_router.with_state(app_state.clone())) // Root index route - only matches exact "/" .route("/", get(crate::ui_server::index)) // Layers @@ -554,6 +512,9 @@ async fn main() -> std::io::Result<()> { base_llm_provider }; + // 
Initialize Knowledge Base Manager + let kb_manager = Arc::new(botserver::core::kb::KnowledgeBaseManager::new("work")); + let app_state = Arc::new(AppState { drive: Some(drive), config: Some(cfg.clone()), @@ -575,8 +536,14 @@ async fn main() -> std::io::Result<()> { response_channels: Arc::new(tokio::sync::Mutex::new(HashMap::new())), web_adapter: web_adapter.clone(), voice_adapter: voice_adapter.clone(), + kb_manager: Some(kb_manager.clone()), }); + // Start website crawler service + if let Err(e) = botserver::core::kb::ensure_crawler_service_running(app_state.clone()).await { + log::warn!("Failed to start website crawler service: {}", e); + } + state_tx.send(app_state.clone()).await.ok(); progress_tx.send(BootstrapProgress::BootstrapComplete).ok(); diff --git a/src/tasks/mod.rs b/src/tasks/mod.rs index 618e06b6b..d86fc68a9 100644 --- a/src/tasks/mod.rs +++ b/src/tasks/mod.rs @@ -185,16 +185,13 @@ impl TaskEngine { let updated_at = Utc::now(); // Check if status is changing to Done - let completing = updates.status + let completing = updates + .status .as_ref() .map(|s| matches!(s, TaskStatus::Done)) .unwrap_or(false); - let completed_at = if completing { - Some(Utc::now()) - } else { - None - }; + let completed_at = if completing { Some(Utc::now()) } else { None }; // TODO: Implement with Diesel /* @@ -450,7 +447,10 @@ impl TaskEngine { } /// Calculate task progress (percentage) - pub async fn calculate_progress(&self, task_id: Uuid) -> Result> { + pub async fn calculate_progress( + &self, + task_id: Uuid, + ) -> Result> { let task = self.get_task(task_id).await?; if task.subtasks.is_empty() { @@ -460,7 +460,9 @@ impl TaskEngine { TaskStatus::InProgress => 50.0, TaskStatus::Review => 75.0, TaskStatus::Done => 100.0, - TaskStatus::Blocked => task.actual_hours.unwrap_or(0.0) / task.estimated_hours.unwrap_or(1.0) * 100.0, + TaskStatus::Blocked => { + task.actual_hours.unwrap_or(0.0) / task.estimated_hours.unwrap_or(1.0) * 100.0 + } TaskStatus::Cancelled => 0.0, }); } 
@@ -645,9 +647,9 @@ impl TaskEngine { /// HTTP API handlers pub mod handlers { use super::*; - use axum::extract::{State as AxumState, Query as AxumQuery, Path as AxumPath}; - use axum::response::{Json as AxumJson, IntoResponse}; + use axum::extract::{Path as AxumPath, Query as AxumQuery, State as AxumState}; use axum::http::StatusCode; + use axum::response::{IntoResponse, Json as AxumJson}; pub async fn create_task_handler( AxumState(_engine): AxumState, @@ -656,7 +658,6 @@ pub mod handlers { // TODO: Implement with actual engine let created = task; (StatusCode::OK, AxumJson(serde_json::json!(created))) - } pub async fn get_tasks_handler( @@ -693,7 +694,184 @@ pub mod handlers { } } +pub async fn handle_task_create( + State(engine): State>, + Json(mut task): Json, +) -> Result, StatusCode> { + task.id = Uuid::new_v4(); + task.created_at = Utc::now(); + task.updated_at = Utc::now(); + + match engine.create_task(task).await { + Ok(created) => Ok(Json(created)), + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } +} + +pub async fn handle_task_update( + State(engine): State>, + Path(id): Path, + Json(updates): Json, +) -> Result, StatusCode> { + match engine.update_task(id, updates).await { + Ok(updated) => Ok(Json(updated)), + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } +} + +pub async fn handle_task_delete( + State(engine): State>, + Path(id): Path, +) -> Result { + match engine.delete_task(id).await { + Ok(true) => Ok(StatusCode::NO_CONTENT), + Ok(false) => Err(StatusCode::NOT_FOUND), + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } +} + +pub async fn handle_task_list( + State(engine): State>, + Query(params): Query>, +) -> Result>, StatusCode> { + let tasks = if let Some(user_id) = params.get("user_id") { + engine.get_user_tasks(user_id).await + } else if let Some(status_str) = params.get("status") { + let status = match status_str.as_str() { + "todo" => TaskStatus::Todo, + "in_progress" => TaskStatus::InProgress, + "review" => 
TaskStatus::Review, + "done" => TaskStatus::Done, + "blocked" => TaskStatus::Blocked, + "cancelled" => TaskStatus::Cancelled, + _ => TaskStatus::Todo, + }; + engine.get_tasks_by_status(status).await + } else { + engine.get_all_tasks().await + }; + + match tasks { + Ok(task_list) => Ok(Json(task_list)), + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } +} + +pub async fn handle_task_assign( + State(engine): State>, + Path(id): Path, + Json(payload): Json, +) -> Result, StatusCode> { + let assignee = payload["assignee"] + .as_str() + .ok_or(StatusCode::BAD_REQUEST)?; + + match engine.assign_task(id, assignee.to_string()).await { + Ok(updated) => Ok(Json(updated)), + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } +} + +pub async fn handle_task_status_update( + State(engine): State>, + Path(id): Path, + Json(payload): Json, +) -> Result, StatusCode> { + let status_str = payload["status"].as_str().ok_or(StatusCode::BAD_REQUEST)?; + let status = match status_str { + "todo" => TaskStatus::Todo, + "in_progress" => TaskStatus::InProgress, + "review" => TaskStatus::Review, + "done" => TaskStatus::Done, + "blocked" => TaskStatus::Blocked, + "cancelled" => TaskStatus::Cancelled, + _ => return Err(StatusCode::BAD_REQUEST), + }; + + let updates = TaskUpdate { + title: None, + description: None, + status: Some(status), + priority: None, + assignee: None, + due_date: None, + tags: None, + }; + + match engine.update_task(id, updates).await { + Ok(updated) => Ok(Json(updated)), + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } +} + +pub async fn handle_task_priority_set( + State(engine): State>, + Path(id): Path, + Json(payload): Json, +) -> Result, StatusCode> { + let priority_str = payload["priority"] + .as_str() + .ok_or(StatusCode::BAD_REQUEST)?; + let priority = match priority_str { + "low" => TaskPriority::Low, + "medium" => TaskPriority::Medium, + "high" => TaskPriority::High, + "urgent" => TaskPriority::Urgent, + _ => return Err(StatusCode::BAD_REQUEST), + 
}; + + let updates = TaskUpdate { + title: None, + description: None, + status: None, + priority: Some(priority), + assignee: None, + due_date: None, + tags: None, + }; + + match engine.update_task(id, updates).await { + Ok(updated) => Ok(Json(updated)), + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } +} + +pub async fn handle_task_dependencies_set( + State(engine): State>, + Path(id): Path, + Json(payload): Json, +) -> Result, StatusCode> { + let deps = payload["dependencies"] + .as_array() + .ok_or(StatusCode::BAD_REQUEST)? + .iter() + .filter_map(|v| v.as_str().and_then(|s| Uuid::parse_str(s).ok())) + .collect::>(); + + match engine.set_dependencies(id, deps).await { + Ok(updated) => Ok(Json(updated)), + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } +} + /// Configure task engine routes +pub fn configure_task_routes(state: Arc) -> Router { + Router::new() + .route("/api/tasks", post(handle_task_create)) + .route("/api/tasks", get(handle_task_list)) + .route("/api/tasks/:id", put(handle_task_update)) + .route("/api/tasks/:id", delete(handle_task_delete)) + .route("/api/tasks/:id/assign", post(handle_task_assign)) + .route("/api/tasks/:id/status", put(handle_task_status_update)) + .route("/api/tasks/:id/priority", put(handle_task_priority_set)) + .route( + "/api/tasks/:id/dependencies", + put(handle_task_dependencies_set), + ) + .with_state(state) +} + +/// Configure task engine routes (legacy) pub fn configure(router: Router) -> Router where S: Clone + Send + Sync + 'static, @@ -704,5 +882,8 @@ where .route("/api/tasks", post(handlers::create_task_handler::)) .route("/api/tasks", get(handlers::get_tasks_handler::)) .route("/api/tasks/:id", put(handlers::update_task_handler::)) - .route("/api/tasks/statistics", get(handlers::get_statistics_handler::)) + .route( + "/api/tasks/statistics", + get(handlers::get_statistics_handler::), + ) } diff --git a/templates/api-client.gbai/api-client.gbdialog/climate.vbs 
b/templates/api-client.gbai/api-client.gbdialog/climate.bas similarity index 100% rename from templates/api-client.gbai/api-client.gbdialog/climate.vbs rename to templates/api-client.gbai/api-client.gbdialog/climate.bas diff --git a/templates/default.gbai/default.gbdialog/calculate.vbs b/templates/default.gbai/default.gbdialog/calculate.bas similarity index 100% rename from templates/default.gbai/default.gbdialog/calculate.vbs rename to templates/default.gbai/default.gbdialog/calculate.bas diff --git a/templates/default.gbai/default.gbdialog/send-email.vbs b/templates/default.gbai/default.gbdialog/send-email.bas similarity index 100% rename from templates/default.gbai/default.gbdialog/send-email.vbs rename to templates/default.gbai/default.gbdialog/send-email.bas diff --git a/templates/default.gbai/default.gbdialog/send-sms.vbs b/templates/default.gbai/default.gbdialog/send-sms.bas similarity index 100% rename from templates/default.gbai/default.gbdialog/send-sms.vbs rename to templates/default.gbai/default.gbdialog/send-sms.bas diff --git a/templates/default.gbai/default.gbdialog/translate.vbs b/templates/default.gbai/default.gbdialog/translate.bas similarity index 100% rename from templates/default.gbai/default.gbdialog/translate.vbs rename to templates/default.gbai/default.gbdialog/translate.bas diff --git a/templates/default.gbai/default.gbdialog/weather.vbs b/templates/default.gbai/default.gbdialog/weather.bas similarity index 100% rename from templates/default.gbai/default.gbdialog/weather.vbs rename to templates/default.gbai/default.gbdialog/weather.bas diff --git a/templates/default.gbai/default.gbot/config.csv b/templates/default.gbai/default.gbot/config.csv index 8c0d56ec5..0d4734203 100644 --- a/templates/default.gbai/default.gbot/config.csv +++ b/templates/default.gbai/default.gbot/config.csv @@ -45,3 +45,7 @@ custom-port,5432 custom-database,mycustomdb custom-username, custom-password, +, +website-expires,1d +website-max-depth,3 +website-max-pages,100 
diff --git a/templates/marketing.gbai/marketing.gbdialog/get-image.vbs b/templates/marketing.gbai/marketing.gbdialog/get-image.bas similarity index 100% rename from templates/marketing.gbai/marketing.gbdialog/get-image.vbs rename to templates/marketing.gbai/marketing.gbdialog/get-image.bas diff --git a/templates/marketing.gbai/marketing.gbdialog/post-to-instagram.vbs b/templates/marketing.gbai/marketing.gbdialog/post-to-instagram.bas similarity index 100% rename from templates/marketing.gbai/marketing.gbdialog/post-to-instagram.vbs rename to templates/marketing.gbai/marketing.gbdialog/post-to-instagram.bas diff --git a/templates/public-apis.gbai/KEYWORDS_CHECKLIST.md b/templates/public-apis.gbai/KEYWORDS_CHECKLIST.md index 42bd1bb7f..b1ceec11c 100644 --- a/templates/public-apis.gbai/KEYWORDS_CHECKLIST.md +++ b/templates/public-apis.gbai/KEYWORDS_CHECKLIST.md @@ -10,7 +10,7 @@ ## 📋 Implementation Status -### ☁️ Weather APIs (7 keywords) - `weather-apis.vbs` +### ☁️ Weather APIs (7 keywords) - `weather-apis.bas` - [x] 7Timer! Astro Weather - Astronomical weather forecast - [x] 7Timer! 
Civil Weather - 7-day weather forecast - [x] Open-Meteo Weather - Real-time weather data @@ -20,7 +20,7 @@ - [x] AQICN Air Quality - Air quality index by city - [x] Get Weather Icon - Weather condition to emoji converter -### 🐾 Animals APIs (17 keywords) - `animals-apis.vbs` +### 🐾 Animals APIs (17 keywords) - `animals-apis.bas` - [x] Random Cat Fact - Cat facts - [x] Random Dog Fact - Dog facts - [x] Random Dog Image - Dog pictures @@ -40,7 +40,7 @@ - [x] Dog Breeds List - All dog breeds - [x] Specific Dog Breed Image - Image by breed name -### 😄 Entertainment APIs (19 keywords) - `entertainment-apis.vbs` +### 😄 Entertainment APIs (19 keywords) - `entertainment-apis.bas` - [x] Chuck Norris Joke - Random Chuck Norris joke - [x] Chuck Norris Categories - Available joke categories - [x] Chuck Norris Joke by Category - Category-specific jokes @@ -66,7 +66,7 @@ - [x] Insult Generator - Clean insults - [x] Compliment Generator - Random compliments -### 🍽️ Food & Drink APIs (13 keywords) - `food-apis.vbs` +### 🍽️ Food & Drink APIs (13 keywords) - `food-apis.bas` - [x] Random Coffee Image - Coffee images - [x] Random Food Dish - Food dish images - [x] Random Food by Category - Category-specific food @@ -84,7 +84,7 @@ - [x] High ABV Beers - High alcohol content beers - [x] Bacon Ipsum Text - Bacon-themed lorem ipsum -### 🔧 Data Utility & Geocoding APIs (19 keywords) - `data-utility-apis.vbs` +### 🔧 Data Utility & Geocoding APIs (19 keywords) - `data-utility-apis.bas` - [x] Generate UUID - Single UUID generation - [x] Generate Multiple UUIDs - Multiple UUIDs - [x] Get My IP Address - Current public IP @@ -199,11 +199,11 @@ botserver/templates/public-apis.gbai/ ├── README.md (758 lines) ├── KEYWORDS_CHECKLIST.md (this file) └── public-apis.gbdialog/ - ├── weather-apis.vbs (244 lines, 8 keywords) - ├── animals-apis.vbs (366 lines, 17 keywords) - ├── entertainment-apis.vbs (438 lines, 19 keywords) - ├── food-apis.vbs (503 lines, 13 keywords) - └── data-utility-apis.vbs (568 
lines, 19 keywords) + ├── weather-apis.bas (244 lines, 8 keywords) + ├── animals-apis.bas (366 lines, 17 keywords) + ├── entertainment-apis.bas (438 lines, 19 keywords) + ├── food-apis.bas (503 lines, 13 keywords) + └── data-utility-apis.bas (568 lines, 19 keywords) ``` **Total Lines of Code**: ~2,877 lines @@ -332,4 +332,4 @@ botserver/templates/public-apis.gbai/ --- -🎉 **Ready to use! Copy the `public-apis.gbai` folder to your General Bots templates directory.** \ No newline at end of file +🎉 **Ready to use! Copy the `public-apis.gbai` folder to your General Bots templates directory.** diff --git a/templates/public-apis.gbai/QUICKSTART.md b/templates/public-apis.gbai/QUICKSTART.md index 0ab355d3f..e94a284d8 100644 --- a/templates/public-apis.gbai/QUICKSTART.md +++ b/templates/public-apis.gbai/QUICKSTART.md @@ -244,7 +244,7 @@ END IF ### Method 1: Direct Testing ```vbs -REM Create a test dialog file: test.gbdialog/test-apis.vbs +REM Create a test dialog file: test.gbdialog/test-apis.bas TALK "Testing Weather API..." 
weather = GET "https://api.open-meteo.com/v1/forecast?latitude=52.52&longitude=13.41&current_weather=true" @@ -415,4 +415,4 @@ END FOR **Need help?** Check the examples in this guide or refer to the full README.md -**Have fun coding!** 🎉 \ No newline at end of file +**Have fun coding!** 🎉 diff --git a/templates/public-apis.gbai/README.md b/templates/public-apis.gbai/README.md index 980e86364..9270a1bd9 100644 --- a/templates/public-apis.gbai/README.md +++ b/templates/public-apis.gbai/README.md @@ -6,11 +6,11 @@ This package provides 50+ free API keywords for General Bots, allowing you to in This `.gbai` template includes the following BASIC keyword files: -- `weather-apis.vbs` - Weather data and forecasts -- `animals-apis.vbs` - Animal facts and images -- `entertainment-apis.vbs` - Jokes, quotes, and fun content -- `food-apis.vbs` - Food recipes and drink information -- `data-utility-apis.vbs` - Data utilities and geocoding +- `weather-apis.bas` - Weather data and forecasts +- `animals-apis.bas` - Animal facts and images +- `entertainment-apis.bas` - Jokes, quotes, and fun content +- `food-apis.bas` - Food recipes and drink information +- `data-utility-apis.bas` - Data utilities and geocoding ## 🌤️ Weather APIs @@ -735,7 +735,7 @@ END IF To add more API keywords: 1. Find a free, no-auth API from [public-apis](https://github.com/public-apis/public-apis) -2. Create a `.vbs` or `.bas` file in the appropriate category +2. Create a `.bas` file in the appropriate category 3. Follow the existing keyword pattern 4. Test thoroughly 5. 
Update this README diff --git a/templates/public-apis.gbai/public-apis.gbdialog/animals-apis.vbs b/templates/public-apis.gbai/public-apis.gbdialog/animals-apis.bas similarity index 100% rename from templates/public-apis.gbai/public-apis.gbdialog/animals-apis.vbs rename to templates/public-apis.gbai/public-apis.gbdialog/animals-apis.bas diff --git a/templates/public-apis.gbai/public-apis.gbdialog/data-utility-apis.vbs b/templates/public-apis.gbai/public-apis.gbdialog/data-utility-apis.bas similarity index 100% rename from templates/public-apis.gbai/public-apis.gbdialog/data-utility-apis.vbs rename to templates/public-apis.gbai/public-apis.gbdialog/data-utility-apis.bas diff --git a/templates/public-apis.gbai/public-apis.gbdialog/entertainment-apis.vbs b/templates/public-apis.gbai/public-apis.gbdialog/entertainment-apis.bas similarity index 100% rename from templates/public-apis.gbai/public-apis.gbdialog/entertainment-apis.vbs rename to templates/public-apis.gbai/public-apis.gbdialog/entertainment-apis.bas diff --git a/templates/public-apis.gbai/public-apis.gbdialog/food-apis.vbs b/templates/public-apis.gbai/public-apis.gbdialog/food-apis.bas similarity index 100% rename from templates/public-apis.gbai/public-apis.gbdialog/food-apis.vbs rename to templates/public-apis.gbai/public-apis.gbdialog/food-apis.bas diff --git a/templates/public-apis.gbai/public-apis.gbdialog/science-space-apis.vbs b/templates/public-apis.gbai/public-apis.gbdialog/science-space-apis.bas similarity index 100% rename from templates/public-apis.gbai/public-apis.gbdialog/science-space-apis.vbs rename to templates/public-apis.gbai/public-apis.gbdialog/science-space-apis.bas diff --git a/templates/public-apis.gbai/public-apis.gbdialog/weather-apis.vbs b/templates/public-apis.gbai/public-apis.gbdialog/weather-apis.bas similarity index 100% rename from templates/public-apis.gbai/public-apis.gbdialog/weather-apis.vbs rename to templates/public-apis.gbai/public-apis.gbdialog/weather-apis.bas