diff --git a/package.json b/package.json
index 989d1f02..5dd32375 100644
--- a/package.json
+++ b/package.json
@@ -86,7 +86,9 @@
     "@google-cloud/translate": "8.5.0",
     "@hubspot/api-client": "11.2.0",
     "@koa/cors": "5.0.0",
+    "@langchain/anthropic": "^0.3.7",
     "@langchain/community": "0.2.31",
+    "@langchain/core": "^0.3.17",
     "@langchain/openai": "0.2.8",
     "@microsoft/microsoft-graph-client": "3.0.7",
     "@nlpjs/basic": "4.27.0",
diff --git a/packages/core.gbapp/services/GBConversationalService.ts b/packages/core.gbapp/services/GBConversationalService.ts
index 50a3d64f..cfa776a0 100644
--- a/packages/core.gbapp/services/GBConversationalService.ts
+++ b/packages/core.gbapp/services/GBConversationalService.ts
@@ -53,7 +53,6 @@
 import fs from 'fs/promises';
 import twilio from 'twilio';
 import Nexmo from 'nexmo';
 import { join } from 'path';
-import path from 'path';
 import shell from 'any-shell-escape';
 import { exec } from 'child_process';
 import prism from 'prism-media';
@@ -66,7 +65,6 @@
 import * as marked from 'marked';
 import Translate from '@google-cloud/translate';
 import { GBUtil } from '../../../src/util.js';
 import { GBLogEx } from './GBLogEx.js';
-import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
 
 /**
  * Provides basic services for handling messages and dispatching to back-end
diff --git a/packages/core.gbapp/services/GBDeployer.ts b/packages/core.gbapp/services/GBDeployer.ts
index d3092a9f..e67e89d4 100644
--- a/packages/core.gbapp/services/GBDeployer.ts
+++ b/packages/core.gbapp/services/GBDeployer.ts
@@ -353,6 +353,7 @@ export class GBDeployer implements IGBDeployer {
     try {
       vectorStore = await HNSWLib.load(min['vectorStorePath'], embedding);
     } catch {
+      GBLogEx.info(min, 'Creating new store...');
       vectorStore = new HNSWLib(embedding, {
         space: 'cosine'
       });
diff --git a/packages/kb.gbapp/services/KBService.ts b/packages/kb.gbapp/services/KBService.ts
index 06cb124a..85a6ab2c 100644
--- a/packages/kb.gbapp/services/KBService.ts
+++ b/packages/kb.gbapp/services/KBService.ts
@@ -47,6 +47,7 @@
 import { CSVLoader } from '@langchain/community/document_loaders/fs/csv';
 import { DocxLoader } from '@langchain/community/document_loaders/fs/docx';
 import { EPubLoader } from '@langchain/community/document_loaders/fs/epub';
 import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
+import { rimraf } from 'rimraf';
 import getColors from 'get-image-colors';
 import { Document } from 'langchain/document';
@@ -651,8 +652,8 @@ export class KBService implements IGBKBService {
       await min.conversationalService['playMarkdown'](min, answer, channel, step, GBMinService.userMobile(step));
     } else if (answer.endsWith('.ogg') && process.env.AUDIO_DISABLED !== 'true') {
       await this.playAudio(min, answer, channel, step, min.conversationalService);
-    } else if(answer.startsWith('![')){
-
+    } else if (answer.startsWith('![')) {
+
       const url = answer.match(/\((.*?)\)/)[1];
       await this.showImage(min, min.conversationalService, step, url, channel)
     } else {
@@ -885,7 +886,7 @@
   ): Promise {
     try {
       if (
-        depth > maxDepth ||
+        (depth > maxDepth && !url.endsWith('pdf')) ||
         visited.has(url) ||
         url.endsWith('.jpg') ||
         url.endsWith('.png') ||
@@ -1029,6 +1030,14 @@
 
     const websiteIgnoreUrls = min.core.getParam<[]>(min.instance, 'Website Ignore URLs', null);
     GBLogEx.info(min, `Website: ${website}, Max Depth: ${maxDepth}, Ignore URLs: ${websiteIgnoreUrls}`);
 
+    let vectorStore = min['vectorStore'];
+    if (vectorStore) {
+      rimraf.sync(min['vectorStorePath']);
+
+      vectorStore = await min.deployService['loadOrCreateEmptyVectorStore'](min);
+      min['vectorStore'] = vectorStore;
+    }
+
     if (website) {
       // Removes last slash if any.
@@ -1099,19 +1108,20 @@
 
       GBLogEx.info(min, `Vectorizing ${files.length} file(s)...`);
 
+
       await CollectionUtil.asyncForEach(files, async file => {
         let content = null;
 
         try {
           const document = await this.loadAndSplitFile(file);
           const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
-          const vectorStore = min['vectorStore'];
           await vectorStore.addDocuments(flattenedDocuments);
-          await vectorStore.save(min['vectorStorePath']);
         } catch (error) {
           GBLogEx.info(min, `Ignore processing of ${file}. ${GBUtil.toYAML(error)}`);
         }
       });
+
+
     }
 
     files = await walkPromise(urlJoin(localPath, 'docs'));
@@ -1123,13 +1133,16 @@
 
         const document = await this.loadAndSplitFile(filePath);
         const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
-        const vectorStore = min['vectorStore'];
         await vectorStore.addDocuments(flattenedDocuments);
-        await vectorStore.save(min['vectorStorePath']);
       });
     }
 
+    await vectorStore.save(min['vectorStorePath']);
+    min['vectorStore'] = vectorStore;
+  }
+
+
   defaultRecursiveCharacterTextSplitter = new RecursiveCharacterTextSplitter({
     chunkSize: 700,
     chunkOverlap: 50
@@ -1496,7 +1509,11 @@
 
       return filePath; // Return the saved file path
     } else {
-      await page.goto(url, { waitUntil: 'networkidle2' });
+      await page.goto(url, {
+        waitUntil: 'networkidle2',
+        timeout: 60000 // Timeout after 1 minute (60,000 ms)
+      });
+
       const parsedUrl = new URL(url);
 
diff --git a/packages/llm.gblib/services/ChatServices.ts b/packages/llm.gblib/services/ChatServices.ts
index 4fc69ee6..7fb62314 100644
--- a/packages/llm.gblib/services/ChatServices.ts
+++ b/packages/llm.gblib/services/ChatServices.ts
@@ -29,6 +29,7 @@
 \*****************************************************************************/
 
 'use strict';
+import { ChatAnthropic } from '@langchain/anthropic';
 import { PromptTemplate } from '@langchain/core/prompts';
 import { WikipediaQueryRun } from '@langchain/community/tools/wikipedia_query_run';
 import { HNSWLib } from '@langchain/community/vectorstores/hnswlib';
@@ -244,6 +245,16 @@ export class ChatServices {
   public static async invokeLLM(min: GBMinInstance, text: string) {
     let model;
+    model = await ChatServices.getModel(min);
+
+    return await model.invoke(text);
+  }
+
+  public static memoryMap = {};
+  public static userSystemPrompt = {};
+  public static usersMode = {};
+
+  private static async getModel(min: GBMinInstance) {
     const azureOpenAIKey = await (min.core as any)['getParam'](min.instance, 'Azure Open AI Key', null, true);
     const azureOpenAILLMModel = await (min.core as any)['getParam'](
       min.instance,
       'Azure Open AI LLM Model',
@@ -259,23 +270,33 @@
       true
     );
 
-    model = new ChatOpenAI({
-      azureOpenAIApiKey: azureOpenAIKey,
-      azureOpenAIApiInstanceName: azureOpenAIApiInstanceName,
-
-      azureOpenAIApiDeploymentName: azureOpenAILLMModel,
-      azureOpenAIApiVersion: azureOpenAIVersion,
-      temperature: 0,
-      callbacks: [logHandler]
-    });
-
-    return await model.invoke(text);
+    const provider = await (min.core as any)['getParam'](
+      min.instance,
+      'LLM Provider',
+      null,
+      'openai'
+    );
+
+    let model;
+    if (provider === 'claude') {
+      model = new ChatAnthropic({
+        model: 'claude-3-haiku-20240307',
+        temperature: 0,
+        maxTokens: undefined,
+        maxRetries: 2
+      });
+    } else {
+      model = new ChatOpenAI({
+        azureOpenAIApiKey: azureOpenAIKey,
+        azureOpenAIApiInstanceName: azureOpenAIApiInstanceName,
+        azureOpenAIApiDeploymentName: azureOpenAILLMModel,
+        azureOpenAIApiVersion: azureOpenAIVersion,
+        temperature: 0,
+        callbacks: [logHandler]
+      });
+    }
+    return model;
   }
 
-  public static memoryMap = {};
-  public static userSystemPrompt = {};
-  public static usersMode = {};
-
   public static async answerByLLM(pid: number, min: GBMinInstance, user, question: string, mode = null) {
     const answerMode = this.usersMode[user.userSystemId]
       ? this.usersMode[user.userSystemId]
@@ -308,32 +329,7 @@
 
     const systemPrompt = securityPrompt + (user ? this.userSystemPrompt[user.userSystemId] : '');
 
-    let model;
-
-    const azureOpenAIKey = await (min.core as any)['getParam'](min.instance, 'Azure Open AI Key', null, true);
-    const azureOpenAILLMModel = await (min.core as any)['getParam'](
-      min.instance,
-      'Azure Open AI LLM Model',
-      null,
-      true
-    );
-    const azureOpenAIVersion = await (min.core as any)['getParam'](min.instance, 'Azure Open AI Version', null, true);
-    const azureOpenAIApiInstanceName = await (min.core as any)['getParam'](
-      min.instance,
-      'Azure Open AI Instance',
-      null,
-      true
-    );
-
-    model = new ChatOpenAI({
-      azureOpenAIApiKey: azureOpenAIKey,
-      azureOpenAIApiInstanceName: azureOpenAIApiInstanceName,
-
-      azureOpenAIApiDeploymentName: azureOpenAILLMModel,
-      azureOpenAIApiVersion: azureOpenAIVersion,
-      temperature: 0,
-      callbacks: [logHandler]
-    });
+    let model = await ChatServices.getModel(min);
 
     let tools = await ChatServices.getTools(min);
     let toolsAsText = ChatServices.getToolsAsText(tools);
diff --git a/templates/crawler.gbai/crawler.gbot/config.csv b/templates/crawler.gbai/crawler.gbot/config.csv
index 7fd0bc96..e84fa5b2 100644
--- a/templates/crawler.gbai/crawler.gbot/config.csv
+++ b/templates/crawler.gbai/crawler.gbot/config.csv
@@ -1,4 +1,5 @@
 name,value
-Website,https://www.dgti.uerj.br/
+Website,https://www.oabgo.org.br/
 Answer Mode,document
-Theme Color,purple
\ No newline at end of file
+Theme Color,purple
+LLM Provider,claude
\ No newline at end of file
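
A few sketches of the behavior these changes introduce follow. First, the KBService hunks move vector-store persistence out of the per-file loop: the stale index directory is wiped up front, documents from every file accumulate in one in-memory store, and `save()` runs once at the end instead of after each file. A minimal sketch of that flow, assuming `OpenAIEmbeddings` and a hypothetical `./store/vectors` path (the real code resolves the store through `min['vectorStorePath']` and `loadOrCreateEmptyVectorStore`):

```typescript
import { HNSWLib } from '@langchain/community/vectorstores/hnswlib';
import { OpenAIEmbeddings } from '@langchain/openai';
import { Document } from 'langchain/document';
import { rimraf } from 'rimraf';

const storePath = './store/vectors'; // hypothetical location

// Drop the stale index so recrawled content is not mixed with old chunks.
rimraf.sync(storePath);
const vectorStore = new HNSWLib(new OpenAIEmbeddings(), { space: 'cosine' });

// Accumulate documents from every file in memory first...
await vectorStore.addDocuments([
  new Document({ pageContent: 'chunk text', metadata: { file: 'a.md' } })
]);

// ...then persist a single time, instead of saving after each file.
await vectorStore.save(storePath);
```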
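
Second, the crawler hunks change two behaviors: the depth cap no longer applies to PDF links, and `page.goto` now fails fast after 60 seconds instead of hanging on slow sites. A self-contained sketch of those guards, with a hypothetical `visit` helper standing in for the real crawl method:

```typescript
import puppeteer, { Page } from 'puppeteer';

const maxDepth = 3; // illustrative value; the real bot reads 'Max Depth' from config

async function visit(page: Page, url: string, depth: number, visited: Set<string>): Promise<void> {
  // The depth cap is waived for PDFs, mirroring the new
  // `(depth > maxDepth && !url.endsWith('pdf'))` guard.
  if ((depth > maxDepth && !url.endsWith('pdf')) || visited.has(url)) {
    return;
  }
  visited.add(url);

  // networkidle2 waits for the page to settle; the explicit timeout keeps
  // one unresponsive site from stalling the whole crawl.
  await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
}

const browser = await puppeteer.launch();
const page = await browser.newPage();
await visit(page, 'https://example.com/', 0, new Set());
await browser.close();
```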
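
Finally, the `getModel()` refactor routes model construction through the new `LLM Provider` bot parameter (see `config.csv`: `LLM Provider,claude`). A minimal sketch of the selection logic, assuming `ANTHROPIC_API_KEY` is exported in the environment (the `@langchain/anthropic` default when no `apiKey` option is passed); the `AZURE_*` variable names below are placeholders, since the real code reads these values per-instance via `getParam`:

```typescript
import { ChatAnthropic } from '@langchain/anthropic';
import { ChatOpenAI } from '@langchain/openai';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';

function getModel(provider: string): BaseChatModel {
  if (provider === 'claude') {
    // ChatAnthropic reads ANTHROPIC_API_KEY from the environment by default.
    return new ChatAnthropic({
      model: 'claude-3-haiku-20240307',
      temperature: 0,
      maxRetries: 2
    });
  }
  // Default branch keeps the previous Azure OpenAI behavior.
  return new ChatOpenAI({
    azureOpenAIApiKey: process.env.AZURE_OPEN_AI_KEY,
    azureOpenAIApiInstanceName: process.env.AZURE_OPEN_AI_INSTANCE,
    azureOpenAIApiDeploymentName: process.env.AZURE_OPEN_AI_LLM_MODEL,
    azureOpenAIApiVersion: process.env.AZURE_OPEN_AI_VERSION,
    temperature: 0
  });
}

// 'LLM Provider,claude' in config.csv selects the first branch.
const answer = await getModel('claude').invoke('Ping?');
console.log(answer.content);
```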