new(all): Website indexing.
This commit is contained in:
parent
c620473fbf
commit
d73bf50a2d
5 changed files with 499 additions and 411 deletions
|
@ -133,8 +133,10 @@
|
|||
"express": "4.18.2",
|
||||
"express-remove-route": "1.0.0",
|
||||
"ffmpeg-static": "5.1.0",
|
||||
"get-image-colors": "^4.0.1",
|
||||
"google-libphonenumber": "3.2.31",
|
||||
"googleapis": "126.0.1",
|
||||
"hnswlib-node": "^1.4.2",
|
||||
"http-proxy": "1.18.1",
|
||||
"ibm-watson": "7.1.2",
|
||||
"iso-639-1": "3.1.1",
|
||||
|
|
|
@ -2055,7 +2055,9 @@ export class SystemKeywords {
|
|||
return (orientation || 0) >= 5 ? [height, width] : [width, height];
|
||||
};
|
||||
|
||||
const size = getNormalSize(await sharp(buf).metadata());
|
||||
const metadata = await sharp(buf).metadata();
|
||||
const size = getNormalSize({width:metadata['width'],
|
||||
height:metadata['height'], orientation: metadata['orientation'] });
|
||||
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(imageName));
|
||||
images[index++] = { url: url, size: size, buf: buf };
|
||||
}
|
||||
|
|
|
@ -169,36 +169,20 @@ export class GBMinService {
|
|||
let i = 1;
|
||||
|
||||
if (instances.length > 1) {
|
||||
this.bar1 = new cliProgress.SingleBar(
|
||||
{
|
||||
format: '[{bar}] ({value}/{total}) Loading {botId} ...',
|
||||
barsize: 40,
|
||||
forceRedraw: true
|
||||
},
|
||||
cliProgress.Presets.rect
|
||||
);
|
||||
this.bar1.start(instances.length, i, { botId: 'Boot' });
|
||||
}
|
||||
|
||||
await CollectionUtil.asyncForEach(
|
||||
instances,
|
||||
(async instance => {
|
||||
try {
|
||||
GBLog.info(`Mounting ${instance.botId}...`)
|
||||
await this['mountBot'](instance);
|
||||
} catch (error) {
|
||||
GBLog.error(`Error mounting bot ${instance.botId}: ${error.message}\n${error.stack}`);
|
||||
} finally {
|
||||
if (this.bar1) {
|
||||
this.bar1.update(i++, { botId: instance.botId });
|
||||
}
|
||||
}
|
||||
}).bind(this)
|
||||
);
|
||||
|
||||
if (this.bar1) {
|
||||
this.bar1.stop();
|
||||
}
|
||||
|
||||
// Loads API.
|
||||
|
||||
await this.ensureAPI();
|
||||
|
|
|
@ -31,14 +31,19 @@
|
|||
'use strict';
|
||||
|
||||
import { HNSWLib } from '@langchain/community/vectorstores/hnswlib';
|
||||
import { StringOutputParser } from "@langchain/core/output_parsers";
|
||||
import { AIMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder } from '@langchain/core/prompts';
|
||||
import { RunnableSequence } from "@langchain/core/runnables";
|
||||
import { convertToOpenAITool } from "@langchain/core/utils/function_calling";
|
||||
import { ChatOpenAI } from "@langchain/openai";
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import {
|
||||
AIMessagePromptTemplate,
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
MessagesPlaceholder
|
||||
} from '@langchain/core/prompts';
|
||||
import { RunnableSequence } from '@langchain/core/runnables';
|
||||
import { convertToOpenAITool } from '@langchain/core/utils/function_calling';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import { GBLog, GBMinInstance } from 'botlib';
|
||||
import * as Fs from 'fs';
|
||||
import { jsonSchemaToZod } from "json-schema-to-zod";
|
||||
import { jsonSchemaToZod } from 'json-schema-to-zod';
|
||||
import { BufferWindowMemory } from 'langchain/memory';
|
||||
import Path from 'path';
|
||||
import { CollectionUtil } from 'pragmatismo-io-framework';
|
||||
|
@ -46,35 +51,28 @@ import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
|
|||
import { GBVMService } from '../../basic.gblib/services/GBVMService.js';
|
||||
import { GBConfigService } from '../../core.gbapp/services/GBConfigService.js';
|
||||
import { GuaribasSubject } from '../../kb.gbapp/models/index.js';
|
||||
import { Serialized } from "@langchain/core/load/serializable";
|
||||
import { BaseCallbackHandler } from "@langchain/core/callbacks/base";
|
||||
import { Serialized } from '@langchain/core/load/serializable';
|
||||
import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
|
||||
import { pdfToPng, PngPageOutput } from 'pdf-to-png-converter';
|
||||
import { DynamicStructuredTool } from "@langchain/core/tools";
|
||||
import { WikipediaQueryRun } from "@langchain/community/tools/wikipedia_query_run";
|
||||
import {
|
||||
BaseLLMOutputParser,
|
||||
OutputParserException,
|
||||
} from "@langchain/core/output_parsers";
|
||||
import { ChatGeneration, Generation } from "@langchain/core/outputs";
|
||||
import { DynamicStructuredTool } from '@langchain/core/tools';
|
||||
import { WikipediaQueryRun } from '@langchain/community/tools/wikipedia_query_run';
|
||||
import { BaseLLMOutputParser, OutputParserException } from '@langchain/core/output_parsers';
|
||||
import { ChatGeneration, Generation } from '@langchain/core/outputs';
|
||||
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
|
||||
import { GBServer } from '../../../src/app.js';
|
||||
import urlJoin from 'url-join';
|
||||
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
||||
import { getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
|
||||
|
||||
|
||||
export interface CustomOutputParserFields { }
|
||||
export interface CustomOutputParserFields {}
|
||||
export type ExpectedOutput = any;
|
||||
|
||||
function isChatGeneration(
|
||||
llmOutput: ChatGeneration | Generation
|
||||
): llmOutput is ChatGeneration {
|
||||
return "message" in llmOutput;
|
||||
function isChatGeneration(llmOutput: ChatGeneration | Generation): llmOutput is ChatGeneration {
|
||||
return 'message' in llmOutput;
|
||||
}
|
||||
|
||||
class CustomHandler extends BaseCallbackHandler {
|
||||
name = "custom_handler";
|
||||
|
||||
name = 'custom_handler';
|
||||
|
||||
handleLLMNewToken(token: string) {
|
||||
GBLogEx.info(0, `LLM: token: ${JSON.stringify(token)}`);
|
||||
|
@ -95,11 +93,10 @@ class CustomHandler extends BaseCallbackHandler {
|
|||
|
||||
const logHandler = new CustomHandler();
|
||||
|
||||
export class GBLLMOutputParser extends
|
||||
BaseLLMOutputParser<ExpectedOutput> {
|
||||
lc_namespace = ["langchain", "output_parsers"];
|
||||
export class GBLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
|
||||
lc_namespace = ['langchain', 'output_parsers'];
|
||||
|
||||
private toolChain: RunnableSequence
|
||||
private toolChain: RunnableSequence;
|
||||
private min;
|
||||
|
||||
constructor(min, toolChain: RunnableSequence, documentChain: RunnableSequence) {
|
||||
|
@ -108,14 +105,9 @@ export class GBLLMOutputParser extends
|
|||
this.toolChain = toolChain;
|
||||
}
|
||||
|
||||
async parseResult(
|
||||
llmOutputs: ChatGeneration[] | Generation[]
|
||||
): Promise<ExpectedOutput> {
|
||||
|
||||
async parseResult(llmOutputs: ChatGeneration[] | Generation[]): Promise<ExpectedOutput> {
|
||||
if (!llmOutputs.length) {
|
||||
throw new OutputParserException(
|
||||
"Output parser did not receive any generations."
|
||||
);
|
||||
throw new OutputParserException('Output parser did not receive any generations.');
|
||||
}
|
||||
let result;
|
||||
|
||||
|
@ -140,10 +132,9 @@ export class GBLLMOutputParser extends
|
|||
|
||||
let { sources, text } = res;
|
||||
|
||||
await CollectionUtil.asyncForEach(sources, async (source) => {
|
||||
await CollectionUtil.asyncForEach(sources, async source => {
|
||||
let found = false;
|
||||
if (source) {
|
||||
|
||||
if (source && source.file.endsWith('.pdf')) {
|
||||
const gbaiName = DialogKeywords.getGBAIPath(this.min.botId, 'gbkb');
|
||||
const localName = Path.join(process.env.PWD, 'work', gbaiName, 'docs', source.file);
|
||||
|
||||
|
@ -166,9 +157,7 @@ export class GBLLMOutputParser extends
|
|||
}
|
||||
|
||||
export class ChatServices {
|
||||
|
||||
public static async pdfPageAsImage(min, filename, pageNumber) {
|
||||
|
||||
// Converts the PDF to PNG.
|
||||
|
||||
GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber}...`);
|
||||
|
@ -181,7 +170,6 @@ export class ChatServices {
|
|||
verbosityLevel: 0
|
||||
});
|
||||
|
||||
|
||||
// Prepare an image on cache and return the GBFILE information.
|
||||
|
||||
if (pngPages.length > 0) {
|
||||
|
@ -199,7 +187,6 @@ export class ChatServices {
|
|||
sanitizedQuestion: string,
|
||||
numDocuments: number = 100
|
||||
): Promise<string> {
|
||||
|
||||
if (sanitizedQuestion === '') {
|
||||
return '';
|
||||
}
|
||||
|
@ -219,10 +206,12 @@ export class ChatServices {
|
|||
const doc = uniqueDocuments[filePaths];
|
||||
const metadata = doc.metadata;
|
||||
const filename = Path.basename(metadata.source);
|
||||
const page = await ChatServices.findPageForText(metadata.source,
|
||||
doc.pageContent);
|
||||
let page = 0;
|
||||
if (metadata.source.endsWith('.pdf')) {
|
||||
page = await ChatServices.findPageForText(metadata.source, doc.pageContent);
|
||||
}
|
||||
|
||||
output = `${output}\n\n\n\nUse also the following context which is coming from Source Document: ${filename} at page: ${page}
|
||||
output = `${output}\n\n\n\nUse also the following context which is coming from Source Document: ${filename} at page: ${page?page:'entire document'}
|
||||
(you will fill the JSON sources collection field later),
|
||||
memorize this block among document information and return when you are refering this part of content:\n\n\n\n ${doc.pageContent} \n\n\n\n.`;
|
||||
}
|
||||
|
@ -233,12 +222,15 @@ export class ChatServices {
|
|||
const data = new Uint8Array(Fs.readFileSync(pdfPath));
|
||||
const pdf = await getDocument({ data }).promise;
|
||||
|
||||
searchText = searchText.replace(/\s/g, '')
|
||||
searchText = searchText.replace(/\s/g, '');
|
||||
|
||||
for (let i = 1; i <= pdf.numPages; i++) {
|
||||
const page = await pdf.getPage(i);
|
||||
const textContent = await page.getTextContent();
|
||||
const text = textContent.items.map(item => item['str']).join('').replace(/\s/g, '');
|
||||
const text = textContent.items
|
||||
.map(item => item['str'])
|
||||
.join('')
|
||||
.replace(/\s/g, '');
|
||||
|
||||
if (text.includes(searchText)) return i;
|
||||
}
|
||||
|
@ -247,34 +239,31 @@ export class ChatServices {
|
|||
}
|
||||
|
||||
/**
|
||||
* Generate text
|
||||
*
|
||||
* CONTINUE keyword.
|
||||
*
|
||||
* result = CONTINUE text
|
||||
*
|
||||
*/
|
||||
public static async continue(min: GBMinInstance, question: string, chatId) {
|
||||
|
||||
}
|
||||
* Generate text
|
||||
*
|
||||
* CONTINUE keyword.
|
||||
*
|
||||
* result = CONTINUE text
|
||||
*
|
||||
*/
|
||||
public static async continue(min: GBMinInstance, question: string, chatId) {}
|
||||
|
||||
private static memoryMap = {};
|
||||
public static userSystemPrompt = {};
|
||||
|
||||
public static async answerByGPT(min: GBMinInstance, user, pid,
|
||||
public static async answerByGPT(
|
||||
min: GBMinInstance,
|
||||
user,
|
||||
pid,
|
||||
question: string,
|
||||
searchScore: number,
|
||||
subjects: GuaribasSubject[]
|
||||
) {
|
||||
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
return { answer: undefined, questionId: 0 };
|
||||
}
|
||||
|
||||
const LLMMode = min.core.getParam(
|
||||
min.instance,
|
||||
'Answer Mode', 'direct'
|
||||
);
|
||||
const LLMMode = min.core.getParam(min.instance, 'Answer Mode', 'direct');
|
||||
|
||||
const docsContext = min['vectorStore'];
|
||||
|
||||
|
@ -283,20 +272,19 @@ export class ChatServices {
|
|||
returnMessages: true,
|
||||
memoryKey: 'chat_history',
|
||||
inputKey: 'input',
|
||||
k: 2,
|
||||
})
|
||||
k: 2
|
||||
});
|
||||
}
|
||||
const memory = this.memoryMap[user.userSystemId];
|
||||
const systemPrompt = this.userSystemPrompt[user.userSystemId];
|
||||
|
||||
const model = new ChatOpenAI({
|
||||
openAIApiKey: process.env.OPENAI_API_KEY,
|
||||
modelName: "gpt-3.5-turbo-0125",
|
||||
modelName: 'gpt-3.5-turbo-0125',
|
||||
temperature: 0,
|
||||
callbacks: [logHandler],
|
||||
callbacks: [logHandler]
|
||||
});
|
||||
|
||||
|
||||
let tools = await ChatServices.getTools(min);
|
||||
let toolsAsText = ChatServices.getToolsAsText(tools);
|
||||
|
||||
|
@ -316,9 +304,9 @@ export class ChatServices {
|
|||
Do not use any previous tools output in the chat_history.
|
||||
`
|
||||
),
|
||||
new MessagesPlaceholder("chat_history"),
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
AIMessagePromptTemplate.fromTemplate(`Follow Up Input: {question}
|
||||
Standalone question:`),
|
||||
Standalone question:`)
|
||||
]);
|
||||
|
||||
const toolsResultPrompt = ChatPromptTemplate.fromMessages([
|
||||
|
@ -327,9 +315,9 @@ export class ChatServices {
|
|||
rephrase the answer to the user using this tool output.
|
||||
`
|
||||
),
|
||||
new MessagesPlaceholder("chat_history"),
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
AIMessagePromptTemplate.fromTemplate(`Tool output: {tool_output}
|
||||
Standalone question:`),
|
||||
Standalone question:`)
|
||||
]);
|
||||
|
||||
const combineDocumentsPrompt = ChatPromptTemplate.fromMessages([
|
||||
|
@ -355,14 +343,13 @@ export class ChatServices {
|
|||
Double check if the output is a valid JSON with brackets. all fields are required: text, file, page.
|
||||
`
|
||||
),
|
||||
new MessagesPlaceholder("chat_history"),
|
||||
HumanMessagePromptTemplate.fromTemplate("Question: {question}"),
|
||||
new MessagesPlaceholder('chat_history'),
|
||||
HumanMessagePromptTemplate.fromTemplate('Question: {question}')
|
||||
]);
|
||||
|
||||
const callToolChain = RunnableSequence.from([
|
||||
{
|
||||
tool_output: async (output: object) => {
|
||||
|
||||
const name = output['func'][0].function.name;
|
||||
const args = JSON.parse(output['func'][0].function.arguments);
|
||||
GBLogEx.info(min, `Running .gbdialog '${name}' as GPT tool...`);
|
||||
|
@ -373,8 +360,7 @@ export class ChatServices {
|
|||
chat_history: async () => {
|
||||
const { chat_history } = await memory.loadMemoryVariables({});
|
||||
return chat_history;
|
||||
},
|
||||
|
||||
}
|
||||
},
|
||||
toolsResultPrompt,
|
||||
model,
|
||||
|
@ -391,8 +377,7 @@ export class ChatServices {
|
|||
context: async (output: string) => {
|
||||
const c = await ChatServices.getRelevantContext(docsContext, output);
|
||||
return `${systemPrompt} \n ${c ? 'Use this context to answer:\n' + c : 'answer just with user question.'}`;
|
||||
|
||||
},
|
||||
}
|
||||
},
|
||||
combineDocumentsPrompt,
|
||||
model,
|
||||
|
@ -405,7 +390,7 @@ export class ChatServices {
|
|||
chat_history: async () => {
|
||||
const { chat_history } = await memory.loadMemoryVariables({});
|
||||
return chat_history;
|
||||
},
|
||||
}
|
||||
},
|
||||
questionGeneratorTemplate,
|
||||
modelWithTools,
|
||||
|
@ -416,45 +401,36 @@ export class ChatServices {
|
|||
let result, sources;
|
||||
let text, file, page;
|
||||
|
||||
|
||||
// Choose the operation mode of answer generation, based on
|
||||
// Choose the operation mode of answer generation, based on
|
||||
// .gbot switch LLMMode and choose the corresponding chain.
|
||||
|
||||
if (LLMMode === "direct") {
|
||||
if (LLMMode === 'direct') {
|
||||
result = await (tools.length > 0 ? modelWithTools : model).invoke(`
|
||||
${systemPrompt}
|
||||
|
||||
${question}`);
|
||||
|
||||
result = result.content;
|
||||
}
|
||||
else if (LLMMode === "document") {
|
||||
|
||||
} else if (LLMMode === 'document') {
|
||||
const res = await combineDocumentsChain.invoke(question);
|
||||
result = res.text;
|
||||
sources = res.sources;
|
||||
|
||||
} else if (LLMMode === "function") {
|
||||
|
||||
} else if (LLMMode === 'function') {
|
||||
result = await conversationalToolChain.invoke({
|
||||
question,
|
||||
question
|
||||
});
|
||||
}
|
||||
else if (LLMMode === "full") {
|
||||
|
||||
} else if (LLMMode === 'full') {
|
||||
throw new Error('Not implemented.'); // TODO: #407.
|
||||
}
|
||||
|
||||
else {
|
||||
} else {
|
||||
GBLogEx.info(min, `Invalid Answer Mode in Config.xlsx: ${LLMMode}.`);
|
||||
}
|
||||
|
||||
await memory.saveContext(
|
||||
{
|
||||
input: question,
|
||||
input: question
|
||||
},
|
||||
{
|
||||
output: result.replace(/\!\[.*\)/gi, '') // Removes .MD url beforing adding to history.
|
||||
output: result?result.replace(/\!\[.*\)/gi, ''): 'no answer' // Removes .MD url beforing adding to history.
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -464,40 +440,34 @@ export class ChatServices {
|
|||
|
||||
private static getToolsAsText(tools) {
|
||||
return Object.keys(tools)
|
||||
.map((toolname) => `- ${tools[toolname].name}: ${tools[toolname].description}`)
|
||||
.join("\n");
|
||||
.map(toolname => `- ${tools[toolname].name}: ${tools[toolname].description}`)
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
private static async getTools(min: GBMinInstance) {
|
||||
let functions = [];
|
||||
|
||||
// Adds .gbdialog as functions if any to GPT Functions.
|
||||
await CollectionUtil.asyncForEach(Object.keys(min.scriptMap), async (script) => {
|
||||
|
||||
|
||||
const path = DialogKeywords.getGBAIPath(min.botId, "gbdialog", null);
|
||||
await CollectionUtil.asyncForEach(Object.keys(min.scriptMap), async script => {
|
||||
const path = DialogKeywords.getGBAIPath(min.botId, 'gbdialog', null);
|
||||
const jsonFile = Path.join('work', path, `${script}.json`);
|
||||
|
||||
if (Fs.existsSync(jsonFile) && script.toLowerCase() !== 'start.vbs') {
|
||||
|
||||
const funcJSON = JSON.parse(Fs.readFileSync(jsonFile, 'utf8'));
|
||||
const funcObj = funcJSON?.function;
|
||||
|
||||
if (funcObj) {
|
||||
|
||||
// TODO: Use ajv.
|
||||
funcObj.schema = eval(jsonSchemaToZod(funcObj.parameters));
|
||||
functions.push(new DynamicStructuredTool(funcObj));
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
if (process.env.WIKIPEDIA_TOOL) {
|
||||
|
||||
const tool = new WikipediaQueryRun({
|
||||
topKResults: 3,
|
||||
maxDocContentLength: 4000,
|
||||
maxDocContentLength: 4000
|
||||
});
|
||||
functions.push(tool);
|
||||
}
|
||||
|
|
|
@ -48,8 +48,11 @@ import { DocxLoader } from 'langchain/document_loaders/fs/docx';
|
|||
import { EPubLoader } from 'langchain/document_loaders/fs/epub';
|
||||
import { CSVLoader } from 'langchain/document_loaders/fs/csv';
|
||||
import path from 'path';
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||
import { Document } from 'langchain/document';
|
||||
import getColors from 'get-image-colors';
|
||||
|
||||
|
||||
import {
|
||||
GBDialogStep,
|
||||
|
@ -81,7 +84,6 @@ import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
|
|||
import { GBMinService } from '../../core.gbapp/services/GBMinService.js';
|
||||
import { ChatServices } from '../../gpt.gblib/services/ChatServices.js';
|
||||
|
||||
|
||||
/**
|
||||
* Result for query on KB data.
|
||||
*/
|
||||
|
@ -271,7 +273,7 @@ export class KBService implements IGBKBService {
|
|||
min: GBMinInstance,
|
||||
user,
|
||||
step,
|
||||
pid,
|
||||
pid,
|
||||
query: string,
|
||||
searchScore: number,
|
||||
subjects: GuaribasSubject[]
|
||||
|
@ -293,13 +295,9 @@ export class KBService implements IGBKBService {
|
|||
'Default Content Language',
|
||||
GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
|
||||
);
|
||||
|
||||
query = await min.conversationalService.translate(
|
||||
min,
|
||||
query,
|
||||
contentLocale
|
||||
);
|
||||
|
||||
|
||||
query = await min.conversationalService.translate(min, query, contentLocale);
|
||||
|
||||
GBLogEx.info(min, `Translated query (prompt): ${query}.`);
|
||||
|
||||
// Try simple search first.
|
||||
|
@ -317,10 +315,8 @@ export class KBService implements IGBKBService {
|
|||
}
|
||||
}
|
||||
let returnedScore = 0;
|
||||
const key = instance.searchKey ? instance.searchKey :
|
||||
GBServer.globals.minBoot.instance.searchKey;
|
||||
const host = instance.searchHost ? instance.searchHost :
|
||||
GBServer.globals.minBoot.instance.searchHost;
|
||||
const key = instance.searchKey ? instance.searchKey : GBServer.globals.minBoot.instance.searchKey;
|
||||
const host = instance.searchHost ? instance.searchHost : GBServer.globals.minBoot.instance.searchHost;
|
||||
|
||||
// No direct match found, so Search is used.
|
||||
|
||||
|
@ -348,8 +344,6 @@ export class KBService implements IGBKBService {
|
|||
top: 1
|
||||
});
|
||||
|
||||
|
||||
|
||||
// Searches via Search (Azure Search).
|
||||
|
||||
let found = false;
|
||||
|
@ -359,11 +353,15 @@ export class KBService implements IGBKBService {
|
|||
if (returnedScore >= searchScore) {
|
||||
const value = await this.getAnswerById(instance.instanceId, result.document.answerId);
|
||||
if (value !== null) {
|
||||
GBLogEx.info(min, `SEARCH WILL BE USED with score: ${returnedScore} > required (searchScore): ${searchScore}`);
|
||||
GBLogEx.info(
|
||||
min,
|
||||
`SEARCH WILL BE USED with score: ${returnedScore} > required (searchScore): ${searchScore}`
|
||||
);
|
||||
|
||||
return { answer: value, questionId: result.document.questionId };
|
||||
} else {
|
||||
GBLogEx.info(min,
|
||||
GBLogEx.info(
|
||||
min,
|
||||
`Index problem. SEARCH WILL NOT be used as answerId ${result.document.answerId} was not found in database,
|
||||
returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
|
||||
);
|
||||
|
@ -373,17 +371,13 @@ export class KBService implements IGBKBService {
|
|||
}
|
||||
}
|
||||
}
|
||||
GBLogEx.info(min,
|
||||
GBLogEx.info(
|
||||
min,
|
||||
`SEARCH returned LOW level score, calling NLP if any,
|
||||
returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
|
||||
);
|
||||
|
||||
return await ChatServices.answerByGPT(min, user, pid,
|
||||
query,
|
||||
searchScore,
|
||||
subjects
|
||||
);
|
||||
|
||||
return await ChatServices.answerByGPT(min, user, pid, query, searchScore, subjects);
|
||||
}
|
||||
|
||||
public async getSubjectItems(instanceId: number, parentId: number): Promise<GuaribasSubject[]> {
|
||||
|
@ -626,7 +620,7 @@ export class KBService implements IGBKBService {
|
|||
}
|
||||
|
||||
public async sendAnswer(min: GBMinInstance, channel: string, step: GBDialogStep, answer) {
|
||||
answer = typeof (answer) === 'string' ? answer : answer.content;
|
||||
answer = typeof answer === 'string' ? answer : answer.content;
|
||||
if (answer.endsWith('.mp4')) {
|
||||
await this.playVideo(min, min.conversationalService, step, answer, channel);
|
||||
} else if (
|
||||
|
@ -646,14 +640,11 @@ export class KBService implements IGBKBService {
|
|||
const url = urlJoin('kb', path, 'assets', answer);
|
||||
await this.playUrl(min, min.conversationalService, step, url, channel);
|
||||
} else if (answer.format === '.md') {
|
||||
await min.conversationalService['playMarkdown'](min, answer, channel, step,
|
||||
GBMinService.userMobile(step));
|
||||
await min.conversationalService['playMarkdown'](min, answer, channel, step, GBMinService.userMobile(step));
|
||||
} else if (answer.endsWith('.ogg') && process.env.AUDIO_DISABLED !== 'true') {
|
||||
await this.playAudio(min, answer, channel, step, min.conversationalService);
|
||||
} else {
|
||||
|
||||
await min.conversationalService.sendText(min, step, answer);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -685,7 +676,6 @@ export class KBService implements IGBKBService {
|
|||
const a = await GuaribasAnswer.create(answer);
|
||||
question['answerId'] = a.answerId;
|
||||
const q = await GuaribasQuestion.create(question);
|
||||
|
||||
}
|
||||
|
||||
public async importKbPackage(
|
||||
|
@ -747,7 +737,7 @@ export class KBService implements IGBKBService {
|
|||
const localName = Path.join('work', path, 'articles', file.name);
|
||||
let loader = new DocxLoader(localName);
|
||||
let doc = await loader.load();
|
||||
|
||||
|
||||
const answer = {
|
||||
instanceId: instance.instanceId,
|
||||
content: doc[0].pageContent,
|
||||
|
@ -758,10 +748,6 @@ export class KBService implements IGBKBService {
|
|||
};
|
||||
|
||||
data.answers.push(answer);
|
||||
|
||||
|
||||
|
||||
|
||||
} else if (file !== null && file.name.endsWith('.toc.docx')) {
|
||||
const path = DialogKeywords.getGBAIPath(instance.botId, `gbkb`);
|
||||
const localName = Path.join('work', path, 'articles', file.name);
|
||||
|
@ -866,6 +852,103 @@ export class KBService implements IGBKBService {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Navigates `page` to `url` and, when the response is an HTTP 200 whose
 * content type contains `text/html`, writes the response body to
 * `<PWD>/work/<gbot path>/Website/<name>.html`.
 *
 * @param min Bot instance; only `min.botId` is read here, and `min` is passed to the logger.
 * @param url Absolute URL to fetch.
 * @param page Puppeteer page used for the navigation (left on `url` afterwards).
 * @returns The path of the written file, or `null` when nothing was saved
 *          (no response, non-200 status, or non-HTML content type).
 */
async saveHtmlPage(min, url: string, page: Page): Promise<string | null> {
  const response = await page.goto(url);

  // page.goto() can resolve to null (e.g. same-document navigations), so the
  // response has to be checked before any method is called on it.
  if (!response || response.status() !== 200) {
    return null;
  }

  const contentType = response.headers()['content-type'];
  if (!contentType || !contentType.includes('text/html')) {
    return null;
  }

  const buffer = await response.buffer();

  // Last path segment of the URL, ignoring one trailing slash; 'index' when
  // the path is empty (site root).
  const { pathname } = new URL(url);
  const urlPath = pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
  const baseName = urlPath.split('/').pop() || 'index';
  const filename = `${baseName}.html`;

  // NOTE(review): only the last segment is used, so two pages such as
  // /a/page and /b/page are written to the same file — confirm this is acceptable.
  const path = DialogKeywords.getGBAIPath(min.botId, `gbot`);
  const directoryPath = Path.join(process.env.PWD, 'work', path, 'Website');
  const filePath = Path.join(directoryPath, filename);

  GBLogEx.info(min, `[GBDeployer] Saving Website file in ${filePath}.`);

  // Create the target directory (and any missing parents) before writing.
  Fs.mkdirSync(directoryPath, { recursive: true });
  Fs.writeFileSync(filePath, buffer);

  return filePath;
}
|
||||
|
||||
/**
 * Recursively saves the HTML page at `url` and the same-host pages it links
 * to, down to `maxDepth` levels.
 *
 * @param min Bot instance, forwarded to the logger and to `saveHtmlPage`.
 * @param url URL to process at this level.
 * @param visited URLs already processed; mutated (the current URL is added).
 * @param depth Current recursion depth (the initial call passes 0).
 * @param maxDepth Deepest level that is still processed.
 * @param page Puppeteer page reused for every navigation.
 * @returns Paths of the files saved for this URL and its descendants, this
 *          URL's file first. Always an array: empty when the URL is skipped,
 *          is not an HTML page, or an error occurs.
 */
async crawl(min, url: string, visited: Set<string>, depth: number, maxDepth: number, page: Page): Promise<string[]> {
  try {
    // Media and document links are never fetched.
    const skippedExtensions = ['.jpg', '.pdf', '.png', '.mp4'];
    if (depth > maxDepth || visited.has(url) || skippedExtensions.some(ext => url.endsWith(ext))) {
      return [];
    }

    await GBLogEx.info(min, `Processing URL: ${url}.`);

    // Marked before fetching so a failing page is not retried through another link.
    visited.add(url);

    const filename = await this.saveHtmlPage(min, url, page);
    if (!filename) {
      // If the URL doesn't represent an HTML page, skip crawling its links.
      return [];
    }

    // Collect the links first: the recursive calls below navigate the same page away.
    const currentDomain = new URL(page.url()).hostname;
    let links = await page.evaluate(currentDomain => {
      // Runs in the browser context: keep anchors on the same host only.
      const anchors = Array.from(document.querySelectorAll('a')).filter(p => {
        try {
          return currentDomain === new URL(p.href).hostname;
        } catch (err) {
          return false;
        }
      });

      // Strip fragments so #section variants map to a single URL.
      return anchors.map(anchor => anchor.href.replace(/#.*/, ''));
    }, currentDomain);

    if (!Array.isArray(links)) {
      links = [];
    }

    const filteredLinks = links.filter(l => {
      try {
        new URL(l); // Throws when the link is not a valid URL.
        return !visited.has(l);
      } catch (error) {
        // Ignore invalid URLs.
        return false;
      }
    });

    const childLinks = [];
    for (const link of filteredLinks) {
      const saved = await this.crawl(min, link, visited, depth + 1, maxDepth, page);
      if (saved) {
        childLinks.push(...saved);
      }
    }

    return [filename, ...childLinks]; // Include the filename of the cached file.
  } catch (error) {
    await GBLogEx.info(min, error);

    // Honour the declared return type: callers concat/spread the result, and
    // an implicit `undefined` would end up as an entry in their file list.
    return [];
  }
}
|
||||
|
||||
/**
|
||||
* Import all .docx files in reading comprehension folder.
|
||||
*/
|
||||
|
@ -875,11 +958,52 @@ export class KBService implements IGBKBService {
|
|||
instance: IGBInstance,
|
||||
packageId: number
|
||||
): Promise<any> {
|
||||
const files = await walkPromise(urlJoin(localPath, 'docs'));
|
||||
let files = [];
|
||||
|
||||
const website = min.core.getParam<string>(min.instance, 'Website', null);
|
||||
|
||||
if (website) {
|
||||
const browser = await puppeteer.launch({ headless: false });
|
||||
const page = await browser.newPage();
|
||||
const response = await page.goto(website);
|
||||
|
||||
await page.screenshot({ path: 'screenshot.png' });
|
||||
|
||||
// Extract dominant colors from the screenshot
|
||||
const colors = await getColors('screenshot.png');
|
||||
|
||||
// Assuming you want the two most dominant colors
|
||||
const mainColor1 = colors[0].hex();
|
||||
const mainColor2 = colors[1].hex();
|
||||
|
||||
console.log('Main Color 1:', mainColor1);
|
||||
console.log('Main Color 2:', mainColor2);
|
||||
|
||||
|
||||
const maxDepth = 1; // Maximum depth of recursion
|
||||
const visited = new Set<string>();
|
||||
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page));
|
||||
|
||||
await browser.close();
|
||||
|
||||
files.shift();
|
||||
|
||||
await CollectionUtil.asyncForEach(files, async file => {
|
||||
let content = null;
|
||||
|
||||
const document = await this.loadAndSplitFile(file);
|
||||
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
||||
const vectorStore = min['vectorStore'];
|
||||
await vectorStore.addDocuments(flattenedDocuments);
|
||||
await vectorStore.save(min['vectorStorePath']);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
files = await walkPromise(urlJoin(localPath, 'docs'));
|
||||
|
||||
if (!files[0]) {
|
||||
GBLogEx.info(min,
|
||||
`[GBDeployer] docs folder not created yet in .gbkb. To use Reading Comprehension, create this folder at root and put a document to get read by the.`
|
||||
);
|
||||
GBLogEx.info(min, `[GBDeployer] docs folder not created yet in .gbkb neither a website in .gbot.`);
|
||||
} else {
|
||||
await CollectionUtil.asyncForEach(files, async file => {
|
||||
let content = null;
|
||||
|
@ -894,211 +1018,217 @@ export class KBService implements IGBKBService {
|
|||
}
|
||||
}
|
||||
|
||||
defaultRecursiveCharacterTextSplitter = new RecursiveCharacterTextSplitter({
|
||||
defaultRecursiveCharacterTextSplitter = new RecursiveCharacterTextSplitter({
|
||||
chunkSize: 700,
|
||||
chunkOverlap: 50,
|
||||
chunkOverlap: 50
|
||||
});
|
||||
|
||||
markdownRecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter.fromLanguage('markdown', {
|
||||
|
||||
markdownRecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter.fromLanguage('markdown', {
|
||||
chunkSize: 700,
|
||||
chunkOverlap: 50,
|
||||
chunkOverlap: 50
|
||||
});
|
||||
|
||||
|
||||
private async loadAndSplitFile(filePath: string): Promise<Document<Record<string, unknown>>[]> {
|
||||
const fileExtension = path.extname(filePath);
|
||||
let loader;
|
||||
let documents: Document<Record<string, unknown>>[];
|
||||
switch (fileExtension) {
|
||||
case '.json':
|
||||
loader = new JSONLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.txt':
|
||||
loader = new TextLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.md':
|
||||
loader = new TextLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.markdownRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.pdf':
|
||||
loader = new PDFLoader(filePath, { splitPages: false });
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.docx':
|
||||
loader = new DocxLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.csv':
|
||||
loader = new CSVLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.epub':
|
||||
loader = new EPubLoader(filePath, { splitChapters: false });
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported file extension: ${fileExtension}`);
|
||||
}
|
||||
return documents;
|
||||
}
|
||||
|
||||
|
||||
public async importKbTabularDirectory(localPath: string, min: GBMinInstance, packageId: number): Promise < any > {
|
||||
const files = await walkPromise(localPath);
|
||||
|
||||
await CollectionUtil.asyncForEach(files, async file => {
|
||||
if (file !== null && file.name.endsWith('.xlsx')) {
|
||||
return await this.importKbTabularFile(urlJoin(file.root, file.name), min, packageId);
|
||||
const fileExtension = path.extname(filePath);
|
||||
let loader;
|
||||
let documents: Document<Record<string, unknown>>[];
|
||||
switch (fileExtension) {
|
||||
case '.json':
|
||||
loader = new JSONLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.txt':
|
||||
loader = new TextLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.txt':
|
||||
loader = new TextLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.html':
|
||||
loader = new TextLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.md':
|
||||
loader = new TextLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.markdownRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.pdf':
|
||||
loader = new PDFLoader(filePath, { splitPages: false });
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.docx':
|
||||
loader = new DocxLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.csv':
|
||||
loader = new CSVLoader(filePath);
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
case '.epub':
|
||||
loader = new EPubLoader(filePath, { splitChapters: false });
|
||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Unsupported file extension: ${fileExtension}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
return documents;
|
||||
}
|
||||
|
||||
/**
 * Walks a directory and imports every .xlsx file found in it.
 *
 * @param localPath Directory handed to walkPromise.
 * @param min Bot instance forwarded to importKbTabularFile.
 * @param packageId Package id forwarded to importKbTabularFile.
 */
public async importKbTabularDirectory(localPath: string, min: GBMinInstance, packageId: number): Promise<any> {
  const entries = await walkPromise(localPath);

  await CollectionUtil.asyncForEach(entries, async entry => {
    const isSpreadsheet = entry !== null && entry.name.endsWith('.xlsx');
    if (!isSpreadsheet) {
      return;
    }

    const spreadsheetPath = urlJoin(entry.root, entry.name);
    return await this.importKbTabularFile(spreadsheetPath, min, packageId);
  });
}
|
||||
|
||||
public async importSubjectFile(
|
||||
packageId: number,
|
||||
filename: string,
|
||||
menuFile: string,
|
||||
instance: IGBInstance
|
||||
): Promise < any > {
|
||||
let subjectsLoaded;
|
||||
if(menuFile) {
|
||||
// Loads menu.xlsx and finds worksheet.
|
||||
packageId: number,
|
||||
filename: string,
|
||||
menuFile: string,
|
||||
instance: IGBInstance
|
||||
): Promise<any> {
|
||||
let subjectsLoaded;
|
||||
if (menuFile) {
|
||||
// Loads menu.xlsx and finds worksheet.
|
||||
|
||||
const workbook = new Excel.Workbook();
|
||||
const data = await workbook.xlsx.readFile(menuFile);
|
||||
let worksheet: any;
|
||||
for (let t = 0; t < data.worksheets.length; t++) {
|
||||
worksheet = data.worksheets[t];
|
||||
if (worksheet) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const MAX_LEVEL = 4; // Max column level to reach menu items in plan.
|
||||
// Iterates over all items.
|
||||
|
||||
let rows = worksheet._rows;
|
||||
rows.length = 24;
|
||||
let lastLevel = 0;
|
||||
let subjects = { children: [] };
|
||||
let childrenNode = subjects.children;
|
||||
let activeObj = null;
|
||||
|
||||
let activeChildrenGivenLevel = [childrenNode];
|
||||
|
||||
await asyncPromise.eachSeries(rows, async row => {
|
||||
if (!row) return;
|
||||
let menu;
|
||||
|
||||
// Detect menu level by skipping blank cells on left.
|
||||
|
||||
let level;
|
||||
for (level = 0; level < MAX_LEVEL; level++) {
|
||||
const cell = row._cells[level];
|
||||
if (cell && cell.text) {
|
||||
menu = cell.text;
|
||||
const workbook = new Excel.Workbook();
|
||||
const data = await workbook.xlsx.readFile(menuFile);
|
||||
let worksheet: any;
|
||||
for (let t = 0; t < data.worksheets.length; t++) {
|
||||
worksheet = data.worksheets[t];
|
||||
if (worksheet) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Tree hierarchy calculation.
|
||||
const MAX_LEVEL = 4; // Max column level to reach menu items in plan.
|
||||
// Iterates over all items.
|
||||
|
||||
if (level > lastLevel) {
|
||||
childrenNode = activeObj.children;
|
||||
} else if (level < lastLevel) {
|
||||
childrenNode = activeChildrenGivenLevel[level];
|
||||
}
|
||||
let rows = worksheet._rows;
|
||||
rows.length = 24;
|
||||
let lastLevel = 0;
|
||||
let subjects = { children: [] };
|
||||
let childrenNode = subjects.children;
|
||||
let activeObj = null;
|
||||
|
||||
/// Keeps the record of last subroots for each level, to
|
||||
// change levels greater than one (return to main menu),
|
||||
// can exist between leaf nodes and roots.
|
||||
let activeChildrenGivenLevel = [childrenNode];
|
||||
|
||||
activeChildrenGivenLevel[level] = childrenNode;
|
||||
await asyncPromise.eachSeries(rows, async row => {
|
||||
if (!row) return;
|
||||
let menu;
|
||||
|
||||
// Insert the object into JSON.
|
||||
const description = row._cells[level + 1] ? row._cells[level + 1].text : null;
|
||||
activeObj = {
|
||||
title: menu,
|
||||
description: description,
|
||||
id: menu,
|
||||
children: []
|
||||
};
|
||||
activeChildrenGivenLevel[level].push(activeObj);
|
||||
// Detect menu level by skipping blank cells on left.
|
||||
|
||||
lastLevel = level;
|
||||
});
|
||||
let level;
|
||||
for (level = 0; level < MAX_LEVEL; level++) {
|
||||
const cell = row._cells[level];
|
||||
if (cell && cell.text) {
|
||||
menu = cell.text;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
subjectsLoaded = subjects;
|
||||
} else {
|
||||
subjectsLoaded = JSON.parse(Fs.readFileSync(filename, 'utf8'));
|
||||
}
|
||||
// Tree hierarchy calculation.
|
||||
|
||||
const doIt = async (subjects: GuaribasSubject[], parentSubjectId: number) => {
|
||||
return asyncPromise.eachSeries(subjects, async item => {
|
||||
const value = await GuaribasSubject.create(<GuaribasSubject>{
|
||||
internalId: item.id,
|
||||
parentSubjectId: parentSubjectId,
|
||||
instanceId: instance.instanceId,
|
||||
from: item.from,
|
||||
to: item.to,
|
||||
title: item.title,
|
||||
description: item.description,
|
||||
packageId: packageId
|
||||
if (level > lastLevel) {
|
||||
childrenNode = activeObj.children;
|
||||
} else if (level < lastLevel) {
|
||||
childrenNode = activeChildrenGivenLevel[level];
|
||||
}
|
||||
|
||||
/// Keeps the record of last subroots for each level, to
|
||||
// change levels greater than one (return to main menu),
|
||||
// can exist between leaf nodes and roots.
|
||||
|
||||
activeChildrenGivenLevel[level] = childrenNode;
|
||||
|
||||
// Insert the object into JSON.
|
||||
const description = row._cells[level + 1] ? row._cells[level + 1].text : null;
|
||||
activeObj = {
|
||||
title: menu,
|
||||
description: description,
|
||||
id: menu,
|
||||
children: []
|
||||
};
|
||||
activeChildrenGivenLevel[level].push(activeObj);
|
||||
|
||||
lastLevel = level;
|
||||
});
|
||||
|
||||
if (item.children) {
|
||||
return doIt(item.children, value.subjectId);
|
||||
} else {
|
||||
return item;
|
||||
}
|
||||
});
|
||||
};
|
||||
subjectsLoaded = subjects;
|
||||
} else {
|
||||
subjectsLoaded = JSON.parse(Fs.readFileSync(filename, 'utf8'));
|
||||
}
|
||||
|
||||
return doIt(subjectsLoaded.children, undefined);
|
||||
}
|
||||
const doIt = async (subjects: GuaribasSubject[], parentSubjectId: number) => {
|
||||
return asyncPromise.eachSeries(subjects, async item => {
|
||||
const value = await GuaribasSubject.create(<GuaribasSubject>{
|
||||
internalId: item.id,
|
||||
parentSubjectId: parentSubjectId,
|
||||
instanceId: instance.instanceId,
|
||||
from: item.from,
|
||||
to: item.to,
|
||||
title: item.title,
|
||||
description: item.description,
|
||||
packageId: packageId
|
||||
});
|
||||
|
||||
if (item.children) {
|
||||
return doIt(item.children, value.subjectId);
|
||||
} else {
|
||||
return item;
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
return doIt(subjectsLoaded.children, undefined);
|
||||
}
|
||||
|
||||
/**
 * Removes the questions, answers and subjects stored for a package of the
 * given instance, then removes the package record itself.
 *
 * @param instance Instance whose instanceId scopes the deletion.
 * @param deployer Not used by this method.
 * @param packageId Id of the package being removed.
 */
public async undeployKbFromStorage(instance: IGBInstance, deployer: GBDeployer, packageId: number) {
  // A fresh options object is built for every call so no call sees another's object.
  const forThisPackage = () => ({
    where: { instanceId: instance.instanceId, packageId: packageId }
  });

  // Order kept as-is: questions, answers, subjects, and finally the package.
  await GuaribasQuestion.destroy(forThisPackage());
  await GuaribasAnswer.destroy(forThisPackage());
  await GuaribasSubject.destroy(forThisPackage());

  await this.undeployPackageFromStorage(instance, packageId);
}
|
||||
await GuaribasQuestion.destroy({
|
||||
where: { instanceId: instance.instanceId, packageId: packageId }
|
||||
});
|
||||
await GuaribasAnswer.destroy({
|
||||
where: { instanceId: instance.instanceId, packageId: packageId }
|
||||
});
|
||||
await GuaribasSubject.destroy({
|
||||
where: { instanceId: instance.instanceId, packageId: packageId }
|
||||
});
|
||||
await this.undeployPackageFromStorage(instance, packageId);
|
||||
}
|
||||
|
||||
public static async RefreshNER(min: GBMinInstance) {
|
||||
const questions = await KBService.getQuestionsNER(min.instance.instanceId);
|
||||
const contentLocale = min.core.getParam<string>(
|
||||
min.instance,
|
||||
'Default Content Language',
|
||||
GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
|
||||
);
|
||||
const questions = await KBService.getQuestionsNER(min.instance.instanceId);
|
||||
const contentLocale = min.core.getParam<string>(
|
||||
min.instance,
|
||||
'Default Content Language',
|
||||
GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
|
||||
);
|
||||
|
||||
await CollectionUtil.asyncForEach(questions, async question => {
|
||||
const text = question.content;
|
||||
await CollectionUtil.asyncForEach(questions, async question => {
|
||||
const text = question.content;
|
||||
|
||||
const categoryReg = /.*\((.*)\).*/gi.exec(text);
|
||||
const nameReg = /(\w+)\(.*\).*/gi.exec(text);
|
||||
const categoryReg = /.*\((.*)\).*/gi.exec(text);
|
||||
const nameReg = /(\w+)\(.*\).*/gi.exec(text);
|
||||
|
||||
if (categoryReg) {
|
||||
let category = categoryReg[1];
|
||||
if (categoryReg) {
|
||||
let category = categoryReg[1];
|
||||
|
||||
if (category === 'number') {
|
||||
min['nerEngine'].addRegexEntity('number', 'pt', '/d+/gi');
|
||||
if (category === 'number') {
|
||||
min['nerEngine'].addRegexEntity('number', 'pt', '/d+/gi');
|
||||
}
|
||||
if (nameReg) {
|
||||
let name = nameReg[1];
|
||||
|
||||
min['nerEngine'].addNamedEntityText(category, name, [contentLocale], [name]);
|
||||
}
|
||||
}
|
||||
if (nameReg) {
|
||||
let name = nameReg[1];
|
||||
|
||||
min['nerEngine'].addNamedEntityText(category, name, [contentLocale], [name]);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Deploys a knowledge base to the storage using the .gbkb format.
|
||||
|
@ -1106,90 +1236,90 @@ export class KBService implements IGBKBService {
|
|||
* @param localPath Path to the .gbkb folder.
|
||||
*/
|
||||
public async deployKb(core: IGBCoreService, deployer: GBDeployer, localPath: string, min: GBMinInstance) {
|
||||
const packageName = Path.basename(localPath);
|
||||
const instance = await core.loadInstanceByBotId(min.botId);
|
||||
GBLogEx.info(min, `[GBDeployer] Importing: ${localPath}`);
|
||||
const packageName = Path.basename(localPath);
|
||||
const instance = await core.loadInstanceByBotId(min.botId);
|
||||
GBLogEx.info(min, `[GBDeployer] Importing: ${localPath}`);
|
||||
|
||||
const p = await deployer.deployPackageToStorage(instance.instanceId, packageName);
|
||||
await this.importKbPackage(min, localPath, p, instance);
|
||||
GBDeployer.mountGBKBAssets(packageName, min.botId, localPath);
|
||||
const service = await AzureDeployerService.createInstance(deployer);
|
||||
const searchIndex = instance.searchIndex ? instance.searchIndex : GBServer.globals.minBoot.instance.searchIndex;
|
||||
await deployer.rebuildIndex(instance, service.getKBSearchSchema(searchIndex));
|
||||
const p = await deployer.deployPackageToStorage(instance.instanceId, packageName);
|
||||
await this.importKbPackage(min, localPath, p, instance);
|
||||
GBDeployer.mountGBKBAssets(packageName, min.botId, localPath);
|
||||
const service = await AzureDeployerService.createInstance(deployer);
|
||||
const searchIndex = instance.searchIndex ? instance.searchIndex : GBServer.globals.minBoot.instance.searchIndex;
|
||||
await deployer.rebuildIndex(instance, service.getKBSearchSchema(searchIndex));
|
||||
|
||||
min['groupCache'] = await KBService.getGroupReplies(instance.instanceId);
|
||||
await KBService.RefreshNER(min);
|
||||
min['groupCache'] = await KBService.getGroupReplies(instance.instanceId);
|
||||
await KBService.RefreshNER(min);
|
||||
|
||||
GBLogEx.info(min, `[GBDeployer] Start Bot Server Side Rendering... ${localPath}`);
|
||||
const html = await GBSSR.getHTML(min);
|
||||
let path = DialogKeywords.getGBAIPath(min.botId, `gbui`);
|
||||
path = Path.join(process.env.PWD, 'work', path, 'index.html');
|
||||
GBLogEx.info(min, `[GBDeployer] Saving SSR HTML in ${path}.`);
|
||||
Fs.writeFileSync(path, html, 'utf8');
|
||||
GBLogEx.info(min, `[GBDeployer] Start Bot Server Side Rendering... ${localPath}`);
|
||||
const html = await GBSSR.getHTML(min);
|
||||
let path = DialogKeywords.getGBAIPath(min.botId, `gbui`);
|
||||
path = Path.join(process.env.PWD, 'work', path, 'index.html');
|
||||
GBLogEx.info(min, `[GBDeployer] Saving SSR HTML in ${path}.`);
|
||||
Fs.writeFileSync(path, html, 'utf8');
|
||||
|
||||
GBLogEx.info(min, `[GBDeployer] Finished import of ${localPath}`);
|
||||
}
|
||||
GBLogEx.info(min, `[GBDeployer] Finished import of ${localPath}`);
|
||||
}
|
||||
|
||||
/**
 * Sends the audio referenced by an answer through the conversational service.
 *
 * @param min Bot instance passed on to sendAudio.
 * @param answer Answer whose content is handed to sendAudio.
 * @param channel Not used by this method.
 * @param step Dialog step passed on to sendAudio.
 * @param conversationalService Service that performs the send.
 */
private async playAudio(
  min: GBMinInstance,
  answer: GuaribasAnswer,
  channel: string,
  step: GBDialogStep,
  conversationalService: IGBConversationalService
) {
  // Awaited so that a failing send rejects this method's promise instead of
  // escaping as an unhandled rejection; playUrl and playVideo await theirs too.
  await conversationalService.sendAudio(min, step, answer.content);
}
|
||||
min: GBMinInstance,
|
||||
answer: GuaribasAnswer,
|
||||
channel: string,
|
||||
step: GBDialogStep,
|
||||
conversationalService: IGBConversationalService
|
||||
) {
|
||||
conversationalService.sendAudio(min, step, answer.content);
|
||||
}
|
||||
|
||||
private async playUrl(
|
||||
min,
|
||||
conversationalService: IGBConversationalService,
|
||||
step: GBDialogStep,
|
||||
url: string,
|
||||
channel: string
|
||||
) {
|
||||
if (channel === 'whatsapp') {
|
||||
await min.conversationalService.sendFile(min, step, null, url, '');
|
||||
} else {
|
||||
await conversationalService.sendEvent(min, step, 'play', {
|
||||
playerType: 'url',
|
||||
data: url
|
||||
});
|
||||
min,
|
||||
conversationalService: IGBConversationalService,
|
||||
step: GBDialogStep,
|
||||
url: string,
|
||||
channel: string
|
||||
) {
|
||||
if (channel === 'whatsapp') {
|
||||
await min.conversationalService.sendFile(min, step, null, url, '');
|
||||
} else {
|
||||
await conversationalService.sendEvent(min, step, 'play', {
|
||||
playerType: 'url',
|
||||
data: url
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async playVideo(
|
||||
min,
|
||||
conversationalService: IGBConversationalService,
|
||||
step: GBDialogStep,
|
||||
answer: GuaribasAnswer,
|
||||
channel: string
|
||||
) {
|
||||
if (channel === 'whatsapp') {
|
||||
await min.conversationalService.sendFile(min, step, null, answer.content, '');
|
||||
} else {
|
||||
const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
|
||||
await conversationalService.sendEvent(min, step, 'play', {
|
||||
playerType: 'video',
|
||||
data: urlJoin(path, 'videos', answer.content)
|
||||
min,
|
||||
conversationalService: IGBConversationalService,
|
||||
step: GBDialogStep,
|
||||
answer: GuaribasAnswer,
|
||||
channel: string
|
||||
) {
|
||||
if (channel === 'whatsapp') {
|
||||
await min.conversationalService.sendFile(min, step, null, answer.content, '');
|
||||
} else {
|
||||
const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
|
||||
await conversationalService.sendEvent(min, step, 'play', {
|
||||
playerType: 'video',
|
||||
data: urlJoin(path, 'videos', answer.content)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Deletes the GuaribasPackage rows matching the instance and package id.
 *
 * @param instance Object whose instanceId scopes the deletion.
 * @param packageId Id of the package to delete.
 */
private async undeployPackageFromStorage(instance: any, packageId: number) {
  const { instanceId } = instance;
  const where = { instanceId: instanceId, packageId: packageId };

  await GuaribasPackage.destroy({ where });
}
|
||||
|
||||
/**
 * Extracts the text of a file through textract, asking it to preserve line breaks.
 *
 * @param filename Path of the file handed to textract.
 * @returns A promise resolved with the text passed to the textract callback,
 *          or rejected with the error it reports.
 */
private async getTextFromFile(filename: string): Promise<string> {
  // Plain (non-async) executor: nothing is awaited here, and an async executor
  // would swallow a synchronous throw instead of rejecting the promise.
  return new Promise<string>((resolve, reject) => {
    textract.fromFileWithPath(filename, { preserveLineBreaks: true }, (error, text) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(text);
    });
  });
}
|
||||
}
|
||||
|
||||
/**
 * Deletes the GuaribasPackage rows matching the instance and package id.
 *
 * @param instance Object whose instanceId scopes the deletion.
 * @param packageId Id of the package to delete.
 */
private async undeployPackageFromStorage(instance: any, packageId: number) {
  const { instanceId } = instance;
  const where = { instanceId: instanceId, packageId: packageId };

  await GuaribasPackage.destroy({ where });
}
|
||||
|
||||
/**
 * Extracts the text of a file through textract, asking it to preserve line breaks.
 *
 * @param filename Path of the file handed to textract.
 * @returns A promise resolved with the text passed to the textract callback,
 *          or rejected with the error it reports.
 */
private async getTextFromFile(filename: string): Promise<string> {
  // Plain (non-async) executor: nothing is awaited here, and an async executor
  // would swallow a synchronous throw instead of rejecting the promise.
  return new Promise<string>((resolve, reject) => {
    textract.fromFileWithPath(filename, { preserveLineBreaks: true }, (error, text) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(text);
    });
  });
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue