new(all): Website indexing.
This commit is contained in:
parent
c620473fbf
commit
d73bf50a2d
5 changed files with 499 additions and 411 deletions
|
@ -133,8 +133,10 @@
|
||||||
"express": "4.18.2",
|
"express": "4.18.2",
|
||||||
"express-remove-route": "1.0.0",
|
"express-remove-route": "1.0.0",
|
||||||
"ffmpeg-static": "5.1.0",
|
"ffmpeg-static": "5.1.0",
|
||||||
|
"get-image-colors": "^4.0.1",
|
||||||
"google-libphonenumber": "3.2.31",
|
"google-libphonenumber": "3.2.31",
|
||||||
"googleapis": "126.0.1",
|
"googleapis": "126.0.1",
|
||||||
|
"hnswlib-node": "^1.4.2",
|
||||||
"http-proxy": "1.18.1",
|
"http-proxy": "1.18.1",
|
||||||
"ibm-watson": "7.1.2",
|
"ibm-watson": "7.1.2",
|
||||||
"iso-639-1": "3.1.1",
|
"iso-639-1": "3.1.1",
|
||||||
|
|
|
@ -2055,7 +2055,9 @@ export class SystemKeywords {
|
||||||
return (orientation || 0) >= 5 ? [height, width] : [width, height];
|
return (orientation || 0) >= 5 ? [height, width] : [width, height];
|
||||||
};
|
};
|
||||||
|
|
||||||
const size = getNormalSize(await sharp(buf).metadata());
|
const metadata = await sharp(buf).metadata();
|
||||||
|
const size = getNormalSize({width:metadata['width'],
|
||||||
|
height:metadata['height'], orientation: metadata['orientation'] });
|
||||||
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(imageName));
|
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(imageName));
|
||||||
images[index++] = { url: url, size: size, buf: buf };
|
images[index++] = { url: url, size: size, buf: buf };
|
||||||
}
|
}
|
||||||
|
|
|
@ -169,36 +169,20 @@ export class GBMinService {
|
||||||
let i = 1;
|
let i = 1;
|
||||||
|
|
||||||
if (instances.length > 1) {
|
if (instances.length > 1) {
|
||||||
this.bar1 = new cliProgress.SingleBar(
|
|
||||||
{
|
|
||||||
format: '[{bar}] ({value}/{total}) Loading {botId} ...',
|
|
||||||
barsize: 40,
|
|
||||||
forceRedraw: true
|
|
||||||
},
|
|
||||||
cliProgress.Presets.rect
|
|
||||||
);
|
|
||||||
this.bar1.start(instances.length, i, { botId: 'Boot' });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await CollectionUtil.asyncForEach(
|
await CollectionUtil.asyncForEach(
|
||||||
instances,
|
instances,
|
||||||
(async instance => {
|
(async instance => {
|
||||||
try {
|
try {
|
||||||
|
GBLog.info(`Mounting ${instance.botId}...`)
|
||||||
await this['mountBot'](instance);
|
await this['mountBot'](instance);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
GBLog.error(`Error mounting bot ${instance.botId}: ${error.message}\n${error.stack}`);
|
GBLog.error(`Error mounting bot ${instance.botId}: ${error.message}\n${error.stack}`);
|
||||||
} finally {
|
|
||||||
if (this.bar1) {
|
|
||||||
this.bar1.update(i++, { botId: instance.botId });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}).bind(this)
|
}).bind(this)
|
||||||
);
|
);
|
||||||
|
|
||||||
if (this.bar1) {
|
|
||||||
this.bar1.stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Loads API.
|
// Loads API.
|
||||||
|
|
||||||
await this.ensureAPI();
|
await this.ensureAPI();
|
||||||
|
|
|
@ -31,14 +31,19 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
import { HNSWLib } from '@langchain/community/vectorstores/hnswlib';
|
import { HNSWLib } from '@langchain/community/vectorstores/hnswlib';
|
||||||
import { StringOutputParser } from "@langchain/core/output_parsers";
|
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||||
import { AIMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder } from '@langchain/core/prompts';
|
import {
|
||||||
import { RunnableSequence } from "@langchain/core/runnables";
|
AIMessagePromptTemplate,
|
||||||
import { convertToOpenAITool } from "@langchain/core/utils/function_calling";
|
ChatPromptTemplate,
|
||||||
import { ChatOpenAI } from "@langchain/openai";
|
HumanMessagePromptTemplate,
|
||||||
|
MessagesPlaceholder
|
||||||
|
} from '@langchain/core/prompts';
|
||||||
|
import { RunnableSequence } from '@langchain/core/runnables';
|
||||||
|
import { convertToOpenAITool } from '@langchain/core/utils/function_calling';
|
||||||
|
import { ChatOpenAI } from '@langchain/openai';
|
||||||
import { GBLog, GBMinInstance } from 'botlib';
|
import { GBLog, GBMinInstance } from 'botlib';
|
||||||
import * as Fs from 'fs';
|
import * as Fs from 'fs';
|
||||||
import { jsonSchemaToZod } from "json-schema-to-zod";
|
import { jsonSchemaToZod } from 'json-schema-to-zod';
|
||||||
import { BufferWindowMemory } from 'langchain/memory';
|
import { BufferWindowMemory } from 'langchain/memory';
|
||||||
import Path from 'path';
|
import Path from 'path';
|
||||||
import { CollectionUtil } from 'pragmatismo-io-framework';
|
import { CollectionUtil } from 'pragmatismo-io-framework';
|
||||||
|
@ -46,35 +51,28 @@ import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
|
||||||
import { GBVMService } from '../../basic.gblib/services/GBVMService.js';
|
import { GBVMService } from '../../basic.gblib/services/GBVMService.js';
|
||||||
import { GBConfigService } from '../../core.gbapp/services/GBConfigService.js';
|
import { GBConfigService } from '../../core.gbapp/services/GBConfigService.js';
|
||||||
import { GuaribasSubject } from '../../kb.gbapp/models/index.js';
|
import { GuaribasSubject } from '../../kb.gbapp/models/index.js';
|
||||||
import { Serialized } from "@langchain/core/load/serializable";
|
import { Serialized } from '@langchain/core/load/serializable';
|
||||||
import { BaseCallbackHandler } from "@langchain/core/callbacks/base";
|
import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
|
||||||
import { pdfToPng, PngPageOutput } from 'pdf-to-png-converter';
|
import { pdfToPng, PngPageOutput } from 'pdf-to-png-converter';
|
||||||
import { DynamicStructuredTool } from "@langchain/core/tools";
|
import { DynamicStructuredTool } from '@langchain/core/tools';
|
||||||
import { WikipediaQueryRun } from "@langchain/community/tools/wikipedia_query_run";
|
import { WikipediaQueryRun } from '@langchain/community/tools/wikipedia_query_run';
|
||||||
import {
|
import { BaseLLMOutputParser, OutputParserException } from '@langchain/core/output_parsers';
|
||||||
BaseLLMOutputParser,
|
import { ChatGeneration, Generation } from '@langchain/core/outputs';
|
||||||
OutputParserException,
|
|
||||||
} from "@langchain/core/output_parsers";
|
|
||||||
import { ChatGeneration, Generation } from "@langchain/core/outputs";
|
|
||||||
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
|
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
|
||||||
import { GBServer } from '../../../src/app.js';
|
import { GBServer } from '../../../src/app.js';
|
||||||
import urlJoin from 'url-join';
|
import urlJoin from 'url-join';
|
||||||
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
import { getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
||||||
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
|
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
|
||||||
|
|
||||||
|
export interface CustomOutputParserFields {}
|
||||||
export interface CustomOutputParserFields { }
|
|
||||||
export type ExpectedOutput = any;
|
export type ExpectedOutput = any;
|
||||||
|
|
||||||
function isChatGeneration(
|
function isChatGeneration(llmOutput: ChatGeneration | Generation): llmOutput is ChatGeneration {
|
||||||
llmOutput: ChatGeneration | Generation
|
return 'message' in llmOutput;
|
||||||
): llmOutput is ChatGeneration {
|
|
||||||
return "message" in llmOutput;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class CustomHandler extends BaseCallbackHandler {
|
class CustomHandler extends BaseCallbackHandler {
|
||||||
name = "custom_handler";
|
name = 'custom_handler';
|
||||||
|
|
||||||
|
|
||||||
handleLLMNewToken(token: string) {
|
handleLLMNewToken(token: string) {
|
||||||
GBLogEx.info(0, `LLM: token: ${JSON.stringify(token)}`);
|
GBLogEx.info(0, `LLM: token: ${JSON.stringify(token)}`);
|
||||||
|
@ -95,11 +93,10 @@ class CustomHandler extends BaseCallbackHandler {
|
||||||
|
|
||||||
const logHandler = new CustomHandler();
|
const logHandler = new CustomHandler();
|
||||||
|
|
||||||
export class GBLLMOutputParser extends
|
export class GBLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
|
||||||
BaseLLMOutputParser<ExpectedOutput> {
|
lc_namespace = ['langchain', 'output_parsers'];
|
||||||
lc_namespace = ["langchain", "output_parsers"];
|
|
||||||
|
|
||||||
private toolChain: RunnableSequence
|
private toolChain: RunnableSequence;
|
||||||
private min;
|
private min;
|
||||||
|
|
||||||
constructor(min, toolChain: RunnableSequence, documentChain: RunnableSequence) {
|
constructor(min, toolChain: RunnableSequence, documentChain: RunnableSequence) {
|
||||||
|
@ -108,14 +105,9 @@ export class GBLLMOutputParser extends
|
||||||
this.toolChain = toolChain;
|
this.toolChain = toolChain;
|
||||||
}
|
}
|
||||||
|
|
||||||
async parseResult(
|
async parseResult(llmOutputs: ChatGeneration[] | Generation[]): Promise<ExpectedOutput> {
|
||||||
llmOutputs: ChatGeneration[] | Generation[]
|
|
||||||
): Promise<ExpectedOutput> {
|
|
||||||
|
|
||||||
if (!llmOutputs.length) {
|
if (!llmOutputs.length) {
|
||||||
throw new OutputParserException(
|
throw new OutputParserException('Output parser did not receive any generations.');
|
||||||
"Output parser did not receive any generations."
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
let result;
|
let result;
|
||||||
|
|
||||||
|
@ -140,10 +132,9 @@ export class GBLLMOutputParser extends
|
||||||
|
|
||||||
let { sources, text } = res;
|
let { sources, text } = res;
|
||||||
|
|
||||||
await CollectionUtil.asyncForEach(sources, async (source) => {
|
await CollectionUtil.asyncForEach(sources, async source => {
|
||||||
let found = false;
|
let found = false;
|
||||||
if (source) {
|
if (source && source.file.endsWith('.pdf')) {
|
||||||
|
|
||||||
const gbaiName = DialogKeywords.getGBAIPath(this.min.botId, 'gbkb');
|
const gbaiName = DialogKeywords.getGBAIPath(this.min.botId, 'gbkb');
|
||||||
const localName = Path.join(process.env.PWD, 'work', gbaiName, 'docs', source.file);
|
const localName = Path.join(process.env.PWD, 'work', gbaiName, 'docs', source.file);
|
||||||
|
|
||||||
|
@ -166,9 +157,7 @@ export class GBLLMOutputParser extends
|
||||||
}
|
}
|
||||||
|
|
||||||
export class ChatServices {
|
export class ChatServices {
|
||||||
|
|
||||||
public static async pdfPageAsImage(min, filename, pageNumber) {
|
public static async pdfPageAsImage(min, filename, pageNumber) {
|
||||||
|
|
||||||
// Converts the PDF to PNG.
|
// Converts the PDF to PNG.
|
||||||
|
|
||||||
GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber}...`);
|
GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber}...`);
|
||||||
|
@ -181,7 +170,6 @@ export class ChatServices {
|
||||||
verbosityLevel: 0
|
verbosityLevel: 0
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
// Prepare an image on cache and return the GBFILE information.
|
// Prepare an image on cache and return the GBFILE information.
|
||||||
|
|
||||||
if (pngPages.length > 0) {
|
if (pngPages.length > 0) {
|
||||||
|
@ -199,7 +187,6 @@ export class ChatServices {
|
||||||
sanitizedQuestion: string,
|
sanitizedQuestion: string,
|
||||||
numDocuments: number = 100
|
numDocuments: number = 100
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
|
|
||||||
if (sanitizedQuestion === '') {
|
if (sanitizedQuestion === '') {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
@ -219,10 +206,12 @@ export class ChatServices {
|
||||||
const doc = uniqueDocuments[filePaths];
|
const doc = uniqueDocuments[filePaths];
|
||||||
const metadata = doc.metadata;
|
const metadata = doc.metadata;
|
||||||
const filename = Path.basename(metadata.source);
|
const filename = Path.basename(metadata.source);
|
||||||
const page = await ChatServices.findPageForText(metadata.source,
|
let page = 0;
|
||||||
doc.pageContent);
|
if (metadata.source.endsWith('.pdf')) {
|
||||||
|
page = await ChatServices.findPageForText(metadata.source, doc.pageContent);
|
||||||
|
}
|
||||||
|
|
||||||
output = `${output}\n\n\n\nUse also the following context which is coming from Source Document: ${filename} at page: ${page}
|
output = `${output}\n\n\n\nUse also the following context which is coming from Source Document: ${filename} at page: ${page?page:'entire document'}
|
||||||
(you will fill the JSON sources collection field later),
|
(you will fill the JSON sources collection field later),
|
||||||
memorize this block among document information and return when you are refering this part of content:\n\n\n\n ${doc.pageContent} \n\n\n\n.`;
|
memorize this block among document information and return when you are refering this part of content:\n\n\n\n ${doc.pageContent} \n\n\n\n.`;
|
||||||
}
|
}
|
||||||
|
@ -233,12 +222,15 @@ export class ChatServices {
|
||||||
const data = new Uint8Array(Fs.readFileSync(pdfPath));
|
const data = new Uint8Array(Fs.readFileSync(pdfPath));
|
||||||
const pdf = await getDocument({ data }).promise;
|
const pdf = await getDocument({ data }).promise;
|
||||||
|
|
||||||
searchText = searchText.replace(/\s/g, '')
|
searchText = searchText.replace(/\s/g, '');
|
||||||
|
|
||||||
for (let i = 1; i <= pdf.numPages; i++) {
|
for (let i = 1; i <= pdf.numPages; i++) {
|
||||||
const page = await pdf.getPage(i);
|
const page = await pdf.getPage(i);
|
||||||
const textContent = await page.getTextContent();
|
const textContent = await page.getTextContent();
|
||||||
const text = textContent.items.map(item => item['str']).join('').replace(/\s/g, '');
|
const text = textContent.items
|
||||||
|
.map(item => item['str'])
|
||||||
|
.join('')
|
||||||
|
.replace(/\s/g, '');
|
||||||
|
|
||||||
if (text.includes(searchText)) return i;
|
if (text.includes(searchText)) return i;
|
||||||
}
|
}
|
||||||
|
@ -247,34 +239,31 @@ export class ChatServices {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate text
|
* Generate text
|
||||||
*
|
*
|
||||||
* CONTINUE keyword.
|
* CONTINUE keyword.
|
||||||
*
|
*
|
||||||
* result = CONTINUE text
|
* result = CONTINUE text
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public static async continue(min: GBMinInstance, question: string, chatId) {
|
public static async continue(min: GBMinInstance, question: string, chatId) {}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static memoryMap = {};
|
private static memoryMap = {};
|
||||||
public static userSystemPrompt = {};
|
public static userSystemPrompt = {};
|
||||||
|
|
||||||
public static async answerByGPT(min: GBMinInstance, user, pid,
|
public static async answerByGPT(
|
||||||
|
min: GBMinInstance,
|
||||||
|
user,
|
||||||
|
pid,
|
||||||
question: string,
|
question: string,
|
||||||
searchScore: number,
|
searchScore: number,
|
||||||
subjects: GuaribasSubject[]
|
subjects: GuaribasSubject[]
|
||||||
) {
|
) {
|
||||||
|
|
||||||
if (!process.env.OPENAI_API_KEY) {
|
if (!process.env.OPENAI_API_KEY) {
|
||||||
return { answer: undefined, questionId: 0 };
|
return { answer: undefined, questionId: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
const LLMMode = min.core.getParam(
|
const LLMMode = min.core.getParam(min.instance, 'Answer Mode', 'direct');
|
||||||
min.instance,
|
|
||||||
'Answer Mode', 'direct'
|
|
||||||
);
|
|
||||||
|
|
||||||
const docsContext = min['vectorStore'];
|
const docsContext = min['vectorStore'];
|
||||||
|
|
||||||
|
@ -283,20 +272,19 @@ export class ChatServices {
|
||||||
returnMessages: true,
|
returnMessages: true,
|
||||||
memoryKey: 'chat_history',
|
memoryKey: 'chat_history',
|
||||||
inputKey: 'input',
|
inputKey: 'input',
|
||||||
k: 2,
|
k: 2
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
const memory = this.memoryMap[user.userSystemId];
|
const memory = this.memoryMap[user.userSystemId];
|
||||||
const systemPrompt = this.userSystemPrompt[user.userSystemId];
|
const systemPrompt = this.userSystemPrompt[user.userSystemId];
|
||||||
|
|
||||||
const model = new ChatOpenAI({
|
const model = new ChatOpenAI({
|
||||||
openAIApiKey: process.env.OPENAI_API_KEY,
|
openAIApiKey: process.env.OPENAI_API_KEY,
|
||||||
modelName: "gpt-3.5-turbo-0125",
|
modelName: 'gpt-3.5-turbo-0125',
|
||||||
temperature: 0,
|
temperature: 0,
|
||||||
callbacks: [logHandler],
|
callbacks: [logHandler]
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
let tools = await ChatServices.getTools(min);
|
let tools = await ChatServices.getTools(min);
|
||||||
let toolsAsText = ChatServices.getToolsAsText(tools);
|
let toolsAsText = ChatServices.getToolsAsText(tools);
|
||||||
|
|
||||||
|
@ -316,9 +304,9 @@ export class ChatServices {
|
||||||
Do not use any previous tools output in the chat_history.
|
Do not use any previous tools output in the chat_history.
|
||||||
`
|
`
|
||||||
),
|
),
|
||||||
new MessagesPlaceholder("chat_history"),
|
new MessagesPlaceholder('chat_history'),
|
||||||
AIMessagePromptTemplate.fromTemplate(`Follow Up Input: {question}
|
AIMessagePromptTemplate.fromTemplate(`Follow Up Input: {question}
|
||||||
Standalone question:`),
|
Standalone question:`)
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const toolsResultPrompt = ChatPromptTemplate.fromMessages([
|
const toolsResultPrompt = ChatPromptTemplate.fromMessages([
|
||||||
|
@ -327,9 +315,9 @@ export class ChatServices {
|
||||||
rephrase the answer to the user using this tool output.
|
rephrase the answer to the user using this tool output.
|
||||||
`
|
`
|
||||||
),
|
),
|
||||||
new MessagesPlaceholder("chat_history"),
|
new MessagesPlaceholder('chat_history'),
|
||||||
AIMessagePromptTemplate.fromTemplate(`Tool output: {tool_output}
|
AIMessagePromptTemplate.fromTemplate(`Tool output: {tool_output}
|
||||||
Standalone question:`),
|
Standalone question:`)
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const combineDocumentsPrompt = ChatPromptTemplate.fromMessages([
|
const combineDocumentsPrompt = ChatPromptTemplate.fromMessages([
|
||||||
|
@ -355,14 +343,13 @@ export class ChatServices {
|
||||||
Double check if the output is a valid JSON with brackets. all fields are required: text, file, page.
|
Double check if the output is a valid JSON with brackets. all fields are required: text, file, page.
|
||||||
`
|
`
|
||||||
),
|
),
|
||||||
new MessagesPlaceholder("chat_history"),
|
new MessagesPlaceholder('chat_history'),
|
||||||
HumanMessagePromptTemplate.fromTemplate("Question: {question}"),
|
HumanMessagePromptTemplate.fromTemplate('Question: {question}')
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const callToolChain = RunnableSequence.from([
|
const callToolChain = RunnableSequence.from([
|
||||||
{
|
{
|
||||||
tool_output: async (output: object) => {
|
tool_output: async (output: object) => {
|
||||||
|
|
||||||
const name = output['func'][0].function.name;
|
const name = output['func'][0].function.name;
|
||||||
const args = JSON.parse(output['func'][0].function.arguments);
|
const args = JSON.parse(output['func'][0].function.arguments);
|
||||||
GBLogEx.info(min, `Running .gbdialog '${name}' as GPT tool...`);
|
GBLogEx.info(min, `Running .gbdialog '${name}' as GPT tool...`);
|
||||||
|
@ -373,8 +360,7 @@ export class ChatServices {
|
||||||
chat_history: async () => {
|
chat_history: async () => {
|
||||||
const { chat_history } = await memory.loadMemoryVariables({});
|
const { chat_history } = await memory.loadMemoryVariables({});
|
||||||
return chat_history;
|
return chat_history;
|
||||||
},
|
}
|
||||||
|
|
||||||
},
|
},
|
||||||
toolsResultPrompt,
|
toolsResultPrompt,
|
||||||
model,
|
model,
|
||||||
|
@ -391,8 +377,7 @@ export class ChatServices {
|
||||||
context: async (output: string) => {
|
context: async (output: string) => {
|
||||||
const c = await ChatServices.getRelevantContext(docsContext, output);
|
const c = await ChatServices.getRelevantContext(docsContext, output);
|
||||||
return `${systemPrompt} \n ${c ? 'Use this context to answer:\n' + c : 'answer just with user question.'}`;
|
return `${systemPrompt} \n ${c ? 'Use this context to answer:\n' + c : 'answer just with user question.'}`;
|
||||||
|
}
|
||||||
},
|
|
||||||
},
|
},
|
||||||
combineDocumentsPrompt,
|
combineDocumentsPrompt,
|
||||||
model,
|
model,
|
||||||
|
@ -405,7 +390,7 @@ export class ChatServices {
|
||||||
chat_history: async () => {
|
chat_history: async () => {
|
||||||
const { chat_history } = await memory.loadMemoryVariables({});
|
const { chat_history } = await memory.loadMemoryVariables({});
|
||||||
return chat_history;
|
return chat_history;
|
||||||
},
|
}
|
||||||
},
|
},
|
||||||
questionGeneratorTemplate,
|
questionGeneratorTemplate,
|
||||||
modelWithTools,
|
modelWithTools,
|
||||||
|
@ -416,45 +401,36 @@ export class ChatServices {
|
||||||
let result, sources;
|
let result, sources;
|
||||||
let text, file, page;
|
let text, file, page;
|
||||||
|
|
||||||
|
// Choose the operation mode of answer generation, based on
|
||||||
// Choose the operation mode of answer generation, based on
|
|
||||||
// .gbot switch LLMMode and choose the corresponding chain.
|
// .gbot switch LLMMode and choose the corresponding chain.
|
||||||
|
|
||||||
if (LLMMode === "direct") {
|
if (LLMMode === 'direct') {
|
||||||
result = await (tools.length > 0 ? modelWithTools : model).invoke(`
|
result = await (tools.length > 0 ? modelWithTools : model).invoke(`
|
||||||
${systemPrompt}
|
${systemPrompt}
|
||||||
|
|
||||||
${question}`);
|
${question}`);
|
||||||
|
|
||||||
result = result.content;
|
result = result.content;
|
||||||
}
|
} else if (LLMMode === 'document') {
|
||||||
else if (LLMMode === "document") {
|
|
||||||
|
|
||||||
const res = await combineDocumentsChain.invoke(question);
|
const res = await combineDocumentsChain.invoke(question);
|
||||||
result = res.text;
|
result = res.text;
|
||||||
sources = res.sources;
|
sources = res.sources;
|
||||||
|
} else if (LLMMode === 'function') {
|
||||||
} else if (LLMMode === "function") {
|
|
||||||
|
|
||||||
result = await conversationalToolChain.invoke({
|
result = await conversationalToolChain.invoke({
|
||||||
question,
|
question
|
||||||
});
|
});
|
||||||
}
|
} else if (LLMMode === 'full') {
|
||||||
else if (LLMMode === "full") {
|
|
||||||
|
|
||||||
throw new Error('Not implemented.'); // TODO: #407.
|
throw new Error('Not implemented.'); // TODO: #407.
|
||||||
}
|
} else {
|
||||||
|
|
||||||
else {
|
|
||||||
GBLogEx.info(min, `Invalid Answer Mode in Config.xlsx: ${LLMMode}.`);
|
GBLogEx.info(min, `Invalid Answer Mode in Config.xlsx: ${LLMMode}.`);
|
||||||
}
|
}
|
||||||
|
|
||||||
await memory.saveContext(
|
await memory.saveContext(
|
||||||
{
|
{
|
||||||
input: question,
|
input: question
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
output: result.replace(/\!\[.*\)/gi, '') // Removes .MD url beforing adding to history.
|
output: result?result.replace(/\!\[.*\)/gi, ''): 'no answer' // Removes .MD url beforing adding to history.
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -464,40 +440,34 @@ export class ChatServices {
|
||||||
|
|
||||||
private static getToolsAsText(tools) {
|
private static getToolsAsText(tools) {
|
||||||
return Object.keys(tools)
|
return Object.keys(tools)
|
||||||
.map((toolname) => `- ${tools[toolname].name}: ${tools[toolname].description}`)
|
.map(toolname => `- ${tools[toolname].name}: ${tools[toolname].description}`)
|
||||||
.join("\n");
|
.join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
private static async getTools(min: GBMinInstance) {
|
private static async getTools(min: GBMinInstance) {
|
||||||
let functions = [];
|
let functions = [];
|
||||||
|
|
||||||
// Adds .gbdialog as functions if any to GPT Functions.
|
// Adds .gbdialog as functions if any to GPT Functions.
|
||||||
await CollectionUtil.asyncForEach(Object.keys(min.scriptMap), async (script) => {
|
await CollectionUtil.asyncForEach(Object.keys(min.scriptMap), async script => {
|
||||||
|
const path = DialogKeywords.getGBAIPath(min.botId, 'gbdialog', null);
|
||||||
|
|
||||||
const path = DialogKeywords.getGBAIPath(min.botId, "gbdialog", null);
|
|
||||||
const jsonFile = Path.join('work', path, `${script}.json`);
|
const jsonFile = Path.join('work', path, `${script}.json`);
|
||||||
|
|
||||||
if (Fs.existsSync(jsonFile) && script.toLowerCase() !== 'start.vbs') {
|
if (Fs.existsSync(jsonFile) && script.toLowerCase() !== 'start.vbs') {
|
||||||
|
|
||||||
const funcJSON = JSON.parse(Fs.readFileSync(jsonFile, 'utf8'));
|
const funcJSON = JSON.parse(Fs.readFileSync(jsonFile, 'utf8'));
|
||||||
const funcObj = funcJSON?.function;
|
const funcObj = funcJSON?.function;
|
||||||
|
|
||||||
if (funcObj) {
|
if (funcObj) {
|
||||||
|
|
||||||
// TODO: Use ajv.
|
// TODO: Use ajv.
|
||||||
funcObj.schema = eval(jsonSchemaToZod(funcObj.parameters));
|
funcObj.schema = eval(jsonSchemaToZod(funcObj.parameters));
|
||||||
functions.push(new DynamicStructuredTool(funcObj));
|
functions.push(new DynamicStructuredTool(funcObj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if (process.env.WIKIPEDIA_TOOL) {
|
if (process.env.WIKIPEDIA_TOOL) {
|
||||||
|
|
||||||
const tool = new WikipediaQueryRun({
|
const tool = new WikipediaQueryRun({
|
||||||
topKResults: 3,
|
topKResults: 3,
|
||||||
maxDocContentLength: 4000,
|
maxDocContentLength: 4000
|
||||||
});
|
});
|
||||||
functions.push(tool);
|
functions.push(tool);
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,8 +48,11 @@ import { DocxLoader } from 'langchain/document_loaders/fs/docx';
|
||||||
import { EPubLoader } from 'langchain/document_loaders/fs/epub';
|
import { EPubLoader } from 'langchain/document_loaders/fs/epub';
|
||||||
import { CSVLoader } from 'langchain/document_loaders/fs/csv';
|
import { CSVLoader } from 'langchain/document_loaders/fs/csv';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||||
import { Document } from 'langchain/document';
|
import { Document } from 'langchain/document';
|
||||||
|
import getColors from 'get-image-colors';
|
||||||
|
|
||||||
|
|
||||||
import {
|
import {
|
||||||
GBDialogStep,
|
GBDialogStep,
|
||||||
|
@ -81,7 +84,6 @@ import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
|
||||||
import { GBMinService } from '../../core.gbapp/services/GBMinService.js';
|
import { GBMinService } from '../../core.gbapp/services/GBMinService.js';
|
||||||
import { ChatServices } from '../../gpt.gblib/services/ChatServices.js';
|
import { ChatServices } from '../../gpt.gblib/services/ChatServices.js';
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Result for query on KB data.
|
* Result for query on KB data.
|
||||||
*/
|
*/
|
||||||
|
@ -271,7 +273,7 @@ export class KBService implements IGBKBService {
|
||||||
min: GBMinInstance,
|
min: GBMinInstance,
|
||||||
user,
|
user,
|
||||||
step,
|
step,
|
||||||
pid,
|
pid,
|
||||||
query: string,
|
query: string,
|
||||||
searchScore: number,
|
searchScore: number,
|
||||||
subjects: GuaribasSubject[]
|
subjects: GuaribasSubject[]
|
||||||
|
@ -293,13 +295,9 @@ export class KBService implements IGBKBService {
|
||||||
'Default Content Language',
|
'Default Content Language',
|
||||||
GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
|
GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
|
||||||
);
|
);
|
||||||
|
|
||||||
query = await min.conversationalService.translate(
|
query = await min.conversationalService.translate(min, query, contentLocale);
|
||||||
min,
|
|
||||||
query,
|
|
||||||
contentLocale
|
|
||||||
);
|
|
||||||
|
|
||||||
GBLogEx.info(min, `Translated query (prompt): ${query}.`);
|
GBLogEx.info(min, `Translated query (prompt): ${query}.`);
|
||||||
|
|
||||||
// Try simple search first.
|
// Try simple search first.
|
||||||
|
@ -317,10 +315,8 @@ export class KBService implements IGBKBService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let returnedScore = 0;
|
let returnedScore = 0;
|
||||||
const key = instance.searchKey ? instance.searchKey :
|
const key = instance.searchKey ? instance.searchKey : GBServer.globals.minBoot.instance.searchKey;
|
||||||
GBServer.globals.minBoot.instance.searchKey;
|
const host = instance.searchHost ? instance.searchHost : GBServer.globals.minBoot.instance.searchHost;
|
||||||
const host = instance.searchHost ? instance.searchHost :
|
|
||||||
GBServer.globals.minBoot.instance.searchHost;
|
|
||||||
|
|
||||||
// No direct match found, so Search is used.
|
// No direct match found, so Search is used.
|
||||||
|
|
||||||
|
@ -348,8 +344,6 @@ export class KBService implements IGBKBService {
|
||||||
top: 1
|
top: 1
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Searches via Search (Azure Search).
|
// Searches via Search (Azure Search).
|
||||||
|
|
||||||
let found = false;
|
let found = false;
|
||||||
|
@ -359,11 +353,15 @@ export class KBService implements IGBKBService {
|
||||||
if (returnedScore >= searchScore) {
|
if (returnedScore >= searchScore) {
|
||||||
const value = await this.getAnswerById(instance.instanceId, result.document.answerId);
|
const value = await this.getAnswerById(instance.instanceId, result.document.answerId);
|
||||||
if (value !== null) {
|
if (value !== null) {
|
||||||
GBLogEx.info(min, `SEARCH WILL BE USED with score: ${returnedScore} > required (searchScore): ${searchScore}`);
|
GBLogEx.info(
|
||||||
|
min,
|
||||||
|
`SEARCH WILL BE USED with score: ${returnedScore} > required (searchScore): ${searchScore}`
|
||||||
|
);
|
||||||
|
|
||||||
return { answer: value, questionId: result.document.questionId };
|
return { answer: value, questionId: result.document.questionId };
|
||||||
} else {
|
} else {
|
||||||
GBLogEx.info(min,
|
GBLogEx.info(
|
||||||
|
min,
|
||||||
`Index problem. SEARCH WILL NOT be used as answerId ${result.document.answerId} was not found in database,
|
`Index problem. SEARCH WILL NOT be used as answerId ${result.document.answerId} was not found in database,
|
||||||
returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
|
returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
|
||||||
);
|
);
|
||||||
|
@ -373,17 +371,13 @@ export class KBService implements IGBKBService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GBLogEx.info(min,
|
GBLogEx.info(
|
||||||
|
min,
|
||||||
`SEARCH returned LOW level score, calling NLP if any,
|
`SEARCH returned LOW level score, calling NLP if any,
|
||||||
returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
|
returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
|
||||||
);
|
);
|
||||||
|
|
||||||
return await ChatServices.answerByGPT(min, user, pid,
|
return await ChatServices.answerByGPT(min, user, pid, query, searchScore, subjects);
|
||||||
query,
|
|
||||||
searchScore,
|
|
||||||
subjects
|
|
||||||
);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public async getSubjectItems(instanceId: number, parentId: number): Promise<GuaribasSubject[]> {
|
public async getSubjectItems(instanceId: number, parentId: number): Promise<GuaribasSubject[]> {
|
||||||
|
@ -626,7 +620,7 @@ export class KBService implements IGBKBService {
|
||||||
}
|
}
|
||||||
|
|
||||||
public async sendAnswer(min: GBMinInstance, channel: string, step: GBDialogStep, answer) {
|
public async sendAnswer(min: GBMinInstance, channel: string, step: GBDialogStep, answer) {
|
||||||
answer = typeof (answer) === 'string' ? answer : answer.content;
|
answer = typeof answer === 'string' ? answer : answer.content;
|
||||||
if (answer.endsWith('.mp4')) {
|
if (answer.endsWith('.mp4')) {
|
||||||
await this.playVideo(min, min.conversationalService, step, answer, channel);
|
await this.playVideo(min, min.conversationalService, step, answer, channel);
|
||||||
} else if (
|
} else if (
|
||||||
|
@ -646,14 +640,11 @@ export class KBService implements IGBKBService {
|
||||||
const url = urlJoin('kb', path, 'assets', answer);
|
const url = urlJoin('kb', path, 'assets', answer);
|
||||||
await this.playUrl(min, min.conversationalService, step, url, channel);
|
await this.playUrl(min, min.conversationalService, step, url, channel);
|
||||||
} else if (answer.format === '.md') {
|
} else if (answer.format === '.md') {
|
||||||
await min.conversationalService['playMarkdown'](min, answer, channel, step,
|
await min.conversationalService['playMarkdown'](min, answer, channel, step, GBMinService.userMobile(step));
|
||||||
GBMinService.userMobile(step));
|
|
||||||
} else if (answer.endsWith('.ogg') && process.env.AUDIO_DISABLED !== 'true') {
|
} else if (answer.endsWith('.ogg') && process.env.AUDIO_DISABLED !== 'true') {
|
||||||
await this.playAudio(min, answer, channel, step, min.conversationalService);
|
await this.playAudio(min, answer, channel, step, min.conversationalService);
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
await min.conversationalService.sendText(min, step, answer);
|
await min.conversationalService.sendText(min, step, answer);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -685,7 +676,6 @@ export class KBService implements IGBKBService {
|
||||||
const a = await GuaribasAnswer.create(answer);
|
const a = await GuaribasAnswer.create(answer);
|
||||||
question['answerId'] = a.answerId;
|
question['answerId'] = a.answerId;
|
||||||
const q = await GuaribasQuestion.create(question);
|
const q = await GuaribasQuestion.create(question);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public async importKbPackage(
|
public async importKbPackage(
|
||||||
|
@ -747,7 +737,7 @@ export class KBService implements IGBKBService {
|
||||||
const localName = Path.join('work', path, 'articles', file.name);
|
const localName = Path.join('work', path, 'articles', file.name);
|
||||||
let loader = new DocxLoader(localName);
|
let loader = new DocxLoader(localName);
|
||||||
let doc = await loader.load();
|
let doc = await loader.load();
|
||||||
|
|
||||||
const answer = {
|
const answer = {
|
||||||
instanceId: instance.instanceId,
|
instanceId: instance.instanceId,
|
||||||
content: doc[0].pageContent,
|
content: doc[0].pageContent,
|
||||||
|
@ -758,10 +748,6 @@ export class KBService implements IGBKBService {
|
||||||
};
|
};
|
||||||
|
|
||||||
data.answers.push(answer);
|
data.answers.push(answer);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} else if (file !== null && file.name.endsWith('.toc.docx')) {
|
} else if (file !== null && file.name.endsWith('.toc.docx')) {
|
||||||
const path = DialogKeywords.getGBAIPath(instance.botId, `gbkb`);
|
const path = DialogKeywords.getGBAIPath(instance.botId, `gbkb`);
|
||||||
const localName = Path.join('work', path, 'articles', file.name);
|
const localName = Path.join('work', path, 'articles', file.name);
|
||||||
|
@ -866,6 +852,103 @@ export class KBService implements IGBKBService {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async saveHtmlPage(min, url: string, page: Page): Promise<string | null> {
|
||||||
|
const response = await page.goto(url);
|
||||||
|
|
||||||
|
|
||||||
|
if (response.headers && response.status() === 200) {
|
||||||
|
const contentType = response.headers()['content-type'];
|
||||||
|
if (contentType && contentType.includes('text/html')) {
|
||||||
|
const buffer = await response.buffer();
|
||||||
|
const urlObj = new URL(url);
|
||||||
|
const urlPath = urlObj.pathname.endsWith('/') ? urlObj.pathname.slice(0, -1) : urlObj.pathname; // Remove trailing slash if present
|
||||||
|
let filename = urlPath.split('/').pop() || 'index'; // Get the filename from the URL path or set it to 'index.html' as default
|
||||||
|
filename = `${filename}.html`;
|
||||||
|
let path = DialogKeywords.getGBAIPath(min.botId, `gbot`);
|
||||||
|
const directoryPath = Path.join(process.env.PWD, 'work', path, 'Website');
|
||||||
|
const filePath = Path.join(directoryPath, filename);
|
||||||
|
|
||||||
|
GBLogEx.info(min, `[GBDeployer] Saving Website file in ${filePath}.`);
|
||||||
|
|
||||||
|
Fs.mkdirSync(directoryPath, { recursive: true }); // Create directory recursively if it doesn't exist
|
||||||
|
Fs.writeFileSync(filePath, buffer);
|
||||||
|
return filePath;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async crawl(min, url: string, visited: Set<string>, depth: number, maxDepth: number, page: Page): Promise<string[]> {
|
||||||
|
try {
|
||||||
|
if (
|
||||||
|
depth > maxDepth ||
|
||||||
|
(visited.has(url) ||
|
||||||
|
url.endsWith('.jpg') ||
|
||||||
|
url.endsWith('.pdf') ||
|
||||||
|
url.endsWith('.jpg') ||
|
||||||
|
url.endsWith('.png') ||
|
||||||
|
url.endsWith('.mp4'))
|
||||||
|
) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
await GBLogEx.info(min, `Processing URL: ${url}.`);
|
||||||
|
|
||||||
|
visited.add(url);
|
||||||
|
|
||||||
|
const filename = await this.saveHtmlPage(min, url, page);
|
||||||
|
|
||||||
|
if (!filename) {
|
||||||
|
// If the URL doesn't represent an HTML page, skip crawling its links
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
const currentDomain = new URL(page.url()).hostname;
|
||||||
|
let links = await page.evaluate(currentDomain => {
|
||||||
|
const anchors = Array.from(document.querySelectorAll('a')).filter(p => {
|
||||||
|
try {
|
||||||
|
return currentDomain == new URL(p.href).hostname;
|
||||||
|
} catch (err) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return anchors.map(anchor => {
|
||||||
|
return anchor.href.replace(/#.*/, '');
|
||||||
|
});
|
||||||
|
}, currentDomain);
|
||||||
|
|
||||||
|
if (!Array.isArray(links)) {
|
||||||
|
links = [];
|
||||||
|
}
|
||||||
|
|
||||||
|
let filteredLinks = [];
|
||||||
|
|
||||||
|
if (links && typeof links[Symbol.iterator] === 'function') {
|
||||||
|
filteredLinks = links.filter(l => {
|
||||||
|
try {
|
||||||
|
new URL(l); // Check if the link is a valid URL
|
||||||
|
return !visited.has(l);
|
||||||
|
} catch (error) {
|
||||||
|
// Ignore invalid URLs
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const childLinks = [];
|
||||||
|
for (const link of filteredLinks) {
|
||||||
|
const links = await this.crawl(min, link, visited, depth + 1, maxDepth, page);
|
||||||
|
if (links){
|
||||||
|
childLinks.push(...links);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return [filename, ...childLinks]; // Include the filename of the cached file
|
||||||
|
} catch (error) {
|
||||||
|
await GBLogEx.info(min, error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Import all .docx files in reading comprehension folder.
|
* Import all .docx files in reading comprehension folder.
|
||||||
*/
|
*/
|
||||||
|
@ -875,11 +958,52 @@ export class KBService implements IGBKBService {
|
||||||
instance: IGBInstance,
|
instance: IGBInstance,
|
||||||
packageId: number
|
packageId: number
|
||||||
): Promise<any> {
|
): Promise<any> {
|
||||||
const files = await walkPromise(urlJoin(localPath, 'docs'));
|
let files = [];
|
||||||
|
|
||||||
|
const website = min.core.getParam<string>(min.instance, 'Website', null);
|
||||||
|
|
||||||
|
if (website) {
|
||||||
|
const browser = await puppeteer.launch({ headless: false });
|
||||||
|
const page = await browser.newPage();
|
||||||
|
const response = await page.goto(website);
|
||||||
|
|
||||||
|
await page.screenshot({ path: 'screenshot.png' });
|
||||||
|
|
||||||
|
// Extract dominant colors from the screenshot
|
||||||
|
const colors = await getColors('screenshot.png');
|
||||||
|
|
||||||
|
// Assuming you want the two most dominant colors
|
||||||
|
const mainColor1 = colors[0].hex();
|
||||||
|
const mainColor2 = colors[1].hex();
|
||||||
|
|
||||||
|
console.log('Main Color 1:', mainColor1);
|
||||||
|
console.log('Main Color 2:', mainColor2);
|
||||||
|
|
||||||
|
|
||||||
|
const maxDepth = 1; // Maximum depth of recursion
|
||||||
|
const visited = new Set<string>();
|
||||||
|
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page));
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
|
||||||
|
files.shift();
|
||||||
|
|
||||||
|
await CollectionUtil.asyncForEach(files, async file => {
|
||||||
|
let content = null;
|
||||||
|
|
||||||
|
const document = await this.loadAndSplitFile(file);
|
||||||
|
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
||||||
|
const vectorStore = min['vectorStore'];
|
||||||
|
await vectorStore.addDocuments(flattenedDocuments);
|
||||||
|
await vectorStore.save(min['vectorStorePath']);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
files = await walkPromise(urlJoin(localPath, 'docs'));
|
||||||
|
|
||||||
if (!files[0]) {
|
if (!files[0]) {
|
||||||
GBLogEx.info(min,
|
GBLogEx.info(min, `[GBDeployer] docs folder not created yet in .gbkb neither a website in .gbot.`);
|
||||||
`[GBDeployer] docs folder not created yet in .gbkb. To use Reading Comprehension, create this folder at root and put a document to get read by the.`
|
|
||||||
);
|
|
||||||
} else {
|
} else {
|
||||||
await CollectionUtil.asyncForEach(files, async file => {
|
await CollectionUtil.asyncForEach(files, async file => {
|
||||||
let content = null;
|
let content = null;
|
||||||
|
@ -894,211 +1018,217 @@ export class KBService implements IGBKBService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultRecursiveCharacterTextSplitter = new RecursiveCharacterTextSplitter({
|
defaultRecursiveCharacterTextSplitter = new RecursiveCharacterTextSplitter({
|
||||||
chunkSize: 700,
|
chunkSize: 700,
|
||||||
chunkOverlap: 50,
|
chunkOverlap: 50
|
||||||
});
|
});
|
||||||
|
|
||||||
markdownRecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter.fromLanguage('markdown', {
|
markdownRecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter.fromLanguage('markdown', {
|
||||||
chunkSize: 700,
|
chunkSize: 700,
|
||||||
chunkOverlap: 50,
|
chunkOverlap: 50
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
private async loadAndSplitFile(filePath: string): Promise<Document<Record<string, unknown>>[]> {
|
private async loadAndSplitFile(filePath: string): Promise<Document<Record<string, unknown>>[]> {
|
||||||
const fileExtension = path.extname(filePath);
|
const fileExtension = path.extname(filePath);
|
||||||
let loader;
|
let loader;
|
||||||
let documents: Document<Record<string, unknown>>[];
|
let documents: Document<Record<string, unknown>>[];
|
||||||
switch (fileExtension) {
|
switch (fileExtension) {
|
||||||
case '.json':
|
case '.json':
|
||||||
loader = new JSONLoader(filePath);
|
loader = new JSONLoader(filePath);
|
||||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||||
break;
|
break;
|
||||||
case '.txt':
|
case '.txt':
|
||||||
loader = new TextLoader(filePath);
|
loader = new TextLoader(filePath);
|
||||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||||
break;
|
break;
|
||||||
case '.md':
|
case '.txt':
|
||||||
loader = new TextLoader(filePath);
|
loader = new TextLoader(filePath);
|
||||||
documents = await loader.loadAndSplit(this.markdownRecursiveCharacterTextSplitter);
|
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||||
break;
|
break;
|
||||||
case '.pdf':
|
case '.html':
|
||||||
loader = new PDFLoader(filePath, { splitPages: false });
|
loader = new TextLoader(filePath);
|
||||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||||
break;
|
break;
|
||||||
case '.docx':
|
case '.md':
|
||||||
loader = new DocxLoader(filePath);
|
loader = new TextLoader(filePath);
|
||||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
documents = await loader.loadAndSplit(this.markdownRecursiveCharacterTextSplitter);
|
||||||
break;
|
break;
|
||||||
case '.csv':
|
case '.pdf':
|
||||||
loader = new CSVLoader(filePath);
|
loader = new PDFLoader(filePath, { splitPages: false });
|
||||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||||
break;
|
break;
|
||||||
case '.epub':
|
case '.docx':
|
||||||
loader = new EPubLoader(filePath, { splitChapters: false });
|
loader = new DocxLoader(filePath);
|
||||||
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||||
break;
|
break;
|
||||||
default:
|
case '.csv':
|
||||||
throw new Error(`Unsupported file extension: ${fileExtension}`);
|
loader = new CSVLoader(filePath);
|
||||||
}
|
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||||
return documents;
|
break;
|
||||||
}
|
case '.epub':
|
||||||
|
loader = new EPubLoader(filePath, { splitChapters: false });
|
||||||
|
documents = await loader.loadAndSplit(this.defaultRecursiveCharacterTextSplitter);
|
||||||
public async importKbTabularDirectory(localPath: string, min: GBMinInstance, packageId: number): Promise < any > {
|
break;
|
||||||
const files = await walkPromise(localPath);
|
default:
|
||||||
|
throw new Error(`Unsupported file extension: ${fileExtension}`);
|
||||||
await CollectionUtil.asyncForEach(files, async file => {
|
|
||||||
if (file !== null && file.name.endsWith('.xlsx')) {
|
|
||||||
return await this.importKbTabularFile(urlJoin(file.root, file.name), min, packageId);
|
|
||||||
}
|
}
|
||||||
});
|
return documents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public async importKbTabularDirectory(localPath: string, min: GBMinInstance, packageId: number): Promise<any> {
|
||||||
|
const files = await walkPromise(localPath);
|
||||||
|
|
||||||
|
await CollectionUtil.asyncForEach(files, async file => {
|
||||||
|
if (file !== null && file.name.endsWith('.xlsx')) {
|
||||||
|
return await this.importKbTabularFile(urlJoin(file.root, file.name), min, packageId);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
public async importSubjectFile(
|
public async importSubjectFile(
|
||||||
packageId: number,
|
packageId: number,
|
||||||
filename: string,
|
filename: string,
|
||||||
menuFile: string,
|
menuFile: string,
|
||||||
instance: IGBInstance
|
instance: IGBInstance
|
||||||
): Promise < any > {
|
): Promise<any> {
|
||||||
let subjectsLoaded;
|
let subjectsLoaded;
|
||||||
if(menuFile) {
|
if (menuFile) {
|
||||||
// Loads menu.xlsx and finds worksheet.
|
// Loads menu.xlsx and finds worksheet.
|
||||||
|
|
||||||
const workbook = new Excel.Workbook();
|
const workbook = new Excel.Workbook();
|
||||||
const data = await workbook.xlsx.readFile(menuFile);
|
const data = await workbook.xlsx.readFile(menuFile);
|
||||||
let worksheet: any;
|
let worksheet: any;
|
||||||
for (let t = 0; t < data.worksheets.length; t++) {
|
for (let t = 0; t < data.worksheets.length; t++) {
|
||||||
worksheet = data.worksheets[t];
|
worksheet = data.worksheets[t];
|
||||||
if (worksheet) {
|
if (worksheet) {
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const MAX_LEVEL = 4; // Max column level to reach menu items in plan.
|
|
||||||
// Iterates over all items.
|
|
||||||
|
|
||||||
let rows = worksheet._rows;
|
|
||||||
rows.length = 24;
|
|
||||||
let lastLevel = 0;
|
|
||||||
let subjects = { children: [] };
|
|
||||||
let childrenNode = subjects.children;
|
|
||||||
let activeObj = null;
|
|
||||||
|
|
||||||
let activeChildrenGivenLevel = [childrenNode];
|
|
||||||
|
|
||||||
await asyncPromise.eachSeries(rows, async row => {
|
|
||||||
if (!row) return;
|
|
||||||
let menu;
|
|
||||||
|
|
||||||
// Detect menu level by skipping blank cells on left.
|
|
||||||
|
|
||||||
let level;
|
|
||||||
for (level = 0; level < MAX_LEVEL; level++) {
|
|
||||||
const cell = row._cells[level];
|
|
||||||
if (cell && cell.text) {
|
|
||||||
menu = cell.text;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tree hierarchy calculation.
|
const MAX_LEVEL = 4; // Max column level to reach menu items in plan.
|
||||||
|
// Iterates over all items.
|
||||||
|
|
||||||
if (level > lastLevel) {
|
let rows = worksheet._rows;
|
||||||
childrenNode = activeObj.children;
|
rows.length = 24;
|
||||||
} else if (level < lastLevel) {
|
let lastLevel = 0;
|
||||||
childrenNode = activeChildrenGivenLevel[level];
|
let subjects = { children: [] };
|
||||||
}
|
let childrenNode = subjects.children;
|
||||||
|
let activeObj = null;
|
||||||
|
|
||||||
/// Keeps the record of last subroots for each level, to
|
let activeChildrenGivenLevel = [childrenNode];
|
||||||
// changel levels greater than one (return to main menu),
|
|
||||||
// can exists between leaf nodes and roots.
|
|
||||||
|
|
||||||
activeChildrenGivenLevel[level] = childrenNode;
|
await asyncPromise.eachSeries(rows, async row => {
|
||||||
|
if (!row) return;
|
||||||
|
let menu;
|
||||||
|
|
||||||
// Insert the object into JSON.
|
// Detect menu level by skipping blank cells on left.
|
||||||
const description = row._cells[level + 1] ? row._cells[level + 1].text : null;
|
|
||||||
activeObj = {
|
|
||||||
title: menu,
|
|
||||||
description: description,
|
|
||||||
id: menu,
|
|
||||||
children: []
|
|
||||||
};
|
|
||||||
activeChildrenGivenLevel[level].push(activeObj);
|
|
||||||
|
|
||||||
lastLevel = level;
|
let level;
|
||||||
});
|
for (level = 0; level < MAX_LEVEL; level++) {
|
||||||
|
const cell = row._cells[level];
|
||||||
|
if (cell && cell.text) {
|
||||||
|
menu = cell.text;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
subjectsLoaded = subjects;
|
// Tree hierarchy calculation.
|
||||||
} else {
|
|
||||||
subjectsLoaded = JSON.parse(Fs.readFileSync(filename, 'utf8'));
|
|
||||||
}
|
|
||||||
|
|
||||||
const doIt = async (subjects: GuaribasSubject[], parentSubjectId: number) => {
|
if (level > lastLevel) {
|
||||||
return asyncPromise.eachSeries(subjects, async item => {
|
childrenNode = activeObj.children;
|
||||||
const value = await GuaribasSubject.create(<GuaribasSubject>{
|
} else if (level < lastLevel) {
|
||||||
internalId: item.id,
|
childrenNode = activeChildrenGivenLevel[level];
|
||||||
parentSubjectId: parentSubjectId,
|
}
|
||||||
instanceId: instance.instanceId,
|
|
||||||
from: item.from,
|
/// Keeps the record of last subroots for each level, to
|
||||||
to: item.to,
|
// changel levels greater than one (return to main menu),
|
||||||
title: item.title,
|
// can exists between leaf nodes and roots.
|
||||||
description: item.description,
|
|
||||||
packageId: packageId
|
activeChildrenGivenLevel[level] = childrenNode;
|
||||||
|
|
||||||
|
// Insert the object into JSON.
|
||||||
|
const description = row._cells[level + 1] ? row._cells[level + 1].text : null;
|
||||||
|
activeObj = {
|
||||||
|
title: menu,
|
||||||
|
description: description,
|
||||||
|
id: menu,
|
||||||
|
children: []
|
||||||
|
};
|
||||||
|
activeChildrenGivenLevel[level].push(activeObj);
|
||||||
|
|
||||||
|
lastLevel = level;
|
||||||
});
|
});
|
||||||
|
|
||||||
if (item.children) {
|
subjectsLoaded = subjects;
|
||||||
return doIt(item.children, value.subjectId);
|
} else {
|
||||||
} else {
|
subjectsLoaded = JSON.parse(Fs.readFileSync(filename, 'utf8'));
|
||||||
return item;
|
}
|
||||||
}
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
return doIt(subjectsLoaded.children, undefined);
|
const doIt = async (subjects: GuaribasSubject[], parentSubjectId: number) => {
|
||||||
}
|
return asyncPromise.eachSeries(subjects, async item => {
|
||||||
|
const value = await GuaribasSubject.create(<GuaribasSubject>{
|
||||||
|
internalId: item.id,
|
||||||
|
parentSubjectId: parentSubjectId,
|
||||||
|
instanceId: instance.instanceId,
|
||||||
|
from: item.from,
|
||||||
|
to: item.to,
|
||||||
|
title: item.title,
|
||||||
|
description: item.description,
|
||||||
|
packageId: packageId
|
||||||
|
});
|
||||||
|
|
||||||
|
if (item.children) {
|
||||||
|
return doIt(item.children, value.subjectId);
|
||||||
|
} else {
|
||||||
|
return item;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
return doIt(subjectsLoaded.children, undefined);
|
||||||
|
}
|
||||||
|
|
||||||
public async undeployKbFromStorage(instance: IGBInstance, deployer: GBDeployer, packageId: number) {
|
public async undeployKbFromStorage(instance: IGBInstance, deployer: GBDeployer, packageId: number) {
|
||||||
await GuaribasQuestion.destroy({
|
await GuaribasQuestion.destroy({
|
||||||
where: { instanceId: instance.instanceId, packageId: packageId }
|
where: { instanceId: instance.instanceId, packageId: packageId }
|
||||||
});
|
});
|
||||||
await GuaribasAnswer.destroy({
|
await GuaribasAnswer.destroy({
|
||||||
where: { instanceId: instance.instanceId, packageId: packageId }
|
where: { instanceId: instance.instanceId, packageId: packageId }
|
||||||
});
|
});
|
||||||
await GuaribasSubject.destroy({
|
await GuaribasSubject.destroy({
|
||||||
where: { instanceId: instance.instanceId, packageId: packageId }
|
where: { instanceId: instance.instanceId, packageId: packageId }
|
||||||
});
|
});
|
||||||
await this.undeployPackageFromStorage(instance, packageId);
|
await this.undeployPackageFromStorage(instance, packageId);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static async RefreshNER(min: GBMinInstance) {
|
public static async RefreshNER(min: GBMinInstance) {
|
||||||
const questions = await KBService.getQuestionsNER(min.instance.instanceId);
|
const questions = await KBService.getQuestionsNER(min.instance.instanceId);
|
||||||
const contentLocale = min.core.getParam<string>(
|
const contentLocale = min.core.getParam<string>(
|
||||||
min.instance,
|
min.instance,
|
||||||
'Default Content Language',
|
'Default Content Language',
|
||||||
GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
|
GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
|
||||||
);
|
);
|
||||||
|
|
||||||
await CollectionUtil.asyncForEach(questions, async question => {
|
await CollectionUtil.asyncForEach(questions, async question => {
|
||||||
const text = question.content;
|
const text = question.content;
|
||||||
|
|
||||||
const categoryReg = /.*\((.*)\).*/gi.exec(text);
|
const categoryReg = /.*\((.*)\).*/gi.exec(text);
|
||||||
const nameReg = /(\w+)\(.*\).*/gi.exec(text);
|
const nameReg = /(\w+)\(.*\).*/gi.exec(text);
|
||||||
|
|
||||||
if (categoryReg) {
|
if (categoryReg) {
|
||||||
let category = categoryReg[1];
|
let category = categoryReg[1];
|
||||||
|
|
||||||
if (category === 'number') {
|
if (category === 'number') {
|
||||||
min['nerEngine'].addRegexEntity('number', 'pt', '/d+/gi');
|
min['nerEngine'].addRegexEntity('number', 'pt', '/d+/gi');
|
||||||
|
}
|
||||||
|
if (nameReg) {
|
||||||
|
let name = nameReg[1];
|
||||||
|
|
||||||
|
min['nerEngine'].addNamedEntityText(category, name, [contentLocale], [name]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (nameReg) {
|
});
|
||||||
let name = nameReg[1];
|
}
|
||||||
|
|
||||||
min['nerEngine'].addNamedEntityText(category, name, [contentLocale], [name]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deploys a knowledge base to the storage using the .gbkb format.
|
* Deploys a knowledge base to the storage using the .gbkb format.
|
||||||
|
@ -1106,90 +1236,90 @@ export class KBService implements IGBKBService {
|
||||||
* @param localPath Path to the .gbkb folder.
|
* @param localPath Path to the .gbkb folder.
|
||||||
*/
|
*/
|
||||||
public async deployKb(core: IGBCoreService, deployer: GBDeployer, localPath: string, min: GBMinInstance) {
|
public async deployKb(core: IGBCoreService, deployer: GBDeployer, localPath: string, min: GBMinInstance) {
|
||||||
const packageName = Path.basename(localPath);
|
const packageName = Path.basename(localPath);
|
||||||
const instance = await core.loadInstanceByBotId(min.botId);
|
const instance = await core.loadInstanceByBotId(min.botId);
|
||||||
GBLogEx.info(min, `[GBDeployer] Importing: ${localPath}`);
|
GBLogEx.info(min, `[GBDeployer] Importing: ${localPath}`);
|
||||||
|
|
||||||
const p = await deployer.deployPackageToStorage(instance.instanceId, packageName);
|
const p = await deployer.deployPackageToStorage(instance.instanceId, packageName);
|
||||||
await this.importKbPackage(min, localPath, p, instance);
|
await this.importKbPackage(min, localPath, p, instance);
|
||||||
GBDeployer.mountGBKBAssets(packageName, min.botId, localPath);
|
GBDeployer.mountGBKBAssets(packageName, min.botId, localPath);
|
||||||
const service = await AzureDeployerService.createInstance(deployer);
|
const service = await AzureDeployerService.createInstance(deployer);
|
||||||
const searchIndex = instance.searchIndex ? instance.searchIndex : GBServer.globals.minBoot.instance.searchIndex;
|
const searchIndex = instance.searchIndex ? instance.searchIndex : GBServer.globals.minBoot.instance.searchIndex;
|
||||||
await deployer.rebuildIndex(instance, service.getKBSearchSchema(searchIndex));
|
await deployer.rebuildIndex(instance, service.getKBSearchSchema(searchIndex));
|
||||||
|
|
||||||
min['groupCache'] = await KBService.getGroupReplies(instance.instanceId);
|
min['groupCache'] = await KBService.getGroupReplies(instance.instanceId);
|
||||||
await KBService.RefreshNER(min);
|
await KBService.RefreshNER(min);
|
||||||
|
|
||||||
GBLogEx.info(min, `[GBDeployer] Start Bot Server Side Rendering... ${localPath}`);
|
GBLogEx.info(min, `[GBDeployer] Start Bot Server Side Rendering... ${localPath}`);
|
||||||
const html = await GBSSR.getHTML(min);
|
const html = await GBSSR.getHTML(min);
|
||||||
let path = DialogKeywords.getGBAIPath(min.botId, `gbui`);
|
let path = DialogKeywords.getGBAIPath(min.botId, `gbui`);
|
||||||
path = Path.join(process.env.PWD, 'work', path, 'index.html');
|
path = Path.join(process.env.PWD, 'work', path, 'index.html');
|
||||||
GBLogEx.info(min, `[GBDeployer] Saving SSR HTML in ${path}.`);
|
GBLogEx.info(min, `[GBDeployer] Saving SSR HTML in ${path}.`);
|
||||||
Fs.writeFileSync(path, html, 'utf8');
|
Fs.writeFileSync(path, html, 'utf8');
|
||||||
|
|
||||||
GBLogEx.info(min, `[GBDeployer] Finished import of ${localPath}`);
|
GBLogEx.info(min, `[GBDeployer] Finished import of ${localPath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async playAudio(
|
private async playAudio(
|
||||||
min: GBMinInstance,
|
min: GBMinInstance,
|
||||||
answer: GuaribasAnswer,
|
answer: GuaribasAnswer,
|
||||||
channel: string,
|
channel: string,
|
||||||
step: GBDialogStep,
|
step: GBDialogStep,
|
||||||
conversationalService: IGBConversationalService
|
conversationalService: IGBConversationalService
|
||||||
) {
|
) {
|
||||||
conversationalService.sendAudio(min, step, answer.content);
|
conversationalService.sendAudio(min, step, answer.content);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async playUrl(
|
private async playUrl(
|
||||||
min,
|
min,
|
||||||
conversationalService: IGBConversationalService,
|
conversationalService: IGBConversationalService,
|
||||||
step: GBDialogStep,
|
step: GBDialogStep,
|
||||||
url: string,
|
url: string,
|
||||||
channel: string
|
channel: string
|
||||||
) {
|
) {
|
||||||
if (channel === 'whatsapp') {
|
if (channel === 'whatsapp') {
|
||||||
await min.conversationalService.sendFile(min, step, null, url, '');
|
await min.conversationalService.sendFile(min, step, null, url, '');
|
||||||
} else {
|
} else {
|
||||||
await conversationalService.sendEvent(min, step, 'play', {
|
await conversationalService.sendEvent(min, step, 'play', {
|
||||||
playerType: 'url',
|
playerType: 'url',
|
||||||
data: url
|
data: url
|
||||||
});
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private async playVideo(
|
private async playVideo(
|
||||||
min,
|
min,
|
||||||
conversationalService: IGBConversationalService,
|
conversationalService: IGBConversationalService,
|
||||||
step: GBDialogStep,
|
step: GBDialogStep,
|
||||||
answer: GuaribasAnswer,
|
answer: GuaribasAnswer,
|
||||||
channel: string
|
channel: string
|
||||||
) {
|
) {
|
||||||
if (channel === 'whatsapp') {
|
if (channel === 'whatsapp') {
|
||||||
await min.conversationalService.sendFile(min, step, null, answer.content, '');
|
await min.conversationalService.sendFile(min, step, null, answer.content, '');
|
||||||
} else {
|
} else {
|
||||||
const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
|
const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
|
||||||
await conversationalService.sendEvent(min, step, 'play', {
|
await conversationalService.sendEvent(min, step, 'play', {
|
||||||
playerType: 'video',
|
playerType: 'video',
|
||||||
data: urlJoin(path, 'videos', answer.content)
|
data: urlJoin(path, 'videos', answer.content)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async undeployPackageFromStorage(instance: any, packageId: number) {
|
||||||
|
await GuaribasPackage.destroy({
|
||||||
|
where: { instanceId: instance.instanceId, packageId: packageId }
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private async getTextFromFile(filename: string) {
|
||||||
|
return new Promise<string>(async (resolve, reject) => {
|
||||||
|
textract.fromFileWithPath(filename, { preserveLineBreaks: true }, (error, text) => {
|
||||||
|
if (error) {
|
||||||
|
reject(error);
|
||||||
|
} else {
|
||||||
|
resolve(text);
|
||||||
|
}
|
||||||
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private async undeployPackageFromStorage(instance: any, packageId: number) {
|
|
||||||
await GuaribasPackage.destroy({
|
|
||||||
where: { instanceId: instance.instanceId, packageId: packageId }
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private async getTextFromFile(filename: string) {
|
|
||||||
return new Promise<string>(async (resolve, reject) => {
|
|
||||||
textract.fromFileWithPath(filename, { preserveLineBreaks: true }, (error, text) => {
|
|
||||||
if (error) {
|
|
||||||
reject(error);
|
|
||||||
} else {
|
|
||||||
resolve(text);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue