new(basic.gblib): GPT replacing ALLEN NLP Reading Comp.
This commit is contained in:
parent
4d8061db60
commit
ff6adacf9b
4 changed files with 375 additions and 259 deletions
|
@ -309,6 +309,21 @@ export class GBDeployer implements IGBDeployer {
|
||||||
return await this.core.saveInstance(instance);
|
return await this.core.saveInstance(instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the HNSWLib vector store for the given bot instance.
 *
 * First tries to load a persisted store from `min['vectorStorePath']`. If
 * loading fails for any reason, a new, empty store is created instead
 * (cosine space, 1536 dimensions). Both paths use OpenAI embeddings with
 * `maxConcurrency` set to 5.
 *
 * @param min Bot instance whose `vectorStorePath` entry is read.
 * @returns The loaded store, or a freshly created empty one.
 */
public async loadOrCreateEmptyVectorStore(min: GBMinInstance): Promise<HNSWLib> {
  // Factory instead of a shared instance: the embeddings object is built
  // at the point of use, once per attempt.
  const createEmbeddings = () => new OpenAIEmbeddings({ maxConcurrency: 5 });

  try {
    // `return await` keeps a rejected load inside this try block.
    return await HNSWLib.load(min['vectorStorePath'], createEmbeddings());
  } catch {
    // Any load failure falls back to an empty store.
    return new HNSWLib(createEmbeddings(), {
      space: 'cosine',
      numDimensions: 1536
    });
  }
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs the NLP publishing process on remote service.
|
* Performs the NLP publishing process on remote service.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -312,6 +312,10 @@ export class GBMinService {
|
||||||
if (!Fs.existsSync(dir)) {
|
if (!Fs.existsSync(dir)) {
|
||||||
mkdirp.sync(dir);
|
mkdirp.sync(dir);
|
||||||
}
|
}
|
||||||
|
dir = `work/${gbai}/${botId}.gbkb/docs-vectorized`;
|
||||||
|
if (!Fs.existsSync(dir)) {
|
||||||
|
mkdirp.sync(dir);
|
||||||
|
}
|
||||||
dir = `work/${gbai}/${botId}.gbdialog`;
|
dir = `work/${gbai}/${botId}.gbdialog`;
|
||||||
if (!Fs.existsSync(dir)) {
|
if (!Fs.existsSync(dir)) {
|
||||||
mkdirp.sync(dir);
|
mkdirp.sync(dir);
|
||||||
|
@ -746,6 +750,9 @@ export class GBMinService {
|
||||||
min.sandBoxMap = {};
|
min.sandBoxMap = {};
|
||||||
min['scheduleMap'] = {};
|
min['scheduleMap'] = {};
|
||||||
min['conversationWelcomed'] = {};
|
min['conversationWelcomed'] = {};
|
||||||
|
min['vectorStore']= await this.deployer.loadOrCreateEmptyVectorStore(min);
|
||||||
|
const gbkbPath = DialogKeywords.getGBAIPath(min.botId, 'gbkb');
|
||||||
|
min['vectorStorePath']= Path.join('work', gbkbPath, 'docs-vectorized');
|
||||||
min.packages = sysPackages;
|
min.packages = sysPackages;
|
||||||
|
|
||||||
// NLP Manager.
|
// NLP Manager.
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
import { GBMinInstance } from 'botlib';
|
import { GBMinInstance } from 'botlib';
|
||||||
//import OpenAI from "openai";
|
import OpenAI from "openai";
|
||||||
import { ChatGPTAPIBrowser, getOpenAIAuth } from 'chatgpt'
|
import { ChatGPTAPIBrowser, getOpenAIAuth } from 'chatgpt'
|
||||||
import { CollectionUtil } from 'pragmatismo-io-framework';
|
import { CollectionUtil } from 'pragmatismo-io-framework';
|
||||||
import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
|
import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
|
||||||
|
@ -69,15 +69,15 @@ export class ChatServices {
|
||||||
|
|
||||||
// Calls Model.
|
// Calls Model.
|
||||||
|
|
||||||
// const openai = new OpenAI({
|
const openai = new OpenAI({
|
||||||
// apiKey: key
|
apiKey: key
|
||||||
// });
|
});
|
||||||
// const chatCompletion = await openai.chat.completions.create({
|
const chatCompletion = await openai.chat.completions.create({
|
||||||
// model: "gpt-3.5-turbo",
|
model: "gpt-3.5-turbo",
|
||||||
// messages: [{ role: "user", content: text }],
|
messages: [{ role: "user", content: text }],
|
||||||
// functions: functions
|
functions: functions
|
||||||
// });
|
});
|
||||||
// return chatCompletion.choices[0].message.content;
|
return chatCompletion.choices[0].message.content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -35,13 +35,24 @@
|
||||||
import Path from 'path';
|
import Path from 'path';
|
||||||
import Fs from 'fs';
|
import Fs from 'fs';
|
||||||
import urlJoin from 'url-join';
|
import urlJoin from 'url-join';
|
||||||
import path from 'path';
|
|
||||||
import asyncPromise from 'async-promises';
|
import asyncPromise from 'async-promises';
|
||||||
import walkPromise from 'walk-promise';
|
import walkPromise from 'walk-promise';
|
||||||
import { SearchClient } from '@azure/search-documents';
|
import { SearchClient } from '@azure/search-documents';
|
||||||
import Excel from 'exceljs';
|
import Excel from 'exceljs';
|
||||||
import getSlug from 'speakingurl';
|
import getSlug from 'speakingurl';
|
||||||
import { GBServer } from '../../../src/app.js';
|
import { GBServer } from '../../../src/app.js';
|
||||||
|
import { HNSWLib } from 'langchain/vectorstores/hnswlib';
|
||||||
|
import { JSONLoader } from 'langchain/document_loaders/fs/json';
|
||||||
|
import { TextLoader } from 'langchain/document_loaders/fs/text';
|
||||||
|
import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
|
||||||
|
import { DocxLoader } from 'langchain/document_loaders/fs/docx';
|
||||||
|
import { EPubLoader } from 'langchain/document_loaders/fs/epub';
|
||||||
|
import { CSVLoader } from 'langchain/document_loaders/fs/csv';
|
||||||
|
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||||
|
import { Document } from 'langchain/document';
|
||||||
|
import path from 'path';
|
||||||
|
import { YoutubeTranscript } from 'youtube-transcript';
|
||||||
|
|
||||||
import {
|
import {
|
||||||
GBDialogStep,
|
GBDialogStep,
|
||||||
GBLog,
|
GBLog,
|
||||||
|
@ -257,7 +268,7 @@ export class KBService implements IGBKBService {
|
||||||
}
|
}
|
||||||
|
|
||||||
public async ask(
|
public async ask(
|
||||||
instance: IGBInstance,
|
min: GBMinInstance,
|
||||||
query: string,
|
query: string,
|
||||||
searchScore: number,
|
searchScore: number,
|
||||||
subjects: GuaribasSubject[]
|
subjects: GuaribasSubject[]
|
||||||
|
@ -272,6 +283,8 @@ export class KBService implements IGBKBService {
|
||||||
query = query.replace('\\', ' ');
|
query = query.replace('\\', ' ');
|
||||||
query = query.replace('\r\n', ' ');
|
query = query.replace('\r\n', ' ');
|
||||||
|
|
||||||
|
const instance = min.instance;
|
||||||
|
|
||||||
// Try simple search first.
|
// Try simple search first.
|
||||||
|
|
||||||
const data = await this.getAnswerByText(instance.instanceId, query.trim());
|
const data = await this.getAnswerByText(instance.instanceId, query.trim());
|
||||||
|
@ -341,12 +354,17 @@ export class KBService implements IGBKBService {
|
||||||
return { answer: undefined, questionId: 0 };
|
return { answer: undefined, questionId: 0 };
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
GBLog.info(
|
GBLog.info(
|
||||||
`SEARCH called but returned LOW level score,
|
`SEARCH called but returned LOW level score,
|
||||||
returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
|
returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
|
||||||
);
|
);
|
||||||
|
|
||||||
return { answer: undefined, questionId: 0 };
|
return await this.answerByGPT(min,
|
||||||
|
query,
|
||||||
|
searchScore,
|
||||||
|
subjects
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -354,6 +372,44 @@ export class KBService implements IGBKBService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a single-line context string from the documents most similar
 * to the question.
 *
 * @param vectorStore Store queried through `similaritySearch`.
 * @param sanitizedQuestion Question text passed to the search as-is.
 * @param numDocuments Maximum number of documents requested from the search.
 * @returns The `pageContent` of each hit joined with ', ', trimmed, with
 * every remaining newline replaced by a space.
 */
private async getRelevantContext(
  vectorStore: HNSWLib,
  sanitizedQuestion: string,
  numDocuments: number
): Promise<string> {
  const hits = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments);

  const pieces: string[] = [];
  for (const hit of hits) {
    pieces.push(hit.pageContent);
  }

  // Trim first, then flatten: leading/trailing newlines are removed rather
  // than turned into spaces.
  const joined = pieces.join(', ').trim();
  return joined.replace(/\n/g, ' ');
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Answers a question through the GPT chain, using the bot's vector store
 * to supply context.
 *
 * @param min Bot instance; its `vectorStore` entry is used for the
 * similarity search.
 * @param query Raw user question; it is trimmed and its newlines are
 * replaced by spaces before use.
 * @param searchScore Not read by this method.
 * @param subjects Not read by this method.
 * @returns `{ answer, questionId: 0 }`. `answer` is `undefined` when no
 * vector store is attached to `min` or when the chain yields no response.
 */
public async answerByGPT(min: GBMinInstance,
  query: string,
  searchScore: number,
  subjects: GuaribasSubject[]
) {
  // The store is attached to `min` under the key 'vectorStore' (lower-case
  // 'v'); reading 'VectorStore' always yields undefined.
  const contextVectorStore = min['vectorStore'];
  if (!contextVectorStore) {
    // Without a store there is no context to search; report "no answer"
    // instead of failing on an undefined store.
    return { answer: undefined, questionId: 0 };
  }

  const question = query.trim().replaceAll('\n', ' ');
  // Only the single most similar document is used as context.
  const context = await this.getRelevantContext(contextVectorStore, question, 1);

  // NOTE(review): `chain` is not declared inside this method; it is assumed
  // to be provided by the enclosing scope — confirm.
  const response = await chain.call({
    input: question,
    context,
    history: '',
    immediate_history: '',
  });

  if (response) {
    return { answer: response.response, questionId: 0 };
  }

  return { answer: undefined, questionId: 0 };
}
|
||||||
|
|
||||||
|
|
||||||
public async getSubjectItems(instanceId: number, parentId: number): Promise<GuaribasSubject[]> {
|
public async getSubjectItems(instanceId: number, parentId: number): Promise<GuaribasSubject[]> {
|
||||||
const where = { parentSubjectId: parentId, instanceId: instanceId };
|
const where = { parentSubjectId: parentId, instanceId: instanceId };
|
||||||
|
|
||||||
|
@ -830,29 +886,67 @@ export class KBService implements IGBKBService {
|
||||||
await CollectionUtil.asyncForEach(files, async file => {
|
await CollectionUtil.asyncForEach(files, async file => {
|
||||||
let content = null;
|
let content = null;
|
||||||
let filePath = Path.join(file.root, file.name);
|
let filePath = Path.join(file.root, file.name);
|
||||||
if (file !== null) {
|
|
||||||
if (file.name.endsWith('.docx')) {
|
const document = await this.loadAndSplitFile(filePath);
|
||||||
content = await this.getTextFromFile(filePath);
|
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
||||||
} else if (file.name.endsWith('.pdf')) {
|
const vectorStore = min['vectorStore'];
|
||||||
const read = await pdf(Fs.readFileSync(filePath));
|
await vectorStore.addDocuments(flattenedDocuments);
|
||||||
content = read.text;
|
await vectorStore.save(min['vectorStorePath']);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (content) {
|
defaultRecursiveCharacterTextSplitter = new RecursiveCharacterTextSplitter({
|
||||||
content = await min.conversationalService.translate(min, content, 'en');
|
chunkSize: 1024,
|
||||||
await GuaribasAnswer.create(<GuaribasAnswer>{
|
chunkOverlap: 1024,
|
||||||
instanceId: instance.instanceId,
|
|
||||||
content: content,
|
|
||||||
format: '.docx',
|
|
||||||
media: file.name,
|
|
||||||
packageId: packageId
|
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
markdownRecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter.fromLanguage('markdown', {
|
||||||
|
chunkSize: 1024,
|
||||||
|
chunkOverlap: 1024,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Loads a file with the loader matching its extension and splits it into
 * documents.
 *
 * Supported extensions (matched case-insensitively): .json, .txt, .md,
 * .pdf, .docx, .csv and .epub. Markdown files are split with
 * `markdownRecursiveCharacterTextSplitter`; every other type uses
 * `defaultRecursiveCharacterTextSplitter`.
 *
 * @param filePath Path of the file to load.
 * @returns The documents produced by the loader's `loadAndSplit`.
 * @throws Error when the file extension is not one of the supported ones.
 */
private async loadAndSplitFile(filePath: string): Promise<Document<Record<string, unknown>>[]> {
  const fileExtension = path.extname(filePath);
  let loader;
  let splitter = this.defaultRecursiveCharacterTextSplitter;

  // Compare on the lower-cased extension so that e.g. 'REPORT.PDF' is
  // handled like 'report.pdf'.
  switch (fileExtension.toLowerCase()) {
    case '.json':
      loader = new JSONLoader(filePath);
      break;
    case '.txt':
      loader = new TextLoader(filePath);
      break;
    case '.md':
      // Markdown is read as plain text but split with the markdown splitter.
      loader = new TextLoader(filePath);
      splitter = this.markdownRecursiveCharacterTextSplitter;
      break;
    case '.pdf':
      // The whole PDF is loaded as one document; splitting is left to the splitter.
      loader = new PDFLoader(filePath, { splitPages: false });
      break;
    case '.docx':
      loader = new DocxLoader(filePath);
      break;
    case '.csv':
      loader = new CSVLoader(filePath);
      break;
    case '.epub':
      // The whole book is loaded as one document; splitting is left to the splitter.
      loader = new EPubLoader(filePath, { splitChapters: false });
      break;
    default:
      // Report the extension exactly as it appears in the path.
      throw new Error(`Unsupported file extension: ${fileExtension}`);
  }

  const documents: Document<Record<string, unknown>>[] = await loader.loadAndSplit(splitter);
  return documents;
}
|
||||||
|
|
||||||
|
|
||||||
public async importKbTabularDirectory(localPath: string, min: GBMinInstance, packageId: number): Promise < any > {
|
public async importKbTabularDirectory(localPath: string, min: GBMinInstance, packageId: number): Promise < any > {
|
||||||
const files = await walkPromise(localPath);
|
const files = await walkPromise(localPath);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue