fix(core.gbapp): Fix deployer group.
This commit is contained in:
parent
063c149b94
commit
14b172187e
2 changed files with 80 additions and 7 deletions
|
@ -170,6 +170,7 @@
|
||||||
"pdf-extraction": "1.0.2",
|
"pdf-extraction": "1.0.2",
|
||||||
"pdf-parse": "1.1.1",
|
"pdf-parse": "1.1.1",
|
||||||
"pdf-to-png-converter": "3.1.0",
|
"pdf-to-png-converter": "3.1.0",
|
||||||
|
"pdfjs-dist": "4.0.379",
|
||||||
"pdfkit": "0.13.0",
|
"pdfkit": "0.13.0",
|
||||||
"phone": "3.1.30",
|
"phone": "3.1.30",
|
||||||
"pizzip": "3.1.3",
|
"pizzip": "3.1.3",
|
||||||
|
|
|
@ -48,13 +48,18 @@ import { GBConfigService } from '../../core.gbapp/services/GBConfigService.js';
|
||||||
import { GuaribasSubject } from '../../kb.gbapp/models/index.js';
|
import { GuaribasSubject } from '../../kb.gbapp/models/index.js';
|
||||||
import { Serialized } from "@langchain/core/load/serializable";
|
import { Serialized } from "@langchain/core/load/serializable";
|
||||||
import { BaseCallbackHandler } from "@langchain/core/callbacks/base";
|
import { BaseCallbackHandler } from "@langchain/core/callbacks/base";
|
||||||
|
import { pdfToPng, PngPageOutput } from 'pdf-to-png-converter';
|
||||||
import { DynamicStructuredTool } from "@langchain/core/tools";
|
import { DynamicStructuredTool } from "@langchain/core/tools";
|
||||||
import {
|
import {
|
||||||
BaseLLMOutputParser,
|
BaseLLMOutputParser,
|
||||||
OutputParserException,
|
OutputParserException,
|
||||||
} from "@langchain/core/output_parsers";
|
} from "@langchain/core/output_parsers";
|
||||||
import { ChatGeneration, Generation } from "@langchain/core/outputs";
|
import { ChatGeneration, Generation } from "@langchain/core/outputs";
|
||||||
import { LunaryHandler } from "@langchain/community/callbacks/handlers/lunary";
|
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
|
||||||
|
import { GBServer } from '../../../src/app.js';
|
||||||
|
import urlJoin from 'url-join';
|
||||||
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
||||||
|
|
||||||
|
|
||||||
export interface CustomOutputParserFields { }
|
export interface CustomOutputParserFields { }
|
||||||
export type ExpectedOutput = string;
|
export type ExpectedOutput = string;
|
||||||
|
@ -126,23 +131,85 @@ export class CustomLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
|
||||||
|
|
||||||
export class ChatServices {
|
export class ChatServices {
|
||||||
|
|
||||||
|
/**
 * Renders a single page of a PDF file to a PNG image and stores it in the
 * bot cache folder.
 *
 * @param min Bot instance; `min.botId` is used to build both the cache path
 * and the returned URL.
 * @param filename Path of the PDF file on disk.
 * @param pageNumber 1-based number of the page to render.
 * @returns An object with the local file name, the URL built from
 * `GBServer.globals.publicAddress` and the PNG bytes, or `undefined` when the
 * converter produced no image for the requested page.
 */
private async pdfPageAsImage(min, filename: string, pageNumber: number) {

  const data = Fs.readFileSync(filename);

  // Converts only the requested page of the PDF to PNG. The page list must
  // contain `pageNumber` itself: asking for page 1 and then indexing the
  // result by `pageNumber - 1` fails for every page other than the first.
  const pngPages: PngPageOutput[] = await pdfToPng(data, {
    disableFontFace: false,
    useSystemFonts: false,
    viewportScale: 2.0,
    pagesToProcess: [pageNumber],
    strictPagesToProcess: false,
    verbosityLevel: 0
  });

  // A single page was requested, so the result holds at most one entry.
  const rendered = pngPages[0];
  if (!rendered) {
    return undefined;
  }

  const gbaiName = DialogKeywords.getGBAIPath(min.botId, 'gbdata');

  // Prepare an image on cache and return the GBFILE information.
  const localName = Path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`);
  const buffer = rendered.content;
  const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName));

  Fs.writeFileSync(localName, buffer, { encoding: null });

  return { localName: localName, url: url, data: buffer };
}
|
||||||
|
|
||||||
/**
 * Builds the context text handed to the LLM from the documents most similar
 * to the question.
 *
 * Each retrieved document becomes one section that names the source file and
 * the page it was found on, followed by the document content.
 *
 * @param vectorStore Vector store queried with a similarity search.
 * @param sanitizedQuestion Question text; an empty string yields an empty context.
 * @param numDocuments Maximum number of documents to retrieve.
 * @returns The concatenated sections, or `''` when there is no question or
 * no document was found.
 */
private static async getRelevantContext(
  vectorStore: HNSWLib,
  sanitizedQuestion: string,
  numDocuments: number = 10
): Promise<string> {

  if (sanitizedQuestion === '') {
    return '';
  }

  const documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments);
  let output = '';

  // Sections are appended one at a time so they keep the search order.
  for (const doc of documents) {

    const metadata = doc.metadata;
    const filename = Path.basename(metadata.source);

    // Documents without line-range metadata cannot be mapped to a page; use
    // the same -1 value findPageForTextInterval returns when nothing matches
    // instead of failing on the missing property.
    const lines = metadata.loc?.lines;
    const page = lines
      ? await ChatServices.findPageForTextInterval(metadata.source, lines.from, lines.to)
      : -1;

    output = `${output}\n\n\n\nThe following context is coming from ${filename} at page: ${page},\n` +
      `memorize this block among document information and return when you are referring this part of content:\n\n\n\n ${doc.pageContent} \n\n\n\n.`;
  }

  return output;
}
|
||||||
|
|
||||||
|
/**
 * Finds the page of a PDF file that holds a given interval of text lines.
 *
 * The text items of each page are joined with line breaks and counted as
 * lines; the line interval is shifted by that count from page to page until
 * it falls inside a page.
 *
 * @param pdfPath Path of the PDF file on disk.
 * @param startLine First line of the interval, counted across the whole document.
 * @param endLine Last line of the interval, counted across the whole document.
 * @returns The 1-based page number, or `-1` when the interval is not found.
 */
public static async findPageForTextInterval(pdfPath: string, startLine: number, endLine: number): Promise<number> {

  const data = new Uint8Array(Fs.readFileSync(pdfPath));
  const pdf = await getDocument({ data }).promise;

  try {

    // Work on local copies so the caller-visible arguments are never reassigned.
    let remainingStart = startLine;
    let remainingEnd = endLine;

    // Loop through each page to find the text interval.
    for (let i = 1; i <= pdf.numPages; i++) {
      const page = await pdf.getPage(i);
      const textContent = await page.getTextContent();
      const text = textContent.items.map(item => item['str']).join('\n');
      const numLines = text.split('\n').length;

      // Check whether the text interval is on this page.
      const nextPageLine = (i === pdf.numPages) ? numLines : remainingEnd;
      if (remainingStart <= numLines && remainingEnd >= 0 && remainingStart <= nextPageLine) {
        return i;
      }

      // Make the interval relative to the start of the next page.
      remainingStart -= numLines;
      remainingEnd -= numLines;
    }

    return -1; // Text interval not found.
  } finally {

    // Release the loaded document on every exit path, including errors.
    await pdf.destroy();
  }
}
|
||||||
/**
|
/**
|
||||||
* Generate text
|
* Generate text
|
||||||
*
|
*
|
||||||
|
@ -234,6 +301,11 @@ export class ChatServices {
|
||||||
const combineDocumentsPrompt = ChatPromptTemplate.fromMessages([
|
const combineDocumentsPrompt = ChatPromptTemplate.fromMessages([
|
||||||
AIMessagePromptTemplate.fromTemplate(
|
AIMessagePromptTemplate.fromTemplate(
|
||||||
`
|
`
|
||||||
|
This is a sectioned context.
|
||||||
|
|
||||||
|
Very important: When answering, *mention in the answer* the PDF filename and page number related to each block of information used to answer.
|
||||||
|
Eg.: filename.pdf, page 3 - filename2.pdf, page 55.
|
||||||
|
|
||||||
\n\n{context}\n\n
|
\n\n{context}\n\n
|
||||||
|
|
||||||
And using \n\n{chat_history}\n\n
|
And using \n\n{chat_history}\n\n
|
||||||
|
@ -278,7 +350,7 @@ export class ChatServices {
|
||||||
},
|
},
|
||||||
context: async (output: string) => {
|
context: async (output: string) => {
|
||||||
const c = await ChatServices.getRelevantContext(docsContext, output);
|
const c = await ChatServices.getRelevantContext(docsContext, output);
|
||||||
return `${systemPrompt} \n ${c ? 'Use this context to answer:\n' + c: 'answer just with user question.'}`;
|
return `${systemPrompt} \n ${c ? 'Use this context to answer:\n' + c : 'answer just with user question.'}`;
|
||||||
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
Loading…
Add table
Reference in a new issue