fix(basic.gblib): FUNCTION GPT.

This commit is contained in:
Rodrigo Rodriguez 2024-03-21 23:41:33 -03:00
parent ba0adc57af
commit 44dc1f625a
2 changed files with 59 additions and 28 deletions

View file

@ -169,7 +169,7 @@
"openai": "4.6.0", "openai": "4.6.0",
"pdf-extraction": "1.0.2", "pdf-extraction": "1.0.2",
"pdf-parse": "1.1.1", "pdf-parse": "1.1.1",
"pdf-to-png-converter": "3.1.0", "pdf-to-png-converter": "3.2.0",
"pdfjs-dist": "4.0.379", "pdfjs-dist": "4.0.379",
"pdfkit": "0.13.0", "pdfkit": "0.13.0",
"phone": "3.1.30", "phone": "3.1.30",

View file

@ -93,16 +93,18 @@ class CustomHandler extends BaseCallbackHandler {
const logHandler = new CustomHandler(); const logHandler = new CustomHandler();
export class CustomLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> { export class GBLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
lc_namespace = ["langchain", "output_parsers"]; lc_namespace = ["langchain", "output_parsers"];
private toolChain: RunnableSequence private toolChain: RunnableSequence
private documentChain: RunnableSequence; private documentChain: RunnableSequence;
private min;
constructor(toolChain: RunnableSequence, documentChain: RunnableSequence) { constructor(min, toolChain: RunnableSequence, documentChain: RunnableSequence) {
super(); super();
this.min = min;
this.toolChain = toolChain; this.toolChain = toolChain;
this.documentChain = documentChain;
} }
async parseResult( async parseResult(
@ -125,35 +127,57 @@ export class CustomLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
result = llmOutputs[0].text; result = llmOutputs[0].text;
} }
return this.documentChain ? this.documentChain.invoke(result) : result; const naiveJSONFromText = (text) => {
const match = text.match(/\{[\s\S]*\}/);
if (!match) return null;
try {
return JSON.parse(match[0]);
} catch {
return null;
}
};
if (result) {
const metadata = naiveJSONFromText(result);
if (metadata) {
const {url} = await ChatServices.pdfPageAsImage(this.min, metadata.filename,
metadata.page);
result = `![alt text](${url})
${result}`;
}
}
return result;
} }
} }
export class ChatServices { export class ChatServices {
private async pdfPageAsImage(min, filename, pageNumber) { public static async pdfPageAsImage(min, filename, pageNumber) {
const data = Fs.readFileSync(filename); const gbaiName = DialogKeywords.getGBAIPath(min.botId, 'gbkb');
const localName = Path.join('work', gbaiName, 'docs', filename);
// Converts the PDF to PNG. // Converts the PDF to PNG.
const pngPages: PngPageOutput[] = await pdfToPng(data, { const pngPages: PngPageOutput[] = await pdfToPng(localName, {
disableFontFace: false, disableFontFace: true,
useSystemFonts: false, useSystemFonts: true,
viewportScale: 2.0, viewportScale: 2.0,
pagesToProcess: [1], pagesToProcess: [pageNumber],
strictPagesToProcess: false, strictPagesToProcess: false,
verbosityLevel: 0 verbosityLevel: 0
}); });
const gbaiName = DialogKeywords.getGBAIPath(min.botId, 'gbdata');
// Prepare an image on cache and return the GBFILE information. // Prepare an image on cache and return the GBFILE information.
const localName = Path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`);
if (pngPages.length > 0) { if (pngPages.length > 0) {
const buffer = pngPages[pageNumber - 1].content; const buffer = pngPages[0].content;
const gbaiName = DialogKeywords.getGBAIPath(min.botId, null);
const localName = Path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`);
const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName)); const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName));
Fs.writeFileSync(localName, buffer, { encoding: null }); Fs.writeFileSync(localName, buffer, { encoding: null });
return { localName: localName, url: url, data: buffer }; return { localName: localName, url: url, data: buffer };
@ -236,15 +260,11 @@ export class ChatServices {
return { answer: undefined, questionId: 0 }; return { answer: undefined, questionId: 0 };
} }
const contentLocale = min.core.getParam(
min.instance,
'Default Content Language',
GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
);
const LLMMode = min.core.getParam( const LLMMode = min.core.getParam(
min.instance, min.instance,
'Answer Mode', 'direct' 'Answer Mode', 'direct'
); );
const docsContext = min['vectorStore']; const docsContext = min['vectorStore'];
if (!this.memoryMap[user.userSystemId]) { if (!this.memoryMap[user.userSystemId]) {
@ -306,8 +326,9 @@ export class ChatServices {
` `
This is a sectioned context. This is a sectioned context.
Very important: When answering, *mention in the answer* the PDF filename and page number related to each block of information used to answer. Very important: When answering, besides the answer, at the end of the message, return this information as JSON
Eg.: filename.pdf, page 3 - filename2.pdf, page 55. containing two fields file as PDF 'filename' and 'page' as the page number.
Eg. of JSON generated: file: filename.pdf, page: 3.
\n\n{context}\n\n \n\n{context}\n\n
@ -359,7 +380,7 @@ export class ChatServices {
}, },
combineDocumentsPrompt, combineDocumentsPrompt,
model, model,
new StringOutputParser() new GBLLMOutputParser(min, null, null)
]); ]);
const conversationalQaChain = RunnableSequence.from([ const conversationalQaChain = RunnableSequence.from([
@ -372,7 +393,7 @@ export class ChatServices {
}, },
questionGeneratorTemplate, questionGeneratorTemplate,
modelWithTools, modelWithTools,
new CustomLLMOutputParser(callToolChain, docsContext?.docstore?._docs.length > 0 ? combineDocumentsChain : null), new GBLLMOutputParser(min, callToolChain, docsContext?.docstore?._docs.length > 0 ? combineDocumentsChain : null),
new StringOutputParser() new StringOutputParser()
]); ]);
@ -386,12 +407,16 @@ export class ChatServices {
}, },
questionGeneratorTemplate, questionGeneratorTemplate,
modelWithTools, modelWithTools,
new CustomLLMOutputParser(callToolChain, docsContext?.docstore?._docs.length > 0 ? combineDocumentsChain : null), new GBLLMOutputParser(min, callToolChain, docsContext?.docstore?._docs.length > 0 ? combineDocumentsChain : null),
new StringOutputParser() new StringOutputParser()
]); ]);
let result; let result;
// Select how the answer is generated from the 'Answer Mode' setting
// in Config.xlsx (LLMMode) and invoke the matching model or chain.
if (LLMMode === "direct") { if (LLMMode === "direct") {
result = await (tools.length > 0 ? modelWithTools : model).invoke(` result = await (tools.length > 0 ? modelWithTools : model).invoke(`
${systemPrompt} ${systemPrompt}
@ -401,6 +426,7 @@ export class ChatServices {
result = result.content; result = result.content;
} }
else if (LLMMode === "document") { else if (LLMMode === "document") {
result = await combineDocumentsChain.invoke(question); result = await combineDocumentsChain.invoke(question);
} else if (LLMMode === "function") { } else if (LLMMode === "function") {
@ -409,6 +435,11 @@ export class ChatServices {
question, question,
}); });
} }
else if (LLMMode === "full") {
throw new Error('Not implemented.'); // TODO: #407.
}
else { else {
GBLog.info(`Invalid Answer Mode in Config.xlsx: ${LLMMode}.`); GBLog.info(`Invalid Answer Mode in Config.xlsx: ${LLMMode}.`);
} }