new(gpt.gblib): PDF opener.
This commit is contained in:
parent
462297dc75
commit
51107fcd76
1 changed files with 42 additions and 67 deletions
|
@ -99,19 +99,18 @@ export class GBLLMOutputParser extends
|
|||
lc_namespace = ["langchain", "output_parsers"];
|
||||
|
||||
private toolChain: RunnableSequence
|
||||
private documentChain: RunnableSequence;
|
||||
private min;
|
||||
|
||||
constructor(min, toolChain: RunnableSequence, documentChain: RunnableSequence) {
|
||||
super();
|
||||
this.min = min;
|
||||
this.toolChain = toolChain;
|
||||
|
||||
}
|
||||
|
||||
async parseResult(
|
||||
llmOutputs: ChatGeneration[] | Generation[]
|
||||
): Promise<ExpectedOutput> {
|
||||
|
||||
if (!llmOutputs.length) {
|
||||
throw new OutputParserException(
|
||||
"Output parser did not receive any generations."
|
||||
|
@ -129,33 +128,20 @@ export class GBLLMOutputParser extends
|
|||
result = llmOutputs[0].text;
|
||||
}
|
||||
|
||||
const naiveJSONFromText = (text) => {
|
||||
const match = text.match(/\{[\s\S]*\}/);
|
||||
if (!match) return null;
|
||||
|
||||
let res;
|
||||
try {
|
||||
return {metadata: JSON.parse(match[0]),
|
||||
text: text.replace(match, '')};
|
||||
result = result.replace(/\\n/g, '');
|
||||
res = JSON.parse(result);
|
||||
} catch {
|
||||
return null;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
if (result) {
|
||||
const res = naiveJSONFromText(result);
|
||||
|
||||
if (res) {
|
||||
const {metadata, text} = res;
|
||||
const {url} = await ChatServices.pdfPageAsImage(this.min, metadata.file,
|
||||
metadata.page);
|
||||
result = `
|
||||
let { file, page, text } = res;
|
||||
const { url } = await ChatServices.pdfPageAsImage(this.min, file, page);
|
||||
text = `
|
||||
${text}`;
|
||||
|
||||
return [ result, metadata.file, metadata.page];
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
return {text, file, page};
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -167,6 +153,7 @@ export class ChatServices {
|
|||
const localName = Path.join('work', gbaiName, 'docs', filename);
|
||||
|
||||
// Converts the PDF to PNG.
|
||||
|
||||
GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber}...`);
|
||||
const pngPages: PngPageOutput[] = await pdfToPng(localName, {
|
||||
disableFontFace: true,
|
||||
|
@ -216,7 +203,6 @@ export class ChatServices {
|
|||
return output;
|
||||
}
|
||||
|
||||
|
||||
private static async findPageForText(pdfPath, searchText) {
|
||||
const data = new Uint8Array(Fs.readFileSync(pdfPath));
|
||||
const pdf = await getDocument({ data }).promise;
|
||||
|
@ -231,7 +217,7 @@ export class ChatServices {
|
|||
if (text.includes(searchText)) return i;
|
||||
}
|
||||
|
||||
return -1; // Texto não encontrado
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -325,12 +311,19 @@ export class ChatServices {
|
|||
`
|
||||
This is a segmented context.
|
||||
|
||||
VERY IMPORTANT: When responding, ALWAYS, I said, You must always both include the text and the following information at the end of your message as a VALID standard JSON, just after the text answer: 'file' indicating the PDF filename and 'page' indicating the page number. Example JSON format: "file": "filename.pdf", "page": 3, return valid JSON with brackets. Avoid explaining the context directly to the user; instead, refer to the document source. Double check if the output JSON has brackets.
|
||||
|
||||
\n\n{context}\n\n
|
||||
|
||||
And based on \n\n{chat_history}\n\n
|
||||
rephrase the response to the user using the aforementioned context. If you're unsure of the answer, utilize any relevant context provided to answer the question effectively. Don´t output MD images tags url previously shown.
|
||||
|
||||
VERY IMPORTANT: ALWAYS return VALID standard JSON with the folowing structure: 'text' as answer,
|
||||
'file' indicating the PDF filename and 'page' indicating the page number.
|
||||
Example JSON format: "text": "this is the answer, anything LLM output as text answer shoud be here.",
|
||||
"file": "filename.pdf", "page": 3,
|
||||
return valid JSON with brackets. Avoid explaining the context directly
|
||||
to the user; instead, refer to the document source.
|
||||
|
||||
Double check if the output is a valid JSON with brackets. all fields are required: text, file, page.
|
||||
`
|
||||
),
|
||||
new MessagesPlaceholder("chat_history"),
|
||||
|
@ -377,20 +370,6 @@ export class ChatServices {
|
|||
new GBLLMOutputParser(min, null, null)
|
||||
]);
|
||||
|
||||
const conversationalQaChain = RunnableSequence.from([
|
||||
{
|
||||
question: (i: { question: string }) => i.question,
|
||||
chat_history: async () => {
|
||||
const { chat_history } = await memory.loadMemoryVariables({});
|
||||
return chat_history;
|
||||
},
|
||||
},
|
||||
questionGeneratorTemplate,
|
||||
modelWithTools,
|
||||
new GBLLMOutputParser(min, callToolChain, docsContext?.docstore?._docs.length > 0 ? combineDocumentsChain : null),
|
||||
new StringOutputParser()
|
||||
]);
|
||||
|
||||
const conversationalToolChain = RunnableSequence.from([
|
||||
{
|
||||
question: (i: { question: string }) => i.question,
|
||||
|
@ -422,7 +401,7 @@ export class ChatServices {
|
|||
}
|
||||
else if (LLMMode === "document") {
|
||||
|
||||
[text, file, page] = await combineDocumentsChain.invoke(question);
|
||||
const {text, file, page} = await combineDocumentsChain.invoke(question);
|
||||
result = text;
|
||||
|
||||
} else if (LLMMode === "function") {
|
||||
|
@ -440,7 +419,6 @@ export class ChatServices {
|
|||
GBLog.info(`Invalid Answer Mode in Config.xlsx: ${LLMMode}.`);
|
||||
}
|
||||
|
||||
|
||||
await memory.saveContext(
|
||||
{
|
||||
input: question,
|
||||
|
@ -452,8 +430,6 @@ export class ChatServices {
|
|||
|
||||
GBLog.info(`GPT Result: ${result.toString()}`);
|
||||
return { answer: result.toString(), file, questionId: 0, page };
|
||||
|
||||
|
||||
}
|
||||
|
||||
private static getToolsAsText(tools) {
|
||||
|
@ -483,7 +459,6 @@ export class ChatServices {
|
|||
funcObj.schema = eval(jsonSchemaToZod(funcObj.parameters));
|
||||
functions.push(new DynamicStructuredTool(funcObj));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
|
|
Loading…
Add table
Reference in a new issue