new(gpt.gblib): PDF opener.
This commit is contained in:
		
							parent
							
								
									462297dc75
								
							
						
					
					
						commit
						51107fcd76
					
				
					 1 changed files with 42 additions and 67 deletions
				
			
		| 
						 | 
					@ -94,24 +94,23 @@ class CustomHandler extends BaseCallbackHandler {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const logHandler = new CustomHandler();
 | 
					const logHandler = new CustomHandler();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
export class GBLLMOutputParser extends 
 | 
					export class GBLLMOutputParser extends
 | 
				
			||||||
  BaseLLMOutputParser<ExpectedOutput> {
 | 
					  BaseLLMOutputParser<ExpectedOutput> {
 | 
				
			||||||
  lc_namespace = ["langchain", "output_parsers"];
 | 
					  lc_namespace = ["langchain", "output_parsers"];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private toolChain: RunnableSequence
 | 
					  private toolChain: RunnableSequence
 | 
				
			||||||
  private documentChain: RunnableSequence;
 | 
					 | 
				
			||||||
  private min;
 | 
					  private min;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  constructor(min, toolChain: RunnableSequence, documentChain: RunnableSequence) {
 | 
					  constructor(min, toolChain: RunnableSequence, documentChain: RunnableSequence) {
 | 
				
			||||||
    super();
 | 
					    super();
 | 
				
			||||||
    this.min = min;
 | 
					    this.min = min;
 | 
				
			||||||
    this.toolChain = toolChain;
 | 
					    this.toolChain = toolChain;
 | 
				
			||||||
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  async parseResult(
 | 
					  async parseResult(
 | 
				
			||||||
    llmOutputs: ChatGeneration[] | Generation[]
 | 
					    llmOutputs: ChatGeneration[] | Generation[]
 | 
				
			||||||
  ): Promise<ExpectedOutput> {
 | 
					  ): Promise<ExpectedOutput> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!llmOutputs.length) {
 | 
					    if (!llmOutputs.length) {
 | 
				
			||||||
      throw new OutputParserException(
 | 
					      throw new OutputParserException(
 | 
				
			||||||
        "Output parser did not receive any generations."
 | 
					        "Output parser did not receive any generations."
 | 
				
			||||||
| 
						 | 
					@ -129,44 +128,32 @@ export class GBLLMOutputParser extends
 | 
				
			||||||
      result = llmOutputs[0].text;
 | 
					      result = llmOutputs[0].text;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const naiveJSONFromText = (text) => {
 | 
					    let res;
 | 
				
			||||||
      const match = text.match(/\{[\s\S]*\}/);
 | 
					    try {
 | 
				
			||||||
      if (!match) return null;
 | 
					      result = result.replace(/\\n/g, '');
 | 
				
			||||||
 | 
					      res = JSON.parse(result);
 | 
				
			||||||
      try {
 | 
					    } catch {
 | 
				
			||||||
        return {metadata: JSON.parse(match[0]),
 | 
					      return result;
 | 
				
			||||||
           text: text.replace(match, '')};
 | 
					 | 
				
			||||||
      } catch {
 | 
					 | 
				
			||||||
        return null;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (result) {
 | 
					 | 
				
			||||||
       const res = naiveJSONFromText(result);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      if (res) {
 | 
					 | 
				
			||||||
        const {metadata, text} = res;
 | 
					 | 
				
			||||||
        const {url} = await ChatServices.pdfPageAsImage(this.min, metadata.file,
 | 
					 | 
				
			||||||
          metadata.page);
 | 
					 | 
				
			||||||
        result = `
 | 
					 | 
				
			||||||
         ${text}`;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
         return [ result, metadata.file, metadata.page];
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return result;
 | 
					    let { file, page, text } = res;
 | 
				
			||||||
 | 
					    const { url } = await ChatServices.pdfPageAsImage(this.min, file, page);
 | 
				
			||||||
 | 
					    text = `
 | 
				
			||||||
 | 
					      ${text}`;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return {text, file, page};
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
export class ChatServices {
 | 
					export class ChatServices {
 | 
				
			||||||
  
 | 
					
 | 
				
			||||||
  public static async pdfPageAsImage(min, filename, pageNumber) {
 | 
					  public static async pdfPageAsImage(min, filename, pageNumber) {
 | 
				
			||||||
    
 | 
					
 | 
				
			||||||
    const gbaiName = DialogKeywords.getGBAIPath(min.botId, 'gbkb');
 | 
					    const gbaiName = DialogKeywords.getGBAIPath(min.botId, 'gbkb');
 | 
				
			||||||
    const localName = Path.join('work', gbaiName, 'docs', filename);
 | 
					    const localName = Path.join('work', gbaiName, 'docs', filename);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Converts the PDF to PNG.
 | 
					    // Converts the PDF to PNG.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber}...`);
 | 
					    GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber}...`);
 | 
				
			||||||
    const pngPages: PngPageOutput[] = await pdfToPng(localName, {
 | 
					    const pngPages: PngPageOutput[] = await pdfToPng(localName, {
 | 
				
			||||||
      disableFontFace: true,
 | 
					      disableFontFace: true,
 | 
				
			||||||
| 
						 | 
					@ -216,7 +203,6 @@ export class ChatServices {
 | 
				
			||||||
    return output;
 | 
					    return output;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
  private static async findPageForText(pdfPath, searchText) {
 | 
					  private static async findPageForText(pdfPath, searchText) {
 | 
				
			||||||
    const data = new Uint8Array(Fs.readFileSync(pdfPath));
 | 
					    const data = new Uint8Array(Fs.readFileSync(pdfPath));
 | 
				
			||||||
    const pdf = await getDocument({ data }).promise;
 | 
					    const pdf = await getDocument({ data }).promise;
 | 
				
			||||||
| 
						 | 
					@ -224,24 +210,24 @@ export class ChatServices {
 | 
				
			||||||
    searchText = searchText.replace(/\s/g, '')
 | 
					    searchText = searchText.replace(/\s/g, '')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (let i = 1; i <= pdf.numPages; i++) {
 | 
					    for (let i = 1; i <= pdf.numPages; i++) {
 | 
				
			||||||
        const page = await pdf.getPage(i);
 | 
					      const page = await pdf.getPage(i);
 | 
				
			||||||
        const textContent = await page.getTextContent();
 | 
					      const textContent = await page.getTextContent();
 | 
				
			||||||
        const text = textContent.items.map(item => item['str']).join('').replace(/\s/g, '');
 | 
					      const text = textContent.items.map(item => item['str']).join('').replace(/\s/g, '');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (text.includes(searchText)) return i;
 | 
					      if (text.includes(searchText)) return i;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return -1; // Texto não encontrado
 | 
					    return -1; 
 | 
				
			||||||
}
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					  /**
 | 
				
			||||||
   * Generate text
 | 
					     * Generate text
 | 
				
			||||||
   *
 | 
					     *
 | 
				
			||||||
   * CONTINUE keword.
 | 
					     * CONTINUE keword.
 | 
				
			||||||
   *
 | 
					     *
 | 
				
			||||||
   * result = CONTINUE text
 | 
					     * result = CONTINUE text
 | 
				
			||||||
   *
 | 
					     *
 | 
				
			||||||
   */
 | 
					     */
 | 
				
			||||||
  public static async continue(min: GBMinInstance, question: string, chatId) {
 | 
					  public static async continue(min: GBMinInstance, question: string, chatId) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
| 
						 | 
					@ -324,13 +310,20 @@ export class ChatServices {
 | 
				
			||||||
      AIMessagePromptTemplate.fromTemplate(
 | 
					      AIMessagePromptTemplate.fromTemplate(
 | 
				
			||||||
        `
 | 
					        `
 | 
				
			||||||
        This is a segmented context.
 | 
					        This is a segmented context.
 | 
				
			||||||
 | 
					 | 
				
			||||||
        VERY IMPORTANT: When responding, ALWAYS, I said, You must always both include the text and the following information at the end of your message as a VALID standard JSON, just after the text answer: 'file' indicating the PDF filename and 'page' indicating the page number. Example JSON format: "file": "filename.pdf", "page": 3, return valid JSON with brackets. Avoid explaining the context directly to the user; instead, refer to the document source. Double check if the output JSON has brackets.
 | 
					 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        \n\n{context}\n\n
 | 
					        \n\n{context}\n\n
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        And based on \n\n{chat_history}\n\n
 | 
					        And based on \n\n{chat_history}\n\n
 | 
				
			||||||
        rephrase the response to the user using the aforementioned context. If you're unsure of the answer, utilize any relevant context provided to answer the question effectively. Don´t output MD images tags url previously shown.
 | 
					        rephrase the response to the user using the aforementioned context. If you're unsure of the answer, utilize any relevant context provided to answer the question effectively. Don´t output MD images tags url previously shown.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        VERY IMPORTANT: ALWAYS return VALID standard JSON with the folowing structure: 'text' as answer, 
 | 
				
			||||||
 | 
					          'file' indicating the PDF filename and 'page' indicating the page number. 
 | 
				
			||||||
 | 
					        Example JSON format: "text": "this is the answer, anything LLM output as text answer shoud be here.", 
 | 
				
			||||||
 | 
					          "file": "filename.pdf", "page": 3,
 | 
				
			||||||
 | 
					         return valid JSON with brackets. Avoid explaining the context directly
 | 
				
			||||||
 | 
					          to the user; instead, refer to the document source. 
 | 
				
			||||||
 | 
					          
 | 
				
			||||||
 | 
					        Double check if the output is a valid JSON with brackets. all fields are required: text, file, page.
 | 
				
			||||||
        `
 | 
					        `
 | 
				
			||||||
      ),
 | 
					      ),
 | 
				
			||||||
      new MessagesPlaceholder("chat_history"),
 | 
					      new MessagesPlaceholder("chat_history"),
 | 
				
			||||||
| 
						 | 
					@ -377,20 +370,6 @@ export class ChatServices {
 | 
				
			||||||
      new GBLLMOutputParser(min, null, null)
 | 
					      new GBLLMOutputParser(min, null, null)
 | 
				
			||||||
    ]);
 | 
					    ]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const conversationalQaChain = RunnableSequence.from([
 | 
					 | 
				
			||||||
      {
 | 
					 | 
				
			||||||
        question: (i: { question: string }) => i.question,
 | 
					 | 
				
			||||||
        chat_history: async () => {
 | 
					 | 
				
			||||||
          const { chat_history } = await memory.loadMemoryVariables({});
 | 
					 | 
				
			||||||
          return chat_history;
 | 
					 | 
				
			||||||
        },
 | 
					 | 
				
			||||||
      },
 | 
					 | 
				
			||||||
      questionGeneratorTemplate,
 | 
					 | 
				
			||||||
      modelWithTools,
 | 
					 | 
				
			||||||
      new GBLLMOutputParser(min, callToolChain, docsContext?.docstore?._docs.length > 0 ? combineDocumentsChain : null),
 | 
					 | 
				
			||||||
      new StringOutputParser()
 | 
					 | 
				
			||||||
    ]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const conversationalToolChain = RunnableSequence.from([
 | 
					    const conversationalToolChain = RunnableSequence.from([
 | 
				
			||||||
      {
 | 
					      {
 | 
				
			||||||
        question: (i: { question: string }) => i.question,
 | 
					        question: (i: { question: string }) => i.question,
 | 
				
			||||||
| 
						 | 
					@ -422,7 +401,7 @@ export class ChatServices {
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    else if (LLMMode === "document") {
 | 
					    else if (LLMMode === "document") {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      [text, file, page] = await combineDocumentsChain.invoke(question);
 | 
					      const {text, file, page} = await combineDocumentsChain.invoke(question);
 | 
				
			||||||
      result = text;
 | 
					      result = text;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    } else if (LLMMode === "function") {
 | 
					    } else if (LLMMode === "function") {
 | 
				
			||||||
| 
						 | 
					@ -440,7 +419,6 @@ export class ChatServices {
 | 
				
			||||||
      GBLog.info(`Invalid Answer Mode in Config.xlsx: ${LLMMode}.`);
 | 
					      GBLog.info(`Invalid Answer Mode in Config.xlsx: ${LLMMode}.`);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    await memory.saveContext(
 | 
					    await memory.saveContext(
 | 
				
			||||||
      {
 | 
					      {
 | 
				
			||||||
        input: question,
 | 
					        input: question,
 | 
				
			||||||
| 
						 | 
					@ -451,9 +429,7 @@ export class ChatServices {
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    GBLog.info(`GPT Result: ${result.toString()}`);
 | 
					    GBLog.info(`GPT Result: ${result.toString()}`);
 | 
				
			||||||
    return { answer: result.toString(), file,  questionId: 0, page };
 | 
					    return { answer: result.toString(), file, questionId: 0, page };
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private static getToolsAsText(tools) {
 | 
					  private static getToolsAsText(tools) {
 | 
				
			||||||
| 
						 | 
					@ -483,7 +459,6 @@ export class ChatServices {
 | 
				
			||||||
          funcObj.schema = eval(jsonSchemaToZod(funcObj.parameters));
 | 
					          funcObj.schema = eval(jsonSchemaToZod(funcObj.parameters));
 | 
				
			||||||
          functions.push(new DynamicStructuredTool(funcObj));
 | 
					          functions.push(new DynamicStructuredTool(funcObj));
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue