diff --git a/packages/basic.gblib/services/KeywordsExpressions.ts b/packages/basic.gblib/services/KeywordsExpressions.ts index 34199205..1cba1813 100644 --- a/packages/basic.gblib/services/KeywordsExpressions.ts +++ b/packages/basic.gblib/services/KeywordsExpressions.ts @@ -49,7 +49,7 @@ export class KeywordsExpressions { if (curr === '') { curr = null; } - accum.soFar.push(curr?curr.trim(): ''); + accum.soFar.push(curr ? curr.trim() : ''); } if (curr.split('`').length % 2 == 0) { accum.isConcatting = !accum.isConcatting; @@ -406,7 +406,7 @@ export class KeywordsExpressions { ($0, $1) => { const items = KeywordsExpressions.splitParamsButIgnoreCommasInDoublequotes($1); const [url, tableName, key1, pageVariable, limitVariable] = items; - + return ` if (!limit) limit = 100; @@ -744,7 +744,10 @@ export class KeywordsExpressions { // Handles the GET http version. else { - return ` + if ($2.endsWith('.pdf') && !$2.startsWith('https')) { + return `${$1} = await sys.getPdf({pid: pid, file: ${$2});`; + } else { + return ` await retry( async (bail) => { await ensureTokens(); @@ -754,6 +757,7 @@ export class KeywordsExpressions { ${$1} = __${$1} `; + } } } ]; @@ -780,6 +784,14 @@ export class KeywordsExpressions { } ]; + keywords[i++] = [ + /^\s*(SET CONTEXT)(\s*)(.*)/gim, + ($0, $1, $2, $3) => { + const params = this.getParams($3, ['text']); + return `await sys.setContext({pid: pid, ${params}})`; + } + ]; + keywords[i++] = [ /^\s*(set language)(\s*)(.*)/gim, ($0, $1, $2, $3) => { diff --git a/packages/basic.gblib/services/SystemKeywords.ts b/packages/basic.gblib/services/SystemKeywords.ts index 0b6c367b..05a4084f 100644 --- a/packages/basic.gblib/services/SystemKeywords.ts +++ b/packages/basic.gblib/services/SystemKeywords.ts @@ -28,6 +28,8 @@ | | \*****************************************************************************/ 'use strict'; + +import { getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs'; import { GBLog, GBMinInstance } from 'botlib'; import { GBConfigService } from '../../core.gbapp/services/GBConfigService.js'; import { CollectionUtil } from 'pragmatismo-io-framework'; @@ -40,6 +42,7 @@ import Fs from 'fs'; import { GBSSR } from '../../core.gbapp/services/GBSSR.js'; import urlJoin from 'url-join'; import Excel from 'exceljs'; +import { BufferWindowMemory } from 'langchain/memory'; import { TwitterApi } from 'twitter-api-v2'; import Path from 'path'; import ComputerVisionClient from '@azure/cognitiveservices-computervision'; @@ -2662,4 +2665,39 @@ export class SystemKeywords { GBLogEx.info(min, mydump(obj, level)); } + + public async getPdfContents({ pid, pdfName }) { + const { min } = await DialogKeywords.getProcessInfo(pid); + GBLogEx.info(min, `BASIC GET (pdf): ${pdfName}`); + + let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); + const gbaiName = DialogKeywords.getGBAIPath(min.botId); + let path = '/' + urlJoin(gbaiName, `${min.botId}.gbdrive`); + let template = await this.internalGetDocument(client, baseUrl, path, pdfName); + let url = template['@microsoft.graph.downloadUrl']; + const res = await fetch(url); + let data: any = Buffer.from(await res.arrayBuffer()); + + const pdf = await getDocument({ data }).promise; + let pages = [] + + for (let i = 1; i <= pdf.numPages; i++) { + const page = await pdf.getPage(i); + const textContent = await page.getTextContent(); + const text = textContent.items + .map(item => item['str']) + .join('') + .replace(/\s/g, ''); + pages.push(text) + + } + + return pages.join(""); + } + + public async setContext({pid, text}){ + const { min, user, params } = await DialogKeywords.getProcessInfo(pid); + ChatServices.userSystemPrompt[user.userSystemId] = text; + } + } diff --git a/packages/gpt.gblib/services/ChatServices.ts b/packages/gpt.gblib/services/ChatServices.ts index 836097c2..dba6aaf6 100644 --- a/packages/gpt.gblib/services/ChatServices.ts +++ b/packages/gpt.gblib/services/ChatServices.ts @@ -60,6 +60,7 @@ import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js'; import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js'; import { GBVMService } from '../../basic.gblib/services/GBVMService.js'; import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js'; +import { pagespeedonline } from 'googleapis/build/src/apis/pagespeedonline/index.js'; export interface CustomOutputParserFields {} export type ExpectedOutput = any; @@ -220,6 +221,7 @@ export class ChatServices { return output; } + private static async findPageForText(pdfPath, searchText) { const data = new Uint8Array(Fs.readFileSync(pdfPath)); const pdf = await getDocument({ data }).promise;