diff --git a/packages/admin.gbapp/services/GBAdminService.ts b/packages/admin.gbapp/services/GBAdminService.ts index fb90234c..63154e75 100644 --- a/packages/admin.gbapp/services/GBAdminService.ts +++ b/packages/admin.gbapp/services/GBAdminService.ts @@ -152,7 +152,7 @@ export class GBAdminService implements IGBAdminService { const packageName = text.split(' ')[1]; const importer = new GBImporter(min.core); const deployer = new GBDeployer(min.core, importer); - const path = DialogKeywords.getGBAIPath(min.botId, null, packageName); + const path = GBUtil.getGBAIPath(min.botId, null, packageName); const localFolder = Path.join('work', path); await deployer.undeployPackageFromLocalPath(min.instance, localFolder); } @@ -170,12 +170,12 @@ export class GBAdminService implements IGBAdminService { const folderName = text.split(' ')[2]; const packageType = Path.extname(folderName).substr(1); - const gbaiPath = DialogKeywords.getGBAIPath(min.instance.botId, packageType, null); + const gbaiPath = GBUtil.getGBAIPath(min.instance.botId, packageType, null); const localFolder = Path.join('work', gbaiPath); // .gbot packages are handled using storage API, so no download // of local resources is required. - const gbai = DialogKeywords.getGBAIPath(min.instance.botId); + const gbai = GBUtil.getGBAIPath(min.instance.botId); if (packageType === 'gbkb') { await deployer['cleanupPackage'](min.instance, packageName); diff --git a/packages/basic.gblib/services/DialogKeywords.ts b/packages/basic.gblib/services/DialogKeywords.ts index 681d328a..8787c2eb 100644 --- a/packages/basic.gblib/services/DialogKeywords.ts +++ b/packages/basic.gblib/services/DialogKeywords.ts @@ -84,7 +84,7 @@ export class DialogKeywords { Based on this data, generate a configuration for a Billboard.js chart. The output should be valid JSON, following Billboard.js conventions. Ensure the JSON is returned without markdown formatting, explanations, or comments. - The chart should be ${prompt}. 
Return only the JSON configuration, nothing else.`; + The chart should be ${prompt}. Return only the one-line only JSON configuration, nothing else.`; // Send the prompt to the LLM and get the response @@ -108,7 +108,7 @@ export class DialogKeywords { // Get the chart container and take a screenshot const content = await page.$('.bb'); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const localName = Path.join('work', gbaiName, 'cache', `chart${GBAdminService.getRndReadableIdentifier()}.jpg`); await content.screenshot({ path: localName, omitBackground: true }); await browser.close(); @@ -182,7 +182,7 @@ export class DialogKeywords { }; } - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const localName = Path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.jpg`); await ChartServices.screenshot(definition, localName); @@ -1037,7 +1037,7 @@ export class DialogKeywords { let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); const botId = min.instance.botId; - const path = DialogKeywords.getGBAIPath(botId); + const path = GBUtil.getGBAIPath(botId); let url = `${baseUrl}/drive/root:/${path}:/children`; GBLogEx.info(min, `Loading HEAR AS .xlsx options from Sheet: ${url}`); @@ -1287,20 +1287,6 @@ export class DialogKeywords { GBLog.error(`BASIC RUNTIME ERR HEAR ${error.message ? error.message : error}\n Stack:${error.stack}`); } } - static getGBAIPath(botId, packageType = null, packageName = null) { - let gbai = `${botId}.gbai`; - if (!packageType && !packageName) { - return GBConfigService.get('DEV_GBAI') ? GBConfigService.get('DEV_GBAI') : gbai; - } - - if (GBConfigService.get('DEV_GBAI')) { - gbai = GBConfigService.get('DEV_GBAI'); - botId = gbai.replace(/\.[^/.]+$/, ''); - return urljoin(GBConfigService.get('DEV_GBAI'), packageName ? 
packageName : `${botId}.${packageType}`); - } else { - return urljoin(gbai, packageName ? packageName : `${botId}.${packageType}`); - } - } /** * Prepares the next dialog to be shown to the specified user. @@ -1457,7 +1443,7 @@ export class DialogKeywords { const element = filename._page ? filename._page : filename.screenshot ? filename : null; let url; let nameOnly; - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); // Web automation. @@ -1494,7 +1480,7 @@ export class DialogKeywords { // .gbdrive direct sending. else { const ext = Path.extname(filename); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); const fileUrl = urlJoin('/', gbaiName, `${min.botId}.gbdrive`, filename); @@ -1527,7 +1513,7 @@ export class DialogKeywords { // Prepare a cache to be referenced by Bot Framework. const buf = Fs.readFileSync(filename); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const localName = Path.join('work', gbaiName, 'cache', `tmp${GBAdminService.getRndReadableIdentifier()}.${ext}`); Fs.writeFileSync(localName, buf, { encoding: null }); url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName)); @@ -1560,7 +1546,7 @@ export class DialogKeywords { const data = img.replace(/^data:image\/\w+;base64,/, ''); const buf = Buffer.from(data, 'base64'); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const localName = Path.join('work', gbaiName, 'cache', `qr${GBAdminService.getRndReadableIdentifier()}.png`); Fs.writeFileSync(localName, buf, { encoding: null }); const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName)); diff --git a/packages/basic.gblib/services/GBVMService.ts 
b/packages/basic.gblib/services/GBVMService.ts index 5ff0b408..3c957333 100644 --- a/packages/basic.gblib/services/GBVMService.ts +++ b/packages/basic.gblib/services/GBVMService.ts @@ -54,6 +54,7 @@ import { SystemKeywords } from './SystemKeywords.js'; import { Sequelize, QueryTypes } from '@sequelize/core'; import { z } from 'zod'; import { zodToJsonSchema } from 'zod-to-json-schema'; +import { GBUtil } from '../../../src/util.js'; /** * @fileoverview Decision was to priorize security(isolation) and debugging, @@ -68,7 +69,7 @@ export class GBVMService extends GBService { public static API_PORT = 1111; public async loadDialogPackage(folder: string, min: GBMinInstance, core: IGBCoreService, deployer: GBDeployer) { - const ignore = Path.join('work', DialogKeywords.getGBAIPath(min.botId, 'gbdialog'), 'node_modules'); + const ignore = Path.join('work', GBUtil.getGBAIPath(min.botId, 'gbdialog'), 'node_modules'); const files = await walkPromise(folder, { ignore: [ignore] }); await CollectionUtil.asyncForEach(files, async file => { @@ -223,7 +224,7 @@ export class GBVMService extends GBService { public static async loadConnections(min) { // Loads storage custom connections. 
- const path = DialogKeywords.getGBAIPath(min.botId, null); + const path = GBUtil.getGBAIPath(min.botId, null); const filePath = Path.join('work', path, 'connections.json'); let connections = []; if (Fs.existsSync(filePath)) { @@ -1089,7 +1090,7 @@ export class GBVMService extends GBService { } const botId = min.botId; - const path = DialogKeywords.getGBAIPath(min.botId, `gbdialog`); + const path = GBUtil.getGBAIPath(min.botId, `gbdialog`); const gbdialogPath = urlJoin(process.cwd(), 'work', path); const scriptFilePath = urlJoin(gbdialogPath, `${text}.js`); diff --git a/packages/basic.gblib/services/ImageProcessingServices.ts b/packages/basic.gblib/services/ImageProcessingServices.ts index 141772eb..22d3ab7c 100644 --- a/packages/basic.gblib/services/ImageProcessingServices.ts +++ b/packages/basic.gblib/services/ImageProcessingServices.ts @@ -38,6 +38,7 @@ import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js'; import urlJoin from 'url-join'; import { GBServer } from '../../../src/app.js'; import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js'; +import { GBUtil } from '../../../src/util.js'; /** * Image processing services of conversation to be called by BASIC. @@ -74,7 +75,7 @@ export class ImageProcessingServices { }); const botId = min.instance.botId; - const path = DialogKeywords.getGBAIPath(min.botId); + const path = GBUtil.getGBAIPath(min.botId); // TODO: const img = await joinImages(paths); const localName = Path.join('work', path, 'cache', `img-mrg${GBAdminService.getRndReadableIdentifier()}.png`); const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName)); diff --git a/packages/basic.gblib/services/SystemKeywords.ts b/packages/basic.gblib/services/SystemKeywords.ts index b8c4f172..ee2fda2c 100644 --- a/packages/basic.gblib/services/SystemKeywords.ts +++ b/packages/basic.gblib/services/SystemKeywords.ts @@ -262,7 +262,7 @@ export class SystemKeywords { // headers. 
const { min, user } = await DialogKeywords.getProcessInfo(pid); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const browser = await GBSSR.createBrowser(null); const page = await browser.newPage(); await page.minimize(); @@ -378,7 +378,7 @@ export class SystemKeywords { let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); const botId = min.instance.botId; - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const tmpDocx = urlJoin(gbaiName, `${botId}.gbdrive`, `tmp${GBAdminService.getRndReadableIdentifier()}.docx`); // Performs the conversion operation. @@ -571,7 +571,7 @@ export class SystemKeywords { let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); const botId = min.instance.botId; - const path = DialogKeywords.getGBAIPath(botId, 'gbdata'); + const path = GBUtil.getGBAIPath(botId, 'gbdata'); let document = await this.internalGetDocument(client, baseUrl, path, file); let sheets = await client.api(`${baseUrl}/drive/items/${document.id}/workbook/worksheets`).get(); let body = { values: [[]] }; @@ -656,7 +656,7 @@ export class SystemKeywords { GBLogEx.info(min, `Saving '${file}' (SAVE file).`); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); const botId = min.instance.botId; - const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`); + const path = GBUtil.getGBAIPath(min.botId, `gbdrive`); // Checks if it is a GB FILE object. @@ -701,7 +701,7 @@ export class SystemKeywords { // It is an SharePoint object that needs to be downloaded. 
- const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const localName = Path.join('work', gbaiName, 'cache', `${GBAdminService.getRndReadableIdentifier()}.tmp`); const url = file['url']; const response = await fetch(url); @@ -875,7 +875,7 @@ export class SystemKeywords { GBLogEx.info(min, `Saving '${file}' (SAVE). Args: ${args.join(',')}.`); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); const botId = min.instance.botId; - const path = DialogKeywords.getGBAIPath(botId, 'gbdata'); + const path = GBUtil.getGBAIPath(botId, 'gbdata'); let sheets; let document; @@ -994,7 +994,7 @@ export class SystemKeywords { let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); const botId = min.instance.botId; (''); - const path = DialogKeywords.getGBAIPath(botId, 'gbdata'); + const path = GBUtil.getGBAIPath(botId, 'gbdata'); let document = await this.internalGetDocument(client, baseUrl, path, file); @@ -1092,7 +1092,7 @@ export class SystemKeywords { args.shift(); const botId = min.instance.botId; - const path = DialogKeywords.getGBAIPath(botId, 'gbdata'); + const path = GBUtil.getGBAIPath(botId, 'gbdata'); // MAX LINES property. 
@@ -1150,7 +1150,7 @@ export class SystemKeywords { } result = null; } else if (file['cTag']) { - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const localName = Path.join('work', gbaiName, 'cache', `csv${GBAdminService.getRndReadableIdentifier()}.csv`); const url = file['@microsoft.graph.downloadUrl']; const response = await fetch(url); @@ -1200,7 +1200,7 @@ export class SystemKeywords { rows = results.text; } else if (file.indexOf('.csv') !== -1) { let res; - let path = DialogKeywords.getGBAIPath(min.botId, `gbdata`); + let path = GBUtil.getGBAIPath(min.botId, `gbdata`); const csvFile = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path, file); const firstLine = Fs.readFileSync(csvFile, 'utf8').split('\n')[0]; const headers = firstLine.split(','); @@ -1520,7 +1520,7 @@ export class SystemKeywords { if (user) { ChatServices.userSystemPrompt[user.userSystemId] = text; - const path = DialogKeywords.getGBAIPath(min.botId); + const path = GBUtil.getGBAIPath(min.botId); const systemPromptFile = urlJoin(process.cwd(), 'work', path, 'users', user.userSystemId, 'systemPrompt.txt'); Fs.writeFileSync(systemPromptFile, text); } @@ -1536,7 +1536,7 @@ export class SystemKeywords { const { min, user, params } = await DialogKeywords.getProcessInfo(pid); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); const botId = min.instance.botId; - let path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`); + let path = GBUtil.getGBAIPath(min.botId, `gbdrive`); // Extracts each part of path to call create folder to each // one of them. 
@@ -1585,7 +1585,7 @@ export class SystemKeywords { public async shareFolder({ pid, folder, email, message }) { const { min, user, params } = await DialogKeywords.getProcessInfo(pid); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); - const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`); + const path = GBUtil.getGBAIPath(min.botId, `gbdrive`); const root = urlJoin(path, folder); const src = await client.api(`${baseUrl}/drive/root:/${root}`).get(); @@ -1606,7 +1606,7 @@ export class SystemKeywords { public async internalCreateDocument(min, path, content) { GBLogEx.info(min, `CREATE DOCUMENT '${path}...'`); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const tmpDocx = urlJoin(gbaiName, path); // Templates a blank {content} tag inside the blank.docx. @@ -1651,7 +1651,7 @@ export class SystemKeywords { // Determines full path at source and destination. - const root = DialogKeywords.getGBAIPath(botId, 'gbdrive'); + const root = GBUtil.getGBAIPath(botId, 'gbdrive'); const srcPath = urlJoin(root, src); const dstPath = urlJoin(root, dest); @@ -1711,7 +1711,7 @@ export class SystemKeywords { dest = dest.replace(/\\/gi, '/'); // Determines full path at source and destination. - const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`); + const path = GBUtil.getGBAIPath(min.botId, `gbdrive`); const root = path; const srcPath = urlJoin(root, src); const dstPath = urlJoin(path, dest); @@ -2001,7 +2001,7 @@ export class SystemKeywords { public async fill({ pid, templateName, data }) { const { min, user } = await DialogKeywords.getProcessInfo(pid); const botId = min.instance.botId; - const gbaiName = DialogKeywords.getGBAIPath(botId, 'gbdata'); + const gbaiName = GBUtil.getGBAIPath(botId, 'gbdata'); let localName; // Downloads template from .gbdrive. 
@@ -2240,7 +2240,7 @@ export class SystemKeywords { } } else { const botId = min.instance.botId; - const path = DialogKeywords.getGBAIPath(botId, 'gbdata'); + const path = GBUtil.getGBAIPath(botId, 'gbdata'); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); @@ -2510,7 +2510,7 @@ export class SystemKeywords { public async pay({ pid, orderId, customerName, ammount }) { const { min, user } = await DialogKeywords.getProcessInfo(pid); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const merchantId = min.core.getParam(min.instance, 'Merchant ID', null); const merchantKey = min.core.getParam(min.instance, 'Merchant Key', null); @@ -2587,7 +2587,7 @@ export class SystemKeywords { GBLogEx.info(min, `Auto saving '${file.filename}' (SAVE file).`); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); - const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`); + const path = GBUtil.getGBAIPath(min.botId, `gbdrive`); const fileName = file.url ? file.url : file.name; const contentType = mime.lookup(fileName); const ext = Path.extname(fileName).substring(1); @@ -2635,7 +2635,7 @@ export class SystemKeywords { GBLogEx.info(min, `DELETE '${file.name}'.`); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); - const gbaiPath = DialogKeywords.getGBAIPath(min.botId); + const gbaiPath = GBUtil.getGBAIPath(min.botId); const fileName = file.name; const contentType = mime.lookup(fileName); const ext = Path.extname(fileName).substring(1); @@ -2677,7 +2677,7 @@ export class SystemKeywords { // Retrieves all files in remote folder. 
- let path = DialogKeywords.getGBAIPath(min.botId); + let path = GBUtil.getGBAIPath(min.botId); path = urlJoin(path, remotePath); let url = `${baseUrl}/drive/root:/${path}:/children`; @@ -2721,11 +2721,11 @@ export class SystemKeywords { const { min } = await DialogKeywords.getProcessInfo(pid); GBLogEx.info(min, `BASIC GET (pdf): ${file}`); - let data ; + let data; if (GBConfigService.get('STORAGE_NAME')) { let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); let path = '/' + urlJoin(gbaiName, `${min.botId}.gbdrive`); let template = await this.internalGetDocument(client, baseUrl, path, file); let url = template['@microsoft.graph.downloadUrl']; @@ -2734,26 +2734,12 @@ export class SystemKeywords { data = new Uint8Array(buf); } else { - let path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`); + let path = GBUtil.getGBAIPath(min.botId, `gbdrive`); let filePath = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path, file); data = Fs.readFileSync(filePath, 'utf8'); data = new Uint8Array(Buffer.from(data, 'utf8')); } - - const pdf = await getDocument({ data }).promise; - let pages = []; - - for (let i = 1; i <= pdf.numPages; i++) { - const page = await pdf.getPage(i); - const textContent = await page.getTextContent(); - const text = textContent.items - .map(item => item['str']) - .join('') - .replace(/\s/g, ''); - pages.push(text); - } - - return pages.join(''); + return await GBUtil.getPdfText(data); } public async setContext({ pid, text }) { diff --git a/packages/basic.gblib/services/WebAutomationServices.ts b/packages/basic.gblib/services/WebAutomationServices.ts index e985adbd..b4c82496 100644 --- a/packages/basic.gblib/services/WebAutomationServices.ts +++ b/packages/basic.gblib/services/WebAutomationServices.ts @@ -44,6 +44,7 @@ import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js'; import { Mutex } from 'async-mutex'; 
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js'; import { SystemKeywords } from './SystemKeywords.js'; +import { GBUtil } from '../../../src/util.js'; /** * Web Automation services of conversation to be called by BASIC. @@ -348,7 +349,7 @@ export class WebAutomationServices { const page = WebAutomationServices.getPageByHandle(handle); GBLogEx.info(min, `Web Automation SCREENSHOT ${selector}.`); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const localName = Path.join('work', gbaiName, 'cache', `screen-${GBAdminService.getRndReadableIdentifier()}.jpg`); await page.screenshot({ path: localName }); @@ -439,7 +440,7 @@ export class WebAutomationServices { folder = folder.replace(/\\/gi, '/'); // Determines full path at source and destination. - const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`); + const path = GBUtil.getGBAIPath(min.botId, `gbdrive`); const root = path; const dstPath = urlJoin(root, folder, filename); diff --git a/packages/core.gbapp/services/GBConversationalService.ts b/packages/core.gbapp/services/GBConversationalService.ts index eda8e895..fea30aa6 100644 --- a/packages/core.gbapp/services/GBConversationalService.ts +++ b/packages/core.gbapp/services/GBConversationalService.ts @@ -656,7 +656,7 @@ export class GBConversationalService { // Set folder based on media type const folder = mediaType === 'video' ? 'videos' : 'images'; - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const fileUrl = urlJoin(process.env.BOT_URL, 'kb', gbaiName, `${min.botId}.gbkb`, folder, mediaFile); let urlMedia = mediaFile.startsWith('http') ? 
mediaFile : fileUrl; diff --git a/packages/core.gbapp/services/GBCoreService.ts b/packages/core.gbapp/services/GBCoreService.ts index cdd38dc6..a91289f3 100644 --- a/packages/core.gbapp/services/GBCoreService.ts +++ b/packages/core.gbapp/services/GBCoreService.ts @@ -680,7 +680,7 @@ ENDPOINT_UPDATE=true const maxLines = 512; const file = 'Config.xlsx'; - const path = DialogKeywords.getGBAIPath(min.botId, `gbot`); + const path = GBUtil.getGBAIPath(min.botId, `gbot`); let document = await new SystemKeywords().internalGetDocument(client, baseUrl, path, file); @@ -716,7 +716,7 @@ ENDPOINT_UPDATE=true ) .patch(body); } else { - let path = DialogKeywords.getGBAIPath(min.botId, `gbot`); + let path = GBUtil.getGBAIPath(min.botId, `gbot`); const config = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path, 'config.csv'); const db = await csvdb(config, ['name', 'value'], ','); @@ -914,7 +914,7 @@ ENDPOINT_UPDATE=true const objUser = userManager.addUser(user, pass); const virtualPath = '/' + min.botId; - let path = DialogKeywords.getGBAIPath(min.botId, null); + let path = GBUtil.getGBAIPath(min.botId, null); const gbaiRoot = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path); server.setFileSystem(virtualPath, new webdav.PhysicalFileSystem(gbaiRoot), successed => { diff --git a/packages/core.gbapp/services/GBDeployer.ts b/packages/core.gbapp/services/GBDeployer.ts index 52f2667e..0a498c16 100644 --- a/packages/core.gbapp/services/GBDeployer.ts +++ b/packages/core.gbapp/services/GBDeployer.ts @@ -511,7 +511,7 @@ export class GBDeployer implements IGBDeployer { // Retrieves all files in remote folder. 
- let path = DialogKeywords.getGBAIPath(min.botId); + let path = GBUtil.getGBAIPath(min.botId); path = urlJoin(path, remotePath); let url = `${baseUrl}/drive/root:/${path}:/children`; @@ -643,7 +643,7 @@ export class GBDeployer implements IGBDeployer { const strFind = ' Driver'; const conns = await min.core['findParam'](min.instance, strFind); await CollectionUtil.asyncForEach(conns, async t => { - const connectionName = t.replace(strFind, ''); + const connectionName = t.replace(strFind, '').trim(); let con = {}; con['name'] = connectionName; con['storageDriver'] = min.core.getParam(min.instance, `${connectionName} Driver`, null); @@ -652,13 +652,13 @@ export class GBDeployer implements IGBDeployer { let file = min.core.getParam(min.instance, `${connectionName} File`, null); if (storageName) { - con['storageName'] = storageName; + con['storageName'] = storageName.trim(); con['storageServer'] = min.core.getParam(min.instance, `${connectionName} Server`, null); con['storageUsername'] = min.core.getParam(min.instance, `${connectionName} Username`, null); con['storagePort'] = min.core.getParam(min.instance, `${connectionName} Port`, null); con['storagePassword'] = min.core.getParam(min.instance, `${connectionName} Password`, null); } else if (file) { - const path = DialogKeywords.getGBAIPath(min.botId, 'gbdata'); + const path = GBUtil.getGBAIPath(min.botId, 'gbdata'); con['storageFile'] = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path, file); } else { GBLogEx.debug(min, `No storage information found for ${connectionName}, missing storage name or file.`); @@ -666,7 +666,7 @@ export class GBDeployer implements IGBDeployer { connections.push(con); }); - const path = DialogKeywords.getGBAIPath(min.botId, null); + const path = GBUtil.getGBAIPath(min.botId, null); const localFolder = Path.join('work', path, 'connections.json'); Fs.writeFileSync(localFolder, JSON.stringify(connections), { encoding: null }); @@ -726,7 +726,7 @@ export class GBDeployer implements 
IGBDeployer { * Removes the package local files from cache. */ public async cleanupPackage(instance: IGBInstance, packageName: string) { - const path = DialogKeywords.getGBAIPath(instance.botId, null, packageName); + const path = GBUtil.getGBAIPath(instance.botId, null, packageName); const localFolder = Path.join('work', path); rimraf.sync(localFolder); } @@ -740,7 +740,7 @@ export class GBDeployer implements IGBDeployer { const packageType = Path.extname(packageName); const p = await this.getStoragePackageByName(instance.instanceId, packageName); - const path = DialogKeywords.getGBAIPath(instance.botId, null, packageName); + const path = GBUtil.getGBAIPath(instance.botId, null, packageName); const localFolder = Path.join('work', path); return await this.undeployPackageFromLocalPath(instance, localFolder); @@ -899,7 +899,7 @@ export class GBDeployer implements IGBDeployer { * Servers bot storage assets to be used by web, WhatsApp and other channels. */ public static mountGBKBAssets(packageName: any, botId: string, filename: string) { - const gbaiName = DialogKeywords.getGBAIPath(botId); + const gbaiName = GBUtil.getGBAIPath(botId); // Servers menu assets. diff --git a/packages/core.gbapp/services/GBMinService.ts b/packages/core.gbapp/services/GBMinService.ts index ee2f20f4..4692e1e0 100644 --- a/packages/core.gbapp/services/GBMinService.ts +++ b/packages/core.gbapp/services/GBMinService.ts @@ -293,24 +293,24 @@ export class GBMinService { // Install per bot deployed packages. 
- let packagePath = urlJoin(`work`, DialogKeywords.getGBAIPath(min.botId, 'gbdialog')); + let packagePath = urlJoin(`work`, GBUtil.getGBAIPath(min.botId, 'gbdialog')); if (Fs.existsSync(packagePath)) { await this.deployer['deployPackage2'](min, user, packagePath); } - packagePath = urlJoin(`work`, DialogKeywords.getGBAIPath(min.botId, 'gbapp')); + packagePath = urlJoin(`work`, GBUtil.getGBAIPath(min.botId, 'gbapp')); if (Fs.existsSync(packagePath)) { await this.deployer['deployPackage2'](min, user, packagePath); } - packagePath = urlJoin(`work`, DialogKeywords.getGBAIPath(min.botId, 'gbtheme')); + packagePath = urlJoin(`work`, GBUtil.getGBAIPath(min.botId, 'gbtheme')); if (Fs.existsSync(packagePath)) { await this.deployer['deployPackage2'](min, user, packagePath); } - packagePath = urlJoin(`work`, DialogKeywords.getGBAIPath(min.botId, `gblib`)); + packagePath = urlJoin(`work`, GBUtil.getGBAIPath(min.botId, `gblib`)); if (Fs.existsSync(packagePath)) { await this.deployer['deployPackage2'](min, user, packagePath); } - const gbai = DialogKeywords.getGBAIPath(min.botId); + const gbai = GBUtil.getGBAIPath(min.botId); let dir = `work/${gbai}/cache`; const botId = gbai.replace(/\.[^/.]+$/, ''); @@ -387,7 +387,7 @@ export class GBMinService { // Generates MS Teams manifest. 
const manifest = `${instance.botId}-Teams.zip`; - const packageTeams = urlJoin(`work`, DialogKeywords.getGBAIPath(instance.botId), manifest); + const packageTeams = urlJoin(`work`, GBUtil.getGBAIPath(instance.botId), manifest); if (!Fs.existsSync(packageTeams)) { GBLogEx.info(min, 'Generating MS Teams manifest....'); const data = await this.deployer.getBotManifest(instance); @@ -832,7 +832,7 @@ export class GBMinService { min['scheduleMap'] = {}; min['conversationWelcomed'] = {}; if (await min.core.getParam(min.instance, 'Answer Mode', null)) { - const gbkbPath = DialogKeywords.getGBAIPath(min.botId, 'gbkb'); + const gbkbPath = GBUtil.getGBAIPath(min.botId, 'gbkb'); min['vectorStorePath'] = Path.join('work', gbkbPath, 'docs-vectorized'); min['vectorStore'] = await this.deployer.loadOrCreateEmptyVectorStore(min); } @@ -1095,7 +1095,7 @@ export class GBMinService { ps: null, qs: null }); - const path = DialogKeywords.getGBAIPath(min.botId); + const path = GBUtil.getGBAIPath(min.botId); const folder = `work/${path}/cache`; const filename = `${GBAdminService.generateUuid()}.png`; @@ -1271,7 +1271,7 @@ export class GBMinService { private static async downloadAttachmentAndWrite(attachment) { const url = attachment.contentUrl; const localFolder = 'work'; - const path = DialogKeywords.getGBAIPath(this['min'].botId); + const path = GBUtil.getGBAIPath(this['min'].botId); const localFileName = Path.join(localFolder, path, 'uploads', attachment.name); let buffer; diff --git a/packages/core.gbapp/services/GBSSR.ts b/packages/core.gbapp/services/GBSSR.ts index 0b58fccf..63128d66 100644 --- a/packages/core.gbapp/services/GBSSR.ts +++ b/packages/core.gbapp/services/GBSSR.ts @@ -303,12 +303,12 @@ export class GBSSR { } - let path = DialogKeywords.getGBAIPath(botId, `gbui`); + let path = GBUtil.getGBAIPath(botId, `gbui`); // Checks if the bot has an .gbui published or use default.gbui. 
if (!Fs.existsSync(path)) { - path = DialogKeywords.getGBAIPath(minBoot.botId, `gbui`); + path = GBUtil.getGBAIPath(minBoot.botId, `gbui`); } let parts = req.url.replace(`/${botId}`, '').split('?'); let url = parts[0]; diff --git a/packages/kb.gbapp/dialogs/AskDialog.ts b/packages/kb.gbapp/dialogs/AskDialog.ts index f07ccb72..a155866b 100644 --- a/packages/kb.gbapp/dialogs/AskDialog.ts +++ b/packages/kb.gbapp/dialogs/AskDialog.ts @@ -51,6 +51,7 @@ import { SystemKeywords } from '../../basic.gblib/services/SystemKeywords.js'; import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js'; import Path from 'path'; import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js'; +import { GBUtil } from '../../../src/util.js'; /** * Dialog arguments. @@ -245,7 +246,7 @@ export class AskDialog extends IGBDialog { for (const key in results.sources) { const source = results.sources[key]; - const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`); + const path = GBUtil.getGBAIPath(min.botId, `gbkb`); let url = urlJoin('kb', path, 'docs', Path.basename(source.file)); url = `${url}#page=${source.page}&toolbar=0&messages=0&statusbar=0&navpanes=0`; urls.push({ url: url }); diff --git a/packages/kb.gbapp/services/KBService.ts b/packages/kb.gbapp/services/KBService.ts index b9cf1bb6..e7ce5e9a 100644 --- a/packages/kb.gbapp/services/KBService.ts +++ b/packages/kb.gbapp/services/KBService.ts @@ -54,7 +54,6 @@ import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; import { Document } from 'langchain/document'; import getColors from 'get-image-colors'; - import { GBDialogStep, GBLog, @@ -379,7 +378,7 @@ export class KBService implements IGBKBService { returnedScore: ${returnedScore} < required (searchScore): ${searchScore}` ); - return await ChatServices.answerByLLM( step.context.activity['pid'], min, user, query); + return await ChatServices.answerByLLM(step.context.activity['pid'], min, user, query); } public async getSubjectItems(instanceId: 
number, parentId: number): Promise { @@ -558,7 +557,7 @@ export class KBService implements IGBKBService { const isBasic = answer.toLowerCase().startsWith('/basic'); if (/TALK\s*\".*\"/gi.test(answer) || isBasic) { const code = isBasic ? answer.substr(6) : answer; - const path = DialogKeywords.getGBAIPath(min.botId, `gbdialog`); + const path = GBUtil.getGBAIPath(min.botId, `gbdialog`); const scriptName = `tmp${GBAdminService.getRndReadableIdentifier()}.docx`; const localName = Path.join('work', path, `${scriptName}`); Fs.writeFileSync(localName, code, { encoding: null }); @@ -633,12 +632,12 @@ export class KBService implements IGBKBService { answer.endsWith('.xls') || answer.endsWith('.xlsx') ) { - const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`); + const path = GBUtil.getGBAIPath(min.botId, `gbkb`); const doc = urlJoin(GBServer.globals.publicAddress, 'kb', path, 'assets', answer); const url = `http://view.officeapps.live.com/op/view.aspx?src=${doc}`; await this.playUrl(min, min.conversationalService, step, url, channel); } else if (answer.endsWith('.pdf')) { - const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`); + const path = GBUtil.getGBAIPath(min.botId, `gbkb`); const url = urlJoin('kb', path, 'assets', answer); await this.playUrl(min, min.conversationalService, step, url, channel); } else if (answer.format === '.md') { @@ -740,7 +739,7 @@ export class KBService implements IGBKBService { }); } } else if (file !== null && file.name.endsWith('.docx')) { - let path = DialogKeywords.getGBAIPath(instance.botId, `gbkb`); + let path = GBUtil.getGBAIPath(instance.botId, `gbkb`); const localName = Path.join('work', path, 'articles', file.name); let loader = new DocxLoader(localName); let doc = await loader.load(); @@ -761,7 +760,7 @@ export class KBService implements IGBKBService { data.answers.push(answer); } else if (file !== null && file.name.endsWith('.toc.docx')) { - const path = DialogKeywords.getGBAIPath(instance.botId, `gbkb`); + const path = 
GBUtil.getGBAIPath(instance.botId, `gbkb`); const localName = Path.join('work', path, 'articles', file.name); const buffer = Fs.readFileSync(localName, { encoding: null }); var options = { @@ -769,13 +768,13 @@ export class KBService implements IGBKBService { convertImage: async image => { const localName = Path.join( 'work', - DialogKeywords.getGBAIPath(instance.botId), + GBUtil.getGBAIPath(instance.botId), 'cache', `img-docx${GBAdminService.getRndReadableIdentifier()}.png` ); const url = urlJoin( GBServer.globals.publicAddress, - DialogKeywords.getGBAIPath(instance.botId).replace(/\.[^/.]+$/, ''), + GBUtil.getGBAIPath(instance.botId).replace(/\.[^/.]+$/, ''), 'cache', Path.basename(localName) ); @@ -864,34 +863,6 @@ export class KBService implements IGBKBService { }); } - async saveHtmlPage(min, url: string, page: Page): Promise { - let response = await page.goto(url); - if (!response) { - response = await page.waitForResponse(() => true); - } - if (response && response.headers && response.status() === 200) { - const contentType = response.headers()['content-type']; - if (contentType && contentType.includes('text/html')) { - const buffer = html2md(await response.text()); - const urlObj = new URL(url); - const urlPath = urlObj.pathname.endsWith('/') ? 
urlObj.pathname.slice(0, -1) : urlObj.pathname; // Remove trailing slash if present - let filename = urlPath.split('/').pop() || 'index'; // Get the filename from the URL path or set it to 'index.html' as default - filename = `${filename}.html`; - let path = DialogKeywords.getGBAIPath(min.botId, `gbot`); - const directoryPath = Path.join(process.env.PWD, 'work', path, 'Website'); - const filePath = Path.join(directoryPath, filename); - - GBLogEx.info(min, `[GBDeployer] Saving Website file in ${filePath}.`); - - Fs.mkdirSync(directoryPath, { recursive: true }); // Create directory recursively if it doesn't exist - Fs.writeFileSync(filePath, buffer); - - return filePath; - } - } - return null; - } - async crawl( min, url: string, @@ -906,7 +877,6 @@ export class KBService implements IGBKBService { depth > maxDepth || visited.has(url) || url.endsWith('.jpg') || - url.endsWith('.pdf') || url.endsWith('.jpg') || url.endsWith('.png') || url.endsWith('.mp4') @@ -915,14 +885,17 @@ export class KBService implements IGBKBService { } await GBLogEx.info(min, `Processing URL: ${url}.`); - visited.add(url); - - const filename = await this.saveHtmlPage(min, url, page); + + const path = GBUtil.getGBAIPath(min.botId, `gbot`); + const directoryPath = Path.join(process.env.PWD, 'work', path, 'Website'); + const filename = await GBUtil.savePage(url, page, directoryPath); if (!filename) { - // If the URL doesn't represent an HTML page, skip crawling its links + + // If the URL doesn't represent an HTML/PDF page, skip crawling its links return []; + } const currentDomain = new URL(page.url()).hostname; @@ -1052,7 +1025,7 @@ export class KBService implements IGBKBService { website.endsWith('/') ? 
website.substring(0, website.length - 1) : website; - let path = DialogKeywords.getGBAIPath(min.botId, `gbot`); + let path = GBUtil.getGBAIPath(min.botId, `gbot`); const directoryPath = Path.join(process.env.PWD, 'work', path, 'Website'); Fs.rmSync(directoryPath, { recursive: true, force: true }); @@ -1061,8 +1034,8 @@ export class KBService implements IGBKBService { let logo = await this.getLogoByPage(min, page); if (logo) { - path = DialogKeywords.getGBAIPath(min.botId); - + path = GBUtil.getGBAIPath(min.botId); + const baseUrl = page.url().split('/').slice(0, 3).join('/'); logo = logo.startsWith('https') ? logo : urlJoin(baseUrl, logo); @@ -1380,7 +1353,7 @@ export class KBService implements IGBKBService { GBLogEx.info(min, `[GBDeployer] Start Bot Server Side Rendering... ${localPath}`); const html = await GBSSR.getHTML(min); - let path = DialogKeywords.getGBAIPath(min.botId, `gbui`); + let path = GBUtil.getGBAIPath(min.botId, `gbui`); path = Path.join(process.env.PWD, 'work', path, 'index.html'); GBLogEx.info(min, `[GBDeployer] Saving SSR HTML in ${path}.`); Fs.writeFileSync(path, html, 'utf8'); @@ -1425,7 +1398,7 @@ export class KBService implements IGBKBService { if (channel === 'whatsapp') { await min.conversationalService.sendFile(min, step, null, answer.content, ''); } else { - const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`); + const path = GBUtil.getGBAIPath(min.botId, `gbkb`); await conversationalService.sendEvent(min, step, 'play', { playerType: 'video', data: urlJoin(path, 'videos', answer.content) diff --git a/packages/llm.gblib/services/ChatServices.ts b/packages/llm.gblib/services/ChatServices.ts index f37b2c2e..47d3d43f 100644 --- a/packages/llm.gblib/services/ChatServices.ts +++ b/packages/llm.gblib/services/ChatServices.ts @@ -146,7 +146,7 @@ export class GBLLMOutputParser extends BaseLLMOutputParser { await CollectionUtil.asyncForEach(sources, async source => { let found = false; if (source && source.file.endsWith('.pdf')) { - const 
gbaiName = DialogKeywords.getGBAIPath(this.min.botId, 'gbkb'); + const gbaiName = GBUtil.getGBAIPath(this.min.botId, 'gbkb'); const localName = Path.join(process.env.PWD, 'work', gbaiName, 'docs', source.file); if (localName) { @@ -185,7 +185,7 @@ export class ChatServices { if (pngPages.length > 0) { const buffer = pngPages[0].content; - const gbaiName = DialogKeywords.getGBAIPath(min.botId, null); + const gbaiName = GBUtil.getGBAIPath(min.botId, null); const localName = Path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`); const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName)); Fs.writeFileSync(localName, buffer, { encoding: null }); @@ -708,7 +708,7 @@ export class ChatServices { // Adds .gbdialog as functions if any to LLM Functions. await CollectionUtil.asyncForEach(Object.keys(min.scriptMap), async script => { - const path = DialogKeywords.getGBAIPath(min.botId, 'gbdialog', null); + const path = GBUtil.getGBAIPath(min.botId, 'gbdialog', null); const jsonFile = Path.join('work', path, `${script}.json`); if (Fs.existsSync(jsonFile) && script.toLowerCase() !== 'start.vbs') { diff --git a/packages/llm.gblib/services/ImageServices.ts b/packages/llm.gblib/services/ImageServices.ts index fc273055..dcb9e3ee 100644 --- a/packages/llm.gblib/services/ImageServices.ts +++ b/packages/llm.gblib/services/ImageServices.ts @@ -41,6 +41,7 @@ import Fs from 'fs'; import urlJoin from 'url-join'; import { GBAdminService } from '../../admin.gbapp/services/GBAdminService'; import { GBLogEx } from '../../core.gbapp/services/GBLogEx'; +import { GBUtil } from '../../../src/util'; /** * Image processing services of conversation to be called by BASIC. 
@@ -66,7 +67,7 @@ export class ImageServices { size: '1024x1024' }); - const gbaiName = DialogKeywords.getGBAIPath(min.botId); + const gbaiName = GBUtil.getGBAIPath(min.botId); const localName = Path.join('work', gbaiName, 'cache', `DALL-E${GBAdminService.getRndReadableIdentifier()}.png`); const url = response.data[0].url; diff --git a/packages/security.gbapp/services/SecService.ts b/packages/security.gbapp/services/SecService.ts index 55ab9074..217f2129 100644 --- a/packages/security.gbapp/services/SecService.ts +++ b/packages/security.gbapp/services/SecService.ts @@ -9,6 +9,7 @@ import mkdirp from 'mkdirp'; import urlJoin from 'url-join'; import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js'; import { GBServer } from '../../../src/app.js'; +import { GBUtil } from '../../../src/util.js'; /** @@ -25,7 +26,7 @@ export class SecService extends GBService { email: string ): Promise { - const gbaiPath = DialogKeywords.getGBAIPath(min.botId); + const gbaiPath = GBUtil.getGBAIPath(min.botId); const dir = urlJoin ('work',gbaiPath, 'users', userSystemId); if (!Fs.existsSync(dir)) { diff --git a/packages/whatsapp.gblib/services/WhatsappDirectLine.ts b/packages/whatsapp.gblib/services/WhatsappDirectLine.ts index 2cee0db0..f0d17562 100644 --- a/packages/whatsapp.gblib/services/WhatsappDirectLine.ts +++ b/packages/whatsapp.gblib/services/WhatsappDirectLine.ts @@ -150,7 +150,7 @@ export class WhatsappDirectLine extends GBService { case 'GeneralBots': const minBoot = GBServer.globals.minBoot; // Initialize the browser using a local profile for each bot. 
- const gbaiPath = DialogKeywords.getGBAIPath(this.min.botId); + const gbaiPath = GBUtil.getGBAIPath(this.min.botId); const webVersion = '2.2412.51'; const localName = Path.join('work', gbaiPath, 'profile'); const createClient = () => { @@ -323,7 +323,7 @@ export class WhatsappDirectLine extends GBService { const base64Image = await message.downloadMedia(); let buf: any = Buffer.from(base64Image.data, 'base64'); - const gbaiName = DialogKeywords.getGBAIPath(this.min.botId); + const gbaiName = GBUtil.getGBAIPath(this.min.botId); const localName = Path.join( 'work', gbaiName, @@ -763,7 +763,7 @@ export class WhatsappDirectLine extends GBService { // Set folder based on media type let folder = mediaType === 'video' ? 'videos' : 'images'; - let path = DialogKeywords.getGBAIPath(min.botId, `gbkb`); + let path = GBUtil.getGBAIPath(min.botId, `gbkb`); path = Path.join(process.env.PWD, 'work', path, folder, mediaFile); text = text.substring(mediaFile.length + 1).trim(); diff --git a/src/util.ts b/src/util.ts index 58d1aa5b..c82b94c7 100644 --- a/src/util.ts +++ b/src/util.ts @@ -38,6 +38,10 @@ import SwaggerClient from 'swagger-client'; import Fs from 'fs'; import { GBConfigService } from '../packages/core.gbapp/services/GBConfigService.js'; import path from 'path'; +import { getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs'; +import { Page } from 'puppeteer'; +import urljoin from 'url-join'; +import html2md from 'html-to-md'; export class GBUtil { public static repeat(chr, count) { @@ -174,4 +178,91 @@ export class GBUtil { } return false; } + + public static async getPdfText(data: Buffer): Promise { + const pdf = await getDocument({ data }).promise; + let pages = []; + + for (let i = 1; i <= pdf.numPages; i++) { + const page = await pdf.getPage(i); + const textContent = await page.getTextContent(); + const text = textContent.items + .map(item => item['str']) + .join('') + .replace(/\s/g, ''); // NOTE: strips every whitespace character (including spaces between words), not just extra ones + pages.push(text); + } + + return 
pages.join(''); + } + + static getGBAIPath(botId, packageType = null, packageName = null) { + let gbai = `${botId}.gbai`; + if (!packageType && !packageName) { + return GBConfigService.get('DEV_GBAI') ? GBConfigService.get('DEV_GBAI') : gbai; + } + + if (GBConfigService.get('DEV_GBAI')) { + gbai = GBConfigService.get('DEV_GBAI'); + botId = gbai.replace(/\.[^/.]+$/, ''); + return urljoin(GBConfigService.get('DEV_GBAI'), packageName ? packageName : `${botId}.${packageType}`); + } else { + return urljoin(gbai, packageName ? packageName : `${botId}.${packageType}`); + } + } + + public static async savePage(url: string, page: Page, directoryPath: string): Promise { + + let response = await page.goto(url); + + if (!response) { + response = await page.waitForResponse(() => true); + } + + if (response && response.headers && response.status() === 200) { + const contentType = response.headers()['content-type']; + + if (contentType) { + const urlObj = new URL(url); + const urlPath = urlObj.pathname.endsWith('/') ? 
urlObj.pathname.slice(0, -1) : urlObj.pathname; + let filename = urlPath.split('/').pop() || 'index'; + + Fs.mkdirSync(directoryPath, { recursive: true }); + + const extensionMap = { + 'text/html': 'html', + 'application/pdf': 'pdf', + 'text/plain': 'txt', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx', + 'application/json': 'json', + 'application/xml': 'xml', + 'text/csv': 'csv', + 'application/x-httpd-php': 'php', + 'application/javascript': 'js', + 'text/javascript': 'js', + 'text/css': 'css', + 'text/xml': 'xml' + }; + + const extension = Object.entries(extensionMap).find(([mimeType]) => contentType.includes(mimeType))?.[1] || 'bin'; + filename = `${filename}.${extension}`; + const filePath = path.join(directoryPath, filename); + + let fileContent; + if (extension === 'html') { + fileContent = html2md(await response.text()); + } else if (extension === 'pdf') { + const pdfBuffer = await response.buffer(); + fileContent = await GBUtil.getPdfText(pdfBuffer); // Extract text from the PDF + } else { + fileContent = await response.buffer(); + } + + Fs.writeFileSync(filePath, fileContent); + + return filePath; + } + } + return null; + } } diff --git a/templates/crawler.gbai/crawler.gbot/config.csv b/templates/crawler.gbai/crawler.gbot/config.csv index 89a00c9f..950dccb8 100644 --- a/templates/crawler.gbai/crawler.gbot/config.csv +++ b/templates/crawler.gbai/crawler.gbot/config.csv @@ -1,3 +1,3 @@ name,value -Website,https://pragmatismo.cloud +Website,https://www.oabprevpr.org.br Answer Mode,document \ No newline at end of file