new(basic.gblib): Crawler can index all now.

This commit is contained in:
Rodrigo Rodriguez 2024-09-06 15:15:42 -03:00
parent 2b09b58d4b
commit c1ac3f183b
19 changed files with 193 additions and 151 deletions

View file

@ -152,7 +152,7 @@ export class GBAdminService implements IGBAdminService {
const packageName = text.split(' ')[1];
const importer = new GBImporter(min.core);
const deployer = new GBDeployer(min.core, importer);
const path = DialogKeywords.getGBAIPath(min.botId, null, packageName);
const path = GBUtil.getGBAIPath(min.botId, null, packageName);
const localFolder = Path.join('work', path);
await deployer.undeployPackageFromLocalPath(min.instance, localFolder);
}
@ -170,12 +170,12 @@ export class GBAdminService implements IGBAdminService {
const folderName = text.split(' ')[2];
const packageType = Path.extname(folderName).substr(1);
const gbaiPath = DialogKeywords.getGBAIPath(min.instance.botId, packageType, null);
const gbaiPath = GBUtil.getGBAIPath(min.instance.botId, packageType, null);
const localFolder = Path.join('work', gbaiPath);
// .gbot packages are handled using storage API, so no download
// of local resources is required.
const gbai = DialogKeywords.getGBAIPath(min.instance.botId);
const gbai = GBUtil.getGBAIPath(min.instance.botId);
if (packageType === 'gbkb') {
await deployer['cleanupPackage'](min.instance, packageName);

View file

@ -84,7 +84,7 @@ export class DialogKeywords {
Based on this data, generate a configuration for a Billboard.js chart. The output should be valid JSON, following Billboard.js conventions. Ensure the JSON is returned without markdown formatting, explanations, or comments.
The chart should be ${prompt}. Return only the JSON configuration, nothing else.`;
The chart should be ${prompt}. Return only the one-line only JSON configuration, nothing else.`;
// Send the prompt to the LLM and get the response
@ -108,7 +108,7 @@ export class DialogKeywords {
// Get the chart container and take a screenshot
const content = await page.$('.bb');
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const localName = Path.join('work', gbaiName, 'cache', `chart${GBAdminService.getRndReadableIdentifier()}.jpg`);
await content.screenshot({ path: localName, omitBackground: true });
await browser.close();
@ -182,7 +182,7 @@ export class DialogKeywords {
};
}
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const localName = Path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.jpg`);
await ChartServices.screenshot(definition, localName);
@ -1037,7 +1037,7 @@ export class DialogKeywords {
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId;
const path = DialogKeywords.getGBAIPath(botId);
const path = GBUtil.getGBAIPath(botId);
let url = `${baseUrl}/drive/root:/${path}:/children`;
GBLogEx.info(min, `Loading HEAR AS .xlsx options from Sheet: ${url}`);
@ -1287,20 +1287,6 @@ export class DialogKeywords {
GBLog.error(`BASIC RUNTIME ERR HEAR ${error.message ? error.message : error}\n Stack:${error.stack}`);
}
}
/**
 * Resolves the relative path of a bot's .gbai folder or of one of its packages.
 *
 * With neither packageType nor packageName, returns the .gbai folder name itself:
 * `<botId>.gbai`, or the DEV_GBAI configuration value when that value is truthy.
 * Otherwise joins that folder with packageName when given, or with
 * `<name>.<packageType>`, where <name> is botId or, when DEV_GBAI is set,
 * the DEV_GBAI value with its trailing extension removed.
 *
 * @param botId Bot identifier used to build the default folder and package names.
 * @param packageType Package extension without the dot (e.g. 'gbdialog').
 * @param packageName Explicit package folder name; takes precedence over packageType.
 * @returns The folder name, or the folder joined with the package name.
 */
static getGBAIPath(botId, packageType = null, packageName = null) {
  // Read the override once; a truthy DEV_GBAI replaces the folder derived from botId.
  const devGbai = GBConfigService.get('DEV_GBAI');
  const gbai = devGbai ? devGbai : `${botId}.gbai`;

  if (!packageType && !packageName) {
    return gbai;
  }

  // With the override active, default package names derive from it (extension stripped).
  const baseName = devGbai ? gbai.replace(/\.[^/.]+$/, '') : botId;
  return urljoin(gbai, packageName ? packageName : `${baseName}.${packageType}`);
}
/**
* Prepares the next dialog to be shown to the specified user.
@ -1457,7 +1443,7 @@ export class DialogKeywords {
const element = filename._page ? filename._page : filename.screenshot ? filename : null;
let url;
let nameOnly;
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
// Web automation.
@ -1494,7 +1480,7 @@ export class DialogKeywords {
// .gbdrive direct sending.
else {
const ext = Path.extname(filename);
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const fileUrl = urlJoin('/', gbaiName, `${min.botId}.gbdrive`, filename);
@ -1527,7 +1513,7 @@ export class DialogKeywords {
// Prepare a cache to be referenced by Bot Framework.
const buf = Fs.readFileSync(filename);
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const localName = Path.join('work', gbaiName, 'cache', `tmp${GBAdminService.getRndReadableIdentifier()}.${ext}`);
Fs.writeFileSync(localName, buf, { encoding: null });
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName));
@ -1560,7 +1546,7 @@ export class DialogKeywords {
const data = img.replace(/^data:image\/\w+;base64,/, '');
const buf = Buffer.from(data, 'base64');
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const localName = Path.join('work', gbaiName, 'cache', `qr${GBAdminService.getRndReadableIdentifier()}.png`);
Fs.writeFileSync(localName, buf, { encoding: null });
const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName));

View file

@ -54,6 +54,7 @@ import { SystemKeywords } from './SystemKeywords.js';
import { Sequelize, QueryTypes } from '@sequelize/core';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { GBUtil } from '../../../src/util.js';
/**
* @fileoverview Decision was to prioritize security (isolation) and debugging,
@ -68,7 +69,7 @@ export class GBVMService extends GBService {
public static API_PORT = 1111;
public async loadDialogPackage(folder: string, min: GBMinInstance, core: IGBCoreService, deployer: GBDeployer) {
const ignore = Path.join('work', DialogKeywords.getGBAIPath(min.botId, 'gbdialog'), 'node_modules');
const ignore = Path.join('work', GBUtil.getGBAIPath(min.botId, 'gbdialog'), 'node_modules');
const files = await walkPromise(folder, { ignore: [ignore] });
await CollectionUtil.asyncForEach(files, async file => {
@ -223,7 +224,7 @@ export class GBVMService extends GBService {
public static async loadConnections(min) {
// Loads storage custom connections.
const path = DialogKeywords.getGBAIPath(min.botId, null);
const path = GBUtil.getGBAIPath(min.botId, null);
const filePath = Path.join('work', path, 'connections.json');
let connections = [];
if (Fs.existsSync(filePath)) {
@ -1089,7 +1090,7 @@ export class GBVMService extends GBService {
}
const botId = min.botId;
const path = DialogKeywords.getGBAIPath(min.botId, `gbdialog`);
const path = GBUtil.getGBAIPath(min.botId, `gbdialog`);
const gbdialogPath = urlJoin(process.cwd(), 'work', path);
const scriptFilePath = urlJoin(gbdialogPath, `${text}.js`);

View file

@ -38,6 +38,7 @@ import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
import urlJoin from 'url-join';
import { GBServer } from '../../../src/app.js';
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
import { GBUtil } from '../../../src/util.js';
/**
* Image processing services of conversation to be called by BASIC.
@ -74,7 +75,7 @@ export class ImageProcessingServices {
});
const botId = min.instance.botId;
const path = DialogKeywords.getGBAIPath(min.botId);
const path = GBUtil.getGBAIPath(min.botId);
// TODO: const img = await joinImages(paths);
const localName = Path.join('work', path, 'cache', `img-mrg${GBAdminService.getRndReadableIdentifier()}.png`);
const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName));

View file

@ -262,7 +262,7 @@ export class SystemKeywords {
// headers.
const { min, user } = await DialogKeywords.getProcessInfo(pid);
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const browser = await GBSSR.createBrowser(null);
const page = await browser.newPage();
await page.minimize();
@ -378,7 +378,7 @@ export class SystemKeywords {
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId;
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const tmpDocx = urlJoin(gbaiName, `${botId}.gbdrive`, `tmp${GBAdminService.getRndReadableIdentifier()}.docx`);
// Performs the conversion operation.
@ -571,7 +571,7 @@ export class SystemKeywords {
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId;
const path = DialogKeywords.getGBAIPath(botId, 'gbdata');
const path = GBUtil.getGBAIPath(botId, 'gbdata');
let document = await this.internalGetDocument(client, baseUrl, path, file);
let sheets = await client.api(`${baseUrl}/drive/items/${document.id}/workbook/worksheets`).get();
let body = { values: [[]] };
@ -656,7 +656,7 @@ export class SystemKeywords {
GBLogEx.info(min, `Saving '${file}' (SAVE file).`);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId;
const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`);
const path = GBUtil.getGBAIPath(min.botId, `gbdrive`);
// Checks if it is a GB FILE object.
@ -701,7 +701,7 @@ export class SystemKeywords {
// It is a SharePoint object that needs to be downloaded.
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const localName = Path.join('work', gbaiName, 'cache', `${GBAdminService.getRndReadableIdentifier()}.tmp`);
const url = file['url'];
const response = await fetch(url);
@ -875,7 +875,7 @@ export class SystemKeywords {
GBLogEx.info(min, `Saving '${file}' (SAVE). Args: ${args.join(',')}.`);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId;
const path = DialogKeywords.getGBAIPath(botId, 'gbdata');
const path = GBUtil.getGBAIPath(botId, 'gbdata');
let sheets;
let document;
@ -994,7 +994,7 @@ export class SystemKeywords {
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId;
('');
const path = DialogKeywords.getGBAIPath(botId, 'gbdata');
const path = GBUtil.getGBAIPath(botId, 'gbdata');
let document = await this.internalGetDocument(client, baseUrl, path, file);
@ -1092,7 +1092,7 @@ export class SystemKeywords {
args.shift();
const botId = min.instance.botId;
const path = DialogKeywords.getGBAIPath(botId, 'gbdata');
const path = GBUtil.getGBAIPath(botId, 'gbdata');
// MAX LINES property.
@ -1150,7 +1150,7 @@ export class SystemKeywords {
}
result = null;
} else if (file['cTag']) {
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const localName = Path.join('work', gbaiName, 'cache', `csv${GBAdminService.getRndReadableIdentifier()}.csv`);
const url = file['@microsoft.graph.downloadUrl'];
const response = await fetch(url);
@ -1200,7 +1200,7 @@ export class SystemKeywords {
rows = results.text;
} else if (file.indexOf('.csv') !== -1) {
let res;
let path = DialogKeywords.getGBAIPath(min.botId, `gbdata`);
let path = GBUtil.getGBAIPath(min.botId, `gbdata`);
const csvFile = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path, file);
const firstLine = Fs.readFileSync(csvFile, 'utf8').split('\n')[0];
const headers = firstLine.split(',');
@ -1520,7 +1520,7 @@ export class SystemKeywords {
if (user) {
ChatServices.userSystemPrompt[user.userSystemId] = text;
const path = DialogKeywords.getGBAIPath(min.botId);
const path = GBUtil.getGBAIPath(min.botId);
const systemPromptFile = urlJoin(process.cwd(), 'work', path, 'users', user.userSystemId, 'systemPrompt.txt');
Fs.writeFileSync(systemPromptFile, text);
}
@ -1536,7 +1536,7 @@ export class SystemKeywords {
const { min, user, params } = await DialogKeywords.getProcessInfo(pid);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId;
let path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`);
let path = GBUtil.getGBAIPath(min.botId, `gbdrive`);
// Splits the path into its parts and creates a folder
// for each one of them.
@ -1585,7 +1585,7 @@ export class SystemKeywords {
public async shareFolder({ pid, folder, email, message }) {
const { min, user, params } = await DialogKeywords.getProcessInfo(pid);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`);
const path = GBUtil.getGBAIPath(min.botId, `gbdrive`);
const root = urlJoin(path, folder);
const src = await client.api(`${baseUrl}/drive/root:/${root}`).get();
@ -1606,7 +1606,7 @@ export class SystemKeywords {
public async internalCreateDocument(min, path, content) {
GBLogEx.info(min, `CREATE DOCUMENT '${path}...'`);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const tmpDocx = urlJoin(gbaiName, path);
// Templates a blank {content} tag inside the blank.docx.
@ -1651,7 +1651,7 @@ export class SystemKeywords {
// Determines full path at source and destination.
const root = DialogKeywords.getGBAIPath(botId, 'gbdrive');
const root = GBUtil.getGBAIPath(botId, 'gbdrive');
const srcPath = urlJoin(root, src);
const dstPath = urlJoin(root, dest);
@ -1711,7 +1711,7 @@ export class SystemKeywords {
dest = dest.replace(/\\/gi, '/');
// Determines full path at source and destination.
const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`);
const path = GBUtil.getGBAIPath(min.botId, `gbdrive`);
const root = path;
const srcPath = urlJoin(root, src);
const dstPath = urlJoin(path, dest);
@ -2001,7 +2001,7 @@ export class SystemKeywords {
public async fill({ pid, templateName, data }) {
const { min, user } = await DialogKeywords.getProcessInfo(pid);
const botId = min.instance.botId;
const gbaiName = DialogKeywords.getGBAIPath(botId, 'gbdata');
const gbaiName = GBUtil.getGBAIPath(botId, 'gbdata');
let localName;
// Downloads template from .gbdrive.
@ -2240,7 +2240,7 @@ export class SystemKeywords {
}
} else {
const botId = min.instance.botId;
const path = DialogKeywords.getGBAIPath(botId, 'gbdata');
const path = GBUtil.getGBAIPath(botId, 'gbdata');
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
@ -2510,7 +2510,7 @@ export class SystemKeywords {
public async pay({ pid, orderId, customerName, ammount }) {
const { min, user } = await DialogKeywords.getProcessInfo(pid);
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const merchantId = min.core.getParam(min.instance, 'Merchant ID', null);
const merchantKey = min.core.getParam(min.instance, 'Merchant Key', null);
@ -2587,7 +2587,7 @@ export class SystemKeywords {
GBLogEx.info(min, `Auto saving '${file.filename}' (SAVE file).`);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`);
const path = GBUtil.getGBAIPath(min.botId, `gbdrive`);
const fileName = file.url ? file.url : file.name;
const contentType = mime.lookup(fileName);
const ext = Path.extname(fileName).substring(1);
@ -2635,7 +2635,7 @@ export class SystemKeywords {
GBLogEx.info(min, `DELETE '${file.name}'.`);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const gbaiPath = DialogKeywords.getGBAIPath(min.botId);
const gbaiPath = GBUtil.getGBAIPath(min.botId);
const fileName = file.name;
const contentType = mime.lookup(fileName);
const ext = Path.extname(fileName).substring(1);
@ -2677,7 +2677,7 @@ export class SystemKeywords {
// Retrieves all files in remote folder.
let path = DialogKeywords.getGBAIPath(min.botId);
let path = GBUtil.getGBAIPath(min.botId);
path = urlJoin(path, remotePath);
let url = `${baseUrl}/drive/root:/${path}:/children`;
@ -2725,7 +2725,7 @@ export class SystemKeywords {
if (GBConfigService.get('STORAGE_NAME')) {
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
let path = '/' + urlJoin(gbaiName, `${min.botId}.gbdrive`);
let template = await this.internalGetDocument(client, baseUrl, path, file);
let url = template['@microsoft.graph.downloadUrl'];
@ -2734,26 +2734,12 @@ export class SystemKeywords {
data = new Uint8Array(buf);
}
else {
let path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`);
let path = GBUtil.getGBAIPath(min.botId, `gbdrive`);
let filePath = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path, file);
data = Fs.readFileSync(filePath, 'utf8');
data = new Uint8Array(Buffer.from(data, 'utf8'));
}
const pdf = await getDocument({ data }).promise;
let pages = [];
for (let i = 1; i <= pdf.numPages; i++) {
const page = await pdf.getPage(i);
const textContent = await page.getTextContent();
const text = textContent.items
.map(item => item['str'])
.join('')
.replace(/\s/g, '');
pages.push(text);
}
return pages.join('');
return await GBUtil.getPdfText(data);
}
public async setContext({ pid, text }) {

View file

@ -44,6 +44,7 @@ import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js';
import { Mutex } from 'async-mutex';
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
import { SystemKeywords } from './SystemKeywords.js';
import { GBUtil } from '../../../src/util.js';
/**
* Web Automation services of conversation to be called by BASIC.
@ -348,7 +349,7 @@ export class WebAutomationServices {
const page = WebAutomationServices.getPageByHandle(handle);
GBLogEx.info(min, `Web Automation SCREENSHOT ${selector}.`);
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const localName = Path.join('work', gbaiName, 'cache', `screen-${GBAdminService.getRndReadableIdentifier()}.jpg`);
await page.screenshot({ path: localName });
@ -439,7 +440,7 @@ export class WebAutomationServices {
folder = folder.replace(/\\/gi, '/');
// Determines full path at source and destination.
const path = DialogKeywords.getGBAIPath(min.botId, `gbdrive`);
const path = GBUtil.getGBAIPath(min.botId, `gbdrive`);
const root = path;
const dstPath = urlJoin(root, folder, filename);

View file

@ -656,7 +656,7 @@ export class GBConversationalService {
// Set folder based on media type
const folder = mediaType === 'video' ? 'videos' : 'images';
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const fileUrl = urlJoin(process.env.BOT_URL, 'kb', gbaiName, `${min.botId}.gbkb`, folder, mediaFile);
let urlMedia = mediaFile.startsWith('http') ? mediaFile : fileUrl;

View file

@ -680,7 +680,7 @@ ENDPOINT_UPDATE=true
const maxLines = 512;
const file = 'Config.xlsx';
const path = DialogKeywords.getGBAIPath(min.botId, `gbot`);
const path = GBUtil.getGBAIPath(min.botId, `gbot`);
let document = await new SystemKeywords().internalGetDocument(client, baseUrl, path, file);
@ -716,7 +716,7 @@ ENDPOINT_UPDATE=true
)
.patch(body);
} else {
let path = DialogKeywords.getGBAIPath(min.botId, `gbot`);
let path = GBUtil.getGBAIPath(min.botId, `gbot`);
const config = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path, 'config.csv');
const db = await csvdb(config, ['name', 'value'], ',');
@ -914,7 +914,7 @@ ENDPOINT_UPDATE=true
const objUser = userManager.addUser(user, pass);
const virtualPath = '/' + min.botId;
let path = DialogKeywords.getGBAIPath(min.botId, null);
let path = GBUtil.getGBAIPath(min.botId, null);
const gbaiRoot = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path);
server.setFileSystem(virtualPath, new webdav.PhysicalFileSystem(gbaiRoot), successed => {

View file

@ -511,7 +511,7 @@ export class GBDeployer implements IGBDeployer {
// Retrieves all files in remote folder.
let path = DialogKeywords.getGBAIPath(min.botId);
let path = GBUtil.getGBAIPath(min.botId);
path = urlJoin(path, remotePath);
let url = `${baseUrl}/drive/root:/${path}:/children`;
@ -643,7 +643,7 @@ export class GBDeployer implements IGBDeployer {
const strFind = ' Driver';
const conns = await min.core['findParam'](min.instance, strFind);
await CollectionUtil.asyncForEach(conns, async t => {
const connectionName = t.replace(strFind, '');
const connectionName = t.replace(strFind, '').trim();
let con = {};
con['name'] = connectionName;
con['storageDriver'] = min.core.getParam<string>(min.instance, `${connectionName} Driver`, null);
@ -652,13 +652,13 @@ export class GBDeployer implements IGBDeployer {
let file = min.core.getParam<string>(min.instance, `${connectionName} File`, null);
if (storageName) {
con['storageName'] = storageName;
con['storageName'] = storageName.trim();
con['storageServer'] = min.core.getParam<string>(min.instance, `${connectionName} Server`, null);
con['storageUsername'] = min.core.getParam<string>(min.instance, `${connectionName} Username`, null);
con['storagePort'] = min.core.getParam<string>(min.instance, `${connectionName} Port`, null);
con['storagePassword'] = min.core.getParam<string>(min.instance, `${connectionName} Password`, null);
} else if (file) {
const path = DialogKeywords.getGBAIPath(min.botId, 'gbdata');
const path = GBUtil.getGBAIPath(min.botId, 'gbdata');
con['storageFile'] = Path.join(GBConfigService.get('STORAGE_LIBRARY'), path, file);
} else {
GBLogEx.debug(min, `No storage information found for ${connectionName}, missing storage name or file.`);
@ -666,7 +666,7 @@ export class GBDeployer implements IGBDeployer {
connections.push(con);
});
const path = DialogKeywords.getGBAIPath(min.botId, null);
const path = GBUtil.getGBAIPath(min.botId, null);
const localFolder = Path.join('work', path, 'connections.json');
Fs.writeFileSync(localFolder, JSON.stringify(connections), { encoding: null });
@ -726,7 +726,7 @@ export class GBDeployer implements IGBDeployer {
* Removes the package local files from cache.
*/
public async cleanupPackage(instance: IGBInstance, packageName: string) {
const path = DialogKeywords.getGBAIPath(instance.botId, null, packageName);
const path = GBUtil.getGBAIPath(instance.botId, null, packageName);
const localFolder = Path.join('work', path);
rimraf.sync(localFolder);
}
@ -740,7 +740,7 @@ export class GBDeployer implements IGBDeployer {
const packageType = Path.extname(packageName);
const p = await this.getStoragePackageByName(instance.instanceId, packageName);
const path = DialogKeywords.getGBAIPath(instance.botId, null, packageName);
const path = GBUtil.getGBAIPath(instance.botId, null, packageName);
const localFolder = Path.join('work', path);
return await this.undeployPackageFromLocalPath(instance, localFolder);
@ -899,7 +899,7 @@ export class GBDeployer implements IGBDeployer {
* Serves bot storage assets to be used by web, WhatsApp and other channels.
*/
public static mountGBKBAssets(packageName: any, botId: string, filename: string) {
const gbaiName = DialogKeywords.getGBAIPath(botId);
const gbaiName = GBUtil.getGBAIPath(botId);
// Serves menu assets.

View file

@ -293,24 +293,24 @@ export class GBMinService {
// Install per bot deployed packages.
let packagePath = urlJoin(`work`, DialogKeywords.getGBAIPath(min.botId, 'gbdialog'));
let packagePath = urlJoin(`work`, GBUtil.getGBAIPath(min.botId, 'gbdialog'));
if (Fs.existsSync(packagePath)) {
await this.deployer['deployPackage2'](min, user, packagePath);
}
packagePath = urlJoin(`work`, DialogKeywords.getGBAIPath(min.botId, 'gbapp'));
packagePath = urlJoin(`work`, GBUtil.getGBAIPath(min.botId, 'gbapp'));
if (Fs.existsSync(packagePath)) {
await this.deployer['deployPackage2'](min, user, packagePath);
}
packagePath = urlJoin(`work`, DialogKeywords.getGBAIPath(min.botId, 'gbtheme'));
packagePath = urlJoin(`work`, GBUtil.getGBAIPath(min.botId, 'gbtheme'));
if (Fs.existsSync(packagePath)) {
await this.deployer['deployPackage2'](min, user, packagePath);
}
packagePath = urlJoin(`work`, DialogKeywords.getGBAIPath(min.botId, `gblib`));
packagePath = urlJoin(`work`, GBUtil.getGBAIPath(min.botId, `gblib`));
if (Fs.existsSync(packagePath)) {
await this.deployer['deployPackage2'](min, user, packagePath);
}
const gbai = DialogKeywords.getGBAIPath(min.botId);
const gbai = GBUtil.getGBAIPath(min.botId);
let dir = `work/${gbai}/cache`;
const botId = gbai.replace(/\.[^/.]+$/, '');
@ -387,7 +387,7 @@ export class GBMinService {
// Generates MS Teams manifest.
const manifest = `${instance.botId}-Teams.zip`;
const packageTeams = urlJoin(`work`, DialogKeywords.getGBAIPath(instance.botId), manifest);
const packageTeams = urlJoin(`work`, GBUtil.getGBAIPath(instance.botId), manifest);
if (!Fs.existsSync(packageTeams)) {
GBLogEx.info(min, 'Generating MS Teams manifest....');
const data = await this.deployer.getBotManifest(instance);
@ -832,7 +832,7 @@ export class GBMinService {
min['scheduleMap'] = {};
min['conversationWelcomed'] = {};
if (await min.core.getParam(min.instance, 'Answer Mode', null)) {
const gbkbPath = DialogKeywords.getGBAIPath(min.botId, 'gbkb');
const gbkbPath = GBUtil.getGBAIPath(min.botId, 'gbkb');
min['vectorStorePath'] = Path.join('work', gbkbPath, 'docs-vectorized');
min['vectorStore'] = await this.deployer.loadOrCreateEmptyVectorStore(min);
}
@ -1095,7 +1095,7 @@ export class GBMinService {
ps: null,
qs: null
});
const path = DialogKeywords.getGBAIPath(min.botId);
const path = GBUtil.getGBAIPath(min.botId);
const folder = `work/${path}/cache`;
const filename = `${GBAdminService.generateUuid()}.png`;
@ -1271,7 +1271,7 @@ export class GBMinService {
private static async downloadAttachmentAndWrite(attachment) {
const url = attachment.contentUrl;
const localFolder = 'work';
const path = DialogKeywords.getGBAIPath(this['min'].botId);
const path = GBUtil.getGBAIPath(this['min'].botId);
const localFileName = Path.join(localFolder, path, 'uploads', attachment.name);
let buffer;

View file

@ -303,12 +303,12 @@ export class GBSSR {
}
let path = DialogKeywords.getGBAIPath(botId, `gbui`);
let path = GBUtil.getGBAIPath(botId, `gbui`);
// Checks whether the bot has a .gbui published; otherwise uses default.gbui.
if (!Fs.existsSync(path)) {
path = DialogKeywords.getGBAIPath(minBoot.botId, `gbui`);
path = GBUtil.getGBAIPath(minBoot.botId, `gbui`);
}
let parts = req.url.replace(`/${botId}`, '').split('?');
let url = parts[0];

View file

@ -51,6 +51,7 @@ import { SystemKeywords } from '../../basic.gblib/services/SystemKeywords.js';
import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
import Path from 'path';
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
import { GBUtil } from '../../../src/util.js';
/**
* Dialog arguments.
@ -245,7 +246,7 @@ export class AskDialog extends IGBDialog {
for (const key in results.sources) {
const source = results.sources[key];
const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
const path = GBUtil.getGBAIPath(min.botId, `gbkb`);
let url = urlJoin('kb', path, 'docs', Path.basename(source.file));
url = `${url}#page=${source.page}&toolbar=0&messages=0&statusbar=0&navpanes=0`;
urls.push({ url: url });

View file

@ -54,7 +54,6 @@ import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { Document } from 'langchain/document';
import getColors from 'get-image-colors';
import {
GBDialogStep,
GBLog,
@ -558,7 +557,7 @@ export class KBService implements IGBKBService {
const isBasic = answer.toLowerCase().startsWith('/basic');
if (/TALK\s*\".*\"/gi.test(answer) || isBasic) {
const code = isBasic ? answer.substr(6) : answer;
const path = DialogKeywords.getGBAIPath(min.botId, `gbdialog`);
const path = GBUtil.getGBAIPath(min.botId, `gbdialog`);
const scriptName = `tmp${GBAdminService.getRndReadableIdentifier()}.docx`;
const localName = Path.join('work', path, `${scriptName}`);
Fs.writeFileSync(localName, code, { encoding: null });
@ -633,12 +632,12 @@ export class KBService implements IGBKBService {
answer.endsWith('.xls') ||
answer.endsWith('.xlsx')
) {
const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
const path = GBUtil.getGBAIPath(min.botId, `gbkb`);
const doc = urlJoin(GBServer.globals.publicAddress, 'kb', path, 'assets', answer);
const url = `http://view.officeapps.live.com/op/view.aspx?src=${doc}`;
await this.playUrl(min, min.conversationalService, step, url, channel);
} else if (answer.endsWith('.pdf')) {
const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
const path = GBUtil.getGBAIPath(min.botId, `gbkb`);
const url = urlJoin('kb', path, 'assets', answer);
await this.playUrl(min, min.conversationalService, step, url, channel);
} else if (answer.format === '.md') {
@ -740,7 +739,7 @@ export class KBService implements IGBKBService {
});
}
} else if (file !== null && file.name.endsWith('.docx')) {
let path = DialogKeywords.getGBAIPath(instance.botId, `gbkb`);
let path = GBUtil.getGBAIPath(instance.botId, `gbkb`);
const localName = Path.join('work', path, 'articles', file.name);
let loader = new DocxLoader(localName);
let doc = await loader.load();
@ -761,7 +760,7 @@ export class KBService implements IGBKBService {
data.answers.push(answer);
} else if (file !== null && file.name.endsWith('.toc.docx')) {
const path = DialogKeywords.getGBAIPath(instance.botId, `gbkb`);
const path = GBUtil.getGBAIPath(instance.botId, `gbkb`);
const localName = Path.join('work', path, 'articles', file.name);
const buffer = Fs.readFileSync(localName, { encoding: null });
var options = {
@ -769,13 +768,13 @@ export class KBService implements IGBKBService {
convertImage: async image => {
const localName = Path.join(
'work',
DialogKeywords.getGBAIPath(instance.botId),
GBUtil.getGBAIPath(instance.botId),
'cache',
`img-docx${GBAdminService.getRndReadableIdentifier()}.png`
);
const url = urlJoin(
GBServer.globals.publicAddress,
DialogKeywords.getGBAIPath(instance.botId).replace(/\.[^/.]+$/, ''),
GBUtil.getGBAIPath(instance.botId).replace(/\.[^/.]+$/, ''),
'cache',
Path.basename(localName)
);
@ -864,34 +863,6 @@ export class KBService implements IGBKBService {
});
}
/**
 * Navigates the page to a URL and, when the response is a 200 text/html document,
 * converts its body with html2md and writes the result into the bot's
 * work/<gbot package path>/Website folder.
 *
 * @param min Bot instance; its botId selects the target folder and it is passed to the logger.
 * @param url Address to load.
 * @param page Browser page used for the navigation.
 * @returns Path of the written file, or null when nothing was saved.
 */
async saveHtmlPage(min, url: string, page: Page): Promise<string | null> {
  let response = await page.goto(url);
  if (!response) {
    // goto() produced no response; take the next response the page receives instead.
    response = await page.waitForResponse(() => true);
  }

  if (!response || !response.headers || response.status() !== 200) {
    return null;
  }

  const contentType = response.headers()['content-type'];
  if (!contentType || !contentType.includes('text/html')) {
    return null;
  }

  const buffer = html2md(await response.text());

  // File name is the last URL path segment (a trailing slash is ignored),
  // or 'index' when that segment is empty; '.html' is appended in both cases.
  const { pathname } = new URL(url);
  const urlPath = pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
  const filename = `${urlPath.split('/').pop() || 'index'}.html`;

  // PWD is an environment variable and may be unset; Path.join rejects a
  // non-string segment, so fall back to the process working directory.
  const baseDir = process.env.PWD || process.cwd();
  const path = DialogKeywords.getGBAIPath(min.botId, `gbot`);
  const directoryPath = Path.join(baseDir, 'work', path, 'Website');
  const filePath = Path.join(directoryPath, filename);

  GBLogEx.info(min, `[GBDeployer] Saving Website file in ${filePath}.`);
  // Create the target directory (and any missing parents) before writing.
  Fs.mkdirSync(directoryPath, { recursive: true });
  Fs.writeFileSync(filePath, buffer);
  return filePath;
}
async crawl(
min,
url: string,
@ -906,7 +877,6 @@ export class KBService implements IGBKBService {
depth > maxDepth ||
visited.has(url) ||
url.endsWith('.jpg') ||
url.endsWith('.pdf') ||
url.endsWith('.jpg') ||
url.endsWith('.png') ||
url.endsWith('.mp4')
@ -915,14 +885,17 @@ export class KBService implements IGBKBService {
}
await GBLogEx.info(min, `Processing URL: ${url}.`);
visited.add(url);
const filename = await this.saveHtmlPage(min, url, page);
// Target folder for crawled pages. `path` is the package path string, so the
// join must go through the Path module rather than through `path` itself.
const path = GBUtil.getGBAIPath(min.botId, `gbot`);
const directoryPath = Path.join(process.env.PWD, 'work', path, 'Website');
const filename = await GBUtil.savePage(url, page, directoryPath);
if (!filename) {
// If the URL doesn't represent an HTML page, skip crawling its links
// If the URL doesn't represent an HTML/PDF page, skip crawling its links
return [];
}
const currentDomain = new URL(page.url()).hostname;
@ -1052,7 +1025,7 @@ export class KBService implements IGBKBService {
website.endsWith('/') ? website.substring(0, website.length - 1) : website;
let path = DialogKeywords.getGBAIPath(min.botId, `gbot`);
let path = GBUtil.getGBAIPath(min.botId, `gbot`);
const directoryPath = Path.join(process.env.PWD, 'work', path, 'Website');
Fs.rmSync(directoryPath, { recursive: true, force: true });
@ -1061,7 +1034,7 @@ export class KBService implements IGBKBService {
let logo = await this.getLogoByPage(min, page);
if (logo) {
path = DialogKeywords.getGBAIPath(min.botId);
path = GBUtil.getGBAIPath(min.botId);
const baseUrl = page.url().split('/').slice(0, 3).join('/');
logo = logo.startsWith('https') ? logo : urlJoin(baseUrl, logo);
@ -1380,7 +1353,7 @@ export class KBService implements IGBKBService {
GBLogEx.info(min, `[GBDeployer] Start Bot Server Side Rendering... ${localPath}`);
const html = await GBSSR.getHTML(min);
let path = DialogKeywords.getGBAIPath(min.botId, `gbui`);
let path = GBUtil.getGBAIPath(min.botId, `gbui`);
path = Path.join(process.env.PWD, 'work', path, 'index.html');
GBLogEx.info(min, `[GBDeployer] Saving SSR HTML in ${path}.`);
Fs.writeFileSync(path, html, 'utf8');
@ -1425,7 +1398,7 @@ export class KBService implements IGBKBService {
if (channel === 'whatsapp') {
await min.conversationalService.sendFile(min, step, null, answer.content, '');
} else {
const path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
const path = GBUtil.getGBAIPath(min.botId, `gbkb`);
await conversationalService.sendEvent(min, step, 'play', {
playerType: 'video',
data: urlJoin(path, 'videos', answer.content)

View file

@ -146,7 +146,7 @@ export class GBLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
await CollectionUtil.asyncForEach(sources, async source => {
let found = false;
if (source && source.file.endsWith('.pdf')) {
const gbaiName = DialogKeywords.getGBAIPath(this.min.botId, 'gbkb');
const gbaiName = GBUtil.getGBAIPath(this.min.botId, 'gbkb');
const localName = Path.join(process.env.PWD, 'work', gbaiName, 'docs', source.file);
if (localName) {
@ -185,7 +185,7 @@ export class ChatServices {
if (pngPages.length > 0) {
const buffer = pngPages[0].content;
const gbaiName = DialogKeywords.getGBAIPath(min.botId, null);
const gbaiName = GBUtil.getGBAIPath(min.botId, null);
const localName = Path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`);
const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', Path.basename(localName));
Fs.writeFileSync(localName, buffer, { encoding: null });
@ -708,7 +708,7 @@ export class ChatServices {
// Adds .gbdialog as functions if any to LLM Functions.
await CollectionUtil.asyncForEach(Object.keys(min.scriptMap), async script => {
const path = DialogKeywords.getGBAIPath(min.botId, 'gbdialog', null);
const path = GBUtil.getGBAIPath(min.botId, 'gbdialog', null);
const jsonFile = Path.join('work', path, `${script}.json`);
if (Fs.existsSync(jsonFile) && script.toLowerCase() !== 'start.vbs') {

View file

@ -41,6 +41,7 @@ import Fs from 'fs';
import urlJoin from 'url-join';
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService';
import { GBLogEx } from '../../core.gbapp/services/GBLogEx';
import { GBUtil } from '../../../src/util';
/**
* Image processing services of conversation to be called by BASIC.
@ -66,7 +67,7 @@ export class ImageServices {
size: '1024x1024'
});
const gbaiName = DialogKeywords.getGBAIPath(min.botId);
const gbaiName = GBUtil.getGBAIPath(min.botId);
const localName = Path.join('work', gbaiName, 'cache', `DALL-E${GBAdminService.getRndReadableIdentifier()}.png`);
const url = response.data[0].url;

View file

@ -9,6 +9,7 @@ import mkdirp from 'mkdirp';
import urlJoin from 'url-join';
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
import { GBServer } from '../../../src/app.js';
import { GBUtil } from '../../../src/util.js';
/**
@ -25,7 +26,7 @@ export class SecService extends GBService {
email: string
): Promise<GuaribasUser> {
const gbaiPath = DialogKeywords.getGBAIPath(min.botId);
const gbaiPath = GBUtil.getGBAIPath(min.botId);
const dir = urlJoin ('work',gbaiPath, 'users', userSystemId);
if (!Fs.existsSync(dir)) {

View file

@ -150,7 +150,7 @@ export class WhatsappDirectLine extends GBService {
case 'GeneralBots':
const minBoot = GBServer.globals.minBoot;
// Initialize the browser using a local profile for each bot.
const gbaiPath = DialogKeywords.getGBAIPath(this.min.botId);
const gbaiPath = GBUtil.getGBAIPath(this.min.botId);
const webVersion = '2.2412.51';
const localName = Path.join('work', gbaiPath, 'profile');
const createClient = () => {
@ -323,7 +323,7 @@ export class WhatsappDirectLine extends GBService {
const base64Image = await message.downloadMedia();
let buf: any = Buffer.from(base64Image.data, 'base64');
const gbaiName = DialogKeywords.getGBAIPath(this.min.botId);
const gbaiName = GBUtil.getGBAIPath(this.min.botId);
const localName = Path.join(
'work',
gbaiName,
@ -763,7 +763,7 @@ export class WhatsappDirectLine extends GBService {
// Set folder based on media type
let folder = mediaType === 'video' ? 'videos' : 'images';
let path = DialogKeywords.getGBAIPath(min.botId, `gbkb`);
let path = GBUtil.getGBAIPath(min.botId, `gbkb`);
path = Path.join(process.env.PWD, 'work', path, folder, mediaFile);
text = text.substring(mediaFile.length + 1).trim();

View file

@ -38,6 +38,10 @@ import SwaggerClient from 'swagger-client';
import Fs from 'fs';
import { GBConfigService } from '../packages/core.gbapp/services/GBConfigService.js';
import path from 'path';
import { getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs';
import { Page } from 'puppeteer';
import urljoin from 'url-join';
import html2md from 'html-to-md';
export class GBUtil {
public static repeat(chr, count) {
@ -174,4 +178,91 @@ export class GBUtil {
}
return false;
}
/**
 * Extracts the text of every page of a PDF document.
 *
 * Text items of a page are joined with a single space and runs of whitespace
 * are collapsed, so words stay separated; pages are joined with a single space.
 *
 * @param data Raw bytes of the PDF file.
 * @returns The concatenated text of all pages.
 */
public static async getPdfText(data: Buffer): Promise<string> {
  // Hand pdf.js a plain Uint8Array copy rather than the Node Buffer itself.
  // NOTE(review): recent pdf.js builds reject Buffer input in Node — confirm against the installed version.
  const pdf = await getDocument({ data: new Uint8Array(data) }).promise;
  const pages: string[] = [];

  for (let i = 1; i <= pdf.numPages; i++) {
    const page = await pdf.getPage(i);
    const textContent = await page.getTextContent();
    const text = textContent.items
      .map(item => item['str'])
      .join(' ')
      .replace(/\s+/g, ' ') // Collapse extra whitespace but keep word boundaries
      .trim();
    pages.push(text);
  }

  return pages.join(' ');
}
/**
 * Builds the relative path of a bot's .gbai folder or of one package inside it.
 *
 * When the `DEV_GBAI` configuration value is set it replaces `<botId>.gbai` as
 * the root folder, and the default package name is derived from that value with
 * its trailing extension removed.
 *
 * @param botId Bot identifier used for the default folder and package names.
 * @param packageType Package extension (e.g. `gbkb`) used when no packageName is given.
 * @param packageName Explicit package folder name; takes precedence over packageType.
 * @returns The root folder alone when neither packageType nor packageName is given,
 * otherwise the root folder joined with the package folder.
 */
static getGBAIPath(botId, packageType = null, packageName = null) {
  const devGbai = GBConfigService.get('DEV_GBAI');
  const root: string = devGbai ? devGbai : `${botId}.gbai`;

  if (!packageType && !packageName) {
    return root;
  }

  // With a development override, the bot name comes from the override folder
  // name minus its extension.
  const baseName = devGbai ? root.replace(/\.[^/.]+$/, '') : botId;
  const packageFolder = packageName ? packageName : `${baseName}.${packageType}`;

  return urljoin(root, packageFolder);
}
/**
 * Navigates `page` to `url` and stores the response body under `directoryPath`.
 *
 * The file name is the last segment of the URL path (trailing slash ignored),
 * falling back to `index`, plus an extension chosen from the response
 * Content-Type (`bin` when the type is not recognised). HTML bodies are passed
 * through html2md, PDF bodies are reduced to their extracted text, and every
 * other type is written as the raw response bytes.
 *
 * @param url Address to load.
 * @param page Puppeteer page used to perform the navigation.
 * @param directoryPath Target folder; created recursively when missing.
 * @returns Full path of the written file, or null when there is no response,
 * the status is not 200, or the response carries no Content-Type header.
 */
public static async savePage(url: string, page: Page, directoryPath: string): Promise<string | null> {
  let response = await page.goto(url);
  if (!response) {
    // goto() produced no response object; take the next response the page sees.
    response = await page.waitForResponse(() => true);
  }

  if (!response || response.status() !== 200) {
    return null;
  }

  const contentType = response.headers()['content-type'];
  if (!contentType) {
    return null;
  }

  const urlObj = new URL(url);
  const urlPath = urlObj.pathname.endsWith('/') ? urlObj.pathname.slice(0, -1) : urlObj.pathname;
  const baseName = urlPath.split('/').pop() || 'index';

  Fs.mkdirSync(directoryPath, { recursive: true });

  const extensionMap: Record<string, string> = {
    'text/html': 'html',
    'application/pdf': 'pdf',
    'text/plain': 'txt',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
    'application/json': 'json',
    'application/xml': 'xml',
    'text/csv': 'csv',
    'application/x-httpd-php': 'php',
    'application/javascript': 'js',
    'text/javascript': 'js',
    'text/css': 'css',
    'text/xml': 'xml'
  };

  // The header may carry parameters (e.g. "; charset=utf-8"), so match by
  // substring, then translate the matched MIME type into its file extension.
  // Using the matched key itself as the extension would put a "/" in the file name.
  const normalizedType = contentType.toLowerCase();
  const matchedType = Object.keys(extensionMap).find(mime => normalizedType.includes(mime));
  const extension = matchedType ? extensionMap[matchedType] : 'bin';

  const filename = `${baseName}.${extension}`;
  const filePath = path.join(directoryPath, filename);

  let fileContent: string | Buffer;
  if (extension === 'html') {
    fileContent = html2md(await response.text());
  } else if (extension === 'pdf') {
    const pdfBuffer = await response.buffer();
    fileContent = await GBUtil.getPdfText(pdfBuffer); // Extract text from the PDF
  } else {
    fileContent = await response.buffer();
  }

  Fs.writeFileSync(filePath, fileContent);

  return filePath;
}
}

View file

@ -1,3 +1,3 @@
name,value
Website,https://pragmatismo.cloud
Website,https://www.oabprevpr.org.br
Answer Mode,document
1 name value
2 Website https://pragmatismo.cloud https://www.oabprevpr.org.br
3 Answer Mode document