botserver/src/util.ts
2025-03-05 22:09:05 -03:00

398 lines
13 KiB
TypeScript

/**
* @fileoverview General Bots local utility.
* This file contains utility functions used across the General Bots project.
* @license AGPL-3.0
*/
'use strict';
import * as YAML from 'yaml';
import SwaggerClient from 'swagger-client';
import fs from 'fs/promises';
import { GBConfigService } from '../packages/core.gbapp/services/GBConfigService.js';
import path from 'path';
import { VerbosityLevel, getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs';
import urljoin from 'url-join';
import { GBAdminService } from '../packages/admin.gbapp/services/GBAdminService.js';
import { GBLogEx } from '../packages/core.gbapp/services/GBLogEx.js';
import { PngPageOutput, pdfToPng } from 'pdf-to-png-converter';
import urlJoin from 'url-join';
import { GBServer } from './app.js';
import { QueryTypes } from '@sequelize/core';
// ... existing code ...
/**
 * Utility class containing various helper functions for the General Bots project.
 * Every member is static, so the class acts as a namespace and is not meant to be instantiated.
 */
export class GBUtil {
  /**
   * Repeats a string a specified number of times.
   * @param {string} chr - The character (or string) to repeat.
   * @param {number} count - The number of times to repeat it; zero, negative or NaN yields ''.
   * @returns {string} The repeated string.
   */
  public static repeat(chr: string, count: number): string {
    // String.prototype.repeat throws on negative counts, so guard first and
    // keep returning '' for them. Math.ceil mirrors a `x < count` loop for
    // fractional counts.
    return count > 0 ? String(chr).repeat(Math.ceil(count)) : '';
  }

  /**
   * Pads a string on the left with a specified character, truncating to `width` if it is longer.
   * @param {string} value - The string to pad.
   * @param {number} width - The desired width of the padded string; falsy or < 1 returns `value` untouched.
   * @param {string} [pad=' '] - The character to use for padding.
   * @returns {string} The padded (or truncated) string, exactly `width` characters long.
   */
  public static padL(value: string, width: number, pad: string = ' '): string {
    if (!width || width < 1) return value;
    if (!pad) pad = ' ';
    // padStart trims a multi-character pad so that the value itself is never
    // pushed out of the result; slice keeps only the first `width` characters
    // when the value is already too long.
    return String(value).padStart(width, pad).slice(0, width);
  }

  /**
   * Pads a string on the right with a specified character, truncating to `width` if it is longer.
   * @param {string} value - The string to pad.
   * @param {number} width - The desired width of the padded string; falsy or < 1 returns `value` untouched.
   * @param {string} [pad=' '] - The character to use for padding.
   * @returns {string} The padded (or truncated) string, exactly `width` characters long.
   */
  public static padR(value: string, width: number, pad: string = ' '): string {
    if (!width || width < 1) return value;
    if (!pad) pad = ' ';
    return String(value).padEnd(width, pad).slice(0, width);
  }

  /**
   * Gets a DirectLine client for bot communication.
   *
   * The API description is read from `directline-v2.json` (resolved against the
   * current working directory). Unless `GB_MODE` is `legacy`, the description is
   * re-targeted at this server's own `/api/messages/<botId>` endpoint over HTTP.
   * @param {any} min - Object exposing `instance.webchatKey` (used as bearer token) and `botId`.
   * @returns {Promise<SwaggerClient>} A promise that resolves to a SwaggerClient instance.
   */
  public static async getDirectLineClient(min: any): Promise<SwaggerClient> {
    const legacy = GBConfigService.get('GB_MODE') === 'legacy';
    const spec = JSON.parse(await fs.readFile('directline-v2.json', 'utf8'));

    if (!legacy) {
      spec['host'] = `127.0.0.1:${GBConfigService.getServerPort()}`;
      spec['basePath'] = `/api/messages/${min.botId}`;
      spec['schemes'] = ["http"];
    }

    const config = {
      spec,
      requestInterceptor: (req: any) => {
        req.headers['Authorization'] = `Bearer ${min.instance.webchatKey}`;
      }
    };
    return await new SwaggerClient(config);
  }

  /**
   * Converts data to YAML format.
   *
   * Objects are first copied property by property using
   * `Object.getOwnPropertyNames`, so non-enumerable own properties (for example
   * the `message` and `stack` of an Error) end up in the output. Arrays nested
   * inside objects are passed through untouched.
   * @param {any} data - The data to convert to YAML.
   * @returns {string} The YAML representation of the data.
   */
  public static toYAML(data: any): string {
    // Objects currently being expanded; meeting one again means a cycle.
    const ancestors = new WeakSet<object>();

    const extractProps = (obj: any): any => {
      if (ancestors.has(obj)) {
        // Break reference cycles instead of recursing until the stack overflows.
        return '[Circular]';
      }
      ancestors.add(obj);
      const copy: Record<string, unknown> = {};
      for (const key of Object.getOwnPropertyNames(obj)) {
        const value = obj[key];
        copy[key] = value && typeof value === 'object' && !Array.isArray(value) ? extractProps(value) : value;
      }
      ancestors.delete(obj);
      return copy;
    };

    // null, undefined and primitives have no own properties worth extracting.
    const extracted = data !== null && typeof data === 'object' ? extractProps(data) : data;

    // NOTE(review): `flowLevel` and `styles` look like js-yaml option names;
    // confirm that the `yaml` package honours them (the cast hides any mismatch).
    const yamlString = YAML.stringify(extracted, {
      indent: 2, // Defines the indentation
      flowLevel: -1,
      styles: { '!!null': 'canonical' } // Optional: Customize null display
    } as any);
    // Truncation is intentionally disabled: yamlString.slice(0, 256)
    return yamlString ?? '';
  }

  /**
   * Implements a delay function.
   * @param {number} ms - The number of milliseconds to sleep.
   * @returns {Promise<void>} A promise that resolves after the specified delay.
   */
  public static sleep(ms: number): Promise<void> {
    return new Promise(resolve => {
      setTimeout(resolve, ms);
    });
  }

  /**
   * Creates case-insensitive proxies for objects or arrays.
   *
   * Each wrapped object answers property reads by lower-casing the requested
   * name and looking it up in a table built from `Object.keys` at wrap time.
   * Consequences visible in the code: values are a snapshot (later changes to
   * the underlying object are not seen through reads), and symbol or inherited
   * properties read as `undefined`.
   * @param {any} listOrRow - The object or array to make case-insensitive.
   * @returns {any} A case-insensitive version of the input; non-objects are returned as is.
   */
  public static caseInsensitive(listOrRow: any): any {
    // Nothing to wrap for null, undefined or primitives.
    if (!listOrRow || typeof listOrRow !== 'object') {
      return listOrRow;
    }

    const fold = (key: any) => (typeof key === 'string' ? key.toLowerCase() : key);

    const wrap = (row: any) => {
      const byFoldedKey = new Map<any, any>();
      for (const name of Object.keys(row)) {
        // When two keys differ only by case, the later one wins.
        byFoldedKey.set(fold(name), row[name]);
      }
      return new Proxy(row, {
        get: (_target, property) => byFoldedKey.get(fold(property))
      });
    };

    if (!Array.isArray(listOrRow)) {
      return wrap(listOrRow);
    }
    // Only object elements are wrapped; primitives and nulls stay as they are.
    return listOrRow.map(row => (typeof row === 'object' && row !== null ? wrap(row) : row));
  }

  /**
   * Checks if a file exists.
   * @param {string} filePath - The path of the file to check.
   * @returns {Promise<boolean>} A promise that resolves to true if `fs.access` succeeds, false if it rejects.
   */
  public static async exists(filePath: string): Promise<boolean> {
    try {
      await fs.access(filePath);
      return true;
    } catch (error) {
      // Any access failure is reported as "does not exist".
      return false;
    }
  }

  /**
   * Recursively copies files if they are newer.
   *
   * Directories are walked entry by entry; a file is copied when the
   * destination is missing or its modification time is older than the source's.
   * A missing source is silently ignored.
   * @param {string} src - The source path.
   * @param {string} dest - The destination path.
   * @returns {Promise<void>} A promise that resolves when the copy operation is complete.
   */
  public static async copyIfNewerRecursive(src: string, dest: string): Promise<void> {
    if (!(await GBUtil.exists(src))) {
      return;
    }

    const srcStat = await fs.stat(src);

    if (srcStat.isDirectory()) {
      // `recursive: true` makes this a no-op when the directory already exists.
      await fs.mkdir(dest, { recursive: true });

      const entries = await fs.readdir(src);
      for (const entry of entries) {
        // Refer to the class rather than `this` so the method still works when detached.
        await GBUtil.copyIfNewerRecursive(path.join(src, entry), path.join(dest, entry));
      }
      return;
    }

    // Source is a file: copy when the destination is absent or older.
    let shouldCopy = true;
    if (await GBUtil.exists(dest)) {
      const destStat = await fs.stat(dest);
      shouldCopy = srcStat.mtime > destStat.mtime;
    }
    if (shouldCopy) {
      await fs.cp(src, dest, { force: true });
    }
  }

  /**
   * Lists database tables.
   *
   * For `sqlite` the result of `showAllTables()` is returned unchanged. For any
   * other dialect, entries that are objects are reduced to their `tableName`,
   * while entries that are already strings are kept.
   * @param {any} dialect - The database dialect.
   * @param {any} seq - The Sequelize instance.
   * @returns {Promise<string[]>} A promise that resolves to an array of table names.
   */
  public static async listTables(dialect: any, seq: any): Promise<string[]> {
    const tables = await seq.getQueryInterface().showAllTables();
    if (dialect === 'sqlite') {
      return tables;
    }
    return tables.map((table: any) => (typeof table === 'string' ? table : table.tableName));
  }

  /**
   * Checks if an object has sub-objects.
   *
   * Only own enumerable properties are inspected. The test is
   * `typeof value === 'object'`, so arrays and `null` values count as
   * sub-objects too.
   * @param {any} t - The object to check.
   * @returns {boolean} True if the object has sub-objects, false otherwise.
   */
  public static hasSubObject(t: any): boolean {
    for (const key in t) {
      // Call through Object.prototype so objects without a prototype
      // (Object.create(null)) do not throw.
      if (!Object.prototype.hasOwnProperty.call(t, key)) continue;
      if (typeof t[key] === 'object') return true;
    }
    return false;
  }

  /**
   * Extracts text from a PDF.
   *
   * Pages are read in order; the text items of each page are joined with a
   * space, runs of whitespace are collapsed, and the pages are joined with a
   * single space.
   * @param {any} data - The PDF data handed to `getDocument`.
   * @returns {Promise<string>} A promise that resolves to the extracted text.
   */
  public static async getPdfText(data: any): Promise<string> {
    const pdf = await getDocument({ data }).promise;
    const pages: string[] = [];
    for (let pageNo = 1; pageNo <= pdf.numPages; pageNo++) {
      const page = await pdf.getPage(pageNo);
      const textContent = await page.getTextContent();
      const text = textContent.items
        .map(item => item['str'])
        .join(' ')
        .replace(/\s+/g, ' ');
      pages.push(text);
    }
    return pages.join(' ');
  }

  /**
   * Gets the path for GBAI (General Bots AI) files.
   *
   * When the `DEV_GBAI` setting is present it replaces `<botId>.gbai` as the
   * root, and the bot id used for the default package name is derived from it
   * by stripping the trailing extension.
   * @param {string} botId - The bot ID.
   * @param {string} [packageType] - The package type, used to build `<botId>.<packageType>`.
   * @param {string} [packageName] - The package name; takes precedence over `packageType`.
   * @returns {string} The GBAI root when neither package argument is given, otherwise root joined with the package.
   */
  public static getGBAIPath(botId: string, packageType?: string, packageName?: string): string {
    const devGbai = GBConfigService.get('DEV_GBAI');
    const root = devGbai ? devGbai : `${botId}.gbai`;

    if (!packageType && !packageName) {
      return root;
    }

    const ownerId = devGbai ? devGbai.replace(/\.[^/.]+$/, '') : botId;
    return urljoin(root, packageName ? packageName : `${ownerId}.${packageType}`);
  }

  /**
   * Converts a PDF page to an image.
   *
   * Every rendered page is written as `img<random id>.png` under
   * `work/<gbai>/cache` and is paired with a public URL built from
   * `GBServer.globals.publicAddress`, the bot id and `cache`.
   * @param {any} min - Object exposing `botId`; also passed to the logger.
   * @param {string} filename - The filename of the PDF.
   * @param {number} [pageNumber] - The page number to convert (optional); all pages when omitted.
   * @returns {Promise<any[]>} A promise that resolves to an array of `{ localName, url, data }`
   * entries, or to `null` when no page was produced.
   */
  public static async pdfPageAsImage(min: any, filename: string, pageNumber?: number): Promise<any[]> {
    GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber ?? 'all'}...`);

    const options = {
      disableFontFace: true,
      useSystemFonts: true,
      viewportScale: 2.0,
      pagesToProcess: pageNumber !== undefined ? [pageNumber] : undefined,
      strictPagesToProcess: false,
      verbosityLevel: 0 // NOTE(review): presumably the quietest pdf.js level; confirm
    };
    const pngPages: PngPageOutput[] = await pdfToPng(filename, options);

    const generatedFiles: any[] = [];
    if (pngPages.length > 0) {
      // The target folder is the same for every page, so resolve it once and
      // make sure it exists before the first write.
      const cacheDir = path.join('work', GBUtil.getGBAIPath(min.botId), 'cache');
      await fs.mkdir(cacheDir, { recursive: true });

      for (const pngPage of pngPages) {
        const buffer = pngPage.content;
        const localName = path.join(cacheDir, `img${GBAdminService.getRndReadableIdentifier()}.png`);
        const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName));
        await fs.writeFile(localName, buffer, { encoding: null });
        generatedFiles.push({ localName: localName, url: url, data: buffer });
      }
    }

    // Callers receive null, not an empty array, when nothing was generated.
    return generatedFiles.length > 0 ? generatedFiles : null;
  }

  /**
   * Implements a random delay of a whole number of seconds.
   * @param {number} [min=1] - The minimum delay in seconds.
   * @param {number} [max=5] - The maximum delay in seconds.
   * @returns {Promise<void>} A promise that resolves after the random delay.
   */
  public static async sleepRandom(min: number = 1, max: number = 5): Promise<void> {
    const randomDelay = Math.floor(Math.random() * (max - min + 1) + min) * 1000;
    await GBUtil.sleep(randomDelay);
  }

  /**
   * Decides whether a page of text looks like real content.
   *
   * A page is rejected when it is empty or blank, when (ignoring whitespace) it
   * contains a run of ten or more dots or consists only of digits, when its
   * trimmed text is exactly "index" or "table of contents" (any case), or when
   * it has ten words or fewer.
   * @param {string} text - The text of the page.
   * @returns {boolean} True if the page is considered content, false otherwise.
   */
  public static isContentPage(text: string): boolean {
    // Missing or empty text is treated like a blank page.
    if (!text) {
      return false;
    }

    const trimmed = text.trim();
    if (trimmed.length === 0) {
      return false;
    }

    // Dot leaders and page-number-only pages are detected with whitespace removed.
    const compact = text.replace(/\s+/g, '');
    if (/\.{10,}/.test(compact) || /^\d+$/.test(compact)) {
      return false;
    }

    const nonContentPatterns = [
      /^index$/i,
      /^table of contents$/i,
    ];
    if (nonContentPatterns.some(pattern => pattern.test(trimmed))) {
      return false;
    }

    // Require more than ten whitespace-separated words.
    return trimmed.split(/\s+/).length > 10;
  }
}