398 lines
13 KiB
TypeScript
398 lines
13 KiB
TypeScript
|
|
/**
|
|
* @fileoverview General Bots local utility.
|
|
* This file contains utility functions used across the General Bots project.
|
|
* @license AGPL-3.0
|
|
*/
|
|
|
|
'use strict';
|
|
|
|
import * as YAML from 'yaml';
|
|
import SwaggerClient from 'swagger-client';
|
|
import fs from 'fs/promises';
|
|
import { GBConfigService } from '../packages/core.gbapp/services/GBConfigService.js';
|
|
import path from 'path';
|
|
import { VerbosityLevel, getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
|
import urljoin from 'url-join';
|
|
import { GBAdminService } from '../packages/admin.gbapp/services/GBAdminService.js';
|
|
import { GBLogEx } from '../packages/core.gbapp/services/GBLogEx.js';
|
|
import { PngPageOutput, pdfToPng } from 'pdf-to-png-converter';
|
|
import urlJoin from 'url-join';
|
|
import { GBServer } from './app.js';
|
|
import { QueryTypes } from '@sequelize/core';
|
|
|
|
// ... existing code ...
|
|
|
|
/**
|
|
* Utility class containing various helper functions for the General Bots project.
|
|
*/
|
|
export class GBUtil {
  /**
   * Repeats a string a specified number of times.
   * @param {string} chr - The character (or string) to repeat.
   * @param {number} count - The number of repetitions. Non-positive or NaN counts
   * yield an empty string; fractional counts are rounded up.
   * @returns {string} The repeated string.
   */
  public static repeat(chr: string, count: number): string {
    // String.prototype.repeat throws on negative counts and floors fractions; the guard
    // and Math.ceil keep the results of the previous hand-written loop for those inputs.
    return count > 0 ? chr.repeat(Math.ceil(count)) : '';
  }

  /**
   * Pads a string on the left with a specified character, truncating to `width`
   * when the value is longer than `width`.
   * @param {string} value - The string to pad.
   * @param {number} width - The desired width of the result. Falsy or < 1 returns `value` unchanged.
   * @param {string} [pad=' '] - The character to use for padding. An empty pad falls back to a space.
   * @returns {string} The padded (or truncated) string, at most `width` characters long.
   */
  public static padL(value: string, width: number, pad: string = ' '): string {
    if (!width || width < 1) return value;

    // padStart fills exactly up to `width`, so a multi-character pad can no longer
    // push the value itself out of the result; slice keeps the truncation contract.
    return value.padStart(width, pad || ' ').slice(0, width);
  }

  /**
   * Pads a string on the right with a specified character, truncating to `width`
   * when the value is longer than `width`.
   * @param {string} value - The string to pad.
   * @param {number} width - The desired width of the result. Falsy or < 1 returns `value` unchanged.
   * @param {string} [pad=' '] - The character to use for padding. An empty pad falls back to a space.
   * @returns {string} The padded (or truncated) string, at most `width` characters long.
   */
  public static padR(value: string, width: number, pad: string = ' '): string {
    if (!width || width < 1) return value;

    return value.padEnd(width, pad || ' ').slice(0, width);
  }

  /**
   * Gets a DirectLine client for bot communication.
   *
   * The API description is read from `directline-v2.json` (relative path) and every
   * request is sent with the instance web chat key as a Bearer token. Unless
   * `GB_MODE` is `legacy`, the spec is redirected to this server's own
   * `/api/messages/<botId>` endpoint over plain HTTP on 127.0.0.1.
   * @param {any} min - The bot instance holder; `min.instance.webchatKey` and `min.botId` are read.
   * @returns {Promise<SwaggerClient>} A promise that resolves to a SwaggerClient instance.
   */
  public static async getDirectLineClient(min: any): Promise<SwaggerClient> {
    const config = {
      spec: JSON.parse(await fs.readFile('directline-v2.json', 'utf8')),
      requestInterceptor: req => {
        req.headers['Authorization'] = `Bearer ${min.instance.webchatKey}`;
        // The interceptor contract is to hand the (mutated) request back.
        return req;
      }
    };

    if (GBConfigService.get('GB_MODE') !== 'legacy') {
      config.spec['host'] = `127.0.0.1:${GBConfigService.getServerPort()}`;
      config.spec['basePath'] = `/api/messages/${min.botId}`;
      config.spec['schemes'] = ['http'];
    }

    return await new SwaggerClient(config);
  }

  /**
   * Converts data to YAML format.
   *
   * Plain objects are copied through `Object.getOwnPropertyNames` first, so
   * non-enumerable own properties are included in the output. Arrays nested in the
   * data are passed through untouched. Circular references are replaced by the
   * string `[Circular]`; null, undefined and primitives are serialized directly.
   * @param {any} data - The data to convert to YAML.
   * @returns {string} The YAML representation of the data.
   */
  public static toYAML(data: any): string {
    // NOTE(review): `flowLevel` and `styles` look like js-yaml options; confirm the
    // `yaml` package honours them. They are kept as-is to preserve current output.
    const options = {
      indent: 2,
      flowLevel: -1,
      styles: { '!!null': 'canonical' }
    } as any;

    // Nothing to extract from null/undefined/primitives.
    if (data === null || data === undefined || typeof data !== 'object') {
      return YAML.stringify(data ?? null, options);
    }

    // Objects on the current recursion path; used to cut reference cycles.
    const ancestors = new Set<object>();

    const extractProps = (obj: any): Record<string, any> => {
      ancestors.add(obj);
      const extracted: Record<string, any> = {};
      for (const key of Object.getOwnPropertyNames(obj)) {
        const value = obj[key];
        if (value && typeof value === 'object' && !Array.isArray(value)) {
          extracted[key] = ancestors.has(value) ? '[Circular]' : extractProps(value);
        } else {
          extracted[key] = value;
        }
      }
      ancestors.delete(obj);
      return extracted;
    };

    return YAML.stringify(extractProps(data), options);
  }

  /**
   * Implements a delay function.
   * @param {number} ms - The number of milliseconds to sleep.
   * @returns {Promise<void>} A promise that resolves after the specified delay.
   */
  public static sleep(ms: number): Promise<void> {
    return new Promise<void>(resolve => {
      setTimeout(resolve, ms);
    });
  }

  /**
   * Creates case-insensitive proxies for objects or arrays.
   *
   * Property reads on the returned proxy ignore the case of the property name and
   * always reflect the current value on the underlying object. Only own enumerable
   * string keys are visible; any other property (inherited members, symbols) reads
   * as `undefined`. For arrays, each object element is wrapped and other elements
   * are returned unchanged.
   * @param {any} listOrRow - The object or array to make case-insensitive.
   * @returns {any} A case-insensitive version of the input; non-objects are returned as is.
   */
  public static caseInsensitive(listOrRow: any): any {
    // If the input is not an object or array, return it as is.
    if (!listOrRow || typeof listOrRow !== 'object') {
      return listOrRow;
    }

    const createCaseInsensitiveProxy = (obj: any) => {
      // Maps lower-cased key -> real key, so values are read live from the target
      // instead of from a snapshot taken at proxy creation time.
      const keyByLowercase = new Map<string, string>(
        Object.keys(obj).map((key): [string, string] => [key.toLowerCase(), key])
      );

      return new Proxy(obj, {
        get: (target, property) => {
          if (typeof property !== 'string') {
            return undefined;
          }
          const wanted = property.toLowerCase();
          let actualKey = keyByLowercase.get(wanted);
          if (actualKey === undefined) {
            // The key may have been added after the proxy was created.
            actualKey = Object.keys(target).find(key => key.toLowerCase() === wanted);
            if (actualKey !== undefined) {
              keyByLowercase.set(wanted, actualKey);
            }
          }
          return actualKey === undefined ? undefined : target[actualKey];
        }
      });
    };

    if (Array.isArray(listOrRow)) {
      return listOrRow.map(row => (typeof row === 'object' && row !== null ? createCaseInsensitiveProxy(row) : row));
    }
    return createCaseInsensitiveProxy(listOrRow);
  }

  /**
   * Checks if a file exists.
   * @param {string} filePath - The path of the file to check.
   * @returns {Promise<boolean>} A promise that resolves to true if `fs.access` succeeds
   * for the path, false if it fails for any reason.
   */
  public static async exists(filePath: string): Promise<boolean> {
    try {
      await fs.access(filePath);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Recursively copies files if they are newer.
   *
   * Directories are walked recursively and created at the destination when missing.
   * A file is copied when the destination does not exist or when the source
   * modification time is later than the destination's. A missing source is a no-op.
   * @param {string} src - The source path.
   * @param {string} dest - The destination path.
   * @returns {Promise<void>} A promise that resolves when the copy operation is complete.
   */
  public static async copyIfNewerRecursive(src: string, dest: string): Promise<void> {
    if (!(await GBUtil.exists(src))) {
      return;
    }

    const srcStat = await fs.stat(src);

    if (srcStat.isDirectory()) {
      if (!(await GBUtil.exists(dest))) {
        await fs.mkdir(dest, { recursive: true });
      }

      const entries = await fs.readdir(src);
      for (const entry of entries) {
        // Recurse through the class name rather than `this`, so the method keeps
        // working when it is passed around detached from the class.
        await GBUtil.copyIfNewerRecursive(path.join(src, entry), path.join(dest, entry));
      }
      return;
    }

    // Source is a file: copy when the destination is missing or older.
    if (await GBUtil.exists(dest)) {
      const destStat = await fs.stat(dest);
      if (srcStat.mtime <= destStat.mtime) {
        return;
      }
    }
    await fs.cp(src, dest, { force: true });
  }

  /**
   * Lists database tables.
   *
   * For `sqlite` the result of `showAllTables()` is returned untouched. For every
   * other dialect each entry is reduced to its `tableName`; entries that are
   * already plain strings are kept as they are.
   * @param {any} dialect - The database dialect.
   * @param {any} seq - The Sequelize instance.
   * @returns {Promise<string[]>} A promise that resolves to an array of table names.
   */
  public static async listTables(dialect: any, seq: any): Promise<string[]> {
    const tables = await seq.getQueryInterface().showAllTables();
    if (dialect === 'sqlite') {
      return tables;
    }
    return tables.map((table: any) => (typeof table === 'string' ? table : table.tableName));
  }

  /**
   * Checks if an object has sub-objects.
   * @param {any} t - The object to check.
   * @returns {boolean} True if at least one own enumerable property holds a non-null
   * object (arrays included), false otherwise.
   */
  public static hasSubObject(t: any): boolean {
    if (t === null || t === undefined) {
      return false;
    }
    // `typeof null` is 'object', so null values must be excluded explicitly.
    return Object.values(t).some(value => value !== null && typeof value === 'object');
  }

  /**
   * Extracts text from a PDF.
   *
   * The text items of every page are joined with single spaces, runs of whitespace
   * are collapsed, and the pages are joined with a single space.
   * @param {any} data - The PDF data, passed to pdf.js `getDocument`.
   * @returns {Promise<string>} A promise that resolves to the extracted text.
   */
  public static async getPdfText(data: any): Promise<string> {
    const pdf = await getDocument({ data }).promise;
    try {
      const pages: string[] = [];

      for (let pageIndex = 1; pageIndex <= pdf.numPages; pageIndex++) {
        const page = await pdf.getPage(pageIndex);
        const textContent = await page.getTextContent();
        const text = textContent.items
          .map(item => item['str'])
          .join(' ')
          .replace(/\s+/g, ' ');
        pages.push(text);
      }

      return pages.join(' ');
    } finally {
      // Release the document and its worker resources once the text is extracted.
      await pdf.destroy();
    }
  }

  /**
   * Gets the path for GBAI (General Bots AI) files.
   *
   * When the `DEV_GBAI` setting is present it replaces `<botId>.gbai` as the root,
   * and its name without extension replaces `botId` when building the package name.
   * @param {string} botId - The bot ID.
   * @param {string} [packageType] - The package type, used as the extension of the package folder.
   * @param {string} [packageName] - The package name; takes precedence over `packageType`.
   * @returns {string} The GBAI root when neither optional argument is given, otherwise
   * the root joined with the package folder.
   */
  public static getGBAIPath(botId: string, packageType?: string, packageName?: string): string {
    const devGbai = GBConfigService.get('DEV_GBAI');
    const root = devGbai ? devGbai : `${botId}.gbai`;

    if (!packageType && !packageName) {
      return root;
    }

    const baseName = devGbai ? devGbai.replace(/\.[^/.]+$/, '') : botId;
    return urljoin(root, packageName ? packageName : `${baseName}.${packageType}`);
  }

  /**
   * Converts a PDF page to an image.
   *
   * Each rendered page is written as a PNG with a random name under
   * `work/<gbai>/cache` and described by `{ localName, url, data }`.
   * @param {any} min - The bot instance holder; `min.botId` is read.
   * @param {string} filename - The filename of the PDF.
   * @param {number} [pageNumber] - The page number to convert (optional, all pages when omitted).
   * @returns {Promise<any[]>} A promise that resolves to an array of generated image files,
   * or to `null` when no page was rendered.
   */
  public static async pdfPageAsImage(min: any, filename: string, pageNumber?: number): Promise<any[]> {
    GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber ?? 'all'}...`);

    const options = {
      disableFontFace: true,
      useSystemFonts: true,
      viewportScale: 2.0,
      pagesToProcess: pageNumber !== undefined ? [pageNumber] : undefined,
      strictPagesToProcess: false,
      verbosityLevel: 0
    };

    const pngPages: PngPageOutput[] = await pdfToPng(filename, options);

    const cacheDir = path.join('work', GBUtil.getGBAIPath(min.botId), 'cache');
    if (pngPages.length > 0) {
      // The cache folder may not exist yet; writeFile does not create parent folders.
      await fs.mkdir(cacheDir, { recursive: true });
    }

    const generatedFiles = [];

    for (const pngPage of pngPages) {
      const buffer = pngPage.content;
      const localName = path.join(cacheDir, `img${GBAdminService.getRndReadableIdentifier()}.png`);
      const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName));

      await fs.writeFile(localName, buffer, { encoding: null });

      generatedFiles.push({ localName: localName, url: url, data: buffer });
    }

    // Callers receive null, not an empty array, when nothing was generated.
    return generatedFiles.length > 0 ? generatedFiles : null;
  }

  /**
   * Implements a random delay.
   * @param {number} [min=1] - The minimum delay in seconds.
   * @param {number} [max=5] - The maximum delay in seconds.
   * @returns {Promise<void>} A promise that resolves after the random delay.
   */
  public static async sleepRandom(min: number = 1, max: number = 5): Promise<void> {
    const randomDelay = Math.floor(Math.random() * (max - min + 1) + min) * 1000;
    await GBUtil.sleep(randomDelay);
  }

  /**
   * Decides whether the text of a page looks like real content.
   *
   * A page is rejected when it is blank, consists only of digits, contains a run
   * of ten or more dots once whitespace is removed (dot leaders), has ten words or
   * fewer, or is exactly "index" or "table of contents" (case-insensitive).
   * @param {string} text - The text of the page.
   * @returns {boolean} True if the page is considered content, false otherwise.
   */
  public static isContentPage(text: string): boolean {
    if (!text) {
      return false;
    }

    // Titles that on their own mark a non-content page.
    const nonContentPatterns = [/^index$/i, /^table of contents$/i];

    const trimmed = text.trim();
    const compact = text.replace(/\s+/g, '');

    const isDotLeaderPage = /\.{10,}/.test(compact);
    const isNumbersPage = /^\d+$/.test(compact);
    const isBlankPage = trimmed.length === 0;

    const wordCount = trimmed.split(/\s+/).length;
    const hasMinimalContent = wordCount > 10;

    const isNonContent = nonContentPatterns.some(pattern => pattern.test(trimmed));

    return !isDotLeaderPage && !isNumbersPage && !isBlankPage && hasMinimalContent && !isNonContent;
  }
}
|