fix(basic.gblib): #282 Fix SSR for Bots 3.0.

This commit is contained in:
rodrigorodriguez 2023-02-20 10:29:04 -03:00
parent 7f3bd7d8fe
commit 866b361292
6 changed files with 82 additions and 56 deletions

View file

@ -36,39 +36,39 @@
'use strict';
import { GBLog, IGBInstance } from "botlib";
import { GuaribasLog } from "../models/GBModel.js";
import { GBLog, IGBInstance } from 'botlib';
import { GuaribasLog } from '../models/GBModel.js';
export class GBLogEx {
/**
 * Writes an error-level entry to the console logger and persists it.
 *
 * @param minOrInstanceId Either an object exposing `instance.instanceId`
 *   or the instance id itself.
 * @param message Text to log.
 */
public static async error(minOrInstanceId: any, message: string) {
  // `typeof null` is also 'object', so rule it out before dereferencing.
  if (typeof minOrInstanceId === 'object' && minOrInstanceId !== null) {
    minOrInstanceId = minOrInstanceId.instance.instanceId;
  }
  // Emit exactly one console entry, prefixed with the resolved id.
  GBLog.error(`${minOrInstanceId}: ${message}.`);
  // NOTE(review): log() declares an IGBInstance parameter but receives the
  // resolved id here — confirm which one it is meant to get.
  await this.log(minOrInstanceId, 'e', message);
}
/**
 * Writes a debug-level entry to the console logger and persists it.
 *
 * @param minOrInstanceId Either an object exposing `instance.instanceId`
 *   or the instance id itself.
 * @param message Text to log.
 */
public static async debug(minOrInstanceId: any, message: string) {
  // `typeof null` is also 'object', so rule it out before dereferencing.
  if (typeof minOrInstanceId === 'object' && minOrInstanceId !== null) {
    minOrInstanceId = minOrInstanceId.instance.instanceId;
  }
  // Emit exactly one console entry, prefixed with the resolved id.
  GBLog.debug(`${minOrInstanceId}: ${message}.`);
  // NOTE(review): log() declares an IGBInstance parameter but receives the
  // resolved id here — confirm which one it is meant to get.
  await this.log(minOrInstanceId, 'd', message);
}
/**
 * Writes an info-level entry to the console logger and persists it.
 *
 * @param minOrInstanceId Either an object exposing `instance.instanceId`
 *   or the instance id itself.
 * @param message Text to log.
 */
public static async info(minOrInstanceId: any, message: string) {
  // `typeof null` is also 'object', so rule it out before dereferencing.
  if (typeof minOrInstanceId === 'object' && minOrInstanceId !== null) {
    minOrInstanceId = minOrInstanceId.instance.instanceId;
  }
  // Emit exactly one console entry, prefixed with the resolved id.
  GBLog.info(`${minOrInstanceId}: ${message}.`);
  // NOTE(review): log() declares an IGBInstance parameter but receives the
  // resolved id here — confirm which one it is meant to get.
  await this.log(minOrInstanceId, 'i', message);
}
/**
 * Writes a verbose-level entry to the console logger and persists it.
 *
 * @param minOrInstanceId Either an object exposing `instance.instanceId`
 *   or the instance id itself.
 * @param message Text to log.
 */
public static async verbose(minOrInstanceId: any, message: string) {
  // `typeof null` is also 'object', so rule it out before dereferencing.
  if (typeof minOrInstanceId === 'object' && minOrInstanceId !== null) {
    minOrInstanceId = minOrInstanceId.instance.instanceId;
  }
  // Emit exactly one console entry, prefixed with the resolved id.
  GBLog.verbose(`${minOrInstanceId}: ${message}.`);
  // NOTE(review): log() declares an IGBInstance parameter but receives the
  // resolved id here — confirm which one it is meant to get.
  await this.log(minOrInstanceId, 'v', message);
}
@ -76,9 +76,9 @@ export class GBLogEx {
* Persists a log entry of the given kind for an instance, truncating the message to 1023 characters.
*/
public static async log(instance: IGBInstance, kind: string, message: string): Promise<GuaribasLog> {
message = message?message.substring(0,1023):null;
message = message ? message.substring(0, 1023) : null;
return await GuaribasLog.create(<GuaribasLog>{
instanceId: instance.instanceId,
instanceId: instance ? instance.instanceId : 1,
message: message,
kind: kind
});

View file

@ -96,7 +96,7 @@ export class GBMinService {
/**
* Default General Bots User Interface package.
*/
private static uiPackage = 'default.gbui';
public static uiPackage = 'default.gbui';
/**
* Main core service attached to this bot service.
@ -142,23 +142,11 @@ export class GBMinService {
// Serves the default UI on root address '/' if web is enabled.
if (process.env.DISABLE_WEB !== 'true') {
// SSR processing.
// SSR processing and default.gbui access definition.
const defaultOptions = {
prerender: [],
exclude: ['/api/', '/instances/', '/webhooks/'],
useCache: true,
cacheRefreshRate: 86400
};
// GBServer.globals.server.use(ssrForBots(defaultOptions));
const url = GBServer.globals.wwwroot
? GBServer.globals.wwwroot
: urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build');
// default.gbui access definition.
GBServer.globals.server.use('/', express.static(url));
GBServer.globals.server.get('/', async (req, res, next)=> {
await GBSSR.ssrFilter(req, res, next);
});
// Serves the bot information object via HTTP so clients can get
// instance information stored on the server.
@ -376,24 +364,22 @@ export class GBMinService {
if (process.env.DISABLE_WEB !== 'true') {
const uiUrl = `/${instance.botId}`;
let staticHandler = express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build'));
GBServer.globals.server.get(uiUrl, async (req, res, next)=> {
await GBSSR.ssrFilter(req, res, staticHandler as any);
await GBSSR.ssrFilter(req, res, next);
});
const uiUrlAlt = `/${instance.activationCode}`;
GBServer.globals.server.use(
uiUrlAlt,
express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build'))
);
GBServer.globals.server.get(uiUrlAlt, async (req, res, next)=> {
await GBSSR.ssrFilter(req, res, next);
});
const domain = min.core.getParam(min.instance, 'Domain', null);
if (domain) {
GBServer.globals.server.use(
domain,
express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build'))
);
GBLog.verbose(`Bot UI ${GBMinService.uiPackage} accessible at custom domain: ${domain}.`);
GBServer.globals.server.get(domain, async (req, res, next)=> {
await GBSSR.ssrFilter(req, res, next);
});
GBLog.verbose(`Bot UI ${GBMinService.uiPackage} accessible at custom domain: ${domain}.`);
}
GBLog.verbose(`Bot UI ${GBMinService.uiPackage} accessible at: ${uiUrl} and ${uiUrlAlt}.`);
}
@ -607,6 +593,7 @@ export class GBMinService {
* Gets a Speech to Text / Text to Speech token from the provider.
*/
private async getSTSToken(instance: any) {
return null; // TODO: https://github.com/GeneralBots/BotServer/issues/332
const options = {
method: 'POST',
headers: {

View file

@ -44,6 +44,9 @@ import { GBMinInstance } from 'botlib';
import { GBServer } from '../../../src/app.js';
import { GBLogEx } from './GBLogEx.js';
import { createRequire } from 'module';
import urlJoin from 'url-join';
import { GBDeployer } from './GBDeployer.js';
import { GBMinService } from './GBMinService.js';
const require = createRequire(import.meta.url);
const puppeteer = require('puppeteer-extra');
const hidden = require('puppeteer-extra-plugin-stealth');
@ -53,10 +56,19 @@ export class GBSSR {
// https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952
// Don't download all resources; we only need the HTML.
// Also, this is a huge performance/response time boost.
private blockedResourceTypes = ['image', 'media', 'font', 'texttrack', 'object', 'beacon', 'csp_report', 'imageset'];
private static blockedResourceTypes = [
'image',
'media',
'font',
'texttrack',
'object',
'beacon',
'csp_report',
'imageset'
];
// const whitelist = ["document", "script", "xhr", "fetch"];
private skippedResources = [
private static skippedResources = [
'quantserve',
'adzerk',
'doubleclick',
@ -115,10 +127,11 @@ export class GBSSR {
/**
* Return the HTML of bot default.gbui.
*/
public async getHTML(min: GBMinInstance) {
public static async getHTML(min: GBMinInstance) {
const url = urljoin(GBServer.globals.publicAddress, min.botId);
const browser = await GBSSR.createBrowser(null);
const stylesheetContents = {};
let html;
try {
const page = await browser.newPage();
@ -129,8 +142,8 @@ export class GBSSR {
page.on('request', request => {
const requestUrl = request.url().split('?')[0].split('#')[0];
if (
this.blockedResourceTypes.indexOf(request.resourceType()) !== -1 ||
this.skippedResources.some(resource => requestUrl.indexOf(resource) !== -1)
GBSSR.blockedResourceTypes.indexOf(request.resourceType()) !== -1 ||
GBSSR.skippedResources.some(resource => requestUrl.indexOf(resource) !== -1)
) {
request.abort();
} else {
@ -147,6 +160,9 @@ export class GBSSR {
}
});
await page.setExtraHTTPHeaders({
'ngrok-skip-browser-warning': '1'
});
const response = await page.goto(url, {
timeout: 120000,
waitUntil: 'networkidle0'
@ -157,14 +173,15 @@ export class GBSSR {
setTimeout(resolve, ms);
});
};
await sleep(45000);
await sleep(15000);
// Inject <base> into the page so relative resources load properly.
await page.evaluate(url => {
const base = document.createElement('base');
base.href = url;
// Add to top of head, before all other resources.
// Add to top of head, before all other resources.
document.head.prepend(base);
}, url);
@ -194,19 +211,18 @@ export class GBSSR {
stylesheetContents
);
const html = await page.content();
html = await page.content();
// Close the page we opened here (not the browser).
await page.close();
return html;
} catch (e) {
const html = e.toString();
GBLogEx.error(min, `URL: ${url} Failed with message: ${html}`);
return html;
} finally {
await browser.close();
}
return html;
}
public static async ssrFilter(req: Request, res: Response, next) {
@ -257,19 +273,34 @@ export class GBSSR {
const botId = req.originalUrl ? req.originalUrl.substr(1) : GBServer.globals.minInstances[0].botId; // TODO: Get only bot.
const min: GBMinInstance = GBServer.globals.minInstances.filter(p => p.instance.botId === botId)[0];
const path = Path.join(process.env.PWD, 'work', `${min.instance.botId}.gbai`, `${min.instance.botId}.gbui`, 'index.html');
if (req.originalUrl && prerender && exclude) {
if (min && req.originalUrl && prerender && exclude) {
const path = Path.join(
process.env.PWD,
'work',
`${min.instance.botId}.gbai`,
`${min.instance.botId}.gbui`,
'index.html'
);
const html = Fs.readFileSync(path, 'utf8');
res.status(200).send(html);
return true;
} else {
const path = Path.join(
process.env.PWD,
GBDeployer.deployFolder,
GBMinService.uiPackage,
'build',
min ? 'index.html' : req.url
);
if (Fs.existsSync(path)) {
res.sendFile(path);
return true;
} else {
GBLogEx.info(min, `HTTP 404: ${req.url}.`);
res.status(404);
res.end();
}
}
}
}
}

View file

@ -64,6 +64,8 @@ import { GuaribasAnswer, GuaribasQuestion, GuaribasSubject } from '../models/ind
import { GBConfigService } from './../../core.gbapp/services/GBConfigService.js';
import textract from 'textract';
import pdf from 'pdf-extraction';
import { GBSSR } from '../../core.gbapp/services/GBSSR.js';
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
/**
* Result for query on KB data.
@ -775,7 +777,13 @@ export class KBService implements IGBKBService {
*/
public async deployKb(core: IGBCoreService, deployer: GBDeployer, localPath: string, min: GBMinInstance) {
const packageName = Path.basename(localPath);
GBLog.info(`[GBDeployer] Opening package: ${localPath}`);
const html = await GBSSR.getHTML(min);
const path = Path.join(process.env.PWD, 'work', `${min.instance.botId}.gbai`, `${min.instance.botId}.gbui`, 'index.html');
GBLogEx.info(min, `[GBDeployer] Generating SSR HTML in ${path}.`);
Fs.writeFileSync(path, html, 'utf8');
const instance = await core.loadInstanceByBotId(min.botId);
GBLog.info(`[GBDeployer] Importing: ${localPath}`);
@ -788,6 +796,8 @@ export class KBService implements IGBKBService {
min['groupCache'] = await KBService.getGroupReplies(instance.instanceId);
await KBService.RefreshNER(min);
GBLog.info(`[GBDeployer] Finished import of ${localPath}`);
}

View file

@ -230,10 +230,8 @@ export class GBServer {
winston.default(server, loggers[1]);
}
server.get('*', function (req, res) {
GBLog.info(`HTTP 404: ${req.url}.`);
res.status(404);
res.end();
server.get('*', async (req, res, next) => {
await GBSSR.ssrFilter(req, res, next);
});
GBLog.info(`The Bot Server is in RUNNING mode...`);