fix(basic.gblib): #282 Fix SSR for Bots 3.0.

This commit is contained in:
rodrigorodriguez 2023-02-18 16:48:40 -03:00
parent bc85f714ca
commit 7f3bd7d8fe
7 changed files with 253 additions and 283 deletions

View file

@ -32,7 +32,7 @@
'use strict'; 'use strict';
import { createBrowser } from '../../core.gbapp/services/GBSSR.js'; import { GBSSR }from '../../core.gbapp/services/GBSSR.js';
export class ChartServices { export class ChartServices {
/** /**
@ -41,7 +41,7 @@ export class ChartServices {
* @param {string} path screenshot image full path with file name * @param {string} path screenshot image full path with file name
*/ */
public static async screenshot (args, path) { public static async screenshot (args, path) {
const browser = await createBrowser(null); const browser = await GBSSR.createBrowser(null);
const page = await browser.newPage(); const page = await browser.newPage();
// load billboard.js assets from CDN. // load billboard.js assets from CDN.

View file

@ -39,7 +39,7 @@ import { DialogKeywords } from './DialogKeywords.js';
import { GBServer } from '../../../src/app.js'; import { GBServer } from '../../../src/app.js';
import { GBVMService } from './GBVMService.js'; import { GBVMService } from './GBVMService.js';
import Fs from 'fs'; import Fs from 'fs';
import { createBrowser } from '../../core.gbapp/services/GBSSR.js'; import { GBSSR }from '../../core.gbapp/services/GBSSR.js';
import urlJoin from 'url-join'; import urlJoin from 'url-join';
import Excel from 'exceljs'; import Excel from 'exceljs';
import { TwitterApi } from 'twitter-api-v2'; import { TwitterApi } from 'twitter-api-v2';
@ -257,7 +257,7 @@ export class SystemKeywords {
const { min, user } = await DialogKeywords.getProcessInfo(pid); const { min, user } = await DialogKeywords.getProcessInfo(pid);
const gbaiName = `${min.botId}.gbai`; const gbaiName = `${min.botId}.gbai`;
const browser = await createBrowser(null); const browser = await GBSSR.createBrowser(null);
const page = await browser.newPage(); const page = await browser.newPage();
// Includes the associated CSS related to current theme. // Includes the associated CSS related to current theme.

View file

@ -32,19 +32,19 @@
'use strict'; 'use strict';
import { GBLog, GBMinInstance } from 'botlib';
import { GBServer } from '../../../src/app.js';
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
import { createBrowser } from '../../core.gbapp/services/GBSSR.js';
import { GuaribasUser } from '../../security.gbapp/models/index.js';
import { DialogKeywords } from './DialogKeywords.js';
import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js';
import urlJoin from 'url-join'; import urlJoin from 'url-join';
import Fs from 'fs'; import Fs from 'fs';
import Path from 'path'; import Path from 'path';
import url from 'url'; import url from 'url';
import { Mutex, Semaphore, withTimeout } from 'async-mutex';
import { GBLog, GBMinInstance } from 'botlib';
import { GBServer } from '../../../src/app.js';
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
import { GBSSR }from '../../core.gbapp/services/GBSSR.js';
import { GuaribasUser } from '../../security.gbapp/models/index.js';
import { DialogKeywords } from './DialogKeywords.js';
import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js';
import { Mutex } from 'async-mutex';
import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js'; import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js';
/** /**
@ -172,16 +172,17 @@ export class WebAutomationServices {
let browser; let browser;
if (!page) { if (!page) {
browser = await createBrowser(null); browser = await GBSSR.createBrowser(null);
page = (await browser.pages())[0]; page = (await browser.pages())[0];
if (username || password) { if (username || password) {
await page.authenticate({ pid, username: username, password: password }); await page.authenticate({ pid, username: username, password: password });
} }
} }
// There is no session yet, // There is no session yet.
if (!session && sessionKind === 'AS') { if (!session && sessionKind === 'AS') {
// A new web session is being created. // A new web session is being created.
handle = WebAutomationServices.cyrb53(this.min.botId + url); handle = WebAutomationServices.cyrb53(this.min.botId + url);
@ -459,4 +460,20 @@ export class WebAutomationServices {
return file; return file;
} }
/**
 * Looks for an element matching `selector` inside the frames nested under
 * `inputFrame`.
 *
 * Only the descendants are queried; `inputFrame` itself is never searched.
 * All direct child frames are probed concurrently, and a child is descended
 * into only when it has no match of its own and has child frames.
 *
 * @param inputFrame Frame whose child frames are searched.
 * @param selector Selector passed to each frame's `$` lookup.
 * @returns The first truthy result in child-frame order, or `undefined`
 * when no frame produced a match.
 */
private async recursiveFindInFrames (inputFrame, selector) {
  const lookups = inputFrame.childFrames().map(async child => {
    const match = await child.$(selector);
    if (match) {
      return match;
    }
    // No direct hit: continue into nested frames only when there are any.
    return child.childFrames().length > 0 ? await this.recursiveFindInFrames(child, selector) : null;
  });
  const found = await Promise.all(lookups);
  return found.find(Boolean);
}
} }

View file

@ -87,6 +87,7 @@ import { GoogleChatDirectLine } from '../../google-chat.gblib/services/GoogleCha
import { SystemKeywords } from '../../basic.gblib/services/SystemKeywords.js'; import { SystemKeywords } from '../../basic.gblib/services/SystemKeywords.js';
import * as nlp from 'node-nlp'; import * as nlp from 'node-nlp';
import Path from 'path'; import Path from 'path';
import { GBSSR } from './GBSSR.js';
/** /**
* Minimal service layer for a bot and encapsulation of BOT Framework calls. * Minimal service layer for a bot and encapsulation of BOT Framework calls.
@ -236,6 +237,7 @@ export class GBMinService {
const url = `/api/messages/${botId}`; const url = `/api/messages/${botId}`;
removeRoute(GBServer.globals.server, url); removeRoute(GBServer.globals.server, url);
const uiUrl = `/${botId}`; const uiUrl = `/${botId}`;
removeRoute(GBServer.globals.server, uiUrl); removeRoute(GBServer.globals.server, uiUrl);
@ -295,6 +297,10 @@ export class GBMinService {
if (!Fs.existsSync(dir)) { if (!Fs.existsSync(dir)) {
mkdirp.sync(dir); mkdirp.sync(dir);
} }
dir = `work/${min.botId}.gbai/${min.botId}.gbui`;
if (!Fs.existsSync(dir)) {
mkdirp.sync(dir);
}
// Loads Named Entity data for this bot. // Loads Named Entity data for this bot.
@ -370,15 +376,17 @@ export class GBMinService {
if (process.env.DISABLE_WEB !== 'true') { if (process.env.DISABLE_WEB !== 'true') {
const uiUrl = `/${instance.botId}`; const uiUrl = `/${instance.botId}`;
let staticHandler = express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build'));
GBServer.globals.server.get(uiUrl, async (req, res, next)=> {
await GBSSR.ssrFilter(req, res, staticHandler as any);
});
const uiUrlAlt = `/${instance.activationCode}`; const uiUrlAlt = `/${instance.activationCode}`;
GBServer.globals.server.use(
uiUrl,
express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build'))
);
GBServer.globals.server.use( GBServer.globals.server.use(
uiUrlAlt, uiUrlAlt,
express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build')) express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build'))
); );
const domain = min.core.getParam(min.instance, 'Domain', null); const domain = min.core.getParam(min.instance, 'Domain', null);
if (domain) { if (domain) {
GBServer.globals.server.use( GBServer.globals.server.use(

View file

@ -36,27 +36,27 @@
'use strict'; 'use strict';
import Path from 'path';
import {createRequire} from "module";
const require = createRequire(import.meta.url);
const puppeteer = require('puppeteer-extra');
const hidden = require('puppeteer-extra-plugin-stealth')
// require executablePath from puppeteer
const {executablePath} = require('puppeteer')
import Fs from 'fs'; import Fs from 'fs';
import { NextFunction, Request, Response } from 'express'; import { NextFunction, Request, Response } from 'express';
import urljoin from 'url-join'; import urljoin from 'url-join';
import { GBMinInstance } from 'botlib';
import { GBServer } from '../../../src/app.js';
import { GBLogEx } from './GBLogEx.js';
import { createRequire } from 'module';
const require = createRequire(import.meta.url);
const puppeteer = require('puppeteer-extra');
const hidden = require('puppeteer-extra-plugin-stealth');
const { executablePath } = require('puppeteer');
// https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952 export class GBSSR {
// Dont download all resources, we just need the HTML // https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952
// Also, this is huge performance/response time boost // Dont download all resources, we just need the HTML
const blockedResourceTypes = ['image', 'media', 'font', 'texttrack', 'object', 'beacon', 'csp_report', 'imageset']; // Also, this is huge performance/response time boost
// const whitelist = ["document", "script", "xhr", "fetch"]; private blockedResourceTypes = ['image', 'media', 'font', 'texttrack', 'object', 'beacon', 'csp_report', 'imageset'];
const skippedResources = [
// const whitelist = ["document", "script", "xhr", "fetch"];
private skippedResources = [
'quantserve', 'quantserve',
'adzerk', 'adzerk',
'doubleclick', 'doubleclick',
@ -76,11 +76,9 @@ const skippedResources = [
'zedo', 'zedo',
'clicksor', 'clicksor',
'tiqcdn' 'tiqcdn'
]; ];
const RENDER_CACHE = new Map(); public static async createBrowser(profilePath): Promise<any> {
async function createBrowser (profilePath): Promise<any> {
let args = [ let args = [
'--check-for-update-interval=2592000', '--check-for-update-interval=2592000',
'--disable-accelerated-2d-canvas', '--disable-accelerated-2d-canvas',
@ -102,50 +100,24 @@ async function createBrowser (profilePath): Promise<any> {
Fs.writeFileSync(preferences, JSON.stringify(data)); Fs.writeFileSync(preferences, JSON.stringify(data));
} }
} }
puppeteer.use(hidden()) puppeteer.use(hidden());
const browser = await puppeteer.launch({ const browser = await puppeteer.launch({
args: args, args: args,
ignoreHTTPSErrors: true, ignoreHTTPSErrors: true,
headless: false, headless: false,
defaultViewport: null, defaultViewport: null,
executablePath:executablePath(), executablePath: executablePath(),
ignoreDefaultArgs: ['--enable-automation', '--enable-blink-features=IdleDetection'] ignoreDefaultArgs: ['--enable-automation', '--enable-blink-features=IdleDetection']
}); });
return browser; return browser;
}
async function recursiveFindInFrames (inputFrame, selector) {
const frames = inputFrame.childFrames();
const results = await Promise.all(
frames.map(async frame => {
const el = await frame.$(selector);
if (el) return el;
if (frame.childFrames().length > 0) {
return await recursiveFindInFrames(frame, selector);
} }
return null;
})
);
return results.find(Boolean);
}
/** /**
* https://developers.google.com/web/tools/puppeteer/articles/ssr#reuseinstance * Return the HTML of bot default.gbui.
* @param {string} url URL to prerender.
*/ */
async function ssr (url: string, useCache: boolean, cacheRefreshRate: number) { public async getHTML(min: GBMinInstance) {
if (RENDER_CACHE.has(url) && useCache) { const url = urljoin(GBServer.globals.publicAddress, min.botId);
const cached = RENDER_CACHE.get(url); const browser = await GBSSR.createBrowser(null);
if (Date.now() - cached.renderedAt > cacheRefreshRate && !(cacheRefreshRate <= 0)) {
RENDER_CACHE.delete(url);
} else {
return {
html: cached.html,
status: 200
};
}
}
const browser = await createBrowser(null);
const stylesheetContents = {}; const stylesheetContents = {};
try { try {
@ -155,13 +127,10 @@ async function ssr (url: string, useCache: boolean, cacheRefreshRate: number) {
); );
await page.setRequestInterception(true); await page.setRequestInterception(true);
page.on('request', request => { page.on('request', request => {
const requestUrl = request const requestUrl = request.url().split('?')[0].split('#')[0];
.url()
.split('?')[0]
.split('#')[0];
if ( if (
blockedResourceTypes.indexOf(request.resourceType()) !== -1 || this.blockedResourceTypes.indexOf(request.resourceType()) !== -1 ||
skippedResources.some(resource => requestUrl.indexOf(resource) !== -1) this.skippedResources.some(resource => requestUrl.indexOf(resource) !== -1)
) { ) {
request.abort(); request.abort();
} else { } else {
@ -191,6 +160,7 @@ async function ssr (url: string, useCache: boolean, cacheRefreshRate: number) {
await sleep(45000); await sleep(45000);
// Inject <base> on page to relative resources load properly. // Inject <base> on page to relative resources load properly.
await page.evaluate(url => { await page.evaluate(url => {
const base = document.createElement('base'); const base = document.createElement('base');
base.href = url; base.href = url;
@ -199,6 +169,7 @@ async function ssr (url: string, useCache: boolean, cacheRefreshRate: number) {
}, url); }, url);
// Remove scripts and html imports. They've already executed. // Remove scripts and html imports. They've already executed.
await page.evaluate(() => { await page.evaluate(() => {
const elements = document.querySelectorAll('script, link[rel="import"]'); const elements = document.querySelectorAll('script, link[rel="import"]');
elements.forEach(e => { elements.forEach(e => {
@ -207,6 +178,7 @@ async function ssr (url: string, useCache: boolean, cacheRefreshRate: number) {
}); });
// Replace stylesheets in the page with their equivalent <style>. // Replace stylesheets in the page with their equivalent <style>.
await page.$$eval( await page.$$eval(
'link[rel="stylesheet"]', 'link[rel="stylesheet"]',
(links, content) => { (links, content) => {
@ -225,48 +197,25 @@ async function ssr (url: string, useCache: boolean, cacheRefreshRate: number) {
const html = await page.content(); const html = await page.content();
// Close the page we opened here (not the browser). // Close the page we opened here (not the browser).
await page.close(); await page.close();
if (useCache) { return html;
RENDER_CACHE.set(url, { html, renderedAt: Date.now() });
}
return { html, status: response!.status() };
} catch (e) { } catch (e) {
const html = e.toString(); const html = e.toString();
console.warn({ message: `URL: ${url} Failed with message: ${html}` }); GBLogEx.error(min, `URL: ${url} Failed with message: ${html}`);
return { html, status: 500 }; return html;
} finally { } finally {
await browser.close(); await browser.close();
} }
}
/**
 * Drops every entry from the module-level RENDER_CACHE map, so no previously
 * stored render result remains available for reuse.
 */
function clearCache () {
RENDER_CACHE.clear();
}
/**
 * Settings accepted by ssrForBots. Every field is optional.
 */
interface Options {
// Extra user-agent patterns that trigger prerendering; they are joined with
// the default list into one case-insensitive regular expression.
prerender?: Array<string>;
// Extra path/extension patterns excluded from prerendering; they are joined
// with the default exclude list and tested against the request URL.
exclude?: Array<string>;
// Whether rendered pages are stored in, and served from, the render cache.
useCache?: boolean;
// Lifetime of a cached page; zero or a negative value disables expiry.
// NOTE(review): described as seconds where the defaults are declared, but
// ssr() compares it against a Date.now() difference, which is in
// milliseconds - confirm the intended unit.
cacheRefreshRate?: number;
}
function ssrForBots (
options: Options = {
prerender: [], // Array containing the user-agents that will trigger the ssr service
exclude: [], // Array containing paths and/or extentions that will be excluded from being prerendered by the ssr service
useCache: true, // Variable that determins if we will use page caching or not
cacheRefreshRate: 86400 // Seconds of which the cache will be kept alive, pass 0 or negative value for infinite lifespan
} }
) {
let applyOptions = Object.assign( public static async ssrFilter(req: Request, res: Response, next) {
{ let applyOptions = {
prerender: [], // Array containing the user-agents that will trigger the ssr service prerender: [], // Array containing the user-agents that will trigger the ssr service
exclude: [], // Array containing paths and/or extentions that will be excluded from being prerendered by the ssr service exclude: [], // Array containing paths and/or extentions that will be excluded from being prerendered by the ssr service
useCache: true, // Variable that determins if we will use page caching or not useCache: true, // Variable that determins if we will use page caching or not
cacheRefreshRate: 86400 // Seconds of which the cache will be kept alive, pass 0 or negative value for infinite lifespan cacheRefreshRate: 86400 // Seconds of which the cache will be kept alive, pass 0 or negative value for infinite lifespan
}, };
options
);
// Default user agents // Default user agents
const prerenderArray = [ const prerenderArray = [
@ -296,37 +245,31 @@ function ssrForBots (
// default exclude array // default exclude array
const excludeArray = ['.xml', '.ico', '.txt', '.json']; const excludeArray = ['.xml', '.ico', '.txt', '.json'];
function ssrOnDemand (req: Request, res: Response, next: NextFunction) {
Promise.resolve(() => {
return true;
})
.then(async () => {
const userAgent: string = req.headers['user-agent'] || ''; const userAgent: string = req.headers['user-agent'] || '';
const prerender = new RegExp([...prerenderArray, ...applyOptions.prerender].join('|').slice(0, -1), 'i').test( const prerender = new RegExp([...prerenderArray, ...applyOptions.prerender].join('|').slice(0, -1), 'i').test(
userAgent userAgent
); );
const exclude = !new RegExp([...excludeArray, ...applyOptions.exclude].join('|').slice(0, -1)).test( const exclude = !new RegExp([...excludeArray, ...applyOptions.exclude].join('|').slice(0, -1)).test(
req.originalUrl req.originalUrl
); );
// Reads from static HTML when a bot is crawling.
const botId = req.originalUrl ? req.originalUrl.substr(1) : GBServer.globals.minInstances[0].botId; // TODO: Get only bot.
const min: GBMinInstance = GBServer.globals.minInstances.filter(p => p.instance.botId === botId)[0];
const path = Path.join(process.env.PWD, 'work', `${min.instance.botId}.gbai`, `${min.instance.botId}.gbui`, 'index.html');
if (req.originalUrl && prerender && exclude) { if (req.originalUrl && prerender && exclude) {
const { html, status } = await ssr( const html = Fs.readFileSync(path, 'utf8');
req.protocol + '://' + req.get('host') + req.originalUrl, res.status(200).send(html);
applyOptions.useCache, return true;
applyOptions.cacheRefreshRate
);
return res.status(status).send(html);
} else { } else {
return next(); if (Fs.existsSync(path)) {
res.sendFile(path);
} else {
res.status(404);
res.end();
}
} }
})
.catch(next);
} }
return ssrOnDemand;
} }
export { createBrowser, ssr, clearCache, ssrForBots };

View file

@ -55,6 +55,7 @@ import auth from 'basic-auth';
import child_process from 'child_process'; import child_process from 'child_process';
import * as winston from 'winston-logs-display'; import * as winston from 'winston-logs-display';
import { RootData } from './RootData.js'; import { RootData } from './RootData.js';
import { GBSSR } from '../packages/core.gbapp/services/GBSSR.js';
/** /**
* General Bots open-core entry point. * General Bots open-core entry point.
@ -229,7 +230,7 @@ export class GBServer {
winston.default(server, loggers[1]); winston.default(server, loggers[1]);
} }
server.get('*', function(req, res){ server.get('*', function (req, res) {
GBLog.info(`HTTP 404: ${req.url}.`); GBLog.info(`HTTP 404: ${req.url}.`);
res.status(404); res.status(404);
res.end(); res.end();

View file

@ -1,5 +1,6 @@
{ {
"compilerOptions": { "compilerOptions": {
"allowJs": true, "allowJs": true,
"downlevelIteration": true, "downlevelIteration": true,
"baseUrl": "./", "baseUrl": "./",