/*****************************************************************************\ | █████ █████ ██ █ █████ █████ ████ ██ ████ █████ █████ ███ ® | | ██ █ ███ █ █ ██ ██ ██ ██ ██ ██ █ ██ ██ █ █ | | ██ ███ ████ █ ██ █ ████ █████ ██████ ██ ████ █ █ █ ██ | | ██ ██ █ █ ██ █ █ ██ ██ ██ ██ ██ ██ █ ██ ██ █ █ | | █████ █████ █ ███ █████ ██ ██ ██ ██ █████ ████ █████ █ ███ | | | | General Bots Copyright (c) pragmatismo.cloud. All rights reserved. | | Licensed under the AGPL-3.0. | | | | According to our dual licensing model, this program can be used either | | under the terms of the GNU Affero General Public License, version 3, | | or under a proprietary license. | | | | The texts of the GNU Affero General Public License with an additional | | permission and of our proprietary license can be found at and | | in the LICENSE file you have received along with this program. | | | | This program is distributed in the hope that it will be useful, | | but WITHOUT ANY WARRANTY, without even the implied warranty of | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | GNU Affero General Public License for more details. | | | | "General Bots" is a registered trademark of pragmatismo.cloud. | | The licensing of the program under the AGPLv3 does not imply a | | trademark license. Therefore any rights, title and interest in | | our trademarks remain entirely with us. | | | \*****************************************************************************/ /** * @fileoverview General Bots SSR support based on https://www.npmjs.com/package/ssr-for-bots. 
*/

'use strict';

import { createRequire } from 'module';
const require = createRequire(import.meta.url);
import Path from 'path';
import Fs from 'fs';
import { NextFunction, Request, Response } from 'express';
import urljoin from 'url-join';
import { GBMinInstance } from 'botlib';
import { GBServer } from '../../../src/app.js';
import { GBLogEx } from './GBLogEx.js';
import urlJoin from 'url-join';
import { GBDeployer } from './GBDeployer.js';
import { GBMinService } from './GBMinService.js';
import { DialogKeywords } from '../../basic.gblib/services/DialogKeywords.js';
import { GBUtil } from '../../../src/util.js';

const puppeteer = require('puppeteer-extra');
const hidden = require('puppeteer-extra-plugin-stealth');
const { executablePath } = require('puppeteer');

export class GBSSR {
  // https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952
  // Don't download all resources, we just need the HTML.
  // Also, this is a huge performance/response time boost.
  private static blockedResourceTypes = [
    'image',
    'media',
    'font',
    'texttrack',
    'object',
    'beacon',
    'csp_report',
    'imageset'
  ];

  // const whitelist = ["document", "script", "xhr", "fetch"];
  // Requests whose URL contains any of these substrings are aborted in getHTML.
  private static skippedResources = [
    'quantserve',
    'adzerk',
    'doubleclick',
    'adition',
    'exelator',
    'sharethrough',
    'cdn.api.twitter',
    'google-analytics',
    'googletagmanager',
    'google',
    'fontawesome',
    'facebook',
    'analytics',
    'optimizely',
    'clicktale',
    'mixpanel',
    'zedo',
    'clicksor',
    'tiqcdn'
  ];

  // puppeteer-extra is a shared singleton and use() appends on every call,
  // so plugins must only be registered once per process.
  private static pluginsRegistered = false;

  /**
   * Builds the option object passed to `puppeteer.launch`.
   *
   * When `profilePath` is truthy, the Chrome user-data-dir is pointed at it and,
   * if `<profilePath>/Default/Preferences` exists, that JSON file is rewritten
   * with `profile.exit_type` set to `'none'`.
   * NOTE(review): presumably this suppresses Chrome's "restore session" prompt
   * after an unclean shutdown - confirm.
   *
   * @param profilePath Directory of the Chrome profile to reuse, or a falsy
   *                    value to launch without a persistent profile.
   * @returns Launch options (args, executable path, viewport and HTTPS flags).
   * @throws If the Preferences file exists but cannot be read, parsed or written.
   */
  public static preparePuppeteer(profilePath) {
    const args = [
      '--check-for-update-interval=2592000',
      '--disable-accelerated-2d-canvas',
      '--disable-dev-shm-usage',
      '--disable-features=site-per-process',
      '--disable-gpu',
      '--no-first-run',
      '--no-default-browser-check'
    ];

    if (profilePath) {
      args.push(`--user-data-dir=${profilePath}`);

      // This is a filesystem path, not a URL: url-join would mangle
      // directories containing '?' or '#', so use the path module.
      const preferences = Path.join(profilePath, 'Default', 'Preferences');
      if (Fs.existsSync(preferences)) {
        const data = JSON.parse(Fs.readFileSync(preferences, 'utf8'));

        // A Preferences file without a "profile" section used to raise a
        // TypeError here; create the section instead.
        if (!data['profile']) {
          data['profile'] = {};
        }
        data['profile']['exit_type'] = 'none';
        Fs.writeFileSync(preferences, JSON.stringify(data));
      }
    }

    return {
      args: args,
      ignoreHTTPSErrors: true,
      headless: false,
      defaultViewport: null,
      // CHROME_PATH, when set and non-empty, overrides the bundled browser.
      executablePath: process.env.CHROME_PATH || executablePath(),
      ignoreDefaultArgs: ['--enable-automation', '--enable-blink-features=IdleDetection']
    };
  }

  /**
   * Launches a browser through puppeteer-extra using the options produced by
   * `preparePuppeteer`. The stealth and minmax plugins are registered on the
   * first call only.
   *
   * @param profilePath Chrome profile directory forwarded to `preparePuppeteer`.
   * @returns The browser instance resolved by `puppeteer.launch`.
   */
  public static async createBrowser(profilePath): Promise<any> {
    const opts = GBSSR.preparePuppeteer(profilePath);

    if (!GBSSR.pluginsRegistered) {
      puppeteer.use(hidden());
      puppeteer.use(require('puppeteer-extra-plugin-minmax')());
      GBSSR.pluginsRegistered = true;
    }

    return await puppeteer.launch(opts);
  }

  /**
   * Return the HTML of bot default.gbui.
   */
  public static async getHTML(min: GBMinInstance) {
    const url = urljoin(GBServer.globals.publicAddress, min.botId);
    const browser = await GBSSR.createBrowser(null);
    const stylesheetContents = {};
    let html;

    try {
      const page = await browser.newPage();
      await page.minimize();
      await page.setUserAgent(
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
      );
      await page.setRequestInterception(true);

      // Abort requests for blocked resource types or skipped hosts; the URL is
      // compared without its query string and fragment.
      page.on('request', request => {
        const requestUrl = request.url().split('?')[0].split('#')[0];
        if (
          GBSSR.blockedResourceTypes.indexOf(request.resourceType()) !== -1 ||
          GBSSR.skippedResources.some(resource => requestUrl.indexOf(resource) !== -1)
        ) {
          request.abort();
        } else {
          request.continue();
        }
      });

      // Keep the text of every same-origin stylesheet response, keyed by URL.
      page.on('response', async resp => {
        const responseUrl = resp.url();
        const sameOrigin = new URL(responseUrl).origin === new URL(url).origin;
        const isStylesheet = resp.request().resourceType() === 'stylesheet';
        if (sameOrigin && isStylesheet) {
          stylesheetContents[responseUrl] = await resp.text();
        }
      });

      await page.setExtraHTTPHeaders({
        'ngrok-skip-browser-warning': '1'
      });
      const response = await page.goto(url, {
        timeout: 120000,
        waitUntil: 'networkidle0'
      });

      await GBUtil.sleep(6000);

      // Inject <base> on page so relative resources load properly.
await page.evaluate(url => { const base = document.createElement('base'); base.href = url; // Add to top of head, before all other resources. document.head.prepend(base); }, url); // Remove scripts and html imports. They've already executed. await page.evaluate(() => { const elements = document.querySelectorAll('script, link[rel="import"]'); elements.forEach(e => { e.remove(); }); }); // Replace stylesheets in the page with their equivalent