diff --git a/packages/core.gbapp/services/GBMinService.ts b/packages/core.gbapp/services/GBMinService.ts index e5b67329..efb938f4 100644 --- a/packages/core.gbapp/services/GBMinService.ts +++ b/packages/core.gbapp/services/GBMinService.ts @@ -40,7 +40,6 @@ const express = require('express'); const Fs = require('fs'); const request = require('request-promise-native'); const removeRoute = require('express-remove-route'); -const ssrForBots = require("ssr-for-bots").default; const AuthenticationContext = require('adal-node').AuthenticationContext; const wash = require('washyourmouthoutwithsoap'); const { FacebookAdapter } = require('botbuilder-adapter-facebook'); @@ -84,6 +83,7 @@ import fs = require('fs'); import { GoogleChatDirectLine } from '../../google-chat.gblib/services/GoogleChatDirectLine'; import { ScheduleServices } from '../../basic.gblib/services/ScheduleServices'; import { SystemKeywords } from '../../basic.gblib/services/SystemKeywords'; +import { ssrForBots } from './GBSSR'; /** * Minimal service layer for a bot and encapsulation of BOT Framework calls. @@ -156,23 +156,12 @@ export class GBMinService { // default.gbui access definition. GBServer.globals.server.use('/', express.static(url)); - GBServer.globals.server.use('/ssr-delay', async (req,res) => { - const sleep = async ms => { - return new Promise(resolve => { - setTimeout(resolve, ms); - }); - }; - await sleep(1); - res.status(200); - res.end(); - }); - } - // Servers the bot information object via HTTP so clients can get - // instance information stored on server. + // Servers the bot information object via HTTP so clients can get + // instance information stored on server. + - if (process.env.DISABLE_WEB !== 'true') { GBServer.globals.server.get('/instances/:botId', this.handleGetInstanceForClient.bind(this)); } @@ -927,13 +916,13 @@ export class GBMinService { } if (step.context.activity.channelId !== 'msteams') { - const service = new KBService(min.core.sequelize); - const data = await service.getFaqBySubjectArray(instance.instanceId, 'faq', undefined); - await min.conversationalService.sendEvent(min, step, 'play', { - playerType: 'bullet', - data: data.slice(0, 10) - }); - } + const service = new KBService(min.core.sequelize); + const data = await service.getFaqBySubjectArray(instance.instanceId, 'faq', undefined); + await min.conversationalService.sendEvent(min, step, 'play', { + playerType: 'bullet', + data: data.slice(0, 10) + }); + } // Saves session user (persisted GuaribasUser is inside). diff --git a/packages/core.gbapp/services/GBSSR.ts b/packages/core.gbapp/services/GBSSR.ts new file mode 100644 index 00000000..7bae28b5 --- /dev/null +++ b/packages/core.gbapp/services/GBSSR.ts @@ -0,0 +1,324 @@ +/*****************************************************************************\ +| ( )_ _ | +| _ _ _ __ _ _ __ ___ ___ _ _ | ,_)(_) ___ ___ _ | +| ( '_`\ ( '__)/'_` ) /'_ `\/' _ ` _ `\ /'_` )| | | |/',__)/' v `\ /'_`\ | +| | (_) )| | ( (_| |( (_) || ( ) ( ) |( (_| || |_ | |\__, \| (˅) |( (_) ) | +| | ,__/'(_) `\__,_)`\__ |(_) (_) (_)`\__,_)`\__)(_)(____/(_) (_)`\___/' | +| | | ( )_) | | +| (_) \___/' | +| | +| General Bots Copyright (c) Pragmatismo.io. All rights reserved. | +| Licensed under the AGPL-3.0. | +| | +| According to our dual licensing model, this program can be used either | +| under the terms of the GNU Affero General Public License, version 3, | +| or under a proprietary license. | +| | +| The texts of the GNU Affero General Public License with an additional | +| permission and of our proprietary license can be found at and | +| in the LICENSE file you have received along with this program. | +| | +| This program is distributed in the hope that it will be useful, | +| but WITHOUT ANY WARRANTY, without even the implied warranty of | +| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | +| GNU Affero General Public License for more details. | +| | +| "General Bots" is a registered trademark of Pragmatismo.io. | +| The licensing of the program under the AGPLv3 does not imply a | +| trademark license. Therefore any rights, title and interest in | +| our trademarks remain entirely with us. | +| | +\*****************************************************************************/ + +/** + * @fileoverview General Bots SSR support based on https://www.npmjs.com/package/ssr-for-bots. + */ + +'use strict'; + +const Path = require('path'); +const urlJoin = require('url-join'); +const Fs = require('fs'); +const express = require('express'); +const child_process = require('child_process'); +const rimraf = require('rimraf'); +const request = require('request-promise-native'); +import { GBError, GBLog, GBMinInstance, IGBCoreService, IGBDeployer, IGBInstance, IGBPackage } from 'botlib'; +import { CollectionUtil } from 'pragmatismo-io-framework'; +const puppeteer = require('puppeteer'); +import { NextFunction, Request, Response } from "express"; + + +// https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952 +// Dont download all resources, we just need the HTML +// Also, this is huge performance/response time boost +const blockedResourceTypes = [ + "image", + "media", + "font", + "texttrack", + "object", + "beacon", + "csp_report", + "imageset", +]; +// const whitelist = ["document", "script", "xhr", "fetch"]; +const skippedResources = [ + "quantserve", + "adzerk", + "doubleclick", + "adition", + "exelator", + "sharethrough", + "cdn.api.twitter", + "google-analytics", + "googletagmanager", + "google", + "fontawesome", + "facebook", + "analytics", + "optimizely", + "clicktale", + "mixpanel", + "zedo", + "clicksor", + "tiqcdn", +]; + +const RENDER_CACHE = new Map(); + +async function recursiveFindInFrames(inputFrame, selector) { + const frames = inputFrame.childFrames(); + const results = await Promise.all( + frames.map(async frame => { + const el = await frame.$(selector); + if (el) return el; + if (frame.childFrames().length > 0) { + return await recursiveFindInFrames(frame, selector); + } + return null; + }) + ); + return results.find(Boolean); +} + +async function findInFrames(page, selector) { + const result = await recursiveFindInFrames(page.mainFrame(), selector); + if (!result) { + throw new Error( + `The selector \`${selector}\` could not be found in any child frames.` + ); + } + return result; +} + + + +/** + * https://developers.google.com/web/tools/puppeteer/articles/ssr#reuseinstance + * @param {string} url URL to prerender. + */ +async function ssr(url: string, useCache: boolean, cacheRefreshRate: number) { + if (RENDER_CACHE.has(url) && useCache) { + const cached = RENDER_CACHE.get(url); + if ( + Date.now() - cached.renderedAt > cacheRefreshRate && + !(cacheRefreshRate <= 0) + ) { + RENDER_CACHE.delete(url); + } else { + return { + html: cached.html, + status: 200, + }; + } + } + const browser = await puppeteer.launch({ + headless: false, + args: ["--single-process", "--no-zygote", "--no-sandbox", "--disable-features=site-per-process"] + }); + // browserWSEndpoint = await browserT.wsEndpoint(); + // const browser = await puppeteer.connect({ browserWSEndpoint }); + const stylesheetContents = {}; + + try { + const page = await browser.newPage(); + await page.setUserAgent( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + ); + await page.setRequestInterception(true); + page.on("request", (request) => { + const requestUrl = request.url().split("?")[0].split("#")[0]; + if ( + blockedResourceTypes.indexOf(request.resourceType()) !== -1 || + skippedResources.some((resource) => requestUrl.indexOf(resource) !== -1) + ) { + request.abort(); + } else { + request.continue(); + } + }); + + page.on("response", async (resp) => { + const responseUrl = resp.url(); + const sameOrigin = new URL(responseUrl).origin === new URL(url).origin; + const isStylesheet = resp.request().resourceType() === "stylesheet"; + if (sameOrigin && isStylesheet) { + stylesheetContents[responseUrl] = await resp.text(); + } + }); + + const response = await page.goto(url, { + timeout: 120000, + waitUntil: "networkidle0", + }); + + const sleep = ms => { + return new Promise(resolve => { + setTimeout(resolve, ms); + }); + }; + await sleep(45000); + + // Inject on page to relative resources load properly. + await page.evaluate((url) => { + const base = document.createElement("base"); + base.href = url; + // Add to top of head, before all other resources. + document.head.prepend(base); + }, url); + + // Remove scripts and html imports. They've already executed. + await page.evaluate(() => { + const elements = document.querySelectorAll('script, link[rel="import"]'); + elements.forEach((e) => { + e.remove(); + }); + }); + + // Replace stylesheets in the page with their equivalent