diff --git a/packages/basic.gblib/services/ChartServices.ts b/packages/basic.gblib/services/ChartServices.ts index c2a2280a..25d6f686 100644 --- a/packages/basic.gblib/services/ChartServices.ts +++ b/packages/basic.gblib/services/ChartServices.ts @@ -32,7 +32,7 @@ 'use strict'; -import { createBrowser } from '../../core.gbapp/services/GBSSR.js'; +import { GBSSR }from '../../core.gbapp/services/GBSSR.js'; export class ChartServices { /** @@ -41,7 +41,7 @@ export class ChartServices { * @param {string} path screenshot image full path with file name */ public static async screenshot (args, path) { - const browser = await createBrowser(null); + const browser = await GBSSR.createBrowser(null); const page = await browser.newPage(); // load billboard.js assets from CDN. diff --git a/packages/basic.gblib/services/SystemKeywords.ts b/packages/basic.gblib/services/SystemKeywords.ts index 786f4d42..20d7ab72 100644 --- a/packages/basic.gblib/services/SystemKeywords.ts +++ b/packages/basic.gblib/services/SystemKeywords.ts @@ -39,7 +39,7 @@ import { DialogKeywords } from './DialogKeywords.js'; import { GBServer } from '../../../src/app.js'; import { GBVMService } from './GBVMService.js'; import Fs from 'fs'; -import { createBrowser } from '../../core.gbapp/services/GBSSR.js'; +import { GBSSR }from '../../core.gbapp/services/GBSSR.js'; import urlJoin from 'url-join'; import Excel from 'exceljs'; import { TwitterApi } from 'twitter-api-v2'; @@ -257,7 +257,7 @@ export class SystemKeywords { const { min, user } = await DialogKeywords.getProcessInfo(pid); const gbaiName = `${min.botId}.gbai`; - const browser = await createBrowser(null); + const browser = await GBSSR.createBrowser(null); const page = await browser.newPage(); // Includes the associated CSS related to current theme. diff --git a/packages/basic.gblib/services/WebAutomationServices.ts b/packages/basic.gblib/services/WebAutomationServices.ts index 0b709c33..c9aa7b66 100644 --- a/packages/basic.gblib/services/WebAutomationServices.ts +++ b/packages/basic.gblib/services/WebAutomationServices.ts @@ -32,19 +32,19 @@ 'use strict'; -import { GBLog, GBMinInstance } from 'botlib'; -import { GBServer } from '../../../src/app.js'; -import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js'; -import { createBrowser } from '../../core.gbapp/services/GBSSR.js'; -import { GuaribasUser } from '../../security.gbapp/models/index.js'; -import { DialogKeywords } from './DialogKeywords.js'; - -import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js'; import urlJoin from 'url-join'; import Fs from 'fs'; import Path from 'path'; import url from 'url'; -import { Mutex, Semaphore, withTimeout } from 'async-mutex'; + +import { GBLog, GBMinInstance } from 'botlib'; +import { GBServer } from '../../../src/app.js'; +import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js'; +import { GBSSR }from '../../core.gbapp/services/GBSSR.js'; +import { GuaribasUser } from '../../security.gbapp/models/index.js'; +import { DialogKeywords } from './DialogKeywords.js'; +import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js'; +import { Mutex } from 'async-mutex'; import { GBLogEx } from '../../core.gbapp/services/GBLogEx.js'; /** @@ -172,16 +172,17 @@ export class WebAutomationServices { let browser; if (!page) { - browser = await createBrowser(null); + browser = await GBSSR.createBrowser(null); page = (await browser.pages())[0]; if (username || password) { await page.authenticate({ pid, username: username, password: password }); } } - // There is no session yet, + // There is no session yet. if (!session && sessionKind === 'AS') { + // A new web session is being created. handle = WebAutomationServices.cyrb53(this.min.botId + url); @@ -459,4 +460,20 @@ export class WebAutomationServices { return file; } + + private async recursiveFindInFrames (inputFrame, selector) { + const frames = inputFrame.childFrames(); + const results = await Promise.all( + frames.map(async frame => { + const el = await frame.$(selector); + if (el) return el; + if (frame.childFrames().length > 0) { + return await this.recursiveFindInFrames(frame, selector); + } + return null; + }) + ); + return results.find(Boolean); + } + } diff --git a/packages/core.gbapp/services/GBMinService.ts b/packages/core.gbapp/services/GBMinService.ts index 09f9c3d5..7a77c0a6 100644 --- a/packages/core.gbapp/services/GBMinService.ts +++ b/packages/core.gbapp/services/GBMinService.ts @@ -87,6 +87,7 @@ import { GoogleChatDirectLine } from '../../google-chat.gblib/services/GoogleCha import { SystemKeywords } from '../../basic.gblib/services/SystemKeywords.js'; import * as nlp from 'node-nlp'; import Path from 'path'; +import { GBSSR } from './GBSSR.js'; /** * Minimal service layer for a bot and encapsulation of BOT Framework calls. @@ -236,6 +237,7 @@ export class GBMinService { const url = `/api/messages/${botId}`; removeRoute(GBServer.globals.server, url); + const uiUrl = `/${botId}`; removeRoute(GBServer.globals.server, uiUrl); @@ -295,6 +297,10 @@ export class GBMinService { if (!Fs.existsSync(dir)) { mkdirp.sync(dir); } + dir = `work/${min.botId}.gbai/${min.botId}.gbui`; + if (!Fs.existsSync(dir)) { + mkdirp.sync(dir); + } // Loads Named Entity data for this bot. @@ -370,15 +376,17 @@ export class GBMinService { if (process.env.DISABLE_WEB !== 'true') { const uiUrl = `/${instance.botId}`; + let staticHandler = express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build')); + + GBServer.globals.server.get(uiUrl, async (req, res, next)=> { + await GBSSR.ssrFilter(req, res, staticHandler as any); + }); const uiUrlAlt = `/${instance.activationCode}`; - GBServer.globals.server.use( - uiUrl, - express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build')) - ); GBServer.globals.server.use( uiUrlAlt, express.static(urlJoin(GBDeployer.deployFolder, GBMinService.uiPackage, 'build')) ); + const domain = min.core.getParam(min.instance, 'Domain', null); if (domain) { GBServer.globals.server.use( diff --git a/packages/core.gbapp/services/GBSSR.ts b/packages/core.gbapp/services/GBSSR.ts index 8f361f13..b353b10d 100644 --- a/packages/core.gbapp/services/GBSSR.ts +++ b/packages/core.gbapp/services/GBSSR.ts @@ -36,297 +36,240 @@ 'use strict'; - -import {createRequire} from "module"; -const require = createRequire(import.meta.url); - -const puppeteer = require('puppeteer-extra'); -const hidden = require('puppeteer-extra-plugin-stealth') - -// require executablePath from puppeteer -const {executablePath} = require('puppeteer') +import Path from 'path'; import Fs from 'fs'; - - import { NextFunction, Request, Response } from 'express'; import urljoin from 'url-join'; +import { GBMinInstance } from 'botlib'; +import { GBServer } from '../../../src/app.js'; +import { GBLogEx } from './GBLogEx.js'; +import { createRequire } from 'module'; +const require = createRequire(import.meta.url); +const puppeteer = require('puppeteer-extra'); +const hidden = require('puppeteer-extra-plugin-stealth'); +const { executablePath } = require('puppeteer'); -// https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952 -// Dont download all resources, we just need the HTML -// Also, this is huge performance/response time boost -const blockedResourceTypes = ['image', 'media', 'font', 'texttrack', 'object', 'beacon', 'csp_report', 'imageset']; -// const whitelist = ["document", "script", "xhr", "fetch"]; -const skippedResources = [ - 'quantserve', - 'adzerk', - 'doubleclick', - 'adition', - 'exelator', - 'sharethrough', - 'cdn.api.twitter', - 'google-analytics', - 'googletagmanager', - 'google', - 'fontawesome', - 'facebook', - 'analytics', - 'optimizely', - 'clicktale', - 'mixpanel', - 'zedo', - 'clicksor', - 'tiqcdn' -]; +export class GBSSR { + // https://hackernoon.com/tips-and-tricks-for-web-scraping-with-puppeteer-ed391a63d952 + // Dont download all resources, we just need the HTML + // Also, this is huge performance/response time boost + private blockedResourceTypes = ['image', 'media', 'font', 'texttrack', 'object', 'beacon', 'csp_report', 'imageset']; -const RENDER_CACHE = new Map(); - -async function createBrowser (profilePath): Promise { - let args = [ - '--check-for-update-interval=2592000', - '--disable-accelerated-2d-canvas', - '--disable-dev-shm-usage', - '--disable-features=site-per-process', - '--disable-gpu', - '--no-first-run', - '--no-default-browser-check' + // const whitelist = ["document", "script", "xhr", "fetch"]; + private skippedResources = [ + 'quantserve', + 'adzerk', + 'doubleclick', + 'adition', + 'exelator', + 'sharethrough', + 'cdn.api.twitter', + 'google-analytics', + 'googletagmanager', + 'google', + 'fontawesome', + 'facebook', + 'analytics', + 'optimizely', + 'clicktale', + 'mixpanel', + 'zedo', + 'clicksor', + 'tiqcdn' ]; - if (profilePath) { - args.push(`--user-data-dir=${profilePath}`); + public static async createBrowser(profilePath): Promise { + let args = [ + '--check-for-update-interval=2592000', + '--disable-accelerated-2d-canvas', + '--disable-dev-shm-usage', + '--disable-features=site-per-process', + '--disable-gpu', + '--no-first-run', + '--no-default-browser-check' + ]; - const preferences = urljoin(profilePath, 'Default', 'Preferences'); - if (Fs.existsSync(preferences)) { - const file = Fs.readFileSync(preferences, 'utf8'); - const data = JSON.parse(file); - data['profile']['exit_type'] = 'none'; - Fs.writeFileSync(preferences, JSON.stringify(data)); + if (profilePath) { + args.push(`--user-data-dir=${profilePath}`); + + const preferences = urljoin(profilePath, 'Default', 'Preferences'); + if (Fs.existsSync(preferences)) { + const file = Fs.readFileSync(preferences, 'utf8'); + const data = JSON.parse(file); + data['profile']['exit_type'] = 'none'; + Fs.writeFileSync(preferences, JSON.stringify(data)); + } } + puppeteer.use(hidden()); + const browser = await puppeteer.launch({ + args: args, + ignoreHTTPSErrors: true, + headless: false, + defaultViewport: null, + executablePath: executablePath(), + ignoreDefaultArgs: ['--enable-automation', '--enable-blink-features=IdleDetection'] + }); + return browser; } - puppeteer.use(hidden()) - const browser = await puppeteer.launch({ - args: args, - ignoreHTTPSErrors: true, - headless: false, - defaultViewport: null, - executablePath:executablePath(), - ignoreDefaultArgs: ['--enable-automation', '--enable-blink-features=IdleDetection'] - }); - return browser; -} -async function recursiveFindInFrames (inputFrame, selector) { - const frames = inputFrame.childFrames(); - const results = await Promise.all( - frames.map(async frame => { - const el = await frame.$(selector); - if (el) return el; - if (frame.childFrames().length > 0) { - return await recursiveFindInFrames(frame, selector); - } - return null; - }) - ); - return results.find(Boolean); -} + /** + * Return the HTML of bot default.gbui. + */ + public async getHTML(min: GBMinInstance) { + const url = urljoin(GBServer.globals.publicAddress, min.botId); + const browser = await GBSSR.createBrowser(null); + const stylesheetContents = {}; -/** - * https://developers.google.com/web/tools/puppeteer/articles/ssr#reuseinstance - * @param {string} url URL to prerender. - */ -async function ssr (url: string, useCache: boolean, cacheRefreshRate: number) { - if (RENDER_CACHE.has(url) && useCache) { - const cached = RENDER_CACHE.get(url); - if (Date.now() - cached.renderedAt > cacheRefreshRate && !(cacheRefreshRate <= 0)) { - RENDER_CACHE.delete(url); - } else { - return { - html: cached.html, - status: 200 - }; - } - } - const browser = await createBrowser(null); - const stylesheetContents = {}; - - try { - const page = await browser.newPage(); - await page.setUserAgent( - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36' - ); - await page.setRequestInterception(true); - page.on('request', request => { - const requestUrl = request - .url() - .split('?')[0] - .split('#')[0]; - if ( - blockedResourceTypes.indexOf(request.resourceType()) !== -1 || - skippedResources.some(resource => requestUrl.indexOf(resource) !== -1) - ) { - request.abort(); - } else { - request.continue(); - } - }); - - page.on('response', async resp => { - const responseUrl = resp.url(); - const sameOrigin = new URL(responseUrl).origin === new URL(url).origin; - const isStylesheet = resp.request().resourceType() === 'stylesheet'; - if (sameOrigin && isStylesheet) { - stylesheetContents[responseUrl] = await resp.text(); - } - }); - - const response = await page.goto(url, { - timeout: 120000, - waitUntil: 'networkidle0' - }); - - const sleep = ms => { - return new Promise(resolve => { - setTimeout(resolve, ms); + try { + const page = await browser.newPage(); + await page.setUserAgent( + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36' + ); + await page.setRequestInterception(true); + page.on('request', request => { + const requestUrl = request.url().split('?')[0].split('#')[0]; + if ( + this.blockedResourceTypes.indexOf(request.resourceType()) !== -1 || + this.skippedResources.some(resource => requestUrl.indexOf(resource) !== -1) + ) { + request.abort(); + } else { + request.continue(); + } }); - }; - await sleep(45000); - // Inject on page to relative resources load properly. - await page.evaluate(url => { - const base = document.createElement('base'); - base.href = url; - // Add to top of head, before all other resources. - document.head.prepend(base); - }, url); - - // Remove scripts and html imports. They've already executed. - await page.evaluate(() => { - const elements = document.querySelectorAll('script, link[rel="import"]'); - elements.forEach(e => { - e.remove(); + page.on('response', async resp => { + const responseUrl = resp.url(); + const sameOrigin = new URL(responseUrl).origin === new URL(url).origin; + const isStylesheet = resp.request().resourceType() === 'stylesheet'; + if (sameOrigin && isStylesheet) { + stylesheetContents[responseUrl] = await resp.text(); + } }); - }); - // Replace stylesheets in the page with their equivalent