From d694038532bfef2115d6a6179c10c309e010ffb6 Mon Sep 17 00:00:00 2001 From: Rodrigo Rodriguez Date: Sun, 15 Dec 2024 16:27:40 -0300 Subject: [PATCH] fix(kb.gbapp): HTML crawler improved. --- packages/kb.gbapp/services/KBService.ts | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/packages/kb.gbapp/services/KBService.ts b/packages/kb.gbapp/services/KBService.ts index b2d55007..2145b687 100644 --- a/packages/kb.gbapp/services/KBService.ts +++ b/packages/kb.gbapp/services/KBService.ts @@ -1019,17 +1019,20 @@ export class KBService implements IGBKBService { } async getFreshPage(browser, url) { - try { - if (!browser || browser.isConnected() === false) { - browser = await puppeteer.launch({ headless: false }); // Change headless to true if you don't want to see the browser window - } - const page = await browser.newPage(); - await page.goto(url); - return page; - } catch (error) { - console.error('An error occurred while getting fresh page:', error); - throw error; + if (!browser || browser.isConnected() === false) { + browser = await puppeteer.launch({ headless: false }); } + const page = await browser.newPage(); + try { + await page.goto(url, { + waitUntil: 'networkidle0', // Wait until network is idle + timeout: 30000 // 30 second timeout + }); + } catch (err) { + // Ignore timeout/navigation errors + } + + return page; } /**