From 5cd29131faada68367f1222e62d697f62a2fd6a0 Mon Sep 17 00:00:00 2001 From: Rodrigo Rodriguez Date: Sat, 30 Nov 2024 11:58:40 -0300 Subject: [PATCH] fix(llm.gblib): Fix in doc. publishing. --- packages/kb.gbapp/services/KBService.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/kb.gbapp/services/KBService.ts b/packages/kb.gbapp/services/KBService.ts index 1cc43063..80d415e4 100644 --- a/packages/kb.gbapp/services/KBService.ts +++ b/packages/kb.gbapp/services/KBService.ts @@ -895,10 +895,11 @@ export class KBService implements IGBKBService { depth: number, maxDepth: number, page: Page, - websiteIgnoreUrls + websiteIgnoreUrls, maxDocuments: number ): Promise { try { if ( + (visited.size >= maxDocuments) || /* true once the number of visited URLs reaches the 'Website Max Documents' cap */ (depth > maxDepth && !url.endsWith('pdf')) || visited.has(url) || url.endsWith('.jpg') || @@ -1040,8 +1041,9 @@ export class KBService implements IGBKBService { let website = min.core.getParam(min.instance, 'Website', null); const maxDepth = min.core.getParam(min.instance, 'Website Depth', 1); + const maxDocuments = min.core.getParam(min.instance, 'Website Max Documents', 1); const websiteIgnoreUrls = min.core.getParam<[]>(min.instance, 'Website Ignore URLs', null); - GBLogEx.info(min, `Website: ${website}, Max Depth: ${maxDepth}, Ignore URLs: ${websiteIgnoreUrls}`); + GBLogEx.info(min, `Website: ${website}, Max Depth: ${maxDepth}, Website Max Documents: ${maxDocuments}, Ignore URLs: ${websiteIgnoreUrls}`); let shouldSave = false; @@ -1128,7 +1130,7 @@ export class KBService implements IGBKBService { page.setCacheEnabled(false); const visited = new Set(); - files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls)); + files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls, maxDocuments)); await browser.close();