fix(llm.gblib): Fix in doc. publishing.
This commit is contained in:
parent
afc72c5d55
commit
5cd29131fa
1 changed file with 5 additions and 3 deletions
|
@ -895,10 +895,11 @@ export class KBService implements IGBKBService {
|
||||||
depth: number,
|
depth: number,
|
||||||
maxDepth: number,
|
maxDepth: number,
|
||||||
page: Page,
|
page: Page,
|
||||||
websiteIgnoreUrls
|
websiteIgnoreUrls, maxDocuments: number
|
||||||
): Promise<string[]> {
|
): Promise<string[]> {
|
||||||
try {
|
try {
|
||||||
if (
|
if (
|
||||||
|
(maxDocuments > visited.size) ||
|
||||||
(depth > maxDepth && !url.endsWith('pdf')) ||
|
(depth > maxDepth && !url.endsWith('pdf')) ||
|
||||||
visited.has(url) ||
|
visited.has(url) ||
|
||||||
url.endsWith('.jpg') ||
|
url.endsWith('.jpg') ||
|
||||||
|
@ -1040,8 +1041,9 @@ export class KBService implements IGBKBService {
|
||||||
|
|
||||||
let website = min.core.getParam<string>(min.instance, 'Website', null);
|
let website = min.core.getParam<string>(min.instance, 'Website', null);
|
||||||
const maxDepth = min.core.getParam<number>(min.instance, 'Website Depth', 1);
|
const maxDepth = min.core.getParam<number>(min.instance, 'Website Depth', 1);
|
||||||
|
const maxDocuments = min.core.getParam<number>(min.instance, 'Website Max Documents', 1);
|
||||||
const websiteIgnoreUrls = min.core.getParam<[]>(min.instance, 'Website Ignore URLs', null);
|
const websiteIgnoreUrls = min.core.getParam<[]>(min.instance, 'Website Ignore URLs', null);
|
||||||
GBLogEx.info(min, `Website: ${website}, Max Depth: ${maxDepth}, Ignore URLs: ${websiteIgnoreUrls}`);
|
GBLogEx.info(min, `Website: ${website}, Max Depth: ${maxDepth}, Website Max Documents${maxDocuments}, Ignore URLs: ${websiteIgnoreUrls}`);
|
||||||
|
|
||||||
let shouldSave = false;
|
let shouldSave = false;
|
||||||
|
|
||||||
|
@ -1128,7 +1130,7 @@ export class KBService implements IGBKBService {
|
||||||
page.setCacheEnabled(false);
|
page.setCacheEnabled(false);
|
||||||
|
|
||||||
const visited = new Set<string>();
|
const visited = new Set<string>();
|
||||||
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls));
|
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls, maxDocuments));
|
||||||
|
|
||||||
await browser.close();
|
await browser.close();
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue