new(all): Auto import for logo, colors and website content.
This commit is contained in:
parent
7e6b2807a1
commit
c374f27567
1 changed file with 16 additions and 7 deletions
|
@ -879,7 +879,7 @@ export class KBService implements IGBKBService {
|
|||
return null;
|
||||
}
|
||||
|
||||
async crawl(min, url: string, visited: Set<string>, depth: number, maxDepth: number, page: Page): Promise<string[]> {
|
||||
async crawl(min, url: string, visited: Set<string>, depth: number, maxDepth: number, page: Page, websiteIgnoreUrls): Promise<string[]> {
|
||||
try {
|
||||
if (
|
||||
depth > maxDepth ||
|
||||
|
@ -904,10 +904,18 @@ export class KBService implements IGBKBService {
|
|||
return [];
|
||||
}
|
||||
const currentDomain = new URL(page.url()).hostname;
|
||||
let links = await page.evaluate(currentDomain => {
|
||||
|
||||
|
||||
let links = await page.evaluate(({currentDomain, websiteIgnoreUrls}) => {
|
||||
const anchors = Array.from(document.querySelectorAll('a')).filter(p => {
|
||||
try {
|
||||
return currentDomain == new URL(p.href).hostname;
|
||||
|
||||
// Check whether the anchor's href contains any of the semicolon-separated ignored URLs
|
||||
|
||||
const isIgnored = websiteIgnoreUrls.split(";").some(ignoredUrl => p.href.includes(ignoredUrl));
|
||||
|
||||
return !isIgnored && currentDomain == new URL(p.href).hostname;
|
||||
|
||||
} catch (err) {
|
||||
return false;
|
||||
}
|
||||
|
@ -916,7 +924,7 @@ export class KBService implements IGBKBService {
|
|||
return anchors.map(anchor => {
|
||||
return anchor.href.replace(/#.*/, '');
|
||||
});
|
||||
}, currentDomain);
|
||||
}, {currentDomain, websiteIgnoreUrls});
|
||||
|
||||
if (!Array.isArray(links)) {
|
||||
links = [];
|
||||
|
@ -1009,7 +1017,8 @@ export class KBService implements IGBKBService {
|
|||
|
||||
|
||||
const website = min.core.getParam<string>(min.instance, 'Website', null);
|
||||
|
||||
const websiteIgnoreUrls = min.core.getParam<string>(min.instance, 'Website Ignore URLs', null);
|
||||
|
||||
if (website) {
|
||||
|
||||
Fs.rmSync(min['vectorStorePath'], { recursive: true, force: true });
|
||||
|
@ -1063,10 +1072,10 @@ export class KBService implements IGBKBService {
|
|||
|
||||
page.setDefaultTimeout(15000);
|
||||
page.setCacheEnabled(false);
|
||||
|
||||
|
||||
const maxDepth = 2; // Maximum depth of recursion
|
||||
const visited = new Set<string>();
|
||||
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page));
|
||||
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls));
|
||||
|
||||
await browser.close();
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue