new(all): Auto import for logo, colors and website content.

This commit is contained in:
Rodrigo Rodriguez 2024-05-22 13:21:29 -03:00
parent 7e6b2807a1
commit c374f27567

View file

@ -879,7 +879,7 @@ export class KBService implements IGBKBService {
return null; return null;
} }
async crawl(min, url: string, visited: Set<string>, depth: number, maxDepth: number, page: Page): Promise<string[]> { async crawl(min, url: string, visited: Set<string>, depth: number, maxDepth: number, page: Page, websiteIgnoreUrls): Promise<string[]> {
try { try {
if ( if (
depth > maxDepth || depth > maxDepth ||
@ -904,10 +904,18 @@ export class KBService implements IGBKBService {
return []; return [];
} }
const currentDomain = new URL(page.url()).hostname; const currentDomain = new URL(page.url()).hostname;
let links = await page.evaluate(currentDomain => {
let links = await page.evaluate(({currentDomain, websiteIgnoreUrls}) => {
const anchors = Array.from(document.querySelectorAll('a')).filter(p => { const anchors = Array.from(document.querySelectorAll('a')).filter(p => {
try { try {
return currentDomain == new URL(p.href).hostname;
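// Skip anchors whose href contains any of the ';'-separated ignored URL fragments.
// NOTE(review): if websiteIgnoreUrls is null/undefined (the 'Website Ignore URLs' param is read with a null default),
// .split throws and the catch below rejects every anchor; an empty fragment (e.g. trailing ';') matches every href — confirm and guard.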
const isIgnored = websiteIgnoreUrls.split(";").some(ignoredUrl => p.href.includes(ignoredUrl));
return !isIgnored && currentDomain == new URL(p.href).hostname;
} catch (err) { } catch (err) {
return false; return false;
} }
@ -916,7 +924,7 @@ export class KBService implements IGBKBService {
return anchors.map(anchor => { return anchors.map(anchor => {
return anchor.href.replace(/#.*/, ''); return anchor.href.replace(/#.*/, '');
}); });
}, currentDomain); }, {currentDomain, websiteIgnoreUrls});
if (!Array.isArray(links)) { if (!Array.isArray(links)) {
links = []; links = [];
@ -1009,6 +1017,7 @@ export class KBService implements IGBKBService {
const website = min.core.getParam<string>(min.instance, 'Website', null); const website = min.core.getParam<string>(min.instance, 'Website', null);
const websiteIgnoreUrls = min.core.getParam<string>(min.instance, 'Website Ignore URLs', null);
if (website) { if (website) {
@ -1066,7 +1075,7 @@ export class KBService implements IGBKBService {
const maxDepth = 2; // Maximum depth of recursion const maxDepth = 2; // Maximum depth of recursion
const visited = new Set<string>(); const visited = new Set<string>();
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page)); files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls));
await browser.close(); await browser.close();