fix(KBService): standardize formatting and improve code readability
All checks were successful
GBCI / build (push) Successful in 1m1s
All checks were successful
GBCI / build (push) Successful in 1m1s
This commit is contained in:
parent
53710da2a7
commit
e2382ccddc
1 changed file with 47 additions and 46 deletions
|
|
@@ -970,7 +970,7 @@ export class KBService implements IGBKBService {
|
||||||
|
|
||||||
const childLinks = [];
|
const childLinks = [];
|
||||||
for (const link of filteredLinks) {
|
for (const link of filteredLinks) {
|
||||||
const links = await this.crawl(min, link,
|
const links = await this.crawl(min, link,
|
||||||
visited, depth + 1, maxDepth, page, websiteIgnoreUrls, maxDocuments);
|
visited, depth + 1, maxDepth, page, websiteIgnoreUrls, maxDocuments);
|
||||||
if (links) {
|
if (links) {
|
||||||
childLinks.push(...links);
|
childLinks.push(...links);
|
||||||
|
|
@@ -1021,16 +1021,16 @@ export class KBService implements IGBKBService {
|
||||||
|
|
||||||
async getFreshPage(browser, url) {
|
async getFreshPage(browser, url) {
|
||||||
if (!browser || browser.isConnected() === false) {
|
if (!browser || browser.isConnected() === false) {
|
||||||
let args = [
|
let args = [
|
||||||
'--check-for-update-interval=2592000',
|
'--check-for-update-interval=2592000',
|
||||||
'--disable-accelerated-2d-canvas',
|
'--disable-accelerated-2d-canvas',
|
||||||
'--disable-dev-shm-usage',
|
'--disable-dev-shm-usage',
|
||||||
'--disable-features=site-per-process',
|
'--disable-features=site-per-process',
|
||||||
'--disable-gpu',
|
'--disable-gpu',
|
||||||
'--no-first-run',
|
'--no-first-run',
|
||||||
'--no-sandbox',
|
'--no-sandbox',
|
||||||
'--no-default-browser-check'
|
'--no-default-browser-check'
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
||||||
browser = await puppeteer.launch({ headless: false, args: args });
|
browser = await puppeteer.launch({ headless: false, args: args });
|
||||||
|
|
@@ -1076,18 +1076,18 @@ export class KBService implements IGBKBService {
|
||||||
let packagePath = GBUtil.getGBAIPath(min.botId, `gbot`);
|
let packagePath = GBUtil.getGBAIPath(min.botId, `gbot`);
|
||||||
const directoryPath = path.join(process.env.PWD, 'work', packagePath, 'Website');
|
const directoryPath = path.join(process.env.PWD, 'work', packagePath, 'Website');
|
||||||
fs.rm(directoryPath, { recursive: true, force: true });
|
fs.rm(directoryPath, { recursive: true, force: true });
|
||||||
let args = [
|
let args = [
|
||||||
'--check-for-update-interval=2592000',
|
'--check-for-update-interval=2592000',
|
||||||
'--disable-accelerated-2d-canvas',
|
'--disable-accelerated-2d-canvas',
|
||||||
'--disable-dev-shm-usage',
|
'--disable-dev-shm-usage',
|
||||||
'--disable-features=site-per-process',
|
'--disable-features=site-per-process',
|
||||||
'--disable-gpu',
|
'--disable-gpu',
|
||||||
'--no-first-run',
|
'--no-first-run',
|
||||||
'--no-sandbox',
|
'--no-sandbox',
|
||||||
'--no-default-browser-check'
|
'--no-default-browser-check'
|
||||||
];
|
];
|
||||||
|
|
||||||
let browser = await puppeteer.launch({ headless: false , args});
|
let browser = await puppeteer.launch({ headless: false, args });
|
||||||
const page = await this.getFreshPage(browser, website);
|
const page = await this.getFreshPage(browser, website);
|
||||||
|
|
||||||
let logo = await this.getLogoByPage(min, page);
|
let logo = await this.getLogoByPage(min, page);
|
||||||
|
|
@@ -1239,42 +1239,43 @@ export class KBService implements IGBKBService {
|
||||||
const gbdata = path.join(process.env.PWD, 'work', GBUtil.getGBAIPath(min.botId, 'gbdata'));
|
const gbdata = path.join(process.env.PWD, 'work', GBUtil.getGBAIPath(min.botId, 'gbdata'));
|
||||||
files = files.concat(await walkPromise(gbdata));
|
files = files.concat(await walkPromise(gbdata));
|
||||||
|
|
||||||
|
|
||||||
if (files[0]) {
|
if (files[0]) {
|
||||||
|
|
||||||
|
files = files.filter(p => { return p });
|
||||||
shouldSave = true;
|
shouldSave = true;
|
||||||
GBLogEx.info(min, `Add embeddings from packages, ${files.length} files being processed...`);
|
GBLogEx.info(min, `Add embeddings from packages, ${files.length} files being processed...`);
|
||||||
await CollectionUtil.asyncForEach(files, async file => {
|
await CollectionUtil.asyncForEach(files, async file => {
|
||||||
|
|
||||||
if (file.root) {
|
if (file.root) {
|
||||||
|
|
||||||
let content = null;
|
let content = null;
|
||||||
let filePath = path.join(file.root, file.name);
|
let filePath = path.join(file.root, file.name);
|
||||||
try {
|
try {
|
||||||
|
|
||||||
if (file.name.endsWith('.csv') || file.name.endsWith('.md')
|
if (file.name.endsWith('.csv') || file.name.endsWith('.md')
|
||||||
|| file.name.endsWith('.pdf') || file.name.endsWith('.docx') ||
|
|| file.name.endsWith('.pdf') || file.name.endsWith('.docx') ||
|
||||||
file.name.endsWith('.epub') ||file.name.endsWith('.txt')
|
file.name.endsWith('.epub') || file.name.endsWith('.txt')
|
||||||
|
|
||||||
) {
|
|
||||||
|
|
||||||
if (file.name.endsWith('.csv')) {
|
) {
|
||||||
// Read first 1000 lines of CSV file
|
|
||||||
const csvContent = await fs.readFile(filePath, 'utf8');
|
if (file.name.endsWith('.csv')) {
|
||||||
const lines = csvContent.split('\n').slice(0, 200).join('\n');
|
// Read first 1000 lines of CSV file
|
||||||
await fs.writeFile(filePath, lines, 'utf8');
|
const csvContent = await fs.readFile(filePath, 'utf8');
|
||||||
content = lines;
|
const lines = csvContent.split('\n').slice(0, 200).join('\n');
|
||||||
|
await fs.writeFile(filePath, lines, 'utf8');
|
||||||
|
content = lines;
|
||||||
|
}
|
||||||
|
|
||||||
|
const document = await this.loadAndSplitFile(filePath);
|
||||||
|
// TODO: Add full filename.
|
||||||
|
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
||||||
|
await min['vectorStore'].addDocuments(flattenedDocuments);
|
||||||
|
GBLogEx.info(min, `Added ${filePath} to vector store.`);
|
||||||
}
|
}
|
||||||
|
} catch (error) {
|
||||||
const document = await this.loadAndSplitFile(filePath);
|
GBLogEx.info(min, `Ignore processing of ${file}. ${GBUtil.toYAML(error)}`);
|
||||||
// TODO: Add full filename.
|
|
||||||
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
|
||||||
await min['vectorStore'].addDocuments(flattenedDocuments);
|
|
||||||
GBLogEx.info(min, `Added ${filePath} to vector store.`);
|
|
||||||
}
|
}
|
||||||
} catch (error) {
|
|
||||||
GBLogEx.info(min, `Ignore processing of ${file}. ${GBUtil.toYAML(error)}`);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (shouldSave && min['vectorStore']) {
|
if (shouldSave && min['vectorStore']) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue