fix(KBService): update file processing logic to include additional file types
All checks were successful
GBCI / build (push) Successful in 1m13s
All checks were successful
GBCI / build (push) Successful in 1m13s
This commit is contained in:
parent
0f0f8d610e
commit
4c28404578
1 changed file with 21 additions and 20 deletions
|
|
@@ -1179,7 +1179,7 @@ export class KBService implements IGBKBService {
|
||||||
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls, maxDocuments));
|
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls, maxDocuments));
|
||||||
|
|
||||||
await browser.close();
|
await browser.close();
|
||||||
|
|
||||||
|
|
||||||
GBLogEx.info(min, `Vectorizing ${files.length} file(s)...`);
|
GBLogEx.info(min, `Vectorizing ${files.length} file(s)...`);
|
||||||
|
|
||||||
|
|
@@ -1226,26 +1226,27 @@ export class KBService implements IGBKBService {
|
||||||
let skip = false;
|
let skip = false;
|
||||||
try {
|
try {
|
||||||
|
|
||||||
if (file.root.endsWith('.gbdata'))
|
if (file.name.endsWith('.csv') || file.name.endsWith('.md')
|
||||||
{
|
|| file.name.endsWith('.pdf') || file.name.endsWith('.docx') ||
|
||||||
skip = true;
|
file.name.endsWith('.epub') ||file.name.endsWith('.txt')
|
||||||
if (file.name.endsWith('.csv')) {
|
|
||||||
skip = false;
|
) {
|
||||||
// Read first 1000 lines of CSV file
|
|
||||||
const csvContent = await fs.readFile(filePath, 'utf8');
|
|
||||||
const lines = csvContent.split('\n').slice(0, 200).join('\n');
|
|
||||||
await fs.writeFile(filePath, lines, 'utf8');
|
|
||||||
content = lines;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!skip){
|
|
||||||
|
|
||||||
const document = await this.loadAndSplitFile(filePath);
|
if (file.name.endsWith('.csv')) {
|
||||||
// TODO: Add full filename.
|
skip = false;
|
||||||
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
// Read first 1000 lines of CSV file
|
||||||
await min['vectorStore'].addDocuments(flattenedDocuments);
|
const csvContent = await fs.readFile(filePath, 'utf8');
|
||||||
GBLogEx.info(min, `Added ${filePath} to vector store.`);
|
const lines = csvContent.split('\n').slice(0, 200).join('\n');
|
||||||
}
|
await fs.writeFile(filePath, lines, 'utf8');
|
||||||
|
content = lines;
|
||||||
|
}
|
||||||
|
|
||||||
|
const document = await this.loadAndSplitFile(filePath);
|
||||||
|
// TODO: Add full filename.
|
||||||
|
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
||||||
|
await min['vectorStore'].addDocuments(flattenedDocuments);
|
||||||
|
GBLogEx.info(min, `Added ${filePath} to vector store.`);
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
GBLogEx.info(min, `Ignore processing of ${file}. ${GBUtil.toYAML(error)}`);
|
GBLogEx.info(min, `Ignore processing of ${file}. ${GBUtil.toYAML(error)}`);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue