fix(KBService): update file processing logic to include additional file types
All checks were successful
GBCI / build (push) Successful in 1m13s
All checks were successful
GBCI / build (push) Successful in 1m13s
This commit is contained in:
parent
0f0f8d610e
commit
4c28404578
1 changed file with 21 additions and 20 deletions
|
@ -1179,7 +1179,7 @@ export class KBService implements IGBKBService {
|
|||
files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls, maxDocuments));
|
||||
|
||||
await browser.close();
|
||||
|
||||
|
||||
|
||||
GBLogEx.info(min, `Vectorizing ${files.length} file(s)...`);
|
||||
|
||||
|
@ -1226,26 +1226,27 @@ export class KBService implements IGBKBService {
|
|||
let skip = false;
|
||||
try {
|
||||
|
||||
if (file.root.endsWith('.gbdata'))
|
||||
{
|
||||
skip = true;
|
||||
if (file.name.endsWith('.csv')) {
|
||||
skip = false;
|
||||
// Truncate the CSV file to its first 200 lines (the file on disk is overwritten)
|
||||
const csvContent = await fs.readFile(filePath, 'utf8');
|
||||
const lines = csvContent.split('\n').slice(0, 200).join('\n');
|
||||
await fs.writeFile(filePath, lines, 'utf8');
|
||||
content = lines;
|
||||
}
|
||||
}
|
||||
if (!skip){
|
||||
if (file.name.endsWith('.csv') || file.name.endsWith('.md')
|
||||
|| file.name.endsWith('.pdf') || file.name.endsWith('.docx') ||
|
||||
file.name.endsWith('.epub') ||file.name.endsWith('.txt')
|
||||
|
||||
) {
|
||||
|
||||
const document = await this.loadAndSplitFile(filePath);
|
||||
// TODO: Add full filename.
|
||||
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
||||
await min['vectorStore'].addDocuments(flattenedDocuments);
|
||||
GBLogEx.info(min, `Added ${filePath} to vector store.`);
|
||||
}
|
||||
if (file.name.endsWith('.csv')) {
|
||||
skip = false;
|
||||
// Truncate the CSV file to its first 200 lines (the file on disk is overwritten)
|
||||
const csvContent = await fs.readFile(filePath, 'utf8');
|
||||
const lines = csvContent.split('\n').slice(0, 200).join('\n');
|
||||
await fs.writeFile(filePath, lines, 'utf8');
|
||||
content = lines;
|
||||
}
|
||||
|
||||
const document = await this.loadAndSplitFile(filePath);
|
||||
// TODO: Add full filename.
|
||||
const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
|
||||
await min['vectorStore'].addDocuments(flattenedDocuments);
|
||||
GBLogEx.info(min, `Added ${filePath} to vector store.`);
|
||||
}
|
||||
} catch (error) {
|
||||
GBLogEx.info(min, `Ignore processing of ${file}. ${GBUtil.toYAML(error)}`);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue