fix(KBService): update file processing logic to include additional file types

2025-05-13 22:43:47 -03:00 · 2025-05-13 22:43:47 -03:00 · 4c28404578
commit 4c28404578
parent 0f0f8d610e
1 changed files with 21 additions and 20 deletions
--- a/packages/kb.gbapp/services/KBService.ts
+++ b/packages/kb.gbapp/services/KBService.ts
@ -1179,7 +1179,7 @@ export class KBService implements IGBKBService {
      files = files.concat(await this.crawl(min, website, visited, 0, maxDepth, page, websiteIgnoreUrls, maxDocuments));
      await browser.close();
-      
+
      GBLogEx.info(min, `Vectorizing ${files.length} file(s)...`);
@ -1226,26 +1226,27 @@ export class KBService implements IGBKBService {
        let skip = false;
        try {
-          if (file.root.endsWith('.gbdata'))
+          if (file.name.endsWith('.csv') || file.name.endsWith('.md')
-          {
+          || file.name.endsWith('.pdf') || file.name.endsWith('.docx') ||
-            skip = true;
+         file.name.endsWith('.epub') ||file.name.endsWith('.txt')
-          if (file.name.endsWith('.csv')) {
+          
-            skip = false;
+          ) {
            // Read first 1000 lines of CSV file
            const csvContent = await fs.readFile(filePath, 'utf8');
            const lines = csvContent.split('\n').slice(0, 200).join('\n');
            await fs.writeFile(filePath, lines, 'utf8');
            content = lines;
          }
        }
        if (!skip){
-          const document = await this.loadAndSplitFile(filePath);
+            if (file.name.endsWith('.csv')) {
-          // TODO: Add full filename.
+              skip = false;
-          const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
+              // Read first 1000 lines of CSV file
-          await min['vectorStore'].addDocuments(flattenedDocuments);
+              const csvContent = await fs.readFile(filePath, 'utf8');
-          GBLogEx.info(min, `Added ${filePath} to vector store.`);
+              const lines = csvContent.split('\n').slice(0, 200).join('\n');
-        }
+              await fs.writeFile(filePath, lines, 'utf8');
              content = lines;
            }
            const document = await this.loadAndSplitFile(filePath);
            // TODO: Add full filename.
            const flattenedDocuments = document.reduce((acc, val) => acc.concat(val), []);
            await min['vectorStore'].addDocuments(flattenedDocuments);
            GBLogEx.info(min, `Added ${filePath} to vector store.`);
          }
        } catch (error) {
          GBLogEx.info(min, `Ignore processing of ${file}. ${GBUtil.toYAML(error)}`);
        }