fix(llm.gblib): Talk to data local db use fix.

This commit is contained in:
Rodrigo Rodriguez 2024-11-24 13:17:18 -03:00
parent 6e46bcf4ce
commit 9c2c1bc204
2 changed files with 11 additions and 15 deletions

View file

@@ -180,16 +180,24 @@ export class ChatServices {
if (sanitizedQuestion === '' || !vectorStore) { if (sanitizedQuestion === '' || !vectorStore) {
return ''; return '';
} }
let documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments * 10);
let documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments);
const uniqueDocuments = {}; const uniqueDocuments = {};
const MAX_DOCUMENTS = numDocuments;
for (const document of documents) { for (const document of documents) {
if (!GBUtil.isContentPage(document.pageContent)) {
continue;
}
if (!uniqueDocuments[document.metadata.source]) { if (!uniqueDocuments[document.metadata.source]) {
uniqueDocuments[document.metadata.source] = document; uniqueDocuments[document.metadata.source] = document;
} }
}
// Stop once we have max unique documents
if (Object.keys(uniqueDocuments).length >= MAX_DOCUMENTS) {
break;
}
}
let output = ''; let output = '';
for (const filePaths of Object.keys(uniqueDocuments)) { for (const filePaths of Object.keys(uniqueDocuments)) {
@@ -197,10 +205,6 @@ export class ChatServices {
const metadata = doc.metadata; const metadata = doc.metadata;
const filename = path.basename(metadata.source); const filename = path.basename(metadata.source);
if (!GBUtil.isContentPage(doc.pageContent)){
continue;
}
let page = 0; let page = 0;
if (metadata.source.endsWith('.pdf')) { if (metadata.source.endsWith('.pdf')) {
page = await ChatServices.findPageForText(metadata.source, doc.pageContent); page = await ChatServices.findPageForText(metadata.source, doc.pageContent);

View file

@@ -365,15 +365,7 @@ export class GBUtil {
// Common patterns that indicate non-content pages // Common patterns that indicate non-content pages
const nonContentPatterns = [ const nonContentPatterns = [
/^index$/i, /^index$/i,
/^contents$/i,
/^table of contents$/i, /^table of contents$/i,
/^appendix/i,
/^glossary$/i,
/^bibliography$/i,
/^references$/i,
/^acknowledgments?$/i,
/^copyright/i,
/^about the author/i
]; ];
// Check if page is mostly dots, numbers or blank // Check if page is mostly dots, numbers or blank