fix(llm.gblib): Fix local DB usage in Talk to Data.
This commit is contained in:
parent
6e46bcf4ce
commit
9c2c1bc204
2 changed files with 11 additions and 15 deletions
|
@ -180,16 +180,24 @@ export class ChatServices {
|
|||
if (sanitizedQuestion === '' || !vectorStore) {
|
||||
return '';
|
||||
}
|
||||
|
||||
let documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments);
|
||||
let documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments * 10);
|
||||
const uniqueDocuments = {};
|
||||
const MAX_DOCUMENTS = numDocuments;
|
||||
|
||||
for (const document of documents) {
|
||||
if (!GBUtil.isContentPage(document.pageContent)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!uniqueDocuments[document.metadata.source]) {
|
||||
uniqueDocuments[document.metadata.source] = document;
|
||||
}
|
||||
}
|
||||
|
||||
// Stop once we have max unique documents
|
||||
if (Object.keys(uniqueDocuments).length >= MAX_DOCUMENTS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let output = '';
|
||||
|
||||
for (const filePaths of Object.keys(uniqueDocuments)) {
|
||||
|
@ -197,10 +205,6 @@ export class ChatServices {
|
|||
const metadata = doc.metadata;
|
||||
const filename = path.basename(metadata.source);
|
||||
|
||||
if (!GBUtil.isContentPage(doc.pageContent)){
|
||||
continue;
|
||||
}
|
||||
|
||||
let page = 0;
|
||||
if (metadata.source.endsWith('.pdf')) {
|
||||
page = await ChatServices.findPageForText(metadata.source, doc.pageContent);
|
||||
|
|
|
@ -365,15 +365,7 @@ export class GBUtil {
|
|||
// Common patterns that indicate non-content pages
|
||||
const nonContentPatterns = [
|
||||
/^index$/i,
|
||||
/^contents$/i,
|
||||
/^table of contents$/i,
|
||||
/^appendix/i,
|
||||
/^glossary$/i,
|
||||
/^bibliography$/i,
|
||||
/^references$/i,
|
||||
/^acknowledgments?$/i,
|
||||
/^copyright/i,
|
||||
/^about the author/i
|
||||
];
|
||||
|
||||
// Check if page is mostly dots, numbers or blank
|
||||
|
|
Loading…
Add table
Reference in a new issue