fix(llm.gblib): Fix local DB usage in Talk to Data.
This commit is contained in:
parent
6e46bcf4ce
commit
9c2c1bc204
2 changed files with 11 additions and 15 deletions
|
@ -180,16 +180,24 @@ export class ChatServices {
|
||||||
if (sanitizedQuestion === '' || !vectorStore) {
|
if (sanitizedQuestion === '' || !vectorStore) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
let documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments * 10);
|
||||||
let documents = await vectorStore.similaritySearch(sanitizedQuestion, numDocuments);
|
|
||||||
const uniqueDocuments = {};
|
const uniqueDocuments = {};
|
||||||
|
const MAX_DOCUMENTS = numDocuments;
|
||||||
|
|
||||||
for (const document of documents) {
|
for (const document of documents) {
|
||||||
|
if (!GBUtil.isContentPage(document.pageContent)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (!uniqueDocuments[document.metadata.source]) {
|
if (!uniqueDocuments[document.metadata.source]) {
|
||||||
uniqueDocuments[document.metadata.source] = document;
|
uniqueDocuments[document.metadata.source] = document;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
// Stop once we have max unique documents
|
||||||
|
if (Object.keys(uniqueDocuments).length >= MAX_DOCUMENTS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
let output = '';
|
let output = '';
|
||||||
|
|
||||||
for (const filePaths of Object.keys(uniqueDocuments)) {
|
for (const filePaths of Object.keys(uniqueDocuments)) {
|
||||||
|
@ -197,10 +205,6 @@ export class ChatServices {
|
||||||
const metadata = doc.metadata;
|
const metadata = doc.metadata;
|
||||||
const filename = path.basename(metadata.source);
|
const filename = path.basename(metadata.source);
|
||||||
|
|
||||||
if (!GBUtil.isContentPage(doc.pageContent)){
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let page = 0;
|
let page = 0;
|
||||||
if (metadata.source.endsWith('.pdf')) {
|
if (metadata.source.endsWith('.pdf')) {
|
||||||
page = await ChatServices.findPageForText(metadata.source, doc.pageContent);
|
page = await ChatServices.findPageForText(metadata.source, doc.pageContent);
|
||||||
|
|
|
@ -365,15 +365,7 @@ export class GBUtil {
|
||||||
// Common patterns that indicate non-content pages
|
// Common patterns that indicate non-content pages
|
||||||
const nonContentPatterns = [
|
const nonContentPatterns = [
|
||||||
/^index$/i,
|
/^index$/i,
|
||||||
/^contents$/i,
|
|
||||||
/^table of contents$/i,
|
/^table of contents$/i,
|
||||||
/^appendix/i,
|
|
||||||
/^glossary$/i,
|
|
||||||
/^bibliography$/i,
|
|
||||||
/^references$/i,
|
|
||||||
/^acknowledgments?$/i,
|
|
||||||
/^copyright/i,
|
|
||||||
/^about the author/i
|
|
||||||
];
|
];
|
||||||
|
|
||||||
// Check if page is mostly dots, numbers or blank
|
// Check if page is mostly dots, numbers or blank
|
||||||
|
|
Loading…
Add table
Reference in a new issue