new(kb.gbapp): New option for website depth during Vector Retrieval.

This commit is contained in:
Rodrigo Rodriguez 2024-06-26 21:41:32 -03:00
parent ee1fc0afc9
commit 66e3295f1f
2 changed files with 3 additions and 1 deletion

View file

@ -136,6 +136,7 @@
"google-libphonenumber": "3.2.34",
"googleapis": "126.0.1",
"hnswlib-node": "3.0.0",
"html-to-md": "^0.8.5",
"http-proxy": "1.18.1",
"ibm-watson": "9.1.0",
"iso-639-1": "3.1.2",

View file

@ -32,6 +32,7 @@
* @fileoverview Knowledge base services and logic.
*/
import html2md from 'html-to-md'
import Path from 'path';
import Fs from 'fs';
import urlJoin from 'url-join';
@ -861,7 +862,7 @@ export class KBService implements IGBKBService {
if (response && response.headers && response.status() === 200) {
const contentType = response.headers()['content-type'];
if (contentType && contentType.includes('text/html')) {
const buffer = await page.$eval('*', el => el['innerText']);
const buffer = html2md(await response.text());
const urlObj = new URL(url);
const urlPath = urlObj.pathname.endsWith('/') ? urlObj.pathname.slice(0, -1) : urlObj.pathname; // Remove trailing slash if present
let filename = urlPath.split('/').pop() || 'index'; // Get the filename from the URL path or set it to 'index.html' as default