new(kb.gbapp): New option for website depth during Vector Retrieval.
This commit is contained in:
parent
ee1fc0afc9
commit
66e3295f1f
2 changed files with 3 additions and 1 deletion
|
@ -136,6 +136,7 @@
|
|||
"google-libphonenumber": "3.2.34",
|
||||
"googleapis": "126.0.1",
|
||||
"hnswlib-node": "3.0.0",
|
||||
"html-to-md": "^0.8.5",
|
||||
"http-proxy": "1.18.1",
|
||||
"ibm-watson": "9.1.0",
|
||||
"iso-639-1": "3.1.2",
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
* @fileoverview Knowledge base services and logic.
|
||||
*/
|
||||
|
||||
import html2md from 'html-to-md'
|
||||
import Path from 'path';
|
||||
import Fs from 'fs';
|
||||
import urlJoin from 'url-join';
|
||||
|
@ -861,7 +862,7 @@ export class KBService implements IGBKBService {
|
|||
if (response && response.headers && response.status() === 200) {
|
||||
const contentType = response.headers()['content-type'];
|
||||
if (contentType && contentType.includes('text/html')) {
|
||||
- const buffer = await page.$eval('*', el => el['innerText']);
|
||||
+ const buffer = html2md(await response.text());
|
||||
const urlObj = new URL(url);
|
||||
const urlPath = urlObj.pathname.endsWith('/') ? urlObj.pathname.slice(0, -1) : urlObj.pathname; // Remove trailing slash if present
|
||||
let filename = urlPath.split('/').pop() || 'index'; // Get the filename from the URL path or set it to 'index.html' as default
|
||||
|
|
Loading…
Add table
Reference in a new issue