new(kb.gbapp): New option for website depth during Vector Retrieval.
This commit is contained in:
		
							parent
							
								
									ee1fc0afc9
								
							
						
					
					
						commit
						66e3295f1f
					
				
					 2 changed files with 3 additions and 1 deletion
				
			
		| 
						 | 
					@ -136,6 +136,7 @@
 | 
				
			||||||
    "google-libphonenumber": "3.2.34",
 | 
					    "google-libphonenumber": "3.2.34",
 | 
				
			||||||
    "googleapis": "126.0.1",
 | 
					    "googleapis": "126.0.1",
 | 
				
			||||||
    "hnswlib-node": "3.0.0",
 | 
					    "hnswlib-node": "3.0.0",
 | 
				
			||||||
 | 
					    "html-to-md": "^0.8.5",
 | 
				
			||||||
    "http-proxy": "1.18.1",
 | 
					    "http-proxy": "1.18.1",
 | 
				
			||||||
    "ibm-watson": "9.1.0",
 | 
					    "ibm-watson": "9.1.0",
 | 
				
			||||||
    "iso-639-1": "3.1.2",
 | 
					    "iso-639-1": "3.1.2",
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -32,6 +32,7 @@
 | 
				
			||||||
 * @fileoverview Knowledge base services and logic.
 | 
					 * @fileoverview Knowledge base services and logic.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import html2md from 'html-to-md'
 | 
				
			||||||
import Path from 'path';
 | 
					import Path from 'path';
 | 
				
			||||||
import Fs from 'fs';
 | 
					import Fs from 'fs';
 | 
				
			||||||
import urlJoin from 'url-join';
 | 
					import urlJoin from 'url-join';
 | 
				
			||||||
| 
						 | 
					@ -861,7 +862,7 @@ export class KBService implements IGBKBService {
 | 
				
			||||||
    if (response && response.headers && response.status() === 200) {
 | 
					    if (response && response.headers && response.status() === 200) {
 | 
				
			||||||
      const contentType = response.headers()['content-type'];
 | 
					      const contentType = response.headers()['content-type'];
 | 
				
			||||||
      if (contentType && contentType.includes('text/html')) {
 | 
					      if (contentType && contentType.includes('text/html')) {
 | 
				
			||||||
        const buffer = await page.$eval('*', el => el['innerText']);
 | 
					        const buffer = html2md(await response.text());
 | 
				
			||||||
        const urlObj = new URL(url);
 | 
					        const urlObj = new URL(url);
 | 
				
			||||||
        const urlPath = urlObj.pathname.endsWith('/') ? urlObj.pathname.slice(0, -1) : urlObj.pathname; // Remove trailing slash if present
 | 
					        const urlPath = urlObj.pathname.endsWith('/') ? urlObj.pathname.slice(0, -1) : urlObj.pathname; // Remove trailing slash if present
 | 
				
			||||||
        let filename = urlPath.split('/').pop() || 'index'; // Get the filename from the URL path or set it to 'index.html' as default
 | 
					        let filename = urlPath.split('/').pop() || 'index'; // Get the filename from the URL path or set it to 'index.html' as default
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue