2018-04-21 02:59:30 -03:00
/ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * \
| ( ) _ _ |
| _ _ _ __ _ _ __ ___ ___ _ _ | , _ ) ( _ ) ___ ___ _ |
2020-07-01 15:00:40 -03:00
| ( '_`\ ( ' __ ) / '_` ) /' _ ` \ /' _ ` _ ` \ /'_ ` ) | | | | /',__)/ ' v `\ /' _ ` \ |
2019-03-09 16:59:31 -03:00
| | ( _ ) ) | | ( ( _ | | ( ( _ ) || ( ) ( ) | ( ( _ | || | _ | | \ __ , \ | ( ˅ ) | ( ( _ ) ) |
2018-04-21 02:59:30 -03:00
| | , __ / '(_) `\__,_)`\__ |(_) (_) (_)`\__,_)`\__)(_)(____/(_) (_)`\___/' |
| | | ( ) _ ) | |
| ( _ ) \ ___ / ' |
| |
| General Bots Copyright ( c ) Pragmatismo . io . All rights reserved . |
| Licensed under the AGPL - 3.0 . |
2018-11-11 19:09:18 -02:00
| |
2018-04-21 02:59:30 -03:00
| According to our dual licensing model , this program can be used either |
| under the terms of the GNU Affero General Public License , version 3 , |
| or under a proprietary license . |
| |
| The texts of the GNU Affero General Public License with an additional |
| permission and of our proprietary license can be found at and |
| in the LICENSE file you have received along with this program . |
| |
| This program is distributed in the hope that it will be useful , |
2018-09-11 19:40:53 -03:00
| but WITHOUT ANY WARRANTY , without even the implied warranty of |
2018-04-21 02:59:30 -03:00
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the |
| GNU Affero General Public License for more details . |
| |
| "General Bots" is a registered trademark of Pragmatismo . io . |
| The licensing of the program under the AGPLv3 does not imply a |
| trademark license . Therefore any rights , title and interest in |
| our trademarks remain entirely with us . |
| |
\ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * /
2018-11-27 22:56:11 -02:00
/ * *
* @fileoverview Knowledge base services and logic .
* /
2018-11-12 12:20:44 -02:00
const Path = require ( 'path' ) ;
const Fs = require ( 'fs' ) ;
2020-03-30 14:03:12 -03:00
const urlJoin = require ( 'url-join' ) ;
2018-11-12 12:20:44 -02:00
const path = require ( 'path' ) ;
const asyncPromise = require ( 'async-promises' ) ;
const walkPromise = require ( 'walk-promise' ) ;
2019-03-09 16:59:31 -03:00
// tslint:disable-next-line:newline-per-chained-call
2019-08-23 02:23:00 -03:00
const { SearchService } = require ( 'azure-search-client' ) ;
2020-12-31 15:36:19 -03:00
const Excel = require ( 'exceljs' ) ;
2021-01-20 18:23:42 -03:00
const getSlug = require ( 'speakingurl' ) ;
2021-04-03 20:13:27 -03:00
import { GBServer } from '../../../src/app' ;
2020-08-26 17:50:42 -03:00
import {
GBDialogStep ,
GBLog ,
2020-12-31 15:36:19 -03:00
GBMinInstance ,
2020-08-26 17:50:42 -03:00
IGBConversationalService ,
IGBCoreService ,
IGBInstance ,
2020-12-31 15:36:19 -03:00
IGBKBService
2020-08-26 17:50:42 -03:00
} from 'botlib' ;
2020-12-31 15:36:19 -03:00
import { CollectionUtil } from 'pragmatismo-io-framework' ;
2019-12-31 16:12:06 -03:00
import { Op } from 'sequelize' ;
2018-11-12 12:20:44 -02:00
import { Sequelize } from 'sequelize-typescript' ;
2019-03-08 06:49:22 -03:00
import { AzureDeployerService } from '../../azuredeployer.gbapp/services/AzureDeployerService' ;
2018-11-12 12:20:44 -02:00
import { GuaribasPackage } from '../../core.gbapp/models/GBModel' ;
import { GBDeployer } from '../../core.gbapp/services/GBDeployer' ;
2020-12-31 15:36:19 -03:00
import { CSService } from '../../customer-satisfaction.gbapp/services/CSService' ;
2018-11-12 12:20:44 -02:00
import { GuaribasAnswer , GuaribasQuestion , GuaribasSubject } from '../models' ;
import { GBConfigService } from './../../core.gbapp/services/GBConfigService' ;
2021-07-16 08:12:58 -03:00
const request = require ( 'request-promise-native' ) ;
const textract = require ( 'textract' ) ;
2021-07-26 10:19:56 -03:00
const pdf = require ( "pdf-extraction" ) ;
2018-04-21 02:59:30 -03:00
2019-03-08 19:13:00 -03:00
/**
 * Result of a query against the knowledge base data.
 */
export class KBServiceSearchResults {
  /** Answer selected for the query. */
  public answer: GuaribasAnswer;

  /** Identifier of the question that matched the query. */
  public questionId: number;
}
2019-03-08 19:13:00 -03:00
/ * *
* All services related to knowledge base management .
* /
2020-03-30 14:03:12 -03:00
export class KBService implements IGBKBService {
2018-11-12 12:20:44 -02:00
public sequelize : Sequelize ;
2018-09-10 16:24:32 -03:00
/**
 * Creates the service on top of an existing Sequelize connection.
 *
 * @param sequelize Sequelize instance kept in the public `sequelize` field.
 */
constructor(sequelize: Sequelize) {
  this.sequelize = sequelize;
}
2018-11-12 12:20:44 -02:00
/**
 * Builds a comma separated list with the titles of the given subjects.
 *
 * @param subjects Subjects to format; may be null or undefined.
 * @returns Titles joined by ', ', or an empty string when no list is given.
 */
public static getFormattedSubjectItems(subjects: GuaribasSubject[]): string {
  // Both null and undefined are treated as "no subjects", consistently with
  // getSubjectItemsSeparatedBySpaces, so a missing list never throws.
  if (subjects === null || subjects === undefined) {
    return '';
  }

  return subjects.map(subject => subject.title).join(', ');
}
/**
 * Builds a space separated list with the internal ids of the given subjects.
 *
 * @param subjects Subjects to format; may be null or undefined.
 * @returns Internal ids joined by a single space, or an empty string when no list is given.
 */
public static getSubjectItemsSeparatedBySpaces(subjects: GuaribasSubject[]): string {
  // Both null and undefined are treated as "no subjects", consistently with
  // getFormattedSubjectItems, so a missing list never throws.
  if (subjects === null || subjects === undefined) {
    return '';
  }

  return subjects.map(subject => subject.internalId).join(' ');
}
2020-03-31 09:11:04 -03:00
/**
 * Reads the content of the answer stored for a given media name.
 *
 * @param instanceId Instance that owns the answer.
 * @param answerMediaName Value of the answer's `media` column.
 * @returns The answer content, or null when no such answer exists.
 */
public async getAnswerTextByMediaName(instanceId: number, answerMediaName: string): Promise<string> {
  const criteria = { instanceId: instanceId, media: answerMediaName };
  const found = await GuaribasAnswer.findOne({ where: criteria });

  // Loose comparison on purpose: covers both null and undefined.
  if (found == undefined) {
    return null;
  }

  return found.content;
}
2019-02-11 11:25:00 -02:00
/**
 * Finds a single question by its id within an instance.
 *
 * @param instanceId Instance that owns the question.
 * @param questionId Id of the question.
 * @returns The result of `GuaribasQuestion.findOne` for that filter.
 */
public async getQuestionById(instanceId: number, questionId: number): Promise<GuaribasQuestion> {
  const criteria = { instanceId: instanceId, questionId: questionId };

  return GuaribasQuestion.findOne({ where: criteria });
}
2019-02-11 11:25:00 -02:00
/**
 * Finds a single answer by its id within an instance.
 *
 * @param instanceId Instance that owns the answer.
 * @param answerId Id of the answer.
 * @returns The result of `GuaribasAnswer.findOne` for that filter.
 */
public async getAnswerById(instanceId: number, answerId: number): Promise<GuaribasAnswer> {
  const criteria = { instanceId: instanceId, answerId: answerId };

  return GuaribasAnswer.findOne({ where: criteria });
}
2021-01-20 18:23:42 -03:00
/**
 * Looks up the questions referenced by a SEO friendly URL.
 *
 * The text after the last '-' of the URL is used as the question id, and a
 * value derived from the URL path is used to load the bot instance.
 *
 * @param core Core service used to load the instance.
 * @param url SEO friendly URL.
 * @returns The result of `GuaribasQuestion.findAll` (an array) for that instance and question id.
 */
public async getQuestionIdFromURL(core: IGBCoreService, url: string) {
  // Question id: everything after the last dash.
  const dashIndex = url.lastIndexOf('-');
  const id = url.slice(dashIndex + 1);

  // Bot id: derived from the third capture group, i.e. what follows "host/".
  // NOTE(review): this keeps the text from the first '/' of that group onwards,
  // which looks like the tail of the path rather than the bot id — confirm
  // against the URL layout produced by the callers.
  const urlPattern = /(http[s]?:\/\/)?([^\/\s]+\/)(.*)/gi;
  const botId = url.replace(urlPattern, (_match, _scheme, _host, rest) => {
    const slashIndex = rest.indexOf('/');

    return rest.slice(slashIndex);
  });

  // Finds the associated question.
  const instance = await core.loadInstanceByBotId(botId);
  const criteria = { instanceId: instance.instanceId, questionId: id };

  return await GuaribasQuestion.findAll({ where: criteria });
}
2022-06-06 18:03:02 -03:00
/**
 * Lists the questions of an instance whose content contains an opening
 * parenthesis (SQL LIKE '%(%'); RefreshNER parses those as `name(category)`.
 *
 * @param instanceId Instance that owns the questions.
 * @returns The matching questions.
 */
public static async getQuestionsNER(instanceId: number) {
  const criteria = {
    instanceId: instanceId,
    content: { [Op.like]: `%(%` }
  };

  return await GuaribasQuestion.findAll({ where: criteria });
}
2021-01-20 18:23:42 -03:00
/**
 * Builds the SEO friendly URL fragments for every question of an instance.
 *
 * Each fragment is `<slug of the question content>-<questionId>`. The suffix
 * must be the question id (not the position in the result set) because
 * getQuestionIdFromURL resolves the text after the last '-' as `questionId`.
 *
 * @param instanceId Instance that owns the questions.
 * @returns One URL fragment per question.
 */
public async getQuestionsSEO(instanceId: number) {
  const questions = await GuaribasQuestion.findAll({
    where: {
      instanceId: instanceId
    }
  });

  return questions.map(question => `${getSlug(question.content)}-${question.questionId}`);
}
2021-07-16 08:12:58 -03:00
/**
 * Lists the answers of an instance stored with the '.docx' format.
 *
 * @param instanceId Instance that owns the answers.
 * @returns The matching answers.
 */
public async getDocs(instanceId: number) {
  const criteria = { instanceId: instanceId, format: '.docx' };
  const docs = await GuaribasAnswer.findAll({ where: criteria });

  return docs;
}
2021-12-25 22:13:52 -03:00
/**
 * Finds a question (and its answer) directly from a text.
 *
 * The alternate texts known to CSService are tried first; when that yields
 * nothing, the question content is searched with a LIKE pattern that expects
 * the text to be surrounded by `[^a-z]` on both sides.
 *
 * @param instanceId Instance that owns the knowledge base.
 * @param text Text to look for; it is trimmed before use.
 * @param from Optional value that the question's `from` column must match.
 * @returns `{ question, answer }` when a question is found, otherwise undefined.
 */
public async getAnswerByText(instanceId: number, text: string, from: string = null): Promise<any> {
  text = text.trim();

  const csService = new CSService();
  let question = await csService.getQuestionFromAlternateText(instanceId, text);

  if (!question) {
    const criteria = {
      instanceId: instanceId,
      content: { [Op.like]: `%[^a-z]${text}[^a-z]%` }
    };
    if (from) {
      criteria['from'] = from;
    }
    question = await GuaribasQuestion.findOne({ where: criteria });
  }

  // TODO: Solve this compilation error and enable the exact-match fallback:
  // if (!question) {
  //   question = await GuaribasQuestion.findOne({
  //     where: {
  //       instanceId: instanceId,
  //       content: { [Op.eq]: `${text}` }
  //     }
  //   });
  // }

  if (question === null) {
    return undefined;
  }

  const answer = await GuaribasAnswer.findOne({
    where: { instanceId: instanceId, answerId: question.answerId }
  });

  return { question: question, answer: answer };
}
2022-06-06 18:03:02 -03:00
2021-12-25 22:13:52 -03:00
2018-11-12 12:20:44 -02:00
/**
 * Persists a new answer.
 *
 * @param obj Answer data handed to `GuaribasAnswer.create`.
 * @returns The created answer.
 */
public async addAnswer(obj: GuaribasAnswer): Promise<GuaribasAnswer> {
  const created = await GuaribasAnswer.create(obj);

  return created;
}
2018-11-12 12:20:44 -02:00
/**
 * Answers a query using the knowledge base.
 *
 * The query is lower-cased and stripped of punctuation, then a direct text
 * match (getAnswerByText) is tried. When that fails and the instance has a
 * search key and the storage dialect is 'mssql', Azure Search is queried and
 * its best hit is used if its score reaches `searchScore`.
 *
 * @param instance Instance being queried.
 * @param query Text typed by the user.
 * @param searchScore Minimum score required to accept a search hit.
 * @param subjects Subjects whose internal ids are appended to the search query; may be null.
 * @returns The answer and question id; `{ answer: undefined, questionId: 0 }` when
 *          the search has no acceptable hit; undefined when search is not available.
 */
public async ask(
  instance: IGBInstance,
  query: string,
  searchScore: number,
  subjects: GuaribasSubject[]
): Promise<KBServiceSearchResults> {
  // Builds search query. A global regular expression is required here because
  // String.replace() with a string pattern only replaces the FIRST occurrence,
  // which left punctuation in queries such as "what? really?".
  query = query.toLowerCase().replace(/\r\n|[?!.\/\\]/g, ' ');

  // Try simple search first.
  const data = await this.getAnswerByText(instance.instanceId, query.trim());
  if (data) {
    GBLog.info(`Simple SEARCH called.`);

    return { answer: data.answer, questionId: data.question.questionId };
  }

  if (subjects !== null) {
    const text = KBService.getSubjectItemsSeparatedBySpaces(subjects);
    if (text !== null) {
      query = `${query} ${text}`;
    }
  }

  // No direct match found, so Search is used, when it is available.
  if (instance.searchKey === null || GBConfigService.get('STORAGE_DIALECT') !== 'mssql') {
    // Same result as before (the method used to fall off its end), now explicit.
    return undefined;
  }

  const client = new SearchService(instance.searchHost.split('.')[0], instance.searchKey);
  const results = await client.indexes.use('azuresql-index').search({
    count: true,
    filter: `instanceId eq ${instance.instanceId} and skipIndex eq false`,
    search: query,
    searchFields: 'content, subject1, subject2, subject3, subject4',
    select: 'instanceId, questionId, answerId',
    skip: 0,
    top: 1
  });
  const values = results.result.value;

  // Searches via Search (Azure Search).
  if (!values || values.length === 0) {
    GBLog.info(`SEARCH called but NO answer could be found (zero results).`);

    return { answer: undefined, questionId: 0 };
  }

  const returnedScore = values[0]['@search.score'];
  if (!(returnedScore >= searchScore)) {
    GBLog.info(
      `SEARCH called but returned LOW level score,
        returnedScore: ${returnedScore} < required (searchScore): ${searchScore}`
    );

    return { answer: undefined, questionId: 0 };
  }

  const value = await this.getAnswerById(instance.instanceId, values[0].answerId);
  if (value === null) {
    // The score was high enough here; the hit is discarded only because the
    // answer row is missing, so the message reports the real relation (>=).
    GBLog.info(
      `SEARCH WILL NOT be used as answerId ${values[0].answerId} was not found in database,
        returnedScore: ${returnedScore} >= required (searchScore): ${searchScore}`
    );

    return { answer: undefined, questionId: 0 };
  }

  GBLog.info(`SEARCH WILL BE USED with score: ${returnedScore} > required (searchScore): ${searchScore}`);

  return { answer: value, questionId: values[0].questionId };
}
2019-02-11 11:25:00 -02:00
/**
 * Lists the subjects that are direct children of a given parent subject.
 *
 * @param instanceId Instance that owns the subjects.
 * @param parentId Value matched against `parentSubjectId`.
 * @returns The matching subjects.
 */
public async getSubjectItems(instanceId: number, parentId: number): Promise<GuaribasSubject[]> {
  const criteria = { parentSubjectId: parentId, instanceId: instanceId };
  const query = { where: criteria };

  return GuaribasSubject.findAll(query);
}
2021-02-28 12:26:44 -03:00
/**
 * Lists the questions filed under a subject path.
 *
 * With a subject list, each of the four subject levels must match the
 * `internalId` found at the same position in the list, or be null when that
 * position has no usable id. Without a list, only `from` and the instance are
 * used as filters.
 *
 * @param instanceId Instance that owns the questions.
 * @param from Value matched against the question's `from` column.
 * @param subjects Subject list indexed by level (0 to 3), or a falsy value.
 * @returns The matching questions.
 */
public async getFaqBySubjectArray(instanceId: number, from: string, subjects: any): Promise<GuaribasQuestion[]> {
  if (!subjects) {
    return await GuaribasQuestion.findAll({
      where: { from: from, instanceId: instanceId }
    });
  }

  // Internal id stored at a given level, or null when that level is not set.
  const levelId = (level: number) => {
    const subject = subjects[level];

    // tslint:disable-next-line: no-null-keyword
    return subject && subject.internalId ? subject.internalId : null;
  };

  const where = {
    from: from,
    subject1: levelId(0),
    subject2: levelId(1),
    subject3: levelId(2),
    subject4: levelId(3),
    instanceId: instanceId
  };

  return await GuaribasQuestion.findAll({
    where: where
  });
}
2021-12-25 22:13:52 -03:00
/**
 * Lists the questions of an instance whose `from` column is 'group'.
 *
 * @param instanceId Instance that owns the questions.
 * @returns The matching questions.
 */
public static async getGroupReplies(instanceId: number): Promise<GuaribasQuestion[]> {
  const criteria = { from: 'group', instanceId: instanceId };
  const replies = await GuaribasQuestion.findAll({ where: criteria });

  return replies;
}
2018-11-12 12:20:44 -02:00
/**
 * Imports questions and answers from a tabular (.xlsx) knowledge base file.
 *
 * The first non-empty worksheet is read. Every row with its first five cells
 * defined is taken as: subjects (dot separated, up to four levels), from, to,
 * question and answer. The header row ('subjects' / 'from') is skipped. When
 * the answer contains '.md', it is loaded from the sibling `articles` folder.
 *
 * @param filePath Path of the .xlsx file.
 * @param instanceId Instance that will own the imported rows.
 * @param packageId Package that will own the imported rows.
 * @returns The questions created in the store (empty when the file has no worksheet).
 */
public async importKbTabularFile(
  filePath: string,
  instanceId: number,
  packageId: number
): Promise<GuaribasQuestion[]> {
  GBLog.info(`Now reading file ${filePath}...`);
  const workbook = new Excel.Workbook();
  const data = await workbook.xlsx.readFile(filePath);

  // Finds a valid worksheet because Excel returns empty slots
  // when loading worksheets collection.
  let worksheet: any;
  for (let t = 0; t < data._worksheets.length; t++) {
    if (data._worksheets[t]) {
      worksheet = data._worksheets[t];
      break;
    }
  }

  // A workbook without any worksheet used to crash on `worksheet._rows`.
  if (!worksheet) {
    GBLog.info(`[GBImporter] No worksheet found in ${filePath}, nothing imported.`);

    return [];
  }

  const rows = worksheet._rows;
  const answers = [];
  const questions = [];

  GBLog.info(`Processing ${rows.length} rows from tabular file ${filePath}...`);

  // Nothing is awaited per row, so a plain loop replaces the async iteration.
  for (let r = 0; r < rows.length; r++) {
    const line = rows[r];

    // Skips empty slots and rows lacking any of the five expected cells.
    if (line == undefined) {
      continue;
    }
    const cells = line._cells;
    if (
      cells[0] === undefined ||
      cells[1] === undefined ||
      cells[2] === undefined ||
      cells[3] === undefined ||
      cells[4] === undefined
    ) {
      continue;
    }

    // Extracts values from columns in the current line.
    const subjectsText = cells[0].text;
    const from = cells[1].text;
    const to = cells[2].text;
    const question = cells[3].text;
    let answer = cells[4].text;

    // Skips the header.
    if ((subjectsText === 'subjects' && from === 'from') || answer === null || question === null) {
      continue;
    }

    let format = '.txt';

    // Extracts answer from external media if any.
    let media = null;

    if (typeof answer !== 'string') {
      GBLog.info(`[GBImporter] Answer is NULL related to Question '${question}'.`);
      answer =
        'Existe um problema na base de conhecimento. Fui treinado para entender sua pergunta, avise a quem me criou que a resposta não foi informada para esta pergunta.';
    } else if (answer.indexOf('.md') > -1) {
      const mediaFilename = urlJoin(path.dirname(filePath), '..', 'articles', answer);
      if (Fs.existsSync(mediaFilename)) {
        answer = Fs.readFileSync(mediaFilename, 'utf8');
        format = '.md';
        media = path.basename(mediaFilename);
      } else {
        GBLog.info(`[GBImporter] File not found: ${mediaFilename}.`);
        answer = '';
      }
    }

    // Processes subjects hierarchy splitting by dots; only the first four
    // levels are kept, each limited to 63 characters. Missing levels stay undefined.
    const [subject1, subject2, subject3, subject4] = subjectsText
      .split('.')
      .slice(0, 4)
      .map(level => level.substring(0, 63));

    // Now with all the data ready, creates entities in the store.
    answers.push({
      instanceId: instanceId,
      content: answer,
      format: format,
      media: media,
      packageId: packageId,
      // Answer chaining is disabled (see TODO below), so there is never a
      // previous question. The old `lastQuestionId !== null ? ... : 0` test
      // compared a never-assigned (undefined) variable against null and
      // therefore stored undefined instead of the intended 0.
      prevId: 0
    });

    questions.push({
      from: from,
      to: to,
      subject1: subject1,
      subject2: subject2,
      subject3: subject3,
      subject4: subject4,
      content: question.replace(/["]+/g, ''),
      instanceId: instanceId,
      // A question starting with a double quote is flagged with skipIndex.
      skipIndex: question.charAt(0) === '"',
      packageId: packageId
    });

    // TODO: Tutorial. if (lastAnswer !== undefined && lastQuestionId !== 0) {
    //   await lastAnswer.update({ nextId: lastQuestionId });
    // }
    // lastAnswer = answer1;
    // lastQuestionId = question1.questionId;
  }

  // Answers are created first so each question can point to the answer
  // created at the same position.
  const answersCreated = await GuaribasAnswer.bulkCreate(answers);
  questions.forEach((question, index) => {
    question.answerId = answersCreated[index].answerId;
  });

  return await GuaribasQuestion.bulkCreate(questions);
}
2019-08-24 18:46:04 -03:00
/**
 * Sends an answer to the user, choosing the player by the kind of content.
 *
 * - '.mp4' content is played as video.
 * - Office documents are opened through the Office Online viewer.
 * - '.pdf' content is played as an URL inside the bot's assets folder.
 * - Answers in '.md' format are handed to `playMarkdown`.
 * - '.ogg' content is played as audio unless AUDIO_DISABLED is 'true'.
 * - Anything else is sent as text, followed by a 'stop' event.
 *
 * @param min Bot instance sending the answer.
 * @param channel Channel of the conversation (e.g. 'whatsapp').
 * @param step Current dialog step.
 * @param answer Answer to send.
 */
public async sendAnswer(min: GBMinInstance, channel: string, step: GBDialogStep, answer: GuaribasAnswer) {
  const content = answer.content;
  const officeExtensions = ['.ppt', '.pptx', '.doc', '.docx', '.xls', '.xlsx'];

  if (content.endsWith('.mp4')) {
    await this.playVideo(min, min.conversationalService, step, answer, channel);
  } else if (officeExtensions.some(extension => content.endsWith(extension))) {
    const doc = urlJoin(
      GBServer.globals.publicAddress,
      'kb',
      `${min.instance.botId}.gbai`,
      `${min.instance.botId}.gbkb`,
      'assets',
      content
    );
    // The document address travels as a query-string value, so it has to be
    // URL-encoded; otherwise names with spaces, '&' or '#' break the link.
    const url = `http://view.officeapps.live.com/op/view.aspx?src=${encodeURIComponent(doc)}`;
    await this.playUrl(min, min.conversationalService, step, url, channel);
  } else if (content.endsWith('.pdf')) {
    const url = urlJoin('kb', `${min.instance.botId}.gbai`, `${min.instance.botId}.gbkb`, 'assets', content);
    await this.playUrl(min, min.conversationalService, step, url, channel);
  } else if (answer.format === '.md') {
    await min.conversationalService['playMarkdown'](min, content, channel, step, min.conversationalService);
  } else if (content.endsWith('.ogg') && process.env.AUDIO_DISABLED !== 'true') {
    await this.playAudio(min, answer, channel, step, min.conversationalService);
  } else {
    await min.conversationalService.sendText(min, step, content);
    await min.conversationalService.sendEvent(min, step, 'stop', undefined);
  }
}
2019-08-24 12:22:52 -03:00
/**
 * Imports a whole knowledge base package found at a local path.
 *
 * Runs, in this order: the subjects tree (only when `subjects.json` exists),
 * the tabular files, the articles not referenced by tabular files and the
 * documents of the `docs` folder.
 *
 * @param min Bot instance, forwarded to the documents import.
 * @param localPath Folder of the package.
 * @param packageStorage Stored package whose id tags every imported row.
 * @param instance Instance that will own the imported data.
 * @returns The result of importDocs.
 */
public async importKbPackage(
  min: GBMinInstance,
  localPath: string,
  packageStorage: GuaribasPackage,
  instance: IGBInstance
): Promise<any> {
  const packageId = packageStorage.packageId;

  // Subjects tree first, when the package ships one.
  const subjectFile = urlJoin(localPath, 'subjects.json');
  const hasSubjects = Fs.existsSync(subjectFile);
  if (hasSubjects) {
    await this.importSubjectFile(packageId, subjectFile, instance);
  }

  // Tabular files, then the .md articles they did not reference.
  await this.importKbTabularDirectory(localPath, instance, packageId);
  await this.importRemainingArticles(localPath, instance, packageId);

  // Finally, the documents of the docs folder.
  return await this.importDocs(min, localPath, instance, packageId);
}
2018-09-09 18:11:41 -03:00
2020-04-02 19:03:57 -03:00
/**
 * Imports every .md file of the `articles` folder that has no answer stored
 * under its file name yet (i.e. it was not referenced by a tabular file).
 *
 * @param localPath Folder of the package.
 * @param instance Instance that will own the answers.
 * @param packageId Package that will own the answers.
 */
public async importRemainingArticles(localPath: string, instance: IGBInstance, packageId: number): Promise<any> {
  const articlesFolder = urlJoin(localPath, 'articles');
  const files = await walkPromise(articlesFolder);

  await CollectionUtil.asyncForEach(files, async file => {
    const isArticle = file !== null && file.name.endsWith('.md');
    if (!isArticle) {
      return;
    }

    // A non-null content means an answer already exists for this media name.
    const existing = await this.getAnswerTextByMediaName(instance.instanceId, file.name);
    if (existing !== null) {
      return;
    }

    const fullFilename = urlJoin(file.root, file.name);
    const content = Fs.readFileSync(fullFilename, 'utf-8');

    await GuaribasAnswer.create({
      instanceId: instance.instanceId,
      content: content,
      format: '.md',
      media: file.name,
      packageId: packageId,
      prevId: 0 // TODO: Calculate total rows and increment.
    } as GuaribasAnswer);
  });
}
2021-01-20 18:23:42 -03:00
2021-07-16 08:12:58 -03:00
/**
 * Imports the .docx and .pdf files of the `docs` (reading comprehension) folder.
 *
 * The text of each file is extracted, translated to 'en' through the
 * conversational service and stored as an answer. Both kinds of file are
 * stored with the '.docx' format, which is the format getDocs filters on.
 *
 * @param min Bot instance whose conversational service translates the text.
 * @param localPath Folder of the package.
 * @param instance Instance that will own the answers.
 * @param packageId Package that will own the answers.
 */
public async importDocs(min: GBMinInstance, localPath: string, instance: IGBInstance, packageId: number): Promise<any> {
  const files = await walkPromise(urlJoin(localPath, 'docs'));
  if (!files[0]) {
    GBLog.info(`[GBDeployer] docs folder not created yet in .gbkb. To use Reading Comprehension, create this folder at root and put a document to get read by the.`);

    return;
  }

  await CollectionUtil.asyncForEach(files, async file => {
    // The entry has to be checked BEFORE its properties are read; the check
    // used to come after Path.join(file.root, file.name) and so never helped.
    if (file === null || file === undefined) {
      return;
    }

    const filePath = Path.join(file.root, file.name);
    let content = null;
    if (file.name.endsWith('.docx')) {
      content = await this.getTextFromFile(filePath);
    } else if (file.name.endsWith('.pdf')) {
      const read = await pdf(Fs.readFileSync(filePath));
      content = read.text;
    }

    // Files of other kinds, or without any text, are ignored.
    if (content) {
      content = await min.conversationalService.translate(min, content, 'en');
      await GuaribasAnswer.create({
        instanceId: instance.instanceId,
        content: content,
        format: '.docx',
        media: file.name,
        packageId: packageId
      } as GuaribasAnswer);
    }
  });
}
2019-08-24 18:46:04 -03:00
/**
 * Imports every .xlsx file found while walking the package folder.
 *
 * @param localPath Folder of the package.
 * @param instance Instance that will own the imported rows.
 * @param packageId Package that will own the imported rows.
 */
public async importKbTabularDirectory(localPath: string, instance: IGBInstance, packageId: number): Promise<any> {
  const files = await walkPromise(localPath);

  await CollectionUtil.asyncForEach(files, async file => {
    const isTabular = file !== null && file.name.endsWith('.xlsx');
    if (!isTabular) {
      return;
    }
    const tabularFile = urlJoin(file.root, file.name);

    return await this.importKbTabularFile(tabularFile, instance.instanceId, packageId);
  });
}
2018-09-09 20:09:07 -03:00
2019-08-24 18:46:04 -03:00
/**
 * Imports the subjects tree described by a JSON file.
 *
 * The `children` of the parsed file are created one after another; the
 * children of each item are then created with the new subject as parent.
 *
 * @param packageId Package that will own the subjects.
 * @param filename Path of the JSON file.
 * @param instance Instance that will own the subjects.
 * @returns The result of the series run over the top-level children.
 */
public async importSubjectFile(packageId: number, filename: string, instance: IGBInstance): Promise<any> {
  const tree = JSON.parse(Fs.readFileSync(filename, 'utf8'));

  // Creates one level of the tree and recurses into each item's children.
  const importLevel = async (subjects: GuaribasSubject[], parentSubjectId: number) => {
    return asyncPromise.eachSeries(subjects, async item => {
      const created = await GuaribasSubject.create({
        internalId: item.id,
        parentSubjectId: parentSubjectId,
        instanceId: instance.instanceId,
        from: item.from,
        to: item.to,
        title: item.title,
        description: item.description,
        packageId: packageId
      } as GuaribasSubject);

      if (!item.children) {
        return item;
      }

      return importLevel(item.children, created.subjectId);
    });
  };

  return importLevel(tree.children, undefined);
}
2019-08-24 12:22:52 -03:00
/**
 * Removes everything a package stored for an instance: questions, answers,
 * subjects and, at last, the package record itself.
 *
 * @param instance Instance that owns the package.
 * @param deployer Deployer (not used by this method).
 * @param packageId Package to remove.
 */
public async undeployKbFromStorage(instance: IGBInstance, deployer: GBDeployer, packageId: number) {
  // A fresh options object is built for every call.
  const ownedByPackage = () => ({
    where: { instanceId: instance.instanceId, packageId: packageId }
  });

  await GuaribasQuestion.destroy(ownedByPackage());
  await GuaribasAnswer.destroy(ownedByPackage());
  await GuaribasSubject.destroy(ownedByPackage());
  await this.undeployPackageFromStorage(instance, packageId);
}
2018-04-21 02:59:30 -03:00
2022-06-12 18:43:19 -03:00
/**
 * Feeds the NER engine of a bot with the named entities found in its questions.
 *
 * Questions are expected to look like `name(category)`: the text captured
 * between parentheses is the category and the word right before the opening
 * parenthesis is the entity name.
 *
 * @param min Bot instance whose `nerEngine` receives the entities.
 */
public static async RefreshNER(min: GBMinInstance) {
  const questions = await KBService.getQuestionsNER(min.instance.instanceId);
  const contentLocale = min.core.getParam<string>(
    min.instance,
    'Default Content Language',
    GBConfigService.get('DEFAULT_CONTENT_LANGUAGE')
  );

  await CollectionUtil.asyncForEach(questions, async question => {
    const content = question.content;
    const categoryMatch = /.*\((.*)\).*/gi.exec(content);
    const nameMatch = /(\w+)\(.*\).*/gi.exec(content);

    // Both parts are needed to register an entity.
    if (!categoryMatch || !nameMatch) {
      return;
    }

    const entityCategory = categoryMatch[1];
    const entityName = nameMatch[1];
    min['nerEngine'].addNamedEntityText(entityCategory, entityName, [contentLocale], [entityName]);
  });
}
2018-04-21 02:59:30 -03:00
/**
 * Deploys a knowledge base to the storage using the .gbkb format.
 *
 * Registers the package, imports its content, mounts its assets, rebuilds
 * the search index and refreshes the group replies cache and the NER engine.
 *
 * @param core Core service used to load the instance of the bot.
 * @param deployer Deployer used to register the package and rebuild the index.
 * @param localPath Path to the .gbkb folder.
 * @param min Bot instance receiving the knowledge base.
 */
public async deployKb(core: IGBCoreService, deployer: GBDeployer, localPath: string, min: GBMinInstance) {
  const packageName = Path.basename(localPath);
  GBLog.info(`[GBDeployer] Opening package: ${localPath}`);

  const instance = await core.loadInstanceByBotId(min.botId);
  GBLog.info(`[GBDeployer] Importing: ${localPath}`);

  const packageStorage = await deployer.deployPackageToStorage(instance.instanceId, packageName);
  await this.importKbPackage(min, localPath, packageStorage, instance);
  GBDeployer.mountGBKBAssets(packageName, min.botId, localPath);

  const searchSchema = new AzureDeployerService(deployer).getKBSearchSchema(instance.searchIndex);
  await deployer.rebuildIndex(instance, searchSchema);

  min['groupCache'] = await KBService.getGroupReplies(instance.instanceId);
  await KBService.RefreshNER(min);

  GBLog.info(`[GBDeployer] Finished import of ${localPath}`);
}
2020-12-31 15:36:19 -03:00
/**
 * Sends the content of an answer as audio.
 *
 * @param min Bot instance sending the audio.
 * @param answer Answer whose content is sent.
 * @param channel Channel of the conversation (not used by this method).
 * @param step Current dialog step.
 * @param conversationalService Service used to send the audio.
 */
private async playAudio(
  min: GBMinInstance,
  answer: GuaribasAnswer,
  channel: string,
  step: GBDialogStep,
  conversationalService: IGBConversationalService
) {
  // Awaited so that a failure rejects this call instead of being lost in a
  // floating promise, and so the caller resumes only after the send is done.
  await conversationalService.sendAudio(min, step, answer.content);
}
2021-04-03 12:41:44 -03:00
/**
 * Shows an URL to the user: as a file on WhatsApp, as a 'play' event of
 * player type 'url' on every other channel.
 *
 * @param min Bot instance.
 * @param conversationalService Service used to send the 'play' event.
 * @param step Current dialog step.
 * @param url Address to show.
 * @param channel Channel of the conversation.
 */
private async playUrl(
  min,
  conversationalService: IGBConversationalService,
  step: GBDialogStep,
  url: string,
  channel: string
) {
  if (channel === 'whatsapp') {
    await min.conversationalService.sendFile(min, step, null, url, '');

    return;
  }

  const payload = { playerType: 'url', data: url };
  await conversationalService.sendEvent(min, step, 'play', payload);
}
2020-12-31 15:36:19 -03:00
/**
 * Plays the video of an answer: as a file on WhatsApp, as a 'play' event of
 * player type 'video' (pointing to the bot's `videos` folder) elsewhere.
 *
 * @param min Bot instance.
 * @param conversationalService Service used to send the 'play' event.
 * @param step Current dialog step.
 * @param answer Answer whose content names the video.
 * @param channel Channel of the conversation.
 */
private async playVideo(
  min,
  conversationalService: IGBConversationalService,
  step: GBDialogStep,
  answer: GuaribasAnswer,
  channel: string
) {
  if (channel === 'whatsapp') {
    await min.conversationalService.sendFile(min, step, null, answer.content, '');

    return;
  }

  const videoUrl = urlJoin(`${min.instance.botId}.gbai`, `${min.instance.botId}.gbkb`, 'videos', answer.content);
  await conversationalService.sendEvent(min, step, 'play', {
    playerType: 'video',
    data: videoUrl
  });
}
/**
 * Removes the record of a package from the storage.
 *
 * @param instance Instance that owns the package.
 * @param packageId Package to remove.
 */
private async undeployPackageFromStorage(instance: any, packageId: number) {
  const criteria = { instanceId: instance.instanceId, packageId: packageId };

  await GuaribasPackage.destroy({ where: criteria });
}
2021-07-16 08:12:58 -03:00
/**
 * Asks the reading comprehension endpoint of the models server
 * (GBMODELS_SERVER) to answer a question about a document.
 *
 * @param instanceId Instance asking (not used by this method).
 * @param doc Document text, posted as the `content` form field.
 * @param question Question sent in the query string, along with GBMODELS_KEY.
 * @returns The response of the POST request.
 */
public async readComprehension(instanceId: number, doc: string, question: string) {
  const fiveMinutes = 60000 * 5;
  const endpoint = `http://${process.env.GBMODELS_SERVER}/reading-comprehension`;

  const options = {
    timeout: fiveMinutes,
    uri: endpoint,
    form: { content: doc },
    qs: { question: question, key: process.env.GBMODELS_KEY }
  };
  GBLog.info(`[General Bots Models]: ReadComprehension for ${question}.`);

  return await request.post(options);
}
2021-07-26 10:19:56 -03:00
/**
 * Extracts the text of a file through textract, keeping line breaks.
 *
 * @param filename Path of the file to read.
 * @returns The extracted text; rejects with the error reported by textract.
 */
private async getTextFromFile(filename: string) {
  // The executor must not be async: with an async executor, an exception
  // thrown synchronously by textract would reject the executor's own hidden
  // promise and leave this one pending forever.
  return new Promise<string>((resolve, reject) => {
    textract.fromFileWithPath(filename, { preserveLineBreaks: true }, (error, text) => {
      if (error) {
        reject(error);
      } else {
        resolve(text);
      }
    });
  });
}
2018-04-21 02:59:30 -03:00
}
2021-07-16 08:12:58 -03:00