new(all): General Bots Reading Comprehension for .pdf.
This commit is contained in:
parent
1d337cf24a
commit
ba796c86a7
4 changed files with 268 additions and 42 deletions
194
package-lock.json
generated
194
package-lock.json
generated
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "botserver",
|
||||
"version": "2.0.126",
|
||||
"version": "2.0.127",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
|
@ -1491,6 +1491,46 @@
|
|||
"integrity": "sha512-DwS94K+M0vtG+cymxH0rslJr09qpdjyOLdCjmpKcG/nNiZQfMA1ybAaFEmwk9UaVlUG9STENFeQwyrLevJB+7g==",
|
||||
"dev": true
|
||||
},
|
||||
"@google-cloud/common": {
|
||||
"version": "3.7.0",
|
||||
"resolved": "https://registry.npmjs.org/@google-cloud/common/-/common-3.7.0.tgz",
|
||||
"integrity": "sha512-oFgpKLjH9JTOAyQd3kB36iSuH8wNSpDKb1TywlB6zcsG0xmJFxLutmfPhz03KUxRMNQOZ1K1Gc9BYvJifVnGVA==",
|
||||
"requires": {
|
||||
"@google-cloud/projectify": "^2.0.0",
|
||||
"@google-cloud/promisify": "^2.0.0",
|
||||
"arrify": "^2.0.1",
|
||||
"duplexify": "^4.1.1",
|
||||
"ent": "^2.2.0",
|
||||
"extend": "^3.0.2",
|
||||
"google-auth-library": "^7.0.2",
|
||||
"retry-request": "^4.2.2",
|
||||
"teeny-request": "^7.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"arrify": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/arrify/-/arrify-2.0.1.tgz",
|
||||
"integrity": "sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug=="
|
||||
},
|
||||
"debug": {
|
||||
"version": "4.3.2",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz",
|
||||
"integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==",
|
||||
"requires": {
|
||||
"ms": "2.1.2"
|
||||
}
|
||||
},
|
||||
"retry-request": {
|
||||
"version": "4.2.2",
|
||||
"resolved": "https://registry.npmjs.org/retry-request/-/retry-request-4.2.2.tgz",
|
||||
"integrity": "sha512-xA93uxUD/rogV7BV59agW/JHPGXeREMWiZc9jhcwY4YdZ7QOtC7qbomYg0n4wyk2lJhggjvKvhNX8wln/Aldhg==",
|
||||
"requires": {
|
||||
"debug": "^4.1.1",
|
||||
"extend": "^3.0.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"@google-cloud/paginator": {
|
||||
"version": "3.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@google-cloud/paginator/-/paginator-3.0.5.tgz",
|
||||
|
@ -1556,6 +1596,94 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"@google-cloud/translate": {
|
||||
"version": "6.2.6",
|
||||
"resolved": "https://registry.npmjs.org/@google-cloud/translate/-/translate-6.2.6.tgz",
|
||||
"integrity": "sha512-DzXly5s9RtkVCkPk/AxjZV2HQ4b4eN2Dvg+8x8d8Yk/tnKpU1IOCX6lWUGIONMNHMKbN6ITydXj+quo92tvZOg==",
|
||||
"requires": {
|
||||
"@google-cloud/common": "^3.0.0",
|
||||
"@google-cloud/promisify": "^2.0.0",
|
||||
"arrify": "^2.0.0",
|
||||
"extend": "^3.0.2",
|
||||
"google-gax": "^2.17.1",
|
||||
"is-html": "^2.0.0",
|
||||
"protobufjs": "^6.8.8"
|
||||
},
|
||||
"dependencies": {
|
||||
"arrify": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/arrify/-/arrify-2.0.1.tgz",
|
||||
"integrity": "sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug=="
|
||||
},
|
||||
"google-auth-library": {
|
||||
"version": "7.3.0",
|
||||
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-7.3.0.tgz",
|
||||
"integrity": "sha512-MPeeMlnsYnoiiVFMwX3hgaS684aiXrSqKoDP+xL4Ejg4Z0qLvIeg4XsaChemyFI8ZUO7ApwDAzNtgmhWSDNh5w==",
|
||||
"requires": {
|
||||
"arrify": "^2.0.0",
|
||||
"base64-js": "^1.3.0",
|
||||
"ecdsa-sig-formatter": "^1.0.11",
|
||||
"fast-text-encoding": "^1.0.0",
|
||||
"gaxios": "^4.0.0",
|
||||
"gcp-metadata": "^4.2.0",
|
||||
"gtoken": "^5.0.4",
|
||||
"jws": "^4.0.0",
|
||||
"lru-cache": "^6.0.0"
|
||||
}
|
||||
},
|
||||
"google-gax": {
|
||||
"version": "2.19.0",
|
||||
"resolved": "https://registry.npmjs.org/google-gax/-/google-gax-2.19.0.tgz",
|
||||
"integrity": "sha512-2a6WY+p6YMVMmwXmkRqiLreXx67xHDZhkmflcL8aDUkl1csx9ywxEI01veoDXy6T1l0JJD6zLbl5TIbWimmXrw==",
|
||||
"requires": {
|
||||
"@grpc/grpc-js": "~1.3.0",
|
||||
"@grpc/proto-loader": "^0.6.1",
|
||||
"@types/long": "^4.0.0",
|
||||
"abort-controller": "^3.0.0",
|
||||
"duplexify": "^4.0.0",
|
||||
"fast-text-encoding": "^1.0.3",
|
||||
"google-auth-library": "^7.3.0",
|
||||
"is-stream-ended": "^0.1.4",
|
||||
"node-fetch": "^2.6.1",
|
||||
"object-hash": "^2.1.1",
|
||||
"protobufjs": "^6.10.2",
|
||||
"retry-request": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"jwa": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.0.tgz",
|
||||
"integrity": "sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==",
|
||||
"requires": {
|
||||
"buffer-equal-constant-time": "1.0.1",
|
||||
"ecdsa-sig-formatter": "1.0.11",
|
||||
"safe-buffer": "^5.0.1"
|
||||
}
|
||||
},
|
||||
"jws": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz",
|
||||
"integrity": "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==",
|
||||
"requires": {
|
||||
"jwa": "^2.0.0",
|
||||
"safe-buffer": "^5.0.1"
|
||||
}
|
||||
},
|
||||
"lru-cache": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
|
||||
"integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==",
|
||||
"requires": {
|
||||
"yallist": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"yallist": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
|
||||
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
|
||||
}
|
||||
}
|
||||
},
|
||||
"@grpc/grpc-js": {
|
||||
"version": "1.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.3.2.tgz",
|
||||
|
@ -8199,6 +8327,11 @@
|
|||
"once": "^1.4.0"
|
||||
}
|
||||
},
|
||||
"ent": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/ent/-/ent-2.2.0.tgz",
|
||||
"integrity": "sha1-6WQhkyWiHQX0RGai9obtbOX13R0="
|
||||
},
|
||||
"entities": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
|
||||
|
@ -10146,6 +10279,11 @@
|
|||
"integrity": "sha512-aI5tKwNTBzOZApHIynaAwecLBv8TlZTEy/P4Sj2SzzAhBrGuI8yGZ0UIXVPQzOHGS+to2mjb04iy6VWt/8+d8A==",
|
||||
"dev": true
|
||||
},
|
||||
"html-tags": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/html-tags/-/html-tags-3.1.0.tgz",
|
||||
"integrity": "sha512-1qYz89hW3lFDEazhjW0yVAV87lw8lVkrJocr72XmBkMKsoSVJCQx3W8BXsC7hO2qAt8BoVjYjtAcZ9perqGnNg=="
|
||||
},
|
||||
"htmlparser2": {
|
||||
"version": "3.10.1",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.1.tgz",
|
||||
|
@ -10889,6 +11027,14 @@
|
|||
"integrity": "sha512-gyPJuv83bHMpocVYoqof5VDiZveEoGoFL8m3BXNb2VW8Xs+rz9kqO8LOQ5DH6EsuvilT1ApazU0pyl+ytbPtlw==",
|
||||
"dev": true
|
||||
},
|
||||
"is-html": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/is-html/-/is-html-2.0.0.tgz",
|
||||
"integrity": "sha512-S+OpgB5i7wzIue/YSE5hg0e5ZYfG3hhpNh9KGl6ayJ38p7ED6wxQLd1TV91xHpcTvw90KMJ9EwN3F/iNflHBVg==",
|
||||
"requires": {
|
||||
"html-tags": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"is-installed-globally": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmjs.org/is-installed-globally/-/is-installed-globally-0.1.0.tgz",
|
||||
|
@ -13219,6 +13365,11 @@
|
|||
"lodash.toarray": "^4.4.0"
|
||||
}
|
||||
},
|
||||
"node-ensure": {
|
||||
"version": "0.0.0",
|
||||
"resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz",
|
||||
"integrity": "sha1-7K52QVDemYYexcgQ/V0Jaxg5Mqc="
|
||||
},
|
||||
"node-fetch": {
|
||||
"version": "2.6.1",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.1.tgz",
|
||||
|
@ -16306,6 +16457,15 @@
|
|||
"pinkie-promise": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"pdf-extraction": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/pdf-extraction/-/pdf-extraction-1.0.2.tgz",
|
||||
"integrity": "sha512-wVA4HvsvaNYVAH6wp0Tt5+AUHV3XIGM2KQMlOyblsn0YDSUKtTwCJq87F7vIbBnnKsc3noSpL/Bx/sfB1ZqpLA==",
|
||||
"requires": {
|
||||
"debug": "^3.1.0",
|
||||
"node-ensure": "^0.0.0"
|
||||
}
|
||||
},
|
||||
"pdf-text-extract": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/pdf-text-extract/-/pdf-text-extract-1.3.1.tgz",
|
||||
|
@ -19210,6 +19370,14 @@
|
|||
"readable-stream": "^2.0.2"
|
||||
}
|
||||
},
|
||||
"stream-events": {
|
||||
"version": "1.0.5",
|
||||
"resolved": "https://registry.npmjs.org/stream-events/-/stream-events-1.0.5.tgz",
|
||||
"integrity": "sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg==",
|
||||
"requires": {
|
||||
"stubs": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"stream-shift": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.1.tgz",
|
||||
|
@ -19325,6 +19493,11 @@
|
|||
"escape-string-regexp": "^1.0.2"
|
||||
}
|
||||
},
|
||||
"stubs": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/stubs/-/stubs-3.0.0.tgz",
|
||||
"integrity": "sha1-6NK6H6nJBXAwPAMLaQD31fiavls="
|
||||
},
|
||||
"superagent": {
|
||||
"version": "3.8.3",
|
||||
"resolved": "https://registry.npmjs.org/superagent/-/superagent-3.8.3.tgz",
|
||||
|
@ -19611,6 +19784,25 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"teeny-request": {
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/teeny-request/-/teeny-request-7.1.1.tgz",
|
||||
"integrity": "sha512-iwY6rkW5DDGq8hE2YgNQlKbptYpY5Nn2xecjQiNjOXWbKzPGUfmeUBCSQbbr306d7Z7U2N0TPl+/SwYRfua1Dg==",
|
||||
"requires": {
|
||||
"http-proxy-agent": "^4.0.0",
|
||||
"https-proxy-agent": "^5.0.0",
|
||||
"node-fetch": "^2.6.1",
|
||||
"stream-events": "^1.0.5",
|
||||
"uuid": "^8.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"uuid": {
|
||||
"version": "8.3.2",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz",
|
||||
"integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg=="
|
||||
}
|
||||
}
|
||||
},
|
||||
"temp-dir": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/temp-dir/-/temp-dir-2.0.0.tgz",
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
"dependencies": {
|
||||
"@azure/ms-rest-js": "2.5.1",
|
||||
"@google-cloud/pubsub": "^2.13.0",
|
||||
"@google-cloud/translate": "^6.2.6",
|
||||
"@microsoft/microsoft-graph-client": "2.2.1",
|
||||
"@semantic-release/changelog": "5.0.1",
|
||||
"@semantic-release/exec": "5.0.0",
|
||||
|
@ -89,6 +90,7 @@
|
|||
"nexmo": "2.9.1",
|
||||
"node-cron": "3.0.0",
|
||||
"opn": "6.0.0",
|
||||
"pdf-extraction": "^1.0.2",
|
||||
"phone": "2.4.21",
|
||||
"pragmatismo-io-framework": "1.0.20",
|
||||
"prism-media": "1.3.1",
|
||||
|
|
|
@ -62,6 +62,7 @@ const fs = require('fs');
|
|||
const SpeechToTextV1 = require('ibm-watson/speech-to-text/v1');
|
||||
const { IamAuthenticator } = require('ibm-watson/auth');
|
||||
const marked = require('marked');
|
||||
const { Translate } = require('@google-cloud/translate').v2;
|
||||
|
||||
/**
|
||||
* Provides basic services for handling messages and dispatching to back-end
|
||||
|
@ -771,6 +772,28 @@ export class GBConversationalService {
|
|||
}
|
||||
text = text.replace('¿', '');
|
||||
|
||||
if (min.instance.googleProjectId) {
|
||||
// Instantiates a client
|
||||
|
||||
const translate = new Translate({
|
||||
projectId: min.instance.googleProjectId,
|
||||
credentials: { client_email: min.instance.googleClientEmail, private_key: min.instance.googlePrivateKey.replace(/\\n/gm, '\n') }
|
||||
});
|
||||
|
||||
try {
|
||||
|
||||
const [translation] = await translate.translate(text, language);
|
||||
|
||||
return translation;
|
||||
} catch (error) {
|
||||
const msg = `Error calling Google Translator service layer. Error is: ${error}.`;
|
||||
|
||||
return Promise.reject(new Error(msg));
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
|
||||
let options = {
|
||||
method: 'POST',
|
||||
baseUrl: endPoint,
|
||||
|
@ -799,11 +822,12 @@ export class GBConversationalService {
|
|||
|
||||
return results[0].translations[0].text;
|
||||
} catch (error) {
|
||||
const msg = `Error calling Translator service layer. Error is: ${error}.`;
|
||||
const msg = `Error calling MSFT Translator service layer. Error is: ${error}.`;
|
||||
|
||||
return Promise.reject(new Error(msg));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public async prompt(min: GBMinInstance, step: GBDialogStep, text: string) {
|
||||
const user = await min.userProfile.get(step.context, {});
|
||||
|
|
|
@ -65,6 +65,7 @@ import { GuaribasAnswer, GuaribasQuestion, GuaribasSubject } from '../models';
|
|||
import { GBConfigService } from './../../core.gbapp/services/GBConfigService';
|
||||
const request = require('request-promise-native');
|
||||
const textract = require('textract');
|
||||
const pdf = require("pdf-extraction");
|
||||
|
||||
/**
|
||||
* Result for query on KB data.
|
||||
|
@ -613,12 +614,19 @@ export class KBService implements IGBKBService {
|
|||
const files = await walkPromise(urlJoin(localPath, 'docs'));
|
||||
|
||||
await CollectionUtil.asyncForEach(files, async file => {
|
||||
if (file !== null && file.name.endsWith('.docx')) {
|
||||
let content = await this.getTextFromWord(Path.join(file.root, file.name));
|
||||
|
||||
content = await min.conversationalService.translate(min, content, 'en');
|
||||
let content = null;
|
||||
let filePath = Path.join(file.root, file.name);
|
||||
if (file !== null) {
|
||||
if (file.name.endsWith('.docx')) {
|
||||
content = await this.getTextFromFile(filePath);
|
||||
} else if (file.name.endsWith('.pdf')) {
|
||||
const read = await pdf(Fs.readFileSync(filePath));
|
||||
content = read.text;
|
||||
}
|
||||
}
|
||||
|
||||
if (content) {
|
||||
content = await min.conversationalService.translate(min, content, 'en');
|
||||
await GuaribasAnswer.create({
|
||||
instanceId: instance.instanceId,
|
||||
content: content,
|
||||
|
@ -627,7 +635,7 @@ export class KBService implements IGBKBService {
|
|||
packageId: packageId
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -762,7 +770,7 @@ export class KBService implements IGBKBService {
|
|||
return await request.post(options);
|
||||
}
|
||||
|
||||
private async getTextFromWord(filename: string) {
|
||||
private async getTextFromFile(filename: string) {
|
||||
return new Promise<string>(async (resolve, reject) => {
|
||||
textract.fromFileWithPath(filename, { preserveLineBreaks: true }, (error, text) => {
|
||||
if (error) {
|
||||
|
|
Loading…
Add table
Reference in a new issue