fix(core.gbapp): Added IBM for TTS and removed MSFT, since it does not compile on AZR.

2021-12-29 13:03:32 -03:00 · 2021-12-29 13:03:32 -03:00 · 4dbc1eb6a0
commit 4dbc1eb6a0
parent c12a7d736c
1 changed files with 34 additions and 36 deletions
--- a/packages/core.gbapp/services/GBConversationalService.ts
+++ b/packages/core.gbapp/services/GBConversationalService.ts
@ -60,6 +60,7 @@ const prism = require('prism-media');
 const request = require('request-promise-native');
 const fs = require('fs');
 const SpeechToTextV1 = require('ibm-watson/speech-to-text/v1');
+const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
 const { IamAuthenticator } = require('ibm-watson/auth');
 const marked = require('marked');
 const { Translate } = require('@google-cloud/translate').v2;
@ -297,26 +298,29 @@ export class GBConversationalService {
      const name = GBAdminService.getRndReadableIdentifier();

      const waveFilename = `work/tmp${name}.pcm`;
-      const sdk = require('microsoft-cognitiveservices-speech-sdk');
-      sdk.Recognizer.enableTelemetry(false);
-
-      var audioConfig = sdk.AudioConfig.fromAudioFileOutput(waveFilename);
-      var speechConfig = sdk.SpeechConfig.fromSubscription(speechKey, cloudRegion);
-
-      var synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
-
      try {
-        speechConfig.speechSynthesisLanguage = locale;
-        speechConfig.speechSynthesisVoiceName = 'pt-BR-FranciscaNeural';

-        synthesizer.speakTextAsync(text, result => {
-          if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
-            let raw = Buffer.from(result.audioData);
-            fs.writeFileSync(waveFilename, raw);
-            GBLog.info(`Audio data byte size: ${result.audioData.byteLength}.`);
+        const textToSpeech = new TextToSpeechV1({
+          authenticator: new IamAuthenticator({ apikey: process.env.WATSON_TTS_KEY }),
+          url: process.env.WATSON_STT_URL
+        });
+
+        const params = {
+          text: text,
+          accept: 'audio/l16; rate=44100',
+          voice: 'pt-BR_IsabelaV3Voice'
+        };
+
+        // Migrated to IBM from MSFT, as the MSFT package does not compile on Azure Web App.
+
+        let buffer = await textToSpeech.synthesize(params);
+        fs.writeFileSync(waveFilename, buffer);
+        GBLog.info(`Audio data byte size: ${buffer.byteLength}.`);
+
+        // Converts to OGG.
+
        const oggFilenameOnly = `tmp${name}.ogg`;
        const oggFilename = `work/${oggFilenameOnly}`;
-
        const output = fs.createWriteStream(oggFilename);
        const transcoder = new prism.FFmpeg({
          args: ['-analyzeduration', '0', '-loglevel', '0', '-f', 'opus', '-ar', '16000', '-ac', '1']
@ -326,13 +330,7 @@ export class GBConversationalService {

        let url = urlJoin(GBServer.globals.publicAddress, 'audios', oggFilenameOnly);
        resolve(url);
-          } else {
-            const error = 'Speech synthesis canceled, ' + result.errorDetails;
-            reject(error);
-          }
-          synthesizer.close();
-          synthesizer = undefined;
-        });
+
      } catch (error) {
        reject(error);
      }