fix(core.gbapp): Added IBM for TTS and removed MSFT because it does not compile on AZR.

This commit is contained in:
Rodrigo Rodriguez 2021-12-29 13:03:32 -03:00
parent c12a7d736c
commit 4dbc1eb6a0

View file

@ -60,6 +60,7 @@ const prism = require('prism-media');
const request = require('request-promise-native');
const fs = require('fs');
const SpeechToTextV1 = require('ibm-watson/speech-to-text/v1');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
const { IamAuthenticator } = require('ibm-watson/auth');
const marked = require('marked');
const { Translate } = require('@google-cloud/translate').v2;
@ -297,26 +298,29 @@ export class GBConversationalService {
const name = GBAdminService.getRndReadableIdentifier();
const waveFilename = `work/tmp${name}.pcm`;
const sdk = require('microsoft-cognitiveservices-speech-sdk');
sdk.Recognizer.enableTelemetry(false);
var audioConfig = sdk.AudioConfig.fromAudioFileOutput(waveFilename);
var speechConfig = sdk.SpeechConfig.fromSubscription(speechKey, cloudRegion);
var synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
try {
speechConfig.speechSynthesisLanguage = locale;
speechConfig.speechSynthesisVoiceName = 'pt-BR-FranciscaNeural';
synthesizer.speakTextAsync(text, result => {
if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
let raw = Buffer.from(result.audioData);
fs.writeFileSync(waveFilename, raw);
GBLog.info(`Audio data byte size: ${result.audioData.byteLength}.`);
const textToSpeech = new TextToSpeechV1({
authenticator: new IamAuthenticator({ apikey: process.env.WATSON_TTS_KEY }),
url: process.env.WATSON_STT_URL
});
const params = {
text: text,
accept: 'audio/l16; rate=44100',
voice: 'pt-BR_IsabelaV3Voice'
};
// Migrated to IBM from MSFT, as its own package does not compile on Azure Web App.
let buffer = await textToSpeech.synthesize(params);
fs.writeFileSync(waveFilename, buffer);
GBLog.info(`Audio data byte size: ${buffer.byteLength}.`);
// Converts to OGG.
const oggFilenameOnly = `tmp${name}.ogg`;
const oggFilename = `work/${oggFilenameOnly}`;
const output = fs.createWriteStream(oggFilename);
const transcoder = new prism.FFmpeg({
args: ['-analyzeduration', '0', '-loglevel', '0', '-f', 'opus', '-ar', '16000', '-ac', '1']
@ -326,13 +330,7 @@ export class GBConversationalService {
let url = urlJoin(GBServer.globals.publicAddress, 'audios', oggFilenameOnly);
resolve(url);
} else {
const error = 'Speech synthesis canceled, ' + result.errorDetails;
reject(error);
}
synthesizer.close();
synthesizer = undefined;
});
} catch (error) {
reject(error);
}