
Commit c330971

Speech upgrade (#427)
* Upgrades speech client to semi-gapic
* Adds library with streaming fixes
1 parent d401ab6 commit c330971


5 files changed: 91 additions & 78 deletions

speech/package.json

Lines changed: 5 additions & 5 deletions
@@ -18,14 +18,14 @@
     "test": "npm run system-test"
   },
   "dependencies": {
-    "@google-cloud/speech": "0.9.3",
-    "@google-cloud/storage": "1.1.1",
+    "@google-cloud/speech": "^0.10.1",
+    "@google-cloud/storage": "^1.2.0",
     "node-record-lpcm16": "0.3.0",
-    "yargs": "8.0.2"
+    "yargs": "^8.0.2"
   },
   "devDependencies": {
-    "@google-cloud/nodejs-repo-tools": "1.4.15",
-    "ava": "0.19.1",
+    "@google-cloud/nodejs-repo-tools": "^1.4.15",
+    "ava": "^0.19.1",
     "proxyquire": "1.8.0",
     "sinon": "2.3.4"
   },
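The dependency bumps also move from exact pins to caret ranges, so compatible updates install without editing package.json. For a 0.x package a caret range only floats the patch level, while for >=1.0.0 it floats the minor as well. A quick check with the npm semver package, used here purely for illustration (it is not added by this commit):

// Illustrates what the new caret ranges accept (semver is only for demonstration).
const semver = require('semver');

console.log(semver.satisfies('0.10.4', '^0.10.1')); // true  (patch updates allowed)
console.log(semver.satisfies('0.11.0', '^0.10.1')); // false (0.x carets do not cross the minor)
console.log(semver.satisfies('8.2.0', '^8.0.2'));   // true  (>=1.0.0 carets allow minor updates)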

speech/quickstart.js

Lines changed: 16 additions & 4 deletions
@@ -18,9 +18,10 @@
 // [START speech_quickstart]
 // Imports the Google Cloud client library
 const Speech = require('@google-cloud/speech');
+const fs = require('fs');
 
 // Your Google Cloud Platform project ID
-const projectId = 'YOUR_PROJECT_ID';
+const projectId = 'your-project-id';
 
 // Instantiates a client
 const speechClient = Speech({
@@ -30,17 +31,28 @@ const speechClient = Speech({
 // The name of the audio file to transcribe
 const fileName = './resources/audio.raw';
 
+// Reads a local audio file and converts it to base64
+const file = fs.readFileSync(fileName);
+const audioBytes = file.toString('base64');
+
 // The audio file's encoding, sample rate in hertz, and BCP-47 language code
-const options = {
+const audio = {
+  content: audioBytes
+};
+const config = {
   encoding: 'LINEAR16',
   sampleRateHertz: 16000,
   languageCode: 'en-US'
 };
+const request = {
+  audio: audio,
+  config: config
+};
 
 // Detects speech in the audio file
-speechClient.recognize(fileName, options)
+speechClient.recognize(request)
   .then((results) => {
-    const transcription = results[0];
+    const transcription = results[0].results[0].alternatives[0].transcript;
     console.log(`Transcription: ${transcription}`);
   })
   .catch((err) => {
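For reference, this is roughly how speech/quickstart.js reads once the patch is applied. The client constructor options and the error handler body sit outside the hunks above, so those two details are assumed:

// Sketch of speech/quickstart.js after this change.
const Speech = require('@google-cloud/speech');
const fs = require('fs');

// Your Google Cloud Platform project ID
const projectId = 'your-project-id';

// Instantiates a client (constructor options assumed; only the opening line is in the diff)
const speechClient = Speech({
  projectId: projectId
});

// The name of the audio file to transcribe
const fileName = './resources/audio.raw';

// Reads a local audio file and converts it to base64
const file = fs.readFileSync(fileName);
const audioBytes = file.toString('base64');

// The audio file's encoding, sample rate in hertz, and BCP-47 language code
const audio = {
  content: audioBytes
};
const config = {
  encoding: 'LINEAR16',
  sampleRateHertz: 16000,
  languageCode: 'en-US'
};
const request = {
  audio: audio,
  config: config
};

// Detects speech in the audio file
speechClient.recognize(request)
  .then((results) => {
    const transcription = results[0].results[0].alternatives[0].transcript;
    console.log(`Transcription: ${transcription}`);
  })
  .catch((err) => {
    // Error handler body assumed; it is outside the hunk above
    console.error('ERROR:', err);
  });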

speech/recognize.js

Lines changed: 58 additions & 20 deletions
@@ -26,6 +26,7 @@
 function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // [START speech_sync_recognize]
   // Imports the Google Cloud client library
+  const fs = require('fs');
   const Speech = require('@google-cloud/speech');
 
   // Instantiates a client
@@ -43,18 +44,25 @@ function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
 
-  const request = {
+  const config = {
     encoding: encoding,
     sampleRateHertz: sampleRateHertz,
     languageCode: languageCode
   };
+  const audio = {
+    content: fs.readFileSync(filename).toString('base64')
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
 
   // Detects speech in the audio file
-  speech.recognize(filename, request)
+  speech.recognize(request)
     .then((results) => {
-      const transcription = results[0];
-
-      console.log(`Transcription: ${transcription}`);
+      const transcription = results[0].results[0].alternatives[0].transcript;
+      console.log(`Transcription: `, transcription);
     })
     .catch((err) => {
       console.error('ERROR:', err);
@@ -82,18 +90,25 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
 
-  const request = {
+  const config = {
     encoding: encoding,
     sampleRateHertz: sampleRateHertz,
     languageCode: languageCode
   };
+  const audio = {
+    uri: gcsUri
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
 
   // Detects speech in the audio file
-  speech.recognize(gcsUri, request)
+  speech.recognize(request)
     .then((results) => {
-      const transcription = results[0];
-
-      console.log(`Transcription: ${transcription}`);
+      const transcription = results[0].results[0].alternatives[0].transcript;
+      console.log(`Transcription: `, transcription);
     })
     .catch((err) => {
       console.error('ERROR:', err);
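Both sync variants build the same config; only the audio source differs. A minimal sketch of the two audio shapes used above, with an illustrative file name and bucket:

// The two audio sources used by these samples: inline base64 content for a
// local file, or a Cloud Storage URI for a remote one (names illustrative).
const fs = require('fs');

const localAudio = {
  content: fs.readFileSync('./resources/audio.raw').toString('base64')
};

const gcsAudio = {
  uri: 'gs://my-bucket/audio.raw'
};

// Either shape plugs into the same request:
const config = {
  encoding: 'LINEAR16',
  sampleRateHertz: 16000,
  languageCode: 'en-US'
};
const request = {
  config: config,
  audio: gcsAudio // or localAudio
};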
@@ -105,6 +120,7 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // [START speech_async_recognize]
   // Imports the Google Cloud client library
   const Speech = require('@google-cloud/speech');
+  const fs = require('fs');
 
   // Instantiates a client
   const speech = Speech();
@@ -121,22 +137,30 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
 
-  const request = {
+  const config = {
     encoding: encoding,
     sampleRateHertz: sampleRateHertz,
     languageCode: languageCode
   };
+  const audio = {
+    content: fs.readFileSync(filename).toString('base64')
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
 
   // Detects speech in the audio file. This creates a recognition job that you
   // can wait for now, or get its result later.
-  speech.startRecognition(filename, request)
+  speech.longRunningRecognize(request)
     .then((results) => {
       const operation = results[0];
       // Get a Promise representation of the final result of the job
       return operation.promise();
     })
     .then((results) => {
-      const transcription = results[0];
+      const transcription = results[0].results[0].alternatives[0].transcript;
       console.log(`Transcription: ${transcription}`);
     })
     .catch((err) => {
@@ -165,22 +189,31 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
 
-  const request = {
+  const config = {
     encoding: encoding,
     sampleRateHertz: sampleRateHertz,
     languageCode: languageCode
   };
 
+  const audio = {
+    uri: gcsUri
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
+
   // Detects speech in the audio file. This creates a recognition job that you
   // can wait for now, or get its result later.
-  speech.startRecognition(gcsUri, request)
+  speech.longRunningRecognize(request)
     .then((results) => {
       const operation = results[0];
       // Get a Promise representation of the final result of the job
       return operation.promise();
     })
     .then((results) => {
-      const transcription = results[0];
+      const transcription = results[0].results[0].alternatives[0].transcript;
       console.log(`Transcription: ${transcription}`);
     })
     .catch((err) => {
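Both async variants follow the same long-running pattern: longRunningRecognize resolves with an operation handle, and operation.promise() resolves with the final recognition results. A condensed sketch of that flow, assembled from the two hunks above with an illustrative GCS URI:

// Condensed long-running flow used by both async samples (URI illustrative).
const Speech = require('@google-cloud/speech');
const speech = Speech();

const request = {
  config: {
    encoding: 'LINEAR16',
    sampleRateHertz: 16000,
    languageCode: 'en-US'
  },
  audio: {
    uri: 'gs://my-bucket/audio.raw'
  }
};

speech.longRunningRecognize(request)
  .then((results) => {
    // results[0] is the operation handle for the recognition job
    const operation = results[0];
    // Wait for the job to finish and get its final result
    return operation.promise();
  })
  .then((results) => {
    const transcription = results[0].results[0].alternatives[0].transcript;
    console.log(`Transcription: ${transcription}`);
  })
  .catch((err) => {
    console.error('ERROR:', err);
  });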
@@ -221,10 +254,11 @@ function streamingRecognize (filename, encoding, sampleRateHertz, languageCode)
   };
 
   // Stream the audio to the Google Cloud Speech API
-  const recognizeStream = speech.createRecognizeStream(request)
+  const recognizeStream = speech.streamingRecognize(request)
     .on('error', console.error)
     .on('data', (data) => {
-      console.log(`Transcription: ${data.results}`);
+      console.log(
+        `Transcription: ${data.results[0].alternatives[0].transcript}`);
     });
 
   // Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw"
@@ -261,9 +295,13 @@ function streamingMicRecognize (encoding, sampleRateHertz, languageCode) {
   };
 
   // Create a recognize stream
-  const recognizeStream = speech.createRecognizeStream(request)
+  const recognizeStream = speech.streamingRecognize(request)
     .on('error', console.error)
-    .on('data', (data) => process.stdout.write(data.results));
+    .on('data', (data) =>
+      process.stdout.write(
+        (data.results[0] && data.results[0].alternatives[0])
+          ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
+          : `\n\nReached transcription time limit, press Ctrl+C\n`));
 
   // Start recording and send the microphone input to the Speech API
   record
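The streaming samples swap createRecognizeStream for streamingRecognize but keep the same stream/event flow. A sketch of the microphone path; the streaming request shape and the record.start() options do not appear in these hunks, so they are assumptions:

// Sketch of the streaming microphone path. The streaming request shape and the
// record.start() options are assumptions; only the streamingRecognize call and
// the 'data' handler appear in the hunks above.
const Speech = require('@google-cloud/speech');
const record = require('node-record-lpcm16');

const speech = Speech();

const request = {
  config: {
    encoding: 'LINEAR16',
    sampleRateHertz: 16000,
    languageCode: 'en-US'
  },
  interimResults: false // assumption: final results only
};

// Create a recognize stream
const recognizeStream = speech.streamingRecognize(request)
  .on('error', console.error)
  .on('data', (data) =>
    process.stdout.write(
      (data.results[0] && data.results[0].alternatives[0])
        ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
        : `\n\nReached transcription time limit, press Ctrl+C\n`));

// Start recording and pipe the microphone input to the Speech API
// (recording options assumed; see node-record-lpcm16's docs)
record.start({ sampleRateHertz: 16000, threshold: 0 }).pipe(recognizeStream);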

speech/system-test/quickstart.test.js

Lines changed: 10 additions & 47 deletions
@@ -16,57 +16,20 @@
 'use strict';
 
 const path = require(`path`);
-const proxyquire = require(`proxyquire`).noPreserveCache();
-const sinon = require(`sinon`);
-const speech = proxyquire(`@google-cloud/speech`, {})();
 const test = require(`ava`);
 
+const cmd = `node quickstart.js`;
+const cwd = path.join(__dirname, `..`);
+const text = `how old is the Brooklyn Bridge`;
+
 const {
-  checkCredentials,
-  stubConsole,
-  restoreConsole
+  runAsync
 } = require(`@google-cloud/nodejs-repo-tools`);
 
-const fileName = path.join(__dirname, `../resources/audio.raw`);
-const config = {
-  encoding: `LINEAR16`,
-  sampleRateHertz: 16000,
-  languageCode: `en-US`
-};
-
-test.before(checkCredentials);
-test.before(stubConsole);
-test.after.always(restoreConsole);
-
-test.cb(`should detect speech`, (t) => {
-  const expectedFileName = `./resources/audio.raw`;
-  const expectedText = `how old is the Brooklyn Bridge`;
-
-  const speechMock = {
-    recognize: (_fileName, _config) => {
-      t.is(_fileName, expectedFileName);
-      t.deepEqual(_config, config);
-
-      return speech.recognize(fileName, config)
-        .then(([transcription]) => {
-          t.is(transcription, expectedText);
-
-          setTimeout(() => {
-            try {
-              t.is(console.log.callCount, 1);
-              t.deepEqual(console.log.getCall(0).args, [`Transcription: ${expectedText}`]);
-              t.end();
-            } catch (err) {
-              t.end(err);
-            }
-          }, 200);
-
-          return [transcription];
-        });
-    }
-  };
+test.before(async () => {
+});
 
-  proxyquire(`../quickstart`, {
-    '@google-cloud/speech': sinon.stub().returns(speechMock)
-  });
+test(`should run quickstart`, async (t) => {
+  const output = await runAsync(`${cmd}`, cwd);
+  t.true(output.includes(`Transcription: ${text}`));
 });
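The mocked unit test is replaced by a system test that shells out to the sample and asserts on its stdout. The runAsync helper comes from @google-cloud/nodejs-repo-tools; an illustrative stand-in (not the real implementation) shows the pattern the test now relies on:

// Illustrative stand-in for runAsync: run a command in a directory and resolve
// with its output. The real helper lives in @google-cloud/nodejs-repo-tools.
const { exec } = require('child_process');

function runAsyncSketch (cmd, cwd) {
  return new Promise((resolve, reject) => {
    exec(cmd, { cwd: cwd }, (err, stdout, stderr) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stdout + stderr);
    });
  });
}

// Usage mirrors the test above (path illustrative):
// runAsyncSketch('node quickstart.js', '/path/to/speech')
//   .then((output) => console.log(output.includes('Transcription:')));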

speech/system-test/recognize.test.js

Lines changed: 2 additions & 2 deletions
@@ -45,12 +45,12 @@ test.after.always(async () => {
 
 test(`should run sync recognize`, async (t) => {
   const output = await runAsync(`${cmd} sync ${filepath}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.true(output.includes(`Transcription:  ${text}`));
 });
 
 test(`should run sync recognize on a GCS file`, async (t) => {
   const output = await runAsync(`${cmd} sync-gcs gs://${bucketName}/${filename}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.true(output.includes(`Transcription:  ${text}`));
 });
 
 test(`should run async recognize on a local file`, async (t) => {
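The change here is whitespace in the expected output: the sync samples now pass the transcription as a second console.log argument, and console.log inserts a space between arguments, so the sync tests expect two spaces after the colon. A quick illustration:

// console.log joins its arguments with a space, so the updated sync samples
// print two spaces after "Transcription:".
console.log('Transcription: ', 'how old is the Brooklyn Bridge');
// => Transcription:  how old is the Brooklyn Bridge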
