如何使用 Node.js 通过 Google Cloud Speech-to-Text API 实现语音识别?

问题描述 投票:0回答:1

我是 Google Cloud 新手,最近创建了一个启用了 Speech-to-Text API 的项目。创建项目并将其关联到结算帐号后,我按照 Node.js 控制台教程进行快速入门。

按照控制台教程中的所有步骤(并处理教程本身代码中的一些错误)并运行代码后,我收到以下错误:

node:internal/process/promises:289
            triggerUncaughtException(err, true /* fromPromise */);
            ^

Error: 3 INVALID_ARGUMENT: Invalid resource field value in the request.
    at callErrorFromStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/call.js:31:19)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client.js:192:76)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:323:181)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/resolving-call.js:99:78
    at process.processTicksAndRejections (node:internal/process/task_queues:77:11)
for call at
    at ServiceClientImpl.makeUnaryRequest (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client.js:160:32)
    at ServiceClientImpl.<anonymous> (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/make-client.js:105:19)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/@google-cloud/speech/build/src/v2/speech_client.js:318:29
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/timeout.js:44:16
    at repeat (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/retries.js:80:25)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/retries.js:119:13
    at OngoingCallPromise.call (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/call.js:67:27)
    at NormalApiCaller.call (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/normalApiCaller.js:34:19)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/createApiCall.js:108:30
    at process.processTicksAndRejections (node:internal/process/task_queues:95:5) {
  code: 3,
  details: 'Invalid resource field value in the request.',
  metadata: Metadata {
    internalRepr: Map(2) {
      'google.rpc.errorinfo-bin' => [
        Buffer(127) [Uint8Array] [
           10,  24,  82,  69,  83,  79,  85,  82,  67,  69,  95,  80,
           82,  79,  74,  69,  67,  84,  95,  73,  78,  86,  65,  76,
           73,  68,  18,  14, 103, 111, 111, 103, 108, 101,  97, 112,
          105, 115,  46,  99, 111, 109,  26,  32,  10,   7, 115, 101,
          114, 118, 105,  99, 101,  18,  21, 115, 112, 101, 101,  99,
          104,  46, 103, 111, 111, 103, 108, 101,  97, 112, 105, 115,
           46,  99, 111, 109,  26,  49,  10,   6, 109, 101, 116, 104,
          111, 100,  18,  39, 103, 111, 111, 103, 108, 101,  46,  99,
          108, 111, 117, 100,
          ... 27 more items
        ]
      ],
      'grpc-status-details-bin' => [
        Buffer(222) [Uint8Array] [
            8,   3,  18,  44,  73, 110, 118,  97, 108, 105, 100,  32,
          114, 101, 115, 111, 117, 114,  99, 101,  32, 102, 105, 101,
          108, 100,  32, 118,  97, 108, 117, 101,  32, 105, 110,  32,
          116, 104, 101,  32, 114, 101, 113, 117, 101, 115, 116,  46,
           26, 171,   1,  10,  40, 116, 121, 112, 101,  46, 103, 111,
          111, 103, 108, 101,  97, 112, 105, 115,  46,  99, 111, 109,
           47, 103, 111, 111, 103, 108, 101,  46, 114, 112,  99,  46,
           69, 114, 114, 111, 114,  73, 110, 102, 111,  18, 127,  10,
           24,  82,  69,  83,
          ... 122 more items
        ]
      ]
    },
    options: {}
  },
  note: 'Exception occurred in retry method that was not classified as transient',
  statusDetails: [
    ErrorInfo {
      metadata: {
        service: 'speech.googleapis.com',
        method: 'google.cloud.speech.v2.Speech.Recognize'
      },
      reason: 'RESOURCE_PROJECT_INVALID',
      domain: 'googleapis.com'
    }
  ],
  reason: 'RESOURCE_PROJECT_INVALID',
  domain: 'googleapis.com',
  errorInfoMetadata: {
    service: 'speech.googleapis.com',
    method: 'google.cloud.speech.v2.Speech.Recognize'
  }
}

这个错误出现在我设置了用于配额的项目之后(在终端中运行 `gcloud auth application-default set-quota-project $PROJECT_ID`)。

我还通过 gcloud CLI(在 Windows 上)执行了相同的步骤,但它返回了相同的错误。

有什么我忘记做的事吗?或者代码中有什么错误?也许这与我正在使用 API 版本 2 并且代码经过优化以仅适用于 API 版本 1 的事实有关?

提前非常感谢!

可复现问题的 Node.js 代码(遇到错误后,我对教程的原始代码做了一些修改;另外,由于无法按照 GCP 的惯例通过存储桶的 URI 获取音频文件的数据,我还改变了获取音频文件数据的方式):

const speech = require('@google-cloud/speech').v2;
const https = require('https');

// Speech-to-Text v2 client, constructed with no explicit options.
const client = new speech.SpeechClient();
// Presumably a redacted placeholder: substitute the real Google Cloud project ID before running.
const projectId = '<ProjectId>';
// Assigned inside createRecognizer() and read by transcribeFile(); it stays
// undefined until createRecognizer() has reached its assignment.
var recognizerName;

// HTTPS URL of the sample WAV file that transcribeFile() downloads through getUrlData().
const fileUrl = 'https://storage.googleapis.com/cloud-samples-data/speech/brooklyn_bridge.wav';

/**
 * Creates the `en123` recognizer in the project's `global` location and stores
 * its full resource name in the module-level `recognizerName`.
 *
 * `client.createRecognizer()` only starts a long-running operation, so the
 * finished recognizer is obtained by awaiting `operation.promise()`. Reading
 * `operation.result` immediately after the call is unreliable because it is
 * only filled in once the operation has completed.
 *
 * @returns {Promise<void>} Resolves once the recognizer exists and its name is stored.
 * @throws Rejects with the client error when the request or the operation fails.
 */
async function createRecognizer() {
  const recognizerRequest = {
    parent: `projects/${projectId}/locations/global`,
    recognizerId: 'en123',
    recognizer: {
      languageCodes: ['en-US'],
      model: 'latest_long',
    },
  };

  // First tuple element is the long-running operation handle.
  const [operation] = await client.createRecognizer(recognizerRequest);
  // Wait for the operation to finish; first tuple element is the Recognizer.
  const [recognizer] = await operation.promise();
  recognizerName = recognizer.name;
  console.log(`Created new recognizer: ${recognizerName}`);
}

/**
 * Downloads `url` over HTTPS and returns the response body encoded as base64.
 *
 * Chunks are kept as Buffers and joined with `Buffer.concat`. Appending them to
 * a string would decode the bytes as UTF-8 and corrupt binary data such as
 * audio before it is base64-encoded.
 *
 * @param {string} url - HTTPS URL of the file to download.
 * @returns {Promise<string>} Base64 encoding of the exact bytes received.
 * @throws {Error} Rejects on a request/response error or a non-200 status code.
 */
async function getUrlData(url) {
  return new Promise((resolve, reject) => {
    const request = https.get(url, (response) => {
      if (response.statusCode !== 200) {
        // Drain the body so the socket is released, then report the failure.
        response.resume();
        reject(new Error(`Request for ${url} failed with status ${response.statusCode}`));
        return;
      }

      const chunks = [];
      response.on('data', (chunk) => {
        chunks.push(chunk);
      });
      response.on('error', reject);
      response.on('end', () => {
        resolve(Buffer.concat(chunks).toString('base64'));
      });
    });
    // Without this handler a DNS/TLS/connection failure would leave the promise pending.
    request.on('error', reject);
  });
}

/**
 * Downloads the sample audio as base64, sends it to the recognizer named by the
 * module-level `recognizerName` with automatic decoding, and prints the first
 * alternative of every result.
 *
 * @returns {Promise<void>} Resolves after all transcripts have been logged.
 */
async function transcribeFile() {
  const audioBase64 = await getUrlData(fileUrl);

  const request = {
    recognizer: recognizerName,
    config: { autoDecodingConfig: {} },
    content: audioBase64,
  };

  const responses = await client.recognize(request); // <=== The error occurs on this line
  const { results } = responses[0];
  for (const { alternatives } of results) {
    console.log(`Transcript: ${alternatives[0].transcript}`);
  }
}

// Run the two steps strictly in order: transcribeFile() depends on the
// recognizer that createRecognizer() creates, so each call must be awaited.
// Calling both without awaiting lets the transcription race the creation.
(async function main() {
  await createRecognizer();
  await transcribeFile();
})().catch((err) => {
  // Report the failure explicitly instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});

更新:

查看这个问题中(提问者)的代码后,我意识到我混淆了 `recognizerName` 的含义:我只使用了 `recognizerId` 这个字符串,而不是识别器的完整路径(`projects/${projectId}/locations/global/recognizers/${recognizerId}`)。所以目前我的代码如下所示:

// Presumably a redacted placeholder: substitute the real Google Cloud project ID before running.
const projectId = '<ProjectId>';
// Short recognizer ID; transcribeFile() embeds it in the full recognizer resource path.
const recognizerId = 'en123';
// HTTPS URL of the sample WAV file passed to getUrlData() by transcribeFile().
const fileUrl = 'https://storage.googleapis.com/cloud-samples-data/speech/brooklyn_bridge.wav';

// ...

/**
 * Transcribes the sample audio using the recognizer addressed by its full
 * resource path (built from the module-level `projectId` and `recognizerId`)
 * and logs the first alternative of each result.
 *
 * @returns {Promise<void>} Resolves after all transcripts have been logged.
 */
async function transcribeFile() {
  // The API expects the full resource path, not just the short recognizer ID.
  const recognizer = `projects/${projectId}/locations/global/recognizers/${recognizerId}`;
  const audioBase64 = await getUrlData(fileUrl);

  const request = {
    recognizer,
    config: { autoDecodingConfig: {} },
    content: audioBase64,
  };

  const responses = await client.recognize(request); // <=== The error occurs on this line
  const { results } = responses[0];
  for (const { alternatives } of results) {
    console.log(`Transcript: ${alternatives[0].transcript}`);
  }
}

// Await each step so the recognize request is only sent after the recognizer
// has been created, and report failures instead of leaving the promises
// floating with no rejection handler.
(async function main() {
  await createRecognizer();
  await transcribeFile();
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

但是现在我收到另一个错误:

Error: 3 INVALID_ARGUMENT: Audio data does not appear to be in a supported encoding. If you believe this to be incorrect, try explicitly specifying the decoding parameters.
    at callErrorFromStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/call.js:31:19)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client.js:192:76)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)
    at Object.onReceiveStatus (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:323:181)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/resolving-call.js:99:78
    at process.processTicksAndRejections (node:internal/process/task_queues:77:11)
for call at
    at ServiceClientImpl.makeUnaryRequest (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/client.js:160:32)
    at ServiceClientImpl.<anonymous> (/home/<Google-Account>/speech-to-text-nodejs/node_modules/@grpc/grpc-js/build/src/make-client.js:105:19)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/@google-cloud/speech/build/src/v2/speech_client.js:318:29
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/timeout.js:44:16
    at repeat (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/retries.js:80:25)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/retries.js:119:13
    at OngoingCallPromise.call (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/call.js:67:27)
    at NormalApiCaller.call (/home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/normalCalls/normalApiCaller.js:34:19)
    at /home/<Google-Account>/speech-to-text-nodejs/node_modules/google-gax/build/src/createApiCall.js:108:30
    at process.processTicksAndRejections (node:internal/process/task_queues:95:5) {
  code: 3,
  details: 'Audio data does not appear to be in a supported encoding. If you believe this to be incorrect, try explicitly specifying the decoding parameters.',
  metadata: Metadata { internalRepr: Map(0) {}, options: {} },
  note: 'Exception occurred in retry method that was not classified as transient'
}
node.js google-cloud-platform credentials google-speech-api google-cloud-speech
1个回答
0
投票

我成功解决了它!

当请求的音频文件采用不含编码信息的格式(在本例中为 `.raw`)时,会出现错误“Audio data does not appear to be in a supported encoding. If you believe this to be incorrect, try explicitly specifying the decoding parameters”,因为 Speech-to-Text API 无法得知应如何对其进行解码。

为了解决这个问题,我必须传递一个 `explicitDecodingConfig` 对象:要么在创建识别器时传递(放在 `defaultRecognitionConfig` 属性内),要么稍后在 `transcriptionRequest` 中传递(放在 `config` 属性内)。

const speech = require('@google-cloud/speech').v2;

// Presumably a redacted placeholder: substitute the real Google Cloud project ID before running.
const projectId = '<ProjectId>';
// Short recognizer ID, used both when creating the recognizer and when building its full path.
const recognizerId = 'en123';

// Speech-to-Text v2 client, constructed with no explicit options.
const client = new speech.SpeechClient();

/**
 * Creates the recognizer `recognizerId` in the project's `global` location with
 * a default explicit decoding config (LINEAR16, 16000 Hz, one channel) and logs
 * its full resource name.
 *
 * `client.createRecognizer()` only starts a long-running operation, so the
 * finished recognizer is obtained by awaiting `operation.promise()`. Reading
 * `operation.result` immediately after the call is unreliable because it is
 * only filled in once the operation has completed.
 *
 * @returns {Promise<void>} Resolves once the recognizer has been created.
 * @throws Rejects with the client error when the request or the operation fails.
 */
async function createRecognizer() {
  const recognizerRequest = {
    parent: `projects/${projectId}/locations/global`,
    recognizerId,
    recognizer: {
      defaultRecognitionConfig: {
        // Headerless audio carries no format information, so it is spelled out here.
        explicitDecodingConfig: {
          encoding: 'LINEAR16',
          sampleRateHertz: 16000,
          audioChannelCount: 1,
        },
      },
      languageCodes: ['en-US'],
      model: 'latest_long',
    },
  };

  // First tuple element is the long-running operation handle.
  const [operation] = await client.createRecognizer(recognizerRequest);
  // Wait for the operation to finish; first tuple element is the Recognizer.
  const [recognizer] = await operation.promise();
  const recognizerName = recognizer.name;
  console.log(`Created new recognizer: ${recognizerName}`);
}

/**
 * Sends the sample raw audio object in Cloud Storage to the recognizer built
 * from the module-level `projectId` and `recognizerId`, with an explicit
 * decoding config, and logs the first alternative of each result.
 *
 * @returns {Promise<void>} Resolves after all transcripts have been logged.
 */
async function transcribeFile() {
  const request = {
    recognizer: `projects/${projectId}/locations/global/recognizers/${recognizerId}`,
    uri: 'gs://cloud-samples-data/speech/brooklyn_bridge.raw',
    config: {
      // If you didn't set explicitDecodingConfig when creating the recognizer, you have to set it here.
      explicitDecodingConfig: {
        encoding: 'LINEAR16',
        sampleRateHertz: 16000,
        audioChannelCount: 1,
      },
    },
  };

  const responses = await client.recognize(request);
  const { results } = responses[0];
  for (const { alternatives } of results) {
    console.log(`Transcript: ${alternatives[0].transcript}`);
  }
}

// Entry point: create the recognizer first, then transcribe with it.
// The trailing catch reports any failure and sets a failing exit code; without
// it a rejection would surface as an unhandled promise rejection.
(async function main() {
  await createRecognizer();
  await transcribeFile();
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
© www.soinside.com 2019 - 2024. All rights reserved.