Synchronize new proto/yaml changes.

PiperOrigin-RevId: 265786496
Branch: pull/568/head
Authored by Google APIs, committed by Copybara-Service
parent d42eb8b611
commit 49f953989b
41 changed files (lines changed in parentheses):

  1. google/cloud/speech/artman_speech_v1.yaml (3)
  2. google/cloud/speech/artman_speech_v1p1beta1.yaml (3)
  3. google/cloud/speech/sample_resources.yaml (5)
  4. google/cloud/speech/v1/samples/speech_transcribe_async.yaml (47)
  5. google/cloud/speech/v1/samples/speech_transcribe_async_gcs.yaml (48)
  6. google/cloud/speech/v1/samples/speech_transcribe_async_word_time_offsets_gcs.yaml (64)
  7. google/cloud/speech/v1/samples/speech_transcribe_enhanced_model.yaml (53)
  8. google/cloud/speech/v1/samples/speech_transcribe_model_selection.yaml (45)
  9. google/cloud/speech/v1/samples/speech_transcribe_model_selection_gcs.yaml (43)
  10. google/cloud/speech/v1/samples/speech_transcribe_multichannel.yaml (53)
  11. google/cloud/speech/v1/samples/speech_transcribe_multichannel_gcs.yaml (54)
  12. google/cloud/speech/v1/samples/speech_transcribe_sync.yaml (47)
  13. google/cloud/speech/v1/samples/speech_transcribe_sync_gcs.yaml (48)
  14. google/cloud/speech/v1/samples/test/speech_transcribe_async.test.yaml (2)
  15. google/cloud/speech/v1/samples/test/speech_transcribe_async_gcs.test.yaml (2)
  16. google/cloud/speech/v1/samples/test/speech_transcribe_async_word_time_offsets_gcs.test.yaml (2)
  17. google/cloud/speech/v1/samples/test/speech_transcribe_enhanced_model.test.yaml (2)
  18. google/cloud/speech/v1/samples/test/speech_transcribe_model_selection.test.yaml (2)
  19. google/cloud/speech/v1/samples/test/speech_transcribe_model_selection_gcs.test.yaml (2)
  20. google/cloud/speech/v1/samples/test/speech_transcribe_multichannel.test.yaml (2)
  21. google/cloud/speech/v1/samples/test/speech_transcribe_multichannel_gcs.test.yaml (2)
  22. google/cloud/speech/v1/samples/test/speech_transcribe_sync.test.yaml (2)
  23. google/cloud/speech/v1/samples/test/speech_transcribe_sync_gcs.test.yaml (2)
  24. google/cloud/speech/v1/speech_gapic.yaml (407)
  25. google/cloud/speech/v1p1beta1/samples/speech_adaptation_beta.yaml (49)
  26. google/cloud/speech/v1p1beta1/samples/speech_contexts_classes_beta.yaml (43)
  27. google/cloud/speech/v1p1beta1/samples/speech_quickstart_beta.yaml (35)
  28. google/cloud/speech/v1p1beta1/samples/speech_transcribe_auto_punctuation_beta.yaml (47)
  29. google/cloud/speech/v1p1beta1/samples/speech_transcribe_diarization_beta.yaml (61)
  30. google/cloud/speech/v1p1beta1/samples/speech_transcribe_multilanguage_beta.yaml (55)
  31. google/cloud/speech/v1p1beta1/samples/speech_transcribe_recognition_metadata_beta.yaml (61)
  32. google/cloud/speech/v1p1beta1/samples/speech_transcribe_word_level_confidence_beta.yaml (59)
  33. google/cloud/speech/v1p1beta1/samples/test/speech_adaptation_beta.test.yaml (2)
  34. google/cloud/speech/v1p1beta1/samples/test/speech_contexts_classes_beta.test.yaml (2)
  35. google/cloud/speech/v1p1beta1/samples/test/speech_quickstart_beta.test.yaml (2)
  36. google/cloud/speech/v1p1beta1/samples/test/speech_transcribe_auto_punctuation_beta.test.yaml (4)
  37. google/cloud/speech/v1p1beta1/samples/test/speech_transcribe_diarization_beta.test.yaml (2)
  38. google/cloud/speech/v1p1beta1/samples/test/speech_transcribe_multilanguage_beta.test.yaml (2)
  39. google/cloud/speech/v1p1beta1/samples/test/speech_transcribe_recognition_metadata_beta.test.yaml (2)
  40. google/cloud/speech/v1p1beta1/samples/test/speech_transcribe_word_level_confidence_beta.test.yaml (2)
  41. google/cloud/speech/v1p1beta1/speech_gapic.yaml (340)

@@ -3,11 +3,12 @@ common:
api_version: v1
organization_name: google-cloud
proto_deps:
- name: google-common-protos
- name: google-common-protos
src_proto_paths:
- v1
service_yaml: speech_v1.yaml
gapic_yaml: v1/speech_gapic.yaml
samples: v1/samples
artifacts:
- name: gapic_config
type: GAPIC_CONFIG

@@ -3,11 +3,12 @@ common:
api_version: v1p1beta1
organization_name: google-cloud
proto_deps:
- name: google-common-protos
- name: google-common-protos
src_proto_paths:
- v1p1beta1
service_yaml: speech_v1p1beta1.yaml
gapic_yaml: v1p1beta1/speech_gapic.yaml
samples: v1p1beta1/samples
artifacts:
- name: gapic_config
type: GAPIC_CONFIG

@@ -1,5 +1,8 @@
# Canonical GCS paths to resource files used by samples and sample system tests
sample_resources:
- uri: gs://cloud-samples-data/speech/brooklyn_bridge.mp3
description: |
44100 Hz, 2 channels, English, "How old is the Brooklyn Bridge?"
- uri: gs://cloud-samples-data/speech/brooklyn_bridge.raw
description: |
16000 Hz, 1 channel, English, "How old is the Brooklyn Bridge?"
@@ -8,7 +11,7 @@ sample_resources:
44100 Hz, 1 channel, English, "How old is the Brooklyn Bridge?"
- uri: gs://cloud-samples-data/speech/brooklyn_bridge.wav
description: |
16000 Hz, 2 channel (only first contains audio data), English, "How old is the Brooklyn Bridge?"
16000 Hz, 2 channels (only first contains audio data), English, "How old is the Brooklyn Bridge?"
- uri: gs://cloud-samples-data/speech/hello.raw
description: |
16000 Hz, 1 channel, English, "Hello"

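These canonical URIs are resolved at run time by the standalone sample configs and their system tests. A quick spot-check that they are still reachable, sketched in Python with the google-cloud-storage client (an anonymous client suffices only because cloud-samples-data is a public bucket; this helper is illustrative, not part of the commit):

from google.cloud import storage

# Anonymous access works for the public cloud-samples-data bucket.
client = storage.Client.create_anonymous_client()
bucket = client.bucket("cloud-samples-data")

for name in (
    "speech/brooklyn_bridge.mp3",
    "speech/brooklyn_bridge.raw",
    "speech/brooklyn_bridge.wav",
    "speech/hello.raw",
):
    # exists() issues a metadata request; no audio bytes are downloaded.
    print(name, "exists:", bucket.blob(name).exists())
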
@@ -1,35 +1,36 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_async
- region_tag: speech_transcribe_async
title: Transcribe Audio File using Long Running Operation (Local File) (LRO)
description: Transcribe a long audio file using asynchronous speech recognition
rpc: LongRunningRecognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.content = "resources/brooklyn_bridge.raw"
- config.language_code = "en-US"
- config.sample_rate_hertz = 16000
- config.encoding = LINEAR16
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.language_code
description: "The language of the supplied audio"
- parameter: config.sample_rate_hertz
description: Sample rate in Hertz of the audio data sent
- parameter: config.encoding
description: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
on_success:
request:
- field: audio.content
value: "resources/brooklyn_bridge.raw"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
- field: config.sample_rate_hertz
value: 16000
comment: Sample rate in Hertz of the audio data sent
- field: config.encoding
value: LINEAR16
comment: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

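For orientation, this config drives a code generator; the emitted sample corresponds roughly to the Python below, assuming the google-cloud-speech 2.x client (the function name, default argument, and exact call shapes are illustrative, not the literal generated output):

from google.cloud import speech

def speech_transcribe_async(local_file_path="resources/brooklyn_bridge.raw"):
    client = speech.SpeechClient()

    # read_file: true means the sample reads the file and sends raw bytes.
    with open(local_file_path, "rb") as f:
        content = f.read()

    config = speech.RecognitionConfig(
        language_code="en-US",
        sample_rate_hertz=16000,
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    )
    audio = speech.RecognitionAudio(content=content)

    # LongRunningRecognize returns an operation; block until it completes.
    operation = client.long_running_recognize(config=config, audio=audio)
    response = operation.result()

    for result in response.results:
        # First alternative is the most probable result.
        alternative = result.alternatives[0]
        print(f"Transcript: {alternative.transcript}")
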
@@ -1,35 +1,35 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_async_gcs
- region_tag: speech_transcribe_async_gcs
title: Transcribe Audio File using Long Running Operation (Cloud Storage) (LRO)
description: |
Transcribe long audio file from Cloud Storage using asynchronous speech recognition
description: Transcribe long audio file from Cloud Storage using asynchronous speech recognition
rpc: LongRunningRecognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
- config.sample_rate_hertz = 16000
- config.language_code = "en-US"
- config.encoding = LINEAR16
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
- parameter: config.language_code
description: "The language of the supplied audio"
- parameter: config.sample_rate_hertz
description: Sample rate in Hertz of the audio data sent
- parameter: config.encoding
description: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
on_success:
request:
- field: audio.uri
value: "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
input_parameter: storage_uri
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- field: config.sample_rate_hertz
value: 16000
comment: Sample rate in Hertz of the audio data sent
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
- field: config.encoding
value: LINEAR16
comment: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -1,38 +1,48 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_async_word_time_offsets_gcs
- region_tag: speech_transcribe_async_word_time_offsets_gcs
title: Getting word timestamps (Cloud Storage) (LRO)
description: |
Print start and end time of each word spoken in audio file from Cloud Storage
description: Print start and end time of each word spoken in audio file from Cloud Storage
rpc: LongRunningRecognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.flac"
- config.enable_word_time_offsets = True
- config.language_code = "en-US"
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
- parameter: config.enable_word_time_offsets
description: |
When enabled, the first result returned by the API will include a list
of words and the start and end time offsets (timestamps) for those words.
- parameter: config.language_code
description: "The language of the supplied audio"
on_success:
- comment: ["The first result includes start and end time word offsets"]
- define: result=$resp.results[0]
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
request:
- field: audio.uri
value: "gs://cloud-samples-data/speech/brooklyn_bridge.flac"
input_parameter: storage_uri
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- field: config.enable_word_time_offsets
value: true
comment: |
When enabled, the first result returned by the API will include a list
of words and the start and end time offsets (timestamps) for those words.
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
response:
- comment:
- The first result includes start and end time word offsets
- define: result = $resp.results[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript
- comment: ["Print the start and end time of each word"]
- comment:
- Print the start and end time of each word
- loop:
collection: alternative.words
variable: word
body:
- print: ["Word: %s", word.word]
- print: ["Start time: %s seconds %s nanos", word.start_time.seconds, word.start_time.nanos]
- print: ["End time: %s seconds %s nanos", word.end_time.seconds, word.end_time.nanos]
- print:
- "Word: %s"
- word.word
- print:
- "Start time: %s seconds %s nanos"
- word.start_time.seconds
- word.start_time.nanos
- print:
- "End time: %s seconds %s nanos"
- word.end_time.seconds
- word.end_time.nanos

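The response block above walks the word list of the first alternative. The same flow in Python, again assuming the 2.x client, where protobuf Duration fields surface as datetime.timedelta:

from google.cloud import speech

def print_word_time_offsets(storage_uri="gs://cloud-samples-data/speech/brooklyn_bridge.flac"):
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        enable_word_time_offsets=True,
        language_code="en-US",
    )
    audio = speech.RecognitionAudio(uri=storage_uri)
    response = client.long_running_recognize(config=config, audio=audio).result()

    # The first result includes start and end time word offsets.
    alternative = response.results[0].alternatives[0]
    print(f"Transcript: {alternative.transcript}")

    # start_time/end_time arrive as datetime.timedelta in this client.
    for word in alternative.words:
        print(f"Word: {word.word}")
        print(f"Start: {word.start_time.total_seconds()}s "
              f"End: {word.end_time.total_seconds()}s")
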
@@ -1,38 +1,39 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_enhanced_model
- region_tag: speech_transcribe_enhanced_model
title: Using Enhanced Models (Local File)
description: Transcribe a short audio file using an enhanced model
rpc: Recognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.content = "resources/hello.wav"
- config.model = "phone_call"
- config.use_enhanced = True
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.model
description: |
The enhanced model to use, e.g. phone_call
Currently phone_call is the only model available as an enhanced model.
- parameter: config.use_enhanced
description: |
Use an enhanced model for speech recognition (when set to true).
Project must be eligible for requesting enhanced models.
Enhanced speech models require that you opt-in to data logging.
- parameter: config.language_code
description: "The language of the supplied audio"
on_success:
request:
- field: audio.content
value: "resources/hello.wav"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.model
value: "phone_call"
comment: |
The enhanced model to use, e.g. phone_call
Currently phone_call is the only model available as an enhanced model.
- field: config.use_enhanced
value: true
comment: |
Use an enhanced model for speech recognition (when set to true).
Project must be eligible for requesting enhanced models.
Enhanced speech models require that you opt-in to data logging.
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

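Only two config fields distinguish this sample from a plain Recognize call; with the 2.x Python client the relevant part of the request would look roughly like:

from google.cloud import speech

config = speech.RecognitionConfig(
    language_code="en-US",
    use_enhanced=True,   # project must be eligible and opted in to data logging
    model="phone_call",  # the only model offered in an enhanced variant here
)
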
@@ -1,34 +1,35 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_model_selection
- region_tag: speech_transcribe_model_selection
title: Selecting a Transcription Model (Local File)
description: Transcribe a short audio file using a specified transcription model
rpc: Recognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.content = "resources/hello.wav"
- config.model = "phone_call"
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.model
sample_argument_name: model
description: |
The transcription model to use, e.g. video, phone_call, default
For a list of available transcription models, see:
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
- parameter: config.language_code
description: "The language of the supplied audio"
on_success:
request:
- field: audio.content
value: "resources/hello.wav"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.model
value: "phone_call"
input_parameter: model
comment: |
The transcription model to use, e.g. video, phone_call, default
For a list of available transcription models, see:
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -1,34 +1,35 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_model_selection_gcs
- region_tag: speech_transcribe_model_selection_gcs
title: Selecting a Transcription Model (Cloud Storage)
description: |
Transcribe a short audio file from Cloud Storage using a specified transcription model
rpc: Recognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/hello.wav"
- config.model = "phone_call"
- config.language_code = "en-US"
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
- parameter: config.model
sample_argument_name: model
description: |
The transcription model to use, e.g. video, phone_call, default
For a list of available transcription models, see:
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
- parameter: config.language_code
description: "The language of the supplied audio"
on_success:
request:
- field: audio.uri
value: "gs://cloud-samples-data/speech/hello.wav"
input_parameter: storage_uri
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- field: config.model
value: "phone_call"
input_parameter: model
comment: |
The transcription model to use, e.g. video, phone_call, default
For a list of available transcription models, see:
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -1,30 +1,30 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_multichannel
- region_tag: speech_transcribe_multichannel
title: Multi-Channel Audio Transcription (Local File)
description: Transcribe a short audio file with multiple channels
rpc: Recognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.content = "resources/multi.wav"
- config.audio_channel_count = 2
- config.enable_separate_recognition_per_channel = True
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.audio_channel_count
description: The number of channels in the input audio file (optional)
- parameter: config.enable_separate_recognition_per_channel
description: |
When set to true, each audio channel will be recognized separately.
The recognition result will contain a channel_tag field to state which
channel that result belongs to
- parameter: config.language_code
description: "The language of the supplied audio"
on_success:
request:
- field: audio.content
value: "resources/multi.wav"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.audio_channel_count
value: 2
comment: The number of channels in the input audio file (optional)
- field: config.enable_separate_recognition_per_channel
value: true
comment: |
When set to true, each audio channel will be recognized separately.
The recognition result will contain a channel_tag field to state which
channel that result belongs to
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
response:
- loop:
variable: result
collection: $resp.results
@@ -32,9 +32,12 @@ samples:
- comment:
- "%s to recognize which audio channel this result is for"
- channel_tag
- print: ["Channel tag: %s", result.channel_tag]
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- print:
- "Channel tag: %s"
- result.channel_tag
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

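A Python sketch of separate-per-channel recognition, assuming the 2.x client; the channel_tag on each result identifies which channel produced it:

from google.cloud import speech

def transcribe_multichannel(local_file_path="resources/multi.wav"):
    client = speech.SpeechClient()
    with open(local_file_path, "rb") as f:
        content = f.read()

    config = speech.RecognitionConfig(
        language_code="en-US",
        audio_channel_count=2,
        # Each channel is recognized separately; results carry a channel_tag.
        enable_separate_recognition_per_channel=True,
    )
    response = client.recognize(
        config=config, audio=speech.RecognitionAudio(content=content)
    )
    for result in response.results:
        print(f"Channel tag: {result.channel_tag}")
        print(f"Transcript: {result.alternatives[0].transcript}")
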
@@ -1,30 +1,29 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_multichannel_gcs
- region_tag: speech_transcribe_multichannel_gcs
title: Multi-Channel Audio Transcription (Cloud Storage)
description: |
Transcribe a short audio file from Cloud Storage with multiple channels
description: Transcribe a short audio file from Cloud Storage with multiple channels
rpc: Recognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/multi.wav"
- config.audio_channel_count = 2
- config.enable_separate_recognition_per_channel = True
- config.language_code = "en-US"
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
- parameter: config.audio_channel_count
description: The number of channels in the input audio file (optional)
- parameter: config.enable_separate_recognition_per_channel
description: |
When set to true, each audio channel will be recognized separately.
The recognition result will contain a channel_tag field to state which
channel that result belongs to
- parameter: config.language_code
description: "The language of the supplied audio"
on_success:
request:
- field: audio.uri
value: "gs://cloud-samples-data/speech/multi.wav"
input_parameter: storage_uri
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- field: config.audio_channel_count
value: 2
comment: The number of channels in the input audio file (optional)
- field: config.enable_separate_recognition_per_channel
value: true
comment: |
When set to true, each audio channel will be recognized separately.
The recognition result will contain a channel_tag field to state which
channel that result belongs to
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
response:
- loop:
variable: result
collection: $resp.results
@@ -32,9 +31,12 @@ samples:
- comment:
- "%s to recognize which audio channel this result is for"
- channel_tag
- print: ["Channel tag: %s", result.channel_tag]
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- print:
- "Channel tag: %s"
- result.channel_tag
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -1,35 +1,36 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_sync
- region_tag: speech_transcribe_sync
title: Transcribe Audio File (Local File)
description: Transcribe a short audio file using synchronous speech recognition
rpc: Recognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.content = "resources/brooklyn_bridge.raw"
- config.language_code = "en-US"
- config.sample_rate_hertz = 16000
- config.encoding = LINEAR16
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.language_code
description: "The language of the supplied audio"
- parameter: config.sample_rate_hertz
description: Sample rate in Hertz of the audio data sent
- parameter: config.encoding
description: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
on_success:
request:
- field: audio.content
value: "resources/brooklyn_bridge.raw"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
- field: config.sample_rate_hertz
value: 16000
comment: Sample rate in Hertz of the audio data sent
- field: config.encoding
value: LINEAR16
comment: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -1,35 +1,35 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_sync_gcs
- region_tag: speech_transcribe_sync_gcs
title: Transcribe Audio File (Cloud Storage)
description: |
Transcribe short audio file from Cloud Storage using synchronous speech recognition
description: Transcribe short audio file from Cloud Storage using synchronous speech recognition
rpc: Recognize
service: google.cloud.speech.v1.Speech
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
- config.sample_rate_hertz = 16000
- config.language_code = "en-US"
- config.encoding = LINEAR16
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
- parameter: config.language_code
description: "The language of the supplied audio"
- parameter: config.sample_rate_hertz
description: Sample rate in Hertz of the audio data sent
- parameter: config.encoding
description: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
on_success:
request:
- field: audio.uri
value: "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
input_parameter: storage_uri
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- field: config.sample_rate_hertz
value: 16000
comment: Sample rate in Hertz of the audio data sent
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
- field: config.encoding
value: LINEAR16
comment: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Transcribe Audio File using Long Running Operation (Local File) (LRO)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Transcribe Audio File using Long Running Operation (Cloud Storage) (LRO)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Selecting a Transcription Model (Local File)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Selecting a Transcription Model (Cloud Storage)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Multi-Channel Audio Transcription (Local File)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Multi-Channel Audio Transcription (Cloud Storage)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Transcribe Audio File (Local File)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Transcribe Audio File (Cloud Storage)

@@ -62,285 +62,6 @@ interfaces:
retry_codes_name: idempotent
retry_params_name: default
timeout_millis: 1000000
samples:
standalone:
- region_tag: speech_transcribe_sync_gcs
value_sets:
- speech_transcribe_sync_gcs
- region_tag: speech_transcribe_sync
value_sets:
- speech_transcribe_sync
- region_tag: speech_transcribe_multichannel
value_sets:
- speech_transcribe_multichannel
- region_tag: speech_transcribe_model_selection_gcs
value_sets:
- speech_transcribe_model_selection_gcs
- region_tag: speech_transcribe_async_word_time_offsets_gcs
value_sets:
- speech_transcribe_async_word_time_offsets_gcs
- region_tag: speech_transcribe_model_selection
value_sets:
- speech_transcribe_model_selection
- region_tag: speech_transcribe_multichannel_gcs
value_sets:
- speech_transcribe_multichannel_gcs
- region_tag: speech_transcribe_enhanced_model
value_sets:
- speech_transcribe_enhanced_model
sample_value_sets:
- id: speech_transcribe_model_selection_gcs
title: Selecting a Transcription Model (Cloud Storage)
description: 'Transcribe a short audio file from Cloud Storage using a specified
transcription model
'
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/hello.wav"
- config.model = "phone_call"
- config.language_code = "en-US"
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- parameter: config.model
sample_argument_name: model
description: |
The transcription model to use, e.g. video, phone_call, default
For a list of available transcription models, see:
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
- parameter: config.language_code
description: The language of the supplied audio
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_transcribe_sync_gcs
title: Transcribe Audio File (Cloud Storage)
description: 'Transcribe short audio file from Cloud Storage using synchronous
speech recognition
'
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
- config.sample_rate_hertz = 16000
- config.language_code = "en-US"
- config.encoding = LINEAR16
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- parameter: config.language_code
description: The language of the supplied audio
- parameter: config.sample_rate_hertz
description: Sample rate in Hertz of the audio data sent
- parameter: config.encoding
description: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_transcribe_sync
title: Transcribe Audio File (Local File)
description: Transcribe a short audio file using synchronous speech recognition
parameters:
defaults:
- audio.content = "resources/brooklyn_bridge.raw"
- config.language_code = "en-US"
- config.sample_rate_hertz = 16000
- config.encoding = LINEAR16
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.language_code
description: The language of the supplied audio
- parameter: config.sample_rate_hertz
description: Sample rate in Hertz of the audio data sent
- parameter: config.encoding
description: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_transcribe_model_selection
title: Selecting a Transcription Model (Local File)
description: Transcribe a short audio file using a specified transcription model
parameters:
defaults:
- audio.content = "resources/hello.wav"
- config.model = "phone_call"
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.model
sample_argument_name: model
description: |
The transcription model to use, e.g. video, phone_call, default
For a list of available transcription models, see:
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
- parameter: config.language_code
description: The language of the supplied audio
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_transcribe_multichannel_gcs
title: Multi-Channel Audio Transcription (Cloud Storage)
description: 'Transcribe a short audio file from Cloud Storage with multiple
channels
'
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/multi.wav"
- config.audio_channel_count = 2
- config.enable_separate_recognition_per_channel = True
- config.language_code = "en-US"
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- parameter: config.audio_channel_count
description: The number of channels in the input audio file (optional)
- parameter: config.enable_separate_recognition_per_channel
description: |
When set to true, each audio channel will be recognized separately.
The recognition result will contain a channel_tag field to state which
channel that result belongs to
- parameter: config.language_code
description: The language of the supplied audio
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- "%s to recognize which audio channel this result is for"
- channel_tag
- print:
- 'Channel tag: %s'
- result.channel_tag
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_transcribe_multichannel
title: Multi-Channel Audio Transcription (Local File)
description: Transcribe a short audio file with multiple channels
parameters:
defaults:
- audio.content = "resources/multi.wav"
- config.audio_channel_count = 2
- config.enable_separate_recognition_per_channel = True
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.audio_channel_count
description: The number of channels in the input audio file (optional)
- parameter: config.enable_separate_recognition_per_channel
description: |
When set to true, each audio channel will be recognized separately.
The recognition result will contain a channel_tag field to state which
channel that result belongs to
- parameter: config.language_code
description: The language of the supplied audio
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- "%s to recognize which audio channel this result is for"
- channel_tag
- print:
- 'Channel tag: %s'
- result.channel_tag
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_transcribe_enhanced_model
title: Using Enhanced Models (Local File)
description: Transcribe a short audio file using an enhanced model
parameters:
defaults:
- audio.content = "resources/hello.wav"
- config.model = "phone_call"
- config.use_enhanced = True
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.model
description: |
The enhanced model to use, e.g. phone_call
Currently phone_call is the only model available as an enhanced model.
- parameter: config.use_enhanced
description: |
Use an enhanced model for speech recognition (when set to true).
Project must be eligible for requesting enhanced models.
Enhanced speech models require that you opt-in to data logging.
- parameter: config.language_code
description: The language of the supplied audio
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- name: LongRunningRecognize
flattening:
groups:
@@ -365,134 +86,6 @@ interfaces:
poll_delay_multiplier: 1.5
max_poll_delay_millis: 45000
total_poll_timeout_millis: 86400000
samples:
standalone:
- region_tag: speech_transcribe_async_gcs
value_sets:
- speech_transcribe_async_gcs
- region_tag: speech_transcribe_async
value_sets:
- speech_transcribe_async
- region_tag: speech_transcribe_async_word_time_offsets_gcs
value_sets:
- speech_transcribe_async_word_time_offsets_gcs
sample_value_sets:
- id: speech_transcribe_async_gcs
title: Transcribe Audio File using Long Running Operation (Cloud Storage) (LRO)
description: 'Transcribe long audio file from Cloud Storage using asynchronous
speech recognition
'
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
- config.sample_rate_hertz = 16000
- config.language_code = "en-US"
- config.encoding = LINEAR16
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- parameter: config.language_code
description: The language of the supplied audio
- parameter: config.sample_rate_hertz
description: Sample rate in Hertz of the audio data sent
- parameter: config.encoding
description: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_transcribe_async
title: Transcribe Audio File using Long Running Operation (Local File) (LRO)
description: Transcribe a long audio file using asynchronous speech recognition
parameters:
defaults:
- audio.content = "resources/brooklyn_bridge.raw"
- config.language_code = "en-US"
- config.sample_rate_hertz = 16000
- config.encoding = LINEAR16
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.language_code
description: The language of the supplied audio
- parameter: config.sample_rate_hertz
description: Sample rate in Hertz of the audio data sent
- parameter: config.encoding
description: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_transcribe_async_word_time_offsets_gcs
title: Getting word timestamps (Cloud Storage) (LRO)
description: 'Print start and end time of each word spoken in audio file from
Cloud Storage
'
parameters:
defaults:
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.flac"
- config.enable_word_time_offsets = True
- config.language_code = "en-US"
attributes:
- parameter: audio.uri
sample_argument_name: storage_uri
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- parameter: config.enable_word_time_offsets
description: |
When enabled, the first result returned by the API will include a list
of words and the start and end time offsets (timestamps) for those words.
- parameter: config.language_code
description: The language of the supplied audio
on_success:
- comment:
- The first result includes start and end time word offsets
- define: result=$resp.results[0]
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- comment:
- Print the start and end time of each word
- loop:
collection: alternative.words
variable: word
body:
- print:
- 'Word: %s'
- word.word
- print:
- 'Start time: %s seconds %s nanos'
- word.start_time.seconds
- word.start_time.nanos
- print:
- 'End time: %s seconds %s nanos'
- word.end_time.seconds
- word.end_time.nanos
- name: StreamingRecognize
retry_codes_name: idempotent
retry_params_name: default

@@ -0,0 +1,49 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- region_tag: speech_adaptation_beta
title: Speech Adaptation (Cloud Storage)
description: Transcribe a short audio file with speech adaptation.
rpc: Recognize
service: google.cloud.speech.v1p1beta1.Speech
request:
- field: audio.uri
value: "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
input_parameter: storage_uri
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- field: config.speech_contexts[0].phrases[0]
value: "Brooklyn Bridge"
input_parameter: phrase
comment: |
Phrase "hints" help recognize the specified phrases from your audio.
- field: config.speech_contexts[0].boost
value: 20.0
comment: |
Hint Boost. This value increases the probability that a specific
phrase will be recognized over other similar sounding phrases.
The higher the boost, the higher the chance of false positive
recognition as well. Can accept a wide range of positive values.
Most use cases are best served with values between 0 and 20.
Using a binary search approach may help you find the optimal value.
- field: config.sample_rate_hertz
value: 44100
comment: Sample rate in Hertz of the audio data sent
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
- field: config.encoding
value: MP3
comment: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

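A Python sketch of the adaptation request above, assuming the v1p1beta1 surface of the 2.x client; the phrase hint and boost mirror the config values:

from google.cloud import speech_v1p1beta1 as speech

def speech_adaptation_beta(storage_uri, phrase="Brooklyn Bridge"):
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        language_code="en-US",
        sample_rate_hertz=44100,
        encoding=speech.RecognitionConfig.AudioEncoding.MP3,
        # Phrase hints bias recognition; boost sets how strongly.
        speech_contexts=[speech.SpeechContext(phrases=[phrase], boost=20.0)],
    )
    audio = speech.RecognitionAudio(uri=storage_uri)
    response = client.recognize(config=config, audio=audio)
    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")
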
@@ -0,0 +1,43 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- region_tag: speech_contexts_classes_beta
title: Using Context Classes (Cloud Storage)
description: Transcribe a short audio file with static context classes.
rpc: Recognize
service: google.cloud.speech.v1p1beta1.Speech
request:
- field: audio.uri
value: "gs://cloud-samples-data/speech/time.mp3"
input_parameter: storage_uri
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- field: config.speech_contexts[0].phrases[0]
value: "$TIME"
input_parameter: phrase
comment: |
Phrase "hints" help recognize the specified phrases from your audio.
In this sample we are using a static class phrase ($TIME).
Classes represent groups of words that represent common concepts
that occur in natural language.
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
- field: config.sample_rate_hertz
value: 24000
comment: Sample rate in Hertz of the audio data sent
- field: config.encoding
value: MP3
comment: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -0,0 +1,35 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- region_tag: speech_quickstart_beta
title: Quickstart Beta
description: Performs synchronous speech recognition on an audio file
rpc: Recognize
service: google.cloud.speech.v1p1beta1.Speech
request:
- field: audio.uri
value: "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
input_parameter: storage_uri
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
- field: config.sample_rate_hertz
value: 44100
comment: Sample rate in Hertz of the audio data sent
- field: config.encoding
value: MP3
comment: |
Encoding of audio data sent. This sample sets this explicitly.
This field is optional for FLAC and WAV audio formats.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -1,34 +1,35 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_auto_punctuation_beta
- region_tag: speech_transcribe_auto_punctuation_beta
title: Getting punctuation in results (Local File) (Beta)
description: |
Transcribe a short audio file with punctuation
description: Transcribe a short audio file with punctuation
rpc: Recognize
service: google.cloud.speech.v1p1beta1.Speech
parameters:
defaults:
- audio.content = "resources/commercial_mono.wav"
- config.enable_automatic_punctuation = True
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.enable_automatic_punctuation
description: |
When enabled, transcription results may include punctuation (available for select languages).
- parameter: config.language_code
description: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
on_success:
request:
- field: audio.content
value: "resources/commercial_mono.wav"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.enable_automatic_punctuation
value: true
comment: |
When enabled, transcription results may include punctuation
(available for select languages).
- field: config.language_code
value: "en-US"
comment: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

@@ -1,46 +1,51 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
# TODO: this id should include "async" (calls LongRunningRecognize async rpc)
- id: speech_transcribe_diarization_beta
- region_tag: speech_transcribe_diarization_beta
title: Separating different speakers (Local File) (LRO) (Beta)
description: |
Print confidence level for individual words in a transcription of a short audio file
Separating different speakers in an audio file recording
rpc: LongRunningRecognize
service: google.cloud.speech.v1p1beta1.Speech
parameters:
defaults:
- audio.content = "resources/commercial_mono.wav"
- config.enable_speaker_diarization = True
- config.diarization_speaker_count = 2
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.enable_speaker_diarization
description: |
If enabled, each word in the first alternative of each result will be
tagged with a speaker tag to identify the speaker.
- parameter: config.diarization_speaker_count
description: |
Optional. Specifies the estimated number of speakers in the conversation.
- parameter: config.language_code
description: "The language of the supplied audio"
on_success:
request:
- field: audio.content
value: "resources/commercial_mono.wav"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.enable_speaker_diarization
value: true
comment: |
If enabled, each word in the first alternative of each result will be
tagged with a speaker tag to identify the speaker.
- field: config.diarization_speaker_count
value: 2
comment: Optional. Specifies the estimated number of speakers in the conversation.
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
response:
- loop:
collection: $resp.results
variable: result
body:
- comment: ["First alternative has words tagged with speakers"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative has words tagged with speakers
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript
- comment: ["Print the %s of each word", speaker_tag]
- comment:
- Print the %s of each word
- speaker_tag
- loop:
collection: alternative.words
variable: word
body:
- print: ["Word: %s", word.word]
- print: ["Speaker tag: %s", word.speaker_tag]
- print:
- 'Word: %s'
- word.word
- print:
- 'Speaker tag: %s'
- word.speaker_tag

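A Python sketch of the diarization flow, assuming the v1p1beta1 surface of the 2.x client; speaker tags hang off the words of each result's first alternative:

from google.cloud import speech_v1p1beta1 as speech

def speech_transcribe_diarization_beta(local_file_path="resources/commercial_mono.wav"):
    client = speech.SpeechClient()
    with open(local_file_path, "rb") as f:
        content = f.read()

    config = speech.RecognitionConfig(
        language_code="en-US",
        enable_speaker_diarization=True,
        diarization_speaker_count=2,  # estimated number of speakers
    )
    audio = speech.RecognitionAudio(content=content)
    response = client.long_running_recognize(config=config, audio=audio).result()

    for result in response.results:
        # First alternative has words tagged with speakers.
        alternative = result.alternatives[0]
        print(f"Transcript: {alternative.transcript}")
        for word in alternative.words:
            print(f"Word: {word.word}  Speaker tag: {word.speaker_tag}")
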
@@ -1,37 +1,44 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_multilanguage_beta
- region_tag: speech_transcribe_multilanguage_beta
title: Detecting language spoken automatically (Local File) (Beta)
description: |
Transcribe a short audio file with language detected from a list of possible languages
rpc: Recognize
service: google.cloud.speech.v1p1beta1.Speech
parameters:
defaults:
- audio.content = "resources/brooklyn_bridge.flac"
- config.language_code = "fr"
- config.alternative_language_codes[0] = "es"
- config.alternative_language_codes[1] = "en"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.alternative_language_codes[0]
description: |
Specify up to 3 additional languages as possible alternative languages of the supplied audio.
- parameter: config.language_code
description: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
on_success:
request:
- field: audio.content
value: resources/brooklyn_bridge.flac
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.language_code
value: "fr"
comment: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
- field: config.alternative_language_codes[0]
value: "es"
comment: |
Specify up to 3 additional languages as possible alternative languages
of the supplied audio.
- field: config.alternative_language_codes[1]
value: "en"
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["The %s which was detected as the most likely being spoken in the audio", language_code]
- print: ["Detected language: %s", result.language_code]
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- The %s which was detected as the most likely being spoken in the audio
- language_code
- print:
- "Detected language: %s"
- result.language_code
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

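A Python sketch of automatic language detection across candidate languages, assuming the v1p1beta1 surface of the 2.x client; the language codes mirror the config defaults:

from google.cloud import speech_v1p1beta1 as speech

def speech_transcribe_multilanguage_beta(local_file_path="resources/brooklyn_bridge.flac"):
    client = speech.SpeechClient()
    with open(local_file_path, "rb") as f:
        content = f.read()

    config = speech.RecognitionConfig(
        language_code="fr",  # a primary language is still required
        # Up to 3 additional candidate languages for the supplied audio.
        alternative_language_codes=["es", "en"],
    )
    response = client.recognize(
        config=config, audio=speech.RecognitionAudio(content=content)
    )
    for result in response.results:
        # language_code reports the language detected as most likely spoken.
        print(f"Detected language: {result.language_code}")
        print(f"Transcript: {result.alternatives[0].transcript}")
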
@@ -1,42 +1,41 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_recognition_metadata_beta
- region_tag: speech_transcribe_recognition_metadata_beta
title: Adding recognition metadata (Local File) (Beta)
description: |
Adds additional details about the short audio file included in this recognition request
description: Adds additional details about the short audio file included in this recognition request
rpc: Recognize
service: google.cloud.speech.v1p1beta1.Speech
parameters:
defaults:
- audio.content = "resources/commercial_mono.wav"
- config.metadata.interaction_type = VOICE_SEARCH
- config.metadata.recording_device_type = SMARTPHONE
- config.metadata.recording_device_name = "Pixel 3"
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.metadata.interaction_type
description: |
The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION, et al.
- parameter: config.metadata.recording_device_type
description: The kind of device used to capture the audio
- parameter: config.metadata.recording_device_name
description: |
The device used to make the recording.
Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or other value.
- parameter: config.language_code
description: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
on_success:
request:
- field: audio.content
value: "resources/commercial_mono.wav"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.metadata.interaction_type
value: VOICE_SEARCH
comment: The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION, et al.
- field: config.metadata.recording_device_type
value: SMARTPHONE
comment: The kind of device used to capture the audio
- field: config.metadata.recording_device_name
value: "Pixel 3"
comment: |
The device used to make the recording.
Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or other value.
- field: config.language_code
value: "en-US"
comment: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
response:
- loop:
variable: result
collection: $resp.results
body:
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript

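A Python sketch of attaching RecognitionMetadata, assuming the v1p1beta1 surface of the 2.x client; the metadata values mirror the config defaults:

from google.cloud import speech_v1p1beta1 as speech

def speech_transcribe_recognition_metadata_beta(local_file_path="resources/commercial_mono.wav"):
    client = speech.SpeechClient()
    with open(local_file_path, "rb") as f:
        content = f.read()

    metadata = speech.RecognitionMetadata(
        interaction_type=speech.RecognitionMetadata.InteractionType.VOICE_SEARCH,
        recording_device_type=speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE,
        recording_device_name="Pixel 3",  # free-form device description
    )
    config = speech.RecognitionConfig(language_code="en-US", metadata=metadata)
    response = client.recognize(
        config=config, audio=speech.RecognitionAudio(content=content)
    )
    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")
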
@@ -1,38 +1,45 @@
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
schema_version: 1.2.0
samples:
- id: speech_transcribe_word_level_confidence_beta
- region_tag: speech_transcribe_word_level_confidence_beta
title: Enabling word-level confidence (Local File) (Beta)
description: |
Print confidence level for individual words in a transcription of a short audio file
Print confidence level for individual words in a transcription of a short audio file.
rpc: Recognize
service: google.cloud.speech.v1p1beta1.Speech
parameters:
defaults:
- audio.content = "resources/brooklyn_bridge.flac"
- config.enable_word_confidence = True
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: "Path to local audio file, e.g. /path/audio.wav"
- parameter: config.enable_word_confidence
description: |
When enabled, the first result returned by the API will include a list
of words and the confidence level for each of those words.
- parameter: config.language_code
description: "The language of the supplied audio"
on_success:
- comment: ["The first result includes confidence levels per word"]
- define: result=$resp.results[0]
- comment: ["First alternative is the most probable result"]
- define: alternative=result.alternatives[0]
request:
- field: audio.content
value: "resources/brooklyn_bridge.flac"
input_parameter: local_file_path
comment: Path to local audio file, e.g. /path/audio.wav
value_is_file: true
- field: config.enable_word_confidence
value: true
comment: |
When enabled, the first result returned by the API will include a list
of words and the confidence level for each of those words.
- field: config.language_code
value: "en-US"
comment: The language of the supplied audio
response:
- comment:
- The first result includes confidence levels per word
- define: result = $resp.results[0]
- comment:
- First alternative is the most probable result
- define: alternative = result.alternatives[0]
- print:
- "Transcript: %s"
- alternative.transcript
- comment: ["Print the confidence level of each word"]
- comment:
- Print the confidence level of each word
- loop:
collection: alternative.words
variable: word
body:
- print: ["Word: %s", word.word]
- print: ["Confidence: %s", word.confidence]
- print:
- "Word: %s"
- word.word
- print:
- "Confidence: %s"
- word.confidence
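(For reference, a minimal Python sketch of what this word-confidence config describes, assuming the google-cloud-speech client for v1p1beta1; only the field values come from the config above, the script shape is illustrative.)

from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    language_code="en-US",
    enable_word_confidence=True,  # first result will include per-word confidence
)
with open("resources/brooklyn_bridge.flac", "rb") as f:  # local_file_path
    audio = speech.RecognitionAudio(content=f.read())
response = client.recognize(config=config, audio=audio)
result = response.results[0]          # first result carries per-word confidence
alternative = result.alternatives[0]  # most probable alternative
print("Transcript: %s" % alternative.transcript)
for word in alternative.words:
    print("Word: %s" % word.word)
    print("Confidence: %s" % word.confidence)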

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Speech-to-Text Sample Tests For Speech Adaptation

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Speech-to-Text Sample Tests For Speech Contexts Static Classes

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Speech-to-Text Sample Tests For Quickstart

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Getting punctuation in results (Local File) (Beta)
@@ -12,7 +14,7 @@ test:
# Simply assert that actual punctuation is present from commercial_mono.wav
- literal: "?"
- literal: ","
- literal: "'"
- literal: ""
# Confirm that another file can be transcribed (use another file)
- name: speech_transcribe_auto_punctuation_beta (--local_file_path)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Separating different speakers (Local File) (LRO) (Beta)

@@ -1,3 +1,5 @@
type: test/samples
schema_version: 1
test:
suites:
- name: Detecting language spoken automatically (Local File) (Beta)

@@ -60,289 +60,6 @@ interfaces:
retry_codes_name: idempotent
retry_params_name: default
timeout_millis: 1000000
samples:
standalone:
- region_tag: speech_transcribe_word_level_confidence_beta
value_sets:
- speech_transcribe_word_level_confidence_beta
- region_tag: speech_transcribe_multilanguage_beta
value_sets:
- speech_transcribe_multilanguage_beta
- region_tag: speech_transcribe_recognition_metadata_beta
value_sets:
- speech_transcribe_recognition_metadata_beta
- region_tag: speech_transcribe_auto_punctuation_beta
value_sets:
- speech_transcribe_auto_punctuation_beta
- region_tag: speech_quickstart_beta
value_sets:
- speech_quickstart_beta
- region_tag: speech_adaptation_beta
value_sets:
- speech_adaptation_beta
- region_tag: speech_contexts_classes_beta
value_sets:
- speech_contexts_classes_beta
sample_value_sets:
- id: speech_transcribe_word_level_confidence_beta
title: Enabling word-level confidence (Local File) (Beta)
description: 'Print confidence level for individual words in a transcription
of a short audio file
'
parameters:
defaults:
- audio.content = "resources/brooklyn_bridge.flac"
- config.enable_word_confidence = True
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.enable_word_confidence
description: |
When enabled, the first result returned by the API will include a list
of words and the confidence level for each of those words.
- parameter: config.language_code
description: The language of the supplied audio
on_success:
- comment:
- The first result includes confidence levels per word
- define: result=$resp.results[0]
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- comment:
- Print the confidence level of each word
- loop:
collection: alternative.words
variable: word
body:
- print:
- 'Word: %s'
- word.word
- print:
- 'Confidence: %s'
- word.confidence
- id: speech_transcribe_multilanguage_beta
title: Detecting language spoken automatically (Local File) (Beta)
description: 'Transcribe a short audio file with language detected from a list
of possible languages
'
parameters:
defaults:
- audio.content = "resources/brooklyn_bridge.flac"
- config.language_code = "fr"
- config.alternative_language_codes[0] = "es"
- config.alternative_language_codes[1] = "en"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.alternative_language_codes[0]
description: 'Specify up to 3 additional languages as possible alternative
languages of the supplied audio.
'
- parameter: config.language_code
description: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- The %s which was detected as the most likely language spoken in the audio
- language_code
- print:
- 'Detected language: %s'
- result.language_code
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
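(Roughly, this multilanguage value set corresponds to the following; a minimal sketch assuming the google-cloud-speech Python client for v1p1beta1.)

from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    language_code="fr",                       # a primary language is still required
    alternative_language_codes=["es", "en"],  # up to 3 possible alternatives
)
with open("resources/brooklyn_bridge.flac", "rb") as f:  # local_file_path
    audio = speech.RecognitionAudio(content=f.read())
response = client.recognize(config=config, audio=audio)
for result in response.results:
    # The language detected as the most likely spoken in the audio
    print("Detected language: %s" % result.language_code)
    alternative = result.alternatives[0]  # most probable result
    print("Transcript: %s" % alternative.transcript)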
- id: speech_transcribe_auto_punctuation_beta
title: Getting punctuation in results (Local File) (Beta)
description: 'Transcribe a short audio file with punctuation
'
parameters:
defaults:
- audio.content = "resources/commercial_mono.wav"
- config.enable_automatic_punctuation = True
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.enable_automatic_punctuation
description: 'When enabled, transcription results may include punctuation
(available for select languages).
'
- parameter: config.language_code
description: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
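(A minimal sketch of the auto-punctuation value set above, again assuming the google-cloud-speech Python client for v1p1beta1; the script shape is illustrative.)

from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    language_code="en-US",
    enable_automatic_punctuation=True,  # available for select languages
)
with open("resources/commercial_mono.wav", "rb") as f:  # local_file_path
    audio = speech.RecognitionAudio(content=f.read())
response = client.recognize(config=config, audio=audio)
for result in response.results:
    # First alternative is the most probable result
    print("Transcript: %s" % result.alternatives[0].transcript)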
- id: speech_transcribe_recognition_metadata_beta
title: Adding recognition metadata (Local File) (Beta)
description: "Adds additional details short audio file included in this recognition
request \n"
parameters:
defaults:
- audio.content = "resources/commercial_mono.wav"
- config.metadata.interaction_type = VOICE_SEARCH
- config.metadata.recording_device_type = SMARTPHONE
- config.metadata.recording_device_name = "Pixel 3"
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.metadata.interaction_type
description: 'The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION,
etc.
'
- parameter: config.metadata.recording_device_type
description: The kind of device used to capture the audio
- parameter: config.metadata.recording_device_name
description: |
The device used to make the recording.
Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or another value.
- parameter: config.language_code
description: |
The language of the supplied audio. Even though additional languages are
provided by alternative_language_codes, a primary language is still required.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- id: speech_quickstart_beta
description: "Performs synchronous speech recognition on an audio file."
parameters:
defaults:
- config.encoding=MP3
- config.sample_rate_hertz=44100
- config.language_code="en-US"
- audio.uri="gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
attributes:
- parameter: config.sample_rate_hertz
sample_argument_name: sample_rate_hertz
description: "Sample rate in Hertz of the audio data sent in all `RecognitionAudio` messages. Valid values are: 8000-48000."
- parameter: config.language_code
sample_argument_name: language_code
description: The language of the supplied audio.
- parameter: audio.uri
sample_argument_name: uri_path
description: Path to the audio file stored on GCS.
on_success:
- loop:
collection: $resp.results
variable: result
body:
- define: transcript=result.alternatives[0].transcript
- print: ["Transcript: %s", transcript]
- id: speech_adaptation_beta
description: "Performs synchronous speech recognition with speech adaptation."
parameters:
defaults:
- config.encoding=MP3
- config.sample_rate_hertz=44100
- config.language_code="en-US"
- config.speech_contexts[0].phrases[0]="Brooklyn Bridge"
- config.speech_contexts[0].boost=20
- audio.uri="gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
attributes:
- parameter: config.sample_rate_hertz
sample_argument_name: sample_rate_hertz
description: "Sample rate in Hertz of the audio data sent in all `RecognitionAudio` messages. Valid values are: 8000-48000."
- parameter: config.language_code
sample_argument_name: language_code
description: The language of the supplied audio.
- parameter: config.speech_contexts[0].phrases[0]
sample_argument_name: phrase
description: Phrase "hints" help the Speech-to-Text API recognize the specified phrases from your audio data.
- parameter: config.speech_contexts[0].boost
sample_argument_name: boost
description: A positive value increases the probability that a specific phrase will be recognized over other similar-sounding phrases.
- parameter: audio.uri
sample_argument_name: uri_path
description: Path to the audio file stored on GCS.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
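(The speech-adaptation value set adds a SpeechContext with a boosted phrase hint to the quickstart request; a minimal sketch under the same assumption, the google-cloud-speech Python client for v1p1beta1.)

from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.MP3,
    sample_rate_hertz=44100,
    language_code="en-US",
    speech_contexts=[
        # The phrase hint biases recognition toward "Brooklyn Bridge"; boost
        # raises its probability relative to similar-sounding phrases.
        speech.SpeechContext(phrases=["Brooklyn Bridge"], boost=20.0),
    ],
)
audio = speech.RecognitionAudio(uri="gs://cloud-samples-data/speech/brooklyn_bridge.mp3")
response = client.recognize(config=config, audio=audio)
for result in response.results:
    # First alternative is the most probable result
    print("Transcript: %s" % result.alternatives[0].transcript)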
- id: speech_contexts_classes_beta
description: "Performs synchronous speech recognition with static context classes."
parameters:
defaults:
- config.encoding=MP3
- config.sample_rate_hertz=24000
- config.language_code="en-US"
- config.speech_contexts[0].phrases[0]="$TIME"
- audio.uri="gs://cloud-samples-data/speech/time.mp3"
attributes:
- parameter: config.sample_rate_hertz
sample_argument_name: sample_rate_hertz
description: "Sample rate in Hertz of the audio data sent in all `RecognitionAudio` messages. Valid values are: 8000-48000."
- parameter: config.language_code
sample_argument_name: language_code
description: The language of the supplied audio.
- parameter: config.speech_contexts[0].phrases[0]
sample_argument_name: phrase
description: Phrase "hints" help the Speech-to-Text API recognize the specified phrases from your audio data. In this sample we are using the static class phrase ($TIME). Classes represent groups of words that convey common concepts occurring in natural language. We recommend checking the docs page for more info on static classes.
- parameter: audio.uri
sample_argument_name: uri_path
description: Path to the audio file stored on GCS.
on_success:
- loop:
variable: result
collection: "$resp.results"
body:
- comment:
- First alternative is the most probable result
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
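(The static-classes value set differs from plain adaptation only in that the "phrase" is a class token; a minimal sketch, assuming the google-cloud-speech Python client for v1p1beta1.)

from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.MP3,
    sample_rate_hertz=24000,
    language_code="en-US",
    # "$TIME" is a static class phrase: it matches spoken times such as
    # "half past eight" rather than one literal string.
    speech_contexts=[speech.SpeechContext(phrases=["$TIME"])],
)
audio = speech.RecognitionAudio(uri="gs://cloud-samples-data/speech/time.mp3")
response = client.recognize(config=config, audio=audio)
for result in response.results:
    # First alternative is the most probable result
    print("Transcript: %s" % result.alternatives[0].transcript)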
- name: LongRunningRecognize
flattening:
groups:
@@ -367,63 +84,6 @@ interfaces:
poll_delay_multiplier: 1.5
max_poll_delay_millis: 45000
total_poll_timeout_millis: 86400000
samples:
standalone:
- region_tag: speech_transcribe_diarization_beta
value_sets:
- speech_transcribe_diarization_beta
sample_value_sets:
- id: speech_transcribe_diarization_beta
title: Separating different speakers (Local File) (LRO) (Beta)
description: |
Separating different speakers in an audio file recording
parameters:
defaults:
- audio.content = "resources/commercial_mono.wav"
- config.enable_speaker_diarization = True
- config.diarization_speaker_count = 2
- config.language_code = "en-US"
attributes:
- parameter: audio.content
sample_argument_name: local_file_path
read_file: true
description: Path to local audio file, e.g. /path/audio.wav
- parameter: config.enable_speaker_diarization
description: |
If enabled, each word in the first alternative of each result will be
tagged with a speaker tag to identify the speaker.
- parameter: config.diarization_speaker_count
description: 'Optional. Specifies the estimated number of speakers in the
conversation.
'
- parameter: config.language_code
description: The language of the supplied audio
on_success:
- loop:
collection: "$resp.results"
variable: result
body:
- comment:
- First alternative has words tagged with speakers
- define: alternative=result.alternatives[0]
- print:
- 'Transcript: %s'
- alternative.transcript
- comment:
- Print the %s of each word
- speaker_tag
- loop:
collection: alternative.words
variable: word
body:
- print:
- 'Word: %s'
- word.word
- print:
- 'Speaker tag: %s'
- word.speaker_tag
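(Diarization is attached to LongRunningRecognize, so the generated sample waits on an operation; a minimal sketch, assuming the google-cloud-speech Python client for v1p1beta1, where enable_speaker_diarization and diarization_speaker_count are the fields this value set sets.)

from google.cloud import speech_v1p1beta1 as speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    language_code="en-US",
    enable_speaker_diarization=True,  # tag each word with a speaker_tag
    diarization_speaker_count=2,      # optional estimate of speaker count
)
with open("resources/commercial_mono.wav", "rb") as f:  # local_file_path
    audio = speech.RecognitionAudio(content=f.read())
operation = client.long_running_recognize(config=config, audio=audio)
response = operation.result()  # block until the long-running operation completes
for result in response.results:
    # First alternative has words tagged with speakers
    alternative = result.alternatives[0]
    print("Transcript: %s" % alternative.transcript)
    for word in alternative.words:
        print("Word: %s" % word.word)
        print("Speaker tag: %s" % word.speaker_tag)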
- name: StreamingRecognize
retry_codes_name: idempotent
retry_params_name: default
