parent
d42eb8b611
commit
49f953989b
41 changed files with 584 additions and 1124 deletions
@ -1,35 +1,36 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_async |
||||
- region_tag: speech_transcribe_async |
||||
title: Transcribe Audio File using Long Running Operation (Local File) (LRO) |
||||
description: Transcribe a long audio file using asynchronous speech recognition |
||||
rpc: LongRunningRecognize |
||||
service: google.cloud.speech.v1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/brooklyn_bridge.raw" |
||||
- config.language_code = "en-US" |
||||
- config.sample_rate_hertz = 16000 |
||||
- config.encoding = LINEAR16 |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
- parameter: config.sample_rate_hertz |
||||
description: Sample rate in Hertz of the audio data sent |
||||
- parameter: config.encoding |
||||
description: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
on_success: |
||||
request: |
||||
- field: audio.content |
||||
value: "resources/brooklyn_bridge.raw" |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
- field: config.sample_rate_hertz |
||||
value: 16000 |
||||
comment: Sample rate in Hertz of the audio data sent |
||||
- field: config.encoding |
||||
value: LINEAR16 |
||||
comment: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,35 +1,35 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_async_gcs |
||||
- region_tag: speech_transcribe_async_gcs |
||||
title: Transcript Audio File using Long Running Operation (Cloud Storage) (LRO) |
||||
description: | |
||||
Transcribe long audio file from Cloud Storage using asynchronous speech recognition |
||||
description: Transcribe long audio file from Cloud Storage using asynchronous speech recognition |
||||
rpc: LongRunningRecognize |
||||
service: google.cloud.speech.v1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw" |
||||
- config.sample_rate_hertz = 16000 |
||||
- config.language_code = "en-US" |
||||
- config.encoding = LINEAR16 |
||||
attributes: |
||||
- parameter: audio.uri |
||||
sample_argument_name: storage_uri |
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]" |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
- parameter: config.sample_rate_hertz |
||||
description: Sample rate in Hertz of the audio data sent |
||||
- parameter: config.encoding |
||||
description: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
on_success: |
||||
request: |
||||
- field: audio.uri |
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.raw" |
||||
input_parameter: storage_uri |
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] |
||||
- field: config.sample_rate_hertz |
||||
value: 16000 |
||||
comment: Sample rate in Hertz of the audio data sent |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
- field: config.encoding |
||||
value: LINEAR16 |
||||
comment: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,38 +1,48 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_async_word_time_offsets_gcs |
||||
- region_tag: speech_transcribe_async_word_time_offsets_gcs |
||||
title: Getting word timestamps (Cloud Storage) (LRO) |
||||
description: | |
||||
Print start and end time of each word spoken in audio file from Cloud Storage |
||||
description: Print start and end time of each word spoken in audio file from Cloud Storage |
||||
rpc: LongRunningRecognize |
||||
service: google.cloud.speech.v1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.flac" |
||||
- config.enable_word_time_offsets = True |
||||
- config.language_code = "en-US" |
||||
attributes: |
||||
- parameter: audio.uri |
||||
sample_argument_name: storage_uri |
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]" |
||||
- parameter: config.enable_word_time_offsets |
||||
description: | |
||||
When enabled, the first result returned by the API will include a list |
||||
of words and the start and end time offsets (timestamps) for those words. |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
on_success: |
||||
- comment: ["The first result includes start and end time word offsets"] |
||||
- define: result=$resp.results[0] |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
request: |
||||
- field: audio.uri |
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.flac" |
||||
input_parameter: storage_uri |
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] |
||||
- field: config.enable_word_time_offsets |
||||
value: true |
||||
comment: | |
||||
When enabled, the first result returned by the API will include a list |
||||
of words and the start and end time offsets (timestamps) for those words. |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
response: |
||||
- comment: |
||||
- The first result includes start and end time word offsets |
||||
- define: result = $resp.results[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
- comment: ["Print the start and end time of each word"] |
||||
- comment: |
||||
- Print the start and end time of each word |
||||
- loop: |
||||
collection: alternative.words |
||||
variable: word |
||||
body: |
||||
- print: ["Word: %s", word.word] |
||||
- print: ["Start time: %s seconds %s nanos", word.start_time.seconds, word.start_time.nanos] |
||||
- print: ["End time: %s seconds %s nanos", word.end_time.seconds, word.end_time.nanos] |
||||
- print: |
||||
- "Word: %s" |
||||
- word.word |
||||
- print: |
||||
- "Start time: %s seconds %s nanos" |
||||
- word.start_time.seconds |
||||
- word.start_time.nanos |
||||
- print: |
||||
- "End time: %s seconds %s nanos" |
||||
- word.end_time.seconds |
||||
- word.end_time.nanos |
||||
|
@ -1,38 +1,39 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_enhanced_model |
||||
- region_tag: speech_transcribe_enhanced_model |
||||
title: Using Enhanced Models (Local File) |
||||
description: Transcribe a short audio file using an enhanced model |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/hello.wav" |
||||
- config.model = "phone_call" |
||||
- config.use_enhanced = True |
||||
- config.language_code = "en-US" |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.model |
||||
description: | |
||||
The enhanced model to use, e.g. phone_call |
||||
Currently phone_call is the only model available as an enhanced model. |
||||
- parameter: config.use_enhanced |
||||
description: | |
||||
Use an enhanced model for speech recognition (when set to true). |
||||
Project must be eligible for requesting enhanced models. |
||||
Enhanced speech models require that you opt-in to data logging. |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
on_success: |
||||
request: |
||||
- field: audio.content |
||||
value: "resources/hello.wav" |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.model |
||||
value: "phone_call" |
||||
comment: | |
||||
The enhanced model to use, e.g. phone_call |
||||
Currently phone_call is the only model available as an enhanced model. |
||||
- field: config.use_enhanced |
||||
value: true |
||||
comment: | |
||||
Use an enhanced model for speech recognition (when set to true). |
||||
Project must be eligible for requesting enhanced models. |
||||
Enhanced speech models require that you opt-in to data logging. |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,34 +1,35 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_model_selection |
||||
- region_tag: speech_transcribe_model_selection |
||||
title: Selecting a Transcription Model (Local File) |
||||
description: Transcribe a short audio file using a specified transcription model |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/hello.wav" |
||||
- config.model = "phone_call" |
||||
- config.language_code = "en-US" |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.model |
||||
sample_argument_name: model |
||||
description: | |
||||
The transcription model to use, e.g. video, phone_call, default |
||||
For a list of available transcription models, see: |
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
on_success: |
||||
request: |
||||
- field: audio.content |
||||
value: "resources/hello.wav" |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.model |
||||
value: "phone_call" |
||||
input_parameter: model |
||||
comment: | |
||||
The transcription model to use, e.g. video, phone_call, default |
||||
For a list of available transcription models, see: |
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,34 +1,35 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_model_selection_gcs |
||||
- region_tag: speech_transcribe_model_selection_gcs |
||||
title: Selecting a Transcription Model (Cloud Storage) |
||||
description: | |
||||
Transcribe a short audio file from Cloud Storage using a specified transcription model |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.uri = "gs://cloud-samples-data/speech/hello.wav" |
||||
- config.model = "phone_call" |
||||
- config.language_code = "en-US" |
||||
attributes: |
||||
- parameter: audio.uri |
||||
sample_argument_name: storage_uri |
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]" |
||||
- parameter: config.model |
||||
sample_argument_name: model |
||||
description: | |
||||
The transcription model to use, e.g. video, phone_call, default |
||||
For a list of available transcription models, see: |
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
on_success: |
||||
request: |
||||
- field: audio.uri |
||||
value: "gs://cloud-samples-data/speech/hello.wav" |
||||
input_parameter: storage_uri |
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] |
||||
- field: config.model |
||||
value: "phone_call" |
||||
input_parameter: model |
||||
comment: | |
||||
The transcription model to use, e.g. video, phone_call, default |
||||
For a list of available transcription models, see: |
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,35 +1,36 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_sync |
||||
- region_tag: speech_transcribe_sync |
||||
title: Transcribe Audio File (Local File) |
||||
description: Transcribe a short audio file using synchronous speech recognition |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/brooklyn_bridge.raw" |
||||
- config.language_code = "en-US" |
||||
- config.sample_rate_hertz = 16000 |
||||
- config.encoding = LINEAR16 |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
- parameter: config.sample_rate_hertz |
||||
description: Sample rate in Hertz of the audio data sent |
||||
- parameter: config.encoding |
||||
description: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
on_success: |
||||
request: |
||||
- field: audio.content |
||||
value: "resources/brooklyn_bridge.raw" |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
- field: config.sample_rate_hertz |
||||
value: 16000 |
||||
comment: Sample rate in Hertz of the audio data sent |
||||
- field: config.encoding |
||||
value: LINEAR16 |
||||
comment: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,35 +1,35 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_sync_gcs |
||||
- region_tag: speech_transcribe_sync_gcs |
||||
title: Transcript Audio File (Cloud Storage) |
||||
description: | |
||||
Transcribe short audio file from Cloud Storage using synchronous speech recognition |
||||
description: Transcribe short audio file from Cloud Storage using synchronous speech recognition |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw" |
||||
- config.sample_rate_hertz = 16000 |
||||
- config.language_code = "en-US" |
||||
- config.encoding = LINEAR16 |
||||
attributes: |
||||
- parameter: audio.uri |
||||
sample_argument_name: storage_uri |
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]" |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
- parameter: config.sample_rate_hertz |
||||
description: Sample rate in Hertz of the audio data sent |
||||
- parameter: config.encoding |
||||
description: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
on_success: |
||||
request: |
||||
- field: audio.uri |
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.raw" |
||||
input_parameter: storage_uri |
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] |
||||
- field: config.sample_rate_hertz |
||||
value: 16000 |
||||
comment: Sample rate in Hertz of the audio data sent |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
- field: config.encoding |
||||
value: LINEAR16 |
||||
comment: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Transcribe Audio File using Long Running Operation (Local File) (LRO) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Transcript Audio File using Long Running Operation (Cloud Storage) (LRO) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Getting word timestamps (Cloud Storage) (LRO) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Using Enhanced Models (Local File) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Selecting a Transcription Model (Local File) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Selecting a Transcription Model (Cloud Storage) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Multi-Channel Audio Transcription (Local File) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Multi-Channel Audio Transcription (Cloud Storage) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Transcribe Audio File (Local File) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Transcript Audio File (Cloud Storage) |
@ -0,0 +1,49 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- region_tag: speech_adaptation_beta |
||||
title: Speech Adaptation (Cloud Storage) |
||||
description: Transcribe a short audio file with speech adaptation. |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1p1beta1.Speech |
||||
request: |
||||
- field: audio.uri |
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.mp3" |
||||
input_parameter: storage_uri |
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] |
||||
- field: config.speech_contexts[0].phrases[0] |
||||
value: "Brooklyn Bridge" |
||||
input_parameter: phrase |
||||
comment: | |
||||
Phrase "hints" help recognize the specified phrases from your audio. |
||||
- field: config.speech_contexts[0].boost |
||||
value: 20.0 |
||||
comment: | |
||||
Hint Boost. This value increases the probability that a specific |
||||
phrase will be recognized over other similar sounding phrases. |
||||
The higher the boost, the higher the chance of false positive |
||||
recognition as well. Can accept wide range of positive values. |
||||
Most use cases are best served with values between 0 and 20. |
||||
Using a binary search approach may help you find the optimal value. |
||||
- field: config.sample_rate_hertz |
||||
value: 44100 |
||||
comment: Sample rate in Hertz of the audio data sent |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
- field: config.encoding |
||||
value: MP3 |
||||
comment: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
@ -0,0 +1,43 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- region_tag: speech_contexts_classes_beta |
||||
title: Using Context Classes (Cloud Storage) |
||||
description: Transcribe a short audio file with static context classes. |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1p1beta1.Speech |
||||
request: |
||||
- field: audio.uri |
||||
value: "gs://cloud-samples-data/speech/time.mp3" |
||||
input_parameter: storage_uri |
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] |
||||
- field: config.speech_contexts[0].phrases[0] |
||||
value: "$TIME" |
||||
input_parameter: phrase |
||||
comment: | |
||||
Phrase "hints" help recognize the specified phrases from your audio. |
||||
In this sample we are using a static class phrase ($TIME). |
||||
Classes represent groups of words that represent common concepts |
||||
that occur in natural language. |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
- field: config.sample_rate_hertz |
||||
value: 24000 |
||||
comment: Sample rate in Hertz of the audio data sent |
||||
- field: config.encoding |
||||
value: MP3 |
||||
comment: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
@ -0,0 +1,35 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- region_tag: speech_quickstart_beta |
||||
title: Quickstart Beta |
||||
description: Performs synchronous speech recognition on an audio file |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1p1beta1.Speech |
||||
request: |
||||
- field: audio.uri |
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.mp3" |
||||
input_parameter: storage_uri |
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
- field: config.sample_rate_hertz |
||||
value: 44100 |
||||
comment: Sample rate in Hertz of the audio data sent |
||||
- field: config.encoding |
||||
value: MP3 |
||||
comment: | |
||||
Encoding of audio data sent. This sample sets this explicitly. |
||||
This field is optional for FLAC and WAV audio formats. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
@ -1,34 +1,35 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_auto_punctuation_beta |
||||
- region_tag: speech_transcribe_auto_punctuation_beta |
||||
title: Getting punctuation in results (Local File) (Beta) |
||||
description: | |
||||
Transcribe a short audio file with punctuation |
||||
description: Transcribe a short audio file with punctuation |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1p1beta1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/commercial_mono.wav" |
||||
- config.enable_automatic_punctuation = True |
||||
- config.language_code = "en-US" |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.enable_automatic_punctuation |
||||
description: | |
||||
When enabled, transcription results may include punctuation (available for select languages). |
||||
- parameter: config.language_code |
||||
description: | |
||||
The language of the supplied audio. Even though additional languages are |
||||
provided by alternative_language_codes, a primary language is still required. |
||||
on_success: |
||||
request: |
||||
- field: audio.content |
||||
value: "resources/commercial_mono.wav" |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.enable_automatic_punctuation |
||||
value: true |
||||
comment: | |
||||
When enabled, transcription results may include punctuation |
||||
(available for select languages). |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: | |
||||
The language of the supplied audio. Even though additional languages are |
||||
provided by alternative_language_codes, a primary language is still required. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,46 +1,51 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
# TODO: this id should include "async" (calls LongRunningRecognize async rpc) |
||||
- id: speech_transcribe_diarization_beta |
||||
- region_tag: speech_transcribe_diarization_beta |
||||
title: Separating different speakers (Local File) (LRO) (Beta) |
||||
description: | |
||||
Print confidence level for individual words in a transcription of a short audio file |
||||
Separating different speakers in an audio file recording |
||||
rpc: LongRunningRecognize |
||||
service: google.cloud.speech.v1p1beta1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/commercial_mono.wav" |
||||
- config.enable_speaker_diarization = True |
||||
- config.diarization_speaker_count = 2 |
||||
- config.language_code = "en-US" |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.enable_speaker_diarization |
||||
description: | |
||||
If enabled, each word in the first alternative of each result will be |
||||
tagged with a speaker tag to identify the speaker. |
||||
- parameter: config.diarization_speaker_count |
||||
description: | |
||||
Optional. Specifies the estimated number of speakers in the conversation. |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
on_success: |
||||
request: |
||||
- field: audio.content |
||||
value: "resources/commercial_mono.wav" |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.enable_speaker_diarization |
||||
value: true |
||||
comment: | |
||||
If enabled, each word in the first alternative of each result will be |
||||
tagged with a speaker tag to identify the speaker. |
||||
- field: config.diarization_speaker_count |
||||
value: 2 |
||||
comment: Optional. Specifies the estimated number of speakers in the conversation. |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
response: |
||||
- loop: |
||||
collection: $resp.results |
||||
variable: result |
||||
body: |
||||
- comment: ["First alternative has words tagged with speakers"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative has words tagged with speakers |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
- comment: ["Print the %s of each word", speaker_tag] |
||||
- comment: |
||||
- Print the %s of each word |
||||
- speaker_tag |
||||
- loop: |
||||
collection: alternative.words |
||||
variable: word |
||||
body: |
||||
- print: ["Word: %s", word.word] |
||||
- print: ["Speaker tag: %s", word.speaker_tag] |
||||
- print: |
||||
- 'Word: %s' |
||||
- word.word |
||||
- print: |
||||
- 'Speaker tag: %s' |
||||
- word.speaker_tag |
||||
|
@ -1,37 +1,44 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_multilanguage_beta |
||||
- region_tag: speech_transcribe_multilanguage_beta |
||||
title: Detecting language spoken automatically (Local File) (Beta) |
||||
description: | |
||||
Transcribe a short audio file with language detected from a list of possible languages |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1p1beta1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/brooklyn_bridge.flac" |
||||
- config.language_code = "fr" |
||||
- config.alternative_language_codes[0] = "es" |
||||
- config.alternative_language_codes[1] = "en" |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.alternative_language_codes[0] |
||||
description: | |
||||
Specify up to 3 additional languages as possible alternative languages of the supplied audio. |
||||
- parameter: config.language_code |
||||
description: | |
||||
The language of the supplied audio. Even though additional languages are |
||||
provided by alternative_language_codes, a primary language is still required. |
||||
on_success: |
||||
request: |
||||
- field: audio.content |
||||
value: resources/brooklyn_bridge.flac |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.language_code |
||||
value: "fr" |
||||
comment: | |
||||
The language of the supplied audio. Even though additional languages are |
||||
provided by alternative_language_codes, a primary language is still required. |
||||
- field: config.alternative_language_codes[0] |
||||
value: "es" |
||||
comment: | |
||||
Specify up to 3 additional languages as possible alternative languages |
||||
of the supplied audio. |
||||
- field: config.alternative_language_codes[1] |
||||
value: "en" |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["The %s which was detected as the most likely being spoken in the audio", language_code] |
||||
- print: ["Detected language: %s", result.language_code] |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- The %s which was detected as the most likely being spoken in the audio |
||||
- language_code |
||||
- print: |
||||
- "Detected language: %s" |
||||
- result.language_code |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,42 +1,41 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_recognition_metadata_beta |
||||
- region_tag: speech_transcribe_recognition_metadata_beta |
||||
title: Adding recognition metadata (Local File) (Beta) |
||||
description: | |
||||
Adds additional details short audio file included in this recognition request |
||||
description: Adds additional details short audio file included in this recognition request |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1p1beta1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/commercial_mono.wav" |
||||
- config.metadata.interaction_type = VOICE_SEARCH |
||||
- config.metadata.recording_device_type = SMARTPHONE |
||||
- config.metadata.recording_device_name = "Pixel 3" |
||||
- config.language_code = "en-US" |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.metadata.interaction_type |
||||
description: | |
||||
The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION, et al. |
||||
- parameter: config.metadata.recording_device_type |
||||
description: The kind of device used to capture the audio |
||||
- parameter: config.metadata.recording_device_name |
||||
description: | |
||||
The device used to make the recording. |
||||
Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or other value. |
||||
- parameter: config.language_code |
||||
description: | |
||||
The language of the supplied audio. Even though additional languages are |
||||
provided by alternative_language_codes, a primary language is still required. |
||||
on_success: |
||||
request: |
||||
- field: audio.content |
||||
value: "resources/commercial_mono.wav" |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.metadata.interaction_type |
||||
value: VOICE_SEARCH |
||||
comment: The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION, et al. |
||||
- field: config.metadata.recording_device_type |
||||
value: SMARTPHONE |
||||
comment: The kind of device used to capture the audio |
||||
- field: config.metadata.recording_device_name |
||||
value: "Pixel 3" |
||||
comment: | |
||||
The device used to make the recording. |
||||
Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or other value. |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: | |
||||
The language of the supplied audio. Even though additional languages are |
||||
provided by alternative_language_codes, a primary language is still required. |
||||
response: |
||||
- loop: |
||||
variable: result |
||||
collection: $resp.results |
||||
body: |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
|
@ -1,38 +1,45 @@ |
||||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto |
||||
schema_version: 1.2.0 |
||||
samples: |
||||
- id: speech_transcribe_word_level_confidence_beta |
||||
- region_tag: speech_transcribe_word_level_confidence_beta |
||||
title: Enabling word-level confidence (Local File) (Beta) |
||||
description: | |
||||
Print confidence level for individual words in a transcription of a short audio file |
||||
Print confidence level for individual words in a transcription of a short audio file. |
||||
rpc: Recognize |
||||
service: google.cloud.speech.v1p1beta1.Speech |
||||
parameters: |
||||
defaults: |
||||
- audio.content = "resources/brooklyn_bridge.flac" |
||||
- config.enable_word_confidence = True |
||||
- config.language_code = "en-US" |
||||
attributes: |
||||
- parameter: audio.content |
||||
sample_argument_name: local_file_path |
||||
read_file: true |
||||
description: "Path to local audio file, e.g. /path/audio.wav" |
||||
- parameter: config.enable_word_confidence |
||||
description: | |
||||
When enabled, the first result returned by the API will include a list |
||||
of words and the confidence level for each of those words. |
||||
- parameter: config.language_code |
||||
description: "The language of the supplied audio" |
||||
on_success: |
||||
- comment: ["The first result includes confidence levels per word"] |
||||
- define: result=$resp.results[0] |
||||
- comment: ["First alternative is the most probable result"] |
||||
- define: alternative=result.alternatives[0] |
||||
request: |
||||
- field: audio.content |
||||
value: "resources/brooklyn_bridge.flac" |
||||
input_parameter: local_file_path |
||||
comment: Path to local audio file, e.g. /path/audio.wav |
||||
value_is_file: true |
||||
- field: config.enable_word_confidence |
||||
value: true |
||||
comment: | |
||||
When enabled, the first result returned by the API will include a list |
||||
of words and the confidence level for each of those words. |
||||
- field: config.language_code |
||||
value: "en-US" |
||||
comment: The language of the supplied audio |
||||
response: |
||||
- comment: |
||||
- The first result includes confidence levels per word |
||||
- define: result = $resp.results[0] |
||||
- comment: |
||||
- First alternative is the most probable result |
||||
- define: alternative = result.alternatives[0] |
||||
- print: |
||||
- "Transcript: %s" |
||||
- alternative.transcript |
||||
- comment: ["Print the confidence level of each word"] |
||||
- comment: |
||||
- Print the confidence level of each word |
||||
- loop: |
||||
collection: alternative.words |
||||
variable: word |
||||
body: |
||||
- print: ["Word: %s", word.word] |
||||
- print: ["Confidence: %s", word.confidence] |
||||
- print: |
||||
- "Word: %s" |
||||
- word.word |
||||
- print: |
||||
- "Confidence: %s" |
||||
- word.confidence |
||||
|
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Speech-to-Text Sample Tests For Speech Adaptation |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Speech-to-Text Sample Tests For Speech Contexts Static Classes |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Speech-to-Text Sample Tests For Quickstart |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Separating different speakers (Local File) (LRO) (Beta) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Detecting language spoken automatically (Local File) (Beta) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Adding recognition metadata (Local File) (Beta) |
@ -1,3 +1,5 @@ |
||||
type: test/samples |
||||
schema_version: 1 |
||||
test: |
||||
suites: |
||||
- name: Enabling word-level confidence (Local File) (Beta) |
Loading…
Reference in new issue