Synchronize new proto/yaml changes.

PiperOrigin-RevId: 271102771
6 years ago · a5d0708f07
parent 9dc1d37b6b
commit a5d0708f07
8 changed files with 274 additions and 208 deletions
--- a/google/cloud/speech/artman_speech_v1.yaml
+++ b/google/cloud/speech/artman_speech_v1.yaml
@ -6,7 +6,7 @@ common:
  - name: google-common-protos
  src_proto_paths:
  - v1
-  service_yaml: speech_v1.yaml
+  service_yaml: v1/speech_v1.yaml
  gapic_yaml: v1/speech_gapic.yaml
  samples: v1/samples
  proto_package: google.cloud.speech.v1
--- a/google/cloud/speech/artman_speech_v1p1beta1.yaml
+++ b/google/cloud/speech/artman_speech_v1p1beta1.yaml
@ -6,9 +6,10 @@ common:
  - name: google-common-protos
  src_proto_paths:
  - v1p1beta1
-  service_yaml: speech_v1p1beta1.yaml
+  service_yaml: v1p1beta1/speech_v1p1beta1.yaml
  gapic_yaml: v1p1beta1/speech_gapic.yaml
  samples: v1p1beta1/samples
+  proto_package: google.cloud.speech.v1p1beta1
 artifacts:
 - name: gapic_config
  type: GAPIC_CONFIG
--- a/google/cloud/speech/speech_v1p1beta1.yaml
+++ b/google/cloud/speech/speech_v1p1beta1.yaml
@ -1,50 +0,0 @@
-type: google.api.Service
-config_version: 3
-name: speech.googleapis.com
-title: Cloud Speech API
-
-apis:
- name: google.cloud.speech.v1p1beta1.Speech
-
-documentation:
-  summary: Converts audio to text by applying powerful neural network models.
-  overview: |-
-    # Introduction
-
-    Google Cloud Speech API provides speech recognition as a service.
-
-backend:
-  rules:
-  - selector: google.longrunning.Operations.ListOperations
-    deadline: 200.0
-  - selector: google.longrunning.Operations.GetOperation
-    deadline: 200.0
-  - selector: google.longrunning.Operations.WaitOperation
-    deadline: 200.0
-  - selector: google.cloud.speech.v1p1beta1.Speech.Recognize
-    deadline: 200.0
-  - selector: google.cloud.speech.v1p1beta1.Speech.LongRunningRecognize
-    deadline: 200.0
-  - selector: google.cloud.speech.v1p1beta1.Speech.StreamingRecognize
-    deadline: 905.0
-
-http:
-  rules:
-  - selector: google.longrunning.Operations.ListOperations
-    get: /v1/operations
-    additional_bindings:
-    - get: /v1beta1/operations
-  - selector: google.longrunning.Operations.GetOperation
-    get: '/v1/operations/{name=*}'
-    additional_bindings:
-    - get: '/v1beta1/operations/{name=*}'
-
-    - get: '/v1p1beta1/operations/{name=*}'
-
-
-authentication:
-  rules:
-  - selector: '*'
-    oauth:
-      canonical_scopes: |-
-        https://www.googleapis.com/auth/cloud-platform
--- a/google/cloud/speech/v1p1beta1/cloud_speech.proto
+++ b/google/cloud/speech/v1p1beta1/cloud_speech.proto
@ -1,4 +1,4 @@
-// Copyright 2018 Google LLC.
+// Copyright 2019 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -18,10 +18,11 @@ syntax = "proto3";
 package google.cloud.speech.v1p1beta1;

 import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
 import "google/longrunning/operations.proto";
 import "google/protobuf/any.proto";
 import "google/protobuf/duration.proto";
-import "google/protobuf/empty.proto";
 import "google/protobuf/timestamp.proto";
 import "google/rpc/status.proto";

@ -30,9 +31,13 @@ option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v1p1beta
 option java_multiple_files = true;
 option java_outer_classname = "SpeechProto";
 option java_package = "com.google.cloud.speech.v1p1beta1";
+option objc_class_prefix = "GCS";

 // Service that implements Google Cloud Speech API.
 service Speech {
+  option (google.api.default_host) = "speech.googleapis.com";
+  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+
  // Performs synchronous speech recognition: receive results after all audio
  // has been sent and processed.
  rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
@ -40,52 +45,59 @@ service Speech {
      post: "/v1p1beta1/speech:recognize"
      body: "*"
    };
+    option (google.api.method_signature) = "config,audio";
  }

  // Performs asynchronous speech recognition: receive results via the
  // google.longrunning.Operations interface. Returns either an
  // `Operation.error` or an `Operation.response` which contains
  // a `LongRunningRecognizeResponse` message.
-  rpc LongRunningRecognize(LongRunningRecognizeRequest)
-      returns (google.longrunning.Operation) {
+  // For more information on asynchronous speech recognition, see the
+  // [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
+  rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p1beta1/speech:longrunningrecognize"
      body: "*"
    };
+    option (google.api.method_signature) = "config,audio";
+    option (google.longrunning.operation_info) = {
+      response_type: "LongRunningRecognizeResponse"
+      metadata_type: "LongRunningRecognizeMetadata"
+    };
  }

  // Performs bidirectional streaming speech recognition: receive results while
  // sending audio. This method is only available via the gRPC API (not REST).
-  rpc StreamingRecognize(stream StreamingRecognizeRequest)
-      returns (stream StreamingRecognizeResponse) {}
+  rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
+  }
 }

 // The top-level message sent by the client for the `Recognize` method.
 message RecognizeRequest {
-  // *Required* Provides information to the recognizer that specifies how to
+  // Required. Provides information to the recognizer that specifies how to
  // process the request.
-  RecognitionConfig config = 1;
+  RecognitionConfig config = 1 [(google.api.field_behavior) = REQUIRED];

-  // *Required* The audio data to be recognized.
-  RecognitionAudio audio = 2;
+  // Required. The audio data to be recognized.
+  RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];
 }

 // The top-level message sent by the client for the `LongRunningRecognize`
 // method.
 message LongRunningRecognizeRequest {
-  // *Required* Provides information to the recognizer that specifies how to
+  // Required. Provides information to the recognizer that specifies how to
  // process the request.
-  RecognitionConfig config = 1;
+  RecognitionConfig config = 1 [(google.api.field_behavior) = REQUIRED];

-  // *Required* The audio data to be recognized.
-  RecognitionAudio audio = 2;
+  // Required. The audio data to be recognized.
+  RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];
 }

 // The top-level message sent by the client for the `StreamingRecognize` method.
 // Multiple `StreamingRecognizeRequest` messages are sent. The first message
-// must contain a `streaming_config` message and must not contain `audio` data.
-// All subsequent messages must contain `audio` data and must not contain a
-// `streaming_config` message.
+// must contain a `streaming_config` message and must not contain
+// `audio_content`. All subsequent messages must contain `audio_content` and
+// must not contain a `streaming_config` message.
 message StreamingRecognizeRequest {
  // The streaming request, which is either a streaming config or audio content.
  oneof streaming_request {
@ -99,9 +111,9 @@ message StreamingRecognizeRequest {
    // `StreamingRecognizeRequest` message must not contain `audio_content` data
    // and all subsequent `StreamingRecognizeRequest` messages must contain
    // `audio_content` data. The audio bytes must be encoded as specified in
-    // `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
+    // `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a
    // pure binary representation (not base64). See
-    // [content limits](/speech-to-text/quotas#content).
+    // [content limits](https://cloud.google.com/speech-to-text/quotas#content).
    bytes audio_content = 2;
  }
 }
@ -109,11 +121,11 @@ message StreamingRecognizeRequest {
 // Provides information to the recognizer that specifies how to process the
 // request.
 message StreamingRecognitionConfig {
-  // *Required* Provides information to the recognizer that specifies how to
+  // Required. Provides information to the recognizer that specifies how to
  // process the request.
-  RecognitionConfig config = 1;
+  RecognitionConfig config = 1 [(google.api.field_behavior) = REQUIRED];

-  // *Optional* If `false` or omitted, the recognizer will perform continuous
+  // If `false` or omitted, the recognizer will perform continuous
  // recognition (continuing to wait for and process audio even if the user
  // pauses speaking) until the client closes the input stream (gRPC API) or
  // until the maximum time limit has been reached. May return multiple
@ -126,7 +138,7 @@ message StreamingRecognitionConfig {
  // `true`.
  bool single_utterance = 2;

-  // *Optional* If `true`, interim results (tentative hypotheses) may be
+  // If `true`, interim results (tentative hypotheses) may be
  // returned as they become available (these interim results are indicated with
  // the `is_final=false` flag).
  // If `false` or omitted, only `is_final=true` result(s) are returned.
@ -138,13 +150,15 @@ message StreamingRecognitionConfig {
 message RecognitionConfig {
  // The encoding of the audio data sent in the request.
  //
-  // All encodings support only 1 channel (mono) audio.
+  // All encodings support only 1 channel (mono) audio, unless the
+  // `audio_channel_count` and `enable_separate_recognition_per_channel` fields
+  // are set.
  //
  // For best results, the audio source should be captured and transmitted using
  // a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
  // recognition can be reduced if lossy codecs are used to capture or transmit
  // audio, particularly if background noise is present. Lossy codecs include
-  // `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, and `SPEEX_WITH_HEADER_BYTE`.
+  // `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, and `MP3`.
  //
  // The `FLAC` and `WAV` audio file formats include a header that describes the
  // included audio content. You can request recognition for `WAV` files that
@ -155,8 +169,7 @@ message RecognitionConfig {
  // an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
  // encoding configuration must match the encoding described in the audio
  // header; otherwise the request returns an
-  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
-  // code.
+  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
  enum AudioEncoding {
    // Not specified.
    ENCODING_UNSPECIFIED = 0;
@ -209,8 +222,7 @@ message RecognitionConfig {

  // Encoding of audio data sent in all `RecognitionAudio` messages.
  // This field is optional for `FLAC` and `WAV` audio files and required
-  // for all other audio formats. For details, see
-  // [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
+  // for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
  AudioEncoding encoding = 1;

  // Sample rate in Hertz of the audio data sent in all
@ -218,12 +230,11 @@ message RecognitionConfig {
  // 16000 is optimal. For best results, set the sampling rate of the audio
  // source to 16000 Hz. If that's not possible, use the native sample rate of
  // the audio source (instead of re-sampling).
-  // This field is optional for `FLAC` and `WAV` audio files and required
-  // for all other audio formats. For details, see
-  // [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
+  // This field is optional for FLAC and WAV audio files, but is
+  // required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
  int32 sample_rate_hertz = 2;

-  // *Optional* The number of channels in the input audio data.
+  // The number of channels in the input audio data.
  // ONLY set this for MULTI-CHANNEL recognition.
  // Valid values for LINEAR16 and FLAC are `1`-`8`.
  // Valid values for OGG_OPUS are '1'-'254'.
@ -234,7 +245,7 @@ message RecognitionConfig {
  // `enable_separate_recognition_per_channel` to 'true'.
  int32 audio_channel_count = 7;

-  // This needs to be set to ‘true’ explicitly and `audio_channel_count` > 1
+  // This needs to be set to `true` explicitly and `audio_channel_count` > 1
  // to get each channel recognized separately. The recognition result will
  // contain a `channel_tag` field to state which channel that result belongs
  // to. If this is not true, we will only recognize the first channel. The
@ -242,28 +253,29 @@ message RecognitionConfig {
  // `audio_channel_count` multiplied by the length of the audio.
  bool enable_separate_recognition_per_channel = 12;

-  // *Required* The language of the supplied audio as a
+  // Required. The language of the supplied audio as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  // Example: "en-US".
-  // See [Language Support](/speech-to-text/docs/languages)
-  // for a list of the currently supported language codes.
-  string language_code = 3;
+  // See [Language
+  // Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
+  // of the currently supported language codes.
+  string language_code = 3 [(google.api.field_behavior) = REQUIRED];

-  // *Optional* A list of up to 3 additional
+  // A list of up to 3 additional
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
  // listing possible alternative languages of the supplied audio.
-  // See [Language Support](/speech-to-text/docs/languages)
-  // for a list of the currently supported language codes.
-  // If alternative languages are listed, recognition result will contain
-  // recognition in the most likely language detected including the main
-  // language_code. The recognition result will include the language tag
-  // of the language detected in the audio.
-  // Note: This feature is only supported for Voice Command and Voice Search
-  // use cases and performance may vary for other use cases (e.g., phone call
+  // See [Language
+  // Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
+  // of the currently supported language codes. If alternative languages are
+  // listed, recognition result will contain recognition in the most likely
+  // language detected including the main language_code. The recognition result
+  // will include the language tag of the language detected in the audio. Note:
+  // This feature is only supported for Voice Command and Voice Search use cases
+  // and performance may vary for other use cases (e.g., phone call
  // transcription).
  repeated string alternative_language_codes = 18;

-  // *Optional* Maximum number of recognition hypotheses to be returned.
+  // Maximum number of recognition hypotheses to be returned.
  // Specifically, the maximum number of `SpeechRecognitionAlternative` messages
  // within each `SpeechRecognitionResult`.
  // The server may return fewer than `max_alternatives`.
@ -271,30 +283,31 @@ message RecognitionConfig {
  // one. If omitted, will return a maximum of one.
  int32 max_alternatives = 4;

-  // *Optional* If set to `true`, the server will attempt to filter out
+  // If set to `true`, the server will attempt to filter out
  // profanities, replacing all but the initial character in each filtered word
  // with asterisks, e.g. "f***". If set to `false` or omitted, profanities
  // won't be filtered out.
  bool profanity_filter = 5;

-  // *Optional* array of
-  // [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext]. A means to
-  // provide context to assist the speech recognition. For more information, see
-  // [Phrase Hints](/speech-to-text/docs/basics#phrase-hints).
+  // Array of [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
+  // A means to provide context to assist the speech recognition. For more
+  // information, see
+  // [speech
+  // adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
  repeated SpeechContext speech_contexts = 6;

-  // *Optional* If `true`, the top result includes a list of words and
+  // If `true`, the top result includes a list of words and
  // the start and end time offsets (timestamps) for those words. If
  // `false`, no word-level time offset information is returned. The default is
  // `false`.
  bool enable_word_time_offsets = 8;

-  // *Optional* If `true`, the top result includes a list of words and the
+  // If `true`, the top result includes a list of words and the
  // confidence for those words. If `false`, no word-level confidence
  // information is returned. The default is `false`.
  bool enable_word_confidence = 15;

-  // *Optional* If 'true', adds punctuation to recognition result hypotheses.
+  // If 'true', adds punctuation to recognition result hypotheses.
  // This feature is only available in select languages. Setting this for
  // requests in other languages has no effect at all.
  // The default 'false' value does not add punctuation to result hypotheses.
@ -303,19 +316,18 @@ message RecognitionConfig {
  // premium feature.
  bool enable_automatic_punctuation = 11;

-  // *Optional* If 'true', enables speaker detection for each recognized word in
+  // If 'true', enables speaker detection for each recognized word in
  // the top alternative of the recognition result using a speaker_tag provided
  // in the WordInfo.
  // Note: Use diarization_config instead.
  bool enable_speaker_diarization = 16 [deprecated = true];

-  // *Optional*
  // If set, specifies the estimated number of speakers in the conversation.
  // Defaults to '2'. Ignored unless enable_speaker_diarization is set to true.
  // Note: Use diarization_config instead.
  int32 diarization_speaker_count = 17 [deprecated = true];

-  // *Optional* Config to enable speaker diarization and set additional
+  // Config to enable speaker diarization and set additional
  // parameters to make diarization better suited for your application.
  // Note: When this is enabled, we send all the words from the beginning of the
  // audio for the top alternative in every consecutive STREAMING response.
@ -325,10 +337,10 @@ message RecognitionConfig {
  // in the top alternative of the FINAL SpeechRecognitionResult.
  SpeakerDiarizationConfig diarization_config = 19;

-  // *Optional* Metadata regarding this request.
+  // Metadata regarding this request.
  RecognitionMetadata metadata = 9;

-  // *Optional* Which model to select for the given request. Select the model
+  // Which model to select for the given request. Select the model
  // best suited to your domain to get best results. If a model is not
  // explicitly specified, then we auto-select a model based on the parameters
  // in the RecognitionConfig.
@ -362,7 +374,7 @@ message RecognitionConfig {
  // </table>
  string model = 13;

-  // *Optional* Set to true to use an enhanced model for speech recognition.
+  // Set to true to use an enhanced model for speech recognition.
  // If `use_enhanced` is set to true and the `model` field is not set, then
  // an appropriate enhanced model is chosen if an enhanced model exists for
  // the audio.
@ -373,23 +385,18 @@ message RecognitionConfig {
  bool use_enhanced = 14;
 }

-// *Optional* Config to enable speaker diarization.
+// Config to enable speaker diarization.
 message SpeakerDiarizationConfig {
-  // *Optional* If 'true', enables speaker detection for each recognized word in
+  // If 'true', enables speaker detection for each recognized word in
  // the top alternative of the recognition result using a speaker_tag provided
  // in the WordInfo.
  bool enable_speaker_diarization = 1;

-  // Note: Set min_speaker_count = max_speaker_count to fix the number of
-  // speakers to be detected in the audio.
-
-  // *Optional*
  // Minimum number of speakers in the conversation. This range gives you more
  // flexibility by allowing the system to automatically determine the correct
  // number of speakers. If not set, the default value is 2.
  int32 min_speaker_count = 2;

-  // *Optional*
  // Maximum number of speakers in the conversation. This range gives you more
  // flexibility by allowing the system to automatically determine the correct
  // number of speakers. If not set, the default value is 6.
@ -520,7 +527,7 @@ message RecognitionMetadata {

  // Obfuscated (privacy-protected) ID of the user, to identify number of
  // unique users using the service.
-  int64 obfuscated_id = 9;
+  int64 obfuscated_id = 9 [deprecated = true];

  // Description of the content. E.g. "Recordings of federal supreme court
  // hearings from 2012".
@ -530,12 +537,12 @@ message RecognitionMetadata {
 // Provides "hints" to the speech recognizer to favor specific words and phrases
 // in the results.
 message SpeechContext {
-  // *Optional* A list of strings containing words and phrases "hints" so that
+  // A list of strings containing words and phrases "hints" so that
  // the speech recognition is more likely to recognize them. This can be used
  // to improve the accuracy for specific words and phrases, for example, if
  // specific commands are typically spoken by the user. This can also be used
  // to add additional words to the vocabulary of the recognizer. See
-  // [usage limits](/speech-to-text/quotas#content).
+  // [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
  //
  // List items can also be set to classes for groups of words that represent
  // common concepts that occur in natural language. For example, rather than
@ -557,14 +564,14 @@ message SpeechContext {

 // Contains audio data in the encoding specified in the `RecognitionConfig`.
 // Either `content` or `uri` must be supplied. Supplying both or neither
-// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
-// See [content limits](/speech-to-text/quotas#content).
+// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
+// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
 message RecognitionAudio {
  // The audio source, which is either inline content or a Google Cloud
  // Storage uri.
  oneof audio_source {
    // The audio data bytes encoded as specified in
-    // `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
+    // `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a
    // pure binary representation, whereas JSON representations use base64.
    bytes content = 1;

@ -573,9 +580,8 @@ message RecognitionAudio {
    // Currently, only Google Cloud Storage URIs are
    // supported, which must be specified in the following format:
    // `gs://bucket_name/object_name` (other URI formats return
-    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
-    // For more information, see [Request
-    // URIs](https://cloud.google.com/storage/docs/reference-uris).
+    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
+    // [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
    string uri = 2;
  }
 }
@ -584,7 +590,7 @@ message RecognitionAudio {
 // contains the result as zero or more sequential `SpeechRecognitionResult`
 // messages.
 message RecognizeResponse {
-  // Output only. Sequential list of transcription results corresponding to
+  // Sequential list of transcription results corresponding to
  // sequential portions of audio.
  repeated SpeechRecognitionResult results = 2;
 }
@ -595,7 +601,7 @@ message RecognizeResponse {
 // returned by the `GetOperation` call of the `google::longrunning::Operations`
 // service.
 message LongRunningRecognizeResponse {
-  // Output only. Sequential list of transcription results corresponding to
+  // Sequential list of transcription results corresponding to
  // sequential portions of audio.
  repeated SpeechRecognitionResult results = 2;
 }
@ -680,44 +686,44 @@ message StreamingRecognizeResponse {
    END_OF_SINGLE_UTTERANCE = 1;
  }

-  // Output only. If set, returns a [google.rpc.Status][google.rpc.Status]
-  // message that specifies the error for the operation.
+  // If set, returns a [google.rpc.Status][google.rpc.Status] message that
+  // specifies the error for the operation.
  google.rpc.Status error = 1;

-  // Output only. This repeated list contains zero or more results that
+  // This repeated list contains zero or more results that
  // correspond to consecutive portions of the audio currently being processed.
  // It contains zero or one `is_final=true` result (the newly settled portion),
  // followed by zero or more `is_final=false` results (the interim results).
  repeated StreamingRecognitionResult results = 2;

-  // Output only. Indicates the type of speech event.
+  // Indicates the type of speech event.
  SpeechEventType speech_event_type = 4;
 }

 // A streaming speech recognition result corresponding to a portion of the audio
 // that is currently being processed.
 message StreamingRecognitionResult {
-  // Output only. May contain one or more recognition hypotheses (up to the
+  // May contain one or more recognition hypotheses (up to the
  // maximum specified in `max_alternatives`).
  // These alternatives are ordered in terms of accuracy, with the top (first)
  // alternative being the most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

-  // Output only. If `false`, this `StreamingRecognitionResult` represents an
+  // If `false`, this `StreamingRecognitionResult` represents an
  // interim result that may change. If `true`, this is the final time the
  // speech service will return this particular `StreamingRecognitionResult`,
  // the recognizer will not return any further hypotheses for this portion of
  // the transcript and corresponding audio.
  bool is_final = 2;

-  // Output only. An estimate of the likelihood that the recognizer will not
+  // An estimate of the likelihood that the recognizer will not
  // change its guess about this interim result. Values range from 0.0
  // (completely unstable) to 1.0 (completely stable).
  // This field is only provided for interim results (`is_final=false`).
  // The default of 0.0 is a sentinel value indicating `stability` was not set.
  float stability = 3;

-  // Output only. Time offset of the end of this result relative to the
+  // Time offset of the end of this result relative to the
  // beginning of the audio.
  google.protobuf.Duration result_end_time = 4;

@ -726,16 +732,15 @@ message StreamingRecognitionResult {
  // For audio_channel_count = N, its output values can range from '1' to 'N'.
  int32 channel_tag = 5;

-  // Output only. The
-  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
-  // language in this result. This language code was detected to have the most
-  // likelihood of being spoken in the audio.
+  // The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
+  // of the language in this result. This language code was detected to have
+  // the most likelihood of being spoken in the audio.
  string language_code = 6;
 }

 // A speech recognition result corresponding to a portion of the audio.
 message SpeechRecognitionResult {
-  // Output only. May contain one or more recognition hypotheses (up to the
+  // May contain one or more recognition hypotheses (up to the
  // maximum specified in `max_alternatives`).
  // These alternatives are ordered in terms of accuracy, with the top (first)
  // alternative being the most probable, as ranked by the recognizer.
@ -746,19 +751,18 @@ message SpeechRecognitionResult {
  // For audio_channel_count = N, its output values can range from '1' to 'N'.
  int32 channel_tag = 2;

-  // Output only. The
-  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
-  // language in this result. This language code was detected to have the most
-  // likelihood of being spoken in the audio.
+  // The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
+  // of the language in this result. This language code was detected to have
+  // the most likelihood of being spoken in the audio.
  string language_code = 5;
 }

 // Alternative hypotheses (a.k.a. n-best list).
 message SpeechRecognitionAlternative {
-  // Output only. Transcript text representing the words that the user spoke.
+  // Transcript text representing the words that the user spoke.
  string transcript = 1;

-  // Output only. The confidence estimate between 0.0 and 1.0. A higher number
+  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result, or of a streaming result where `is_final=true`.
@ -767,7 +771,7 @@ message SpeechRecognitionAlternative {
  // The default of 0.0 is a sentinel value indicating `confidence` was not set.
  float confidence = 2;

-  // Output only. A list of word-specific information for each recognized word.
+  // A list of word-specific information for each recognized word.
  // Note: When `enable_speaker_diarization` is true, you will see all the words
  // from the beginning of the audio.
  repeated WordInfo words = 3;
@ -775,7 +779,7 @@ message SpeechRecognitionAlternative {

 // Word-specific information for recognized words.
 message WordInfo {
-  // Output only. Time offset relative to the beginning of the audio,
+  // Time offset relative to the beginning of the audio,
  // and corresponding to the start of the spoken word.
  // This field is only set if `enable_word_time_offsets=true` and only
  // in the top hypothesis.
@ -783,7 +787,7 @@ message WordInfo {
  // vary.
  google.protobuf.Duration start_time = 1;

-  // Output only. Time offset relative to the beginning of the audio,
+  // Time offset relative to the beginning of the audio,
  // and corresponding to the end of the spoken word.
  // This field is only set if `enable_word_time_offsets=true` and only
  // in the top hypothesis.
@ -791,10 +795,10 @@ message WordInfo {
  // vary.
  google.protobuf.Duration end_time = 2;

-  // Output only. The word corresponding to this set of information.
+  // The word corresponding to this set of information.
  string word = 3;

-  // Output only. The confidence estimate between 0.0 and 1.0. A higher number
+  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result, or of a streaming result where `is_final=true`.
@ -803,7 +807,7 @@ message WordInfo {
  // The default of 0.0 is a sentinel value indicating `confidence` was not set.
  float confidence = 4;

-  // Output only. A distinct integer value is assigned for every speaker within
+  // A distinct integer value is assigned for every speaker within
  // the audio. This field specifies which one of those speakers was detected to
  // have spoken this word. Value ranges from '1' to diarization_speaker_count.
  // speaker_tag is set if enable_speaker_diarization = 'true' and only in the
--- a/google/cloud/speech/v1p1beta1/speech_gapic.legacy.yaml
+++ b/google/cloud/speech/v1p1beta1/speech_gapic.legacy.yaml
@ -0,0 +1,90 @@
+type: com.google.api.codegen.ConfigProto
+config_schema_version: 1.0.0
+language_settings:
+  java:
+    package_name: com.google.cloud.speech.v1p1beta1
+  python:
+    package_name: google.cloud.speech_v1p1beta1.gapic
+  go:
+    package_name: cloud.google.com/go/speech/apiv1p1beta1
+  csharp:
+    package_name: Google.Cloud.Speech.V1P1Beta1
+  ruby:
+    package_name: Google::Cloud::Speech::V1p1beta1
+  php:
+    package_name: Google\Cloud\Speech\V1p1beta1
+  nodejs:
+    package_name: speech.v1p1beta1
+    domain_layer_location: google-cloud
+interfaces:
+- name: google.cloud.speech.v1p1beta1.Speech
+  smoke_test:
+    method: Recognize
+    init_fields:
+    - config.language_code="en-US"
+    - config.sample_rate_hertz=44100
+    - config.encoding=FLAC
+    - audio.uri="gs://gapic-toolkit/hello.flac"
+  collections: []
+  retry_codes_def:
+  - name: idempotent
+    retry_codes:
+    - DEADLINE_EXCEEDED
+    - UNAVAILABLE
+  - name: non_idempotent
+    retry_codes: []
+  retry_params_def:
+  - name: default
+    initial_retry_delay_millis: 100
+    retry_delay_multiplier: 1.3
+    max_retry_delay_millis: 60000
+    initial_rpc_timeout_millis: 1000000
+    rpc_timeout_multiplier: 1
+    max_rpc_timeout_millis: 1000000
+    total_timeout_millis: 5000000
+  methods:
+  - name: Recognize
+    flattening:
+      groups:
+      - parameters:
+        - config
+        - audio
+    required_fields:
+    - config
+    - audio
+    sample_code_init_fields:
+    - config.encoding=FLAC
+    - config.sample_rate_hertz=44100
+    - config.language_code="en-US"
+    - audio.uri=gs://bucket_name/file_name.flac
+    retry_codes_name: idempotent
+    retry_params_name: default
+    timeout_millis: 1000000
+  - name: LongRunningRecognize
+    flattening:
+      groups:
+      - parameters:
+        - config
+        - audio
+    required_fields:
+    - config
+    - audio
+    sample_code_init_fields:
+    - config.encoding=FLAC
+    - config.sample_rate_hertz=44100
+    - config.language_code="en-US"
+    - audio.uri=gs://bucket_name/file_name.flac
+    retry_codes_name: non_idempotent
+    retry_params_name: default
+    timeout_millis: 60000
+    long_running:
+      return_type: google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse
+      metadata_type: google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata
+      initial_poll_delay_millis: 20000
+      poll_delay_multiplier: 1.5
+      max_poll_delay_millis: 45000
+      total_poll_timeout_millis: 86400000
+  - name: StreamingRecognize
+    retry_codes_name: idempotent
+    retry_params_name: default
+    timeout_millis: 1000000
--- a/google/cloud/speech/v1p1beta1/speech_gapic.yaml
+++ b/google/cloud/speech/v1p1beta1/speech_gapic.yaml
@ -1,5 +1,5 @@
 type: com.google.api.codegen.ConfigProto
-config_schema_version: 1.0.0
+config_schema_version: 2.0.0
 language_settings:
  java:
    package_name: com.google.cloud.speech.v1p1beta1
@ -25,66 +25,16 @@ interfaces:
    - config.sample_rate_hertz=44100
    - config.encoding=FLAC
    - audio.uri="gs://gapic-toolkit/hello.flac"
-  collections: []
-  retry_codes_def:
-  - name: idempotent
-    retry_codes:
-    - DEADLINE_EXCEEDED
-    - UNAVAILABLE
-  - name: non_idempotent
-    retry_codes: []
-  retry_params_def:
-  - name: default
-    initial_retry_delay_millis: 100
-    retry_delay_multiplier: 1.3
-    max_retry_delay_millis: 60000
-    initial_rpc_timeout_millis: 1000000
-    rpc_timeout_multiplier: 1
-    max_rpc_timeout_millis: 1000000
-    total_timeout_millis: 5000000
  methods:
  - name: Recognize
-    flattening:
-      groups:
-      - parameters:
-        - config
-        - audio
-    required_fields:
-    - config
-    - audio
    sample_code_init_fields:
    - config.encoding=FLAC
    - config.sample_rate_hertz=44100
    - config.language_code="en-US"
    - audio.uri=gs://bucket_name/file_name.flac
-    retry_codes_name: idempotent
-    retry_params_name: default
-    timeout_millis: 1000000
  - name: LongRunningRecognize
-    flattening:
-      groups:
-      - parameters:
-        - config
-        - audio
-    required_fields:
-    - config
-    - audio
    sample_code_init_fields:
    - config.encoding=FLAC
    - config.sample_rate_hertz=44100
    - config.language_code="en-US"
    - audio.uri=gs://bucket_name/file_name.flac
-    retry_codes_name: non_idempotent
-    retry_params_name: default
-    timeout_millis: 60000
-    long_running:
-      return_type: google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse
-      metadata_type: google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata
-      initial_poll_delay_millis: 20000
-      poll_delay_multiplier: 1.5
-      max_poll_delay_millis: 45000
-      total_poll_timeout_millis: 86400000
-  - name: StreamingRecognize
-    retry_codes_name: idempotent
-    retry_params_name: default
-    timeout_millis: 1000000
--- a/google/cloud/speech/v1p1beta1/speech_grpc_service_config.json
+++ b/google/cloud/speech/v1p1beta1/speech_grpc_service_config.json
@ -0,0 +1,35 @@
+{
+  "methodConfig": [
+    {
+      "name": [
+        {
+          "service": "google.cloud.speech.v1p1beta1.Speech",
+          "method": "Recognize"
+        },
+        {
+          "service": "google.cloud.speech.v1p1beta1.Speech",
+          "method": "StreamingRecognize"
+        }
+      ],
+      "timeout": "5000s",
+      "retryPolicy": {
+        "initialBackoff": "0.100s",
+        "maxBackoff": "60s",
+        "backoffMultiplier": 1.3,
+        "retryableStatusCodes": [
+          "DEADLINE_EXCEEDED",
+          "UNAVAILABLE"
+        ]
+      }
+    },
+    {
+      "name": [
+        {
+          "service": "google.cloud.speech.v1p1beta1.Speech",
+          "method": "LongRunningRecognize"
+        }
+      ],
+      "timeout": "5000s"
+    }
+  ]
+}
--- a/google/cloud/speech/v1p1beta1/speech_v1p1beta1.yaml
+++ b/google/cloud/speech/v1p1beta1/speech_v1p1beta1.yaml
@ -0,0 +1,36 @@
+type: google.api.Service
+config_version: 3
+name: speech.googleapis.com
+title: Cloud Speech-to-Text API
+
+apis:
+- name: google.cloud.speech.v1p1beta1.Speech
+
+types:
+- name: google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata
+- name: google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse
+
+documentation:
+  summary: Converts audio to text by applying powerful neural network models.
+  overview: |-
+    # Introduction
+
+    Google Cloud Speech API provides speech recognition as a service.
+
+backend:
+  rules:
+  - selector: 'google.cloud.speech.v1p1beta1.Speech.*'
+    deadline: 355.0
+  - selector: 'google.longrunning.Operations.*'
+    deadline: 355.0
+
+authentication:
+  rules:
+  - selector: 'google.cloud.speech.v1p1beta1.Speech.*'
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform
+  - selector: 'google.longrunning.Operations.*'
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform