|
|
@ -19,6 +19,7 @@ package google.cloud.dialogflow.cx.v3; |
|
|
|
import "google/api/field_behavior.proto"; |
|
|
|
import "google/api/field_behavior.proto"; |
|
|
|
import "google/api/resource.proto"; |
|
|
|
import "google/api/resource.proto"; |
|
|
|
import "google/protobuf/duration.proto"; |
|
|
|
import "google/protobuf/duration.proto"; |
|
|
|
|
|
|
|
import "google/protobuf/timestamp.proto"; |
|
|
|
import "google/api/annotations.proto"; |
|
|
|
import "google/api/annotations.proto"; |
|
|
|
|
|
|
|
|
|
|
|
option cc_enable_arenas = true; |
|
|
|
option cc_enable_arenas = true; |
|
|
@ -30,31 +31,6 @@ option java_package = "com.google.cloud.dialogflow.cx.v3"; |
|
|
|
option objc_class_prefix = "DF"; |
|
|
|
option objc_class_prefix = "DF"; |
|
|
|
option ruby_package = "Google::Cloud::Dialogflow::CX::V3"; |
|
|
|
option ruby_package = "Google::Cloud::Dialogflow::CX::V3"; |
|
|
|
|
|
|
|
|
|
|
|
// Information for a word recognized by the speech recognizer. |
|
|
|
|
|
|
|
message SpeechWordInfo { |
|
|
|
|
|
|
|
// The word this info is for. |
|
|
|
|
|
|
|
string word = 3; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Time offset relative to the beginning of the audio that corresponds to the |
|
|
|
|
|
|
|
// start of the spoken word. This is an experimental feature and the accuracy |
|
|
|
|
|
|
|
// of the time offset can vary. |
|
|
|
|
|
|
|
google.protobuf.Duration start_offset = 1; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Time offset relative to the beginning of the audio that corresponds to the |
|
|
|
|
|
|
|
// end of the spoken word. This is an experimental feature and the accuracy of |
|
|
|
|
|
|
|
// the time offset can vary. |
|
|
|
|
|
|
|
google.protobuf.Duration end_offset = 2; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// The Speech confidence between 0.0 and 1.0 for this word. A higher number |
|
|
|
|
|
|
|
// indicates an estimated greater likelihood that the recognized word is |
|
|
|
|
|
|
|
// correct. The default of 0.0 is a sentinel value indicating that confidence |
|
|
|
|
|
|
|
// was not set. |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
// This field is not guaranteed to be fully stable over time for the same |
|
|
|
|
|
|
|
// audio input. Users should also not rely on it to always be provided. |
|
|
|
|
|
|
|
float confidence = 4; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Audio encoding of the audio content sent in the conversational query request. |
|
|
|
// Audio encoding of the audio content sent in the conversational query request. |
|
|
|
// Refer to the |
|
|
|
// Refer to the |
|
|
|
// [Cloud Speech API |
|
|
|
// [Cloud Speech API |
|
|
@ -104,6 +80,31 @@ enum AudioEncoding { |
|
|
|
AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7; |
|
|
|
AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Information for a word recognized by the speech recognizer. |
|
|
|
|
|
|
|
message SpeechWordInfo { |
|
|
|
|
|
|
|
// The word this info is for. |
|
|
|
|
|
|
|
string word = 3; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Time offset relative to the beginning of the audio that corresponds to the |
|
|
|
|
|
|
|
// start of the spoken word. This is an experimental feature and the accuracy |
|
|
|
|
|
|
|
// of the time offset can vary. |
|
|
|
|
|
|
|
google.protobuf.Duration start_offset = 1; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Time offset relative to the beginning of the audio that corresponds to the |
|
|
|
|
|
|
|
// end of the spoken word. This is an experimental feature and the accuracy of |
|
|
|
|
|
|
|
// the time offset can vary. |
|
|
|
|
|
|
|
google.protobuf.Duration end_offset = 2; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// The Speech confidence between 0.0 and 1.0 for this word. A higher number |
|
|
|
|
|
|
|
// indicates an estimated greater likelihood that the recognized word is |
|
|
|
|
|
|
|
// correct. The default of 0.0 is a sentinel value indicating that confidence |
|
|
|
|
|
|
|
// was not set. |
|
|
|
|
|
|
|
// |
|
|
|
|
|
|
|
// This field is not guaranteed to be fully stable over time for the same |
|
|
|
|
|
|
|
// audio input. Users should also not rely on it to always be provided. |
|
|
|
|
|
|
|
float confidence = 4; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Instructs the speech recognizer on how to process the audio content. |
|
|
|
// Instructs the speech recognizer on how to process the audio content. |
|
|
|
message InputAudioConfig { |
|
|
|
message InputAudioConfig { |
|
|
|
// Required. Audio encoding of the audio content to process. |
|
|
|
// Required. Audio encoding of the audio content to process. |
|
|
|