// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.dialogflow.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/dialogflow/v2/audio_config.proto";
import "google/cloud/dialogflow/v2/context.proto";
import "google/cloud/dialogflow/v2/intent.proto";
import "google/cloud/dialogflow/v2/session_entity_type.proto";
// NOTE(review): no `google.protobuf.Duration` field is visible in this file;
// confirm whether this import is still required.
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/rpc/status.proto";
import "google/type/latlng.proto";

// Per-language code generation options.
option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.V2";
option go_package = "google.golang.org/genproto/googleapis/cloud/dialogflow/v2;dialogflow";
option java_multiple_files = true;
option java_outer_classname = "SessionProto";
option java_package = "com.google.cloud.dialogflow.v2";
option objc_class_prefix = "DF";

// A session represents an interaction with a user. You retrieve user input
// and pass it to the
// [DetectIntent][google.cloud.dialogflow.v2.Sessions.DetectIntent] (or
// [StreamingDetectIntent][google.cloud.dialogflow.v2.Sessions.StreamingDetectIntent])
// method to determine user intent and respond.
service Sessions {
  option (google.api.default_host) = "dialogflow.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/dialogflow";

  // Processes a natural language query and returns structured, actionable data
  // as a result. This method is not idempotent, because it may cause contexts
  // and session entity types to be updated, which in turn might affect
  // results of future queries.
  rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
    option (google.api.http) = {
      post: "/v2/{session=projects/*/agent/sessions/*}:detectIntent"
      body: "*"
    };
    option (google.api.method_signature) = "session,query_input";
  }

  // Processes a natural language query in audio format in a streaming fashion
  // and returns structured, actionable data as a result. This method is only
  // available via the gRPC API (not REST).
  rpc StreamingDetectIntent(stream StreamingDetectIntentRequest)
      returns (stream StreamingDetectIntentResponse) {
  }
}

// The request to detect user's intent.
message DetectIntentRequest {
  // Required. The name of the session this query is sent to. Format:
  // `projects/<Project ID>/agent/sessions/<Session ID>`. It's up to the API
  // caller to choose an appropriate session ID. It can be a random number or
  // some type of user identifier (preferably hashed). The length of the session
  // ID must not exceed 36 characters.
  string session = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The parameters of this query.
  QueryParameters query_params = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. The input specification. It can be set to:
  //
  // 1.  an audio config
  //     which instructs the speech recognizer how to process the speech audio,
  //
  // 2.  a conversational query in the form of text, or
  //
  // 3.  an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and agent-level speech synthesizer is not
  // configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The natural language speech audio to be processed. This field
  // should be populated if and only if `query_input` is set to an input audio
  // config. A single request can contain up to 1 minute of speech audio data.
  bytes input_audio = 5 [(google.api.field_behavior) = OPTIONAL];
}

// The message returned from the DetectIntent method.
message DetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The selected results of the conversational query or event processing.
  QueryResult query_result = 2;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 3;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  bytes output_audio = 4;

  // NOTE(review): field number 5 is unassigned in this message; confirm
  // whether it should be declared `reserved` before it is ever reused.

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;
}

// Represents the parameters of the conversational query.
message QueryParameters {
  // Optional. The time zone of this conversational query from the
  // [time zone database](https://www.iana.org/time-zones), e.g.,
  // America/New_York, Europe/Paris. If not provided, the time zone specified in
  // agent settings is used.
  string time_zone = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The geo location of this conversational query.
  google.type.LatLng geo_location = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The collection of contexts to be activated before this query is
  // executed.
  repeated Context contexts = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies whether to delete all contexts in the current session
  // before the new ones are activated.
  bool reset_contexts = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Additional session entity types to replace or extend developer
  // entity types with. The entity synonyms apply to all languages and persist
  // for the session of this query.
  repeated SessionEntityType session_entity_types = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. This field can be used to pass custom data into the webhook
  // associated with the agent. Arbitrary JSON objects are supported.
  google.protobuf.Struct payload = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configures the type of sentiment analysis to perform. If not
  // provided, sentiment analysis is not performed.
  SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10
      [(google.api.field_behavior) = OPTIONAL];
}

// Represents the query input. It can contain either:
//
// 1.  An audio config which
//     instructs the speech recognizer how to process the speech audio.
//
// 2.  A conversational query in the form of text.
//
// 3.  An event that specifies which intent to trigger.
message QueryInput {
  // Required. The input specification. Exactly one of the members below
  // carries the input.
  oneof input {
    // Instructs the speech recognizer how to process the speech audio.
    InputAudioConfig audio_config = 1;

    // The natural language text to be processed.
    TextInput text = 2;

    // The event to be processed.
    EventInput event = 3;
  }
}

// Represents the result of conversational query or event processing.
message QueryResult {
  // The original conversational query text:
  //
  // - If natural language text was provided as input, `query_text` contains
  //   a copy of the input.
  // - If natural language speech audio was provided as input, `query_text`
  //   contains the speech recognition result. If speech recognizer produced
  //   multiple alternatives, a particular one is picked.
  // - If automatic spell correction is enabled, `query_text` will contain the
  //   corrected user input.
  string query_text = 1;

  // The language that was triggered during intent detection.
  // See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes.
  string language_code = 15;

  // The Speech recognition confidence between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. The default of 0.0 is a sentinel value indicating that confidence
  // was not set.
  //
  // This field is not guaranteed to be accurate or set. In particular this
  // field isn't set for StreamingDetectIntent since the streaming endpoint has
  // separate confidence estimates per portion of the audio in
  // StreamingRecognitionResult.
  float speech_recognition_confidence = 2;

  // The action name from the matched intent.
  string action = 3;

  // The collection of extracted parameters.
  google.protobuf.Struct parameters = 4;

  // This field is set to:
  //
  // - `false` if the matched intent has required parameters and not all of
  //    the required parameter values have been collected.
  // - `true` if all required parameter values have been collected, or if the
  //    matched intent doesn't contain any required parameters.
  bool all_required_params_present = 5;

  // The text to be pronounced to the user or shown on the screen.
  // Note: This is a legacy field, `fulfillment_messages` should be preferred.
  string fulfillment_text = 6;

  // The collection of rich messages to present to the user.
  repeated Intent.Message fulfillment_messages = 7;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `source` field returned in the webhook response.
  string webhook_source = 8;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `payload` field returned in the webhook response.
  google.protobuf.Struct webhook_payload = 9;

  // The collection of output contexts. If applicable,
  // `output_contexts.parameters` contains entries with name
  // `<parameter name>.original` containing the original parameter values
  // before the query.
  repeated Context output_contexts = 10;

  // The intent that matched the conversational query. Some, but not all,
  // fields are filled in this message, including but not limited to:
  // `name`, `display_name`, `end_interaction` and `is_fallback`.
  Intent intent = 11;

  // The intent detection confidence. Values range from 0.0
  // (completely uncertain) to 1.0 (completely certain).
  // This value is for informational purpose only and is only used to
  // help match the best intent within the classification threshold.
  // This value may change for the same end-user expression at any time due to a
  // model retraining or change in implementation.
  // If there are multiple `knowledge_answers` messages, this value is set to
  // the greatest `knowledgeAnswers.match_confidence` value in the list.
  float intent_detection_confidence = 12;

  // The free-form diagnostic info. For example, this field could contain
  // webhook call latency. The string keys of the Struct's fields map can change
  // without notice.
  google.protobuf.Struct diagnostic_info = 14;

  // The sentiment analysis result, which depends on the
  // `sentiment_analysis_request_config` specified in the request.
  SentimentAnalysisResult sentiment_analysis_result = 17;
}

// The top-level message sent by the client to the
// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2.Sessions.StreamingDetectIntent]
// method.
//
// Multiple request messages should be sent in order:
//
// 1.  The first message must contain
//     [StreamingDetectIntentRequest.session][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.session],
//     [StreamingDetectIntentRequest.query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input]
//     plus optionally
//     [StreamingDetectIntentRequest.query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params].
//     If the client wants to receive an audio response, it should also contain
//     [StreamingDetectIntentRequest.output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config].
//     The message must not contain
//     [StreamingDetectIntentRequest.input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio].
// 2.  If
//     [StreamingDetectIntentRequest.query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input]
//     was set to
//     [QueryInput.audio_config][google.cloud.dialogflow.v2.QueryInput.audio_config],
//     all subsequent messages must contain
//     [StreamingDetectIntentRequest.input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio]
//     to continue with Speech recognition.
//     If you decide to detect an intent from text input instead, after you
//     have already started Speech recognition, please send a message with
//     [QueryInput.text][google.cloud.dialogflow.v2.QueryInput.text].
//
//     However, note that:
//
//     * Dialogflow will bill you for the audio duration so far.
//     * Dialogflow discards all Speech recognition results in favor of the
//       input text.
//     * Dialogflow will use the language code from the first message.
//
// After you have sent all input, you must half-close or abort the request
// stream.
message StreamingDetectIntentRequest {
  // Required. The name of the session the query is sent to.
  // Format of the session name:
  // `projects/<Project ID>/agent/sessions/<Session ID>`. It's up to the API
  // caller to choose an appropriate `Session ID`. It can be a random number or
  // some type of user identifier (preferably hashed). The length of the session
  // ID must not exceed 36 characters.
  string session = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The parameters of this query.
  QueryParameters query_params = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. The input specification. It can be set to:
  //
  // 1.  an audio config which instructs the speech recognizer how to process
  //     the speech audio,
  //
  // 2.  a conversational query in the form of text, or
  //
  // 3.  an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Please use
  // [InputAudioConfig.single_utterance][google.cloud.dialogflow.v2.InputAudioConfig.single_utterance]
  // instead. If `false` (default), recognition does not cease until
  // the client closes the stream. If `true`, the recognizer will detect a
  // single spoken utterance in input audio. Recognition ceases when it detects
  // the audio's voice has stopped or paused. In this case, once a detected
  // intent is received, the client should close the stream and start a new
  // request with a new stream as needed.
  // This setting is ignored when `query_input` is a piece of text or an event.
  bool single_utterance = 4 [
    deprecated = true,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and agent-level speech synthesizer is not
  // configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The input audio content to be recognized. Must be sent if
  // `query_input` was set to a streaming input audio config. The complete audio
  // over all streaming messages must not exceed 1 minute.
  bytes input_audio = 6 [(google.api.field_behavior) = OPTIONAL];
}

// The top-level message returned from the
// `StreamingDetectIntent` method.
//
// Multiple response messages can be returned in order:
//
// 1.  If the input was set to streaming audio, the first one or more messages
//     contain `recognition_result`. Each `recognition_result` represents a more
//     complete transcript of what the user said. The last `recognition_result`
//     has `is_final` set to `true`.
//
// 2.  The next message contains `response_id`, `query_result`
//     and optionally `webhook_status` if a webhook was called.
message StreamingDetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The result of speech recognition.
  StreamingRecognitionResult recognition_result = 2;

  // The result of the conversational query or event processing.
  QueryResult query_result = 3;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 4;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  bytes output_audio = 5;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;
}

// Contains a speech recognition result corresponding to a portion of the audio
// that is currently being processed or an indication that this is the end
// of the single requested utterance.
//
// Example:
//
// 1.  transcript: "tube"
//
// 2.  transcript: "to be a"
//
// 3.  transcript: "to be"
//
// 4.  transcript: "to be or not to be"
//     is_final: true
//
// 5.  transcript: " that's"
//
// 6.  transcript: " that is"
//
// 7.  message_type: `END_OF_SINGLE_UTTERANCE`
//
// 8.  transcript: " that is the question"
//     is_final: true
//
// Only two of the responses contain final results (#4 and #8 indicated by
// `is_final: true`). Concatenating these generates the full transcript: "to be
// or not to be that is the question".
//
// In each response we populate:
//
// *  for `TRANSCRIPT`: `transcript` and possibly `is_final`.
//
// *  for `END_OF_SINGLE_UTTERANCE`: only `message_type`.
message StreamingRecognitionResult {
  // Type of the response message.
  enum MessageType {
    // Not specified. Should never be used.
    MESSAGE_TYPE_UNSPECIFIED = 0;

    // Message contains a (possibly partial) transcript.
    TRANSCRIPT = 1;

    // Event indicates that the server has detected the end of the user's speech
    // utterance and expects no additional inputs.
    // Therefore, the server will not process additional audio (although it may
    // subsequently return additional results). The
    // client should stop sending additional audio data, half-close the gRPC
    // connection, and wait for any additional results until the server closes
    // the gRPC connection. This message is only sent if `single_utterance` was
    // set to `true`, and is not used otherwise.
    END_OF_SINGLE_UTTERANCE = 2;
  }

  // Type of the result message.
  MessageType message_type = 1;

  // Transcript text representing the words that the user spoke.
  // Populated if and only if `message_type` = `TRANSCRIPT`.
  string transcript = 2;

  // If `false`, the `StreamingRecognitionResult` represents an
  // interim result that may change. If `true`, the recognizer will not return
  // any further hypotheses about this piece of the audio. May only be populated
  // for `message_type` = `TRANSCRIPT`.
  bool is_final = 3;

  // The Speech confidence between 0.0 and 1.0 for the current portion of audio.
  // A higher number indicates an estimated greater likelihood that the
  // recognized words are correct. The default of 0.0 is a sentinel value
  // indicating that confidence was not set.
  //
  // This field is typically only provided if `is_final` is true and you should
  // not rely on it being accurate or even set.
  float confidence = 4;
}

// Represents the natural language text to be processed.
message TextInput {
  // Required. The UTF-8 encoded natural language text to be processed.
  // Text length must not exceed 256 characters.
  string text = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The language of this conversational query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 2 [(google.api.field_behavior) = REQUIRED];
}

// Events allow for matching intents by event name instead of the natural
// language input. For instance, input `<event: { name: "welcome_event",
// parameters: { name: "Sam" } }>` can trigger a personalized welcome response.
// The parameter `name` may be used by the agent in the response:
// `"Hello #welcome_event.name! What can I do for you today?"`.
message EventInput {
  // Required. The unique identifier of the event.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The collection of parameters associated with the event.
  google.protobuf.Struct parameters = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Required. The language of this query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 3 [(google.api.field_behavior) = REQUIRED];
}

// Configures the types of sentiment analysis to perform.
message SentimentAnalysisRequestConfig {
  // Optional. Instructs the service to perform sentiment analysis on
  // `query_text`. If not provided, sentiment analysis is not performed on
  // `query_text`.
  bool analyze_query_text_sentiment = 1
      [(google.api.field_behavior) = OPTIONAL];
}

// The result of sentiment analysis as configured by
// `sentiment_analysis_request_config`.
message SentimentAnalysisResult {
  // The sentiment analysis result for `query_text`.
  Sentiment query_text_sentiment = 1;
}

// The sentiment, such as positive/negative feeling or association, for a unit
// of analysis, such as the query text.
message Sentiment {
  // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
  // sentiment).
  float score = 1;

  // A non-negative number in the [0, +inf) range, which represents the absolute
  // magnitude of sentiment, regardless of score (positive or negative).
  float magnitude = 2;
}