Add proto files for language API.

9 years ago · 1e645debc6
parent 3748d006c4
commit 1e645debc6
1 changed files with 610 additions and 0 deletions
--- a/google/cloud/language/v1beta1/language_service.proto
+++ b/google/cloud/language/v1beta1/language_service.proto
@ -0,0 +1,610 @@
 // Copyright 2016 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 syntax = "proto3";
 package google.cloud.language.v1beta1;
 import "google/api/annotations.proto";
 option java_multiple_files = true;
 option java_outer_classname = "LanguageServiceProto";
 option java_package = "com.google.cloud.language.v1beta1";
 // Text analysis service. Exposes sentiment analysis, entity recognition, and
 // full text annotation; each method is also bound to an HTTP `POST` route
 // through its `google.api.http` option.
 service LanguageService {
  // Analyzes the sentiment of the provided text.
  //
  // HTTP binding: `POST /v1beta1/documents:analyzeSentiment`, with the whole
  // request message mapped to the HTTP body.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeSentiment"
      body: "*"
    };
  }
  // Finds named entities (currently proper names) in the text and reports,
  // for each entity, its type, salience, mentions, and other properties.
  //
  // HTTP binding: `POST /v1beta1/documents:analyzeEntities`, with the whole
  // request message mapped to the HTTP body.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeEntities"
      body: "*"
    };
  }
  // Advanced API that analyzes the document and provides a full set of text
  // annotations, including semantic, syntactic, and sentiment information.
  // Intended for users who are familiar with machine learning and need
  // in-depth text features to build upon.
  //
  // HTTP binding: `POST /v1beta1/documents:annotateText`, with the whole
  // request message mapped to the HTTP body.
  rpc AnnotateText(AnnotateTextRequest)
      returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:annotateText"
      body: "*"
    };
  }
 }
 // Represents the input to API methods.
 message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;
    // Plain text
    PLAIN_TEXT = 1;
    // HTML
    HTML = 2;
  }
  // Required. The type of the document content. If the type is not set or is
  // `TYPE_UNSPECIFIED`, the API returns an `INVALID_ARGUMENT` error.
  Type type = 1;
  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI. As members of a `oneof`, at most one of
  // `content` and `gcs_content_uri` can be set at a time.
  oneof source {
    // The content of the input in string format.
    string content = 2;
    // The Google Cloud Storage URI where the file content is located.
    string gcs_content_uri = 3;
  }
  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.
  //
  // **Current Language Restrictions:**
  //
  //  * Only English, Spanish, and Japanese textual content
  //    are supported, with the following additional restriction:
  //    * `analyzeSentiment` only supports English text.
  //
  // If the language (either specified by the caller or automatically detected)
  // is not supported by the called API method, an `INVALID_ARGUMENT` error
  // is returned.
  string language = 4;
 }
 // Represents a sentence in the input document.
 message Sentence {
  // The sentence text, together with its beginning offset in the original
  // document (see `TextSpan`).
  TextSpan text = 1;
 }
 // Represents a phrase in the text that is a known entity, such as
 // a person, an organization, or a location. The API associates information,
 // such as salience and mentions, with entities.
 message Entity {
  // The type of the entity.
  enum Type {
    // Unknown
    UNKNOWN = 0;
    // Person
    PERSON = 1;
    // Location
    LOCATION = 2;
    // Organization
    ORGANIZATION = 3;
    // Event
    EVENT = 4;
    // Work of art
    WORK_OF_ART = 5;
    // Consumer goods
    CONSUMER_GOOD = 6;
    // Other types
    OTHER = 7;
  }
  // The representative name for the entity.
  string name = 1;
  // The entity type.
  Type type = 2;
  // Metadata associated with the entity, as string key/value pairs.
  //
  // Currently, only Wikipedia URLs are provided, if available.
  // The associated key is "wikipedia_url".
  map<string, string> metadata = 3;
  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;
  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;
 }
 // Represents the smallest syntactic building block of the text.
 message Token {
  // The token text.
  TextSpan text = 1;
  // Part of speech tag for this token.
  PartOfSpeech part_of_speech = 2;
  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;
  // [Lemma](https://en.wikipedia.org/wiki/Lemma_(morphology))
  // of the token.
  string lemma = 4;
 }
 // Represents the feeling associated with the entire text or entities in
 // the text.
 message Sentiment {
  // Polarity of the sentiment in the [-1.0, 1.0] range. Larger numbers
  // represent more positive sentiments.
  float polarity = 1;
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of polarity (positive or
  // negative).
  float magnitude = 2;
 }
 // Represents part of speech information for a token.
 message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;
    // Adjective
    ADJ = 1;
    // Adposition (preposition and postposition)
    ADP = 2;
    // Adverb
    ADV = 3;
    // Conjunction
    CONJ = 4;
    // Determiner
    DET = 5;
    // Noun (common and proper)
    NOUN = 6;
    // Cardinal number
    NUM = 7;
    // Pronoun
    PRON = 8;
    // Particle or other function word
    PRT = 9;
    // Punctuation
    PUNCT = 10;
    // Verb (all tenses and modes)
    VERB = 11;
    // Other: foreign words, typos, abbreviations
    X = 12;
    // Affix
    AFFIX = 13;
  }
  // The part of speech tag.
  Tag tag = 1;
 }
 // Represents dependency parse tree information for a token.
 message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;
    // Abbreviation modifier
    ABBREV = 1;
    // Adjectival complement
    ACOMP = 2;
    // Adverbial clause modifier
    ADVCL = 3;
    // Adverbial modifier
    ADVMOD = 4;
    // Adjectival modifier of an NP
    AMOD = 5;
    // Appositional modifier of an NP
    APPOS = 6;
    // Attribute dependent of a copular verb
    ATTR = 7;
    // Auxiliary (non-main) verb
    AUX = 8;
    // Passive auxiliary
    AUXPASS = 9;
    // Coordinating conjunction
    CC = 10;
    // Clausal complement of a verb or adjective
    CCOMP = 11;
    // Conjunct
    CONJ = 12;
    // Clausal subject
    CSUBJ = 13;
    // Clausal passive subject
    CSUBJPASS = 14;
    // Dependency (unable to determine)
    DEP = 15;
    // Determiner
    DET = 16;
    // Discourse
    DISCOURSE = 17;
    // Direct object
    DOBJ = 18;
    // Expletive
    EXPL = 19;
    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;
    // Indirect object
    IOBJ = 21;
    // Marker (word introducing a subordinate clause)
    MARK = 22;
    // Multi-word expression
    MWE = 23;
    // Multi-word verbal expression
    MWV = 24;
    // Negation modifier
    NEG = 25;
    // Noun compound modifier
    NN = 26;
    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;
    // Nominal subject
    NSUBJ = 28;
    // Passive nominal subject
    NSUBJPASS = 29;
    // Numeric modifier of a noun
    NUM = 30;
    // Element of compound number
    NUMBER = 31;
    // Punctuation mark
    P = 32;
    // Parataxis relation
    PARATAXIS = 33;
    // Participial modifier
    PARTMOD = 34;
    // The complement of a preposition is a clause
    PCOMP = 35;
    // Object of a preposition
    POBJ = 36;
    // Possession modifier
    POSS = 37;
    // Postverbal negative particle
    POSTNEG = 38;
    // Predicate complement
    PRECOMP = 39;
    // Preconjunct
    PRECONJ = 40;
    // Predeterminer
    PREDET = 41;
    // Prefix
    PREF = 42;
    // Prepositional modifier
    PREP = 43;
    // The relationship between a verb and verbal morpheme
    PRONL = 44;
    // Particle
    PRT = 45;
    // Associative or possessive marker
    PS = 46;
    // Quantifier phrase modifier
    QUANTMOD = 47;
    // Relative clause modifier
    RCMOD = 48;
    // Complementizer in relative clause
    RCMODREL = 49;
    // Ellipsis without a preceding predicate
    RDROP = 50;
    // Referent
    REF = 51;
    // Remnant
    REMNANT = 52;
    // Reparandum
    REPARANDUM = 53;
    // Root
    ROOT = 54;
    // Suffix specifying a unit of number
    SNUM = 55;
    // Suffix
    SUFF = 56;
    // Temporal modifier
    TMOD = 57;
    // Topic marker
    TOPIC = 58;
    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;
    // Vocative
    VOCATIVE = 60;
    // Open clausal complement
    XCOMP = 61;
    // Name suffix
    SUFFIX = 62;
    // Name title
    TITLE = 63;
    // Adverbial phrase modifier
    ADVPHMOD = 64;
    // Causative auxiliary
    AUXCAUS = 65;
    // Helper auxiliary
    AUXVV = 66;
    // Rentaishi (Prenominal modifier)
    DTMOD = 67;
    // Foreign words
    FOREIGN = 68;
    // Keyword
    KW = 69;
    // List for chains of comparable items
    LIST = 70;
    // Nominalized clause
    NOMC = 71;
    // Nominalized clausal subject
    NOMCSUBJ = 72;
    // Nominalized clausal passive
    NOMCSUBJPASS = 73;
    // Compound of numeric modifier
    NUMC = 74;
    // Copula
    COP = 75;
    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;
  }
  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;
  // The parse label for the token.
  Label label = 2;
 }
 // Represents a mention for an entity in the text. Currently, proper noun
 // mentions are supported.
 message EntityMention {
  // The mention text, together with its beginning offset in the original
  // document (see `TextSpan`).
  TextSpan text = 1;
 }
 // Represents an output piece of text.
 message TextSpan {
  // The content of the output text.
  string content = 1;
  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in
  // the API request. When the encoding type is `NONE`, this is set to `-1`.
  int32 begin_offset = 2;
 }
 // The sentiment analysis request message.
 message AnalyzeSentimentRequest {
  // Input document. Currently, `analyzeSentiment` only supports English text
  // ([Document.language][google.cloud.language.v1beta1.Document.language]="EN").
  Document document = 1;
 }
 // The sentiment analysis response message.
 message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;
  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically detected language.
  string language = 2;
 }
 // The entity analysis request message.
 message AnalyzeEntitiesRequest {
  // Input document.
  Document document = 1;
  // The encoding type used by the API to calculate offsets (for example, the
  // `begin_offset` of each returned `TextSpan`).
  EncodingType encoding_type = 2;
 }
 // The entity analysis response message.
 message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;
  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically detected language.
  string language = 2;
 }
 // The request message for the advanced text annotation API, which can perform
 // document sentiment analysis and entity analysis as well as syntactic
 // analysis in a single call.
 message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;
    // Extract entities.
    bool extract_entities = 2;
    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;
  }
  // Input document.
  Document document = 1;
  // The enabled features.
  Features features = 2;
  // The encoding type used by the API to calculate offsets (for example, the
  // `begin_offset` of each returned `TextSpan`).
  EncodingType encoding_type = 3;
 }
 // The text annotations response message.
 message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;
  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;
  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;
  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;
  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically detected language.
  string language = 5;
 }
 // Represents the text encoding that the caller uses to process the output.
 // Providing an `EncodingType` is recommended because the API provides the
 // beginning offsets for various outputs, such as tokens and mentions, and
 // languages that natively use different text encodings may access offsets
 // differently.
 enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) is set to `-1`.
  NONE = 0;
  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;
  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;
  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
 }