Add proto for language/v1 API, and update proto for language/v1beta1 API.

8 years ago · be57dbdf7e
parent 2360492797
commit be57dbdf7e
3 changed files with 1304 additions and 14 deletions
--- a/google/cloud/language/README.md
+++ b/google/cloud/language/README.md
--- a/google/cloud/language/v1/language_service.proto
+++ b/google/cloud/language/v1/language_service.proto
@ -0,0 +1,950 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.language.v1;
+
+import "google/api/annotations.proto";
+
+option java_multiple_files = true;
+option java_outer_classname = "LanguageServiceProto";
+option java_package = "com.google.cloud.language.v1";
+
+
+// Provides text analysis operations such as sentiment analysis and entity
+// recognition.
+service LanguageService {
+  // Analyzes the sentiment of the provided text.
+  rpc AnalyzeSentiment(AnalyzeSentimentRequest) returns (AnalyzeSentimentResponse) {
+    option (google.api.http) = { post: "/v1/documents:analyzeSentiment" body: "*" };
+  }
+
+  // Finds named entities (currently finds proper names) in the text,
+  // entity types, salience, mentions for each entity, and other properties.
+  rpc AnalyzeEntities(AnalyzeEntitiesRequest) returns (AnalyzeEntitiesResponse) {
+    option (google.api.http) = { post: "/v1/documents:analyzeEntities" body: "*" };
+  }
+
+  // Analyzes the syntax of the text and provides sentence boundaries and
+  // tokenization along with part of speech tags, dependency trees, and other
+  // properties.
+  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
+    option (google.api.http) = { post: "/v1/documents:analyzeSyntax" body: "*" };
+  }
+
+  // A convenience method that provides all the features that analyzeSentiment,
+  // analyzeEntities, and analyzeSyntax provide in one call.
+  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
+    option (google.api.http) = { post: "/v1/documents:annotateText" body: "*" };
+  }
+}
+
+// ################################################################ #
+//
+// Represents the input to API methods.
+message Document {
+  // The document types enum.
+  enum Type {
+    // The content type is not specified.
+    TYPE_UNSPECIFIED = 0;
+
+    // Plain text
+    PLAIN_TEXT = 1;
+
+    // HTML
+    HTML = 2;
+  }
+
+  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
+  // returns an `INVALID_ARGUMENT` error.
+  Type type = 1;
+
+  // The source of the document: a string containing the content or a
+  // Google Cloud Storage URI.
+  oneof source {
+    // The content of the input in string format.
+    string content = 2;
+
+    // The Google Cloud Storage URI where the file content is located.
+    // This URI must be of the form: gs://bucket_name/object_name. For more
+    // details, see https://cloud.google.com/storage/docs/reference-uris.
+    // NOTE: Cloud Storage object versioning is not supported.
+    string gcs_content_uri = 3;
+  }
+
+  // The language of the document (if not specified, the language is
+  // automatically detected). Both ISO and BCP-47 language codes are
+  // accepted.<br>
+  // **Current Language Restrictions:**
+  //
+  //  * Only English, Spanish, and Japanese textual content
+  //    are supported, with the following additional restriction:
+  //    * `analyzeSentiment` only supports English text.
+  // If the language (either specified by the caller or automatically detected)
+  // is not supported by the called API method, an `INVALID_ARGUMENT` error
+  // is returned.
+  string language = 4;
+}
+
+// Represents a sentence in the input document.
+message Sentence {
+  // The sentence text.
+  TextSpan text = 1;
+
+  // For calls to [AnalyzeSentiment][] or if
+  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment] is set to
+  // true, this field will contain the sentiment for the sentence.
+  Sentiment sentiment = 2;
+}
+
+// Represents a phrase in the text that is a known entity, such as
+// a person, an organization, or location. The API associates information, such
+// as salience and mentions, with entities.
+message Entity {
+  // The type of the entity.
+  enum Type {
+    // Unknown
+    UNKNOWN = 0;
+
+    // Person
+    PERSON = 1;
+
+    // Location
+    LOCATION = 2;
+
+    // Organization
+    ORGANIZATION = 3;
+
+    // Event
+    EVENT = 4;
+
+    // Work of art
+    WORK_OF_ART = 5;
+
+    // Consumer goods
+    CONSUMER_GOOD = 6;
+
+    // Other types
+    OTHER = 7;
+  }
+
+  // The representative name for the entity.
+  string name = 1;
+
+  // The entity type.
+  Type type = 2;
+
+  // Metadata associated with the entity.
+  //
+  // Currently, Wikipedia URLs and Knowledge Graph MIDs are provided, if
+  // available. The associated keys are "wikipedia_url" and "mid", respectively.
+  map<string, string> metadata = 3;
+
+  // The salience score associated with the entity in the [0, 1.0] range.
+  //
+  // The salience score for an entity provides information about the
+  // importance or centrality of that entity to the entire document text.
+  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
+  // salient.
+  float salience = 4;
+
+  // The mentions of this entity in the input document. The API currently
+  // supports proper noun mentions.
+  repeated EntityMention mentions = 5;
+}
+
+// Represents the smallest syntactic building block of the text.
+message Token {
+  // The token text.
+  TextSpan text = 1;
+
+  // Parts of speech tag for this token.
+  PartOfSpeech part_of_speech = 2;
+
+  // Dependency tree parse for this token.
+  DependencyEdge dependency_edge = 3;
+
+  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
+  string lemma = 4;
+}
+
+// Represents the feeling associated with the entire text or entities in
+// the text.
+message Sentiment {
+  // A non-negative number in the [0, +inf) range, which represents
+  // the absolute magnitude of sentiment regardless of score (positive or
+  // negative).
+  float magnitude = 2;
+
+  // Sentiment score between -1.0 (negative sentiment) and 1.0
+  // (positive sentiment.)
+  float score = 3;
+}
+
+// Represents part of speech information for a token. Parts of speech
+// are as defined in
+// http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
+message PartOfSpeech {
+  // The part of speech tags enum.
+  enum Tag {
+    // Unknown
+    UNKNOWN = 0;
+
+    // Adjective
+    ADJ = 1;
+
+    // Adposition (preposition and postposition)
+    ADP = 2;
+
+    // Adverb
+    ADV = 3;
+
+    // Conjunction
+    CONJ = 4;
+
+    // Determiner
+    DET = 5;
+
+    // Noun (common and proper)
+    NOUN = 6;
+
+    // Cardinal number
+    NUM = 7;
+
+    // Pronoun
+    PRON = 8;
+
+    // Particle or other function word
+    PRT = 9;
+
+    // Punctuation
+    PUNCT = 10;
+
+    // Verb (all tenses and modes)
+    VERB = 11;
+
+    // Other: foreign words, typos, abbreviations
+    X = 12;
+
+    // Affix
+    AFFIX = 13;
+  }
+
+  // The characteristic of a verb that expresses time flow during an event.
+  enum Aspect {
+    // Aspect is not applicable in the analyzed language or is not predicted.
+    ASPECT_UNKNOWN = 0;
+
+    // Perfective
+    PERFECTIVE = 1;
+
+    // Imperfective
+    IMPERFECTIVE = 2;
+
+    // Progressive
+    PROGRESSIVE = 3;
+  }
+
+  // The grammatical function performed by a noun or pronoun in a phrase,
+  // clause, or sentence. In some languages, other parts of speech, such as
+  // adjective and determiner, take case inflection in agreement with the noun.
+  enum Case {
+    // Case is not applicable in the analyzed language or is not predicted.
+    CASE_UNKNOWN = 0;
+
+    // Accusative
+    ACCUSATIVE = 1;
+
+    // Adverbial
+    ADVERBIAL = 2;
+
+    // Complementive
+    COMPLEMENTIVE = 3;
+
+    // Dative
+    DATIVE = 4;
+
+    // Genitive
+    GENITIVE = 5;
+
+    // Instrumental
+    INSTRUMENTAL = 6;
+
+    // Locative
+    LOCATIVE = 7;
+
+    // Nominative
+    NOMINATIVE = 8;
+
+    // Oblique
+    OBLIQUE = 9;
+
+    // Partitive
+    PARTITIVE = 10;
+
+    // Prepositional
+    PREPOSITIONAL = 11;
+
+    // Reflexive
+    REFLEXIVE_CASE = 12;
+
+    // Relative
+    RELATIVE_CASE = 13;
+
+    // Vocative
+    VOCATIVE = 14;
+  }
+
+  // Depending on the language, Form can be categorizing different forms of
+  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
+  // endings of verbs and adjectives or distinguishing between short and long
+  // forms of adjectives and participles
+  enum Form {
+    // Form is not applicable in the analyzed language or is not predicted.
+    FORM_UNKNOWN = 0;
+
+    // Adnomial
+    ADNOMIAL = 1;
+
+    // Auxiliary
+    AUXILIARY = 2;
+
+    // Complementizer
+    COMPLEMENTIZER = 3;
+
+    // Final ending
+    FINAL_ENDING = 4;
+
+    // Gerund
+    GERUND = 5;
+
+    // Realis
+    REALIS = 6;
+
+    // Irrealis
+    IRREALIS = 7;
+
+    // Short form
+    SHORT = 8;
+
+    // Long form
+    LONG = 9;
+
+    // Order form
+    ORDER = 10;
+
+    // Specific form
+    SPECIFIC = 11;
+  }
+
+  // Gender classes of nouns reflected in the behaviour of associated words.
+  enum Gender {
+    // Gender is not applicable in the analyzed language or is not predicted.
+    GENDER_UNKNOWN = 0;
+
+    // Feminine
+    FEMININE = 1;
+
+    // Masculine
+    MASCULINE = 2;
+
+    // Neuter
+    NEUTER = 3;
+  }
+
+  // The grammatical feature of verbs, used for showing modality and attitude.
+  enum Mood {
+    // Mood is not applicable in the analyzed language or is not predicted.
+    MOOD_UNKNOWN = 0;
+
+    // Conditional
+    CONDITIONAL_MOOD = 1;
+
+    // Imperative
+    IMPERATIVE = 2;
+
+    // Indicative
+    INDICATIVE = 3;
+
+    // Interrogative
+    INTERROGATIVE = 4;
+
+    // Jussive
+    JUSSIVE = 5;
+
+    // Subjunctive
+    SUBJUNCTIVE = 6;
+  }
+
+  // Count distinctions.
+  enum Number {
+    // Number is not applicable in the analyzed language or is not predicted.
+    NUMBER_UNKNOWN = 0;
+
+    // Singular
+    SINGULAR = 1;
+
+    // Plural
+    PLURAL = 2;
+
+    // Dual
+    DUAL = 3;
+  }
+
+  // The distinction between the speaker, second person, third person, etc.
+  enum Person {
+    // Person is not applicable in the analyzed language or is not predicted.
+    PERSON_UNKNOWN = 0;
+
+    // First
+    FIRST = 1;
+
+    // Second
+    SECOND = 2;
+
+    // Third
+    THIRD = 3;
+
+    // Reflexive
+    REFLEXIVE_PERSON = 4;
+  }
+
+  // This category shows if the token is part of a proper name.
+  enum Proper {
+    // Proper is not applicable in the analyzed language or is not predicted.
+    PROPER_UNKNOWN = 0;
+
+    // Proper
+    PROPER = 1;
+
+    // Not proper
+    NOT_PROPER = 2;
+  }
+
+  // Reciprocal features of a pronoun.
+  enum Reciprocity {
+    // Reciprocity is not applicable in the analyzed language or is not
+    // predicted.
+    RECIPROCITY_UNKNOWN = 0;
+
+    // Reciprocal
+    RECIPROCAL = 1;
+
+    // Non-reciprocal
+    NON_RECIPROCAL = 2;
+  }
+
+  // Time reference.
+  enum Tense {
+    // Tense is not applicable in the analyzed language or is not predicted.
+    TENSE_UNKNOWN = 0;
+
+    // Conditional
+    CONDITIONAL_TENSE = 1;
+
+    // Future
+    FUTURE = 2;
+
+    // Past
+    PAST = 3;
+
+    // Present
+    PRESENT = 4;
+
+    // Imperfect
+    IMPERFECT = 5;
+
+    // Pluperfect
+    PLUPERFECT = 6;
+  }
+
+  // The relationship between the action that a verb expresses and the
+  // participants identified by its arguments.
+  enum Voice {
+    // Voice is not applicable in the analyzed language or is not predicted.
+    VOICE_UNKNOWN = 0;
+
+    // Active
+    ACTIVE = 1;
+
+    // Causative
+    CAUSATIVE = 2;
+
+    // Passive
+    PASSIVE = 3;
+  }
+
+  // The part of speech tag.
+  Tag tag = 1;
+
+  // The grammatical aspect.
+  Aspect aspect = 2;
+
+  // The grammatical case.
+  Case case = 3;
+
+  // The grammatical form.
+  Form form = 4;
+
+  // The grammatical gender.
+  Gender gender = 5;
+
+  // The grammatical mood.
+  Mood mood = 6;
+
+  // The grammatical number.
+  Number number = 7;
+
+  // The grammatical person.
+  Person person = 8;
+
+  // The grammatical properness.
+  Proper proper = 9;
+
+  // The grammatical reciprocity.
+  Reciprocity reciprocity = 10;
+
+  // The grammatical tense.
+  Tense tense = 11;
+
+  // The grammatical voice.
+  Voice voice = 12;
+}
+
+// Represents dependency parse tree information for a token. (For more
+// information on dependency labels, see
+// http://www.aclweb.org/anthology/P13-2017
+message DependencyEdge {
+  // The parse label enum for the token.
+  enum Label {
+    // Unknown
+    UNKNOWN = 0;
+
+    // Abbreviation modifier
+    ABBREV = 1;
+
+    // Adjectival complement
+    ACOMP = 2;
+
+    // Adverbial clause modifier
+    ADVCL = 3;
+
+    // Adverbial modifier
+    ADVMOD = 4;
+
+    // Adjectival modifier of an NP
+    AMOD = 5;
+
+    // Appositional modifier of an NP
+    APPOS = 6;
+
+    // Attribute dependent of a copular verb
+    ATTR = 7;
+
+    // Auxiliary (non-main) verb
+    AUX = 8;
+
+    // Passive auxiliary
+    AUXPASS = 9;
+
+    // Coordinating conjunction
+    CC = 10;
+
+    // Clausal complement of a verb or adjective
+    CCOMP = 11;
+
+    // Conjunct
+    CONJ = 12;
+
+    // Clausal subject
+    CSUBJ = 13;
+
+    // Clausal passive subject
+    CSUBJPASS = 14;
+
+    // Dependency (unable to determine)
+    DEP = 15;
+
+    // Determiner
+    DET = 16;
+
+    // Discourse
+    DISCOURSE = 17;
+
+    // Direct object
+    DOBJ = 18;
+
+    // Expletive
+    EXPL = 19;
+
+    // Goes with (part of a word in a text not well edited)
+    GOESWITH = 20;
+
+    // Indirect object
+    IOBJ = 21;
+
+    // Marker (word introducing a subordinate clause)
+    MARK = 22;
+
+    // Multi-word expression
+    MWE = 23;
+
+    // Multi-word verbal expression
+    MWV = 24;
+
+    // Negation modifier
+    NEG = 25;
+
+    // Noun compound modifier
+    NN = 26;
+
+    // Noun phrase used as an adverbial modifier
+    NPADVMOD = 27;
+
+    // Nominal subject
+    NSUBJ = 28;
+
+    // Passive nominal subject
+    NSUBJPASS = 29;
+
+    // Numeric modifier of a noun
+    NUM = 30;
+
+    // Element of compound number
+    NUMBER = 31;
+
+    // Punctuation mark
+    P = 32;
+
+    // Parataxis relation
+    PARATAXIS = 33;
+
+    // Participial modifier
+    PARTMOD = 34;
+
+    // The complement of a preposition is a clause
+    PCOMP = 35;
+
+    // Object of a preposition
+    POBJ = 36;
+
+    // Possession modifier
+    POSS = 37;
+
+    // Postverbal negative particle
+    POSTNEG = 38;
+
+    // Predicate complement
+    PRECOMP = 39;
+
+    // Preconjunt
+    PRECONJ = 40;
+
+    // Predeterminer
+    PREDET = 41;
+
+    // Prefix
+    PREF = 42;
+
+    // Prepositional modifier
+    PREP = 43;
+
+    // The relationship between a verb and verbal morpheme
+    PRONL = 44;
+
+    // Particle
+    PRT = 45;
+
+    // Associative or possessive marker
+    PS = 46;
+
+    // Quantifier phrase modifier
+    QUANTMOD = 47;
+
+    // Relative clause modifier
+    RCMOD = 48;
+
+    // Complementizer in relative clause
+    RCMODREL = 49;
+
+    // Ellipsis without a preceding predicate
+    RDROP = 50;
+
+    // Referent
+    REF = 51;
+
+    // Remnant
+    REMNANT = 52;
+
+    // Reparandum
+    REPARANDUM = 53;
+
+    // Root
+    ROOT = 54;
+
+    // Suffix specifying a unit of number
+    SNUM = 55;
+
+    // Suffix
+    SUFF = 56;
+
+    // Temporal modifier
+    TMOD = 57;
+
+    // Topic marker
+    TOPIC = 58;
+
+    // Clause headed by an infinite form of the verb that modifies a noun
+    VMOD = 59;
+
+    // Vocative
+    VOCATIVE = 60;
+
+    // Open clausal complement
+    XCOMP = 61;
+
+    // Name suffix
+    SUFFIX = 62;
+
+    // Name title
+    TITLE = 63;
+
+    // Adverbial phrase modifier
+    ADVPHMOD = 64;
+
+    // Causative auxiliary
+    AUXCAUS = 65;
+
+    // Helper auxiliary
+    AUXVV = 66;
+
+    // Rentaishi (Prenominal modifier)
+    DTMOD = 67;
+
+    // Foreign words
+    FOREIGN = 68;
+
+    // Keyword
+    KW = 69;
+
+    // List for chains of comparable items
+    LIST = 70;
+
+    // Nominalized clause
+    NOMC = 71;
+
+    // Nominalized clausal subject
+    NOMCSUBJ = 72;
+
+    // Nominalized clausal passive
+    NOMCSUBJPASS = 73;
+
+    // Compound of numeric modifier
+    NUMC = 74;
+
+    // Copula
+    COP = 75;
+
+    // Dislocated relation (for fronted/topicalized elements)
+    DISLOCATED = 76;
+  }
+
+  // Represents the head of this token in the dependency tree.
+  // This is the index of the token which has an arc going to this token.
+  // The index is the position of the token in the array of tokens returned
+  // by the API method. If this token is a root token, then the
+  // `head_token_index` is its own index.
+  int32 head_token_index = 1;
+
+  // The parse label for the token.
+  Label label = 2;
+}
+
+// Represents a mention for an entity in the text. Currently, proper noun
+// mentions are supported.
+message EntityMention {
+  // The supported types of mentions.
+  enum Type {
+    // Unknown
+    TYPE_UNKNOWN = 0;
+
+    // Proper name
+    PROPER = 1;
+
+    // Common noun (or noun compound)
+    COMMON = 2;
+  }
+
+  // The mention text.
+  TextSpan text = 1;
+
+  // The type of the entity mention.
+  Type type = 2;
+}
+
+// Represents an output piece of text.
+message TextSpan {
+  // The content of the output text.
+  string content = 1;
+
+  // The API calculates the beginning offset of the content in the original
+  // document according to the [EncodingType][google.cloud.language.v1.EncodingType] specified in the API request.
+  int32 begin_offset = 2;
+}
+
+// The sentiment analysis request message.
+message AnalyzeSentimentRequest {
+  // Input document. Currently, `analyzeSentiment` only supports English text
+  // ([Document.language][google.cloud.language.v1.Document.language]="EN").
+  Document document = 1;
+
+  // The encoding type used by the API to calculate sentence offsets.
+  EncodingType encoding_type = 2;
+}
+
+// The sentiment analysis response message.
+message AnalyzeSentimentResponse {
+  // The overall sentiment of the input document.
+  Sentiment document_sentiment = 1;
+
+  // The language of the text, which will be the same as the language specified
+  // in the request or, if not specified, the automatically-detected language.
+  // See `Document.language` field for more details.
+  string language = 2;
+
+  // The sentiment for all the sentences in the document.
+  repeated Sentence sentences = 3;
+}
+
+// The entity analysis request message.
+message AnalyzeEntitiesRequest {
+  // Input document.
+  Document document = 1;
+
+  // The encoding type used by the API to calculate offsets.
+  EncodingType encoding_type = 2;
+}
+
+// The entity analysis response message.
+message AnalyzeEntitiesResponse {
+  // The recognized entities in the input document.
+  repeated Entity entities = 1;
+
+  // The language of the text, which will be the same as the language specified
+  // in the request or, if not specified, the automatically-detected language.
+  // See `Document.language` field for more details.
+  string language = 2;
+}
+
+// The syntax analysis request message.
+message AnalyzeSyntaxRequest {
+  // Input document.
+  Document document = 1;
+
+  // The encoding type used by the API to calculate offsets.
+  EncodingType encoding_type = 2;
+}
+
+// The syntax analysis response message.
+message AnalyzeSyntaxResponse {
+  // Sentences in the input document.
+  repeated Sentence sentences = 1;
+
+  // Tokens, along with their syntactic information, in the input document.
+  repeated Token tokens = 2;
+
+  // The language of the text, which will be the same as the language specified
+  // in the request or, if not specified, the automatically-detected language.
+  // See `Document.language` field for more details.
+  string language = 3;
+}
+
+// The request message for the text annotation API, which can perform multiple
+// analysis types (sentiment, entities, and syntax) in one call.
+message AnnotateTextRequest {
+  // All available features for sentiment, syntax, and semantic analysis.
+  // Setting each one to true will enable that specific analysis for the input.
+  message Features {
+    // Extract syntax information.
+    bool extract_syntax = 1;
+
+    // Extract entities.
+    bool extract_entities = 2;
+
+    // Extract document-level sentiment.
+    bool extract_document_sentiment = 3;
+  }
+
+  // Input document.
+  Document document = 1;
+
+  // The enabled features.
+  Features features = 2;
+
+  // The encoding type used by the API to calculate offsets.
+  EncodingType encoding_type = 3;
+}
+
+// The text annotations response message.
+message AnnotateTextResponse {
+  // Sentences in the input document. Populated if the user enables
+  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
+  repeated Sentence sentences = 1;
+
+  // Tokens, along with their syntactic information, in the input document.
+  // Populated if the user enables
+  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
+  repeated Token tokens = 2;
+
+  // Entities, along with their semantic information, in the input document.
+  // Populated if the user enables
+  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
+  repeated Entity entities = 3;
+
+  // The overall sentiment for the document. Populated if the user enables
+  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
+  Sentiment document_sentiment = 4;
+
+  // The language of the text, which will be the same as the language specified
+  // in the request or, if not specified, the automatically-detected language.
+  // See `Document.language` field for more details.
+  string language = 5;
+}
+
+// Represents the text encoding that the caller uses to process the output.
+// Providing an `EncodingType` is recommended because the API provides the
+// beginning offsets for various outputs, such as tokens and mentions, and
+// languages that natively use different text encodings may access offsets
+// differently.
+enum EncodingType {
+  // If `EncodingType` is not specified, encoding-dependent information (such as
+  // `begin_offset`) will be set at `-1`.
+  NONE = 0;
+
+  // Encoding-dependent information (such as `begin_offset`) is calculated based
+  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
+  // that use this encoding natively.
+  UTF8 = 1;
+
+  // Encoding-dependent information (such as `begin_offset`) is calculated based
+  // on the UTF-16 encoding of the input. Java and Javascript are examples of
+  // languages that use this encoding natively.
+  UTF16 = 2;
+
+  // Encoding-dependent information (such as `begin_offset`) is calculated based
+  // on the UTF-32 encoding of the input. Python is an example of a language
+  // that uses this encoding natively.
+  UTF32 = 3;
+}
--- a/google/cloud/language/v1beta1/language_service.proto
+++ b/google/cloud/language/v1beta1/language_service.proto
@ -22,7 +22,6 @@ option java_multiple_files = true;
 option java_outer_classname = "LanguageServiceProto";
 option java_package = "com.google.cloud.language.v1beta1";

-option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1beta1";

 // Provides text analysis operations such as sentiment analysis and entity
 // recognition.
@ -38,10 +37,15 @@ service LanguageService {
    option (google.api.http) = { post: "/v1beta1/documents:analyzeEntities" body: "*" };
  }

-  // Advanced API that analyzes the document and provides a full set of text
-  // annotations, including semantic, syntactic, and sentiment information. This
-  // API is intended for users who are familiar with machine learning and need
-  // in-depth text features to build upon.
+  // Analyzes the syntax of the text and provides sentence boundaries and
+  // tokenization along with part of speech tags, dependency trees, and other
+  // properties.
+  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
+    option (google.api.http) = { post: "/v1beta1/documents:analyzeSyntax" body: "*" };
+  }
+
+  // A convenience method that provides all the features that analyzeSentiment,
+  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = { post: "/v1beta1/documents:annotateText" body: "*" };
  }
@ -74,6 +78,9 @@ message Document {
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
+    // This URI must be of the form: gs://bucket_name/object_name. For more
+    // details, see https://cloud.google.com/storage/docs/reference-uris.
+    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

@ -95,6 +102,11 @@ message Document {
 message Sentence {
  // The sentence text.
  TextSpan text = 1;
+
+  // For calls to [AnalyzeSentiment][] or if
+  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment] is set to
+  // true, this field will contain the sentiment for the sentence.
+  Sentiment sentiment = 2;
 }

 // Represents a phrase in the text that is a known entity, such as
@ -136,8 +148,8 @@ message Entity {

  // Metadata associated with the entity.
  //
-  // Currently, only Wikipedia URLs are provided, if available.
-  // The associated key is "wikipedia_url".
+  // Currently, Wikipedia URLs and Knowledge Graph MIDs are provided, if
+  // available. The associated keys are "wikipedia_url" and "mid", respectively.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
@ -164,22 +176,26 @@ message Token {
  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

-  // [Lemma](https://en.wikipedia.org/wiki/Lemma_(morphology))
-  // of the token.
+  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
 }

 // Represents the feeling associated with the entire text or entities in
 // the text.
 message Sentiment {
-  // Polarity of the sentiment in the [-1.0, 1.0] range. Larger numbers
-  // represent more positive sentiments.
+  // DEPRECATED FIELD - This field is being deprecated in
+  // favor of score. Please refer to our documentation at
+  // https://cloud.google.com/natural-language/docs for more information.
  float polarity = 1;

  // A non-negative number in the [0, +inf) range, which represents
-  // the absolute magnitude of sentiment regardless of polarity (positive or
+  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;
+
+  // Sentiment score between -1.0 (negative sentiment) and 1.0
+  // (positive sentiment.)
+  float score = 3;
 }

 // Represents part of speech information for a token.
@ -229,8 +245,285 @@ message PartOfSpeech {
    AFFIX = 13;
  }

+  // The characteristic of a verb that expresses time flow during an event.
+  enum Aspect {
+    // Aspect is not applicable in the analyzed language or is not predicted.
+    ASPECT_UNKNOWN = 0;
+
+    // Perfective
+    PERFECTIVE = 1;
+
+    // Imperfective
+    IMPERFECTIVE = 2;
+
+    // Progressive
+    PROGRESSIVE = 3;
+  }
+
+  // The grammatical function performed by a noun or pronoun in a phrase,
+  // clause, or sentence. In some languages, other parts of speech, such as
+  // adjective and determiner, take case inflection in agreement with the noun.
+  enum Case {
+    // Case is not applicable in the analyzed language or is not predicted.
+    CASE_UNKNOWN = 0;
+
+    // Accusative
+    ACCUSATIVE = 1;
+
+    // Adverbial
+    ADVERBIAL = 2;
+
+    // Complementive
+    COMPLEMENTIVE = 3;
+
+    // Dative
+    DATIVE = 4;
+
+    // Genitive
+    GENITIVE = 5;
+
+    // Instrumental
+    INSTRUMENTAL = 6;
+
+    // Locative
+    LOCATIVE = 7;
+
+    // Nominative
+    NOMINATIVE = 8;
+
+    // Oblique
+    OBLIQUE = 9;
+
+    // Partitive
+    PARTITIVE = 10;
+
+    // Prepositional
+    PREPOSITIONAL = 11;
+
+    // Reflexive
+    REFLEXIVE_CASE = 12;
+
+    // Relative
+    RELATIVE_CASE = 13;
+
+    // Vocative
+    VOCATIVE = 14;
+  }
+
+  // Depending on the language, Form can be categorizing different forms of
+  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
+  // endings of verbs and adjectives or distinguishing between short and long
+  // forms of adjectives and participles
+  enum Form {
+    // Form is not applicable in the analyzed language or is not predicted.
+    FORM_UNKNOWN = 0;
+
+    // Adnomial
+    ADNOMIAL = 1;
+
+    // Auxiliary
+    AUXILIARY = 2;
+
+    // Complementizer
+    COMPLEMENTIZER = 3;
+
+    // Final ending
+    FINAL_ENDING = 4;
+
+    // Gerund
+    GERUND = 5;
+
+    // Realis
+    REALIS = 6;
+
+    // Irrealis
+    IRREALIS = 7;
+
+    // Short form
+    SHORT = 8;
+
+    // Long form
+    LONG = 9;
+
+    // Order form
+    ORDER = 10;
+
+    // Specific form
+    SPECIFIC = 11;
+  }
+
+  // Gender classes of nouns reflected in the behaviour of associated words.
+  enum Gender {
+    // Gender is not applicable in the analyzed language or is not predicted.
+    GENDER_UNKNOWN = 0;
+
+    // Feminine
+    FEMININE = 1;
+
+    // Masculine
+    MASCULINE = 2;
+
+    // Neuter
+    NEUTER = 3;
+  }
+
+  // The grammatical feature of verbs, used for showing modality and attitude.
+  enum Mood {
+    // Mood is not applicable in the analyzed language or is not predicted.
+    MOOD_UNKNOWN = 0;
+
+    // Conditional
+    CONDITIONAL_MOOD = 1;
+
+    // Imperative
+    IMPERATIVE = 2;
+
+    // Indicative
+    INDICATIVE = 3;
+
+    // Interrogative
+    INTERROGATIVE = 4;
+
+    // Jussive
+    JUSSIVE = 5;
+
+    // Subjunctive
+    SUBJUNCTIVE = 6;
+  }
+
+  // Count distinctions.
+  enum Number {
+    // Number is not applicable in the analyzed language or is not predicted.
+    NUMBER_UNKNOWN = 0;
+
+    // Singular
+    SINGULAR = 1;
+
+    // Plural
+    PLURAL = 2;
+
+    // Dual
+    DUAL = 3;
+  }
+
+  // The distinction between the speaker, second person, third person, etc.
+  enum Person {
+    // Person is not applicable in the analyzed language or is not predicted.
+    PERSON_UNKNOWN = 0;
+
+    // First
+    FIRST = 1;
+
+    // Second
+    SECOND = 2;
+
+    // Third
+    THIRD = 3;
+
+    // Reflexive
+    REFLEXIVE_PERSON = 4;
+  }
+
+  // This category shows if the token is part of a proper name.
+  enum Proper {
+    // Proper is not applicable in the analyzed language or is not predicted.
+    PROPER_UNKNOWN = 0;
+
+    // Proper
+    PROPER = 1;
+
+    // Not proper
+    NOT_PROPER = 2;
+  }
+
+  // Reciprocal features of a pronoun.
+  enum Reciprocity {
+    // Reciprocity is not applicable in the analyzed language or is not
+    // predicted.
+    RECIPROCITY_UNKNOWN = 0;
+
+    // Reciprocal
+    RECIPROCAL = 1;
+
+    // Non-reciprocal
+    NON_RECIPROCAL = 2;
+  }
+
+  // Time reference.
+  enum Tense {
+    // Tense is not applicable in the analyzed language or is not predicted.
+    TENSE_UNKNOWN = 0;
+
+    // Conditional
+    CONDITIONAL_TENSE = 1;
+
+    // Future
+    FUTURE = 2;
+
+    // Past
+    PAST = 3;
+
+    // Present
+    PRESENT = 4;
+
+    // Imperfect
+    IMPERFECT = 5;
+
+    // Pluperfect
+    PLUPERFECT = 6;
+  }
+
+  // The relationship between the action that a verb expresses and the
+  // participants identified by its arguments.
+  enum Voice {
+    // Voice is not applicable in the analyzed language or is not predicted.
+    VOICE_UNKNOWN = 0;
+
+    // Active
+    ACTIVE = 1;
+
+    // Causative
+    CAUSATIVE = 2;
+
+    // Passive
+    PASSIVE = 3;
+  }
+
  // The part of speech tag.
  Tag tag = 1;
+
+  // The grammatical aspect.
+  Aspect aspect = 2;
+
+  // The grammatical case.
+  Case case = 3;
+
+  // The grammatical form.
+  Form form = 4;
+
+  // The grammatical gender.
+  Gender gender = 5;
+
+  // The grammatical mood.
+  Mood mood = 6;
+
+  // The grammatical number.
+  Number number = 7;
+
+  // The grammatical person.
+  Person person = 8;
+
+  // The grammatical properness.
+  Proper proper = 9;
+
+  // The grammatical reciprocity.
+  Reciprocity reciprocity = 10;
+
+  // The grammatical tense.
+  Tense tense = 11;
+
+  // The grammatical voice.
+  Voice voice = 12;
 }

 // Represents dependency parse tree information for a token.
@ -483,8 +776,23 @@ message DependencyEdge {
 // Represents a mention for an entity in the text. Currently, proper noun
 // mentions are supported.
 message EntityMention {
+  // The supported types of mentions.
+  enum Type {
+    // Unknown
+    TYPE_UNKNOWN = 0;
+
+    // Proper name
+    PROPER = 1;
+
+    // Common noun (or noun compound)
+    COMMON = 2;
+  }
+
  // The mention text.
  TextSpan text = 1;
+
+  // The type of the entity mention.
+  Type type = 2;
 }

 // Represents an output piece of text.
@ -502,6 +810,10 @@ message AnalyzeSentimentRequest {
  // Input document. Currently, `analyzeSentiment` only supports English text
  // ([Document.language][google.cloud.language.v1beta1.Document.language]="EN").
  Document document = 1;
+
+  // The encoding type used by the API to calculate sentence offsets for the
+  // sentence sentiment.
+  EncodingType encoding_type = 2;
 }

 // The sentiment analysis response message.
@ -512,6 +824,9 @@ message AnalyzeSentimentResponse {
  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  string language = 2;
+
+  // The sentiment for all the sentences in the document.
+  repeated Sentence sentences = 3;
 }

 // The entity analysis request message.
@ -530,11 +845,35 @@ message AnalyzeEntitiesResponse {

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
+  // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details.
  string language = 2;
 }

-// The request message for the advanced text annotation API, which performs all
-// the above plus syntactic analysis.
+// The syntax analysis request message.
+message AnalyzeSyntaxRequest {
+  // Input document.
+  Document document = 1;
+
+  // The encoding type used by the API to calculate offsets.
+  EncodingType encoding_type = 2;
+}
+
+// The syntax analysis response message.
+message AnalyzeSyntaxResponse {
+  // Sentences in the input document.
+  repeated Sentence sentences = 1;
+
+  // Tokens, along with their syntactic information, in the input document.
+  repeated Token tokens = 2;
+
+  // The language of the text, which will be the same as the language specified
+  // in the request or, if not specified, the automatically-detected language.
+  // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details.
+  string language = 3;
+}
+
+// The request message for the text annotation API, which can perform multiple
+// analysis types (sentiment, entities, and syntax) in one call.
 message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
@ -581,6 +920,7 @@ message AnnotateTextResponse {

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
+  // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details.
  string language = 5;
 }