parent
3748d006c4
commit
1e645debc6
1 changed files with 610 additions and 0 deletions
@ -0,0 +1,610 @@ |
|||||||
|
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
||||||
|
|
||||||
|
syntax = "proto3";

package google.cloud.language.v1beta1;

// Supplies the `google.api.http` method option used by LanguageService.
import "google/api/annotations.proto";

// Java code generation settings: one source file per top-level definition,
// with file-level descriptors held in `LanguageServiceProto`.
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta1";
||||||
|
|
||||||
|
|
||||||
|
// Provides text analysis operations such as sentiment analysis and entity
// recognition.
//
// Every method is exposed over HTTP as a POST under `/v1beta1/documents`,
// with the whole request message carried in the HTTP body (`body: "*"`).
service LanguageService {
  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeSentiment"
      body: "*"
    };
  }

  // Finds named entities (currently finds proper names) in the text, along
  // with entity types, salience, mentions for each entity, and other
  // properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeEntities"
      body: "*"
    };
  }

  // Advanced API that analyzes the document and provides a full set of text
  // annotations, including semantic, syntactic, and sentiment information. This
  // API is intended for users who are familiar with machine learning and need
  // in-depth text features to build upon.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:annotateText"
      body: "*"
    };
  }
}
||||||
|
|
||||||
|
// ################################################################ #

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI. At most one of the two members can be set.
  oneof source {
    // The content of the input in string format.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // **Current Language Restrictions:**
  //
  //  * Only English, Spanish, and Japanese textual content
  //    are supported, with the following additional restriction:
  //    * `analyzeSentiment` only supports English text.
  //
  // If the language (either specified by the caller or automatically detected)
  // is not supported by the called API method, an `INVALID_ARGUMENT` error
  // is returned.
  string language = 4;
}
||||||
|
|
||||||
|
// Represents a sentence in the input document.
message Sentence {
  // The sentence text, together with its beginning offset in the document.
  TextSpan text = 1;
}
||||||
|
|
||||||
|
// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity.
  enum Type {
    // Unknown. This is the zero value, so it is also what an unset `type`
    // field reads as.
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Work of art
    WORK_OF_ART = 5;

    // Consumer goods
    CONSUMER_GOOD = 6;

    // Other types
    OTHER = 7;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // Currently, only Wikipedia URLs are provided, if available.
  // The associated key is "wikipedia_url".
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;
}
||||||
|
|
||||||
|
// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Part-of-speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_(morphology))
  // of the token.
  string lemma = 4;
}
||||||
|
|
||||||
|
// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // Polarity of the sentiment in the [-1.0, 1.0] range. Larger numbers
  // represent more positive sentiments.
  float polarity = 1;

  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of polarity (positive or
  // negative).
  float magnitude = 2;
}
||||||
|
|
||||||
|
// Represents part of speech information for a token.
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown. This is the zero value, so it is also what an unset `tag`
    // field reads as.
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The part of speech tag.
  Tag tag = 1;
}
||||||
|
|
||||||
|
// Represents dependency parse tree information for a token.
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown. This is the zero value, so it is also what an unset `label`
    // field reads as.
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // Clausal complement of a preposition
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by a non-finite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive subject
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}
||||||
|
|
||||||
|
// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The mention text, together with its beginning offset in the document.
  TextSpan text = 1;
}
||||||
|
|
||||||
|
// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in
  // the API request.
  int32 begin_offset = 2;
}
||||||
|
|
||||||
|
// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Input document. Currently, `analyzeSentiment` only supports English text
  // ([Document.language][google.cloud.language.v1beta1.Document.language]="EN").
  Document document = 1;
}
||||||
|
|
||||||
|
// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  string language = 2;
}
||||||
|
|
||||||
|
// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Input document.
  Document document = 1;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}
||||||
|
|
||||||
|
// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  string language = 2;
}
||||||
|
|
||||||
|
// The request message for the advanced text annotation API, which can perform
// document sentiment analysis, entity analysis, and syntactic analysis in a
// single call, as selected by `features`.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;
  }

  // Input document.
  Document document = 1;

  // The enabled features.
  Features features = 2;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}
||||||
|
|
||||||
|
// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  string language = 5;
}
||||||
|
|
||||||
|
// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set to `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}
Loading…
Reference in new issue