@ -1,4 +1,4 @@
/ / Copyright 2019 Google LLC.
/ / Copyright 202 1 Google LLC
/ /
/ / Licensed under the Apache License , Version 2.0 ( the "License" ) ;
/ / you may not use this file except in compliance with the License.
@ -11,7 +11,6 @@
/ / WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied.
/ / See the License for the specific language governing permissions and
/ / limitations under the License.
/ /
syntax = "proto3" ;
@ -22,7 +21,9 @@ import "google/api/client.proto";
import "google/api/field_behavior.proto" ;
import "google/api/resource.proto" ;
import "google/longrunning/operations.proto" ;
import "google/protobuf/empty.proto" ;
import "google/protobuf/timestamp.proto" ;
import "google/rpc/status.proto" ;
option cc_enable_arenas = true ;
option csharp_namespace = "Google.Cloud.Translate.V3" ;
@ -81,6 +82,15 @@ service TranslationService {
option ( google.api.method_signature ) = "parent,model,display_language_code" ;
}
// Translates documents in synchronous mode; the result is returned in a
// TranslateDocumentResponse.
rpc TranslateDocument(TranslateDocumentRequest)
    returns (TranslateDocumentResponse) {
  option (google.api.http) = {
    post: "/v3/{parent=projects/*/locations/*}:translateDocument"
    body: "*"
  };
}
/ / Translates a large volume of text in asynchronous batch mode.
/ / This function provides real - time output as the inputs are being processed.
/ / If caller cancels a request , the partial results ( for an input file , it ' s
@ -100,6 +110,25 @@ service TranslationService {
} ;
}
// Translates a large volume of documents in asynchronous batch mode.
// This function provides real-time output as the inputs are being processed.
// If the caller cancels a request, the partial results (for an input file,
// it's all or nothing) may still be available on the specified output
// location.
//
// This call returns immediately and you can use
// google.longrunning.Operation.name to poll the status of the call.
rpc BatchTranslateDocument(BatchTranslateDocumentRequest)
    returns (google.longrunning.Operation) {
  option (google.api.http) = {
    post: "/v3/{parent=projects/*/locations/*}:batchTranslateDocument"
    body: "*"
  };
  option (google.longrunning.operation_info) = {
    response_type: "BatchTranslateDocumentResponse"
    metadata_type: "BatchTranslateDocumentMetadata"
  };
}
/ / Creates a glossary and returns the long - running operation. Returns
/ / NOT_FOUND , if the project doesn ' t exist.
rpc CreateGlossary ( CreateGlossaryRequest )
@ -152,8 +181,12 @@ service TranslationService {
/ / Configures which glossary should be used for a specific target language ,
/ / and defines options for applying that glossary.
message TranslateTextGlossaryConfig {
/ / Required. Specifies the glossary used for this translation. Use
/ / this format : projects /*/locations/*/ glossaries / *
/ / Required. The ` glossary ` to be applied for this translation.
/ /
/ / The format depends on glossary :
/ /
/ / - User provided custom glossary :
/ / ` projects / { project - number - or - id } / locations / { location - id } / glossaries / { glossary - id } `
string glossary = 1 [ ( google.api.field_behavior ) = REQUIRED ] ;
/ / Optional. Indicates match is case - insensitive.
@ -164,7 +197,8 @@ message TranslateTextGlossaryConfig {
/ / The request message for synchronous translation.
message TranslateTextRequest {
/ / Required. The content of the input in string format.
// We recommend the total content be less than 30k codepoints. The max length
// of this field is 1024.
/ / Use BatchTranslateText for larger text.
repeated string contents = 1 [ ( google.api.field_behavior ) = REQUIRED ] ;
@ -213,14 +247,13 @@ message TranslateTextRequest {
/ /
/ / - General ( built - in ) models :
/ / ` projects / { project - number - or - id } / locations / { location - id } / models / general / nmt ` ,
/ / ` projects / { project - number - or - id } / locations / { location - id } / models / general / base `
/ /
/ /
/ / For global ( non - regionalized ) requests , use ` location - id ` ` global ` .
/ / For example ,
/ / ` projects / { project - number - or - id } / locations / global / models / general / nmt ` .
/ /
/ / If missing , the system decides which google base model to use .
/ / If not provided , the default Google model ( NMT ) will be used .
string model = 6 [ ( google.api.field_behavior ) = OPTIONAL ] ;
/ / Optional. Glossary to be applied. The glossary must be
@ -236,7 +269,8 @@ message TranslateTextRequest {
/ / characters , underscores and dashes. International characters are allowed.
/ / Label values are optional . Label keys must start with a letter.
/ /
/ / See https : / / cloud.google.com / translate / docs / labels for more information.
/ / See https : / / cloud.google.com / translate / docs / advanced / labels for more
/ / information.
map < string , string > labels = 10 [ ( google.api.field_behavior ) = OPTIONAL ] ;
}
@ -257,6 +291,8 @@ message TranslateTextResponse {
/ / A single translation response.
message Translation {
/ / Text translated into the target language.
/ / If an error occurs during translation , this field might be excluded from
/ / the response.
string translated_text = 1 ;
/ / Only present when ` model ` is present in the request.
@ -327,7 +363,8 @@ message DetectLanguageRequest {
/ / characters , underscores and dashes. International characters are allowed.
/ / Label values are optional . Label keys must start with a letter.
/ /
/ / See https : / / cloud.google.com / translate / docs / labels for more information.
/ / See https : / / cloud.google.com / translate / docs / advanced / labels for more
/ / information.
map < string , string > labels = 6 [ ( google.api.field_behavior ) = OPTIONAL ] ;
}
@ -343,8 +380,8 @@ message DetectedLanguage {
// The response message for language detection.
message DetectLanguageResponse {
  // The most probable language detected by the Translation API. For each
  // request, the Translation API will always return only one result.
  repeated DetectedLanguage languages = 1;
}
@ -384,11 +421,10 @@ message GetSupportedLanguagesRequest {
/ /
/ / - General ( built - in ) models :
/ / ` projects / { project - number - or - id } / locations / { location - id } / models / general / nmt ` ,
/ / ` projects / { project - number - or - id } / locations / { location - id } / models / general / base `
/ /
/ /
/ / Returns languages supported by the specified model.
/ / If missing , we get supported languages of Google general base ( PBMT ) model.
/ / If missing , we get supported languages of Google general NMT model.
string model = 2 [ ( google.api.field_behavior ) = OPTIONAL ] ;
}
@ -422,7 +458,7 @@ message SupportedLanguage {
// The Google Cloud Storage location for the input content.
message GcsSource {
  // Required. Source data URI. For example, `gs://my_bucket/my_object`.
  string input_uri = 1 [(google.api.field_behavior) = REQUIRED];
}
/ / Input configuration for BatchTranslateText request.
@ -459,10 +495,12 @@ message InputConfig {
// The Google Cloud Storage location for the output content.
message GcsDestination {
  // Required. The bucket used in 'output_uri_prefix' must exist and there must
  // be no files under 'output_uri_prefix'. 'output_uri_prefix' must end with
  // "/" and start with "gs://". One 'output_uri_prefix' can only be used by one
  // batch translation job at a time. Otherwise an INVALID_ARGUMENT (400) error
  // is returned.
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}
/ / Output configuration for BatchTranslateText request.
@ -501,8 +539,13 @@ message OutputConfig {
/ / processed and ready to be consumed ( that is , no partial output file is
/ / written ) .
/ /
// Because index.csv is updated continuously during processing, make sure
// that no custom retention policy that could prevent file updates is applied
// to the output bucket.
// (https://cloud.google.com/storage/docs/bucket-lock?hl=en#retention-policy)
/ /
/ / The format of translations_file ( for target language code 'trg' ) is :
/ / ` gs : / / translation_test / a_b_c_ 'trg' _translations. [ extension ] `
/ / gs : / / translation_test / a_b_c_ 'trg' _translations. [ extension ]
/ /
/ / If the input file extension is tsv , the output has the following
/ / columns :
@ -519,10 +562,10 @@ message OutputConfig {
/ / If input file extension is a txt or html , the translation is directly
/ / written to the output file. If glossary is requested , a separate
/ / glossary_translations_file has format of
/ / ` gs : / / translation_test / a_b_c_ 'trg' _glossary_translations. [ extension ] `
/ / gs : / / translation_test / a_b_c_ 'trg' _glossary_translations. [ extension ]
/ /
/ / The format of errors file ( for target language code 'trg' ) is :
/ / ` gs : / / translation_test / a_b_c_ 'trg' _errors. [ extension ] `
/ / gs : / / translation_test / a_b_c_ 'trg' _errors. [ extension ]
/ /
/ / If the input file extension is tsv , errors_file contains the following :
/ / Column 1 : ID of the request provided in the input , if it ' s not
@ -534,11 +577,224 @@ message OutputConfig {
/ /
/ / If the input file extension is txt or html , glossary_error_file will be
/ / generated that contains error details. glossary_error_file has format of
/ / ` gs : / / translation_test / a_b_c_ 'trg' _glossary_errors. [ extension ] `
/ / gs : / / translation_test / a_b_c_ 'trg' _glossary_errors. [ extension ]
GcsDestination gcs_destination = 1 ;
}
}
// A document translation request input config.
message DocumentInputConfig {
  // Specifies the source for the document's content.
  //
  // The input file size should be <= 20MB for:
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  //
  // The input file size should be <= 20MB and the maximum page limit is 20
  // for:
  // - application/pdf
  oneof source {
    // Document's content represented as a stream of bytes.
    bytes content = 1;

    // Google Cloud Storage location. This must be a single file.
    // For example: gs://example_bucket/example_file.pdf
    GcsSource gcs_source = 2;
  }

  // Specifies the input document's mime_type.
  //
  // If not specified, it will be determined using the file extension for
  // files provided through gcs_source. For a file provided through bytes
  // content, the mime_type must be provided.
  //
  // Currently supported mime types are:
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 4;
}
// A document translation request output config.
message DocumentOutputConfig {
  // A URI destination for the translated document.
  // It is optional to provide a destination. If provided, the results from
  // TranslateDocument will be stored in the destination.
  // Whether a destination is provided or not, the translated documents will be
  // returned within TranslateDocumentResponse.document_translation and
  // TranslateDocumentResponse.glossary_document_translation.
  oneof destination {
    // Optional. Google Cloud Storage destination for the translation output,
    // e.g., `gs://my_bucket/my_directory/`.
    //
    // The destination directory provided does not have to be empty, but the
    // bucket must exist. If a file with the same name as the output file
    // already exists in the destination, an error will be returned.
    //
    // For a document provided through DocumentInputConfig.content, the output
    // file will have the name "output_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    //   mime type.
    //
    //
    // For a document provided through DocumentInputConfig.gcs_source, the
    // output file will have a name according to its URI. For example: an input
    // file with URI "gs://a/b/c.[extension]" stored in a gcs_destination
    // bucket with name "my_bucket" will have an output URI:
    // "gs://my_bucket/a_b_c_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    //   mime type.
    //
    //
    // If the document was directly provided through the request, then the
    // output document will have the format:
    // "gs://my_bucket/translated_document_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    //   mime type.
    //
    // If a glossary was provided, then the output URI for the glossary
    // translation will be equal to the default output URI but have
    // `glossary_translations` instead of `translations`. For the previous
    // example, its glossary URI would be:
    // "gs://my_bucket/a_b_c_[trg]_glossary_translations.[ext]".
    //
    // Thus the max number of output files will be 2 (Translated document,
    // Glossary translated document).
    //
    // Callers should expect no partial outputs. If there is any error during
    // document translation, no output will be stored in the Cloud Storage
    // bucket.
    GcsDestination gcs_destination = 1
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Specifies the translated document's mime_type.
  // If not specified, the translated file's mime type will be the same as the
  // input file's mime type.
  // Currently the output mime type is only supported when it is the same as
  // the input mime type.
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];
}
// A document translation request.
message TranslateDocumentRequest {
  // Required. Location to make a regional call.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use
  // `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Non-global location is required for requests using AutoML models or
  // custom glossaries.
  //
  // Models and glossaries must be within the same region (have the same
  // location-id), otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The BCP-47 language code of the input document if known, for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // Language Support. If the source language isn't specified, the API attempts
  // to identify the source language automatically and returns the source
  // language within the response. Source language must be specified if the
  // request contains a glossary or a custom model.
  string source_language_code = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. The BCP-47 language code to use for translation of the input
  // document, set to one of the language codes listed in Language Support.
  string target_language_code = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Input configurations.
  DocumentInputConfig document_input_config = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Output configurations.
  // Defines if the output file should be stored within Cloud Storage as well
  // as the desired output format. If not provided, the translated file will
  // only be returned through a byte-stream and its output mime type will be
  // the same as the input file's mime type.
  DocumentOutputConfig document_output_config = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  //
  // If not provided, the default Google model (NMT) will be used for
  // translation.
  string model = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossary to be applied. The glossary must be within the same
  // region (have the same location-id) as the model, otherwise an
  // INVALID_ARGUMENT (400) error is returned.
  TranslateTextGlossaryConfig glossary_config = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters (Unicode
  // codepoints), can only contain lowercase letters, numeric characters,
  // underscores and dashes. International characters are allowed. Label values
  // are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];
}
// A translated document message.
message DocumentTranslation {
  // The array of translated documents. It is expected to be size 1 for now.
  // Multiple translated documents may be produced in the future for other
  // types of file formats.
  repeated bytes byte_stream_outputs = 1;

  // The translated document's mime type.
  string mime_type = 2;

  // The detected language for the input document.
  // If the user did not provide the source language for the input document,
  // this field will have the language code automatically detected. If the
  // source language was passed, auto-detection of the language does not occur
  // and this field is empty.
  string detected_language_code = 3;
}
// A translated document response message.
message TranslateDocumentResponse {
  // Translated document.
  DocumentTranslation document_translation = 1;

  // The document's translation output if a glossary is provided in the
  // request. This can be the same as
  // [TranslateDocumentResponse.document_translation] if no glossary terms
  // apply.
  DocumentTranslation glossary_document_translation = 2;

  // Only present when 'model' is present in the request.
  // 'model' is normalized to have a project number.
  //
  // For example:
  // If the 'model' field in TranslateDocumentRequest is:
  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
  // `model` here would be normalized to
  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
  string model = 3;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 4;
}
/ / The batch translation request.
message BatchTranslateTextRequest {
/ / Required. Location to make a call. Must refer to a caller ' s project.
@ -575,7 +831,6 @@ message BatchTranslateTextRequest {
/ /
/ / - General ( built - in ) models :
/ / ` projects / { project - number - or - id } / locations / { location - id } / models / general / nmt ` ,
/ / ` projects / { project - number - or - id } / locations / { location - id } / models / general / base `
/ /
/ /
/ / If the map is empty or a specific model is
@ -583,7 +838,7 @@ message BatchTranslateTextRequest {
map < string , string > models = 4 [ ( google.api.field_behavior ) = OPTIONAL ] ;
/ / Required. Input configurations.
/ / The total number of files matched should be < = 1000 .
/ / The total number of files matched should be < = 100.
/ / The total content size should be < = 100 M Unicode codepoints.
/ / The files must use UTF - 8 encoding.
repeated InputConfig input_configs = 5
@ -606,7 +861,8 @@ message BatchTranslateTextRequest {
/ / characters , underscores and dashes. International characters are allowed.
/ / Label values are optional . Label keys must start with a letter.
/ /
/ / See https : / / cloud.google.com / translate / docs / labels for more information.
/ / See https : / / cloud.google.com / translate / docs / advanced / labels for more
/ / information.
map < string , string > labels = 9 [ ( google.api.field_behavior ) = OPTIONAL ] ;
}
@ -702,9 +958,8 @@ message GlossaryInputConfig {
/ / For equivalent term sets glossaries :
/ /
/ / - CSV ( ` . csv ` ) : Multi - column CSV file defining equivalent glossary terms
/ / in multiple languages. The format is defined for Google Translation
/ / Toolkit and documented in [ Use a
/ / glossary ] ( https : / / support.google.com / translatortoolkit / answer / 6306379 ? hl = en ) .
/ / in multiple languages. See documentation for more information -
/ / [ glossaries ] ( https : / / cloud.google.com / translate / docs / advanced / glossary ) .
GcsSource gcs_source = 1 ;
}
}
@ -737,7 +992,7 @@ message Glossary {
/ / Required. The resource name of the glossary. Glossary names have the form
/ / ` projects / { project - number - or - id } / locations / { location - id } / glossaries / { glossary - id } ` .
string name = 1 ;
string name = 1 [ ( google.api.field_behavior ) = REQUIRED ] ;
/ / Languages supported by the glossary.
oneof languages {
@ -821,7 +1076,20 @@ message ListGlossariesRequest {
string page_token = 3 [ ( google.api.field_behavior ) = OPTIONAL ] ;
/ / Optional. Filter specifying constraints of a list operation.
/ / Filtering is not supported yet , and the parameter currently has no effect.
/ / Specify the constraint by the format of "key=value" , where key must be
/ / "src" or "tgt" , and the value must be a valid language code.
/ / For multiple restrictions , concatenate them by "AND" ( uppercase only ) ,
/ / such as : "src=en-US AND tgt=zh-CN" . Notice that the exact match is used
/ / here , which means using 'en-US' and 'en' can lead to different results ,
/ / which depends on the language code you used when you create the glossary.
/ / For the unidirectional glossaries , the "src" and "tgt" add restrictions
/ / on the source and target language code separately.
/ / For the equivalent term set glossaries , the "src" and / or "tgt" add
/ / restrictions on the term set.
/ / For example : "src=en-US AND tgt=zh-CN" will only pick the unidirectional
/ / glossaries which exactly match the source language code as "en-US" and the
/ / target language code "zh-CN" , but all equivalent term set glossaries which
/ / contain "en-US" and "zh-CN" in their language set will be picked.
/ / If missing , no filtering is performed.
string filter = 4 [ ( google.api.field_behavior ) = OPTIONAL ] ;
}
@ -924,3 +1192,269 @@ message DeleteGlossaryResponse {
/ / set to true .
google.protobuf.Timestamp end_time = 3 ;
}
// The BatchTranslateDocument request.
message BatchTranslateDocumentRequest {
  // Required. Location to make a regional call.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // The `global` location is not supported for batch translation.
  //
  // Only AutoML Translation models or glossaries within the same region (have
  // the same location-id) can be used, otherwise an INVALID_ARGUMENT (400)
  // error is returned.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The BCP-47 language code of the input document if known, for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // Language Support (https://cloud.google.com/translate/docs/languages).
  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The BCP-47 language code to use for translation of the input
  // document. Specify up to 10 language codes here.
  repeated string target_language_codes = 3
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input configurations.
  // The total number of files matched should be <= 100.
  // The total content size to translate should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated BatchDocumentInputConfig input_configs = 4
      [(google.api.field_behavior) = REQUIRED];

  // Required. Output configuration.
  // If 2 input configs match to the same file (that is, same input path),
  // we don't generate output for duplicate inputs.
  BatchDocumentOutputConfig output_config = 5
      [(google.api.field_behavior) = REQUIRED];

  // Optional. The models to use for translation. Map's key is target language
  // code. Map's value is the model name. Value can be a built-in general
  // model, or an AutoML Translation model.
  //
  // The value format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  //
  // If the map is empty or a specific model is
  // not requested for a language pair, then default google model (nmt) is
  // used.
  map<string, string> models = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossaries to be applied. It's keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. File format conversion map to be applied to all input files.
  // Map's key is the original mime_type. Map's value is the target mime_type
  // of translated documents.
  //
  // Supported file format conversion includes:
  // - `application/pdf` to
  //   `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
  //
  // If nothing specified, output files will be in the same format as the
  // original file.
  map<string, string> format_conversions = 8
      [(google.api.field_behavior) = OPTIONAL];
}
// Input configuration for BatchTranslateDocument request.
message BatchDocumentInputConfig {
  // Specify the input.
  oneof source {
    // Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.docx`) or a wildcard (for example,
    // `gs://translation-test/*`).
    //
    // File mime type is determined based on extension. Supported mime types
    // include:
    // - `pdf`, application/pdf
    // - `docx`,
    //   application/vnd.openxmlformats-officedocument.wordprocessingml.document
    // - `pptx`,
    //   application/vnd.openxmlformats-officedocument.presentationml.presentation
    // - `xlsx`,
    //   application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
    //
    // The max file size to support for `.docx`, `.pptx` and `.xlsx` is 100MB.
    // The max file size to support for `.pdf` is 1GB and the max page limit is
    // 1000 pages.
    // The max file size to support for all input documents is 1GB.
    GcsSource gcs_source = 1;
  }
}
// Output configuration for BatchTranslateDocument request.
message BatchDocumentOutputConfig {
  // The destination of output. The destination directory provided must exist
  // and be empty.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input document (for example, gs://a/b/c.[extension]),
    // we generate at most 2 * n output files. (n is the # of
    // target_language_codes in the BatchTranslateDocumentRequest).
    //
    // While the input documents are being processed, we write/update an index
    // file `index.csv` under `gcs_destination.output_uri_prefix` (for example,
    // gs://translation_output/index.csv). The index file is generated/updated
    // as new files are being translated. The format is:
    //
    // input_document,target_language_code,translation_output,error_output,
    // glossary_translation_output,glossary_error_output
    //
    // `input_document` is one file we matched using gcs_source.input_uri.
    // `target_language_code` is provided in the request.
    // `translation_output` contains the translations. (details provided below)
    // `error_output` contains the error message during processing of the file.
    // Both `translation_output` and `error_output` could be empty strings if
    // we have no content to output.
    // `glossary_translation_output` and `glossary_error_output` are the
    // translated output/error when we apply glossaries. They could also be
    // empty if we have no content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect that all the content in the input
    // file has been processed and is ready to be consumed (that is, no partial
    // output file is written).
    //
    // Because index.csv is updated continuously during processing, make sure
    // that no custom retention policy that could prevent file updates is
    // applied to the output bucket.
    // (https://cloud.google.com/storage/docs/bucket-lock?hl=en#retention-policy)
    //
    // The naming format of translation output files follows (for target
    // language code [trg]):
    //
    // `translation_output`:
    //   gs://translation_output/a_b_c_[trg]_translation.[extension]
    // `glossary_translation_output`:
    //   gs://translation_test/a_b_c_[trg]_glossary_translation.[extension]
    //
    // The output document will maintain the same file format as the input
    // document.
    //
    // The naming format of error output files follows (for target language
    // code [trg]):
    //
    // `error_output`:
    //   gs://translation_test/a_b_c_[trg]_errors.txt
    // `glossary_error_output`:
    //   gs://translation_test/a_b_c_[trg]_glossary_translation.txt
    //
    // The error output is a txt file containing error details.
    GcsDestination gcs_destination = 1;
  }
}
// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateDocument if at least one document is
// translated successfully.
message BatchTranslateDocumentResponse {
  // Total number of pages to translate in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 total_pages = 1;

  // Number of successfully translated pages in all documents. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 translated_pages = 2;

  // Number of pages that failed to process in all documents. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 failed_pages = 3;

  // Number of billable pages in documents with clear page definition (such as
  // PDF, DOCX, PPTX).
  int64 total_billable_pages = 4;

  // Total number of characters (Unicode codepoints) in all documents.
  int64 total_characters = 5;

  // Number of successfully translated characters (Unicode codepoints) in all
  // documents.
  int64 translated_characters = 6;

  // Number of characters that have failed to process (Unicode codepoints) in
  // all documents.
  int64 failed_characters = 7;

  // Number of billable characters (Unicode codepoints) in documents without
  // clear page definition, such as XLSX.
  int64 total_billable_characters = 8;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 9;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 10;
}
// State metadata for the batch translation operation.
message BatchTranslateDocumentMetadata {
  // State of the job.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item was successfully
    // processed.
    SUCCEEDED = 2;

    // The batch is done and no item was successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called the
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Total number of pages to translate in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 total_pages = 2;

  // Number of successfully translated pages in all documents so far.
  // Documents without clear page definition (such as XLSX) are not counted.
  int64 translated_pages = 3;

  // Number of pages that failed to process in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 failed_pages = 4;

  // Number of billable pages in documents with clear page definition (such as
  // PDF, DOCX, PPTX) so far.
  int64 total_billable_pages = 5;

  // Total number of characters (Unicode codepoints) in all documents so far.
  int64 total_characters = 6;

  // Number of successfully translated characters (Unicode codepoints) in all
  // documents so far.
  int64 translated_characters = 7;

  // Number of characters that have failed to process (Unicode codepoints) in
  // all documents so far.
  int64 failed_characters = 8;

  // Number of billable characters (Unicode codepoints) in documents without
  // clear page definition (such as XLSX) so far.
  int64 total_billable_characters = 9;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 10;
}