diff --git a/google/cloud/documentai/v1beta3/BUILD.bazel b/google/cloud/documentai/v1beta3/BUILD.bazel index 3c3962541..491e43de2 100644 --- a/google/cloud/documentai/v1beta3/BUILD.bazel +++ b/google/cloud/documentai/v1beta3/BUILD.bazel @@ -22,10 +22,12 @@ proto_library( name = "documentai_proto", srcs = [ "barcode.proto", + "dataset.proto", "document.proto", "document_io.proto", "document_processor_service.proto", "document_schema.proto", + "document_service.proto", "evaluation.proto", "geometry.proto", "operation_metadata.proto", @@ -91,8 +93,8 @@ java_gapic_library( rest_numeric_enums = True, service_yaml = "documentai_v1beta3.yaml", test_deps = [ - ":documentai_java_grpc", "//google/cloud/location:location_java_grpc", + ":documentai_java_grpc", ], transport = "grpc+rest", deps = [ @@ -107,6 +109,8 @@ java_gapic_test( test_classes = [ "com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClientHttpJsonTest", "com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClientTest", + "com.google.cloud.documentai.v1beta3.DocumentServiceClientHttpJsonTest", + "com.google.cloud.documentai.v1beta3.DocumentServiceClientTest", ], runtime_deps = [":documentai_java_gapic_test"], ) @@ -242,10 +246,13 @@ php_gapic_library( name = "documentai_php_gapic", srcs = [":documentai_proto_with_info"], grpc_service_config = "documentai_v1beta3_grpc_service_config.json", + migration_mode = "PRE_MIGRATION_SURFACE_ONLY", rest_numeric_enums = True, service_yaml = "documentai_v1beta3.yaml", transport = "grpc+rest", - deps = [":documentai_php_proto"], + deps = [ + ":documentai_php_proto", + ], ) # Open Source Packages @@ -355,6 +362,7 @@ load( csharp_proto_library( name = "documentai_csharp_proto", + extra_opts = [""], deps = [":documentai_proto"], ) diff --git a/google/cloud/documentai/v1beta3/barcode.proto b/google/cloud/documentai/v1beta3/barcode.proto index a5148172b..f0580e69a 100644 --- a/google/cloud/documentai/v1beta3/barcode.proto +++ 
b/google/cloud/documentai/v1beta3/barcode.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/google/cloud/documentai/v1beta3/dataset.proto b/google/cloud/documentai/v1beta3/dataset.proto new file mode 100644 index 000000000..fa50183bb --- /dev/null +++ b/google/cloud/documentai/v1beta3/dataset.proto @@ -0,0 +1,138 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +package google.cloud.documentai.v1beta3; + +import "google/api/field_behavior.proto"; +import "google/api/resource.proto"; +import "google/cloud/documentai/v1beta3/document_io.proto"; +import "google/cloud/documentai/v1beta3/document_schema.proto"; + +option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3"; +option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb"; +option java_multiple_files = true; +option java_outer_classname = "DatasetProto"; +option java_package = "com.google.cloud.documentai.v1beta3"; +option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3"; +option ruby_package = "Google::Cloud::DocumentAI::V1beta3"; +option (google.api.resource_definition) = { + type: "contentwarehouse.googleapis.com/Schema" + pattern: "projects/{project}/locations/{location}/schemas/{schema}" +}; + +// A singleton resource under a +// [Processor][google.cloud.documentai.v1beta3.Processor] which configures a +// collection of documents. +message Dataset { + option (google.api.resource) = { + type: "documentai.googleapis.com/Dataset" + pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset" + }; + + // Configuration specific to the Cloud Storage-based implementation. + message GCSManagedConfig { + // Required. The Cloud Storage URI (a directory) where the documents + // belonging to the dataset must be stored. + GcsPrefix gcs_prefix = 1 [(google.api.field_behavior) = REQUIRED]; + } + + // Configuration specific to the Document AI Warehouse-based implementation. + message DocumentWarehouseConfig { + // Output only. The collection in Document AI Warehouse associated with the + // dataset. + string collection = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. The schema in Document AI Warehouse associated with the + // dataset. 
+ string schema = 2 [ + (google.api.field_behavior) = OUTPUT_ONLY, + (google.api.resource_reference) = { + type: "contentwarehouse.googleapis.com/Schema" + } + ]; + } + + // Configuration specific to an unmanaged dataset. + message UnmanagedDatasetConfig {} + + // Configuration specific to spanner-based indexing. + message SpannerIndexingConfig {} + + // Different states of a dataset. + enum State { + // Default unspecified enum, should not be used. + STATE_UNSPECIFIED = 0; + + // Dataset has not been initialized. + UNINITIALIZED = 1; + + // Dataset is being initialized. + INITIALIZING = 2; + + // Dataset has been initialized. + INITIALIZED = 3; + } + + oneof storage_source { + // Optional. User-managed Cloud Storage dataset configuration. Use this + // configuration if the dataset documents are stored under a user-managed + // Cloud Storage location. + GCSManagedConfig gcs_managed_config = 3 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Document AI Warehouse-based dataset configuration. + DocumentWarehouseConfig document_warehouse_config = 5 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Unmanaged dataset configuration. Use this configuration if the + // dataset documents are managed by the document service internally (not + // user-managed). + UnmanagedDatasetConfig unmanaged_dataset_config = 6 + [(google.api.field_behavior) = OPTIONAL]; + } + + oneof indexing_source { + // Optional. A lightweight indexing source with low latency and high + // reliability, but lacking advanced features like CMEK and content-based + // search. + SpannerIndexingConfig spanner_indexing_config = 4 + [(google.api.field_behavior) = OPTIONAL]; + } + + // Dataset resource name. + // Format: + // `projects/{project}/locations/{location}/processors/{processor}/dataset` + string name = 1; + + // Required. State of the dataset. Ignored when updating dataset. + State state = 2 [(google.api.field_behavior) = REQUIRED]; +} + +// Dataset Schema. 
+message DatasetSchema { + option (google.api.resource) = { + type: "documentai.googleapis.com/DatasetSchema" + pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema" + }; + + // Dataset schema resource name. + // Format: + // `projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema` + string name = 1; + + // Optional. Schema of the dataset. + DocumentSchema document_schema = 3 [(google.api.field_behavior) = OPTIONAL]; +} diff --git a/google/cloud/documentai/v1beta3/document.proto b/google/cloud/documentai/v1beta3/document.proto index 193f7700d..8250d4d66 100644 --- a/google/cloud/documentai/v1beta3/document.proto +++ b/google/cloud/documentai/v1beta3/document.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -64,7 +64,8 @@ message Document { // Font size for the text. float size = 1; - // Unit for the font size. Follows CSS naming (in, px, pt, etc.). + // Unit for the font size. Follows CSS naming (such as `in`, `px`, and + // `pt`). string unit = 2; } @@ -78,17 +79,18 @@ message Document { // Text background color. google.type.Color background_color = 3; - // Font weight. Possible values are normal, bold, bolder, and lighter. - // https://www.w3schools.com/cssref/pr_font_weight.asp + // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp). + // Possible values are `normal`, `bold`, `bolder`, and `lighter`. string font_weight = 4; - // Text style. Possible values are normal, italic, and oblique. - // https://www.w3schools.com/cssref/pr_font_font-style.asp + // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp). + // Possible values are `normal`, `italic`, and `oblique`. string text_style = 5; - // Text decoration. Follows CSS standard. 
- // - // https://www.w3schools.com/cssref/pr_text_text-decoration.asp + // [Text + // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp). + // Follows CSS standard. + // string text_decoration = 6; // Font size. @@ -118,7 +120,9 @@ message Document { // Raw byte content of the image. bytes content = 1; - // Encoding mime type for the image. + // Encoding [media type (MIME + // type)](https://www.iana.org/assignments/media-types/media-types.xhtml) + // for the image. string mime_type = 2; // Width of the image in pixels. @@ -255,6 +259,59 @@ message Document { Type type = 1; } + // Font and other text style attributes. + message StyleInfo { + // Font size in points (`1` point is `¹⁄₇₂` inches). + int32 font_size = 1; + + // Font size in pixels, equal to _unrounded + // [font_size][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_size]_ + // * _resolution_ ÷ `72.0`. + double pixel_font_size = 2; + + // Letter spacing in points. + double letter_spacing = 3; + + // Name or style of the font. + string font_type = 4; + + // Whether the text is bold (equivalent to + // [font_weight][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_weight] + // being at least `700`). + bool bold = 5; + + // Whether the text is italic. + bool italic = 6; + + // Whether the text is underlined. + bool underlined = 7; + + // Whether the text is strikethrough. + bool strikeout = 8; + + // Whether the text is a subscript. + bool subscript = 9; + + // Whether the text is a superscript. + bool superscript = 10; + + // Whether the text is in small caps. + bool smallcaps = 11; + + // TrueType weight on a scale from `100` (thin) to `1000` (ultra-heavy). + // Normal is `400`, bold is `700`. + int32 font_weight = 12; + + // Whether the text is handwritten. + bool handwritten = 13; + + // Color of the text. + google.type.Color text_color = 14; + + // Color of the background. 
+ google.type.Color background_color = 15; + } + // [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for // [Token][google.cloud.documentai.v1beta3.Document.Page.Token]. Layout layout = 1; @@ -268,6 +325,9 @@ message Document { // The history of this annotation. Provenance provenance = 4 [deprecated = true]; + + // Text style attributes. + StyleInfo style_info = 5; } // A detected symbol. @@ -333,7 +393,7 @@ message Document { repeated DetectedLanguage detected_languages = 4; // The history of this table. - Provenance provenance = 5; + Provenance provenance = 5 [deprecated = true]; } // A form field detected on the page. @@ -389,16 +449,16 @@ message Document { // Detected language for a structural component. message DetectedLanguage { - // The BCP-47 language code, such as `en-US` or `sr-Latn`. For more - // information, see - // https://www.unicode.org/reports/tr35/#Unicode_locale_identifier. + // The [BCP-47 language + // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier), + // such as `en-US` or `sr-Latn`. string language_code = 1; // Confidence of detected language. Range `[0, 1]`. float confidence = 2; } - // Image Quality Scores for the page image + // Image quality scores for the page image. message ImageQualityScores { // Image Quality Defects message DetectedDefect { @@ -414,12 +474,12 @@ message Document { // - `quality/defect_glare` string type = 1; - // Confidence of detected defect. Range `[0, 1]` where 1 indicates - // strong confidence of that the defect exists. + // Confidence of detected defect. Range `[0, 1]` where `1` indicates + // strong confidence that the defect exists. float confidence = 2; } - // The overall quality score. Range `[0, 1]` where 1 is perfect quality. + // The overall quality score. Range `[0, 1]` where `1` is perfect quality. float quality_score = 1; // A list of detected defects. @@ -485,7 +545,7 @@ message Document { // A list of detected barcodes. 
repeated DetectedBarcode detected_barcodes = 15; - // Image Quality Scores. + // Image quality scores. ImageQualityScores image_quality_scores = 17; // The history of this page. @@ -740,13 +800,13 @@ message Document { REMOVE = 2; // Updates any fields within the given provenance scope of the message. It - // 'overwrites' the fields rather than replacing them. This is - // especially relevant when we just want to update a field value of an - // entity without also affecting all the child properties. + // overwrites the fields rather than replacing them. Use this when you + // want to update a field value of an entity without also updating all the + // child properties. UPDATE = 7; // Currently unused. Replace an element identified by `parent`. - REPLACE = 3 [deprecated = true]; + REPLACE = 3; // Deprecated. Request human review for the element identified by // `parent`. @@ -839,10 +899,9 @@ message Document { // Original source document from the user. oneof source { // Optional. Currently supports Google Cloud Storage URI of the form - // `gs://bucket_name/object_name`. Object versioning is not supported. - // See [Google Cloud Storage Request - // URIs](https://cloud.google.com/storage/docs/reference-uris) for more - // info. + // `gs://bucket_name/object_name`. Object versioning is not supported. + // For more information, refer to [Google Cloud Storage Request + // URIs](https://cloud.google.com/storage/docs/reference-uris). string uri = 1 [(google.api.field_behavior) = OPTIONAL]; // Optional. Inline document content, represented as a stream of bytes. @@ -851,9 +910,8 @@ message Document { bytes content = 2 [(google.api.field_behavior) = OPTIONAL]; } - // An IANA published MIME type (also referred to as media type). For more - // information, see - // https://www.iana.org/assignments/media-types/media-types.xhtml. + // An IANA published [media type (MIME + // type)](https://www.iana.org/assignments/media-types/media-types.xhtml). 
string mime_type = 3; // Optional. UTF-8 encoded text in reading order from the document. diff --git a/google/cloud/documentai/v1beta3/document_io.proto b/google/cloud/documentai/v1beta3/document_io.proto index 9de0733f4..1b7e7ce2f 100644 --- a/google/cloud/documentai/v1beta3/document_io.proto +++ b/google/cloud/documentai/v1beta3/document_io.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -127,9 +127,18 @@ message OcrConfig { // Adds additional latency comparable to regular OCR to the process call. bool enable_image_quality_scores = 4; - // A list of advanced OCR options to further fine-tune OCR behavior. + // A list of advanced OCR options to further fine-tune OCR behavior. Current + // valid values are: + // + // - `legacy_layout`: a heuristics layout detection algorithm, which serves as + // an alternative to the current ML-based layout detection algorithm. + // Customers can choose the best suitable layout algorithm based on their + // situation. repeated string advanced_ocr_options = 5; // Includes symbol level OCR information if set to true. bool enable_symbol = 6; + + // Turns on the font ID model and returns font style information. + bool compute_style_info = 8; } diff --git a/google/cloud/documentai/v1beta3/document_processor_service.proto b/google/cloud/documentai/v1beta3/document_processor_service.proto index 885bdffd8..b2a873625 100644 --- a/google/cloud/documentai/v1beta3/document_processor_service.proto +++ b/google/cloud/documentai/v1beta3/document_processor_service.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ import "google/cloud/documentai/v1beta3/operation_metadata.proto"; import "google/cloud/documentai/v1beta3/processor.proto"; import "google/cloud/documentai/v1beta3/processor_type.proto"; import "google/longrunning/operations.proto"; +import "google/protobuf/empty.proto"; import "google/protobuf/field_mask.proto"; import "google/protobuf/timestamp.proto"; import "google/rpc/status.proto"; @@ -48,7 +49,7 @@ option (google.api.resource_definition) = { pattern: "projects/{project}/locations/{location}" }; -// Service to call Cloud DocumentAI to process documents according to the +// Service to call Document AI to process documents according to the // processor's definition. Processors are built using state-of-the-art Google // AI such as natural language, computer vision, and translation to extract // structured information from unstructured or semi-structured documents. @@ -89,8 +90,9 @@ service DocumentProcessorService { }; } - // Fetches processor types. Note that we do not use ListProcessorTypes here - // because it is not paginated. + // Fetches processor types. Note that we don't use + // [ListProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessorTypes] + // here, because it isn't paginated. rpc FetchProcessorTypes(FetchProcessorTypesRequest) returns (FetchProcessorTypesResponse) { option (google.api.http) = { @@ -134,7 +136,7 @@ service DocumentProcessorService { // Trains a new processor version. // Operation metadata is returned as - // cloud_documentai_core.TrainProcessorVersionMetadata. + // [TrainProcessorVersionMetadata][google.cloud.documentai.v1beta3.TrainProcessorVersionMetadata]. rpc TrainProcessorVersion(TrainProcessorVersionRequest) returns (google.longrunning.Operation) { option (google.api.http) = { @@ -208,8 +210,9 @@ service DocumentProcessorService { }; } - // Creates a processor from the type processor that the user chose. 
- // The processor will be at "ENABLED" state by default after its creation. + // Creates a processor from the + // [ProcessorType][google.cloud.documentai.v1beta3.ProcessorType] provided. + // The processor will be at `ENABLED` state by default after its creation. rpc CreateProcessor(CreateProcessorRequest) returns (Processor) { option (google.api.http) = { post: "/v1beta3/{parent=projects/*/locations/*}/processors" @@ -339,12 +342,14 @@ service DocumentProcessorService { // Options for Process API message ProcessOptions { - // Only applicable to "Document OCR Processor". Returns error if set on other + // Only applicable to `OCR_PROCESSOR`. Returns error if set on other // processor types. OcrConfig ocr_config = 1; } -// Request message for the process document method. +// Request message for the +// [ProcessDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ProcessDocument] +// method. message ProcessRequest { // The document payload. oneof source { @@ -370,16 +375,20 @@ message ProcessRequest { (google.api.resource_reference) = { type: "*" } ]; - // The document payload, the [content] and [mime_type] fields must be set. + // The document payload, the + // [content][google.cloud.documentai.v1beta3.Document.content] and + // [mime_type][google.cloud.documentai.v1beta3.Document.mime_type] fields must + // be set. Document document = 2 [deprecated = true]; - // Whether Human Review feature should be skipped for this request. Default to - // false. + // Whether human review should be skipped for this request. Default to + // `false`. bool skip_human_review = 3; - // Specifies which fields to include in ProcessResponse's document. - // Only supports top level document and pages field so it must be in the form - // of `{document_field_name}` or `pages.{page_field_name}`. + // Specifies which fields to include in the + // [ProcessResponse.document][google.cloud.documentai.v1beta3.ProcessResponse.document] + // output. 
Only supports top-level document and pages field, so it must be in + // the form of `{document_field_name}` or `pages.{page_field_name}`. google.protobuf.FieldMask field_mask = 6; // Inference-time options for the process API @@ -394,7 +403,7 @@ message HumanReviewStatus { STATE_UNSPECIFIED = 0; // Human review is skipped for the document. This can happen because human - // review is not enabled on the processor or the processing request has + // review isn't enabled on the processor or the processing request has // been set to skip this document. SKIPPED = 1; @@ -405,7 +414,8 @@ message HumanReviewStatus { IN_PROGRESS = 3; // Some error happened during triggering human review, see the - // [state_message] for details. + // [state_message][google.cloud.documentai.v1beta3.HumanReviewStatus.state_message] + // for details. ERROR = 4; } @@ -416,29 +426,34 @@ message HumanReviewStatus { string state_message = 2; // The name of the operation triggered by the processed document. This field - // is populated only when the [state] is [HUMAN_REVIEW_IN_PROGRESS]. It has - // the same response type and metadata as the long running operation returned - // by [ReviewDocument] method. + // is populated only when the + // [state][google.cloud.documentai.v1beta3.HumanReviewStatus.state] is + // `IN_PROGRESS`. It has the same response type and metadata as + // the long-running operation returned by + // [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument]. string human_review_operation = 3; } -// Response message for the process document method. +// Response message for the +// [ProcessDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ProcessDocument] +// method. message ProcessResponse { // The document payload, will populate fields based on the processor's // behavior. Document document = 1; // The name of the operation triggered by the processed document. 
If the human - // review process is not triggered, this field will be empty. It has the same - // response type and metadata as the long running operation returned by - // ReviewDocument method. + // review process isn't triggered, this field is empty. It has the same + // response type and metadata as the long-running operation returned by + // [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument]. string human_review_operation = 2 [deprecated = true]; // The status of human review on the processed document. HumanReviewStatus human_review_status = 3; } -// Request message for batch process document method. +// Request message for +// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments]. message BatchProcessRequest { // The message for input config in batch process. message BatchInputConfig { @@ -447,13 +462,19 @@ message BatchProcessRequest { // The Cloud Storage location as the source of the document. string gcs_source = 1; - // Mimetype of the input. If the input is a raw document, the supported - // mimetypes are application/pdf, image/tiff, and image/gif. - // If the input is a [Document] proto, the type should be application/json. + // An IANA published [media type (MIME + // type)](https://www.iana.org/assignments/media-types/media-types.xhtml) of + // the input. If the input is a raw document, refer to [supported file + // types](https://cloud.google.com/document-ai/docs/file-types) for the list + // of media types. If the input is a + // [Document][google.cloud.documentai.v1beta3.Document], the type should be + // `application/json`. string mime_type = 2; } - // The message for output config in batch process. + // The output configuration in the + // [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments] + // method. 
message BatchOutputConfig { option deprecated = true; @@ -478,44 +499,52 @@ message BatchProcessRequest { // The overall output config for batch process. BatchOutputConfig output_config = 3 [deprecated = true]; - // The input documents for batch process. + // The input documents for the + // [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments] + // method. BatchDocumentsInputConfig input_documents = 5; - // The overall output config for batch process. + // The output configuration for the + // [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments] + // method. DocumentOutputConfig document_output_config = 6; - // Whether Human Review feature should be skipped for this request. Default to - // false. + // Whether human review should be skipped for this request. Default to + // `false`. bool skip_human_review = 4; // Inference-time options for the process API ProcessOptions process_options = 7; } -// Response message for batch process document method. +// Response message for +// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments]. message BatchProcessResponse {} -// The long running operation metadata for batch process method. +// The long-running operation metadata for +// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments]. message BatchProcessMetadata { // The status of a each individual document in the batch process. message IndividualProcessStatus { - // The source of the document, same as the [input_gcs_source] field in the - // request when the batch process started. The batch process is started by - // take snapshot of that document, since a user can move or change that - // document during the process. 
+ // The source of the document, same as the + // [input_gcs_source][google.cloud.documentai.v1beta3.BatchProcessMetadata.IndividualProcessStatus.input_gcs_source] + // field in the request when the batch process started. string input_gcs_source = 1; // The status processing the document. google.rpc.Status status = 2; - // The output_gcs_destination (in the request as `output_gcs_destination`) + // The Cloud Storage output destination (in the request as + // [DocumentOutputConfig.GcsOutputConfig.gcs_uri][google.cloud.documentai.v1beta3.DocumentOutputConfig.GcsOutputConfig.gcs_uri]) // of the processed document if it was successful, otherwise empty. string output_gcs_destination = 3; // The name of the operation triggered by the processed document. If the - // human review process is not triggered, this field will be empty. It has - // the same response type and metadata as the long running operation - // returned by ReviewDocument method. + // human review process isn't triggered, this field will be empty. It has + // the same response type and metadata as the long-running operation + // returned by the + // [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument] + // method. string human_review_operation = 4 [deprecated = true]; // The status of human review on the processed document. @@ -563,11 +592,13 @@ message BatchProcessMetadata { repeated IndividualProcessStatus individual_process_statuses = 5; } -// Request message for fetch processor types. +// Request message for the +// [FetchProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.FetchProcessorTypes] +// method. Some processor types may require the project be added to an +// allowlist. message FetchProcessorTypesRequest { - // Required. The project of processor type to list. - // The available processor types may depend on the allow-listing on projects. - // Format: `projects/{project}/locations/{location}` + // Required. 
The location of processor types to list. + // Format: `projects/{project}/locations/{location}`. string parent = 1 [ (google.api.field_behavior) = REQUIRED, (google.api.resource_reference) = { @@ -576,17 +607,21 @@ message FetchProcessorTypesRequest { ]; } -// Response message for fetch processor types. +// Response message for the +// [FetchProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.FetchProcessorTypes] +// method. message FetchProcessorTypesResponse { // The list of processor types. repeated ProcessorType processor_types = 1; } -// Request message for list processor types. +// Request message for the +// [ListProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessorTypes] +// method. Some processor types may require the project be added to an +// allowlist. message ListProcessorTypesRequest { - // Required. The location of processor type to list. - // The available processor types may depend on the allow-listing on projects. - // Format: `projects/{project}/locations/{location}` + // Required. The location of processor types to list. + // Format: `projects/{project}/locations/{location}`. string parent = 1 [ (google.api.field_behavior) = REQUIRED, (google.api.resource_reference) = { @@ -595,15 +630,17 @@ message ListProcessorTypesRequest { ]; // The maximum number of processor types to return. - // If unspecified, at most 100 processor types will be returned. - // The maximum value is 500; values above 500 will be coerced to 500. + // If unspecified, at most `100` processor types will be returned. + // The maximum value is `500`. Values above `500` will be coerced to `500`. int32 page_size = 2; // Used to retrieve the next page of results, empty if at the end of the list. string page_token = 3; } -// Response message for list processor types. +// Response message for the +// [ListProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessorTypes] +// method. 
message ListProcessorTypesResponse { // The processor types. repeated ProcessorType processor_types = 1; @@ -624,8 +661,8 @@ message ListProcessorsRequest { ]; // The maximum number of processors to return. - // If unspecified, at most 50 processors will be returned. - // The maximum value is 100; values above 100 will be coerced to 100. + // If unspecified, at most `50` processors will be returned. + // The maximum value is `100`. Values above `100` will be coerced to `100`. int32 page_size = 2; // We will return the processors sorted by creation time. The page token @@ -633,7 +670,9 @@ message ListProcessorsRequest { string page_token = 3; } -// Response message for list processors. +// Response message for the +// [ListProcessors][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessors] +// method. message ListProcessorsResponse { // The list of processors. repeated Processor processors = 1; @@ -642,7 +681,9 @@ message ListProcessorsResponse { string next_page_token = 2; } -// Request message for get processor. +// Request message for the +// [GetProcessorType][google.cloud.documentai.v1beta3.DocumentProcessorService.GetProcessorType] +// method. message GetProcessorTypeRequest { // Required. The processor type resource name. string name = 1 [ @@ -653,7 +694,9 @@ message GetProcessorTypeRequest { ]; } -// Request message for get processor. +// Request message for the +// [GetProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.GetProcessor] +// method. message GetProcessorRequest { // Required. The processor resource name. string name = 1 [ @@ -664,7 +707,9 @@ message GetProcessorRequest { ]; } -// Request message for get processor version. +// Request message for the +// [GetProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.GetProcessorVersion] +// method. message GetProcessorVersionRequest { // Required. The processor resource name. 
string name = 1 [ @@ -688,8 +733,8 @@ message ListProcessorVersionsRequest { ]; // The maximum number of processor versions to return. - // If unspecified, at most 10 processor versions will be returned. - // The maximum value is 20; values above 20 will be coerced to 20. + // If unspecified, at most `10` processor versions will be returned. + // The maximum value is `20`. Values above `20` will be coerced to `20`. int32 page_size = 2; // We will return the processor versions sorted by creation time. The page @@ -697,7 +742,9 @@ message ListProcessorVersionsRequest { string page_token = 3; } -// Response message for list processors. +// Response message for the +// [ListProcessorVersions][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessorVersions] +// method. message ListProcessorVersionsResponse { // The list of processors. repeated ProcessorVersion processor_versions = 1; @@ -706,7 +753,9 @@ message ListProcessorVersionsResponse { string next_page_token = 2; } -// Request message for the delete processor version method. +// Request message for the +// [DeleteProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeleteProcessorVersion] +// method. message DeleteProcessorVersionRequest { // Required. The processor version resource name to be deleted. string name = 1 [ @@ -717,13 +766,17 @@ message DeleteProcessorVersionRequest { ]; } -// The long running operation metadata for delete processor version method. +// The long-running operation metadata for the +// [DeleteProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeleteProcessorVersion] +// method. message DeleteProcessorVersionMetadata { - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 1; } -// Request message for the deploy processor version method. 
+// Request message for the +// [DeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeployProcessorVersion] +// method. message DeployProcessorVersionRequest { // Required. The processor version resource name to be deployed. string name = 1 [ @@ -734,16 +787,22 @@ message DeployProcessorVersionRequest { ]; } -// Response message for the deploy processor version method. +// Response message for the +// [DeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeployProcessorVersion] +// method. message DeployProcessorVersionResponse {} -// The long running operation metadata for deploy processor version method. +// The long-running operation metadata for the +// [DeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeployProcessorVersion] +// method. message DeployProcessorVersionMetadata { - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 1; } -// Request message for the undeploy processor version method. +// Request message for the +// [UndeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.UndeployProcessorVersion] +// method. message UndeployProcessorVersionRequest { // Required. The processor version resource name to be undeployed. string name = 1 [ @@ -754,19 +813,24 @@ message UndeployProcessorVersionRequest { ]; } -// Response message for the undeploy processor version method. +// Response message for the +// [UndeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.UndeployProcessorVersion] +// method. message UndeployProcessorVersionResponse {} -// The long running operation metadata for the undeploy processor version +// The long-running operation metadata for the +// [UndeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.UndeployProcessorVersion] // method. 
message UndeployProcessorVersionMetadata { - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 1; } -// Request message for create a processor. Notice this request is sent to -// a regionalized backend service, and if the processor type is not available -// on that region, the creation will fail. +// Request message for the +// [CreateProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.CreateProcessor] +// method. Notice this request is sent to a regionalized backend service. If the +// [ProcessorType][google.cloud.documentai.v1beta3.ProcessorType] isn't +// available in that region, the creation fails. message CreateProcessorRequest { // Required. The parent (project and location) under which to create the // processor. Format: `projects/{project}/locations/{location}` @@ -777,13 +841,17 @@ message CreateProcessorRequest { } ]; - // Required. The processor to be created, requires [processor_type] and - // [display_name] to be set. Also, the processor is under CMEK if CMEK fields - // are set. + // Required. The processor to be created, requires + // [Processor.type][google.cloud.documentai.v1beta3.Processor.type] and + // [Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name] to be set. Also, the + // [Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name] + // field must be set if the processor is under CMEK. Processor processor = 2 [(google.api.field_behavior) = REQUIRED]; } -// Request message for the delete processor method. +// Request message for the +// [DeleteProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DeleteProcessor] +// method. message DeleteProcessorRequest { // Required. The processor resource name to be deleted. string name = 1 [ @@ -794,13 +862,17 @@ message DeleteProcessorRequest { ]; } -// The long running operation metadata for delete processor method. 
+// The long-running operation metadata for the +// [DeleteProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DeleteProcessor] +// method. message DeleteProcessorMetadata { - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 5; } -// Request message for the enable processor method. +// Request message for the +// [EnableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.EnableProcessor] +// method. message EnableProcessorRequest { // Required. The processor resource name to be enabled. string name = 1 [ @@ -811,17 +883,22 @@ message EnableProcessorRequest { ]; } -// Response message for the enable processor method. -// Intentionally empty proto for adding fields in future. +// Response message for the +// [EnableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.EnableProcessor] +// method. Intentionally empty proto for adding fields in future. message EnableProcessorResponse {} -// The long running operation metadata for enable processor method. +// The long-running operation metadata for the +// [EnableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.EnableProcessor] +// method. message EnableProcessorMetadata { - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 5; } -// Request message for the disable processor method. +// Request message for the +// [DisableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DisableProcessor] +// method. message DisableProcessorRequest { // Required. The processor resource name to be disabled. string name = 1 [ @@ -832,17 +909,22 @@ message DisableProcessorRequest { ]; } -// Response message for the disable processor method. -// Intentionally empty proto for adding fields in future. 
+// Response message for the +// [DisableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DisableProcessor] +// method. Intentionally empty proto for adding fields in future. message DisableProcessorResponse {} -// The long running operation metadata for disable processor method. +// The long-running operation metadata for the +// [DisableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DisableProcessor] +// method. message DisableProcessorMetadata { - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 5; } -// Request message for the set default processor version method. +// Request message for the +// [SetDefaultProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.SetDefaultProcessorVersion] +// method. message SetDefaultProcessorVersionRequest { // Required. The resource name of the // [Processor][google.cloud.documentai.v1beta3.Processor] to change default @@ -866,19 +948,25 @@ message SetDefaultProcessorVersionRequest { ]; } -// Response message for set default processor version method. +// Response message for the +// [SetDefaultProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.SetDefaultProcessorVersion] +// method. message SetDefaultProcessorVersionResponse {} -// The long running operation metadata for set default processor version +// The long-running operation metadata for the +// [SetDefaultProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.SetDefaultProcessorVersion] // method. message SetDefaultProcessorVersionMetadata { - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 1; } -// Request message for the create processor version method. 
+// Request message for the +// [TrainProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.TrainProcessorVersion] +// method. message TrainProcessorVersionRequest { - // The input data used to train a new `ProcessorVersion`. + // The input data used to train a new + // [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion]. message InputData { // The documents used for training the new version. BatchDocumentsInputConfig training_documents = 3; @@ -887,6 +975,28 @@ message TrainProcessorVersionRequest { BatchDocumentsInputConfig test_documents = 4; } + // Options to control the training of the Custom Document Extraction (CDE) + // Processor. + message CustomDocumentExtractionOptions { + // Training method for CDE. `TRAINING_METHOD_UNSPECIFIED` falls back to + // `MODEL_BASED`. + enum TrainingMethod { + TRAINING_METHOD_UNSPECIFIED = 0; + + MODEL_BASED = 1; + + TEMPLATE_BASED = 2; + } + + // Training method to use for CDE training. + TrainingMethod training_method = 3; + } + + oneof processor_flags { + // Options to control Custom Document Extraction (CDE) Processor. + CustomDocumentExtractionOptions custom_document_extraction_options = 5; + } + // Required. The parent (project, location and processor) to create the new // version for. Format: // `projects/{project}/locations/{location}/processors/{processor}`. @@ -904,7 +1014,8 @@ message TrainProcessorVersionRequest { // Optional. The schema the processor version will be trained with. DocumentSchema document_schema = 10 [(google.api.field_behavior) = OPTIONAL]; - // Optional. The input data used to train the `ProcessorVersion`. + // Optional. The input data used to train the + // [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion]. InputData input_data = 4 [(google.api.field_behavior) = OPTIONAL]; // Optional. The processor version to use as a base for training. 
This @@ -913,7 +1024,8 @@ message TrainProcessorVersionRequest { string base_processor_version = 8 [(google.api.field_behavior) = OPTIONAL]; } -// The response for the TrainProcessorVersion method. +// The response for +// [TrainProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.TrainProcessorVersion]. message TrainProcessorVersionResponse { // The resource name of the processor version produced by training. string processor_version = 1; @@ -941,7 +1053,7 @@ message TrainProcessorVersionMetadata { repeated google.rpc.Status dataset_errors = 2; } - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 1; // The training dataset validation information. @@ -951,7 +1063,9 @@ message TrainProcessorVersionMetadata { DatasetValidation test_dataset_validation = 3; } -// Request message for review document method. +// Request message for the +// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument] +// method. message ReviewDocumentRequest { // The priority level of the human review task. enum Priority { @@ -969,8 +1083,9 @@ message ReviewDocumentRequest { Document inline_document = 4; } - // Required. The resource name of the HumanReviewConfig that the document will - // be reviewed with. + // Required. The resource name of the + // [HumanReviewConfig][google.cloud.documentai.v1beta3.HumanReviewConfig] that + // the document will be reviewed with. string human_review_config = 1 [ (google.api.field_behavior) = REQUIRED, (google.api.resource_reference) = { @@ -991,7 +1106,9 @@ message ReviewDocumentRequest { DocumentSchema document_schema = 6; } -// Response message for review document method. +// Response message for the +// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument] +// method. message ReviewDocumentResponse { // Possible states of the review operation. 
enum State { @@ -1016,9 +1133,11 @@ message ReviewDocumentResponse { string rejection_reason = 3; } -// The long running operation metadata for review document method. +// The long-running operation metadata for the +// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument] +// method. message ReviewDocumentOperationMetadata { - // State of the longrunning operation. + // State of the long-running operation. enum State { // Unspecified state. STATE_UNSPECIFIED = 0; @@ -1052,14 +1171,16 @@ message ReviewDocumentOperationMetadata { // The last update time of the operation. google.protobuf.Timestamp update_time = 4; - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 5; // The Crowd Compute question ID. string question_id = 6; } -// Evaluates the given ProcessorVersion against the supplied documents. +// Evaluates the given +// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion] against +// the supplied documents. message EvaluateProcessorVersionRequest { // Required. The resource name of the // [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion] to @@ -1078,13 +1199,17 @@ message EvaluateProcessorVersionRequest { [(google.api.field_behavior) = OPTIONAL]; } -// Metadata of the EvaluateProcessorVersion method. +// Metadata of the +// [EvaluateProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.EvaluateProcessorVersion] +// method. message EvaluateProcessorVersionMetadata { - // The basic metadata of the long running operation. + // The basic metadata of the long-running operation. CommonOperationMetadata common_metadata = 1; } -// Metadata of the EvaluateProcessorVersion method. +// Response of the +// [EvaluateProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.EvaluateProcessorVersion] +// method. 
message EvaluateProcessorVersionResponse { // The resource name of the created evaluation. string evaluation = 2; @@ -1103,7 +1228,8 @@ message GetEvaluationRequest { ]; } -// Retrieves a list of evaluations for a given ProcessorVersion. +// Retrieves a list of evaluations for a given +// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion]. message ListEvaluationsRequest { // Required. The resource name of the // [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion] to @@ -1117,8 +1243,8 @@ message ListEvaluationsRequest { ]; // The standard list page size. - // If unspecified, at most 5 evaluations will be returned. - // The maximum value is 100; values above 100 will be coerced to 100. + // If unspecified, at most `5` evaluations are returned. + // The maximum value is `100`. Values above `100` are coerced to `100`. int32 page_size = 2; // A page token, received from a previous `ListEvaluations` call. @@ -1126,7 +1252,7 @@ message ListEvaluationsRequest { string page_token = 3; } -// The response from ListEvaluations. +// The response from `ListEvaluations`. message ListEvaluationsResponse { // The evaluations requested. repeated Evaluation evaluations = 1; @@ -1136,19 +1262,27 @@ message ListEvaluationsResponse { string next_page_token = 2; } -// The request message for the ImportProcessorVersion method. -// This method requires Document AI Service Agent of the destination project in -// the source project's IAM with [Document AI Editor -// role](https://cloud.google.com/document-ai/docs/access-control/iam-roles). +// The request message for the +// [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion] +// method. Requirements: // -// The destination project is specified as part of the `parent` field. -// The source project is specified as part of `source` field. +// - The source processor version and destination processor +// must be in the same location. 
+// - The Document AI [Service +// Agent](https://cloud.google.com/iam/docs/service-agents) of the destination +// project must have [Document AI Editor +// role](https://cloud.google.com/document-ai/docs/access-control/iam-roles) on +// the source project. // -// The Service Agent for Document AI can be found in -// https://cloud.google.com/iam/docs/service-agents. +// The destination project is specified as part of the +// [parent][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.parent] +// field. The source project is specified as part of the +// [source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.processor_version_source] +// field. message ImportProcessorVersionRequest { oneof source { - // The source processor version to import from. + // The source processor version to import from. The source processor version + // and destination processor need to be in the same environment and region. string processor_version_source = 2 [(google.api.resource_reference) = { type: "documentai.googleapis.com/ProcessorVersion" }]; @@ -1165,7 +1299,9 @@ message ImportProcessorVersionRequest { ]; } -// The response message for the ImportProcessorVersion method. +// The response message for the +// [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion] +// method. message ImportProcessorVersionResponse { // The destination processor version name. string processor_version = 1 [(google.api.resource_reference) = { @@ -1173,9 +1309,10 @@ message ImportProcessorVersionResponse { }]; } -// The long running operation metadata for the ImportProcessorVersion +// The long-running operation metadata for the +// [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion] // method. message ImportProcessorVersionMetadata { - // The basic metadata for the long running operation. + // The basic metadata for the long-running operation. 
CommonOperationMetadata common_metadata = 1; } diff --git a/google/cloud/documentai/v1beta3/document_schema.proto b/google/cloud/documentai/v1beta3/document_schema.proto index b792319da..b685be5f2 100644 --- a/google/cloud/documentai/v1beta3/document_schema.proto +++ b/google/cloud/documentai/v1beta3/document_schema.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,6 +24,18 @@ option java_package = "com.google.cloud.documentai.v1beta3"; option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3"; option ruby_package = "Google::Cloud::DocumentAI::V1beta3"; +// Metadata about a property. +message PropertyMetadata { + // Whether the property should be considered as "inactive". + bool inactive = 3; +} + +// Metadata about an entity type. +message EntityTypeMetadata { + // Whether the entity type should be considered inactive. + bool inactive = 5; +} + // The schema defines the output of the processed document by a processor. message DocumentSchema { // EntityType is the wrapper of a label of the corresponding model with @@ -38,20 +50,28 @@ message DocumentSchema { // Defines properties that can be part of the entity type. message Property { - // Types of occurrences of the entity type in the document. Note: this - // represents the number of instances of an entity types, not number of - // mentions of a given entity instance. + // Types of occurrences of the entity type in the document. This + // represents the number of instances of an entity, not the + // number of mentions of an entity. For example, a bank statement may + // only have one `account_number`, but this account number may be + // mentioned in several places on the document. In this case the + // `account_number` would be considered a `REQUIRED_ONCE` entity type. 
If, + // on the other hand, we expect a bank statement to contain the status of + // multiple different accounts for the customers, the occurrence type will + // be set to `REQUIRED_MULTIPLE`. enum OccurrenceType { // Unspecified occurrence type. OCCURRENCE_TYPE_UNSPECIFIED = 0; - // There will be zero or one instance of this entity type. + // There will be zero or one instance of this entity type. The same + // entity instance may be mentioned multiple times. OPTIONAL_ONCE = 1; // The entity type will appear zero or multiple times. OPTIONAL_MULTIPLE = 2; - // The entity type will only appear exactly once. + // The entity type will only appear exactly once. The same + // entity instance may be mentioned multiple times. REQUIRED_ONCE = 3; // The entity type will appear once or more times. @@ -69,6 +89,9 @@ message DocumentSchema { // Occurrence type limits the number of instances an entity type appears // in the document. OccurrenceType occurrence_type = 3; + + // Any additional metadata about the property can be added here. + PropertyMetadata property_metadata = 5; } oneof value_source { @@ -84,16 +107,15 @@ message DocumentSchema { string display_name = 13; // Name of the type. It must be unique within the schema file and - // cannot be a 'Common Type'. Besides that we use the following naming - // conventions: + // cannot be a "Common Type". The following naming conventions are used: // - // - *use `snake_casing`* - // - name matching is case-sensitive + // - Use `snake_casing`. + // - Name matching is case-sensitive. // - Maximum 64 characters. // - Must start with a letter. // - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward // compatibility internal infrastructure and tooling can handle any ascii - // character) + // character.) // - The `/` is sometimes used to denote a property of a type. For example // `line_item/amount`. This convention is deprecated, but will still be // honored for backward compatibility. 
@@ -103,14 +125,17 @@ message DocumentSchema { // one should be set. repeated string base_types = 2; - // Describing the nested structure, or composition of an entity. + // Description of the nested structure, or composition, of an entity. repeated Property properties = 6; + + // Metadata for the entity type. + EntityTypeMetadata entity_type_metadata = 11; } // Metadata for global schema behavior. message Metadata { - // If true, a `document` entity type can be applied to subdocument ( - // splitting). Otherwise, it can only be applied to the entire document + // If true, a `document` entity type can be applied to subdocument + // (splitting). Otherwise, it can only be applied to the entire document // (classification). bool document_splitter = 1; diff --git a/google/cloud/documentai/v1beta3/document_service.proto b/google/cloud/documentai/v1beta3/document_service.proto new file mode 100644 index 000000000..8dcf239e3 --- /dev/null +++ b/google/cloud/documentai/v1beta3/document_service.proto @@ -0,0 +1,112 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +package google.cloud.documentai.v1beta3; + +import "google/api/annotations.proto"; +import "google/api/client.proto"; +import "google/api/field_behavior.proto"; +import "google/api/resource.proto"; +import "google/cloud/documentai/v1beta3/dataset.proto"; +import "google/cloud/documentai/v1beta3/operation_metadata.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/field_mask.proto"; + +option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3"; +option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb"; +option java_multiple_files = true; +option java_outer_classname = "DocumentAiDocumentService"; +option java_package = "com.google.cloud.documentai.v1beta3"; +option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3"; +option ruby_package = "Google::Cloud::DocumentAI::V1beta3"; + +// Service to call Cloud DocumentAI to manage document collection (dataset). +service DocumentService { + option (google.api.default_host) = "documentai.googleapis.com"; + option (google.api.oauth_scopes) = + "https://www.googleapis.com/auth/cloud-platform"; + + // Updates metadata associated with a dataset. + rpc UpdateDataset(UpdateDatasetRequest) + returns (google.longrunning.Operation) { + option (google.api.http) = { + patch: "/v1beta3/{dataset.name=projects/*/locations/*/processors/*/dataset}" + body: "dataset" + }; + option (google.api.method_signature) = "dataset,update_mask"; + option (google.longrunning.operation_info) = { + response_type: "Dataset" + metadata_type: "UpdateDatasetOperationMetadata" + }; + } + + // Gets the `DatasetSchema` of a `Dataset`. + rpc GetDatasetSchema(GetDatasetSchemaRequest) returns (DatasetSchema) { + option (google.api.http) = { + get: "/v1beta3/{name=projects/*/locations/*/processors/*/dataset/datasetSchema}" + }; + option (google.api.method_signature) = "name"; + } + + // Updates a `DatasetSchema`. 
+ rpc UpdateDatasetSchema(UpdateDatasetSchemaRequest) returns (DatasetSchema) { + option (google.api.http) = { + patch: "/v1beta3/{dataset_schema.name=projects/*/locations/*/processors/*/dataset/datasetSchema}" + body: "dataset_schema" + }; + option (google.api.method_signature) = "dataset_schema,update_mask"; + } +} + +message UpdateDatasetRequest { + // Required. The `name` field of the `Dataset` is used to identify the + // resource to be updated. + Dataset dataset = 1 [(google.api.field_behavior) = REQUIRED]; + + // The update mask applies to the resource. + google.protobuf.FieldMask update_mask = 2; +} + +message UpdateDatasetOperationMetadata { + // The basic metadata of the long-running operation. + CommonOperationMetadata common_metadata = 1; +} + +// Request for `GetDatasetSchema`. +message GetDatasetSchemaRequest { + // Required. The dataset schema resource name. + // Format: + // `projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema` + string name = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "documentai.googleapis.com/DatasetSchema" + } + ]; + + // If set, only returns the visible fields of the schema. + bool visible_fields_only = 2; +} + +// Request for `UpdateDatasetSchema`. +message UpdateDatasetSchemaRequest { + // Required. The `name` field of the `DatasetSchema` is used to identify the + // resource to be updated. + DatasetSchema dataset_schema = 1 [(google.api.field_behavior) = REQUIRED]; + + // The update mask applies to the resource. 
+ google.protobuf.FieldMask update_mask = 2; +} diff --git a/google/cloud/documentai/v1beta3/documentai_v1beta3.yaml b/google/cloud/documentai/v1beta3/documentai_v1beta3.yaml index 58deb84ef..39aa11ab6 100644 --- a/google/cloud/documentai/v1beta3/documentai_v1beta3.yaml +++ b/google/cloud/documentai/v1beta3/documentai_v1beta3.yaml @@ -5,12 +5,14 @@ title: Cloud Document AI API apis: - name: google.cloud.documentai.v1beta3.DocumentProcessorService +- name: google.cloud.documentai.v1beta3.DocumentService - name: google.cloud.location.Locations - name: google.longrunning.Operations types: - name: google.cloud.documentai.v1beta3.BatchProcessMetadata - name: google.cloud.documentai.v1beta3.BatchProcessResponse +- name: google.cloud.documentai.v1beta3.Dataset - name: google.cloud.documentai.v1beta3.DeleteProcessorMetadata - name: google.cloud.documentai.v1beta3.DeleteProcessorVersionMetadata - name: google.cloud.documentai.v1beta3.DeployProcessorVersionMetadata @@ -31,6 +33,7 @@ types: - name: google.cloud.documentai.v1beta3.TrainProcessorVersionResponse - name: google.cloud.documentai.v1beta3.UndeployProcessorVersionMetadata - name: google.cloud.documentai.v1beta3.UndeployProcessorVersionResponse +- name: google.cloud.documentai.v1beta3.UpdateDatasetOperationMetadata documentation: summary: |- @@ -73,6 +76,10 @@ authentication: oauth: canonical_scopes: |- https://www.googleapis.com/auth/cloud-platform + - selector: 'google.cloud.documentai.v1beta3.DocumentService.*' + oauth: + canonical_scopes: |- + https://www.googleapis.com/auth/cloud-platform - selector: google.cloud.location.Locations.GetLocation oauth: canonical_scopes: |- diff --git a/google/cloud/documentai/v1beta3/documentai_v1beta3_grpc_service_config.json b/google/cloud/documentai/v1beta3/documentai_v1beta3_grpc_service_config.json index ad5e39478..e002f3b34 100644 --- a/google/cloud/documentai/v1beta3/documentai_v1beta3_grpc_service_config.json +++ 
b/google/cloud/documentai/v1beta3/documentai_v1beta3_grpc_service_config.json @@ -5,7 +5,21 @@ { "service": "google.cloud.documentai.v1beta3.DocumentProcessorService", "method": "ProcessDocument" - }, + } + ], + "timeout": "300s", + "retryPolicy": { + "initialBackoff": "0.100s", + "maxBackoff": "60s", + "backoffMultiplier": 1.3, + "retryableStatusCodes": [ + "DEADLINE_EXCEEDED", + "UNAVAILABLE" + ] + } + }, + { + "name": [ { "service": "google.cloud.documentai.v1beta3.DocumentProcessorService", "method": "BatchProcessDocuments" diff --git a/google/cloud/documentai/v1beta3/evaluation.proto b/google/cloud/documentai/v1beta3/evaluation.proto index e5087c818..4de7046b6 100644 --- a/google/cloud/documentai/v1beta3/evaluation.proto +++ b/google/cloud/documentai/v1beta3/evaluation.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/google/cloud/documentai/v1beta3/geometry.proto b/google/cloud/documentai/v1beta3/geometry.proto index 05eeac9c4..da949fb19 100644 --- a/google/cloud/documentai/v1beta3/geometry.proto +++ b/google/cloud/documentai/v1beta3/geometry.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/google/cloud/documentai/v1beta3/operation_metadata.proto b/google/cloud/documentai/v1beta3/operation_metadata.proto index e8f757a54..509109aba 100644 --- a/google/cloud/documentai/v1beta3/operation_metadata.proto +++ b/google/cloud/documentai/v1beta3/operation_metadata.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/google/cloud/documentai/v1beta3/processor.proto b/google/cloud/documentai/v1beta3/processor.proto index 7de236e1f..bafcdc0f4 100644 --- a/google/cloud/documentai/v1beta3/processor.proto +++ b/google/cloud/documentai/v1beta3/processor.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,10 +31,9 @@ option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3"; option ruby_package = "Google::Cloud::DocumentAI::V1beta3"; // A processor version is an implementation of a processor. Each processor -// can have multiple versions, pre-trained by Google internally or up-trained -// by the customer. At a time, a processor can only have one default version -// version. So the processor's behavior (when processing documents) is defined -// by a default version +// can have multiple versions, pretrained by Google internally or uptrained +// by the customer. A processor can only have one default version at a time. +// Its document-processing behavior is defined by that version. message ProcessorVersion { option (google.api.resource) = { type: "documentai.googleapis.com/ProcessorVersion" @@ -78,6 +77,9 @@ message ProcessorVersion { // The processor version failed and is in an indeterminate state. FAILED = 7; + + // The processor version is being imported. + IMPORTING = 8; } // The resource name of the processor version. @@ -106,7 +108,7 @@ message ProcessorVersion { // The KMS key version with which data is encrypted. string kms_key_version_name = 10; - // Denotes that this ProcessorVersion is managed by google. + // Denotes that this `ProcessorVersion` is managed by Google. bool google_managed = 11; // If set, information about the eventual deprecation of this version. 
@@ -163,8 +165,8 @@ message Processor { (google.api.field_behavior) = OUTPUT_ONLY ]; - // The processor type, e.g., `OCR_PROCESSOR`, `INVOICE_PROCESSOR`, etc. - // To get a list of processors types, see + // The processor type, such as: `OCR_PROCESSOR`, `INVOICE_PROCESSOR`. + // To get a list of processor types, see // [FetchProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.FetchProcessorTypes]. string type = 2; @@ -189,7 +191,7 @@ message Processor { // The time the processor was created. google.protobuf.Timestamp create_time = 7; - // The KMS key used for encryption/decryption in CMEK scenarios. - // See https://cloud.google.com/security-key-management. + // The [KMS key](https://cloud.google.com/security-key-management) used for + // encryption and decryption in CMEK scenarios. string kms_key_name = 8; } diff --git a/google/cloud/documentai/v1beta3/processor_type.proto b/google/cloud/documentai/v1beta3/processor_type.proto index 0ff23ffaf..2840eb085 100644 --- a/google/cloud/documentai/v1beta3/processor_type.proto +++ b/google/cloud/documentai/v1beta3/processor_type.proto @@ -1,4 +1,4 @@ -// Copyright 2022 Google LLC +// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -37,7 +37,8 @@ message ProcessorType { // The location information about where the processor is available. message LocationInfo { - // The location id, currently must be one of [us, eu]. + // The location ID. For supported locations, refer to [regional and + // multi-regional support](/document-ai/docs/regions). string location_id = 1; } @@ -45,7 +46,7 @@ message ProcessorType { // Format: `projects/{project}/processorTypes/{processor_type}` string name = 1; - // The processor type, e.g., `OCR_PROCESSOR`, `INVOICE_PROCESSOR`, etc. + // The processor type, such as: `OCR_PROCESSOR`, `INVOICE_PROCESSOR`. 
string type = 2; // The processor category, used by UI to group processor types.