feat: add StyleInfo to document.proto

feat: add REPLACE enum to OperationType in document.proto
feat: add PropertyMetadata and EntityTypeMetadata to document_schema.proto
feat: add IMPORTING enum to State in processor.proto
chore: updated comments

PiperOrigin-RevId: 540932243
gce-action-fix
Google APIs 2 years ago committed by Copybara-Service
parent 2d7af51207
commit 120a89c6c7
  1. 12
      google/cloud/documentai/v1beta3/BUILD.bazel
  2. 2
      google/cloud/documentai/v1beta3/barcode.proto
  3. 138
      google/cloud/documentai/v1beta3/dataset.proto
  4. 118
      google/cloud/documentai/v1beta3/document.proto
  5. 13
      google/cloud/documentai/v1beta3/document_io.proto
  6. 397
      google/cloud/documentai/v1beta3/document_processor_service.proto
  7. 53
      google/cloud/documentai/v1beta3/document_schema.proto
  8. 112
      google/cloud/documentai/v1beta3/document_service.proto
  9. 7
      google/cloud/documentai/v1beta3/documentai_v1beta3.yaml
  10. 16
      google/cloud/documentai/v1beta3/documentai_v1beta3_grpc_service_config.json
  11. 2
      google/cloud/documentai/v1beta3/evaluation.proto
  12. 2
      google/cloud/documentai/v1beta3/geometry.proto
  13. 2
      google/cloud/documentai/v1beta3/operation_metadata.proto
  14. 22
      google/cloud/documentai/v1beta3/processor.proto
  15. 7
      google/cloud/documentai/v1beta3/processor_type.proto

@ -22,10 +22,12 @@ proto_library(
name = "documentai_proto",
srcs = [
"barcode.proto",
"dataset.proto",
"document.proto",
"document_io.proto",
"document_processor_service.proto",
"document_schema.proto",
"document_service.proto",
"evaluation.proto",
"geometry.proto",
"operation_metadata.proto",
@ -91,8 +93,8 @@ java_gapic_library(
rest_numeric_enums = True,
service_yaml = "documentai_v1beta3.yaml",
test_deps = [
":documentai_java_grpc",
"//google/cloud/location:location_java_grpc",
":documentai_java_grpc",
],
transport = "grpc+rest",
deps = [
@ -107,6 +109,8 @@ java_gapic_test(
test_classes = [
"com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClientHttpJsonTest",
"com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClientTest",
"com.google.cloud.documentai.v1beta3.DocumentServiceClientHttpJsonTest",
"com.google.cloud.documentai.v1beta3.DocumentServiceClientTest",
],
runtime_deps = [":documentai_java_gapic_test"],
)
@ -242,10 +246,13 @@ php_gapic_library(
name = "documentai_php_gapic",
srcs = [":documentai_proto_with_info"],
grpc_service_config = "documentai_v1beta3_grpc_service_config.json",
migration_mode = "PRE_MIGRATION_SURFACE_ONLY",
rest_numeric_enums = True,
service_yaml = "documentai_v1beta3.yaml",
transport = "grpc+rest",
deps = [":documentai_php_proto"],
deps = [
":documentai_php_proto",
],
)
# Open Source Packages
@ -355,6 +362,7 @@ load(
csharp_proto_library(
name = "documentai_csharp_proto",
extra_opts = [""],
deps = [":documentai_proto"],
)

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

@ -0,0 +1,138 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta3;
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1beta3/document_io.proto";
import "google/cloud/documentai/v1beta3/document_schema.proto";
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DatasetProto";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
option (google.api.resource_definition) = {
type: "contentwarehouse.googleapis.com/Schema"
pattern: "projects/{project}/locations/{location}/schemas/{schema}"
};
// Singleton resource that lives under a
// [Processor][google.cloud.documentai.v1beta3.Processor] and configures a
// collection of documents.
message Dataset {
  option (google.api.resource) = {
    type: "documentai.googleapis.com/Dataset"
    pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset"
  };

  // Settings for the Cloud Storage-based dataset implementation.
  message GCSManagedConfig {
    // Required. Cloud Storage URI (a directory) under which the documents
    // belonging to the dataset must be stored.
    GcsPrefix gcs_prefix = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Settings for the Document AI Warehouse-based dataset implementation.
  message DocumentWarehouseConfig {
    // Output only. Document AI Warehouse collection associated with the
    // dataset.
    string collection = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Document AI Warehouse schema associated with the dataset.
    string schema = 2 [
      (google.api.field_behavior) = OUTPUT_ONLY,
      (google.api.resource_reference) = {
        type: "contentwarehouse.googleapis.com/Schema"
      }
    ];
  }

  // Settings for an unmanaged dataset. Carries no fields.
  message UnmanagedDatasetConfig {}

  // Settings for Spanner-based indexing. Carries no fields.
  message SpannerIndexingConfig {}

  // The states a dataset can be in.
  enum State {
    // Default, unspecified value. Should not be used.
    STATE_UNSPECIFIED = 0;

    // The dataset has not been initialized.
    UNINITIALIZED = 1;

    // The dataset is currently being initialized.
    INITIALIZING = 2;

    // The dataset has been initialized.
    INITIALIZED = 3;
  }

  // Where the dataset documents are stored; at most one member is set.
  oneof storage_source {
    // Optional. User-managed Cloud Storage dataset configuration. Choose this
    // when the dataset documents are kept in a Cloud Storage location that the
    // user manages.
    GCSManagedConfig gcs_managed_config = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Document AI Warehouse-based dataset configuration.
    DocumentWarehouseConfig document_warehouse_config = 5
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Unmanaged dataset configuration. Choose this when the dataset
    // documents are managed internally by the document service rather than by
    // the user.
    UnmanagedDatasetConfig unmanaged_dataset_config = 6
        [(google.api.field_behavior) = OPTIONAL];
  }

  // How the dataset is indexed.
  oneof indexing_source {
    // Optional. Lightweight indexing source offering low latency and high
    // reliability, without advanced features such as CMEK and content-based
    // search.
    SpannerIndexingConfig spanner_indexing_config = 4
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Resource name of the dataset.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/dataset`
  string name = 1;

  // Required. Current state of the dataset. Ignored when updating the dataset.
  State state = 2 [(google.api.field_behavior) = REQUIRED];
}
// Schema of a dataset, exposed as a resource beneath the dataset.
message DatasetSchema {
  option (google.api.resource) = {
    type: "documentai.googleapis.com/DatasetSchema"
    pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema"
  };

  // Resource name of the dataset schema.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema`
  string name = 1;

  // Optional. The document schema that applies to the dataset.
  DocumentSchema document_schema = 3 [(google.api.field_behavior) = OPTIONAL];
}

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -64,7 +64,8 @@ message Document {
// Font size for the text.
float size = 1;
// Unit for the font size. Follows CSS naming (in, px, pt, etc.).
// Unit for the font size. Follows CSS naming (such as `in`, `px`, and
// `pt`).
string unit = 2;
}
@ -78,17 +79,18 @@ message Document {
// Text background color.
google.type.Color background_color = 3;
// Font weight. Possible values are normal, bold, bolder, and lighter.
// https://www.w3schools.com/cssref/pr_font_weight.asp
// [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
// Possible values are `normal`, `bold`, `bolder`, and `lighter`.
string font_weight = 4;
// Text style. Possible values are normal, italic, and oblique.
// https://www.w3schools.com/cssref/pr_font_font-style.asp
// [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
// Possible values are `normal`, `italic`, and `oblique`.
string text_style = 5;
// Text decoration. Follows CSS standard.
// <text-decoration-line> <text-decoration-color> <text-decoration-style>
// https://www.w3schools.com/cssref/pr_text_text-decoration.asp
// [Text
// decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
// Follows CSS standard. <text-decoration-line> <text-decoration-color>
// <text-decoration-style>
string text_decoration = 6;
// Font size.
@ -118,7 +120,9 @@ message Document {
// Raw byte content of the image.
bytes content = 1;
// Encoding mime type for the image.
// Encoding [media type (MIME
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
// for the image.
string mime_type = 2;
// Width of the image in pixels.
@ -255,6 +259,59 @@ message Document {
Type type = 1;
}
// Font and other style attributes of the text.
message StyleInfo {
  // Font size in points (`1` point is `1/72` inch).
  int32 font_size = 1;

  // Font size in pixels, computed as _unrounded
  // [font_size][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_size]_
  // * _resolution_ ÷ `72.0`.
  double pixel_font_size = 2;

  // Spacing between letters, in points.
  double letter_spacing = 3;

  // Name or style of the font.
  string font_type = 4;

  // Whether the text is bold (equivalent to
  // [font_weight][google.cloud.documentai.v1beta3.Document.Page.Token.StyleInfo.font_weight]
  // being at least `700`).
  bool bold = 5;

  // Whether the text is italic.
  bool italic = 6;

  // Whether the text is underlined.
  bool underlined = 7;

  // Whether the text is struck through.
  bool strikeout = 8;

  // Whether the text is a subscript.
  bool subscript = 9;

  // Whether the text is a superscript.
  bool superscript = 10;

  // Whether the text is set in small caps.
  bool smallcaps = 11;

  // TrueType weight on a scale from `100` (thin) to `1000` (ultra-heavy).
  // Normal is `400`, bold is `700`.
  int32 font_weight = 12;

  // Whether the text is handwritten.
  bool handwritten = 13;

  // Color of the text.
  google.type.Color text_color = 14;

  // Color of the background.
  google.type.Color background_color = 15;
}
// [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for
// [Token][google.cloud.documentai.v1beta3.Document.Page.Token].
Layout layout = 1;
@ -268,6 +325,9 @@ message Document {
// The history of this annotation.
Provenance provenance = 4 [deprecated = true];
// Text style attributes.
StyleInfo style_info = 5;
}
// A detected symbol.
@ -333,7 +393,7 @@ message Document {
repeated DetectedLanguage detected_languages = 4;
// The history of this table.
Provenance provenance = 5;
Provenance provenance = 5 [deprecated = true];
}
// A form field detected on the page.
@ -389,16 +449,16 @@ message Document {
// Detected language for a structural component.
message DetectedLanguage {
// The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
// information, see
// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
// The [BCP-47 language
// code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
// such as `en-US` or `sr-Latn`.
string language_code = 1;
// Confidence of detected language. Range `[0, 1]`.
float confidence = 2;
}
// Image Quality Scores for the page image
// Image quality scores for the page image.
message ImageQualityScores {
// Image Quality Defects
message DetectedDefect {
@ -414,12 +474,12 @@ message Document {
// - `quality/defect_glare`
string type = 1;
// Confidence of detected defect. Range `[0, 1]` where 1 indicates
// strong confidence of that the defect exists.
// Confidence of detected defect. Range `[0, 1]` where `1` indicates
// strong confidence that the defect exists.
float confidence = 2;
}
// The overall quality score. Range `[0, 1]` where 1 is perfect quality.
// The overall quality score. Range `[0, 1]` where `1` is perfect quality.
float quality_score = 1;
// A list of detected defects.
@ -485,7 +545,7 @@ message Document {
// A list of detected barcodes.
repeated DetectedBarcode detected_barcodes = 15;
// Image Quality Scores.
// Image quality scores.
ImageQualityScores image_quality_scores = 17;
// The history of this page.
@ -740,13 +800,13 @@ message Document {
REMOVE = 2;
// Updates any fields within the given provenance scope of the message. It
// 'overwrites' the fields rather than replacing them. This is
// especially relevant when we just want to update a field value of an
// entity without also affecting all the child properties.
// overwrites the fields rather than replacing them. Use this when you
// want to update a field value of an entity without also updating all the
// child properties.
UPDATE = 7;
// Currently unused. Replace an element identified by `parent`.
REPLACE = 3 [deprecated = true];
REPLACE = 3;
// Deprecated. Request human review for the element identified by
// `parent`.
@ -839,10 +899,9 @@ message Document {
// Original source document from the user.
oneof source {
// Optional. Currently supports Google Cloud Storage URI of the form
// `gs://bucket_name/object_name`. Object versioning is not supported.
// See [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris) for more
// info.
// `gs://bucket_name/object_name`. Object versioning is not supported.
// For more information, refer to [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Inline document content, represented as a stream of bytes.
@ -851,9 +910,8 @@ message Document {
bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
}
// An IANA published MIME type (also referred to as media type). For more
// information, see
// https://www.iana.org/assignments/media-types/media-types.xhtml.
// An IANA published [media type (MIME
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
string mime_type = 3;
// Optional. UTF-8 encoded text in reading order from the document.

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -127,9 +127,18 @@ message OcrConfig {
// Adds additional latency comparable to regular OCR to the process call.
bool enable_image_quality_scores = 4;
// A list of advanced OCR options to further fine-tune OCR behavior.
// A list of advanced OCR options to further fine-tune OCR behavior. Current
// valid values are:
//
// - `legacy_layout`: a heuristics layout detection algorithm, which serves as
// an alternative to the current ML-based layout detection algorithm.
// Customers can choose the best suitable layout algorithm based on their
// situation.
repeated string advanced_ocr_options = 5;
// Includes symbol level OCR information if set to true.
bool enable_symbol = 6;
// Turn on font id model and returns font style information.
bool compute_style_info = 8;
}

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -28,6 +28,7 @@ import "google/cloud/documentai/v1beta3/operation_metadata.proto";
import "google/cloud/documentai/v1beta3/processor.proto";
import "google/cloud/documentai/v1beta3/processor_type.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
@ -48,7 +49,7 @@ option (google.api.resource_definition) = {
pattern: "projects/{project}/locations/{location}"
};
// Service to call Cloud DocumentAI to process documents according to the
// Service to call Document AI to process documents according to the
// processor's definition. Processors are built using state-of-the-art Google
// AI such as natural language, computer vision, and translation to extract
// structured information from unstructured or semi-structured documents.
@ -89,8 +90,9 @@ service DocumentProcessorService {
};
}
// Fetches processor types. Note that we do not use ListProcessorTypes here
// because it is not paginated.
// Fetches processor types. Note that we don't use
// [ListProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessorTypes]
// here, because it isn't paginated.
rpc FetchProcessorTypes(FetchProcessorTypesRequest)
returns (FetchProcessorTypesResponse) {
option (google.api.http) = {
@ -134,7 +136,7 @@ service DocumentProcessorService {
// Trains a new processor version.
// Operation metadata is returned as
// cloud_documentai_core.TrainProcessorVersionMetadata.
// [TrainProcessorVersionMetadata][google.cloud.documentai.v1beta3.TrainProcessorVersionMetadata].
rpc TrainProcessorVersion(TrainProcessorVersionRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
@ -208,8 +210,9 @@ service DocumentProcessorService {
};
}
// Creates a processor from the type processor that the user chose.
// The processor will be at "ENABLED" state by default after its creation.
// Creates a processor from the
// [ProcessorType][google.cloud.documentai.v1beta3.ProcessorType] provided.
// The processor will be at `ENABLED` state by default after its creation.
rpc CreateProcessor(CreateProcessorRequest) returns (Processor) {
option (google.api.http) = {
post: "/v1beta3/{parent=projects/*/locations/*}/processors"
@ -339,12 +342,14 @@ service DocumentProcessorService {
// Options for Process API
message ProcessOptions {
// Only applicable to "Document OCR Processor". Returns error if set on other
// Only applicable to `OCR_PROCESSOR`. Returns error if set on other
// processor types.
OcrConfig ocr_config = 1;
}
// Request message for the process document method.
// Request message for the
// [ProcessDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ProcessDocument]
// method.
message ProcessRequest {
// The document payload.
oneof source {
@ -370,16 +375,20 @@ message ProcessRequest {
(google.api.resource_reference) = { type: "*" }
];
// The document payload, the [content] and [mime_type] fields must be set.
// The document payload, the
// [content][google.cloud.documentai.v1beta3.Document.content] and
// [mime_type][google.cloud.documentai.v1beta3.Document.mime_type] fields must
// be set.
Document document = 2 [deprecated = true];
// Whether Human Review feature should be skipped for this request. Default to
// false.
// Whether human review should be skipped for this request. Default to
// `false`.
bool skip_human_review = 3;
// Specifies which fields to include in ProcessResponse's document.
// Only supports top level document and pages field so it must be in the form
// of `{document_field_name}` or `pages.{page_field_name}`.
// Specifies which fields to include in the
// [ProcessResponse.document][google.cloud.documentai.v1beta3.ProcessResponse.document]
// output. Only supports top-level document and pages field, so it must be in
// the form of `{document_field_name}` or `pages.{page_field_name}`.
google.protobuf.FieldMask field_mask = 6;
// Inference-time options for the process API
@ -394,7 +403,7 @@ message HumanReviewStatus {
STATE_UNSPECIFIED = 0;
// Human review is skipped for the document. This can happen because human
// review is not enabled on the processor or the processing request has
// review isn't enabled on the processor or the processing request has
// been set to skip this document.
SKIPPED = 1;
@ -405,7 +414,8 @@ message HumanReviewStatus {
IN_PROGRESS = 3;
// Some error happened during triggering human review, see the
// [state_message] for details.
// [state_message][google.cloud.documentai.v1beta3.HumanReviewStatus.state_message]
// for details.
ERROR = 4;
}
@ -416,29 +426,34 @@ message HumanReviewStatus {
string state_message = 2;
// The name of the operation triggered by the processed document. This field
// is populated only when the [state] is [HUMAN_REVIEW_IN_PROGRESS]. It has
// the same response type and metadata as the long running operation returned
// by [ReviewDocument] method.
// is populated only when the
// [state][google.cloud.documentai.v1beta3.HumanReviewStatus.state] is
// `HUMAN_REVIEW_IN_PROGRESS`. It has the same response type and metadata as
// the long-running operation returned by
// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument].
string human_review_operation = 3;
}
// Response message for the process document method.
// Response message for the
// [ProcessDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ProcessDocument]
// method.
message ProcessResponse {
// The document payload, will populate fields based on the processor's
// behavior.
Document document = 1;
// The name of the operation triggered by the processed document. If the human
// review process is not triggered, this field will be empty. It has the same
// response type and metadata as the long running operation returned by
// ReviewDocument method.
// review process isn't triggered, this field is empty. It has the same
// response type and metadata as the long-running operation returned by
// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument].
string human_review_operation = 2 [deprecated = true];
// The status of human review on the processed document.
HumanReviewStatus human_review_status = 3;
}
// Request message for batch process document method.
// Request message for
// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments].
message BatchProcessRequest {
// The message for input config in batch process.
message BatchInputConfig {
@ -447,13 +462,19 @@ message BatchProcessRequest {
// The Cloud Storage location as the source of the document.
string gcs_source = 1;
// Mimetype of the input. If the input is a raw document, the supported
// mimetypes are application/pdf, image/tiff, and image/gif.
// If the input is a [Document] proto, the type should be application/json.
// An IANA published [media type (MIME
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml) of
// the input. If the input is a raw document, refer to [supported file
// types](https://cloud.google.com/document-ai/docs/file-types) for the list
// of media types. If the input is a
// [Document][google.cloud.documentai.v1beta3.Document], the type should be
// `application/json`.
string mime_type = 2;
}
// The message for output config in batch process.
// The output configuration in the
// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments]
// method.
message BatchOutputConfig {
option deprecated = true;
@ -478,44 +499,52 @@ message BatchProcessRequest {
// The overall output config for batch process.
BatchOutputConfig output_config = 3 [deprecated = true];
// The input documents for batch process.
// The input documents for the
// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments]
// method.
BatchDocumentsInputConfig input_documents = 5;
// The overall output config for batch process.
// The output configuration for the
// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments]
// method.
DocumentOutputConfig document_output_config = 6;
// Whether Human Review feature should be skipped for this request. Default to
// false.
// Whether human review should be skipped for this request. Default to
// `false`.
bool skip_human_review = 4;
// Inference-time options for the process API
ProcessOptions process_options = 7;
}
// Response message for batch process document method.
// Response message for
// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments].
message BatchProcessResponse {}
// The long running operation metadata for batch process method.
// The long-running operation metadata for
// [BatchProcessDocuments][google.cloud.documentai.v1beta3.DocumentProcessorService.BatchProcessDocuments].
message BatchProcessMetadata {
// The status of each individual document in the batch process.
message IndividualProcessStatus {
// The source of the document, same as the [input_gcs_source] field in the
// request when the batch process started. The batch process is started by
// take snapshot of that document, since a user can move or change that
// document during the process.
// The source of the document, same as the
// [input_gcs_source][google.cloud.documentai.v1beta3.BatchProcessMetadata.IndividualProcessStatus.input_gcs_source]
// field in the request when the batch process started.
string input_gcs_source = 1;
// The status of processing the document.
google.rpc.Status status = 2;
// The output_gcs_destination (in the request as `output_gcs_destination`)
// The Cloud Storage output destination (in the request as
// [DocumentOutputConfig.GcsOutputConfig.gcs_uri][google.cloud.documentai.v1beta3.DocumentOutputConfig.GcsOutputConfig.gcs_uri])
// of the processed document if it was successful, otherwise empty.
string output_gcs_destination = 3;
// The name of the operation triggered by the processed document. If the
// human review process is not triggered, this field will be empty. It has
// the same response type and metadata as the long running operation
// returned by ReviewDocument method.
// human review process isn't triggered, this field will be empty. It has
// the same response type and metadata as the long-running operation
// returned by the
// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument]
// method.
string human_review_operation = 4 [deprecated = true];
// The status of human review on the processed document.
@ -563,11 +592,13 @@ message BatchProcessMetadata {
repeated IndividualProcessStatus individual_process_statuses = 5;
}
// Request message for fetch processor types.
// Request message for the
// [FetchProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.FetchProcessorTypes]
// method. Some processor types may require the project be added to an
// allowlist.
message FetchProcessorTypesRequest {
// Required. The project of processor type to list.
// The available processor types may depend on the allow-listing on projects.
// Format: `projects/{project}/locations/{location}`
// Required. The location of processor types to list.
// Format: `projects/{project}/locations/{location}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
@ -576,17 +607,21 @@ message FetchProcessorTypesRequest {
];
}
// Response message for fetch processor types.
// Response message for the
// [FetchProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.FetchProcessorTypes]
// method.
message FetchProcessorTypesResponse {
// The list of processor types.
repeated ProcessorType processor_types = 1;
}
// Request message for list processor types.
// Request message for the
// [ListProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessorTypes]
// method. Some processor types may require the project be added to an
// allowlist.
message ListProcessorTypesRequest {
// Required. The location of processor type to list.
// The available processor types may depend on the allow-listing on projects.
// Format: `projects/{project}/locations/{location}`
// Required. The location of processor types to list.
// Format: `projects/{project}/locations/{location}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
@ -595,15 +630,17 @@ message ListProcessorTypesRequest {
];
// The maximum number of processor types to return.
// If unspecified, at most 100 processor types will be returned.
// The maximum value is 500; values above 500 will be coerced to 500.
// If unspecified, at most `100` processor types will be returned.
// The maximum value is `500`. Values above `500` will be coerced to `500`.
int32 page_size = 2;
// Used to retrieve the next page of results, empty if at the end of the list.
string page_token = 3;
}
// Response message for list processor types.
// Response message for the
// [ListProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessorTypes]
// method.
message ListProcessorTypesResponse {
// The processor types.
repeated ProcessorType processor_types = 1;
@ -624,8 +661,8 @@ message ListProcessorsRequest {
];
// The maximum number of processors to return.
// If unspecified, at most 50 processors will be returned.
// The maximum value is 100; values above 100 will be coerced to 100.
// If unspecified, at most `50` processors will be returned.
// The maximum value is `100`. Values above `100` will be coerced to `100`.
int32 page_size = 2;
// We will return the processors sorted by creation time. The page token
@ -633,7 +670,9 @@ message ListProcessorsRequest {
string page_token = 3;
}
// Response message for list processors.
// Response message for the
// [ListProcessors][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessors]
// method.
message ListProcessorsResponse {
// The list of processors.
repeated Processor processors = 1;
@ -642,7 +681,9 @@ message ListProcessorsResponse {
string next_page_token = 2;
}
// Request message for get processor.
// Request message for the
// [GetProcessorType][google.cloud.documentai.v1beta3.DocumentProcessorService.GetProcessorType]
// method.
message GetProcessorTypeRequest {
// Required. The processor type resource name.
string name = 1 [
@ -653,7 +694,9 @@ message GetProcessorTypeRequest {
];
}
// Request message for get processor.
// Request message for the
// [GetProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.GetProcessor]
// method.
message GetProcessorRequest {
// Required. The processor resource name.
string name = 1 [
@ -664,7 +707,9 @@ message GetProcessorRequest {
];
}
// Request message for get processor version.
// Request message for the
// [GetProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.GetProcessorVersion]
// method.
message GetProcessorVersionRequest {
// Required. The processor version resource name.
string name = 1 [
@ -688,8 +733,8 @@ message ListProcessorVersionsRequest {
];
// The maximum number of processor versions to return.
// If unspecified, at most 10 processor versions will be returned.
// The maximum value is 20; values above 20 will be coerced to 20.
// If unspecified, at most `10` processor versions will be returned.
// The maximum value is `20`. Values above `20` will be coerced to `20`.
int32 page_size = 2;
// We will return the processor versions sorted by creation time. The page
@ -697,7 +742,9 @@ message ListProcessorVersionsRequest {
string page_token = 3;
}
// Response message for list processors.
// Response message for the
// [ListProcessorVersions][google.cloud.documentai.v1beta3.DocumentProcessorService.ListProcessorVersions]
// method.
message ListProcessorVersionsResponse {
// The list of processor versions.
repeated ProcessorVersion processor_versions = 1;
@ -706,7 +753,9 @@ message ListProcessorVersionsResponse {
string next_page_token = 2;
}
// Request message for the delete processor version method.
// Request message for the
// [DeleteProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeleteProcessorVersion]
// method.
message DeleteProcessorVersionRequest {
// Required. The processor version resource name to be deleted.
string name = 1 [
@ -717,13 +766,17 @@ message DeleteProcessorVersionRequest {
];
}
// The long running operation metadata for delete processor version method.
// The long-running operation metadata for the
// [DeleteProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeleteProcessorVersion]
// method.
message DeleteProcessorVersionMetadata {
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 1;
}
// Request message for the deploy processor version method.
// Request message for the
// [DeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeployProcessorVersion]
// method.
message DeployProcessorVersionRequest {
// Required. The processor version resource name to be deployed.
string name = 1 [
@ -734,16 +787,22 @@ message DeployProcessorVersionRequest {
];
}
// Response message for the deploy processor version method.
// Response message for the
// [DeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeployProcessorVersion]
// method.
message DeployProcessorVersionResponse {}
// The long running operation metadata for deploy processor version method.
// The long-running operation metadata for the
// [DeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.DeployProcessorVersion]
// method.
message DeployProcessorVersionMetadata {
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 1;
}
// Request message for the undeploy processor version method.
// Request message for the
// [UndeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.UndeployProcessorVersion]
// method.
message UndeployProcessorVersionRequest {
// Required. The processor version resource name to be undeployed.
string name = 1 [
@ -754,19 +813,24 @@ message UndeployProcessorVersionRequest {
];
}
// Response message for the undeploy processor version method.
// Response message for the
// [UndeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.UndeployProcessorVersion]
// method.
message UndeployProcessorVersionResponse {}
// The long running operation metadata for the undeploy processor version
// The long-running operation metadata for the
// [UndeployProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.UndeployProcessorVersion]
// method.
message UndeployProcessorVersionMetadata {
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 1;
}
// Request message for create a processor. Notice this request is sent to
// a regionalized backend service, and if the processor type is not available
// on that region, the creation will fail.
// Request message for the
// [CreateProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.CreateProcessor]
// method. Notice this request is sent to a regionalized backend service. If the
// [ProcessorType][google.cloud.documentai.v1beta3.ProcessorType] isn't
// available in that region, the creation fails.
message CreateProcessorRequest {
// Required. The parent (project and location) under which to create the
// processor. Format: `projects/{project}/locations/{location}`
@ -777,13 +841,17 @@ message CreateProcessorRequest {
}
];
// Required. The processor to be created, requires [processor_type] and
// [display_name] to be set. Also, the processor is under CMEK if CMEK fields
// are set.
// Required. The processor to be created, requires
// [Processor.type][google.cloud.documentai.v1beta3.Processor.type] and
// [Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name] to be set. Also, the
// [Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name]
// field must be set if the processor is under CMEK.
Processor processor = 2 [(google.api.field_behavior) = REQUIRED];
}
// Request message for the delete processor method.
// Request message for the
// [DeleteProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DeleteProcessor]
// method.
message DeleteProcessorRequest {
// Required. The processor resource name to be deleted.
string name = 1 [
@ -794,13 +862,17 @@ message DeleteProcessorRequest {
];
}
// The long running operation metadata for delete processor method.
// The long-running operation metadata for the
// [DeleteProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DeleteProcessor]
// method.
message DeleteProcessorMetadata {
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 5;
}
// Request message for the enable processor method.
// Request message for the
// [EnableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.EnableProcessor]
// method.
message EnableProcessorRequest {
// Required. The processor resource name to be enabled.
string name = 1 [
@ -811,17 +883,22 @@ message EnableProcessorRequest {
];
}
// Response message for the enable processor method.
// Intentionally empty proto for adding fields in future.
// Response message for the
// [EnableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.EnableProcessor]
// method. Intentionally empty proto for adding fields in the future.
message EnableProcessorResponse {}
// The long running operation metadata for enable processor method.
// The long-running operation metadata for the
// [EnableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.EnableProcessor]
// method.
message EnableProcessorMetadata {
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 5;
}
// Request message for the disable processor method.
// Request message for the
// [DisableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DisableProcessor]
// method.
message DisableProcessorRequest {
// Required. The processor resource name to be disabled.
string name = 1 [
@ -832,17 +909,22 @@ message DisableProcessorRequest {
];
}
// Response message for the disable processor method.
// Intentionally empty proto for adding fields in future.
// Response message for the
// [DisableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DisableProcessor]
// method. Intentionally empty proto for adding fields in the future.
message DisableProcessorResponse {}
// The long running operation metadata for disable processor method.
// The long-running operation metadata for the
// [DisableProcessor][google.cloud.documentai.v1beta3.DocumentProcessorService.DisableProcessor]
// method.
message DisableProcessorMetadata {
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 5;
}
// Request message for the set default processor version method.
// Request message for the
// [SetDefaultProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.SetDefaultProcessorVersion]
// method.
message SetDefaultProcessorVersionRequest {
// Required. The resource name of the
// [Processor][google.cloud.documentai.v1beta3.Processor] to change default
@ -866,19 +948,25 @@ message SetDefaultProcessorVersionRequest {
];
}
// Response message for set default processor version method.
// Response message for the
// [SetDefaultProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.SetDefaultProcessorVersion]
// method.
message SetDefaultProcessorVersionResponse {}
// The long running operation metadata for set default processor version
// The long-running operation metadata for the
// [SetDefaultProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.SetDefaultProcessorVersion]
// method.
message SetDefaultProcessorVersionMetadata {
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 1;
}
// Request message for the create processor version method.
// Request message for the
// [TrainProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.TrainProcessorVersion]
// method.
message TrainProcessorVersionRequest {
// The input data used to train a new `ProcessorVersion`.
// The input data used to train a new
// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion].
message InputData {
// The documents used for training the new version.
BatchDocumentsInputConfig training_documents = 3;
@ -887,6 +975,28 @@ message TrainProcessorVersionRequest {
BatchDocumentsInputConfig test_documents = 4;
}
// Options to control the training of the Custom Document Extraction (CDE)
// processor.
message CustomDocumentExtractionOptions {
// Training method for CDE. `TRAINING_METHOD_UNSPECIFIED` falls back to
// `MODEL_BASED`.
enum TrainingMethod {
// Unspecified training method; treated as `MODEL_BASED`.
TRAINING_METHOD_UNSPECIFIED = 0;
// Model-based training.
MODEL_BASED = 1;
// Template-based training.
TEMPLATE_BASED = 2;
}
// Training method to use for CDE training.
TrainingMethod training_method = 3;
}
oneof processor_flags {
// Options to control Custom Document Extraction (CDE) Processor.
CustomDocumentExtractionOptions custom_document_extraction_options = 5;
}
// Required. The parent (project, location and processor) to create the new
// version for. Format:
// `projects/{project}/locations/{location}/processors/{processor}`.
@ -904,7 +1014,8 @@ message TrainProcessorVersionRequest {
// Optional. The schema the processor version will be trained with.
DocumentSchema document_schema = 10 [(google.api.field_behavior) = OPTIONAL];
// Optional. The input data used to train the `ProcessorVersion`.
// Optional. The input data used to train the
// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion].
InputData input_data = 4 [(google.api.field_behavior) = OPTIONAL];
// Optional. The processor version to use as a base for training. This
@ -913,7 +1024,8 @@ message TrainProcessorVersionRequest {
string base_processor_version = 8 [(google.api.field_behavior) = OPTIONAL];
}
// The response for the TrainProcessorVersion method.
// The response for
// [TrainProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.TrainProcessorVersion].
message TrainProcessorVersionResponse {
// The resource name of the processor version produced by training.
string processor_version = 1;
@ -941,7 +1053,7 @@ message TrainProcessorVersionMetadata {
repeated google.rpc.Status dataset_errors = 2;
}
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 1;
// The training dataset validation information.
@ -951,7 +1063,9 @@ message TrainProcessorVersionMetadata {
DatasetValidation test_dataset_validation = 3;
}
// Request message for review document method.
// Request message for the
// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument]
// method.
message ReviewDocumentRequest {
// The priority level of the human review task.
enum Priority {
@ -969,8 +1083,9 @@ message ReviewDocumentRequest {
Document inline_document = 4;
}
// Required. The resource name of the HumanReviewConfig that the document will
// be reviewed with.
// Required. The resource name of the
// [HumanReviewConfig][google.cloud.documentai.v1beta3.HumanReviewConfig] that
// the document will be reviewed with.
string human_review_config = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
@ -991,7 +1106,9 @@ message ReviewDocumentRequest {
DocumentSchema document_schema = 6;
}
// Response message for review document method.
// Response message for the
// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument]
// method.
message ReviewDocumentResponse {
// Possible states of the review operation.
enum State {
@ -1016,9 +1133,11 @@ message ReviewDocumentResponse {
string rejection_reason = 3;
}
// The long running operation metadata for review document method.
// The long-running operation metadata for the
// [ReviewDocument][google.cloud.documentai.v1beta3.DocumentProcessorService.ReviewDocument]
// method.
message ReviewDocumentOperationMetadata {
// State of the longrunning operation.
// State of the long-running operation.
enum State {
// Unspecified state.
STATE_UNSPECIFIED = 0;
@ -1052,14 +1171,16 @@ message ReviewDocumentOperationMetadata {
// The last update time of the operation.
google.protobuf.Timestamp update_time = 4;
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 5;
// The Crowd Compute question ID.
string question_id = 6;
}
// Evaluates the given ProcessorVersion against the supplied documents.
// Evaluates the given
// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion] against
// the supplied documents.
message EvaluateProcessorVersionRequest {
// Required. The resource name of the
// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion] to
@ -1078,13 +1199,17 @@ message EvaluateProcessorVersionRequest {
[(google.api.field_behavior) = OPTIONAL];
}
// Metadata of the EvaluateProcessorVersion method.
// Metadata of the
// [EvaluateProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.EvaluateProcessorVersion]
// method.
message EvaluateProcessorVersionMetadata {
// The basic metadata of the long running operation.
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 1;
}
// Metadata of the EvaluateProcessorVersion method.
// Response of the
// [EvaluateProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.EvaluateProcessorVersion]
// method.
message EvaluateProcessorVersionResponse {
// The resource name of the created evaluation.
string evaluation = 2;
@ -1103,7 +1228,8 @@ message GetEvaluationRequest {
];
}
// Retrieves a list of evaluations for a given ProcessorVersion.
// Retrieves a list of evaluations for a given
// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion].
message ListEvaluationsRequest {
// Required. The resource name of the
// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion] to
@ -1117,8 +1243,8 @@ message ListEvaluationsRequest {
];
// The standard list page size.
// If unspecified, at most 5 evaluations will be returned.
// The maximum value is 100; values above 100 will be coerced to 100.
// If unspecified, at most `5` evaluations are returned.
// The maximum value is `100`. Values above `100` are coerced to `100`.
int32 page_size = 2;
// A page token, received from a previous `ListEvaluations` call.
@ -1126,7 +1252,7 @@ message ListEvaluationsRequest {
string page_token = 3;
}
// The response from ListEvaluations.
// The response from `ListEvaluations`.
message ListEvaluationsResponse {
// The evaluations requested.
repeated Evaluation evaluations = 1;
@ -1136,19 +1262,27 @@ message ListEvaluationsResponse {
string next_page_token = 2;
}
// The request message for the ImportProcessorVersion method.
// This method requires Document AI Service Agent of the destination project in
// the source project's IAM with [Document AI Editor
// role](https://cloud.google.com/document-ai/docs/access-control/iam-roles).
// The request message for the
// [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion]
// method. Requirements:
//
// The destination project is specified as part of the `parent` field.
// The source project is specified as part of `source` field.
// - The source processor version and destination processor
// must be in the same location.
// - The Document AI [Service
// Agent](https://cloud.google.com/iam/docs/service-agents) of the destination
// project must have [Document AI Editor
// role](https://cloud.google.com/document-ai/docs/access-control/iam-roles) on
// the source project.
//
// The Service Agent for Document AI can be found in
// https://cloud.google.com/iam/docs/service-agents.
// The destination project is specified as part of the
// [parent][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.parent]
// field. The source project is specified as part of the
// [source][google.cloud.documentai.v1beta3.ImportProcessorVersionRequest.processor_version_source]
// field.
message ImportProcessorVersionRequest {
oneof source {
// The source processor version to import from.
// The source processor version to import from. The source processor version
// and destination processor need to be in the same environment and region.
string processor_version_source = 2 [(google.api.resource_reference) = {
type: "documentai.googleapis.com/ProcessorVersion"
}];
@ -1165,7 +1299,9 @@ message ImportProcessorVersionRequest {
];
}
// The response message for the ImportProcessorVersion method.
// The response message for the
// [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion]
// method.
message ImportProcessorVersionResponse {
// The destination processor version name.
string processor_version = 1 [(google.api.resource_reference) = {
@ -1173,9 +1309,10 @@ message ImportProcessorVersionResponse {
}];
}
// The long running operation metadata for the ImportProcessorVersion
// The long-running operation metadata for the
// [ImportProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.ImportProcessorVersion]
// method.
message ImportProcessorVersionMetadata {
// The basic metadata for the long running operation.
// The basic metadata for the long-running operation.
CommonOperationMetadata common_metadata = 1;
}

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -24,6 +24,18 @@ option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
// Metadata about a property. Referenced by the `property_metadata` field of
// the schema's `Property` message.
message PropertyMetadata {
// Whether the property should be considered inactive.
bool inactive = 3;
}
// Metadata about an entity type. Referenced by the `entity_type_metadata`
// field of the schema's entity type message.
message EntityTypeMetadata {
// Whether the entity type should be considered inactive.
bool inactive = 5;
}
// The schema defines the output of the processed document by a processor.
message DocumentSchema {
// EntityType is the wrapper of a label of the corresponding model with
@ -38,20 +50,28 @@ message DocumentSchema {
// Defines properties that can be part of the entity type.
message Property {
// Types of occurrences of the entity type in the document. Note: this
// represents the number of instances of an entity types, not number of
// mentions of a given entity instance.
// Types of occurrences of the entity type in the document. This
// represents the number of instances of an entity, not
// number of mentions of an entity. For example, a bank statement may
// only have one `account_number`, but this account number may be
// mentioned in several places on the document. In this case the
// `account_number` would be considered a `REQUIRED_ONCE` entity type. If,
// on the other hand, we expect a bank statement to contain the status of
// multiple different accounts for the customers, the occurrence type will
// be set to `REQUIRED_MULTIPLE`.
enum OccurrenceType {
// Unspecified occurrence type.
OCCURRENCE_TYPE_UNSPECIFIED = 0;
// There will be zero or one instance of this entity type.
// There will be zero or one instance of this entity type. The same
// entity instance may be mentioned multiple times.
OPTIONAL_ONCE = 1;
// The entity type will appear zero or multiple times.
OPTIONAL_MULTIPLE = 2;
// The entity type will only appear exactly once.
// The entity type will only appear exactly once. The same
// entity instance may be mentioned multiple times.
REQUIRED_ONCE = 3;
// The entity type will appear once or more times.
@ -69,6 +89,9 @@ message DocumentSchema {
// Occurrence type limits the number of instances an entity type appears
// in the document.
OccurrenceType occurrence_type = 3;
// Any additional metadata about the property can be added here.
PropertyMetadata property_metadata = 5;
}
oneof value_source {
@ -84,16 +107,15 @@ message DocumentSchema {
string display_name = 13;
// Name of the type. It must be unique within the schema file and
// cannot be a 'Common Type'. Besides that we use the following naming
// conventions:
// cannot be a "Common Type". The following naming conventions are used:
//
// - *use `snake_casing`*
// - name matching is case-sensitive
// - Use `snake_casing`.
// - Name matching is case-sensitive.
// - Maximum 64 characters.
// - Must start with a letter.
// - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
// compatibility internal infrastructure and tooling can handle any ASCII
// character)
// character.)
// - The `/` is sometimes used to denote a property of a type. For example
// `line_item/amount`. This convention is deprecated, but will still be
// honored for backward compatibility.
@ -103,14 +125,17 @@ message DocumentSchema {
// one should be set.
repeated string base_types = 2;
// Describing the nested structure, or composition of an entity.
// Describes the nested structure, or composition, of an entity.
repeated Property properties = 6;
// Metadata for the entity type.
EntityTypeMetadata entity_type_metadata = 11;
}
// Metadata for global schema behavior.
message Metadata {
// If true, a `document` entity type can be applied to subdocument (
// splitting). Otherwise, it can only be applied to the entire document
// If true, a `document` entity type can be applied to subdocument
// (splitting). Otherwise, it can only be applied to the entire document
// (classification).
bool document_splitter = 1;

@ -0,0 +1,112 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta3;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1beta3/dataset.proto";
import "google/cloud/documentai/v1beta3/operation_metadata.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentService";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
// Service to call Cloud Document AI to manage the document collection
// (dataset) of a processor.
service DocumentService {
option (google.api.default_host) = "documentai.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
// Updates metadata associated with a dataset.
//
// Returns a long-running operation whose response type is `Dataset` and
// whose metadata type is `UpdateDatasetOperationMetadata`.
rpc UpdateDataset(UpdateDatasetRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
patch: "/v1beta3/{dataset.name=projects/*/locations/*/processors/*/dataset}"
body: "dataset"
};
option (google.api.method_signature) = "dataset,update_mask";
option (google.longrunning.operation_info) = {
response_type: "Dataset"
metadata_type: "UpdateDatasetOperationMetadata"
};
}
// Gets the `DatasetSchema` of a `Dataset`.
rpc GetDatasetSchema(GetDatasetSchemaRequest) returns (DatasetSchema) {
option (google.api.http) = {
get: "/v1beta3/{name=projects/*/locations/*/processors/*/dataset/datasetSchema}"
};
option (google.api.method_signature) = "name";
}
// Updates a `DatasetSchema` and returns the updated `DatasetSchema`.
rpc UpdateDatasetSchema(UpdateDatasetSchemaRequest) returns (DatasetSchema) {
option (google.api.http) = {
patch: "/v1beta3/{dataset_schema.name=projects/*/locations/*/processors/*/dataset/datasetSchema}"
body: "dataset_schema"
};
option (google.api.method_signature) = "dataset_schema,update_mask";
}
}
// Request message for the `UpdateDataset` method.
message UpdateDatasetRequest {
// Required. The `name` field of the `Dataset` is used to identify the
// resource to be updated.
Dataset dataset = 1 [(google.api.field_behavior) = REQUIRED];
// The update mask that applies to the resource.
google.protobuf.FieldMask update_mask = 2;
}
// The long-running operation metadata for the `UpdateDataset` method.
message UpdateDatasetOperationMetadata {
// The basic metadata of the long-running operation.
CommonOperationMetadata common_metadata = 1;
}
// Request message for the `GetDatasetSchema` method.
message GetDatasetSchemaRequest {
// Required. The dataset schema resource name.
// Format:
// `projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema`
string name = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "documentai.googleapis.com/DatasetSchema"
}
];
// If set, only returns the visible fields of the schema.
bool visible_fields_only = 2;
}
// Request message for the `UpdateDatasetSchema` method.
message UpdateDatasetSchemaRequest {
// Required. The `name` field of the `DatasetSchema` is used to identify the
// resource to be updated.
DatasetSchema dataset_schema = 1 [(google.api.field_behavior) = REQUIRED];
// The update mask that applies to the resource.
google.protobuf.FieldMask update_mask = 2;
}

@ -5,12 +5,14 @@ title: Cloud Document AI API
apis:
- name: google.cloud.documentai.v1beta3.DocumentProcessorService
- name: google.cloud.documentai.v1beta3.DocumentService
- name: google.cloud.location.Locations
- name: google.longrunning.Operations
types:
- name: google.cloud.documentai.v1beta3.BatchProcessMetadata
- name: google.cloud.documentai.v1beta3.BatchProcessResponse
- name: google.cloud.documentai.v1beta3.Dataset
- name: google.cloud.documentai.v1beta3.DeleteProcessorMetadata
- name: google.cloud.documentai.v1beta3.DeleteProcessorVersionMetadata
- name: google.cloud.documentai.v1beta3.DeployProcessorVersionMetadata
@ -31,6 +33,7 @@ types:
- name: google.cloud.documentai.v1beta3.TrainProcessorVersionResponse
- name: google.cloud.documentai.v1beta3.UndeployProcessorVersionMetadata
- name: google.cloud.documentai.v1beta3.UndeployProcessorVersionResponse
- name: google.cloud.documentai.v1beta3.UpdateDatasetOperationMetadata
documentation:
summary: |-
@ -73,6 +76,10 @@ authentication:
oauth:
canonical_scopes: |-
https://www.googleapis.com/auth/cloud-platform
- selector: 'google.cloud.documentai.v1beta3.DocumentService.*'
oauth:
canonical_scopes: |-
https://www.googleapis.com/auth/cloud-platform
- selector: google.cloud.location.Locations.GetLocation
oauth:
canonical_scopes: |-

@ -5,7 +5,21 @@
{
"service": "google.cloud.documentai.v1beta3.DocumentProcessorService",
"method": "ProcessDocument"
},
}
],
"timeout": "300s",
"retryPolicy": {
"initialBackoff": "0.100s",
"maxBackoff": "60s",
"backoffMultiplier": 1.3,
"retryableStatusCodes": [
"DEADLINE_EXCEEDED",
"UNAVAILABLE"
]
}
},
{
"name": [
{
"service": "google.cloud.documentai.v1beta3.DocumentProcessorService",
"method": "BatchProcessDocuments"

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -31,10 +31,9 @@ option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
// A processor version is an implementation of a processor. Each processor
// can have multiple versions, pre-trained by Google internally or up-trained
// by the customer. At a time, a processor can only have one default version
// version. So the processor's behavior (when processing documents) is defined
// by a default version
// can have multiple versions, pretrained by Google internally or uptrained
// by the customer. A processor can only have one default version at a time.
// Its document-processing behavior is defined by that version.
message ProcessorVersion {
option (google.api.resource) = {
type: "documentai.googleapis.com/ProcessorVersion"
@ -78,6 +77,9 @@ message ProcessorVersion {
// The processor version failed and is in an indeterminate state.
FAILED = 7;
// The processor version is being imported.
IMPORTING = 8;
}
// The resource name of the processor version.
@ -106,7 +108,7 @@ message ProcessorVersion {
// The KMS key version with which data is encrypted.
string kms_key_version_name = 10;
// Denotes that this ProcessorVersion is managed by google.
// Denotes that this `ProcessorVersion` is managed by Google.
bool google_managed = 11;
// If set, information about the eventual deprecation of this version.
@ -163,8 +165,8 @@ message Processor {
(google.api.field_behavior) = OUTPUT_ONLY
];
// The processor type, e.g., `OCR_PROCESSOR`, `INVOICE_PROCESSOR`, etc.
// To get a list of processors types, see
// The processor type, such as: `OCR_PROCESSOR`, `INVOICE_PROCESSOR`.
// To get a list of processor types, see
// [FetchProcessorTypes][google.cloud.documentai.v1beta3.DocumentProcessorService.FetchProcessorTypes].
string type = 2;
@ -189,7 +191,7 @@ message Processor {
// The time the processor was created.
google.protobuf.Timestamp create_time = 7;
// The KMS key used for encryption/decryption in CMEK scenarios.
// See https://cloud.google.com/security-key-management.
// The [KMS key](https://cloud.google.com/security-key-management) used for
// encryption and decryption in CMEK scenarios.
string kms_key_name = 8;
}

@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -37,7 +37,8 @@ message ProcessorType {
// The location information about where the processor is available.
message LocationInfo {
// The location id, currently must be one of [us, eu].
// The location ID. For supported locations, refer to [regional and
// multi-regional support](https://cloud.google.com/document-ai/docs/regions).
string location_id = 1;
}
@ -45,7 +46,7 @@ message ProcessorType {
// Format: `projects/{project}/processorTypes/{processor_type}`
string name = 1;
// The processor type, e.g., `OCR_PROCESSOR`, `INVOICE_PROCESSOR`, etc.
// The processor type, such as: `OCR_PROCESSOR`, `INVOICE_PROCESSOR`.
string type = 2;
// The processor category, used by UI to group processor types.

Loading…
Cancel
Save