// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta3;

// Annotation definitions for the (google.api.field_behavior) and
// (google.api.resource*) options used below.
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
// Sibling schemas from the same package; presumably the source of the
// GcsPrefix and DocumentSchema types referenced in this file.
import "google/cloud/documentai/v1beta3/document_io.proto";
import "google/cloud/documentai/v1beta3/document_schema.proto";

// Per-language code generation settings.
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DatasetProto";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Declares the Document AI Warehouse schema resource type. No message in this
// file represents it; it is declared at file level so that
// Dataset.DocumentWarehouseConfig.schema can point at it through
// (google.api.resource_reference).
option (google.api.resource_definition) = {
  type: "contentwarehouse.googleapis.com/Schema"
  pattern: "projects/{project}/locations/{location}/schemas/{schema}"
};

// A singleton resource under a
// [Processor][google.cloud.documentai.v1beta3.Processor] which configures a
// collection of documents.
message Dataset {
  option (google.api.resource) = {
    type: "documentai.googleapis.com/Dataset"
    pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset"
  };

  // Configuration specific to the Cloud Storage-based implementation.
  message GCSManagedConfig {
    // Required. The Cloud Storage URI (a directory) where the documents
    // belonging to the dataset must be stored.
    GcsPrefix gcs_prefix = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Configuration specific to the Document AI Warehouse-based implementation.
  message DocumentWarehouseConfig {
    // Output only. The collection in Document AI Warehouse associated with the
    // dataset.
    string collection = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The schema in Document AI Warehouse associated with the
    // dataset.
    string schema = 2 [
      (google.api.field_behavior) = OUTPUT_ONLY,
      (google.api.resource_reference) = {
        type: "contentwarehouse.googleapis.com/Schema"
      }
    ];
  }

  // Configuration specific to an unmanaged dataset.
  //
  // This message has no fields.
  message UnmanagedDatasetConfig {}

  // Configuration specific to spanner-based indexing.
  //
  // This message has no fields.
  message SpannerIndexingConfig {}

  // Different states of a dataset.
  enum State {
    // Default unspecified enum, should not be used.
    STATE_UNSPECIFIED = 0;

    // Dataset has not been initialized.
    UNINITIALIZED = 1;

    // Dataset is being initialized.
    INITIALIZING = 2;

    // Dataset has been initialized.
    INITIALIZED = 3;
  }

  // Where the documents of the dataset are stored. At most one of the
  // following configurations can be set.
  oneof storage_source {
    // Optional. User-managed Cloud Storage dataset configuration. Use this
    // configuration if the dataset documents are stored under a user-managed
    // Cloud Storage location.
    GCSManagedConfig gcs_managed_config = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Document AI Warehouse-based dataset configuration.
    DocumentWarehouseConfig document_warehouse_config = 5
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Unmanaged dataset configuration. Use this configuration if the
    // dataset documents are managed by the document service internally (not
    // user-managed).
    UnmanagedDatasetConfig unmanaged_dataset_config = 6
        [(google.api.field_behavior) = OPTIONAL];
  }

  // How the dataset is indexed. Spanner-based indexing is the only option
  // defined here.
  oneof indexing_source {
    // Optional. A lightweight indexing source with low latency and high
    // reliability, but lacking advanced features like CMEK and content-based
    // search.
    SpannerIndexingConfig spanner_indexing_config = 4
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Dataset resource name.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/dataset`
  string name = 1;

  // Required. State of the dataset. Ignored when updating dataset.
  State state = 2 [(google.api.field_behavior) = REQUIRED];
}

// Dataset Schema.
//
// A resource nested under a processor's dataset that carries the
// DocumentSchema for that dataset.
message DatasetSchema {
  option (google.api.resource) = {
    type: "documentai.googleapis.com/DatasetSchema"
    pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema"
  };

  // Dataset schema resource name.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema`
  string name = 1;

  // Optional. Schema of the dataset.
  DocumentSchema document_schema = 3 [(google.api.field_behavior) = OPTIONAL];
}