// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta3;

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentSchema";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Metadata about a property.
message PropertyMetadata {
  // Whether the property should be considered as "inactive".
  bool inactive = 3;
}

// Metadata about an entity type.
message EntityTypeMetadata {
  // Whether the entity type should be considered inactive.
  bool inactive = 5;
}

// The schema defines the output of the processed document by a processor.
message DocumentSchema {
  // EntityType is the wrapper of a label of the corresponding model with
  // detailed attributes and limitations for entity-based processors. Multiple
  // types can also compose a dependency tree to represent nested types.
  message EntityType {
    // Defines a list of enum values.
    message EnumValues {
      // The individual values that this enum values type can include.
      repeated string values = 1;
    }

    // Defines properties that can be part of the entity type.
    message Property {
      // Types of occurrences of the entity type in the document. This
      // represents the number of instances of an entity, not the number of
      // mentions of an entity. For example, a bank statement may only have
      // one `account_number`, but this account number may be mentioned in
      // several places on the document. In this case the `account_number`
      // would be considered a `REQUIRED_ONCE` entity type. If, on the other
      // hand, we expect a bank statement to contain the status of multiple
      // different accounts for the customers, the occurrence type will be
      // set to `REQUIRED_MULTIPLE`.
      enum OccurrenceType {
        // Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0;

        // There will be zero or one instance of this entity type. The same
        // entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1;

        // The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2;

        // The entity type will only appear exactly once. The same
        // entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3;

        // The entity type will appear once or more times.
        REQUIRED_MULTIPLE = 4;
      }

      // The name of the property. Follows the same guidelines as the
      // EntityType name.
      string name = 1;

      // A reference to the value type of the property. This type is subject
      // to the same conventions as the `EntityType.base_types` field.
      string value_type = 2;

      // Occurrence type limits the number of instances an entity type appears
      // in the document.
      OccurrenceType occurrence_type = 3;

      // Any additional metadata about the property can be added here.
      PropertyMetadata property_metadata = 5;
    }

    // Where the possible values of this entity type come from. `enum_values`
    // is the only member declared in this oneof.
    oneof value_source {
      // If specified, lists all the possible values for this entity. This
      // should not be more than a handful of values. If the number of values
      // is >10 or could change frequently, use the
      // `EntityType.value_ontology` field and specify a list of all possible
      // values in a value ontology file.
      EnumValues enum_values = 14;
    }

    // User defined name for the type.
    string display_name = 13;

    // Name of the type. It must be unique within the schema file and
    // cannot be a "Common Type". The following naming conventions are used:
    //
    // - Use `snake_casing`.
    // - Name matching is case-sensitive.
    // - Maximum 64 characters.
    // - Must start with a letter.
    // - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
    //   compatibility internal infrastructure and tooling can handle any ascii
    //   character.)
    // - The `/` is sometimes used to denote a property of a type. For example
    //   `line_item/amount`. This convention is deprecated, but will still be
    //   honored for backward compatibility.
    string name = 1;

    // The entity type that this type is derived from. For now, one and only
    // one should be set.
    repeated string base_types = 2;

    // Describes the nested structure, or composition, of an entity.
    repeated Property properties = 6;

    // Metadata for the entity type.
    EntityTypeMetadata entity_type_metadata = 11;
  }

  // Metadata for global schema behavior.
  message Metadata {
    // If true, a `document` entity type can be applied to subdocument
    // (splitting). Otherwise, it can only be applied to the entire document
    // (classification).
    bool document_splitter = 1;

    // If true, on a given page, there can be multiple `document` annotations
    // covering it.
    bool document_allow_multiple_labels = 2;

    // If set, all the nested entities must be prefixed with the parents.
    bool prefixed_naming_on_properties = 6;

    // If set, we will skip the naming format validation in the schema. So the
    // string values in `DocumentSchema.EntityType.name` and
    // `DocumentSchema.EntityType.Property.name` will not be checked.
    bool skip_naming_validation = 7;
  }

  // Display name to show to users.
  string display_name = 1;

  // Description of the schema.
  string description = 2;

  // Entity types of the schema.
  repeated EntityType entity_types = 3;

  // Metadata of the schema.
  Metadata metadata = 4;
}