You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
342 lines
14 KiB
342 lines
14 KiB
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1beta/document.proto";
import "google/cloud/discoveryengine/v1beta/user_event.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/date.proto";

// Language-specific code generation options. These are part of the published
// API surface and must not be changed.
option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "ImportConfigProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";
|
|
|
// Cloud Storage location for input content.
message GcsSource {
  // Required. Cloud Storage URIs to input files. URI can be up to
  // 2000 characters long. URIs can match the full object path (for example,
  // `gs://bucket/directory/object.json`) or a pattern matching one or more
  // files, such as `gs://bucket/directory/*.json`.
  //
  // A request can contain at most 100 files (or 100,000 files if `data_schema`
  // is `content`). Each file can be up to 2 GB (or 100 MB if `data_schema` is
  // `content`).
  repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED];

  // The schema to use when parsing the data from the source.
  //
  // Supported values for document imports:
  //
  // * `document` (default): One JSON
  //   [Document][google.cloud.discoveryengine.v1beta.Document] per line. Each
  //   document must have a valid
  //   [Document.id][google.cloud.discoveryengine.v1beta.Document.id].
  // * `content`: Unstructured data (e.g. PDF, HTML). Each file matched by
  //   `input_uris` will become a document, with the ID set to the first 128
  //   bits of SHA256(URI) encoded as a hex string.
  // * `custom`: One custom data JSON per row in arbitrary format that conforms
  //   to the defined [Schema][google.cloud.discoveryengine.v1beta.Schema] of
  //   the data store. This can only be used by the GENERIC Data Store vertical.
  // * `csv`: A CSV file with header conforming to the defined
  //   [Schema][google.cloud.discoveryengine.v1beta.Schema] of the
  //   data store. Each entry after the header will be imported as a Document.
  //   This can only be used by the GENERIC Data Store vertical.
  //
  // Supported values for user event imports:
  //
  // * `user_event` (default): One JSON
  //   [UserEvent][google.cloud.discoveryengine.v1beta.UserEvent] per line.
  string data_schema = 2;
}
|
|
|
// BigQuery source to import data from.
message BigQuerySource {
  // BigQuery table partition info. Leave this empty if the BigQuery table
  // is not partitioned.
  oneof partition {
    // BigQuery time partitioned table's _PARTITIONDATE in YYYY-MM-DD format.
    google.type.Date partition_date = 5;
  }

  // The project ID (can be project # or ID) that the BigQuery source is in with
  // a length limit of 128 characters. If not specified, inherits the project
  // ID from the parent request.
  string project_id = 1;

  // Required. The BigQuery data set to copy the data from with a length limit
  // of 1,024 characters.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The BigQuery table to copy the data from with a length limit of
  // 1,024 characters.
  string table_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Intermediate Cloud Storage directory used for the import with a length
  // limit of 2,000 characters. Can be specified if one wants to have the
  // BigQuery export to a specific Cloud Storage directory.
  string gcs_staging_dir = 4;

  // The schema to use when parsing the data from the source.
  //
  // Supported values for user event imports:
  //
  // * `user_event` (default): One
  //   [UserEvent][google.cloud.discoveryengine.v1beta.UserEvent] per row.
  //
  // Supported values for document imports:
  //
  // * `document` (default): One
  //   [Document][google.cloud.discoveryengine.v1beta.Document] format per
  //   row. Each document must have a valid
  //   [Document.id][google.cloud.discoveryengine.v1beta.Document.id] and one of
  //   [Document.json_data][google.cloud.discoveryengine.v1beta.Document.json_data]
  //   or
  //   [Document.struct_data][google.cloud.discoveryengine.v1beta.Document.struct_data].
  // * `custom`: One custom data per row in arbitrary format that conforms to
  //   the defined [Schema][google.cloud.discoveryengine.v1beta.Schema] of the
  //   data store. This can only be used by the GENERIC Data Store vertical.
  string data_schema = 6;
}
|
|
|
// Configuration of destination for Import related errors.
message ImportErrorConfig {
  // Required. Errors destination.
  oneof destination {
    // Cloud Storage prefix for import errors. This must be an empty,
    // existing Cloud Storage directory. Import errors will be written to
    // sharded files in this directory, one per line, as a JSON-encoded
    // `google.rpc.Status` message.
    string gcs_prefix = 1;
  }
}
|
|
|
// Request message for the ImportUserEvents request.
message ImportUserEventsRequest {
  // The inline source for the input config for ImportUserEvents method.
  message InlineSource {
    // Required. A list of user events to import. Recommended max of 10k items.
    repeated UserEvent user_events = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Required. The desired input source of the user event data.
  oneof source {
    // The Inline source for the input content for UserEvents.
    InlineSource inline_source = 2;

    // Cloud Storage location for the input content.
    GcsSource gcs_source = 3;

    // BigQuery input source.
    BigQuerySource bigquery_source = 4;
  }

  // Required. Parent DataStore resource name, of the form
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/DataStore"
    }
  ];

  // The desired location of errors incurred during the Import. Cannot be set
  // for inline user event imports.
  ImportErrorConfig error_config = 5;
}
|
|
|
// Response of the ImportUserEventsRequest. If the long running
// operation is done, then this message is returned by the
// google.longrunning.Operation.response field if the operation was successful.
message ImportUserEventsResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors if this field was set in
  // the request.
  ImportErrorConfig error_config = 2;

  // Count of user events imported with complete existing Documents.
  int64 joined_events_count = 3;

  // Count of user events imported, but with Document information not found
  // in the existing Branch.
  int64 unjoined_events_count = 4;
}
|
|
|
// Metadata related to the progress of the Import operation. This will be
// returned by the google.longrunning.Operation.metadata field.
message ImportUserEventsMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;

  // Count of entries that were processed successfully.
  int64 success_count = 3;

  // Count of entries that encountered errors while processing.
  int64 failure_count = 4;
}
|
|
|
// Metadata related to the progress of the ImportDocuments operation. This will
// be returned by the google.longrunning.Operation.metadata field.
message ImportDocumentsMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;

  // Count of entries that were processed successfully.
  int64 success_count = 3;

  // Count of entries that encountered errors while processing.
  int64 failure_count = 4;
}
|
|
|
// Request message for Import methods.
message ImportDocumentsRequest {
  // The inline source for the input config for ImportDocuments method.
  message InlineSource {
    // Required. A list of documents to update/create. Each document must have a
    // valid [Document.id][google.cloud.discoveryengine.v1beta.Document.id].
    // Recommended max of 100 items.
    repeated Document documents = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Indicates how imported documents are reconciled with the existing documents
  // created or imported before.
  enum ReconciliationMode {
    // Defaults to INCREMENTAL.
    RECONCILIATION_MODE_UNSPECIFIED = 0;

    // Inserts new documents or updates existing documents.
    INCREMENTAL = 1;

    // Calculates diff and replaces the entire document dataset. Existing
    // documents may be deleted if they are not present in the source location.
    FULL = 2;
  }

  // Required. The source of the input.
  oneof source {
    // The Inline source for the input content for documents.
    InlineSource inline_source = 2;

    // Cloud Storage location for the input content.
    GcsSource gcs_source = 3;

    // BigQuery input source.
    BigQuerySource bigquery_source = 4;
  }

  // Required. The parent branch resource name, such as
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}`.
  // Requires create/update permission.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/Branch"
    }
  ];

  // The desired location of errors incurred during the Import.
  ImportErrorConfig error_config = 5;

  // The mode of reconciliation between existing documents and the documents to
  // be imported. Defaults to
  // [ReconciliationMode.INCREMENTAL][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL].
  ReconciliationMode reconciliation_mode = 6;

  // Whether to automatically generate IDs for the documents if absent.
  //
  // If set to `true`,
  // [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s are
  // automatically generated based on the hash of the payload, where IDs may not
  // be consistent during multiple imports. In which case
  // [ReconciliationMode.FULL][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.ReconciliationMode.FULL]
  // is highly recommended to avoid duplicate contents. If unset or set to
  // `false`, [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s
  // have to be specified using
  // [id_field][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.id_field],
  // otherwise, documents without IDs will fail to be imported.
  //
  // Only set this field when using
  // [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource] or
  // [BigQuerySource][google.cloud.discoveryengine.v1beta.BigQuerySource], and
  // when
  // [GcsSource.data_schema][google.cloud.discoveryengine.v1beta.GcsSource.data_schema]
  // or
  // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1beta.BigQuerySource.data_schema]
  // is `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown.
  bool auto_generate_ids = 8;

  // The field in the Cloud Storage and BigQuery sources that indicates the
  // unique IDs of the documents.
  //
  // For [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource] it is the
  // key of the JSON field. For instance, `my_id` for JSON `{"my_id":
  // "some_uuid"}`. For
  // [BigQuerySource][google.cloud.discoveryengine.v1beta.BigQuerySource] it is
  // the column name of the BigQuery table where the unique ids are stored.
  //
  // The values of the JSON field or the BigQuery column will be used as the
  // [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s. The JSON
  // field or the BigQuery column must be of string type, and the values must be
  // set as valid strings conforming to
  // [RFC-1034](https://tools.ietf.org/html/rfc1034) with 1-63 characters.
  // Otherwise, documents without valid IDs will fail to be imported.
  //
  // Only set this field when using
  // [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource] or
  // [BigQuerySource][google.cloud.discoveryengine.v1beta.BigQuerySource], and
  // when
  // [GcsSource.data_schema][google.cloud.discoveryengine.v1beta.GcsSource.data_schema]
  // or
  // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1beta.BigQuerySource.data_schema]
  // is `custom`. And only set this field when
  // [auto_generate_ids][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.auto_generate_ids]
  // is unset or set as `false`. Otherwise, an INVALID_ARGUMENT error is thrown.
  //
  // If it is unset, a default value `_id` is used when importing from the
  // allowed data sources.
  string id_field = 9;
}
|
|
|
// Response of the
// [ImportDocumentsRequest][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest].
// If the long running operation is done, then this message is returned by the
// google.longrunning.Operation.response field if the operation was successful.
message ImportDocumentsResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors in the request if set.
  ImportErrorConfig error_config = 2;
}
|
|
|