feat: added StorageFormat.iceberg
chore: formatting changes
PiperOrigin-RevId: 496586743 (pull/763/head)

parent cffce7313a
commit 58f5c43973

8 changed files with 1239 additions and 2 deletions
@@ -0,0 +1,207 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/cloud/dataplex/v1/processing.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "DataProfileProto";
option java_package = "com.google.cloud.dataplex.v1";

// DataProfileScan related setting.
message DataProfileSpec {}

// DataProfileResult defines the output of DataProfileScan.
// Each field of the table will have a field type specific profile result.
message DataProfileResult {
  // Profile information describing the structure and layout of the data,
  // along with the profile info for each field.
  message Profile {
    // Represents a column field within a table schema.
    message Field {
      // ProfileInfo defines the profile information for each schema field type.
      message ProfileInfo {
        // StringFieldInfo defines output info for any string type field.
        message StringFieldInfo {
          // The minimum length of the string field in the sampled data.
          // Optional if zero non-null rows.
          int64 min_length = 1;

          // The maximum length of a string field in the sampled data.
          // Optional if zero non-null rows.
          int64 max_length = 2;

          // The average length of a string field in the sampled data.
          // Optional if zero non-null rows.
          double average_length = 3;
        }

        // IntegerFieldInfo defines output for any integer type field.
        message IntegerFieldInfo {
          // The average of non-null values of the integer field in the
          // sampled data. Returns NaN if the field has a NaN. Optional if
          // zero non-null rows.
          double average = 1;

          // The standard deviation of non-null values of the integer field
          // in the sampled data. Returns NaN if the field has a NaN.
          // Optional if zero non-null rows.
          double standard_deviation = 3;

          // The minimum value of an integer field in the sampled data.
          // Returns NaN if the field has a NaN. Optional if zero non-null
          // rows.
          int64 min = 4;

          // A quartile divides the number of data points into four parts, or
          // quarters, of more-or-less equal size. The three main quartiles
          // used are: The first quartile (Q1) splits off the lowest 25% of
          // data from the highest 75%. It is also known as the lower or 25th
          // empirical quartile, as 25% of the data is below this point. The
          // second quartile (Q2) is the median of a data set, so 50% of the
          // data lies below this point. The third quartile (Q3) splits off
          // the highest 25% of data from the lowest 75%. It is known as the
          // upper or 75th empirical quartile, as 75% of the data lies below
          // this point. Here, the quartiles are provided as an ordered list
          // of quartile values, occurring in the order Q1, median, Q3.
          repeated int64 quartiles = 6;

          // The maximum value of an integer field in the sampled data.
          // Returns NaN if the field has a NaN. Optional if zero non-null
          // rows.
          int64 max = 5;
        }

        // DoubleFieldInfo defines output for any double type field.
        message DoubleFieldInfo {
          // The average of non-null values of the double field in the sampled
          // data. Returns NaN if the field has a NaN. Optional if zero
          // non-null rows.
          double average = 1;

          // The standard deviation of non-null values of the double field in
          // the sampled data. Returns NaN if the field has a NaN. Optional if
          // zero non-null rows.
          double standard_deviation = 3;

          // The minimum value of a double field in the sampled data.
          // Returns NaN if the field has a NaN. Optional if zero non-null
          // rows.
          double min = 4;

          // A quartile divides the number of data points into four parts, or
          // quarters, of more-or-less equal size. The three main quartiles
          // used are: The first quartile (Q1) splits off the lowest 25% of
          // data from the highest 75%. It is also known as the lower or 25th
          // empirical quartile, as 25% of the data is below this point. The
          // second quartile (Q2) is the median of a data set, so 50% of the
          // data lies below this point. The third quartile (Q3) splits off
          // the highest 25% of data from the lowest 75%. It is known as the
          // upper or 75th empirical quartile, as 75% of the data lies below
          // this point. Here, the quartiles are provided as an ordered list
          // of quartile values, occurring in the order Q1, median, Q3.
          repeated double quartiles = 6;

          // The maximum value of a double field in the sampled data.
          // Returns NaN if the field has a NaN. Optional if zero non-null
          // rows.
          double max = 5;
        }

        // TopNValue defines the structure of the output of the top N values
        // of a field.
        message TopNValue {
          // The string representation of the actual value from the field.
          string value = 1;

          // The frequency count of the corresponding value in the field.
          int64 count = 2;
        }

        // The ratio of null rows against the rows in the sampled data.
        double null_ratio = 2;

        // The ratio of rows that are distinct against the rows in the sampled
        // data.
        double distinct_ratio = 3;

        // The array of top N values of the field in the sampled data.
        // Currently N is set to 10, or to the number of distinct values in
        // the field, whichever is smaller. This will be optional for complex
        // non-groupable data types such as JSON, ARRAY, and STRUCT.
        repeated TopNValue top_n_values = 4;

        // The corresponding profile for the specific field type.
        // Each field will have only one field type specific profile output.
        oneof field_info {
          // The corresponding string field profile.
          StringFieldInfo string_profile = 101;

          // The corresponding integer field profile.
          IntegerFieldInfo integer_profile = 102;

          // The corresponding double field profile.
          DoubleFieldInfo double_profile = 103;
        }
      }

      // The name of the field.
      string name = 1;

      // The field data type. Possible values include:
      //
      // * STRING
      // * BYTE
      // * INT64
      // * INT32
      // * INT16
      // * DOUBLE
      // * FLOAT
      // * DECIMAL
      // * BOOLEAN
      // * BINARY
      // * TIMESTAMP
      // * DATE
      // * TIME
      // * NULL
      // * RECORD
      string type = 2;

      // The mode of the field. Its value will be:
      // REQUIRED, if it is a required field;
      // NULLABLE, if it is an optional field;
      // REPEATED, if it is a repeated field.
      string mode = 3;

      // The profile information for the corresponding field.
      ProfileInfo profile = 4;
    }

    // The sequence of fields describing data in table entities.
    repeated Field fields = 2;
  }

  // The count of all rows in the sampled data.
  // Returns 0 if there are zero rows.
  int64 row_count = 3;

  // This represents the profile information per field.
  Profile profile = 4;

  // The data scanned for this profile.
  ScannedData scanned_data = 5;
}
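
A minimal usage sketch in Python (not part of the diff): walking a DataProfileResult with the generated client types. It assumes the published google-cloud-dataplex package exposes these messages under google.cloud.dataplex_v1; the field names come from the proto above.

# Sketch: summarize a DataProfileResult, assuming the generated Python
# types from these protos are available as google.cloud.dataplex_v1.
from google.cloud import dataplex_v1


def summarize_profile(result: dataplex_v1.DataProfileResult) -> None:
    """Print one line per profiled field from a data profile scan result."""
    print(f"rows sampled: {result.row_count}")
    for field in result.profile.fields:
        # Exactly one of string_profile / integer_profile / double_profile is
        # set in the field_info oneof, matching the field's data type.
        info = field.profile
        top = ", ".join(
            f"{v.value} (x{v.count})" for v in list(info.top_n_values)[:3]
        )
        print(
            f"  {field.name} [{field.mode}] "
            f"null_ratio={info.null_ratio:.2f} "
            f"distinct_ratio={info.distinct_ratio:.2f} "
            f"top values: {top}"
        )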

@@ -0,0 +1,236 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/field_behavior.proto";
import "google/cloud/dataplex/v1/processing.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "DataQualityProto";
option java_package = "com.google.cloud.dataplex.v1";

// DataQualityScan related setting.
message DataQualitySpec {
  // The list of rules to evaluate against a data source. At least one rule is
  // required.
  repeated DataQualityRule rules = 1;
}

// The output of a DataQualityScan.
message DataQualityResult {
  // Overall data quality result -- `true` if all rules passed.
  bool passed = 5;

  // A list of results at the dimension level.
  repeated DataQualityDimensionResult dimensions = 2;

  // A list of all the rules in a job, and their results.
  repeated DataQualityRuleResult rules = 3;

  // The count of rows processed.
  int64 row_count = 4;

  // The data scanned for this result.
  ScannedData scanned_data = 7;
}

// DataQualityRuleResult provides a more detailed, per-rule view of the
// results.
message DataQualityRuleResult {
  // The rule specified in the DataQualitySpec, as is.
  DataQualityRule rule = 1;

  // Whether the rule passed or failed.
  bool passed = 7;

  // The number of rows a rule was evaluated against.
  // This field is only valid for ColumnMap type rules.
  // The evaluated count can be configured to either
  // (1) include all rows (default), with null rows automatically failing rule
  // evaluation, or (2) exclude null rows from the evaluated_count, by setting
  // ignore_null = true.
  int64 evaluated_count = 9;

  // The number of rows which passed a rule evaluation.
  // This field is only valid for ColumnMap type rules.
  int64 passed_count = 8;

  // The number of rows with null values in the specified column.
  int64 null_count = 5;

  // The ratio of passed_count / evaluated_count.
  // This field is only valid for ColumnMap type rules.
  double pass_ratio = 6;

  // The query to find rows that did not pass this rule.
  // Only applies to ColumnMap and RowCondition rules.
  string failing_rows_query = 10;
}

// DataQualityDimensionResult provides a more detailed, per-dimension view of
// the results.
message DataQualityDimensionResult {
  // Whether the dimension passed or failed.
  bool passed = 3;
}

// A rule captures data quality intent about a data source.
message DataQualityRule {
  // Evaluates whether each column value lies within a specified range.
  message RangeExpectation {
    // Optional. The minimum column value allowed for a row to pass this
    // validation. At least one of min_value and max_value needs to be
    // provided.
    string min_value = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The maximum column value allowed for a row to pass this
    // validation. At least one of min_value and max_value needs to be
    // provided.
    string max_value = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether each value needs to be strictly greater than ('>')
    // the minimum, or if equality is allowed. Only relevant if a min_value
    // has been defined. Default = false.
    bool strict_min_enabled = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether each value needs to be strictly less than ('<') the
    // maximum, or if equality is allowed. Only relevant if a max_value has
    // been defined. Default = false.
    bool strict_max_enabled = 4 [(google.api.field_behavior) = OPTIONAL];
  }

  // Evaluates whether each column value is null.
  message NonNullExpectation {}

  // Evaluates whether each column value is contained by a specified set.
  message SetExpectation {
    repeated string values = 1;
  }

  // Evaluates whether each column value matches a specified regex.
  message RegexExpectation {
    string regex = 1;
  }

  // Evaluates whether the column has duplicates.
  message UniquenessExpectation {}

  // Evaluates whether the column aggregate statistic lies within a specified
  // range.
  message StatisticRangeExpectation {
    enum ColumnStatistic {
      // Unspecified statistic type.
      STATISTIC_UNDEFINED = 0;

      // Evaluate the column mean.
      MEAN = 1;

      // Evaluate the column min.
      MIN = 2;

      // Evaluate the column max.
      MAX = 3;
    }

    ColumnStatistic statistic = 1;

    // The minimum column statistic value allowed for a row to pass this
    // validation.
    // At least one of min_value and max_value needs to be provided.
    string min_value = 2;

    // The maximum column statistic value allowed for a row to pass this
    // validation.
    // At least one of min_value and max_value needs to be provided.
    string max_value = 3;

    // Whether the column statistic needs to be strictly greater than ('>')
    // the minimum, or if equality is allowed. Only relevant if a min_value
    // has been defined. Default = false.
    bool strict_min_enabled = 4;

    // Whether the column statistic needs to be strictly less than ('<') the
    // maximum, or if equality is allowed. Only relevant if a max_value has
    // been defined. Default = false.
    bool strict_max_enabled = 5;
  }

  // Evaluates whether each row passes the specified condition.
  // The SQL expression needs to use BigQuery standard SQL syntax and should
  // produce a boolean value per row as the result.
  // Example: col1 >= 0 AND col2 < 10
  message RowConditionExpectation {
    string sql_expression = 1;
  }

  // Evaluates whether the provided expression is true.
  // The SQL expression needs to use BigQuery standard SQL syntax and should
  // produce a scalar boolean result.
  // Example: MIN(col1) >= 0
  message TableConditionExpectation {
    string sql_expression = 1;
  }

  oneof rule_type {
    // ColumnMap rule which evaluates whether each column value lies within a
    // specified range.
    RangeExpectation range_expectation = 1;

    // ColumnMap rule which evaluates whether each column value is null.
    NonNullExpectation non_null_expectation = 2;

    // ColumnMap rule which evaluates whether each column value is contained
    // by a specified set.
    SetExpectation set_expectation = 3;

    // ColumnMap rule which evaluates whether each column value matches a
    // specified regex.
    RegexExpectation regex_expectation = 4;

    // ColumnAggregate rule which evaluates whether the column has duplicates.
    UniquenessExpectation uniqueness_expectation = 100;

    // ColumnAggregate rule which evaluates whether the column aggregate
    // statistic lies within a specified range.
    StatisticRangeExpectation statistic_range_expectation = 101;

    // Table rule which evaluates whether each row passes the specified
    // condition.
    RowConditionExpectation row_condition_expectation = 200;

    // Table rule which evaluates whether the provided expression is true.
    TableConditionExpectation table_condition_expectation = 201;
  }

  // Optional. The unnested column which this rule is evaluated against.
  string column = 500 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Rows with null values will automatically fail a rule, unless
  // ignore_null is true. In that case, such null rows are trivially considered
  // passing. Only applicable to ColumnMap rules.
  bool ignore_null = 501 [(google.api.field_behavior) = OPTIONAL];

  // Required. The dimension a rule belongs to. Results are also aggregated at
  // the dimension level. Supported dimensions are ["COMPLETENESS", "ACCURACY",
  // "CONSISTENCY", "VALIDITY", "UNIQUENESS", "INTEGRITY"].
  string dimension = 502 [(google.api.field_behavior) = REQUIRED];

  // Optional. The minimum ratio of passing_rows / total_rows required to pass
  // this rule, with a range of [0.0, 1.0].
  //
  // 0 indicates the default value (i.e. 1.0).
  double threshold = 503 [(google.api.field_behavior) = OPTIONAL];
}
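
A minimal configuration sketch in Python (not part of the diff), showing how the rule messages above compose into a DataQualitySpec. It assumes the generated types under google.cloud.dataplex_v1; the column names, dimensions, and thresholds are placeholders.

# Sketch: assemble a DataQualitySpec from a few rule types defined above.
from google.cloud import dataplex_v1

Rule = dataplex_v1.DataQualityRule

spec = dataplex_v1.DataQualitySpec(
    rules=[
        # ColumnMap rule: every non-null discount must lie in [0, 100].
        Rule(
            column="discount_pct",
            dimension="VALIDITY",
            ignore_null=True,
            threshold=0.95,  # pass if at least 95% of evaluated rows pass
            range_expectation=Rule.RangeExpectation(
                min_value="0",
                max_value="100",
                strict_max_enabled=False,
            ),
        ),
        # ColumnMap rule: order_id must never be null.
        Rule(
            column="order_id",
            dimension="COMPLETENESS",
            non_null_expectation=Rule.NonNullExpectation(),
        ),
        # Table rule: the whole table must satisfy a scalar boolean expression.
        Rule(
            dimension="CONSISTENCY",
            table_condition_expectation=Rule.TableConditionExpectation(
                sql_expression="COUNT(*) > 0",
            ),
        ),
    ]
)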

@@ -0,0 +1,535 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/data_profile.proto";
import "google/cloud/dataplex/v1/data_quality.proto";
import "google/cloud/dataplex/v1/processing.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "DataScansProto";
option java_package = "com.google.cloud.dataplex.v1";

service DataScanService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a dataScan resource.
  rpc CreateDataScan(CreateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/dataScans"
      body: "data_scan"
    };
    option (google.api.method_signature) = "parent,data_scan,data_scan_id";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Updates the dataScan resource.
  rpc UpdateDataScan(UpdateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/{data_scan.name=projects/*/locations/*/dataScans/*}"
      body: "data_scan"
    };
    option (google.api.method_signature) = "data_scan,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the dataScan resource.
  rpc DeleteDataScan(DeleteDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "OperationMetadata"
    };
  }

  // Gets a dataScan resource.
  rpc GetDataScan(GetDataScanRequest) returns (DataScan) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists dataScans.
  rpc ListDataScans(ListDataScansRequest) returns (ListDataScansResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/dataScans"
    };
    option (google.api.method_signature) = "parent";
  }

  // Runs an on-demand execution of a DataScan.
  rpc RunDataScan(RunDataScanRequest) returns (RunDataScanResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:run"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets a DataScanJob resource.
  rpc GetDataScanJob(GetDataScanJobRequest) returns (DataScanJob) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScanJobs under the given dataScan.
  rpc ListDataScanJobs(ListDataScanJobsRequest)
      returns (ListDataScanJobsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/dataScans/*}/jobs"
    };
    option (google.api.method_signature) = "parent";
  }
}
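
A minimal end-to-end sketch in Python (not part of the diff) for the RPCs above. It assumes the generated client google.cloud.dataplex_v1.DataScanServiceClient; the flattened arguments mirror the method_signature options in the service definition, and all project, lake, and entity names are placeholders.

# Sketch: create an on-demand data quality scan and trigger a run.
from google.cloud import dataplex_v1

client = dataplex_v1.DataScanServiceClient()
parent = "projects/my-project/locations/us-central1"

scan = dataplex_v1.DataScan(
    data=dataplex_v1.DataSource(
        entity=(
            "projects/my-project/locations/us-central1/lakes/my-lake"
            "/zones/raw/entities/orders"
        )
    ),
    data_quality_spec=dataplex_v1.DataQualitySpec(
        rules=[
            dataplex_v1.DataQualityRule(
                column="order_id",
                dimension="COMPLETENESS",
                non_null_expectation=dataplex_v1.DataQualityRule.NonNullExpectation(),
            )
        ]
    ),
)

# CreateDataScan is a long-running operation whose response is the DataScan.
operation = client.create_data_scan(
    parent=parent, data_scan=scan, data_scan_id="orders-dq"
)
created = operation.result()

# RunDataScan starts an on-demand execution and returns the DataScanJob.
job = client.run_data_scan(name=created.name).job
print(job.name, job.state)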

// Create dataScan request.
message CreateDataScanRequest {
  // Required. The resource name of the parent location:
  // projects/{project}/locations/{location_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. DataScan resource.
  DataScan data_scan = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. DataScan identifier.
  // * Must contain only lowercase letters, numbers and hyphens.
  // * Must start with a letter.
  // * Must end with a number or a letter.
  // * Must be between 1-63 characters.
  // * Must be unique within the customer project / location.
  string data_scan_id = 3 [(google.api.field_behavior) = REQUIRED];
}

// Update dataScan request.
message UpdateDataScanRequest {
  // Required. Update description.
  // Only fields specified in `update_mask` are updated.
  DataScan data_scan = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Mask of fields to update.
  google.protobuf.FieldMask update_mask = 2
      [(google.api.field_behavior) = REQUIRED];
}

// Delete dataScan request.
message DeleteDataScanRequest {
  // Required. The resource name of the dataScan:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}

// Get dataScan request.
message GetDataScanRequest {
  // DataScan views for getting a partial dataScan.
  enum DataScanView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include spec and result.
    BASIC = 1;

    // Include everything.
    FULL = 10;
  }

  // Required. The resource name of the dataScan:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Used to select the subset of DataScan information to return.
  // Defaults to `BASIC`.
  DataScanView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List dataScans request.
message ListDataScansRequest {
  // Required. projects/{project}/locations/{location_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. Maximum number of dataScans to return. The service may return
  // fewer than this value. If unspecified, at most 10 scans will be returned.
  // The maximum value is 1000; values above 1000 will be coerced to 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScans` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListDataScans` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter request.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Order by fields (name or create_time) for the result.
  // If not specified, the ordering is undefined.
  string order_by = 5 [(google.api.field_behavior) = OPTIONAL];
}

// List dataScans response.
message ListDataScansResponse {
  // DataScans (metadata only) under the given parent location.
  repeated DataScan data_scans = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}

// Run DataScan request.
message RunDataScanRequest {
  // Required. The resource name of the DataScan:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id},
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  // Only on-demand DataScans are allowed.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}

// Run DataScan response.
message RunDataScanResponse {
  // DataScanJob created by the RunDataScan API.
  DataScanJob job = 1;
}

// Get DataScanJob request.
message GetDataScanJobRequest {
  // DataScanJob views for getting a partial dataScanJob.
  enum DataScanJobView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_JOB_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include spec and result.
    BASIC = 1;

    // Include everything.
    FULL = 10;
  }

  // Required. The resource name of the DataScanJob:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}/dataScanJobs/{data_scan_job_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScanJob"
    }
  ];

  // Optional. Used to select the subset of DataScan information to return.
  // Defaults to `BASIC`.
  DataScanJobView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List DataScanJobs request.
message ListDataScanJobsRequest {
  // Required. The resource name of the parent environment:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Maximum number of DataScanJobs to return. The service may return
  // fewer than this value. If unspecified, at most 10 DataScanJobs will be
  // returned. The maximum value is 1000; values above 1000 will be coerced to
  // 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScanJobs` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListDataScanJobs` must match the call that
  // provided the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}

// List DataScanJobs response.
message ListDataScanJobsResponse {
  // DataScanJobs (metadata only) under a given dataScan.
  repeated DataScanJob data_scan_jobs = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;
}

// Represents a user-visible job which provides the insights for the related
// data source. For example:
// - Data Quality: generates queries based on the rules and runs them against
//   the data to get data quality check results.
// - Data Profile: analyzes the data in table(s) and generates insights about
//   the structure, content and relationships (such as null percent,
//   cardinality, min/max/mean, etc.).
message DataScan {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScan"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}"
  };

  // DataScan execution settings.
  message ExecutionSpec {
    // Optional. Spec related to how often and when a scan should be triggered.
    // If not specified, the default is OnDemand, which means the scan will not
    // run until the user calls the RunDataScan API.
    Trigger trigger = 1 [(google.api.field_behavior) = OPTIONAL];

    // If not specified, run a data scan on all data in the table.
    // The incremental field is immutable, which means once the field is set,
    // it cannot be unset, and vice versa.
    oneof incremental {
      // Immutable. The unnested field (Date or Timestamp) that contains values
      // that monotonically increase over time.
      string field = 100 [(google.api.field_behavior) = IMMUTABLE];
    }
  }

  // Status of the data scan execution.
  message ExecutionStatus {
    // The time when the latest DataScanJob started.
    google.protobuf.Timestamp latest_job_start_time = 4;

    // The time when the latest DataScanJob ended.
    google.protobuf.Timestamp latest_job_end_time = 5;
  }

  // Output only. The relative resource name of the scan, of the form:
  // projects/{project}/locations/{location_id}/dataScans/{datascan_id},
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the scan. This ID
  // will be different if the scan is deleted and re-created with the same
  // name.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Description of the scan.
  // * Must be between 1-1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly display name.
  // * Must be between 1-256 characters.
  string display_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User-defined labels for the scan.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the DataScan.
  State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The data source for the DataScan.
  DataSource data = 9 [(google.api.field_behavior) = REQUIRED];

  // Optional. DataScan execution settings.
  // If not specified, the fields under it will use their default values.
  ExecutionSpec execution_spec = 10 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Status of the data scan execution.
  ExecutionStatus execution_status = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of DataScan.
  DataScanType type = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  // It is required and immutable, which means once data_quality_spec is set,
  // it cannot be changed to data_profile_spec.
  oneof spec {
    // DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100;

    // DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101;
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// A DataScanJob represents an instance of a data scan.
message DataScanJob {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScanJob"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}/jobs/{job}"
  };

  // Execution state for the DataScanJob.
  enum State {
    // The DataScanJob state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The DataScanJob is running.
    RUNNING = 1;

    // The DataScanJob is canceling.
    CANCELING = 2;

    // The DataScanJob cancellation was successful.
    CANCELLED = 3;

    // The DataScanJob completed successfully.
    SUCCEEDED = 4;

    // The DataScanJob is no longer running due to an error.
    FAILED = 5;

    // The DataScanJob has been created but has not started running yet.
    PENDING = 7;
  }

  // Output only. The relative resource name of the DataScanJob, of the form:
  // projects/{project}/locations/{location_id}/dataScans/{datascan_id}/jobs/{job_id},
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the DataScanJob.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob was started.
  google.protobuf.Timestamp start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob ended.
  google.protobuf.Timestamp end_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Execution state for the DataScanJob.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional information about the current state.
  string message = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of the parent DataScan.
  DataScanType type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  oneof spec {
    // Output only. DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// The type of DataScan.
enum DataScanType {
  // The DataScan type is unspecified.
  DATA_SCAN_TYPE_UNSPECIFIED = 0;

  // Data Quality scan.
  DATA_QUALITY = 1;

  // Data Profile scan.
  DATA_PROFILE = 2;
}
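
A minimal follow-up sketch in Python (not part of the diff): listing jobs for a scan and pulling the FULL view to read results. Again this assumes the generated google.cloud.dataplex_v1 client surface; the scan name is a placeholder.

# Sketch: inspect finished jobs of a data quality scan.
from google.cloud import dataplex_v1

client = dataplex_v1.DataScanServiceClient()
scan = "projects/my-project/locations/us-central1/dataScans/orders-dq"

for job in client.list_data_scan_jobs(parent=scan):
    # The list call returns metadata only; request the FULL view to get the
    # spec and result oneofs populated.
    full = client.get_data_scan_job(
        request=dataplex_v1.GetDataScanJobRequest(
            name=job.name,
            view=dataplex_v1.GetDataScanJobRequest.DataScanJobView.FULL,
        )
    )
    if full.state == dataplex_v1.DataScanJob.State.SUCCEEDED:
        # For a data quality job, the result oneof carries data_quality_result.
        verdict = "passed" if full.data_quality_result.passed else "failed"
        print(f"{job.name}: {verdict} "
              f"({full.data_quality_result.row_count} rows processed)")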

@@ -0,0 +1,94 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "ProcessingProto";
option java_package = "com.google.cloud.dataplex.v1";

// DataScan scheduling and trigger settings.
message Trigger {
  // The scan runs once via the RunDataScan API.
  message OnDemand {}

  // The scan is scheduled to run periodically.
  message Schedule {
    // Required. Cron schedule (https://en.wikipedia.org/wiki/Cron) for running
    // scans periodically.
    // To explicitly set a timezone in the cron tab, apply a prefix in the
    // cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
    // The ${IANA_TIME_ZONE} may only be a valid string from the IANA time
    // zone database. For example, "CRON_TZ=America/New_York 1 * * * *", or
    // "TZ=America/New_York 1 * * * *".
    // This field is required for Schedule scans.
    string cron = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // DataScan scheduling and trigger settings.
  // If not specified, the default is OnDemand, which means the scan will not
  // run until the user calls the RunDataScan API.
  oneof mode {
    // The scan runs once shortly after DataScan creation.
    OnDemand on_demand = 100;

    // The scan is scheduled to run periodically.
    Schedule schedule = 101;
  }
}
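
A small sketch in Python (not part of the diff) of the two trigger modes, assuming the generated google.cloud.dataplex_v1 types; the cron line follows the CRON_TZ prefix convention described in the comment above.

# Sketch: on-demand vs. scheduled triggers for a DataScan.
from google.cloud import dataplex_v1

# Run only when RunDataScan is called (the default when no trigger is set).
on_demand = dataplex_v1.Trigger(on_demand=dataplex_v1.Trigger.OnDemand())

# Run every day at 02:00 New York time.
nightly = dataplex_v1.Trigger(
    schedule=dataplex_v1.Trigger.Schedule(
        cron="CRON_TZ=America/New_York 0 2 * * *",
    )
)

execution_spec = dataplex_v1.DataScan.ExecutionSpec(trigger=nightly)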

// The data source for DataScan.
message DataSource {
  // The source is required and immutable, which means once the entity is set,
  // it cannot be changed to another source, and vice versa.
  oneof source {
    // Immutable. The dataplex entity that contains the data for DataScan, of
    // the form:
    // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
    string entity = 100 [
      (google.api.field_behavior) = IMMUTABLE,
      (google.api.resource_reference) = {
        type: "dataplex.googleapis.com/Entity"
      }
    ];
  }
}

// The data scanned during processing (e.g. in an incremental DataScan).
message ScannedData {
  // A data range denoted by a pair of start/end values of a field.
  message IncrementalField {
    // The field that contains values which monotonically increase over time
    // (e.g. a timestamp).
    string field = 1;

    // Value that marks the start of the range.
    string start = 2;

    // Value that marks the end of the range.
    string end = 3;
  }

  // The range of scanned data.
  oneof data_range {
    // The range denoted by values of an incremental field.
    IncrementalField incremental_field = 1;
  }
}
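
A final sketch in Python (not part of the diff) tying ExecutionSpec's incremental field to the ScannedData range a job reports back, assuming the generated google.cloud.dataplex_v1 types; the field name event_time is a placeholder.

# Sketch: configure an incremental scan and read back the scanned range.
from google.cloud import dataplex_v1

# Only rows whose `event_time` lies beyond the previously scanned range are
# processed on subsequent runs; the incremental field cannot be changed later.
execution_spec = dataplex_v1.DataScan.ExecutionSpec(
    trigger=dataplex_v1.Trigger(on_demand=dataplex_v1.Trigger.OnDemand()),
    field="event_time",
)


def describe_scanned_range(scanned: dataplex_v1.ScannedData) -> str:
    """Render the incremental range a job actually covered."""
    rng = scanned.incremental_field
    return f"{rng.field}: [{rng.start}, {rng.end}]"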