feat: DataScans service

feat: added StorageFormat.iceberg
chore: formatting changes

PiperOrigin-RevId: 496586743
pull/763/head
Google APIs 2 years ago committed by Copybara-Service
parent cffce7313a
commit 58f5c43973
  1. 6
      google/cloud/dataplex/v1/BUILD.bazel
  2. 207
      google/cloud/dataplex/v1/data_profile.proto
  3. 236
      google/cloud/dataplex/v1/data_quality.proto
  4. 28
      google/cloud/dataplex/v1/dataplex_v1.yaml
  5. 535
      google/cloud/dataplex/v1/datascans.proto
  6. 122
      google/cloud/dataplex/v1/logs.proto
  7. 13
      google/cloud/dataplex/v1/metadata.proto
  8. 94
      google/cloud/dataplex/v1/processing.proto

@@ -23,8 +23,12 @@ proto_library(
srcs = [
"analyze.proto",
"content.proto",
"data_profile.proto",
"data_quality.proto",
"datascans.proto",
"logs.proto",
"metadata.proto",
"processing.proto",
"resources.proto",
"service.proto",
"tasks.proto",
@@ -103,6 +107,8 @@ java_gapic_test(
test_classes = [
"com.google.cloud.dataplex.v1.ContentServiceClientHttpJsonTest",
"com.google.cloud.dataplex.v1.ContentServiceClientTest",
"com.google.cloud.dataplex.v1.DataScanServiceClientHttpJsonTest",
"com.google.cloud.dataplex.v1.DataScanServiceClientTest",
"com.google.cloud.dataplex.v1.DataplexServiceClientHttpJsonTest",
"com.google.cloud.dataplex.v1.DataplexServiceClientTest",
"com.google.cloud.dataplex.v1.MetadataServiceClientHttpJsonTest",

@@ -0,0 +1,207 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataplex.v1;
import "google/cloud/dataplex/v1/processing.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "DataProfileProto";
option java_package = "com.google.cloud.dataplex.v1";
// DataProfileScan related setting.
// Currently carries no configuration; it exists as a message (rather than a
// bool) so that profile-specific options can be added later in a
// backward-compatible way.
message DataProfileSpec {}
// DataProfileResult defines the output of DataProfileScan.
// Each field of the table will have a field-type-specific profile result.
message DataProfileResult {
  // Profile information describing the structure and layout of the data
  // and containing the per-field profile info.
  message Profile {
    // Represents a column field within a table schema.
    message Field {
      // ProfileInfo defines the profile information for each schema field
      // type.
      message ProfileInfo {
        // StringFieldInfo defines output info for any string type field.
        message StringFieldInfo {
          // The minimum length of the string field in the sampled data.
          // Optional if zero non-null rows.
          int64 min_length = 1;

          // The maximum length of a string field in the sampled data.
          // Optional if zero non-null rows.
          int64 max_length = 2;

          // The average length of a string field in the sampled data.
          // Optional if zero non-null rows.
          double average_length = 3;
        }

        // IntegerFieldInfo defines output for any integer type field.
        message IntegerFieldInfo {
          // The average of non-null values of the integer field in the
          // sampled data. Returns NaN if the field has a NaN. Optional if
          // zero non-null rows.
          double average = 1;

          // The standard deviation of non-null values of the integer field
          // in the sampled data. Returns NaN if the field has a NaN.
          // Optional if zero non-null rows.
          double standard_deviation = 3;

          // The minimum value of an integer field in the sampled data.
          // Optional if zero non-null rows.
          // NOTE(review): the original comment said "Return NaN, if the
          // field has a NaN", but an int64 cannot represent NaN — confirm
          // what sentinel value is actually produced for NaN inputs.
          int64 min = 4;

          // Quartiles divide the data points into four parts, or quarters,
          // of more-or-less equal size. The three main quartiles are: the
          // first quartile (Q1), which splits off the lowest 25% of the data
          // from the highest 75% (also known as the lower or 25th empirical
          // quartile, as 25% of the data is below this point); the second
          // quartile (Q2), the median of the data set, so 50% of the data
          // lies below it; and the third quartile (Q3), which splits off the
          // highest 25% from the lowest 75% (the upper or 75th empirical
          // quartile). The quartiles are provided as an ordered list,
          // occurring in order Q1, median, Q3.
          repeated int64 quartiles = 6;

          // The maximum value of an integer field in the sampled data.
          // Optional if zero non-null rows.
          // NOTE(review): see `min` — NaN is not representable in int64.
          int64 max = 5;
        }

        // DoubleFieldInfo defines output for any double type field.
        message DoubleFieldInfo {
          // The average of non-null values of the double field in the
          // sampled data. Returns NaN if the field has a NaN. Optional if
          // zero non-null rows.
          double average = 1;

          // The standard deviation of non-null values of the double field in
          // the sampled data. Returns NaN if the field has a NaN. Optional
          // if zero non-null rows.
          double standard_deviation = 3;

          // The minimum value of a double field in the sampled data.
          // Returns NaN if the field has a NaN. Optional if zero non-null
          // rows.
          double min = 4;

          // Quartiles divide the data points into four parts, or quarters,
          // of more-or-less equal size. The three main quartiles are: the
          // first quartile (Q1), which splits off the lowest 25% of the data
          // from the highest 75% (also known as the lower or 25th empirical
          // quartile, as 25% of the data is below this point); the second
          // quartile (Q2), the median of the data set, so 50% of the data
          // lies below it; and the third quartile (Q3), which splits off the
          // highest 25% from the lowest 75% (the upper or 75th empirical
          // quartile). The quartiles are provided as an ordered list,
          // occurring in order Q1, median, Q3.
          repeated double quartiles = 6;

          // The maximum value of a double field in the sampled data.
          // Returns NaN if the field has a NaN. Optional if zero non-null
          // rows.
          double max = 5;
        }

        // TopNValue defines the structure of the output of the top N values
        // of a field.
        message TopNValue {
          // The string representation of the actual value from the field.
          string value = 1;

          // The frequency count of the corresponding value in the field.
          int64 count = 2;
        }

        // The ratio of null rows against the rows in the sampled data.
        double null_ratio = 2;

        // The ratio of rows that are distinct against the rows in the
        // sampled data.
        double distinct_ratio = 3;

        // The array of top N values of the field in the sampled data.
        // Currently N is set to 10, or to the number of distinct values in
        // the field, whichever is smaller. This will be optional for complex
        // non-groupable data-types such as JSON, ARRAY, STRUCT.
        repeated TopNValue top_n_values = 4;

        // The corresponding profile for the specific field type.
        // Each field will have only one field-type-specific profile output.
        oneof field_info {
          // The corresponding string field profile.
          StringFieldInfo string_profile = 101;

          // The corresponding integer field profile.
          IntegerFieldInfo integer_profile = 102;

          // The corresponding double field profile.
          DoubleFieldInfo double_profile = 103;
        }
      }

      // The name of the field.
      string name = 1;

      // The field data type. Possible values include:
      //
      // * STRING
      // * BYTE
      // * INT64
      // * INT32
      // * INT16
      // * DOUBLE
      // * FLOAT
      // * DECIMAL
      // * BOOLEAN
      // * BINARY
      // * TIMESTAMP
      // * DATE
      // * TIME
      // * NULL
      // * RECORD
      string type = 2;

      // The mode of the field. Its value will be:
      // REQUIRED, if it is a required field.
      // NULLABLE, if it is an optional field.
      // REPEATED, if it is a repeated field.
      string mode = 3;

      // The profile information for the corresponding field.
      ProfileInfo profile = 4;
    }

    // The sequence of fields describing data in table entities.
    repeated Field fields = 2;
  }

  // The count of all rows in the sampled data.
  // 0 if there are zero rows.
  int64 row_count = 3;

  // The profile information per field.
  Profile profile = 4;

  // The data scanned for this profile.
  ScannedData scanned_data = 5;
}

@@ -0,0 +1,236 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataplex.v1;
import "google/api/field_behavior.proto";
import "google/cloud/dataplex/v1/processing.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "DataQualityProto";
option java_package = "com.google.cloud.dataplex.v1";
// DataQualityScan related setting.
message DataQualitySpec {
  // The list of rules to evaluate against a data source. At least one rule
  // is required.
  repeated DataQualityRule rules = 1;
}
// The output of a DataQualityScan.
message DataQualityResult {
  // Overall data quality result -- `true` if all rules passed.
  bool passed = 5;

  // A list of results at the dimension level.
  repeated DataQualityDimensionResult dimensions = 2;

  // A list of all the rules in a job, and their results.
  repeated DataQualityRuleResult rules = 3;

  // The count of rows processed.
  int64 row_count = 4;

  // The data scanned for this result.
  ScannedData scanned_data = 7;
}
// DataQualityRuleResult provides a more detailed, per-rule level view of the
// results.
message DataQualityRuleResult {
  // The rule specified in the DataQualitySpec, as is.
  DataQualityRule rule = 1;

  // Whether the rule passed or failed.
  bool passed = 7;

  // The number of rows a rule was evaluated against.
  // This field is only valid for ColumnMap type rules.
  // The evaluated count can be configured to either
  // (1) include all rows (default) - with null rows automatically failing
  // rule evaluation, OR (2) exclude null rows from the evaluated_count, by
  // setting ignore_nulls = true.
  int64 evaluated_count = 9;

  // The number of rows which passed a rule evaluation.
  // This field is only valid for ColumnMap type rules.
  int64 passed_count = 8;

  // The number of rows with null values in the specified column.
  int64 null_count = 5;

  // The ratio of passed_count / evaluated_count.
  // This field is only valid for ColumnMap type rules.
  double pass_ratio = 6;

  // The query to find rows that did not pass this rule.
  // Only applies to ColumnMap and RowCondition rules.
  string failing_rows_query = 10;
}
// DataQualityDimensionResult provides a more detailed, per-dimension level
// view of the results.
// NOTE(review): this message carries only a pass/fail flag and does not name
// the dimension it refers to; presumably callers correlate entries with the
// dimensions of the rules by position in DataQualityResult.dimensions —
// confirm against the service implementation.
message DataQualityDimensionResult {
  // Whether the dimension passed or failed.
  bool passed = 3;
}
// A rule captures data quality intent about a data source.
message DataQualityRule {
  // Evaluates whether each column value lies within a specified range.
  message RangeExpectation {
    // Optional. The minimum column value allowed for a row to pass this
    // validation. At least one of min_value and max_value need to be
    // provided.
    string min_value = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The maximum column value allowed for a row to pass this
    // validation. At least one of min_value and max_value need to be
    // provided.
    string max_value = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether each value needs to be strictly greater than ('>')
    // the minimum, or if equality is allowed. Only relevant if a min_value
    // has been defined. Default = false.
    bool strict_min_enabled = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether each value needs to be strictly lesser than ('<')
    // the maximum, or if equality is allowed. Only relevant if a max_value
    // has been defined. Default = false.
    bool strict_max_enabled = 4 [(google.api.field_behavior) = OPTIONAL];
  }

  // Evaluates whether each column value is not null, as the message name
  // indicates (the original comment said "is null", which contradicts
  // `NonNullExpectation`).
  message NonNullExpectation {}

  // Evaluates whether each column value is contained by a specified set.
  message SetExpectation {
    // The set of allowed values; a row passes when the column value is one
    // of these.
    repeated string values = 1;
  }

  // Evaluates whether each column value matches a specified regex.
  message RegexExpectation {
    // The regular expression each column value is checked against.
    string regex = 1;
  }

  // Evaluates whether the column values are unique, i.e. the column has no
  // duplicates.
  message UniquenessExpectation {}

  // Evaluates whether the column aggregate statistic lies within a specified
  // range.
  message StatisticRangeExpectation {
    // The aggregate statistic of the column to evaluate.
    enum ColumnStatistic {
      // Unspecified statistic type.
      // NOTE(review): the usual convention is a *_UNSPECIFIED zero value
      // (e.g. COLUMN_STATISTIC_UNSPECIFIED); renaming now would break
      // generated code for a published API, so it is only flagged here.
      STATISTIC_UNDEFINED = 0;

      // Evaluate the column mean.
      MEAN = 1;

      // Evaluate the column min.
      MIN = 2;

      // Evaluate the column max.
      MAX = 3;
    }

    // The statistic of the column to compare against the range.
    ColumnStatistic statistic = 1;

    // The minimum column statistic value allowed for a row to pass this
    // validation.
    // At least one of min_value and max_value need to be provided.
    string min_value = 2;

    // The maximum column statistic value allowed for a row to pass this
    // validation.
    // At least one of min_value and max_value need to be provided.
    string max_value = 3;

    // Whether the column statistic needs to be strictly greater than ('>')
    // the minimum, or if equality is allowed. Only relevant if a min_value
    // has been defined. Default = false.
    bool strict_min_enabled = 4;

    // Whether the column statistic needs to be strictly lesser than ('<')
    // the maximum, or if equality is allowed. Only relevant if a max_value
    // has been defined. Default = false.
    bool strict_max_enabled = 5;
  }

  // Evaluates whether each row passes the specified condition.
  // The SQL expression needs to use BigQuery standard SQL syntax and should
  // produce a boolean per row as the result.
  // Example: col1 >= 0 AND col2 < 10
  message RowConditionExpectation {
    // The per-row boolean SQL expression.
    string sql_expression = 1;
  }

  // Evaluates whether the provided expression is true.
  // The SQL expression needs to use BigQuery standard SQL syntax and should
  // produce a scalar boolean result.
  // Example: MIN(col1) >= 0
  message TableConditionExpectation {
    // The scalar boolean SQL expression.
    string sql_expression = 1;
  }

  // The type-specific expectation this rule evaluates. Exactly one must be
  // set.
  oneof rule_type {
    // ColumnMap rule which evaluates whether each column value lies within a
    // specified range.
    RangeExpectation range_expectation = 1;

    // ColumnMap rule which evaluates whether each column value is non-null.
    NonNullExpectation non_null_expectation = 2;

    // ColumnMap rule which evaluates whether each column value is contained
    // by a specified set.
    SetExpectation set_expectation = 3;

    // ColumnMap rule which evaluates whether each column value matches a
    // specified regex.
    RegexExpectation regex_expectation = 4;

    // ColumnAggregate rule which evaluates whether the column values are
    // unique (free of duplicates).
    UniquenessExpectation uniqueness_expectation = 100;

    // ColumnAggregate rule which evaluates whether the column aggregate
    // statistic lies within a specified range.
    StatisticRangeExpectation statistic_range_expectation = 101;

    // Table rule which evaluates whether each row passes the specified
    // condition.
    RowConditionExpectation row_condition_expectation = 200;

    // Table rule which evaluates whether the provided expression is true.
    TableConditionExpectation table_condition_expectation = 201;
  }

  // Optional. The unnested column which this rule is evaluated against.
  string column = 500 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Rows with null values will automatically fail a rule, unless
  // ignore_null is true. In that case, such null rows are trivially
  // considered passing. Only applicable to ColumnMap rules.
  bool ignore_null = 501 [(google.api.field_behavior) = OPTIONAL];

  // Required. The dimension a rule belongs to. Results are also aggregated
  // at the dimension level. Supported dimensions are ["COMPLETENESS",
  // "ACCURACY", "CONSISTENCY", "VALIDITY", "UNIQUENESS", "INTEGRITY"].
  string dimension = 502 [(google.api.field_behavior) = REQUIRED];

  // Optional. The minimum ratio of passing_rows / total_rows required to
  // pass this rule, with a range of [0.0, 1.0].
  //
  // 0 indicates the default value (i.e. 1.0).
  double threshold = 503 [(google.api.field_behavior) = OPTIONAL];
}

@@ -5,6 +5,7 @@ title: Cloud Dataplex API
apis:
- name: google.cloud.dataplex.v1.ContentService
- name: google.cloud.dataplex.v1.DataScanService
- name: google.cloud.dataplex.v1.DataplexService
- name: google.cloud.dataplex.v1.MetadataService
- name: google.cloud.location.Locations
@@ -12,6 +13,7 @@ apis:
- name: google.longrunning.Operations
types:
- name: google.cloud.dataplex.v1.DataScanEvent
- name: google.cloud.dataplex.v1.DiscoveryEvent
- name: google.cloud.dataplex.v1.JobEvent
- name: google.cloud.dataplex.v1.OperationMetadata
@@ -53,6 +55,8 @@ backend:
rules:
- selector: 'google.cloud.dataplex.v1.ContentService.*'
deadline: 60.0
- selector: 'google.cloud.dataplex.v1.DataScanService.*'
deadline: 60.0
- selector: 'google.cloud.dataplex.v1.DataplexService.*'
deadline: 60.0
- selector: 'google.cloud.dataplex.v1.MetadataService.*'
@@ -79,6 +83,10 @@ http:
- get: '/v1/{resource=projects/*/locations/*/lakes/*/zones/*/assets/*}:getIamPolicy'
- get: '/v1/{resource=projects/*/locations/*/lakes/*/tasks/*}:getIamPolicy'
- get: '/v1/{resource=projects/*/locations/*/lakes/*/environments/*}:getIamPolicy'
- get: '/v1/{resource=projects/*/locations/*/dataScans/*}:getIamPolicy'
- get: '/v1/{resource=projects/*/locations/*/dataTaxonomies/*}:getIamPolicy'
- get: '/v1/{resource=projects/*/locations/*/dataTaxonomies/*/attributes/*}:getIamPolicy'
- get: '/v1/{resource=projects/*/locations/*/dataAttributeBindings/*}:getIamPolicy'
- selector: google.iam.v1.IAMPolicy.SetIamPolicy
post: '/v1/{resource=projects/*/locations/*/lakes/*}:setIamPolicy'
body: '*'
@@ -91,6 +99,14 @@ http:
body: '*'
- post: '/v1/{resource=projects/*/locations/*/lakes/*/environments/*}:setIamPolicy'
body: '*'
- post: '/v1/{resource=projects/*/locations/*/dataScans/*}:setIamPolicy'
body: '*'
- post: '/v1/{resource=projects/*/locations/*/dataTaxonomies/*}:setIamPolicy'
body: '*'
- post: '/v1/{resource=projects/*/locations/*/dataTaxonomies/*/attributes/*}:setIamPolicy'
body: '*'
- post: '/v1/{resource=projects/*/locations/*/dataAttributeBindings/*}:setIamPolicy'
body: '*'
- selector: google.iam.v1.IAMPolicy.TestIamPermissions
post: '/v1/{resource=projects/*/locations/*/lakes/*}:testIamPermissions'
body: '*'
@@ -103,6 +119,14 @@ http:
body: '*'
- post: '/v1/{resource=projects/*/locations/*/lakes/*/environments/*}:testIamPermissions'
body: '*'
- post: '/v1/{resource=projects/*/locations/*/dataScans/*}:testIamPermissions'
body: '*'
- post: '/v1/{resource=projects/*/locations/*/dataTaxonomies/*}:testIamPermissions'
body: '*'
- post: '/v1/{resource=projects/*/locations/*/dataTaxonomies/*/attributes/*}:testIamPermissions'
body: '*'
- post: '/v1/{resource=projects/*/locations/*/dataAttributeBindings/*}:testIamPermissions'
body: '*'
- selector: google.longrunning.Operations.CancelOperation
post: '/v1/{name=projects/*/locations/*/operations/*}:cancel'
body: '*'
@@ -119,6 +143,10 @@ authentication:
oauth:
canonical_scopes: |-
https://www.googleapis.com/auth/cloud-platform
- selector: 'google.cloud.dataplex.v1.DataScanService.*'
oauth:
canonical_scopes: |-
https://www.googleapis.com/auth/cloud-platform
- selector: 'google.cloud.dataplex.v1.DataplexService.*'
oauth:
canonical_scopes: |-

@@ -0,0 +1,535 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataplex.v1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/data_profile.proto";
import "google/cloud/dataplex/v1/data_quality.proto";
import "google/cloud/dataplex/v1/processing.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "DataScansProto";
option java_package = "com.google.cloud.dataplex.v1";
// DataScanService manages DataScan resources, which run scanning workloads
// (data profile and data quality — see the DataScan spec oneof) against a
// data source and expose the resulting DataScanJobs.
service DataScanService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a dataScan resource.
  // Long-running operation: the returned Operation resolves to a DataScan,
  // with OperationMetadata as metadata.
  rpc CreateDataScan(CreateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/dataScans"
      body: "data_scan"
    };
    option (google.api.method_signature) = "parent,data_scan,data_scan_id";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Updates the dataScan resource.
  // Long-running operation: the returned Operation resolves to the updated
  // DataScan. Only fields listed in the request's update_mask are changed.
  rpc UpdateDataScan(UpdateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/{data_scan.name=projects/*/locations/*/dataScans/*}"
      body: "data_scan"
    };
    option (google.api.method_signature) = "data_scan,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the dataScan resource.
  // Long-running operation: the returned Operation resolves to
  // google.protobuf.Empty on success.
  rpc DeleteDataScan(DeleteDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "OperationMetadata"
    };
  }

  // Gets a dataScan resource.
  rpc GetDataScan(GetDataScanRequest) returns (DataScan) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists dataScans.
  rpc ListDataScans(ListDataScansRequest) returns (ListDataScansResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/dataScans"
    };
    option (google.api.method_signature) = "parent";
  }

  // Runs an on-demand execution of a DataScan.
  rpc RunDataScan(RunDataScanRequest) returns (RunDataScanResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:run"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets a DataScanJob resource.
  rpc GetDataScanJob(GetDataScanJobRequest) returns (DataScanJob) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScanJobs under the given dataScan.
  rpc ListDataScanJobs(ListDataScanJobsRequest)
      returns (ListDataScanJobsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/dataScans/*}/jobs"
    };
    option (google.api.method_signature) = "parent";
  }
}
// Create dataScan request.
message CreateDataScanRequest {
  // Required. The resource name of the parent location:
  // projects/{project}/locations/{location_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The DataScan resource to create.
  DataScan data_scan = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. DataScan identifier.
  // * Must contain only lowercase letters, numbers and hyphens.
  // * Must start with a letter.
  // * Must end with a number or a letter.
  // * Must be between 1-63 characters.
  // * Must be unique within the customer project / location.
  string data_scan_id = 3 [(google.api.field_behavior) = REQUIRED];
}
// Update dataScan request.
message UpdateDataScanRequest {
  // Required. The update description.
  // Only fields specified in `update_mask` are updated.
  DataScan data_scan = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Mask of fields to update (paths are snake_case field names of
  // DataScan).
  google.protobuf.FieldMask update_mask = 2
      [(google.api.field_behavior) = REQUIRED];
}
// Delete dataScan request.
message DeleteDataScanRequest {
  // Required. The resource name of the dataScan:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}
// Get dataScan request.
message GetDataScanRequest {
  // DataScan views for getting a partial dataScan.
  enum DataScanView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include spec and result.
    BASIC = 1;

    // Include everything.
    // (Numbered 10 rather than 2, leaving room for intermediate views.)
    FULL = 10;
  }

  // Required. The resource name of the dataScan:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Used to select the subset of DataScan information to return.
  // Defaults to `BASIC`.
  DataScanView view = 2 [(google.api.field_behavior) = OPTIONAL];
}
// List dataScans request.
message ListDataScansRequest {
  // Required. The resource name of the parent location:
  // projects/{project}/locations/{location_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. Maximum number of dataScans to return. The service may return
  // fewer than this value. If unspecified, at most 10 scans will be
  // returned. The maximum value is 1000; values above 1000 will be coerced
  // to 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScans` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListDataScans` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter request.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Order by fields (name or create_time) for the result.
  // If not specified, the ordering is undefined.
  string order_by = 5 [(google.api.field_behavior) = OPTIONAL];
}
// List dataScans response.
message ListDataScansResponse {
  // DataScans (metadata only) under the given parent location.
  repeated DataScan data_scans = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // more results in the list.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}
// Run DataScan request.
message RunDataScanRequest {
  // Required. The resource name of the DataScan:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  //
  // Only on-demand DataScans are allowed.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}
// Run DataScan response.
message RunDataScanResponse {
  // The DataScanJob created by the RunDataScan API.
  DataScanJob job = 1;
}
// Get DataScanJob request.
message GetDataScanJobRequest {
  // DataScanJob views for getting a partial dataScanJob.
  enum DataScanJobView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_JOB_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include spec and result.
    BASIC = 1;

    // Include everything.
    // (Numbered 10 rather than 2, leaving room for intermediate views.)
    FULL = 10;
  }

  // Required. The resource name of the DataScanJob:
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}/dataScanJobs/{data_scan_job_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScanJob"
    }
  ];

  // Optional. Used to select the subset of DataScan information to return.
  // Defaults to `BASIC`.
  DataScanJobView view = 2 [(google.api.field_behavior) = OPTIONAL];
}
// List DataScanJobs request.
message ListDataScanJobsRequest {
  // Required. The resource name of the parent DataScan (the original comment
  // said "parent environment", but the reference type below is DataScan):
  // projects/{project}/locations/{location_id}/dataScans/{data_scan_id}
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Maximum number of DataScanJobs to return. The service may
  // return fewer than this value. If unspecified, at most 10 DataScanJobs
  // will be returned. The maximum value is 1000; values above 1000 will be
  // coerced to 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScanJobs` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListDataScanJobs` must match the call that
  // provided the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}
// List DataScanJobs response.
message ListDataScanJobsResponse {
  // DataScanJobs (metadata only) under a given dataScan.
  repeated DataScanJob data_scan_jobs = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // more results in the list.
  string next_page_token = 2;
}
// Represents a user-visible job which provides the insights for the related
// data source.
//
// For example:
//
// * Data Quality: generates queries based on the rules and runs them against
//   the data to get data quality check results.
// * Data Profile: analyzes the data in table(s) and generates insights about
//   the structure, content and relationships (such as null percent,
//   cardinality, min/max/mean, etc).
message DataScan {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScan"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}"
  };

  // DataScan execution settings.
  message ExecutionSpec {
    // Optional. Spec related to how often and when a scan should be triggered.
    //
    // If not specified, the default is `OnDemand`, which means the scan will
    // not run until the user calls the `RunDataScan` API.
    Trigger trigger = 1 [(google.api.field_behavior) = OPTIONAL];

    // Spec related to incremental scans of the data.
    //
    // If not specified, a scan runs on all data in the table.
    // The incremental choice is immutable: once an incremental field is set,
    // it cannot be unset, and vice versa.
    oneof incremental {
      // Immutable. The unnested field (Date or Timestamp) that contains values
      // that monotonically increase over time.
      string field = 100 [(google.api.field_behavior) = IMMUTABLE];
    }
  }

  // Status of the data scan execution.
  message ExecutionStatus {
    // The time when the latest DataScanJob started.
    google.protobuf.Timestamp latest_job_start_time = 4;

    // The time when the latest DataScanJob ended.
    google.protobuf.Timestamp latest_job_end_time = 5;
  }

  // Output only. The relative resource name of the scan, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}`,
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the scan. This ID
  // will be different if the scan is deleted and re-created with the same
  // name.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Description of the scan.
  // * Must be between 1-1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly display name.
  // * Must be between 1-256 characters.
  string display_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User-defined labels for the scan.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the DataScan.
  // NOTE(review): `State` is declared elsewhere in this package — confirm it
  // is the shared resource lifecycle state enum.
  State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The data source for DataScan.
  DataSource data = 9 [(google.api.field_behavior) = REQUIRED];

  // Optional. DataScan execution settings.
  //
  // If not specified, the fields under it will use their default values.
  ExecutionSpec execution_spec = 10 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Status of the data scan execution.
  ExecutionStatus execution_status = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of DataScan. Mirrors which member of `spec` is set.
  DataScanType type = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  // The spec is required and immutable: once `data_quality_spec` is set, it
  // cannot be changed to `data_profile_spec`, and vice versa.
  oneof spec {
    // DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100;

    // DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101;
  }

  // The result of the data scan. Populated to match the configured `spec`.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}
// A DataScanJob represents an instance of a data scan.
message DataScanJob {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScanJob"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}/jobs/{job}"
  };

  // Execution state for the DataScanJob.
  // NOTE(review): values are not prefixed with the enum name, contrary to
  // the usual proto style; renaming them now would break generated code.
  enum State {
    // The DataScanJob state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The DataScanJob is running.
    RUNNING = 1;

    // The DataScanJob is canceling.
    CANCELING = 2;

    // The DataScanJob cancellation was successful.
    CANCELLED = 3;

    // The DataScanJob completed successfully.
    SUCCEEDED = 4;

    // The DataScanJob is no longer running due to an error.
    FAILED = 5;

    // The DataScanJob has been created but not started to run yet.
    // NOTE(review): value 6 is skipped — presumably reserved for an
    // unpublished state; confirm before assigning it a new meaning.
    PENDING = 7;
  }

  // Output only. The relative resource name of the DataScanJob, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}/jobs/{job_id}`,
  // where `{project}` refers to a project_id or project_number and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the DataScanJob.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob was started.
  google.protobuf.Timestamp start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob ended.
  google.protobuf.Timestamp end_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Execution state for the DataScanJob.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional information about the current state.
  string message = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of the parent DataScan.
  DataScanType type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting. A snapshot of the parent DataScan's spec at
  // the time this job was created.
  oneof spec {
    // Output only. DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The result of the data scan. Populated to match the job's `spec`.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}
// The type of DataScan.
// NOTE(review): values are not prefixed with `DATA_SCAN_TYPE_` (except the
// zero value), and their numbering differs from DataScanEvent.ScanType in
// logs.proto (there, DATA_PROFILE = 1 and DATA_QUALITY = 2). These enums are
// independent — never assume their wire values are interchangeable.
enum DataScanType {
  // The DataScan Type is unspecified.
  DATA_SCAN_TYPE_UNSPECIFIED = 0;

  // Data Quality Scan.
  DATA_QUALITY = 1;

  // Data Profile Scan.
  DATA_PROFILE = 2;
}

@ -291,10 +291,128 @@ message SessionEvent {
// The status of the event.
bool event_succeeded = 6;
// If the session is associated with an environment with fast startup enabled,
// and was created before being assigned to a user.
bool fast_startup_enabled = 7;
// The idle duration of a warm pooled session before it is assigned to user.
google.protobuf.Duration unassigned_duration = 8;
}
// These messages contain information about the execution of a datascan.
// The monitored resource is 'DataScan'.
message DataScanEvent {
  // The type of the data scan.
  // NOTE(review): numbering differs from the top-level DataScanType enum in
  // datascans.proto (there, DATA_QUALITY = 1); the two enums are independent.
  enum ScanType {
    // An unspecified data scan type.
    SCAN_TYPE_UNSPECIFIED = 0;

    // Data scan for data profile.
    DATA_PROFILE = 1;

    // Data scan for data quality.
    DATA_QUALITY = 2;
  }

  // The job state of the data scan.
  enum State {
    // Unspecified job state.
    STATE_UNSPECIFIED = 0;

    // Data scan started.
    STARTED = 1;

    // Data scan successfully completed.
    SUCCEEDED = 2;

    // Data scan was unsuccessful.
    FAILED = 3;

    // Data scan was cancelled.
    CANCELLED = 4;
  }

  // The trigger type for the data scan.
  enum Trigger {
    // An unspecified trigger type.
    TRIGGER_UNSPECIFIED = 0;

    // Data scan triggers on demand.
    ON_DEMAND = 1;

    // Data scan triggers as per schedule.
    SCHEDULE = 2;
  }

  // The scope of job for the data scan.
  enum Scope {
    // An unspecified scope type.
    SCOPE_UNSPECIFIED = 0;

    // Data scan runs on all of the data.
    FULL = 1;

    // Data scan runs on incremental data.
    INCREMENTAL = 2;
  }

  // Data profile result for data scan job.
  message DataProfileResult {
    // The count of rows processed in the data scan job.
    int64 row_count = 1;
  }

  // Data quality result for data scan job.
  message DataQualityResult {
    // The count of rows processed in the data scan job.
    int64 row_count = 1;

    // Whether the data quality result was `pass` or not.
    bool passed = 2;

    // The result of each dimension for data quality result.
    // The key of the map is the name of the dimension.
    // The value is the bool value depicting whether the dimension result was
    // `pass` or not.
    map<string, bool> dimension_passed = 3;
  }

  // The data source of the data scan.
  string data_source = 1;

  // The identifier of the specific data scan job this log entry is for.
  string job_id = 2;

  // The time when the data scan job started to run.
  google.protobuf.Timestamp start_time = 3;

  // The time when the data scan job finished.
  google.protobuf.Timestamp end_time = 4;

  // The type of the data scan.
  ScanType type = 5;

  // The status of the data scan job.
  State state = 6;

  // The message describing the data scan job event.
  string message = 7;

  // A version identifier of the spec which was used to execute this job.
  string spec_version = 8;

  // The trigger type of the data scan job.
  Trigger trigger = 9;

  // The scope of the data scan (e.g. full, incremental).
  Scope scope = 10;

  // The result of the data scan job; which member is set corresponds to
  // `type`.
  oneof result {
    // Data profile result for data profile type data scan.
    DataProfileResult data_profile = 101;

    // Data quality result for data quality type data scan.
    DataQualityResult data_quality = 102;
  }
}

@ -682,6 +682,13 @@ message StorageFormat {
string encoding = 1 [(google.api.field_behavior) = OPTIONAL];
}
// Describes Iceberg data format.
message IcebergOptions {
  // Optional. The location of where the iceberg metadata is present; it must
  // be within the table path.
  string metadata_location = 1 [(google.api.field_behavior) = OPTIONAL];
}
// The specific file format of the data.
enum Format {
// Format unspecified.
@ -752,6 +759,9 @@ message StorageFormat {
// - application/x-avro
// - application/x-orc
// - application/x-tfrecord
// - application/x-parquet+iceberg
// - application/x-avro+iceberg
// - application/x-orc+iceberg
// - application/json
// - application/{subtypes}
// - text/csv
@ -768,6 +778,9 @@ message StorageFormat {
// Optional. Additional information about JSON formatted data.
JsonOptions json = 11 [(google.api.field_behavior) = OPTIONAL];
// Optional. Additional information about iceberg tables.
IcebergOptions iceberg = 12 [(google.api.field_behavior) = OPTIONAL];
}
}

@ -0,0 +1,94 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataplex.v1;
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataplex/v1;dataplex";
option java_multiple_files = true;
option java_outer_classname = "ProcessingProto";
option java_package = "com.google.cloud.dataplex.v1";
// DataScan scheduling and trigger settings.
message Trigger {
  // The scan runs one-time via the RunDataScan API.
  message OnDemand {}

  // The scan is scheduled to run periodically.
  message Schedule {
    // Required. Cron schedule (https://en.wikipedia.org/wiki/Cron) for running
    // scans periodically.
    // To explicitly set a timezone to the cron tab, apply a prefix in the
    // cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
    // The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone
    // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
    // "TZ=America/New_York 1 * * * *".
    // This field is required for Schedule scans.
    string cron = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // DataScan scheduling and trigger settings.
  // If not specified, the default is OnDemand, which means the scan will not
  // run until the user calls the RunDataScan API.
  oneof mode {
    // The scan runs one-time shortly after DataScan creation.
    // NOTE(review): this contradicts the OnDemand message comment above
    // ("runs via RunDataScan API") — confirm which behavior is accurate.
    OnDemand on_demand = 100;

    // The scan is scheduled to run periodically.
    Schedule schedule = 101;
  }
}
// The data source for DataScan.
message DataSource {
  // The source is required and immutable: once `entity` is set, it cannot be
  // changed to a different source, and vice versa.
  oneof source {
    // Immutable. The dataplex entity that contains the data for DataScan, of
    // the form:
    // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
    string entity = 100 [
      (google.api.field_behavior) = IMMUTABLE,
      (google.api.resource_reference) = {
        type: "dataplex.googleapis.com/Entity"
      }
    ];
  }
}
// The data scanned during processing (e.g. in incremental DataScan).
message ScannedData {
  // A data range denoted by a pair of start/end values of a field.
  message IncrementalField {
    // The field that contains values which monotonically increase over time
    // (e.g. a timestamp).
    string field = 1;

    // Value that marks the start of the range.
    string start = 2;

    // Value that marks the end of the range.
    string end = 3;
  }

  // The range of scanned data.
  oneof data_range {
    // The range denoted by values of an incremental field.
    IncrementalField incremental_field = 1;
  }
}
Loading…
Cancel
Save