@@ -17,23 +17,34 @@ syntax = "proto3";
 package google.cloud.dataproc.v1;
 import "google/api/field_behavior.proto";
 import "google/api/resource.proto";
 import "google/protobuf/duration.proto";
+import "google/protobuf/timestamp.proto";
 option go_package = "cloud.google.com/go/dataproc/apiv1/dataprocpb;dataprocpb";
 option java_multiple_files = true;
 option java_outer_classname = "SharedProto";
 option java_package = "com.google.cloud.dataproc.v1";
+option (google.api.resource_definition) = {
+  type: "container.googleapis.com/Cluster"
+  pattern: "projects/{project}/locations/{location}/clusters/{cluster}"
+};
+option (google.api.resource_definition) = {
+  type: "metastore.googleapis.com/Service"
+  pattern: "projects/{project}/locations/{location}/services/{service}"
+};
 // Runtime configuration for a workload.
 message RuntimeConfig {
   // Optional. Version of the batch runtime.
   string version = 1 [(google.api.field_behavior) = OPTIONAL];
-  // Optional. Optional custom container image for the job runtime environment. If
-  // not specified, a default container image will be used.
+  // Optional. Custom container image for the job runtime environment.
+  // If not specified, a default container image will be used.
   string container_image = 2 [(google.api.field_behavior) = OPTIONAL];
-  // Optional. A mapping of property names to values, which are used to configure workload
-  // execution.
+  // Optional. A mapping of property names to values, which are used to
+  // configure workload execution.
   map<string, string> properties = 3 [(google.api.field_behavior) = OPTIONAL];
 }
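These fields map one-to-one onto the generated client libraries. A minimal Python sketch, assuming the published google-cloud-dataproc package (generated from these protos, exposed as google.cloud.dataproc_v1 types); the version string and property are illustrative values, not defaults:

    from google.cloud import dataproc_v1

    # Runtime configuration for a serverless batch workload.
    runtime = dataproc_v1.RuntimeConfig(
        version="2.1",  # assumed runtime version; omit to take the service default
        # container_image omitted: a default container image will be used.
        properties={"spark.executor.cores": "4"},  # illustrative Spark property
    )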
@@ -43,7 +54,8 @@ message EnvironmentConfig {
   ExecutionConfig execution_config = 1 [(google.api.field_behavior) = OPTIONAL];
   // Optional. Peripherals configuration that the workload has access to.
-  PeripheralsConfig peripherals_config = 2 [(google.api.field_behavior) = OPTIONAL];
+  PeripheralsConfig peripherals_config = 2
+      [(google.api.field_behavior) = OPTIONAL];
 }
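EnvironmentConfig is just the envelope for the two optional sub-configs. A hedged sketch under the same google-cloud-dataproc assumption; the service account value is hypothetical, and service_account is an ExecutionConfig field that falls outside this hunk:

    from google.cloud import dataproc_v1

    env = dataproc_v1.EnvironmentConfig(
        execution_config=dataproc_v1.ExecutionConfig(
            # Hypothetical service account; field not shown in this hunk.
            service_account="workload-sa@my-project.iam.gserviceaccount.com",
        ),
        peripherals_config=dataproc_v1.PeripheralsConfig(),
    )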
 // Execution configuration for a workload.
@@ -65,19 +77,39 @@ message ExecutionConfig {
   // Optional. The Cloud KMS key to use for encryption.
   string kms_key = 7 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. The duration after which the workload will be terminated.
+  // When the workload passes this ttl, it will be unconditionally killed
+  // without waiting for ongoing work to finish.
+  // Minimum value is 10 minutes; maximum value is 14 days (see JSON
+  // representation of
+  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
+  // If both ttl and idle_ttl are specified, the conditions are treated as
+  // an OR: the workload will be terminated when it has been idle for idle_ttl
+  // or when the ttl has passed, whichever comes first.
+  // If ttl is not specified for a session, it defaults to 24h.
+  google.protobuf.Duration ttl = 9 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. A Cloud Storage bucket used to stage workload dependencies,
+  // config files, and store workload output and other ephemeral data, such as
+  // Spark history files. If you do not specify a staging bucket, Cloud Dataproc
+  // will determine a Cloud Storage location according to the region where your
+  // workload is running, and then create and manage project-level, per-location
+  // staging and temporary buckets.
+  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
+  // a Cloud Storage bucket.**
+  string staging_bucket = 10 [(google.api.field_behavior) = OPTIONAL];
 }
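Two details in the new fields are easy to miss: ttl and idle_ttl combine as an OR (first one to trip terminates the workload), and staging_bucket takes a bare bucket name rather than a gs:// URI. A sketch, assuming a client version generated from this revision and proto-plus's usual Duration handling (it converts datetime.timedelta for you); the bucket name is hypothetical:

    import datetime
    from google.cloud import dataproc_v1

    exec_cfg = dataproc_v1.ExecutionConfig(
        ttl=datetime.timedelta(hours=8),     # allowed range: 10 minutes to 14 days
        staging_bucket="my-staging-bucket",  # bucket name, NOT "gs://my-staging-bucket"
    )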
 // Spark History Server configuration for the workload.
 message SparkHistoryServerConfig {
-  // Optional. Resource name of an existing Dataproc Cluster to act as a Spark History
-  // Server for the workload.
+  // Optional. Resource name of an existing Dataproc Cluster to act as a Spark
+  // History Server for the workload.
   //
   // Example:
   //
   // * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
-  string dataproc_cluster = 1 [
-    (google.api.field_behavior) = OPTIONAL
-  ];
+  string dataproc_cluster = 1 [(google.api.field_behavior) = OPTIONAL];
 }
 // Auxiliary services configuration for a workload.
@@ -88,58 +120,111 @@ message PeripheralsConfig {
   //
   // * `projects/[project_id]/locations/[region]/services/[service_id]`
   string metastore_service = 1 [
-    (google.api.field_behavior) = OPTIONAL
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.resource_reference) = {
+      type: "metastore.googleapis.com/Service"
+    }
   ];
   // Optional. The Spark History Server configuration for the workload.
-  SparkHistoryServerConfig spark_history_server_config = 2 [(google.api.field_behavior) = OPTIONAL];
+  SparkHistoryServerConfig spark_history_server_config = 2
+      [(google.api.field_behavior) = OPTIONAL];
 }
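With the new resource_reference annotation, metastore_service is expected to match the metastore.googleapis.com/Service pattern declared at the top of the file. A sketch with hypothetical project, region, and service names:

    from google.cloud import dataproc_v1

    peripherals = dataproc_v1.PeripheralsConfig(
        # Must match projects/{project}/locations/{location}/services/{service}.
        metastore_service=(
            "projects/my-project/locations/us-central1/services/my-metastore"
        ),
        spark_history_server_config=dataproc_v1.SparkHistoryServerConfig(
            dataproc_cluster=(
                "projects/my-project/regions/us-central1/clusters/my-phs-cluster"
            ),
        ),
    )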
 // Runtime information about workload execution.
 message RuntimeInfo {
-  // Output only. Map of remote access endpoints (such as web interfaces and APIs) to their
-  // URIs.
+  // Output only. Map of remote access endpoints (such as web interfaces and
+  // APIs) to their URIs.
   map<string, string> endpoints = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
-  // Output only. A URI pointing to the location of the stdout and stderr of the workload.
+  // Output only. A URI pointing to the location of the stdout and stderr of
+  // the workload.
   string output_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
   // Output only. A URI pointing to the location of the diagnostics tarball.
   string diagnostic_output_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
+  // Output only. Approximate workload resource usage calculated after workload
+  // finishes (see [Dataproc Serverless
+  // pricing](https://cloud.google.com/dataproc-serverless/pricing)).
+  UsageMetrics approximate_usage = 6
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+  // Output only. Snapshot of current workload resource usage.
+  UsageSnapshot current_usage = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
+// Usage metrics represent approximate total resources consumed by a workload.
+message UsageMetrics {
+  // Optional. DCU (Dataproc Compute Units) usage in (`milliDCU` x `seconds`)
+  // (see [Dataproc Serverless
+  // pricing](https://cloud.google.com/dataproc-serverless/pricing)).
+  int64 milli_dcu_seconds = 1 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. Shuffle storage usage in (`GB` x `seconds`) (see
+  // [Dataproc Serverless
+  // pricing](https://cloud.google.com/dataproc-serverless/pricing)).
+  int64 shuffle_storage_gb_seconds = 2 [(google.api.field_behavior) = OPTIONAL];
+}
+// The usage snapshot represents the resources consumed by a workload at a
+// specified time.
+message UsageSnapshot {
+  // Optional. Milli (one-thousandth) Dataproc Compute Units (DCUs) (see
+  // [Dataproc Serverless
+  // pricing](https://cloud.google.com/dataproc-serverless/pricing)).
+  int64 milli_dcu = 1 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. Shuffle storage in gigabytes (GB) (see [Dataproc Serverless
+  // pricing](https://cloud.google.com/dataproc-serverless/pricing)).
+  int64 shuffle_storage_gb = 2 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. The timestamp of the usage snapshot.
+  google.protobuf.Timestamp snapshot_time = 3
+      [(google.api.field_behavior) = OPTIONAL];
+}
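UsageMetrics reports milliDCU-seconds and GB-seconds, while the pricing page quotes per-DCU-hour and per-GB-month rates, so a unit conversion is needed. A worked sketch; the rate parameters are placeholders (take real rates from the pricing page), and the 730-hours-per-month factor is an assumption of this example:

    def approximate_cost(metrics, usd_per_dcu_hour, usd_per_gb_month):
        """Rough cost estimate from a UsageMetrics message (placeholder rates)."""
        dcu_hours = metrics.milli_dcu_seconds / 1000 / 3600           # milliDCU-s -> DCU-h
        gb_months = metrics.shuffle_storage_gb_seconds / 3600 / 730   # GB-s -> GB-month
        return dcu_hours * usd_per_dcu_hour + gb_months * usd_per_gb_month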
 // The cluster's GKE config.
 message GkeClusterConfig {
-  // Optional. A target GKE cluster to deploy to. It must be in the same project and
-  // region as the Dataproc cluster (the GKE cluster can be zonal or regional).
-  // Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
+  // Optional. A target GKE cluster to deploy to. It must be in the same project
+  // and region as the Dataproc cluster (the GKE cluster can be zonal or
+  // regional). Format:
+  // 'projects/{project}/locations/{location}/clusters/{cluster_id}'
   string gke_cluster_target = 2 [
-    (google.api.field_behavior) = OPTIONAL
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.resource_reference) = {
+      type: "container.googleapis.com/Cluster"
+    }
   ];
-  // Optional. GKE NodePools where workloads will be scheduled. At least one node pool
-  // must be assigned the 'default' role. Each role can be given to only a
-  // single NodePoolTarget. All NodePools must have the same location settings.
-  // If a nodePoolTarget is not specified, Dataproc constructs a default
-  // nodePoolTarget.
-  repeated GkeNodePoolTarget node_pool_target = 3 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. GKE node pools where workloads will be scheduled. At least one
+  // node pool must be assigned the `DEFAULT`
+  // [GkeNodePoolTarget.Role][google.cloud.dataproc.v1.GkeNodePoolTarget.Role].
+  // If a `GkeNodePoolTarget` is not specified, Dataproc constructs a `DEFAULT`
+  // `GkeNodePoolTarget`. Each role can be given to only one
+  // `GkeNodePoolTarget`. All node pools must have the same location settings.
+  repeated GkeNodePoolTarget node_pool_target = 3
+      [(google.api.field_behavior) = OPTIONAL];
 }
 // The configuration for running the Dataproc cluster on Kubernetes.
 message KubernetesClusterConfig {
-  // Optional. A namespace within the Kubernetes cluster to deploy into. If this namespace
-  // does not exist, it is created. If it exists, Dataproc
-  // verifies that another Dataproc VirtualCluster is not installed
-  // into it. If not specified, the name of the Dataproc Cluster is used.
+  // Optional. A namespace within the Kubernetes cluster to deploy into. If this
+  // namespace does not exist, it is created. If it exists, Dataproc verifies
+  // that another Dataproc VirtualCluster is not installed into it. If not
+  // specified, the name of the Dataproc Cluster is used.
   string kubernetes_namespace = 1 [(google.api.field_behavior) = OPTIONAL];
   oneof config {
     // Required. The configuration for running the Dataproc cluster on GKE.
-    GkeClusterConfig gke_cluster_config = 2 [(google.api.field_behavior) = REQUIRED];
+    GkeClusterConfig gke_cluster_config = 2
+        [(google.api.field_behavior) = REQUIRED];
   }
-  // Optional. The software configuration for this Dataproc cluster running on Kubernetes.
-  KubernetesSoftwareConfig kubernetes_software_config = 3 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. The software configuration for this Dataproc cluster running on
+  // Kubernetes.
+  KubernetesSoftwareConfig kubernetes_software_config = 3
+      [(google.api.field_behavior) = OPTIONAL];
 }
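How the Kubernetes pieces nest: KubernetesClusterConfig selects a GkeClusterConfig through its config oneof, and the namespace falls back to the Dataproc cluster name when unset. A sketch with hypothetical resource names:

    from google.cloud import dataproc_v1

    k8s_config = dataproc_v1.KubernetesClusterConfig(
        kubernetes_namespace="dataproc",  # optional; defaults to the cluster name
        gke_cluster_config=dataproc_v1.GkeClusterConfig(
            gke_cluster_target=(
                "projects/my-project/locations/us-central1/clusters/my-gke-cluster"
            ),
        ),
    )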
 // The software configuration for this Dataproc cluster running on Kubernetes.
@@ -163,54 +248,60 @@ message KubernetesSoftwareConfig {
   map<string, string> properties = 2;
 }
-// GKE NodePools that Dataproc workloads run on.
+// GKE node pools that Dataproc workloads run on.
 message GkeNodePoolTarget {
-  // `Role` specifies whose tasks will run on the NodePool. The roles can be
-  // specific to workloads. Exactly one GkeNodePoolTarget within the
-  // VirtualCluster must have 'default' role, which is used to run all workloads
-  // that are not associated with a NodePool.
+  // `Role` specifies the tasks that will run on the node pool. Roles can be
+  // specific to workloads. Exactly one
+  // [GkeNodePoolTarget][google.cloud.dataproc.v1.GkeNodePoolTarget] within the
+  // virtual cluster must have the `DEFAULT` role, which is used to run all
+  // workloads that are not associated with a node pool.
   enum Role {
     // Role is unspecified.
     ROLE_UNSPECIFIED = 0;
-    // Any roles that are not directly assigned to a NodePool run on the
-    // `default` role's NodePool.
+    // At least one node pool must have the `DEFAULT` role.
+    // Work assigned to a role that is not associated with a node pool
+    // is assigned to the node pool with the `DEFAULT` role. For example,
+    // work assigned to the `CONTROLLER` role will be assigned to the node pool
+    // with the `DEFAULT` role if no node pool has the `CONTROLLER` role.
    DEFAULT = 1;
-    // Run controllers and webhooks.
+    // Run work associated with the Dataproc control plane (for example,
+    // controllers and webhooks). Very low resource requirements.
     CONTROLLER = 2;
-    // Run spark driver.
+    // Run work associated with a Spark driver of a job.
     SPARK_DRIVER = 3;
-    // Run spark executors.
+    // Run work associated with a Spark executor of a job.
     SPARK_EXECUTOR = 4;
   }
-  // Required. The target GKE NodePool.
+  // Required. The target GKE node pool.
   // Format:
   // 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
-  string node_pool = 1 [
-    (google.api.field_behavior) = REQUIRED
-  ];
+  string node_pool = 1 [(google.api.field_behavior) = REQUIRED];
-  // Required. The types of role for a GKE NodePool
+  // Required. The roles associated with the GKE node pool.
   repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];
-  // Optional. The configuration for the GKE NodePool.
+  // Input only. The configuration for the GKE node pool.
   //
-  // If specified, Dataproc attempts to create a NodePool with the
+  // If specified, Dataproc attempts to create a node pool with the
   // specified shape. If one with the same name already exists, it is
   // verified against all specified fields. If a field differs, the
   // virtual cluster creation will fail.
   //
-  // If omitted, any NodePool with the specified name is used. If a
-  // NodePool with the specified name does not exist, Dataproc create a NodePool
-  // with default values.
-  GkeNodePoolConfig node_pool_config = 3 [(google.api.field_behavior) = OPTIONAL];
+  // If omitted, any node pool with the specified name is used. If a
+  // node pool with the specified name does not exist, Dataproc creates a
+  // node pool with default values.
+  //
+  // This is an input only field. It will not be returned by the API.
+  GkeNodePoolConfig node_pool_config = 3
+      [(google.api.field_behavior) = INPUT_ONLY];
 }
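The role rules above in code form: exactly one target in the virtual cluster carries DEFAULT, and node_pool_config only shapes creation (input only, never returned). A sketch with hypothetical names:

    from google.cloud import dataproc_v1

    target = dataproc_v1.GkeNodePoolTarget(
        node_pool=(
            "projects/my-project/locations/us-central1/"
            "clusters/my-gke-cluster/nodePools/dataproc-default"
        ),
        # Exactly one GkeNodePoolTarget in the virtual cluster must have DEFAULT.
        roles=[dataproc_v1.GkeNodePoolTarget.Role.DEFAULT],
        # node_pool_config (INPUT_ONLY) could be attached here; it is consumed
        # at creation time and not returned by the API.
    )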
-// The configuration of a GKE NodePool used by a [Dataproc-on-GKE
+// The configuration of a GKE node pool used by a [Dataproc-on-GKE
 // cluster](https://cloud.google.com/dataproc/docs/concepts/jobs/dataproc-gke#create-a-dataproc-on-gke-cluster).
 message GkeNodePoolConfig {
   // Parameters that describe cluster nodes.
@@ -219,19 +310,28 @@ message GkeNodePoolConfig {
   // type](https://cloud.google.com/compute/docs/machine-types).
   string machine_type = 1 [(google.api.field_behavior) = OPTIONAL];
-  // Optional. Whether the nodes are created as [preemptible VM
-  // instances](https://cloud.google.com/compute/docs/instances/preemptible).
-  bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];
-  // Optional. The number of local SSD disks to attach to the node, which is limited by
-  // the maximum number of disks allowable per zone (see [Adding Local
-  // SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
+  // Optional. The number of local SSD disks to attach to the node, which is
+  // limited by the maximum number of disks allowable per zone (see [Adding
+  // Local SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
   int32 local_ssd_count = 7 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. Whether the nodes are created as legacy [preemptible VM
+  // instances](https://cloud.google.com/compute/docs/instances/preemptible).
+  // Also see
+  // [Spot][google.cloud.dataproc.v1.GkeNodePoolConfig.GkeNodeConfig.spot]
+  // VMs, preemptible VM instances without a maximum lifetime. Legacy and Spot
+  // preemptible nodes cannot be used in a node pool with the `CONTROLLER`
+  // [role](/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
+  // or in the DEFAULT node pool if the CONTROLLER role is not assigned (the
+  // DEFAULT node pool will assume the CONTROLLER role).
+  bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];
   // Optional. A list of [hardware
   // accelerators](https://cloud.google.com/compute/docs/gpus) to attach to
   // each node.
-  repeated GkeNodePoolAcceleratorConfig accelerators = 11 [(google.api.field_behavior) = OPTIONAL];
+  repeated GkeNodePoolAcceleratorConfig accelerators = 11
+      [(google.api.field_behavior) = OPTIONAL];
   // Optional. [Minimum CPU
   // platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
@@ -239,26 +339,51 @@ message GkeNodePoolConfig {
   // specified or a newer CPU platform. Specify the friendly names of CPU
   // platforms, such as "Intel Haswell" or "Intel Sandy Bridge".
   string min_cpu_platform = 13 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. The [Customer Managed Encryption Key (CMEK)]
+  // (https://cloud.google.com/kubernetes-engine/docs/how-to/using-cmek)
+  // used to encrypt the boot disk attached to each node in the node pool.
+  // Specify the key using the following format:
+  // <code>projects/<var>KEY_PROJECT_ID</var>/locations/<var>LOCATION</var>/keyRings/<var>RING_NAME</var>/cryptoKeys/<var>KEY_NAME</var></code>.
+  string boot_disk_kms_key = 23 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. Whether the nodes are created as [Spot VM instances]
+  // (https://cloud.google.com/compute/docs/instances/spot).
+  // Spot VMs are the latest update to legacy
+  // [preemptible
+  // VMs][google.cloud.dataproc.v1.GkeNodePoolConfig.GkeNodeConfig.preemptible].
+  // Spot VMs do not have a maximum lifetime. Legacy and Spot preemptible
+  // nodes cannot be used in a node pool with the `CONTROLLER`
+  // [role](/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
+  // or in the DEFAULT node pool if the CONTROLLER role is not assigned (the
+  // DEFAULT node pool will assume the CONTROLLER role).
+  bool spot = 32 [(google.api.field_behavior) = OPTIONAL];
 }
 // A GkeNodeConfigAcceleratorConfig represents a Hardware Accelerator request
-// for a NodePool.
+// for a node pool.
 message GkeNodePoolAcceleratorConfig {
   // The number of accelerator cards exposed to an instance.
   int64 accelerator_count = 1;
   // The accelerator type resource name (see GPUs on Compute Engine).
   string accelerator_type = 2;
+  // Size of partitions to create on the GPU. Valid values are described in
+  // the NVIDIA [MIG user
+  // guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning).
+  string gpu_partition_size = 3;
 }
 // GkeNodePoolAutoscaling contains information the cluster autoscaler needs to
 // adjust the size of the node pool to the current cluster usage.
 message GkeNodePoolAutoscalingConfig {
-  // The minimum number of nodes in the NodePool. Must be >= 0 and <=
+  // The minimum number of nodes in the node pool. Must be >= 0 and <=
   // max_node_count.
   int32 min_node_count = 2;
-  // The maximum number of nodes in the NodePool. Must be >= min_node_count.
+  // The maximum number of nodes in the node pool. Must be >= min_node_count,
+  // and must be > 0.
+  // **Note:** Quota must be sufficient to scale up the cluster.
   int32 max_node_count = 3;
 }
@@ -268,17 +393,21 @@ message GkeNodePoolConfig {
   // Optional. The list of Compute Engine
   // [zones](https://cloud.google.com/compute/docs/zones#available) where
-  // NodePool's nodes will be located.
+  // node pool nodes associated with a Dataproc on GKE virtual cluster
+  // will be located.
   //
-  // **Note:** Currently, only one zone may be specified.
+  // **Note:** All node pools associated with a virtual cluster
+  // must be located in the same region as the virtual cluster, and they must
+  // be located in the same zone within that region.
   //
-  // If a location is not specified during NodePool creation, Dataproc will
-  // choose a location.
+  // If a location is not specified during node pool creation, Dataproc on GKE
+  // will choose the zone.
   repeated string locations = 13 [(google.api.field_behavior) = OPTIONAL];
-  // Optional. The autoscaler configuration for this NodePool. The autoscaler is enabled
-  // only when a valid configuration is present.
-  GkeNodePoolAutoscalingConfig autoscaling = 4 [(google.api.field_behavior) = OPTIONAL];
+  // Optional. The autoscaler configuration for this node pool. The autoscaler
+  // is enabled only when a valid configuration is present.
+  GkeNodePoolAutoscalingConfig autoscaling = 4
+      [(google.api.field_behavior) = OPTIONAL];
 }
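Pulling the node-pool shape together. This sketch assumes the generated Python client nests GkeNodeConfig and GkeNodePoolAutoscalingConfig under GkeNodePoolConfig, as the cross-references above suggest; the machine type, zone, and node counts are illustrative:

    from google.cloud import dataproc_v1

    pool = dataproc_v1.GkeNodePoolConfig(
        config=dataproc_v1.GkeNodePoolConfig.GkeNodeConfig(
            machine_type="n1-standard-4",
            spot=True,  # disallowed wherever the CONTROLLER role would land
        ),
        locations=["us-central1-a"],  # keep all pools in one zone, per the note
        autoscaling=dataproc_v1.GkeNodePoolConfig.GkeNodePoolAutoscalingConfig(
            min_node_count=0,
            max_node_count=10,  # quota must be sufficient for the scale-up
        ),
    )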
 // Cluster components that can be activated.
@@ -308,12 +437,18 @@ enum Component {
   // The Hive Web HCatalog (the REST service for accessing HCatalog).
   HIVE_WEBHCAT = 3;
+  // Hudi.
+  HUDI = 18;
   // The Jupyter Notebook.
   JUPYTER = 1;
   // The Presto query engine.
   PRESTO = 6;
+  // The Trino query engine.
+  TRINO = 17;
   // The Ranger service.
   RANGER = 12;
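These Component values are consumed by SoftwareConfig.optional_components (defined in clusters.proto, not this file) when creating clusters. A hedged sketch; the image version is illustrative:

    from google.cloud import dataproc_v1

    software = dataproc_v1.SoftwareConfig(
        image_version="2.1",  # illustrative image version
        optional_components=[
            dataproc_v1.Component.JUPYTER,
            dataproc_v1.Component.TRINO,  # added in this change
        ],
    )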