feat: add support for new Dataproc features

1. Allow changing shielded config defaults for 2.1+ images
2. Support filtering batches in the list API
3. Support Trino jobs on 2.1+ image clusters
4. Support batch TTL
5. Support a custom staging bucket for batches
6. Expose approximate and current batch resource usage
7. Support Hudi and Trino components

PiperOrigin-RevId: 511550277
Authored by Google APIs, committed by Copybara-Service
parent 2029bd6047
commit 9111603ba0
  1. google/cloud/dataproc/v1/BUILD.bazel (6)
  2. google/cloud/dataproc/v1/autoscaling_policies.proto (65)
  3. google/cloud/dataproc/v1/batches.proto (109)
  4. google/cloud/dataproc/v1/clusters.proto (91)
  5. google/cloud/dataproc/v1/dataproc_v1.yaml (10)
  6. google/cloud/dataproc/v1/jobs.proto (40)
  7. google/cloud/dataproc/v1/node_groups.proto (2)
  8. google/cloud/dataproc/v1/operations.proto (4)
  9. google/cloud/dataproc/v1/shared.proto (279)
  10. google/cloud/dataproc/v1/workflow_templates.proto (107)

@ -145,6 +145,8 @@ go_gapic_library(
transport = "grpc+rest",
deps = [
":dataproc_go_proto",
"//google/cloud/location:location_go_proto",
"//google/iam/v1:iam_go_proto",
"//google/longrunning:longrunning_go_proto",
"@com_google_cloud_go_longrunning//:go_default_library",
"@com_google_cloud_go_longrunning//autogen:go_default_library",
@ -186,6 +188,10 @@ py_gapic_library(
rest_numeric_enums = True,
service_yaml = "dataproc_v1.yaml",
transport = "grpc+rest",
deps = [
"//google/iam/v1:iam_policy_py_proto",
"//google/iam/v1:policy_py_proto",
],
)
py_test(

@ -1,4 +1,4 @@
// Copyright 2021 Google LLC
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -36,10 +36,12 @@ option (google.api.resource_definition) = {
// Dataproc API.
service AutoscalingPolicyService {
option (google.api.default_host) = "dataproc.googleapis.com";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
// Creates new autoscaling policy.
rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest)
returns (AutoscalingPolicy) {
option (google.api.http) = {
post: "/v1/{parent=projects/*/locations/*}/autoscalingPolicies"
body: "policy"
@ -55,7 +57,8 @@ service AutoscalingPolicyService {
//
// Disabled check for update_mask, because all updates will be full
// replacements.
rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest)
returns (AutoscalingPolicy) {
option (google.api.http) = {
put: "/v1/{policy.name=projects/*/locations/*/autoscalingPolicies/*}"
body: "policy"
@ -68,7 +71,8 @@ service AutoscalingPolicyService {
}
// Retrieves autoscaling policy.
rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest)
returns (AutoscalingPolicy) {
option (google.api.http) = {
get: "/v1/{name=projects/*/locations/*/autoscalingPolicies/*}"
additional_bindings {
@ -79,7 +83,8 @@ service AutoscalingPolicyService {
}
// Lists autoscaling policies in the project.
rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest) returns (ListAutoscalingPoliciesResponse) {
rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest)
returns (ListAutoscalingPoliciesResponse) {
option (google.api.http) = {
get: "/v1/{parent=projects/*/locations/*}/autoscalingPolicies"
additional_bindings {
@ -91,7 +96,8 @@ service AutoscalingPolicyService {
// Deletes an autoscaling policy. It is an error to delete an autoscaling
// policy that is in use by one or more clusters.
rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest) returns (google.protobuf.Empty) {
rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest)
returns (google.protobuf.Empty) {
option (google.api.http) = {
delete: "/v1/{name=projects/*/locations/*/autoscalingPolicies/*}"
additional_bindings {
@ -132,14 +138,17 @@ message AutoscalingPolicy {
// Autoscaling algorithm for policy.
oneof algorithm {
BasicAutoscalingAlgorithm basic_algorithm = 3 [(google.api.field_behavior) = REQUIRED];
BasicAutoscalingAlgorithm basic_algorithm = 3
[(google.api.field_behavior) = REQUIRED];
}
// Required. Describes how the autoscaler will operate for primary workers.
InstanceGroupAutoscalingPolicyConfig worker_config = 4 [(google.api.field_behavior) = REQUIRED];
InstanceGroupAutoscalingPolicyConfig worker_config = 4
[(google.api.field_behavior) = REQUIRED];
// Optional. Describes how the autoscaler will operate for secondary workers.
InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5 [(google.api.field_behavior) = OPTIONAL];
InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5
[(google.api.field_behavior) = OPTIONAL];
// Optional. The labels to associate with this autoscaling policy.
// Label **keys** must contain 1 to 63 characters, and must conform to
@ -155,14 +164,16 @@ message AutoscalingPolicy {
message BasicAutoscalingAlgorithm {
oneof config {
// Required. YARN autoscaling configuration.
BasicYarnAutoscalingConfig yarn_config = 1 [(google.api.field_behavior) = REQUIRED];
BasicYarnAutoscalingConfig yarn_config = 1
[(google.api.field_behavior) = REQUIRED];
}
// Optional. Duration between scaling events. A scaling period starts after
// the update operation from the previous event has completed.
//
// Bounds: [2m, 1d]. Default: 2m.
google.protobuf.Duration cooldown_period = 2 [(google.api.field_behavior) = OPTIONAL];
google.protobuf.Duration cooldown_period = 2
[(google.api.field_behavior) = OPTIONAL];
}
// Basic autoscaling configurations for YARN.
@ -173,22 +184,23 @@ message BasicYarnAutoscalingConfig {
// downscaling operations.
//
// Bounds: [0s, 1d].
google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];
// Required. Fraction of average YARN pending memory in the last cooldown period
// for which to add workers. A scale-up factor of 1.0 will result in scaling
// up so that there is no pending memory remaining after the update (more
// aggressive scaling). A scale-up factor closer to 0 will result in a smaller
// magnitude of scaling up (less aggressive scaling).
// See [How autoscaling
google.protobuf.Duration graceful_decommission_timeout = 5
[(google.api.field_behavior) = REQUIRED];
// Required. Fraction of average YARN pending memory in the last cooldown
// period for which to add workers. A scale-up factor of 1.0 will result in
// scaling up so that there is no pending memory remaining after the update
// (more aggressive scaling). A scale-up factor closer to 0 will result in a
// smaller magnitude of scaling up (less aggressive scaling). See [How
// autoscaling
// works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
// for more information.
//
// Bounds: [0.0, 1.0].
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];
// Required. Fraction of average YARN pending memory in the last cooldown period
// for which to remove workers. A scale-down factor of 1 will result in
// Required. Fraction of average YARN pending memory in the last cooldown
// period for which to remove workers. A scale-down factor of 1 will result in
// scaling down so that there is no available memory remaining after the
// update (more aggressive scaling). A scale-down factor of 0 disables
// removing workers, which can be beneficial for autoscaling a single job.
@ -206,7 +218,8 @@ message BasicYarnAutoscalingConfig {
// on any recommended change.
//
// Bounds: [0.0, 1.0]. Default: 0.0.
double scale_up_min_worker_fraction = 3 [(google.api.field_behavior) = OPTIONAL];
double scale_up_min_worker_fraction = 3
[(google.api.field_behavior) = OPTIONAL];
// Optional. Minimum scale-down threshold as a fraction of total cluster size
// before scaling occurs. For example, in a 20-worker cluster, a threshold of
@ -215,7 +228,8 @@ message BasicYarnAutoscalingConfig {
// on any recommended change.
//
// Bounds: [0.0, 1.0]. Default: 0.0.
double scale_down_min_worker_fraction = 4 [(google.api.field_behavior) = OPTIONAL];
double scale_down_min_worker_fraction = 4
[(google.api.field_behavior) = OPTIONAL];
}
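
To make the scaling-factor semantics above concrete, here is a minimal sketch of creating a policy with the Python client (google-cloud-dataproc). The project, region, policy id, and the worker_config max_instances field are illustrative assumptions; max_instances is not shown in this hunk.

    from datetime import timedelta
    from google.cloud import dataproc_v1

    region = "us-central1"  # assumed region
    client = dataproc_v1.AutoscalingPolicyServiceClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    policy = dataproc_v1.AutoscalingPolicy(
        id="scale-on-pending-memory",  # hypothetical policy id
        basic_algorithm=dataproc_v1.BasicAutoscalingAlgorithm(
            yarn_config=dataproc_v1.BasicYarnAutoscalingConfig(
                graceful_decommission_timeout=timedelta(hours=1),
                scale_up_factor=1.0,    # add workers for all pending YARN memory
                scale_down_factor=0.5,  # remove workers for half of the idle memory
            ),
            cooldown_period=timedelta(minutes=2),  # the documented default
        ),
        worker_config=dataproc_v1.InstanceGroupAutoscalingPolicyConfig(
            max_instances=20,  # assumed field; not part of this hunk
        ),
    )
    client.create_autoscaling_policy(
        parent=f"projects/my-project/locations/{region}", policy=policy
    )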
// Configuration for the size bounds of an instance group, including its
@ -358,7 +372,8 @@ message ListAutoscalingPoliciesRequest {
// A response to a request to list autoscaling policies in a project.
message ListAutoscalingPoliciesResponse {
// Output only. Autoscaling policies list.
repeated AutoscalingPolicy policies = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
repeated AutoscalingPolicy policies = 1
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. This token is included in the response if there are more
// results to fetch.

@ -1,4 +1,4 @@
// Copyright 2021 Google LLC
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -33,7 +33,8 @@ option java_package = "com.google.cloud.dataproc.v1";
// The BatchController provides methods to manage batch workloads.
service BatchController {
option (google.api.default_host) = "dataproc.googleapis.com";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
// Creates a batch workload that executes asynchronously.
rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
@ -87,8 +88,8 @@ message CreateBatchRequest {
// Required. The batch to create.
Batch batch = 2 [(google.api.field_behavior) = REQUIRED];
// Optional. The ID to use for the batch, which will become the final component of
// the batch's resource name.
// Optional. The ID to use for the batch, which will become the final
// component of the batch's resource name.
//
// This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`.
string batch_id = 3 [(google.api.field_behavior) = OPTIONAL];
@ -110,12 +111,12 @@ message CreateBatchRequest {
// A request to get the resource representation for a batch workload.
message GetBatchRequest {
// Required. The name of the batch to retrieve.
// Required. The fully qualified name of the batch to retrieve
// in the format
// "projects/PROJECT_ID/locations/DATAPROC_REGION/batches/BATCH_ID"
string name = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "dataproc.googleapis.com/Batch"
}
(google.api.resource_reference) = { type: "dataproc.googleapis.com/Batch" }
];
}
@ -137,6 +138,28 @@ message ListBatchesRequest {
// Optional. A page token received from a previous `ListBatches` call.
// Provide this token to retrieve the subsequent page.
string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
// Optional. A filter for the batches to return in the response.
//
// A filter is a logical expression constraining the values of various fields
// in each batch resource. Filters are case sensitive, and may contain
// multiple clauses combined with logical operators (AND/OR).
// Supported fields are `batch_id`, `batch_uuid`, `state`, and `create_time`.
//
// e.g. `state = RUNNING and create_time < "2023-01-01T00:00:00Z"`
// filters for batches in state RUNNING that were created before 2023-01-01
//
// See https://google.aip.dev/assets/misc/ebnf-filtering.txt for a detailed
// description of the filter syntax and a list of supported comparisons.
string filter = 4 [(google.api.field_behavior) = OPTIONAL];
// Optional. Field(s) on which to sort the list of batches.
//
// Currently the only supported sort orders are unspecified (empty) and
// `create_time desc` to sort by most recently created batches first.
//
// See https://google.aip.dev/132#ordering for more details.
string order_by = 5 [(google.api.field_behavior) = OPTIONAL];
}
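
As a usage sketch for the new filter and order_by fields above, a Python client regenerated from these protos could list recent running batches as follows; the project and region are placeholders.

    from google.cloud import dataproc_v1

    region = "us-central1"  # assumed region
    client = dataproc_v1.BatchControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    request = dataproc_v1.ListBatchesRequest(
        parent=f"projects/my-project/locations/{region}",
        # case-sensitive filter over batch_id, batch_uuid, state, and create_time
        filter='state = RUNNING and create_time < "2023-01-01T00:00:00Z"',
        order_by="create_time desc",  # newest batches first
    )
    for batch in client.list_batches(request=request):
        print(batch.name, batch.state)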
// A list of batch workloads.
@ -151,12 +174,12 @@ message ListBatchesResponse {
// A request to delete a batch workload.
message DeleteBatchRequest {
// Required. The name of the batch resource to delete.
  // Required. The fully qualified name of the batch to delete
// in the format
// "projects/PROJECT_ID/locations/DATAPROC_REGION/batches/BATCH_ID"
string name = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "dataproc.googleapis.com/Batch"
}
(google.api.resource_reference) = { type: "dataproc.googleapis.com/Batch" }
];
}
@ -167,18 +190,6 @@ message Batch {
pattern: "projects/{project}/locations/{location}/batches/{batch}"
};
// Historical state information.
message StateHistory {
// Output only. The state of the batch at this point in history.
State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Details about the state at this point in history.
string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The time when the batch entered the historical state.
google.protobuf.Timestamp state_start_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// The batch state.
enum State {
// The batch state is unknown.
@ -203,6 +214,19 @@ message Batch {
FAILED = 6;
}
// Historical state information.
message StateHistory {
// Output only. The state of the batch at this point in history.
State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Details about the state at this point in history.
string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The time when the batch entered the historical state.
google.protobuf.Timestamp state_start_time = 3
[(google.api.field_behavior) = OUTPUT_ONLY];
}
// Output only. The resource name of the batch.
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
@ -211,7 +235,8 @@ message Batch {
string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The time when the batch was created.
google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
google.protobuf.Timestamp create_time = 3
[(google.api.field_behavior) = OUTPUT_ONLY];
// The application/framework-specific portion of the batch configuration.
oneof batch_config {
@ -239,7 +264,8 @@ message Batch {
string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The time when the batch entered a current state.
google.protobuf.Timestamp state_time = 11 [(google.api.field_behavior) = OUTPUT_ONLY];
google.protobuf.Timestamp state_time = 11
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The email address of the user who created the batch.
string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
@ -257,13 +283,15 @@ message Batch {
RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];
// Optional. Environment configuration for the batch execution.
EnvironmentConfig environment_config = 15 [(google.api.field_behavior) = OPTIONAL];
EnvironmentConfig environment_config = 15
[(google.api.field_behavior) = OPTIONAL];
// Output only. The resource name of the operation associated with this batch.
string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Historical state information for the batch.
repeated StateHistory state_history = 17 [(google.api.field_behavior) = OUTPUT_ONLY];
repeated StateHistory state_history = 17
[(google.api.field_behavior) = OUTPUT_ONLY];
}
// A configuration for running an
@ -271,8 +299,8 @@ message Batch {
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
// Required. The HCFS URI of the main Python file to use as the Spark driver. Must
// be a .py file.
// Required. The HCFS URI of the main Python file to use as the Spark driver.
// Must be a .py file.
string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];
// Optional. The arguments to pass to the driver. Do not include arguments
@ -298,7 +326,7 @@ message PySparkBatch {
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
// A configuration for running an [Apache Spark](http://spark.apache.org/)
// A configuration for running an [Apache Spark](https://spark.apache.org/)
// batch workload.
message SparkBatch {
// The specification of the main method to call to drive the Spark
@ -310,8 +338,8 @@ message SparkBatch {
// Optional. The HCFS URI of the jar file that contains the main class.
string main_jar_file_uri = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. The name of the driver main class. The jar file that contains the class
// must be in the classpath or specified in `jar_file_uris`.
// Optional. The name of the driver main class. The jar file that contains
// the class must be in the classpath or specified in `jar_file_uris`.
string main_class = 2 [(google.api.field_behavior) = OPTIONAL];
}
@ -342,9 +370,9 @@ message SparkRBatch {
// Must be a `.R` or `.r` file.
string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];
// Optional. The arguments to pass to the Spark driver. Do not include arguments
// that can be set as batch properties, such as `--conf`, since a collision
// can occur that causes an incorrect batch submission.
// Optional. The arguments to pass to the Spark driver. Do not include
// arguments that can be set as batch properties, such as `--conf`, since a
// collision can occur that causes an incorrect batch submission.
repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. HCFS URIs of files to be placed in the working directory of
@ -358,14 +386,17 @@ message SparkRBatch {
}
// A configuration for running
// [Apache Spark SQL](http://spark.apache.org/sql/) queries as a batch workload.
// [Apache Spark SQL](https://spark.apache.org/sql/) queries as a batch
// workload.
message SparkSqlBatch {
// Required. The HCFS URI of the script that contains Spark SQL queries to execute.
// Required. The HCFS URI of the script that contains Spark SQL queries to
// execute.
string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];
// Optional. Mapping of query variable names to values (equivalent to the
// Spark SQL command: `SET name="value";`).
map<string, string> query_variables = 2 [(google.api.field_behavior) = OPTIONAL];
map<string, string> query_variables = 2
[(google.api.field_behavior) = OPTIONAL];
// Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

@ -30,14 +30,6 @@ option go_package = "cloud.google.com/go/dataproc/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1";
option (google.api.resource_definition) = {
type: "container.googleapis.com/Cluster"
pattern: "projects/{project}/locations/{location}/clusters/{cluster}"
};
option (google.api.resource_definition) = {
type: "metastore.googleapis.com/Service"
pattern: "projects/{project}/locations/{location}/services/{service}"
};
// The ClusterControllerService provides methods to manage clusters
// of Compute Engine instances.
@ -174,12 +166,14 @@ message Cluster {
// Optional. The cluster config for a cluster of Compute Engine Instances.
// Note that Dataproc may set default values, and values may change
// when clusters are updated.
//
// Exactly one of ClusterConfig or VirtualClusterConfig must be specified.
ClusterConfig config = 3 [(google.api.field_behavior) = OPTIONAL];
// Optional. The virtual cluster config is used when creating a Dataproc
// cluster that does not directly control the underlying compute resources,
// for example, when creating a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke).
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
// Dataproc may set default values, and values may change when
// clusters are updated. Exactly one of
// [config][google.cloud.dataproc.v1.Cluster.config] or
@ -316,7 +310,7 @@ message ClusterConfig {
// The Dataproc cluster config for a cluster that does not directly control the
// underlying compute resources, such as a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke).
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
message VirtualClusterConfig {
// Optional. A Cloud Storage bucket used to stage job
// dependencies, config files, and job driver console output.
@ -414,17 +408,15 @@ message GceClusterConfig {
BIDIRECTIONAL = 3;
}
// Optional. The zone where the Compute Engine cluster will be located.
// On a create request, it is required in the "global" region. If omitted
// in a non-global Dataproc region, the service will pick a zone in the
// corresponding Compute Engine region. On a get request, zone will
// always be present.
// Optional. The Compute Engine zone where the Dataproc cluster will be
// located. If omitted, the service will pick a zone in the cluster's Compute
// Engine region. On a get request, zone will always be present.
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
// * `projects/[project_id]/zones/[zone]`
// * `us-central1-f`
// * `[zone]`
string zone_uri = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. The Compute Engine network to be used for machine
@ -436,8 +428,8 @@ message GceClusterConfig {
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`
// * `projects/[project_id]/regions/global/default`
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default`
// * `projects/[project_id]/global/networks/default`
// * `default`
string network_uri = 2 [(google.api.field_behavior) = OPTIONAL];
@ -446,8 +438,8 @@ message GceClusterConfig {
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/subnetworks/sub0`
// * `projects/[project_id]/regions/us-east1/subnetworks/sub0`
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/[region]/subnetworks/sub0`
// * `projects/[project_id]/regions/[region]/subnetworks/sub0`
// * `sub0`
string subnetwork_uri = 6 [(google.api.field_behavior) = OPTIONAL];
@ -457,7 +449,7 @@ message GceClusterConfig {
// instance. This `internal_ip_only` restriction can only be enabled for
// subnetwork enabled networks, and all off-cluster dependencies must be
// configured to be accessible without external IP addresses.
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];
optional bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];
// Optional. The type of IPv6 access for a cluster.
PrivateIpv6GoogleAccess private_ipv6_google_access = 12
@ -533,8 +525,8 @@ message NodeGroupAffinity {
//
// A full URL, partial URI, or node group name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1`
// * `projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1`
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
// * `projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
// * `node-group-1`
string node_group_uri = 1 [(google.api.field_behavior) = REQUIRED];
}
@ -543,13 +535,14 @@ message NodeGroupAffinity {
// VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
message ShieldedInstanceConfig {
// Optional. Defines whether instances have Secure Boot enabled.
bool enable_secure_boot = 1 [(google.api.field_behavior) = OPTIONAL];
optional bool enable_secure_boot = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Defines whether instances have the vTPM enabled.
bool enable_vtpm = 2 [(google.api.field_behavior) = OPTIONAL];
optional bool enable_vtpm = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. Defines whether instances have integrity monitoring enabled.
bool enable_integrity_monitoring = 3 [(google.api.field_behavior) = OPTIONAL];
optional bool enable_integrity_monitoring = 3
[(google.api.field_behavior) = OPTIONAL];
}
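
With the three fields above now `optional`, leaving a field unset keeps the image default (which, per the commit message, can now differ on 2.1+ images), while an explicit value overrides it. A hedged sketch of pinning the values at cluster creation with the Python client; the project, region, zone, and cluster name are placeholders.

    from google.cloud import dataproc_v1

    region = "us-central1"  # assumed region
    client = dataproc_v1.ClusterControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    cluster = {
        "project_id": "my-project",
        "cluster_name": "shielded-cluster",  # placeholder
        "config": {
            "gce_cluster_config": {
                "zone_uri": "us-central1-f",  # short zone name is accepted
                "shielded_instance_config": {
                    # unset fields keep the image defaults; explicit values override them
                    "enable_secure_boot": True,
                    "enable_vtpm": True,
                    "enable_integrity_monitoring": True,
                },
            },
        },
    }
    operation = client.create_cluster(
        request={"project_id": "my-project", "region": region, "cluster": cluster}
    )
    operation.result()  # blocks until the cluster is running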
// Confidential Instance Config for clusters using [Confidential
@ -613,14 +606,14 @@ message InstanceGroupConfig {
//
// Image examples:
//
// * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]`
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/[image-id]`
// * `projects/[project_id]/global/images/[image-id]`
// * `image-id`
//
// Image family examples. Dataproc will use the most recent
// image from the family:
//
// * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]`
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/family/[custom-image-family-name]`
// * `projects/[project_id]/global/images/family/[custom-image-family-name]`
//
// If the URI is unspecified, it will be inferred from
@ -631,8 +624,8 @@ message InstanceGroupConfig {
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
// * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
// * `projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
// * `n1-standard-2`
//
// **Auto Zone Exception**: If you are using the Dataproc
@ -693,12 +686,12 @@ message AcceleratorConfig {
// Full URL, partial URI, or short name of the accelerator type resource to
// expose to this instance. See
// [Compute Engine
// AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
// AcceleratorTypes](https://cloud.google.com/compute/docs/reference/v1/acceleratorTypes).
//
// Examples:
//
// * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
// * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
// * `projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
// * `nvidia-tesla-k80`
//
// **Auto Zone Exception**: If you are using the Dataproc
@ -730,6 +723,9 @@ message DiskConfig {
// If one or more SSDs are attached, this runtime bulk
// data is spread across them, and the boot disk contains only basic
// config and installed binaries.
//
// Note: Local SSD options may vary by machine type and number of vCPUs
// selected.
int32 num_local_ssds = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. Interface type of local SSDs (default is "scsi").
@ -1065,6 +1061,18 @@ message MetastoreConfig {
];
}
// Contains cluster daemon metrics, such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
// The HDFS metrics.
map<string, int64> hdfs_metrics = 1;
// YARN metrics.
map<string, int64> yarn_metrics = 2;
}
// Dataproc metric config.
message DataprocMetricConfig {
// A source for the collection of Dataproc OSS metrics (see [available OSS
@ -1094,6 +1102,9 @@ message DataprocMetricConfig {
// Hiveserver2 metric source.
HIVESERVER2 = 6;
// hivemetastore metric source
HIVEMETASTORE = 7;
}
// A Dataproc OSS metric.
@ -1141,18 +1152,6 @@ message DataprocMetricConfig {
repeated Metric metrics = 1 [(google.api.field_behavior) = REQUIRED];
}
// Contains cluster daemon metrics, such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
// The HDFS metrics.
map<string, int64> hdfs_metrics = 1;
// The YARN metrics.
map<string, int64> yarn_metrics = 2;
}
// A request to create a cluster.
message CreateClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
@ -1199,7 +1198,7 @@ message UpdateClusterRequest {
// Required. The changes to the cluster.
Cluster cluster = 3 [(google.api.field_behavior) = REQUIRED];
// Optional. Timeout for graceful YARN decomissioning. Graceful
// Optional. Timeout for graceful YARN decommissioning. Graceful
// decommissioning allows removing nodes from the cluster without
// interrupting jobs in progress. Timeout specifies how long to wait for jobs
// in progress to finish before forcefully removing nodes (and potentially

@ -10,6 +10,8 @@ apis:
- name: google.cloud.dataproc.v1.JobController
- name: google.cloud.dataproc.v1.NodeGroupController
- name: google.cloud.dataproc.v1.WorkflowTemplateService
- name: google.iam.v1.IAMPolicy
- name: google.longrunning.Operations
types:
- name: google.cloud.dataproc.v1.BatchOperationMetadata
@ -97,12 +99,20 @@ http:
body: '*'
- selector: google.longrunning.Operations.CancelOperation
post: '/v1/{name=projects/*/regions/*/operations/*}:cancel'
additional_bindings:
- post: '/v1/{name=projects/*/locations/*/operations/*}:cancel'
- selector: google.longrunning.Operations.DeleteOperation
delete: '/v1/{name=projects/*/regions/*/operations/*}'
additional_bindings:
- delete: '/v1/{name=projects/*/locations/*/operations/*}'
- selector: google.longrunning.Operations.GetOperation
get: '/v1/{name=projects/*/regions/*/operations/*}'
additional_bindings:
- get: '/v1/{name=projects/*/locations/*/operations/*}'
- selector: google.longrunning.Operations.ListOperations
get: '/v1/{name=projects/*/regions/*/operations}'
additional_bindings:
- get: '/v1/{name=projects/*/locations/*/operations}'
authentication:
rules:

@ -476,6 +476,43 @@ message PrestoJob {
LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}
// A Dataproc job for running [Trino](https://trino.io/) queries.
// **IMPORTANT**: The [Dataproc Trino Optional
// Component](https://cloud.google.com/dataproc/docs/concepts/components/trino)
// must be enabled when the cluster is created to submit a Trino job to the
// cluster.
message TrinoJob {
// Required. The sequence of Trino queries to execute, specified as
// either an HCFS file URI or as a list of queries.
oneof queries {
// The HCFS URI of the script that contains SQL queries.
string query_file_uri = 1;
// A list of queries.
QueryList query_list = 2;
}
// Optional. Whether to continue executing queries if a query fails.
// The default value is `false`. Setting to `true` can be useful when
// executing independent parallel queries.
bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];
// Optional. The format in which query output will be displayed. See the
// Trino documentation for supported output formats
string output_format = 4 [(google.api.field_behavior) = OPTIONAL];
// Optional. Trino client tags to attach to this query
repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL];
// Optional. A mapping of property names to values. Used to set Trino
// [session properties](https://trino.io/docs/current/sql/set-session.html)
// Equivalent to using the --session flag in the Trino CLI
map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL];
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}
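
A sketch of submitting a Trino job with a client regenerated from these protos, assuming an existing 2.1+ image cluster created with the Trino optional component; the project, region, cluster name, query, output format, and session property are illustrative.

    from google.cloud import dataproc_v1

    region = "us-central1"  # assumed region
    client = dataproc_v1.JobControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    job = {
        "placement": {"cluster_name": "trino-cluster"},  # assumed existing cluster
        "trino_job": {
            "query_list": {"queries": ["SELECT 1"]},
            "output_format": "CSV",         # see Trino docs for supported formats
            "client_tags": ["adhoc"],
            "properties": {"query_max_execution_time": "1h"},  # illustrative session property
        },
    }
    submitted = client.submit_job(
        request={"project_id": "my-project", "region": region, "job": job}
    )
    print(submitted.reference.job_id, submitted.status.state)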
// Dataproc job config.
message JobPlacement {
// Required. The name of the cluster where the job will be submitted.
@ -680,6 +717,9 @@ message Job {
// Optional. Job is a Presto job.
PrestoJob presto_job = 23 [(google.api.field_behavior) = OPTIONAL];
// Optional. Job is a Trino job.
TrinoJob trino_job = 28 [(google.api.field_behavior) = OPTIONAL];
}
// Output only. The job status. Additional application-specific

@ -144,7 +144,7 @@ message ResizeNodeGroupRequest {
// underscores (_), and hyphens (-). The maximum length is 40 characters.
string request_id = 3 [(google.api.field_behavior) = OPTIONAL];
// Optional. Timeout for graceful YARN decommissioning. [Graceful
// Optional. Timeout for graceful YARN decomissioning. [Graceful
// decommissioning]
// (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters#graceful_decommissioning)
// allows the removal of nodes from the Compute Engine node group

@ -117,6 +117,10 @@ message ClusterOperationMetadata {
// Output only. Errors encountered during operation execution.
repeated string warnings = 14 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Child operation ids
repeated string child_operation_ids = 15
[(google.api.field_behavior) = OUTPUT_ONLY];
}
// Metadata describing the node group operation.

@ -17,23 +17,34 @@ syntax = "proto3";
package google.cloud.dataproc.v1;
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
option go_package = "cloud.google.com/go/dataproc/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "SharedProto";
option java_package = "com.google.cloud.dataproc.v1";
option (google.api.resource_definition) = {
type: "container.googleapis.com/Cluster"
pattern: "projects/{project}/locations/{location}/clusters/{cluster}"
};
option (google.api.resource_definition) = {
type: "metastore.googleapis.com/Service"
pattern: "projects/{project}/locations/{location}/services/{service}"
};
// Runtime configuration for a workload.
message RuntimeConfig {
// Optional. Version of the batch runtime.
string version = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Optional custom container image for the job runtime environment. If
// not specified, a default container image will be used.
// Optional. Optional custom container image for the job runtime environment.
// If not specified, a default container image will be used.
string container_image = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. A mapping of property names to values, which are used to configure workload
// execution.
// Optional. A mapping of property names to values, which are used to
// configure workload execution.
map<string, string> properties = 3 [(google.api.field_behavior) = OPTIONAL];
}
@ -43,7 +54,8 @@ message EnvironmentConfig {
ExecutionConfig execution_config = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Peripherals configuration that workload has access to.
PeripheralsConfig peripherals_config = 2 [(google.api.field_behavior) = OPTIONAL];
PeripheralsConfig peripherals_config = 2
[(google.api.field_behavior) = OPTIONAL];
}
// Execution configuration for a workload.
@ -65,19 +77,39 @@ message ExecutionConfig {
// Optional. The Cloud KMS key to use for encryption.
string kms_key = 7 [(google.api.field_behavior) = OPTIONAL];
// Optional. The duration after which the workload will be terminated.
// When the workload passes this ttl, it will be unconditionally killed
// without waiting for ongoing work to finish.
// Minimum value is 10 minutes; maximum value is 14 days (see JSON
// representation of
// [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
// If both ttl and idle_ttl are specified, the conditions are treated as
  // an OR: the workload will be terminated when it has been idle for idle_ttl
// or when the ttl has passed, whichever comes first.
// If ttl is not specified for a session, it defaults to 24h.
google.protobuf.Duration ttl = 9 [(google.api.field_behavior) = OPTIONAL];
// Optional. A Cloud Storage bucket used to stage workload dependencies,
// config files, and store workload output and other ephemeral data, such as
// Spark history files. If you do not specify a staging bucket, Cloud Dataproc
// will determine a Cloud Storage location according to the region where your
// workload is running, and then create and manage project-level, per-location
// staging and temporary buckets.
// **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
// a Cloud Storage bucket.**
string staging_bucket = 10 [(google.api.field_behavior) = OPTIONAL];
}
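
A sketch of the new ttl and staging_bucket fields on a batch submission, assuming a client regenerated from these protos; the project, region, bucket, file, and batch id are placeholders.

    from datetime import timedelta
    from google.cloud import dataproc_v1

    region = "us-central1"  # assumed region
    client = dataproc_v1.BatchControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    batch = dataproc_v1.Batch(
        pyspark_batch=dataproc_v1.PySparkBatch(
            main_python_file_uri="gs://my-bucket/word_count.py"  # placeholder .py file
        ),
        environment_config=dataproc_v1.EnvironmentConfig(
            execution_config=dataproc_v1.ExecutionConfig(
                ttl=timedelta(hours=4),              # terminate unconditionally after 4 hours
                staging_bucket="my-staging-bucket",  # bucket name, not a gs:// URI
            )
        ),
    )
    operation = client.create_batch(
        request={
            "parent": f"projects/my-project/locations/{region}",
            "batch": batch,
            "batch_id": "word-count-0001",  # placeholder, 4-63 chars of [a-z0-9-]
        }
    )
    finished = operation.result()  # waits for the batch workload to complete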
// Spark History Server configuration for the workload.
message SparkHistoryServerConfig {
// Optional. Resource name of an existing Dataproc Cluster to act as a Spark History
// Server for the workload.
// Optional. Resource name of an existing Dataproc Cluster to act as a Spark
// History Server for the workload.
//
// Example:
//
// * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
string dataproc_cluster = 1 [
(google.api.field_behavior) = OPTIONAL
];
string dataproc_cluster = 1 [(google.api.field_behavior) = OPTIONAL];
}
// Auxiliary services configuration for a workload.
@ -88,58 +120,111 @@ message PeripheralsConfig {
//
// * `projects/[project_id]/locations/[region]/services/[service_id]`
string metastore_service = 1 [
(google.api.field_behavior) = OPTIONAL
(google.api.field_behavior) = OPTIONAL,
(google.api.resource_reference) = {
type: "metastore.googleapis.com/Service"
}
];
// Optional. The Spark History Server configuration for the workload.
SparkHistoryServerConfig spark_history_server_config = 2 [(google.api.field_behavior) = OPTIONAL];
SparkHistoryServerConfig spark_history_server_config = 2
[(google.api.field_behavior) = OPTIONAL];
}
// Runtime information about workload execution.
message RuntimeInfo {
// Output only. Map of remote access endpoints (such as web interfaces and APIs) to their
// URIs.
// Output only. Map of remote access endpoints (such as web interfaces and
// APIs) to their URIs.
map<string, string> endpoints = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. A URI pointing to the location of the stdout and stderr of the workload.
// Output only. A URI pointing to the location of the stdout and stderr of the
// workload.
string output_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. A URI pointing to the location of the diagnostics tarball.
string diagnostic_output_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Approximate workload resource usage calculated after workload
// finishes (see [Dataproc Serverless pricing]
// (https://cloud.google.com/dataproc-serverless/pricing)).
UsageMetrics approximate_usage = 6
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Snapshot of current workload resource usage.
UsageSnapshot current_usage = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Usage metrics represent approximate total resources consumed by a workload.
message UsageMetrics {
// Optional. DCU (Dataproc Compute Units) usage in (`milliDCU` x `seconds`)
// (see [Dataproc Serverless pricing]
// (https://cloud.google.com/dataproc-serverless/pricing)).
int64 milli_dcu_seconds = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Shuffle storage usage in (`GB` x `seconds`) (see
// [Dataproc Serverless pricing]
// (https://cloud.google.com/dataproc-serverless/pricing)).
int64 shuffle_storage_gb_seconds = 2 [(google.api.field_behavior) = OPTIONAL];
}
// The usage snapshot represents the resources consumed by a workload at a
// specified time.
message UsageSnapshot {
// Optional. Milli (one-thousandth) Dataproc Compute Units (DCUs) (see
// [Dataproc Serverless pricing]
// (https://cloud.google.com/dataproc-serverless/pricing)).
int64 milli_dcu = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Shuffle Storage in gigabytes (GB). (see [Dataproc Serverless
// pricing] (https://cloud.google.com/dataproc-serverless/pricing))
int64 shuffle_storage_gb = 2 [(google.api.field_behavior) = OPTIONAL];
// Optional. The timestamp of the usage snapshot.
google.protobuf.Timestamp snapshot_time = 3
[(google.api.field_behavior) = OPTIONAL];
}
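
A sketch of reading the new usage fields off a batch with a regenerated client; the resource names are placeholders, and the milliDCU-to-DCU conversion follows the field comments above.

    from google.cloud import dataproc_v1

    region = "us-central1"  # assumed region
    client = dataproc_v1.BatchControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    batch = client.get_batch(
        name=f"projects/my-project/locations/{region}/batches/word-count-0001"
    )
    usage = batch.runtime_info.approximate_usage   # populated after the workload finishes
    print("DCU-seconds:", usage.milli_dcu_seconds / 1000)
    print("Shuffle GB-seconds:", usage.shuffle_storage_gb_seconds)
    snap = batch.runtime_info.current_usage        # point-in-time snapshot while running
    print("milliDCU now:", snap.milli_dcu, "at", snap.snapshot_time)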
// The cluster's GKE config.
message GkeClusterConfig {
// Optional. A target GKE cluster to deploy to. It must be in the same project and
// region as the Dataproc cluster (the GKE cluster can be zonal or regional).
// Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
// Optional. A target GKE cluster to deploy to. It must be in the same project
// and region as the Dataproc cluster (the GKE cluster can be zonal or
// regional). Format:
// 'projects/{project}/locations/{location}/clusters/{cluster_id}'
string gke_cluster_target = 2 [
(google.api.field_behavior) = OPTIONAL
(google.api.field_behavior) = OPTIONAL,
(google.api.resource_reference) = {
type: "container.googleapis.com/Cluster"
}
];
// Optional. GKE NodePools where workloads will be scheduled. At least one node pool
// must be assigned the 'default' role. Each role can be given to only a
// single NodePoolTarget. All NodePools must have the same location settings.
// If a nodePoolTarget is not specified, Dataproc constructs a default
// nodePoolTarget.
repeated GkeNodePoolTarget node_pool_target = 3 [(google.api.field_behavior) = OPTIONAL];
// Optional. GKE node pools where workloads will be scheduled. At least one
// node pool must be assigned the `DEFAULT`
// [GkeNodePoolTarget.Role][google.cloud.dataproc.v1.GkeNodePoolTarget.Role].
// If a `GkeNodePoolTarget` is not specified, Dataproc constructs a `DEFAULT`
// `GkeNodePoolTarget`. Each role can be given to only one
// `GkeNodePoolTarget`. All node pools must have the same location settings.
repeated GkeNodePoolTarget node_pool_target = 3
[(google.api.field_behavior) = OPTIONAL];
}
// The configuration for running the Dataproc cluster on Kubernetes.
message KubernetesClusterConfig {
// Optional. A namespace within the Kubernetes cluster to deploy into. If this namespace
// does not exist, it is created. If it exists, Dataproc
// verifies that another Dataproc VirtualCluster is not installed
// into it. If not specified, the name of the Dataproc Cluster is used.
// Optional. A namespace within the Kubernetes cluster to deploy into. If this
// namespace does not exist, it is created. If it exists, Dataproc verifies
// that another Dataproc VirtualCluster is not installed into it. If not
// specified, the name of the Dataproc Cluster is used.
string kubernetes_namespace = 1 [(google.api.field_behavior) = OPTIONAL];
oneof config {
// Required. The configuration for running the Dataproc cluster on GKE.
GkeClusterConfig gke_cluster_config = 2 [(google.api.field_behavior) = REQUIRED];
GkeClusterConfig gke_cluster_config = 2
[(google.api.field_behavior) = REQUIRED];
}
// Optional. The software configuration for this Dataproc cluster running on Kubernetes.
KubernetesSoftwareConfig kubernetes_software_config = 3 [(google.api.field_behavior) = OPTIONAL];
// Optional. The software configuration for this Dataproc cluster running on
// Kubernetes.
KubernetesSoftwareConfig kubernetes_software_config = 3
[(google.api.field_behavior) = OPTIONAL];
}
// The software configuration for this Dataproc cluster running on Kubernetes.
@ -163,54 +248,60 @@ message KubernetesSoftwareConfig {
map<string, string> properties = 2;
}
// GKE NodePools that Dataproc workloads run on.
// GKE node pools that Dataproc workloads run on.
message GkeNodePoolTarget {
// `Role` specifies whose tasks will run on the NodePool. The roles can be
// specific to workloads. Exactly one GkeNodePoolTarget within the
// VirtualCluster must have 'default' role, which is used to run all workloads
// that are not associated with a NodePool.
// `Role` specifies the tasks that will run on the node pool. Roles can be
// specific to workloads. Exactly one
// [GkeNodePoolTarget][google.cloud.dataproc.v1.GkeNodePoolTarget] within the
// virtual cluster must have the `DEFAULT` role, which is used to run all
// workloads that are not associated with a node pool.
enum Role {
// Role is unspecified.
ROLE_UNSPECIFIED = 0;
// Any roles that are not directly assigned to a NodePool run on the
// `default` role's NodePool.
// At least one node pool must have the `DEFAULT` role.
// Work assigned to a role that is not associated with a node pool
// is assigned to the node pool with the `DEFAULT` role. For example,
// work assigned to the `CONTROLLER` role will be assigned to the node pool
// with the `DEFAULT` role if no node pool has the `CONTROLLER` role.
DEFAULT = 1;
// Run controllers and webhooks.
// Run work associated with the Dataproc control plane (for example,
// controllers and webhooks). Very low resource requirements.
CONTROLLER = 2;
// Run spark driver.
// Run work associated with a Spark driver of a job.
SPARK_DRIVER = 3;
// Run spark executors.
// Run work associated with a Spark executor of a job.
SPARK_EXECUTOR = 4;
}
// Required. The target GKE NodePool.
// Required. The target GKE node pool.
// Format:
// 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
string node_pool = 1 [
(google.api.field_behavior) = REQUIRED
];
string node_pool = 1 [(google.api.field_behavior) = REQUIRED];
// Required. The types of role for a GKE NodePool
// Required. The roles associated with the GKE node pool.
repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];
// Optional. The configuration for the GKE NodePool.
// Input only. The configuration for the GKE node pool.
//
// If specified, Dataproc attempts to create a NodePool with the
// If specified, Dataproc attempts to create a node pool with the
// specified shape. If one with the same name already exists, it is
// verified against all specified fields. If a field differs, the
// virtual cluster creation will fail.
//
// If omitted, any NodePool with the specified name is used. If a
// NodePool with the specified name does not exist, Dataproc create a NodePool
// with default values.
GkeNodePoolConfig node_pool_config = 3 [(google.api.field_behavior) = OPTIONAL];
// If omitted, any node pool with the specified name is used. If a
  // node pool with the specified name does not exist, Dataproc creates a
// node pool with default values.
//
// This is an input only field. It will not be returned by the API.
GkeNodePoolConfig node_pool_config = 3
[(google.api.field_behavior) = INPUT_ONLY];
}
// The configuration of a GKE NodePool used by a [Dataproc-on-GKE
// The configuration of a GKE node pool used by a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/concepts/jobs/dataproc-gke#create-a-dataproc-on-gke-cluster).
message GkeNodePoolConfig {
// Parameters that describe cluster nodes.
@ -219,19 +310,28 @@ message GkeNodePoolConfig {
// type](https://cloud.google.com/compute/docs/machine-types).
string machine_type = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Whether the nodes are created as [preemptible VM
// instances](https://cloud.google.com/compute/docs/instances/preemptible).
bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];
// Optional. The number of local SSD disks to attach to the node, which is limited by
// the maximum number of disks allowable per zone (see [Adding Local
// SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
// Optional. The number of local SSD disks to attach to the node, which is
// limited by the maximum number of disks allowable per zone (see [Adding
// Local SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
int32 local_ssd_count = 7 [(google.api.field_behavior) = OPTIONAL];
// Optional. Whether the nodes are created as legacy [preemptible VM
// instances] (https://cloud.google.com/compute/docs/instances/preemptible).
// Also see
// [Spot][google.cloud.dataproc.v1.GkeNodePoolConfig.GkeNodeConfig.spot]
// VMs, preemptible VM instances without a maximum lifetime. Legacy and Spot
// preemptible nodes cannot be used in a node pool with the `CONTROLLER`
// [role]
// (/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
// or in the DEFAULT node pool if the CONTROLLER role is not assigned (the
// DEFAULT node pool will assume the CONTROLLER role).
bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];
// Optional. A list of [hardware
// accelerators](https://cloud.google.com/compute/docs/gpus) to attach to
// each node.
repeated GkeNodePoolAcceleratorConfig accelerators = 11 [(google.api.field_behavior) = OPTIONAL];
repeated GkeNodePoolAcceleratorConfig accelerators = 11
[(google.api.field_behavior) = OPTIONAL];
// Optional. [Minimum CPU
// platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
@ -239,26 +339,51 @@ message GkeNodePoolConfig {
// specified or a newer CPU platform. Specify the friendly names of CPU
  // platforms, such as "Intel Haswell" or "Intel Sandy Bridge".
string min_cpu_platform = 13 [(google.api.field_behavior) = OPTIONAL];
// Optional. The [Customer Managed Encryption Key (CMEK)]
// (https://cloud.google.com/kubernetes-engine/docs/how-to/using-cmek)
// used to encrypt the boot disk attached to each node in the node pool.
// Specify the key using the following format:
// <code>projects/<var>KEY_PROJECT_ID</var>/locations/<var>LOCATION</var>/keyRings/<var>RING_NAME</var>/cryptoKeys/<var>KEY_NAME</var></code>.
string boot_disk_kms_key = 23 [(google.api.field_behavior) = OPTIONAL];
// Optional. Whether the nodes are created as [Spot VM instances]
// (https://cloud.google.com/compute/docs/instances/spot).
// Spot VMs are the latest update to legacy
// [preemptible
// VMs][google.cloud.dataproc.v1.GkeNodePoolConfig.GkeNodeConfig.preemptible].
// Spot VMs do not have a maximum lifetime. Legacy and Spot preemptible
// nodes cannot be used in a node pool with the `CONTROLLER`
// [role](/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
// or in the DEFAULT node pool if the CONTROLLER role is not assigned (the
// DEFAULT node pool will assume the CONTROLLER role).
bool spot = 32 [(google.api.field_behavior) = OPTIONAL];
}
// A GkeNodeConfigAcceleratorConfig represents a Hardware Accelerator request
// for a NodePool.
// for a node pool.
message GkeNodePoolAcceleratorConfig {
// The number of accelerator cards exposed to an instance.
int64 accelerator_count = 1;
  // The accelerator type resource name (see GPUs on Compute Engine).
string accelerator_type = 2;
// Size of partitions to create on the GPU. Valid values are described in
// the NVIDIA [mig user
// guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning).
string gpu_partition_size = 3;
}
// GkeNodePoolAutoscaling contains information the cluster autoscaler needs to
// adjust the size of the node pool to the current cluster usage.
message GkeNodePoolAutoscalingConfig {
// The minimum number of nodes in the NodePool. Must be >= 0 and <=
// The minimum number of nodes in the node pool. Must be >= 0 and <=
// max_node_count.
int32 min_node_count = 2;
// The maximum number of nodes in the NodePool. Must be >= min_node_count.
// The maximum number of nodes in the node pool. Must be >= min_node_count,
// and must be > 0.
// **Note:** Quota must be sufficient to scale up the cluster.
int32 max_node_count = 3;
}
@ -268,17 +393,21 @@ message GkeNodePoolConfig {
// Optional. The list of Compute Engine
// [zones](https://cloud.google.com/compute/docs/zones#available) where
// NodePool's nodes will be located.
// node pool nodes associated with a Dataproc on GKE virtual cluster
// will be located.
//
// **Note:** Currently, only one zone may be specified.
// **Note:** All node pools associated with a virtual cluster
// must be located in the same region as the virtual cluster, and they must
// be located in the same zone within that region.
//
// If a location is not specified during NodePool creation, Dataproc will
// choose a location.
// If a location is not specified during node pool creation, Dataproc on GKE
// will choose the zone.
repeated string locations = 13 [(google.api.field_behavior) = OPTIONAL];
// Optional. The autoscaler configuration for this NodePool. The autoscaler is enabled
// only when a valid configuration is present.
GkeNodePoolAutoscalingConfig autoscaling = 4 [(google.api.field_behavior) = OPTIONAL];
// Optional. The autoscaler configuration for this node pool. The autoscaler
// is enabled only when a valid configuration is present.
GkeNodePoolAutoscalingConfig autoscaling = 4
[(google.api.field_behavior) = OPTIONAL];
}
// Cluster components that can be activated.
@ -308,12 +437,18 @@ enum Component {
// The Hive Web HCatalog (the REST service for accessing HCatalog).
HIVE_WEBHCAT = 3;
// Hudi.
HUDI = 18;
// The Jupyter Notebook.
JUPYTER = 1;
// The Presto query engine.
PRESTO = 6;
// The Trino query engine.
TRINO = 17;
// The Ranger service.
RANGER = 12;
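
A sketch of enabling the new optional components at cluster creation with the Python client; the 2.1 image requirement for Trino comes from the commit message, and the project, region, and cluster name are placeholders.

    from google.cloud import dataproc_v1

    region = "us-central1"  # assumed region
    client = dataproc_v1.ClusterControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )
    cluster = {
        "project_id": "my-project",
        "cluster_name": "trino-hudi-cluster",  # placeholder
        "config": {
            "software_config": {
                "image_version": "2.1",  # Trino jobs require 2.1+ image clusters
                "optional_components": [
                    dataproc_v1.Component.TRINO,
                    dataproc_v1.Component.HUDI,
                ],
            },
        },
    }
    operation = client.create_cluster(
        request={"project_id": "my-project", "region": region, "cluster": cluster}
    )
    operation.result()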

@ -1,4 +1,4 @@
// Copyright 2021 Google LLC
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -36,10 +36,12 @@ option java_package = "com.google.cloud.dataproc.v1";
// Dataproc API.
service WorkflowTemplateService {
option (google.api.default_host) = "dataproc.googleapis.com";
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-platform";
// Creates new workflow template.
rpc CreateWorkflowTemplate(CreateWorkflowTemplateRequest) returns (WorkflowTemplate) {
rpc CreateWorkflowTemplate(CreateWorkflowTemplateRequest)
returns (WorkflowTemplate) {
option (google.api.http) = {
post: "/v1/{parent=projects/*/locations/*}/workflowTemplates"
body: "template"
@ -55,7 +57,8 @@ service WorkflowTemplateService {
//
// Can retrieve previously instantiated template by specifying optional
// version parameter.
rpc GetWorkflowTemplate(GetWorkflowTemplateRequest) returns (WorkflowTemplate) {
rpc GetWorkflowTemplate(GetWorkflowTemplateRequest)
returns (WorkflowTemplate) {
option (google.api.http) = {
get: "/v1/{name=projects/*/locations/*/workflowTemplates/*}"
additional_bindings {
@ -85,7 +88,8 @@ service WorkflowTemplateService {
// On successful completion,
// [Operation.response][google.longrunning.Operation.response] will be
// [Empty][google.protobuf.Empty].
rpc InstantiateWorkflowTemplate(InstantiateWorkflowTemplateRequest) returns (google.longrunning.Operation) {
rpc InstantiateWorkflowTemplate(InstantiateWorkflowTemplateRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/{name=projects/*/locations/*/workflowTemplates/*}:instantiate"
body: "*"
@ -105,7 +109,8 @@ service WorkflowTemplateService {
// Instantiates a template and begins execution.
//
// This method is equivalent to executing the sequence
// [CreateWorkflowTemplate][google.cloud.dataproc.v1.WorkflowTemplateService.CreateWorkflowTemplate], [InstantiateWorkflowTemplate][google.cloud.dataproc.v1.WorkflowTemplateService.InstantiateWorkflowTemplate],
// [CreateWorkflowTemplate][google.cloud.dataproc.v1.WorkflowTemplateService.CreateWorkflowTemplate],
// [InstantiateWorkflowTemplate][google.cloud.dataproc.v1.WorkflowTemplateService.InstantiateWorkflowTemplate],
// [DeleteWorkflowTemplate][google.cloud.dataproc.v1.WorkflowTemplateService.DeleteWorkflowTemplate].
//
// The returned Operation can be used to track execution of
@ -126,7 +131,9 @@ service WorkflowTemplateService {
// On successful completion,
// [Operation.response][google.longrunning.Operation.response] will be
// [Empty][google.protobuf.Empty].
rpc InstantiateInlineWorkflowTemplate(InstantiateInlineWorkflowTemplateRequest) returns (google.longrunning.Operation) {
rpc InstantiateInlineWorkflowTemplate(
InstantiateInlineWorkflowTemplateRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/{parent=projects/*/locations/*}/workflowTemplates:instantiateInline"
body: "template"
@ -144,7 +151,8 @@ service WorkflowTemplateService {
// Updates (replaces) workflow template. The updated template
// must contain version that matches the current server version.
rpc UpdateWorkflowTemplate(UpdateWorkflowTemplateRequest) returns (WorkflowTemplate) {
rpc UpdateWorkflowTemplate(UpdateWorkflowTemplateRequest)
returns (WorkflowTemplate) {
option (google.api.http) = {
put: "/v1/{template.name=projects/*/locations/*/workflowTemplates/*}"
body: "template"
@ -157,7 +165,8 @@ service WorkflowTemplateService {
}
// Lists workflows that match the specified filter in the request.
rpc ListWorkflowTemplates(ListWorkflowTemplatesRequest) returns (ListWorkflowTemplatesResponse) {
rpc ListWorkflowTemplates(ListWorkflowTemplatesRequest)
returns (ListWorkflowTemplatesResponse) {
option (google.api.http) = {
get: "/v1/{parent=projects/*/locations/*}/workflowTemplates"
additional_bindings {
@ -168,7 +177,8 @@ service WorkflowTemplateService {
}
// Deletes a workflow template. It does not cancel in-progress workflows.
rpc DeleteWorkflowTemplate(DeleteWorkflowTemplateRequest) returns (google.protobuf.Empty) {
rpc DeleteWorkflowTemplate(DeleteWorkflowTemplateRequest)
returns (google.protobuf.Empty) {
option (google.api.http) = {
delete: "/v1/{name=projects/*/locations/*/workflowTemplates/*}"
additional_bindings {
@ -214,10 +224,12 @@ message WorkflowTemplate {
int32 version = 3 [(google.api.field_behavior) = OPTIONAL];
// Output only. The time the template was created.
google.protobuf.Timestamp create_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
google.protobuf.Timestamp create_time = 4
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The time the template was last updated.
google.protobuf.Timestamp update_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
google.protobuf.Timestamp update_time = 5
[(google.api.field_behavior) = OUTPUT_ONLY];
// Optional. The labels to associate with this template. These labels
// will be propagated to all jobs and clusters created by the workflow
@ -234,7 +246,8 @@ message WorkflowTemplate {
map<string, string> labels = 6 [(google.api.field_behavior) = OPTIONAL];
// Required. WorkflowTemplate scheduling information.
WorkflowTemplatePlacement placement = 7 [(google.api.field_behavior) = REQUIRED];
WorkflowTemplatePlacement placement = 7
[(google.api.field_behavior) = REQUIRED];
// Required. The Directed Acyclic Graph of Jobs to submit.
repeated OrderedJob jobs = 8 [(google.api.field_behavior) = REQUIRED];
@ -242,7 +255,8 @@ message WorkflowTemplate {
// Optional. Template parameters whose values are substituted into the
// template. Values for parameters must be provided when the template is
// instantiated.
repeated TemplateParameter parameters = 9 [(google.api.field_behavior) = OPTIONAL];
repeated TemplateParameter parameters = 9
[(google.api.field_behavior) = OPTIONAL];
// Optional. Timeout duration for the DAG of jobs, expressed in seconds (see
// [JSON representation of
@ -254,7 +268,8 @@ message WorkflowTemplate {
// [managed
// cluster](/dataproc/docs/concepts/workflows/using-workflows#configuring_or_selecting_a_cluster),
// the cluster is deleted.
google.protobuf.Duration dag_timeout = 10 [(google.api.field_behavior) = OPTIONAL];
google.protobuf.Duration dag_timeout = 10
[(google.api.field_behavior) = OPTIONAL];
}
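To illustrate the dag_timeout field, here is a hypothetical template fragment in the Python dict form accepted by the generated client; the 30-minute value and the rest of the template are placeholders.

```python
# Sketch: cap the whole job DAG at 30 minutes. The Duration is written as a
# {"seconds": ...} dict here; proto-plus message fields also accept timedelta.
timeboxed_template = {
    "id": "timeboxed-template",
    "placement": {"managed_cluster": {"cluster_name": "wf-cluster"}},
    "jobs": [
        {
            "step_id": "long-running-step",
            "pig_job": {"query_list": {"queries": ["sh sleep 600"]}},
        }
    ],
    "dag_timeout": {"seconds": 1800},  # workflow gets at most 30 minutes
}
```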
// Specifies workflow execution target.
@ -312,7 +327,8 @@ message ClusterSelector {
// Required. The cluster labels. A cluster must have all of the labels
// to match.
map<string, string> cluster_labels = 2 [(google.api.field_behavior) = REQUIRED];
map<string, string> cluster_labels = 2
[(google.api.field_behavior) = REQUIRED];
}
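A small sketch of a placement that uses a cluster selector instead of a managed cluster; the label keys and values are placeholders and simply have to be present on the target cluster.

```python
# Sketch: run the workflow on an existing cluster picked by labels.
# The selected cluster must carry every label listed here.
selector_placement = {
    "cluster_selector": {
        "zone": "us-central1-a",  # optional zone constraint
        "cluster_labels": {"env": "prod", "team": "data-eng"},
    }
}
```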
// A job executed by the workflow.
@ -322,8 +338,8 @@ message OrderedJob {
//
// The step id is used as a prefix for the job id, as the job
// `goog-dataproc-workflow-step-id` label, and in
// [prerequisiteStepIds][google.cloud.dataproc.v1.OrderedJob.prerequisite_step_ids] field from other
// steps.
// [prerequisiteStepIds][google.cloud.dataproc.v1.OrderedJob.prerequisite_step_ids]
// field from other steps.
//
// The id must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). Cannot begin or end with underscore
@ -374,7 +390,8 @@ message OrderedJob {
// Optional. The list of prerequisite job step_ids.
// If not specified, the job will start at the beginning of the workflow.
repeated string prerequisite_step_ids = 10 [(google.api.field_behavior) = OPTIONAL];
repeated string prerequisite_step_ids = 10
[(google.api.field_behavior) = OPTIONAL];
}
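A sketch of how step ids and prerequisite_step_ids express ordering between jobs (placeholder ids and URIs):

```python
# Sketch: a two-step DAG. "analyze" starts only after "prepare" succeeds,
# because it lists prepare's step_id among its prerequisites.
ordered_jobs = [
    {
        "step_id": "prepare",
        "hadoop_job": {
            "main_jar_file_uri": "file:///usr/lib/hadoop-mapreduce/"
            "hadoop-mapreduce-examples.jar",
            "args": ["teragen", "1000", "hdfs:///prep/"],
        },
    },
    {
        "step_id": "analyze",
        "prerequisite_step_ids": ["prepare"],
        "spark_sql_job": {"query_file_uri": "gs://my-bucket/queries/analyze.sql"},
    },
]
```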
// A configurable parameter that replaces one or more fields in the template.
@ -400,10 +417,10 @@ message TemplateParameter {
// A field is allowed to appear in at most one parameter's list of field
// paths.
//
// A field path is similar in syntax to a [google.protobuf.FieldMask][google.protobuf.FieldMask].
// For example, a field path that references the zone field of a workflow
// template's cluster selector would be specified as
// `placement.clusterSelector.zone`.
// A field path is similar in syntax to a
// [google.protobuf.FieldMask][google.protobuf.FieldMask]. For example, a
// field path that references the zone field of a workflow template's cluster
// selector would be specified as `placement.clusterSelector.zone`.
//
// Also, field paths can reference fields using the following syntax:
//
@ -510,13 +527,15 @@ message WorkflowMetadata {
int32 version = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The create cluster operation metadata.
ClusterOperation create_cluster = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
ClusterOperation create_cluster = 3
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The workflow graph.
WorkflowGraph graph = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The delete cluster operation metadata.
ClusterOperation delete_cluster = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
ClusterOperation delete_cluster = 5
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The workflow state.
State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
@ -528,25 +547,33 @@ message WorkflowMetadata {
map<string, string> parameters = 8;
// Output only. Workflow start time.
google.protobuf.Timestamp start_time = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
google.protobuf.Timestamp start_time = 9
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Workflow end time.
google.protobuf.Timestamp end_time = 10 [(google.api.field_behavior) = OUTPUT_ONLY];
google.protobuf.Timestamp end_time = 10
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The UUID of the target cluster.
string cluster_uuid = 11 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The timeout duration for the DAG of jobs, expressed in seconds (see
// [JSON representation of
// Output only. The timeout duration for the DAG of jobs, expressed in seconds
// (see [JSON representation of
// duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
google.protobuf.Duration dag_timeout = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. DAG start time, only set for workflows with [dag_timeout][google.cloud.dataproc.v1.WorkflowMetadata.dag_timeout] when DAG
// begins.
google.protobuf.Timestamp dag_start_time = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. DAG end time, only set for workflows with [dag_timeout][google.cloud.dataproc.v1.WorkflowMetadata.dag_timeout] when DAG ends.
google.protobuf.Timestamp dag_end_time = 14 [(google.api.field_behavior) = OUTPUT_ONLY];
google.protobuf.Duration dag_timeout = 12
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. DAG start time, only set for workflows with
// [dag_timeout][google.cloud.dataproc.v1.WorkflowMetadata.dag_timeout] when
// DAG begins.
google.protobuf.Timestamp dag_start_time = 13
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. DAG end time, only set for workflows with
// [dag_timeout][google.cloud.dataproc.v1.WorkflowMetadata.dag_timeout] when
// DAG ends.
google.protobuf.Timestamp dag_end_time = 14
[(google.api.field_behavior) = OUTPUT_ONLY];
}
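Since all of these fields are output only, they are read rather than set. A sketch of inspecting them from the instantiate operation's metadata (assumed Python client, placeholder names); the metadata is a snapshot from the most recent poll.

```python
# Sketch: look at workflow progress via the operation's WorkflowMetadata
# instead of blocking on the final result.
from google.cloud import dataproc_v1

client = dataproc_v1.WorkflowTemplateServiceClient(
    client_options={"api_endpoint": "us-central1-dataproc.googleapis.com:443"}
)
operation = client.instantiate_workflow_template(
    name=(
        "projects/my-project/regions/us-central1/"
        "workflowTemplates/sample-template"
    )
)

metadata = operation.metadata  # WorkflowMetadata snapshot
print(metadata.state, metadata.cluster_name, metadata.cluster_uuid)
for node in metadata.graph.nodes:
    print(node.step_id, node.state, list(node.prerequisite_step_ids))
```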
// The cluster operation triggered by a workflow.
@ -595,7 +622,8 @@ message WorkflowNode {
string step_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Node's prerequisite nodes.
repeated string prerequisite_step_ids = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
repeated string prerequisite_step_ids = 2
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. The job id; populated after the node enters RUNNING state.
string job_id = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
@ -771,7 +799,8 @@ message ListWorkflowTemplatesRequest {
// A response to a request to list workflow templates in a project.
message ListWorkflowTemplatesResponse {
// Output only. WorkflowTemplates list.
repeated WorkflowTemplate templates = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
repeated WorkflowTemplate templates = 1
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. This token is included in the response if there are more
// results to fetch. To fetch additional results, provide this value as the
// `page_token` in a subsequent ListWorkflowTemplatesRequest.
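A sketch of consuming this paginated response with the generated Python client, which follows next_page_token automatically (placeholder project and region):

```python
# Sketch: iterate all templates in a region; the pager handles page tokens.
from google.cloud import dataproc_v1

client = dataproc_v1.WorkflowTemplateServiceClient(
    client_options={"api_endpoint": "us-central1-dataproc.googleapis.com:443"}
)
for template in client.list_workflow_templates(
    parent="projects/my-project/regions/us-central1"
):
    print(template.name, template.version)
```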