Skip to content
This repository was archived by the owner on Nov 29, 2023. It is now read-only.

Commit ee093a8

Browse files
feat(v1beta1): remove DOCKER/FLINK from Component enum; add HBASE (#108)
Breaking change in v1beta1: 1. The `DOCKER` and `FLINK` values have been removed from the `Component` enum, and an `HBASE` value was added. Other changes: 1. There is a new `temp_bucket` field in `ClusterConfig`. 2. There is a new `preemptibility` field in `InstanceGroupConfig`. 3. The `project_id` field of `JobReference` is now optional instead of required. 4. There is a new `dag_timeout` field in `WorkflowTemplate`. 5. There are new `dag_timeout`, `dag_start_time`, and `dag_end_time` fields in `WorkflowMetadata`. 6. There are various updates to the doc comments.
1 parent 8e96bdd commit ee093a8

File tree

15 files changed

+510
-134
lines changed

15 files changed

+510
-134
lines changed

google/cloud/dataproc_v1/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@
115115
"CancelJobRequest",
116116
"Cluster",
117117
"ClusterConfig",
118-
"ClusterControllerClient",
119118
"ClusterMetrics",
120119
"ClusterOperation",
121120
"ClusterOperationMetadata",
@@ -192,6 +191,7 @@
192191
"WorkflowNode",
193192
"WorkflowTemplate",
194193
"WorkflowTemplatePlacement",
195-
"YarnApplication",
196194
"WorkflowTemplateServiceClient",
195+
"YarnApplication",
196+
"ClusterControllerClient",
197197
)

google/cloud/dataproc_v1beta2/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@
149149
"InstantiateInlineWorkflowTemplateRequest",
150150
"InstantiateWorkflowTemplateRequest",
151151
"Job",
152-
"JobControllerClient",
153152
"JobMetadata",
154153
"JobPlacement",
155154
"JobReference",
@@ -194,6 +193,7 @@
194193
"WorkflowNode",
195194
"WorkflowTemplate",
196195
"WorkflowTemplatePlacement",
197-
"YarnApplication",
198196
"WorkflowTemplateServiceClient",
197+
"YarnApplication",
198+
"JobControllerClient",
199199
)

google/cloud/dataproc_v1beta2/proto/autoscaling_policies.proto

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,12 @@ option (google.api.resource_definition) = {
3636
// Cloud Dataproc API.
3737
service AutoscalingPolicyService {
3838
option (google.api.default_host) = "dataproc.googleapis.com";
39-
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
39+
option (google.api.oauth_scopes) =
40+
"https://www.googleapis.com/auth/cloud-platform";
4041

4142
// Creates new autoscaling policy.
42-
rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
43+
rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest)
44+
returns (AutoscalingPolicy) {
4345
option (google.api.http) = {
4446
post: "/v1beta2/{parent=projects/*/locations/*}/autoscalingPolicies"
4547
body: "policy"
@@ -55,7 +57,8 @@ service AutoscalingPolicyService {
5557
//
5658
// Disabled check for update_mask, because all updates will be full
5759
// replacements.
58-
rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
60+
rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest)
61+
returns (AutoscalingPolicy) {
5962
option (google.api.http) = {
6063
put: "/v1beta2/{policy.name=projects/*/locations/*/autoscalingPolicies/*}"
6164
body: "policy"
@@ -68,7 +71,8 @@ service AutoscalingPolicyService {
6871
}
6972

7073
// Retrieves autoscaling policy.
71-
rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
74+
rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest)
75+
returns (AutoscalingPolicy) {
7276
option (google.api.http) = {
7377
get: "/v1beta2/{name=projects/*/locations/*/autoscalingPolicies/*}"
7478
additional_bindings {
@@ -79,7 +83,8 @@ service AutoscalingPolicyService {
7983
}
8084

8185
// Lists autoscaling policies in the project.
82-
rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest) returns (ListAutoscalingPoliciesResponse) {
86+
rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest)
87+
returns (ListAutoscalingPoliciesResponse) {
8388
option (google.api.http) = {
8489
get: "/v1beta2/{parent=projects/*/locations/*}/autoscalingPolicies"
8590
additional_bindings {
@@ -91,7 +96,8 @@ service AutoscalingPolicyService {
9196

9297
// Deletes an autoscaling policy. It is an error to delete an autoscaling
9398
// policy that is in use by one or more clusters.
94-
rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest) returns (google.protobuf.Empty) {
99+
rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest)
100+
returns (google.protobuf.Empty) {
95101
option (google.api.http) = {
96102
delete: "/v1beta2/{name=projects/*/locations/*/autoscalingPolicies/*}"
97103
additional_bindings {
@@ -136,22 +142,26 @@ message AutoscalingPolicy {
136142
}
137143

138144
// Required. Describes how the autoscaler will operate for primary workers.
139-
InstanceGroupAutoscalingPolicyConfig worker_config = 4 [(google.api.field_behavior) = REQUIRED];
145+
InstanceGroupAutoscalingPolicyConfig worker_config = 4
146+
[(google.api.field_behavior) = REQUIRED];
140147

141148
// Optional. Describes how the autoscaler will operate for secondary workers.
142-
InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5 [(google.api.field_behavior) = OPTIONAL];
149+
InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5
150+
[(google.api.field_behavior) = OPTIONAL];
143151
}
144152

145153
// Basic algorithm for autoscaling.
146154
message BasicAutoscalingAlgorithm {
147155
// Required. YARN autoscaling configuration.
148-
BasicYarnAutoscalingConfig yarn_config = 1 [(google.api.field_behavior) = REQUIRED];
156+
BasicYarnAutoscalingConfig yarn_config = 1
157+
[(google.api.field_behavior) = REQUIRED];
149158

150159
// Optional. Duration between scaling events. A scaling period starts after
151160
// the update operation from the previous event has completed.
152161
//
153162
// Bounds: [2m, 1d]. Default: 2m.
154-
google.protobuf.Duration cooldown_period = 2 [(google.api.field_behavior) = OPTIONAL];
163+
google.protobuf.Duration cooldown_period = 2
164+
[(google.api.field_behavior) = OPTIONAL];
155165
}
156166

157167
// Basic autoscaling configurations for YARN.
@@ -162,22 +172,29 @@ message BasicYarnAutoscalingConfig {
162172
// downscaling operations.
163173
//
164174
// Bounds: [0s, 1d].
165-
google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];
166-
167-
// Required. Fraction of average pending memory in the last cooldown period
168-
// for which to add workers. A scale-up factor of 1.0 will result in scaling
169-
// up so that there is no pending memory remaining after the update (more
170-
// aggressive scaling). A scale-up factor closer to 0 will result in a smaller
171-
// magnitude of scaling up (less aggressive scaling).
175+
google.protobuf.Duration graceful_decommission_timeout = 5
176+
[(google.api.field_behavior) = REQUIRED];
177+
178+
// Required. Fraction of average YARN pending memory in the last cooldown
179+
// period for which to add workers. A scale-up factor of 1.0 will result in
180+
// scaling up so that there is no pending memory remaining after the update
181+
// (more aggressive scaling). A scale-up factor closer to 0 will result in a
182+
// smaller magnitude of scaling up (less aggressive scaling). See [How
183+
// autoscaling
184+
// works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
185+
// for more information.
172186
//
173187
// Bounds: [0.0, 1.0].
174188
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];
175189

176-
// Required. Fraction of average pending memory in the last cooldown period
177-
// for which to remove workers. A scale-down factor of 1 will result in
190+
// Required. Fraction of average YARN pending memory in the last cooldown
191+
// period for which to remove workers. A scale-down factor of 1 will result in
178192
// scaling down so that there is no available memory remaining after the
179193
// update (more aggressive scaling). A scale-down factor of 0 disables
180194
// removing workers, which can be beneficial for autoscaling a single job.
195+
// See [How autoscaling
196+
// works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
197+
// for more information.
181198
//
182199
// Bounds: [0.0, 1.0].
183200
double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
@@ -189,7 +206,8 @@ message BasicYarnAutoscalingConfig {
189206
// on any recommended change.
190207
//
191208
// Bounds: [0.0, 1.0]. Default: 0.0.
192-
double scale_up_min_worker_fraction = 3 [(google.api.field_behavior) = OPTIONAL];
209+
double scale_up_min_worker_fraction = 3
210+
[(google.api.field_behavior) = OPTIONAL];
193211

194212
// Optional. Minimum scale-down threshold as a fraction of total cluster size
195213
// before scaling occurs. For example, in a 20-worker cluster, a threshold of
@@ -198,7 +216,8 @@ message BasicYarnAutoscalingConfig {
198216
// on any recommended change.
199217
//
200218
// Bounds: [0.0, 1.0]. Default: 0.0.
201-
double scale_down_min_worker_fraction = 4 [(google.api.field_behavior) = OPTIONAL];
219+
double scale_down_min_worker_fraction = 4
220+
[(google.api.field_behavior) = OPTIONAL];
202221
}
203222

204223
// Configuration for the size bounds of an instance group, including its
@@ -341,7 +360,8 @@ message ListAutoscalingPoliciesRequest {
341360
// A response to a request to list autoscaling policies in a project.
342361
message ListAutoscalingPoliciesResponse {
343362
// Output only. Autoscaling policies list.
344-
repeated AutoscalingPolicy policies = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
363+
repeated AutoscalingPolicy policies = 1
364+
[(google.api.field_behavior) = OUTPUT_ONLY];
345365

346366
// Output only. This token is included in the response if there are more
347367
// results to fetch.

google/cloud/dataproc_v1beta2/proto/clusters.proto

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,17 @@ message ClusterConfig {
171171
// bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
172172
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];
173173

174+
// Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
175+
// such as Spark and MapReduce history files.
176+
// If you do not specify a temp bucket,
177+
// Dataproc will determine a Cloud Storage location (US,
178+
// ASIA, or EU) for your cluster's temp bucket according to the
179+
// Compute Engine zone where your cluster is deployed, and then create
180+
// and manage this project-level, per-location bucket. The default bucket has
181+
// a TTL of 90 days, but you can use any TTL (or none) if you specify a
182+
// bucket.
183+
string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];
184+
174185
// Optional. The shared Compute Engine config settings for
175186
// all instances in a cluster.
176187
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
@@ -330,7 +341,7 @@ message GceClusterConfig {
330341
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];
331342

332343
// Optional. The [Dataproc service
333-
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
344+
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
334345
// (also see [VM Data Plane
335346
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
336347
// used by Dataproc cluster VM instances to access Google Cloud Platform
@@ -374,6 +385,27 @@ message GceClusterConfig {
374385
// The config settings for Compute Engine resources in
375386
// an instance group, such as a master or worker group.
376387
message InstanceGroupConfig {
388+
// Controls the use of
389+
// [preemptible instances]
390+
// (https://cloud.google.com/compute/docs/instances/preemptible)
391+
// within the group.
392+
enum Preemptibility {
393+
// Preemptibility is unspecified, the system will choose the
394+
// appropriate setting for each instance group.
395+
PREEMPTIBILITY_UNSPECIFIED = 0;
396+
397+
// Instances are non-preemptible.
398+
//
399+
// This option is allowed for all instance groups and is the only valid
400+
// value for Master and Worker instance groups.
401+
NON_PREEMPTIBLE = 1;
402+
403+
// Instances are preemptible.
404+
//
405+
// This option is allowed only for secondary worker groups.
406+
PREEMPTIBLE = 2;
407+
}
408+
377409
// Optional. The number of VM instances in the instance group.
378410
// For master instance groups, must be set to 1.
379411
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
@@ -424,6 +456,15 @@ message InstanceGroupConfig {
424456
// instances.
425457
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
426458

459+
// Optional. Specifies the preemptibility of the instance group.
460+
//
461+
// The default value for master and worker groups is
462+
// `NON_PREEMPTIBLE`. This default cannot be changed.
463+
//
464+
// The default value for secondary instances is
465+
// `PREEMPTIBLE`.
466+
Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];
467+
427468
// Output only. The config for Compute Engine Instance Group
428469
// Manager that manages this group.
429470
// This is only used for preemptible instance groups.
@@ -685,7 +726,7 @@ message ClusterStatus {
685726
message SoftwareConfig {
686727
// Optional. The version of software inside the cluster. It must be one of the
687728
// supported [Dataproc
688-
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
729+
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
689730
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
690731
// ["preview"
691732
// version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).

google/cloud/dataproc_v1beta2/proto/jobs.proto

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -224,12 +224,12 @@ message SparkJob {
224224
// Spark driver and tasks.
225225
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
226226

227-
// Optional. HCFS URIs of files to be copied to the working directory of
228-
// Spark drivers and distributed tasks. Useful for naively parallel tasks.
227+
// Optional. HCFS URIs of files to be placed in the working directory of
228+
// each executor. Useful for naively parallel tasks.
229229
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
230230

231-
// Optional. HCFS URIs of archives to be extracted in the working directory
232-
// of Spark drivers and tasks. Supported file types:
231+
// Optional. HCFS URIs of archives to be extracted into the working directory
232+
// of each executor. Supported file types:
233233
// .jar, .tar, .tar.gz, .tgz, and .zip.
234234
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
235235

@@ -265,11 +265,12 @@ message PySparkJob {
265265
// Python driver and tasks.
266266
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
267267

268-
// Optional. HCFS URIs of files to be copied to the working directory of
269-
// Python drivers and distributed tasks. Useful for naively parallel tasks.
268+
// Optional. HCFS URIs of files to be placed in the working directory of
269+
// each executor. Useful for naively parallel tasks.
270270
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
271271

272-
// Optional. HCFS URIs of archives to be extracted in the working directory of
272+
// Optional. HCFS URIs of archives to be extracted into the working directory
273+
// of each executor. Supported file types:
273274
// .jar, .tar, .tar.gz, .tgz, and .zip.
274275
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
275276

@@ -414,12 +415,12 @@ message SparkRJob {
414415
// occur that causes an incorrect job submission.
415416
repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];
416417

417-
// Optional. HCFS URIs of files to be copied to the working directory of
418-
// R drivers and distributed tasks. Useful for naively parallel tasks.
418+
// Optional. HCFS URIs of files to be placed in the working directory of
419+
// each executor. Useful for naively parallel tasks.
419420
repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
420421

421-
// Optional. HCFS URIs of archives to be extracted in the working directory of
422-
// Spark drivers and tasks. Supported file types:
422+
// Optional. HCFS URIs of archives to be extracted into the working directory
423+
// of each executor. Supported file types:
423424
// .jar, .tar, .tar.gz, .tgz, and .zip.
424425
repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
425426

@@ -562,9 +563,9 @@ message JobStatus {
562563

563564
// Encapsulates the full scoping used to reference a job.
564565
message JobReference {
565-
// Required. The ID of the Google Cloud Platform project that the job
566-
// belongs to.
567-
string project_id = 1 [(google.api.field_behavior) = REQUIRED];
566+
// Optional. The ID of the Google Cloud Platform project that the job belongs to. If
567+
// specified, must match the request project ID.
568+
string project_id = 1 [(google.api.field_behavior) = OPTIONAL];
568569

569570
// Optional. The job ID, which must be unique within the project.
570571
// The ID must contain only letters (a-z, A-Z), numbers (0-9),

google/cloud/dataproc_v1beta2/proto/shared.proto

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,17 @@ option java_package = "com.google.cloud.dataproc.v1beta2";
2525

2626
// Cluster components that can be activated.
2727
enum Component {
28-
// Unspecified component.
28+
// Unspecified component. Specifying this will cause Cluster creation to fail.
2929
COMPONENT_UNSPECIFIED = 0;
3030

3131
// The Anaconda python distribution.
3232
ANACONDA = 5;
3333

34-
// Docker
35-
DOCKER = 13;
36-
3734
// The Druid query engine.
3835
DRUID = 9;
3936

40-
// Flink
41-
FLINK = 14;
37+
// HBase.
38+
HBASE = 11;
4239

4340
// The Hive Web HCatalog (the REST service for accessing HCatalog).
4441
HIVE_WEBHCAT = 3;

0 commit comments

Comments (0)