1# Copyright 2020 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15apiVersion: apiextensions.k8s.io/v1
16kind: CustomResourceDefinition
17metadata:
18 annotations:
19 cnrm.cloud.google.com/version: 1.106.0
20 creationTimestamp: null
21 labels:
22 cnrm.cloud.google.com/dcl2crd: "true"
23 cnrm.cloud.google.com/managed-by-kcc: "true"
24 cnrm.cloud.google.com/stability-level: stable
25 cnrm.cloud.google.com/system: "true"
26 name: dataprocclusters.dataproc.cnrm.cloud.google.com
27spec:
28 group: dataproc.cnrm.cloud.google.com
29 names:
30 categories:
31 - gcp
32 kind: DataprocCluster
33 plural: dataprocclusters
34 shortNames:
35 - gcpdataproccluster
36 - gcpdataprocclusters
37 singular: dataproccluster
38 scope: Namespaced
39 versions:
40 - additionalPrinterColumns:
41 - jsonPath: .metadata.creationTimestamp
42 name: Age
43 type: date
44 - description: When 'True', the most recent reconcile of the resource succeeded
45 jsonPath: .status.conditions[?(@.type=='Ready')].status
46 name: Ready
47 type: string
48 - description: The reason for the value in 'Ready'
49 jsonPath: .status.conditions[?(@.type=='Ready')].reason
50 name: Status
51 type: string
52 - description: The last transition time for the value in 'Status'
53 jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
54 name: Status Age
55 type: date
56 name: v1beta1
57 schema:
58 openAPIV3Schema:
59 properties:
60 apiVersion:
61 description: 'apiVersion defines the versioned schema of this representation
62 of an object. Servers should convert recognized schemas to the latest
63 internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
64 type: string
65 kind:
66 description: 'kind is a string value representing the REST resource this
67 object represents. Servers may infer this from the endpoint the client
68 submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
69 type: string
70 metadata:
71 type: object
72 spec:
73 properties:
74 config:
75 description: Immutable. The cluster config. Note that Dataproc may
76 set default values, and values may change when clusters are updated.
77 properties:
78 autoscalingConfig:
79 description: Immutable. Optional. Autoscaling config for the policy
80 associated with the cluster. Cluster does not autoscale if this
81 field is unset.
82 properties:
83 policyRef:
84 description: Immutable.
85 oneOf:
86 - not:
87 required:
88 - external
89 required:
90 - name
91 - not:
92 anyOf:
93 - required:
94 - name
95 - required:
96 - namespace
97 required:
98 - external
99 properties:
100 external:
101 description: |-
102 Optional. The autoscaling policy used by the cluster. Only resource names including projectid and location (region) are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` Note that the policy must be in the same project and Dataproc region.
103
104 Allowed value: The Google Cloud resource name of a `DataprocAutoscalingPolicy` resource (format: `projects/{{project}}/locations/{{location}}/autoscalingPolicies/{{name}}`).
105 type: string
106 name:
107 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
108 type: string
109 namespace:
110 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
111 type: string
112 type: object
113 type: object
114 dataprocMetricConfig:
115 description: Immutable. Optional. The config for Dataproc metrics.
116 properties:
117 metrics:
118 description: Immutable. Required. Metrics sources to enable.
119 items:
120 properties:
121 metricOverrides:
122 description: 'Immutable. Optional. Specify one or more
123 [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
124 to collect for the metric source (for the `SPARK`
125 metric source, any [Spark metric](https://spark.apache.org/docs/latest/monitoring.html#metrics)
126 can be specified). Provide metrics in the following
127 format: `METRIC_SOURCE:INSTANCE:GROUP:METRIC` Use
128 camelcase as appropriate. Examples: ``` yarn:ResourceManager:QueueMetrics:AppsCompleted
129 spark:driver:DAGScheduler:job.allJobs sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed
130 hiveserver2:JVM:Memory:NonHeapMemoryUsage.used ```
131 Notes: * Only the specified overridden metrics will
132 be collected for the metric source. For example, if
133 one or more `spark:executor` metrics are listed as
134 metric overrides, other `SPARK` metrics will not be
135 collected. The collection of the default metrics for
136 other OSS metric sources is unaffected. For example,
137 if both `SPARK` and `YARN` metric sources are enabled,
138 and overrides are provided for Spark metrics only,
139 all default YARN metrics will be collected.'
140 items:
141 type: string
142 type: array
143 metricSource:
144 description: 'Immutable. Required. Default metrics are
145 collected unless `metricOverrides` are specified for
146 the metric source (see [Available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
147 for more information). Possible values: METRIC_SOURCE_UNSPECIFIED,
148 MONITORING_AGENT_DEFAULTS, HDFS, SPARK, YARN, SPARK_HISTORY_SERVER,
149 HIVESERVER2'
150 type: string
151 required:
152 - metricSource
153 type: object
154 type: array
155 required:
156 - metrics
157 type: object
158 encryptionConfig:
159 description: Immutable. Optional. Encryption settings for the
160 cluster.
161 properties:
162 gcePdKmsKeyRef:
163 description: Immutable.
164 oneOf:
165 - not:
166 required:
167 - external
168 required:
169 - name
170 - not:
171 anyOf:
172 - required:
173 - name
174 - required:
175 - namespace
176 required:
177 - external
178 properties:
179 external:
180 description: |-
181 Optional. The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
182
183 Allowed value: The `selfLink` field of a `KMSCryptoKey` resource.
184 type: string
185 name:
186 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
187 type: string
188 namespace:
189 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
190 type: string
191 type: object
192 type: object
193 endpointConfig:
194 description: Immutable. Optional. Port/endpoint configuration
195 for this cluster.
196 properties:
197 enableHttpPortAccess:
198 description: Immutable. Optional. If true, enable http access
199 to specific ports on the cluster from external sources.
200 Defaults to false.
201 type: boolean
202 type: object
203 gceClusterConfig:
204 description: Immutable. Optional. The shared Compute Engine config
205 settings for all instances in a cluster.
206 properties:
207 confidentialInstanceConfig:
208 description: Immutable. Optional. Confidential Instance Config
209 for clusters using [Confidential VMs](https://cloud.google.com/compute/confidential-vm/docs).
210 properties:
211 enableConfidentialCompute:
212 description: Immutable. Optional. Defines whether the
213 instance should have confidential compute enabled.
214 type: boolean
215 type: object
216 internalIPOnly:
217 description: Immutable. Optional. If true, all instances in
218 the cluster will only have internal IP addresses. By default,
219 clusters are not restricted to internal IP addresses, and
220 will have ephemeral external IP addresses assigned to each
221 instance. This `internal_ip_only` restriction can only be
222 enabled for subnetwork enabled networks, and all off-cluster
223 dependencies must be configured to be accessible without
224 external IP addresses.
225 type: boolean
226 metadata:
227 additionalProperties:
228 type: string
229 description: Immutable. The Compute Engine metadata entries
230 to add to all instances (see [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
231 type: object
232 networkRef:
233 description: Immutable.
234 oneOf:
235 - not:
236 required:
237 - external
238 required:
239 - name
240 - not:
241 anyOf:
242 - required:
243 - name
244 - required:
245 - namespace
246 required:
247 - external
248 properties:
249 external:
250 description: |-
251 Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither `network_uri` nor `subnetwork_uri` is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for more information). A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default` * `projects/[project_id]/regions/global/default` * `default`
252
253 Allowed value: The `selfLink` field of a `ComputeNetwork` resource.
254 type: string
255 name:
256 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
257 type: string
258 namespace:
259 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
260 type: string
261 type: object
262 nodeGroupAffinity:
263 description: Immutable. Optional. Node Group Affinity for
264 sole-tenant clusters.
265 properties:
266 nodeGroupRef:
267 description: Immutable.
268 oneOf:
269 - not:
270 required:
271 - external
272 required:
273 - name
274 - not:
275 anyOf:
276 - required:
277 - name
278 - required:
279 - namespace
280 required:
281 - external
282 properties:
283 external:
284 description: |-
285 Required. The URI of a sole-tenant [node group resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on. A full URL, partial URI, or node group name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `node-group-1`
286
287 Allowed value: The `selfLink` field of a `ComputeNodeGroup` resource.
288 type: string
289 name:
290 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
291 type: string
292 namespace:
293 description: 'Namespace of the referent. More info:
294 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
295 type: string
296 type: object
297 required:
298 - nodeGroupRef
299 type: object
300 privateIPv6GoogleAccess:
301 description: 'Immutable. Optional. The type of IPv6 access
302 for a cluster. Possible values: PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED,
303 INHERIT_FROM_SUBNETWORK, OUTBOUND, BIDIRECTIONAL'
304 type: string
305 reservationAffinity:
306 description: Immutable. Optional. Reservation Affinity for
307 consuming Zonal reservation.
308 properties:
309 consumeReservationType:
310 description: 'Immutable. Optional. Type of reservation
311 to consume. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION,
312 ANY_RESERVATION, SPECIFIC_RESERVATION'
313 type: string
314 key:
315 description: Immutable. Optional. Corresponds to the label
316 key of reservation resource.
317 type: string
318 values:
319 description: Immutable. Optional. Corresponds to the label
320 values of reservation resource.
321 items:
322 type: string
323 type: array
324 type: object
325 serviceAccountRef:
326 description: Immutable.
327 oneOf:
328 - not:
329 required:
330 - external
331 required:
332 - name
333 - not:
334 anyOf:
335 - required:
336 - name
337 - required:
338 - namespace
339 required:
340 - external
341 properties:
342 external:
343 description: |-
344 Optional. The [Dataproc service account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc) (also see [VM Data Plane identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity)) used by Dataproc cluster VM instances to access Google Cloud Platform services. If not specified, the [Compute Engine default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) is used.
345
346 Allowed value: The `email` field of an `IAMServiceAccount` resource.
347 type: string
348 name:
349 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
350 type: string
351 namespace:
352 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
353 type: string
354 type: object
355 serviceAccountScopes:
356 description: 'Immutable. Optional. The URIs of service account
357 scopes to be included in Compute Engine instances. The following
358 base set of scopes is always included: * https://www.googleapis.com/auth/cloud.useraccounts.readonly
359 * https://www.googleapis.com/auth/devstorage.read_write
360 * https://www.googleapis.com/auth/logging.write If no scopes
361 are specified, the following defaults are also provided:
362 * https://www.googleapis.com/auth/bigquery * https://www.googleapis.com/auth/bigtable.admin.table
363 * https://www.googleapis.com/auth/bigtable.data * https://www.googleapis.com/auth/devstorage.full_control'
364 items:
365 type: string
366 type: array
367 shieldedInstanceConfig:
368 description: Immutable. Optional. Shielded Instance Config
369 for clusters using [Compute Engine Shielded VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
370 properties:
371 enableIntegrityMonitoring:
372 description: Immutable. Optional. Defines whether instances
373 have integrity monitoring enabled.
374 type: boolean
375 enableSecureBoot:
376 description: Immutable. Optional. Defines whether instances
377 have Secure Boot enabled.
378 type: boolean
379 enableVtpm:
380 description: Immutable. Optional. Defines whether instances
381 have the vTPM enabled.
382 type: boolean
383 type: object
384 subnetworkRef:
385 description: Immutable.
386 oneOf:
387 - not:
388 required:
389 - external
390 required:
391 - name
392 - not:
393 anyOf:
394 - required:
395 - name
396 - required:
397 - namespace
398 required:
399 - external
400 properties:
401 external:
402 description: |-
403 Optional. The Compute Engine subnetwork to be used for machine communications. Cannot be specified with network_uri. A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/subnetworks/sub0` * `projects/[project_id]/regions/us-east1/subnetworks/sub0` * `sub0`
404
405 Allowed value: The `selfLink` field of a `ComputeSubnetwork` resource.
406 type: string
407 name:
408 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
409 type: string
410 namespace:
411 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
412 type: string
413 type: object
414 tags:
415 description: Immutable. The Compute Engine tags to add to
416 all instances (see [Tagging instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
417 items:
418 type: string
419 type: array
420 zone:
421 description: 'Immutable. Optional. The zone where the Compute
422 Engine cluster will be located. On a create request, it
423 is required in the "global" region. If omitted in a non-global
424 Dataproc region, the service will pick a zone in the corresponding
425 Compute Engine region. On a get request, zone will always
426 be present. A full URL, partial URI, or short name are valid.
427 Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
428 * `projects/[project_id]/zones/[zone]` * `us-central1-f`'
429 type: string
430 type: object
431 initializationActions:
432 description: 'Immutable. Optional. Commands to execute on each
433 node after config is completed. By default, executables are
434 run on master and all worker nodes. You can test a node''s `role`
435 metadata to run an executable on a master or worker node, as
436 shown below using `curl` (you can also use `wget`): ROLE=$(curl
437 -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
438 if [[ "${ROLE}" == ''Master'' ]]; then ... master specific actions
439 ... else ... worker specific actions ... fi'
440 items:
441 properties:
442 executableFile:
443 description: Immutable. Required. Cloud Storage URI of executable
444 file.
445 type: string
446 executionTimeout:
447 description: Immutable. Optional. Amount of time executable
448 has to complete. Default is 10 minutes (see JSON representation
449 of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
450 Cluster creation fails with an explanatory error message
451 (the name of the executable that caused the error and
452 the exceeded timeout period) if the executable is not
453 completed at end of the timeout period.
454 type: string
455 required:
456 - executableFile
457 type: object
458 type: array
459 lifecycleConfig:
460 description: Immutable. Optional. Lifecycle setting for the cluster.
461 properties:
462 autoDeleteTime:
463 description: Immutable. Optional. The time when cluster will
464 be auto-deleted (see JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
465 format: date-time
466 type: string
467 autoDeleteTtl:
468 description: Immutable. Optional. The lifetime duration of
469 cluster. The cluster will be auto-deleted at the end of
470 this period. Minimum value is 10 minutes; maximum value
471 is 14 days (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
472 type: string
473 idleDeleteTtl:
474 description: Immutable. Optional. The duration to keep the
475 cluster alive while idling (when no jobs are running). Passing
476 this threshold will cause the cluster to be deleted. Minimum
477 value is 5 minutes; maximum value is 14 days (see JSON representation
478 of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
479 type: string
480 type: object
481 masterConfig:
482 description: Immutable. Optional. The Compute Engine config settings
483 for the master instance in a cluster.
484 properties:
485 accelerators:
486 description: Immutable. Optional. The Compute Engine accelerator
487 configuration for these instances.
488 items:
489 properties:
490 acceleratorCount:
491 description: Immutable. The number of the accelerator
492 cards of this type exposed to this instance.
493 format: int64
494 type: integer
495 acceleratorType:
496 description: 'Immutable. Full URL, partial URI, or short
497 name of the accelerator type resource to expose to
498 this instance. See [Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
499 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
500 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
501 * `nvidia-tesla-k80` **Auto Zone Exception**: If you
502 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
503 feature, you must use the short name of the accelerator
504 type resource, for example, `nvidia-tesla-k80`.'
505 type: string
506 type: object
507 type: array
508 diskConfig:
509 description: Immutable. Optional. Disk option config settings.
510 properties:
511 bootDiskSizeGb:
512 description: Immutable. Optional. Size in GB of the boot
513 disk (default is 500GB).
514 format: int64
515 type: integer
516 bootDiskType:
517 description: 'Immutable. Optional. Type of the boot disk
518 (default is "pd-standard"). Valid values: "pd-balanced"
519 (Persistent Disk Balanced Solid State Drive), "pd-ssd"
520 (Persistent Disk Solid State Drive), or "pd-standard"
521 (Persistent Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
522 type: string
523 localSsdInterface:
524 description: 'Immutable. Optional. Interface type of local
525 SSDs (default is "scsi"). Valid values: "scsi" (Small
526 Computer System Interface), "nvme" (Non-Volatile Memory
527 Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
528 type: string
529 numLocalSsds:
530 description: Immutable. Optional. Number of attached SSDs,
531 from 0 to 4 (default is 0). If SSDs are not attached,
532 the boot disk is used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
533 data. If one or more SSDs are attached, this runtime
534 bulk data is spread across them, and the boot disk contains
535 only basic config and installed binaries.
536 format: int64
537 type: integer
538 type: object
539 imageRef:
540 description: Immutable.
541 oneOf:
542 - not:
543 required:
544 - external
545 required:
546 - name
547 - not:
548 anyOf:
549 - required:
550 - name
551 - required:
552 - namespace
553 required:
554 - external
555 properties:
556 external:
557 description: |-
558 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
559
560 Allowed value: The `selfLink` field of a `ComputeImage` resource.
561 type: string
562 name:
563 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
564 type: string
565 namespace:
566 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
567 type: string
568 type: object
569 machineType:
570 description: 'Immutable. Optional. The Compute Engine machine
571 type used for cluster instances. A full URL, partial URI,
572 or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
573 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
574 * `n1-standard-2` **Auto Zone Exception**: If you are using
575 the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
576 feature, you must use the short name of the machine type
577 resource, for example, `n1-standard-2`.'
578 type: string
579 minCpuPlatform:
580 description: Immutable. Optional. Specifies the minimum cpu
581 platform for the Instance Group. See [Dataproc -> Minimum
582 CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
583 type: string
584 numInstances:
585 description: Immutable. Optional. The number of VM instances
586 in the instance group. For [HA cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
587 [master_config](#FIELDS.master_config) groups, **must be
588 set to 3**. For standard cluster [master_config](#FIELDS.master_config)
589 groups, **must be set to 1**.
590 format: int64
591 type: integer
592 preemptibility:
593 description: 'Immutable. Optional. Specifies the preemptibility
594 of the instance group. The default value for master and
595 worker groups is `NON_PREEMPTIBLE`. This default cannot
596 be changed. The default value for secondary instances is
597 `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
598 NON_PREEMPTIBLE, PREEMPTIBLE'
599 type: string
600 type: object
601 metastoreConfig:
602 description: Immutable. Optional. Metastore configuration.
603 properties:
604 dataprocMetastoreServiceRef:
605 description: Immutable.
606 oneOf:
607 - not:
608 required:
609 - external
610 required:
611 - name
612 - not:
613 anyOf:
614 - required:
615 - name
616 - required:
617 - namespace
618 required:
619 - external
620 properties:
621 external:
622 description: 'Required. Resource name of an existing Dataproc
623 Metastore service. Example: * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`'
624 type: string
625 name:
626 description: |-
627 [WARNING] DataprocMetastoreService not yet supported in Config Connector, use 'external' field to reference existing resources.
628 Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
629 type: string
630 namespace:
631 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
632 type: string
633 type: object
634 required:
635 - dataprocMetastoreServiceRef
636 type: object
637 secondaryWorkerConfig:
638 description: Immutable. Optional. The Compute Engine config settings
639 for additional worker instances in a cluster.
640 properties:
641 accelerators:
642 description: Immutable. Optional. The Compute Engine accelerator
643 configuration for these instances.
644 items:
645 properties:
646 acceleratorCount:
647 description: Immutable. The number of the accelerator
648 cards of this type exposed to this instance.
649 format: int64
650 type: integer
651 acceleratorType:
652 description: 'Immutable. Full URL, partial URI, or short
653 name of the accelerator type resource to expose to
654 this instance. See [Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
655 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
656 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
657 * `nvidia-tesla-k80` **Auto Zone Exception**: If you
658 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
659 feature, you must use the short name of the accelerator
660 type resource, for example, `nvidia-tesla-k80`.'
661 type: string
662 type: object
663 type: array
664 diskConfig:
665 description: Immutable. Optional. Disk option config settings.
666 properties:
667 bootDiskSizeGb:
668 description: Immutable. Optional. Size in GB of the boot
669 disk (default is 500GB).
670 format: int64
671 type: integer
672 bootDiskType:
673 description: 'Immutable. Optional. Type of the boot disk
674 (default is "pd-standard"). Valid values: "pd-balanced"
675 (Persistent Disk Balanced Solid State Drive), "pd-ssd"
676 (Persistent Disk Solid State Drive), or "pd-standard"
677 (Persistent Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
678 type: string
679 localSsdInterface:
680 description: 'Immutable. Optional. Interface type of local
681 SSDs (default is "scsi"). Valid values: "scsi" (Small
682 Computer System Interface), "nvme" (Non-Volatile Memory
683 Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
684 type: string
685 numLocalSsds:
686 description: Immutable. Optional. Number of attached SSDs,
687 from 0 to 4 (default is 0). If SSDs are not attached,
688 the boot disk is used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
689 data. If one or more SSDs are attached, this runtime
690 bulk data is spread across them, and the boot disk contains
691 only basic config and installed binaries.
692 format: int64
693 type: integer
694 type: object
695 imageRef:
696 description: Immutable.
697 oneOf:
698 - not:
699 required:
700 - external
701 required:
702 - name
703 - not:
704 anyOf:
705 - required:
706 - name
707 - required:
708 - namespace
709 required:
710 - external
711 properties:
712 external:
713 description: |-
714 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
715
716 Allowed value: The `selfLink` field of a `ComputeImage` resource.
717 type: string
718 name:
719 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
720 type: string
721 namespace:
722 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
723 type: string
724 type: object
725 machineType:
726 description: 'Immutable. Optional. The Compute Engine machine
727 type used for cluster instances. A full URL, partial URI,
728 or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
729 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
730 * `n1-standard-2` **Auto Zone Exception**: If you are using
731 the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
732 feature, you must use the short name of the machine type
733 resource, for example, `n1-standard-2`.'
734 type: string
735 minCpuPlatform:
736 description: Immutable. Optional. Specifies the minimum cpu
737 platform for the Instance Group. See [Dataproc -> Minimum
738 CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
739 type: string
740 numInstances:
741 description: Immutable. Optional. The number of VM instances
742 in the instance group. For [HA cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
743 [master_config](#FIELDS.master_config) groups, **must be
744 set to 3**. For standard cluster [master_config](#FIELDS.master_config)
745 groups, **must be set to 1**.
746 format: int64
747 type: integer
748 preemptibility:
749 description: 'Immutable. Optional. Specifies the preemptibility
750 of the instance group. The default value for master and
751 worker groups is `NON_PREEMPTIBLE`. This default cannot
752 be changed. The default value for secondary instances is
753 `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
754 NON_PREEMPTIBLE, PREEMPTIBLE'
755 type: string
756 type: object
757 securityConfig:
758 description: Immutable. Optional. Security settings for the cluster.
759 properties:
760 identityConfig:
761 description: Immutable. Optional. Identity related configuration,
762 including service account based secure multi-tenancy user
763 mappings.
764 properties:
765 userServiceAccountMapping:
766 additionalProperties:
767 type: string
768 description: Immutable. Required. Map of user to service
769 account.
770 type: object
771 required:
772 - userServiceAccountMapping
773 type: object
774 kerberosConfig:
775 description: Immutable. Optional. Kerberos related configuration.
776 properties:
777 crossRealmTrustAdminServer:
778 description: Immutable. Optional. The admin server (IP
779 or hostname) for the remote trusted realm in a cross
780 realm trust relationship.
781 type: string
782 crossRealmTrustKdc:
783 description: Immutable. Optional. The KDC (IP or hostname)
784 for the remote trusted realm in a cross realm trust
785 relationship.
786 type: string
787 crossRealmTrustRealm:
788 description: Immutable. Optional. The remote realm the
789 Dataproc on-cluster KDC will trust, should the user
790 enable cross realm trust.
791 type: string
792 crossRealmTrustSharedPassword:
793 description: Immutable. Optional. The Cloud Storage URI
794 of a KMS encrypted file containing the shared password
795 between the on-cluster Kerberos realm and the remote
796 trusted realm, in a cross realm trust relationship.
797 type: string
798 enableKerberos:
799 description: 'Immutable. Optional. Flag to indicate whether
800 to Kerberize the cluster (default: false). Set this
801 field to true to enable Kerberos on a cluster.'
802 type: boolean
803 kdcDbKey:
804 description: Immutable. Optional. The Cloud Storage URI
805 of a KMS encrypted file containing the master key of
806 the KDC database.
807 type: string
808 keyPassword:
809 description: Immutable. Optional. The Cloud Storage URI
810 of a KMS encrypted file containing the password to the
811 user provided key. For the self-signed certificate,
812 this password is generated by Dataproc.
813 type: string
814 keystore:
815 description: Immutable. Optional. The Cloud Storage URI
816 of the keystore file used for SSL encryption. If not
817 provided, Dataproc will provide a self-signed certificate.
818 type: string
819 keystorePassword:
820 description: Immutable. Optional. The Cloud Storage URI
821 of a KMS encrypted file containing the password to the
822 user provided keystore. For the self-signed certificate,
823 this password is generated by Dataproc.
824 type: string
825 kmsKeyRef:
826 description: Immutable.
827 oneOf:
828 - not:
829 required:
830 - external
831 required:
832 - name
833 - not:
834 anyOf:
835 - required:
836 - name
837 - required:
838 - namespace
839 required:
840 - external
841 properties:
842 external:
843 description: |-
844 Optional. The uri of the KMS key used to encrypt various sensitive files.
845
846 Allowed value: The `selfLink` field of a `KMSCryptoKey` resource.
847 type: string
848 name:
849 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
850 type: string
851 namespace:
852 description: 'Namespace of the referent. More info:
853 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
854 type: string
855 type: object
856 realm:
857 description: Immutable. Optional. The name of the on-cluster
858 Kerberos realm. If not specified, the uppercased domain
859 of hostnames will be the realm.
860 type: string
861 rootPrincipalPassword:
862 description: Immutable. Optional. The Cloud Storage URI
863 of a KMS encrypted file containing the root principal
864 password.
865 type: string
866 tgtLifetimeHours:
867 description: Immutable. Optional. The lifetime of the
868 ticket granting ticket, in hours. If not specified,
869 or user specifies 0, then default value 10 will be used.
870 format: int64
871 type: integer
872 truststore:
873 description: Immutable. Optional. The Cloud Storage URI
874 of the truststore file used for SSL encryption. If not
875 provided, Dataproc will provide a self-signed certificate.
876 type: string
877 truststorePassword:
878 description: Immutable. Optional. The Cloud Storage URI
879 of a KMS encrypted file containing the password to the
880 user provided truststore. For the self-signed certificate,
881 this password is generated by Dataproc.
882 type: string
883 type: object
884 type: object
885 softwareConfig:
886 description: Immutable. Optional. The config settings for software
887 inside the cluster.
888 properties:
889 imageVersion:
890 description: Immutable. Optional. The version of software
891 inside the cluster. It must be one of the supported [Dataproc
892 Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
893 such as "1.2" (including a subminor version, such as "1.2.29"),
894 or the ["preview" version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
895 If unspecified, it defaults to the latest Debian version.
896 type: string
897 optionalComponents:
898 description: Immutable. Optional. The set of components to
899 activate on the cluster.
900 items:
901 type: string
902 type: array
903 properties:
904 additionalProperties:
905 type: string
906 description: 'Immutable. Optional. The properties to set on
907 daemon config files. Property keys are specified in `prefix:property`
908 format, for example `core:hadoop.tmp.dir`. The following
909 are supported prefixes and their mappings: * capacity-scheduler:
910 `capacity-scheduler.xml` * core: `core-site.xml` * distcp:
911 `distcp-default.xml` * hdfs: `hdfs-site.xml` * hive: `hive-site.xml`
912 * mapred: `mapred-site.xml` * pig: `pig.properties` * spark:
913 `spark-defaults.conf` * yarn: `yarn-site.xml` For more information,
914 see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).'
915 type: object
916 type: object
917 stagingBucketRef:
918 description: Immutable.
919 oneOf:
920 - not:
921 required:
922 - external
923 required:
924 - name
925 - not:
926 anyOf:
927 - required:
928 - name
929 - required:
930 - namespace
931 required:
932 - external
933 properties:
934 external:
935 description: |-
936 Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see [Dataproc staging bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). **This field requires a Cloud Storage bucket name, not a URI to a Cloud Storage bucket.**
937
938 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
939 type: string
940 name:
941 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
942 type: string
943 namespace:
944 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
945 type: string
946 type: object
947 tempBucketRef:
948 description: Immutable.
949 oneOf:
950 - not:
951 required:
952 - external
953 required:
954 - name
955 - not:
956 anyOf:
957 - required:
958 - name
959 - required:
960 - namespace
961 required:
962 - external
963 properties:
964 external:
965 description: |-
966 Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. If you do not specify a temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's temp bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket. The default bucket has a TTL of 90 days, but you can use any TTL (or none) if you specify a bucket. **This field requires a Cloud Storage bucket name, not a URI to a Cloud Storage bucket.**
967
968 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
969 type: string
970 name:
971 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
972 type: string
973 namespace:
974 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
975 type: string
976 type: object
977 workerConfig:
978 description: Immutable. Optional. The Compute Engine config settings
979 for worker instances in a cluster.
980 properties:
981 accelerators:
982 description: Immutable. Optional. The Compute Engine accelerator
983 configuration for these instances.
984 items:
985 properties:
986 acceleratorCount:
987 description: Immutable. The number of the accelerator
988 cards of this type exposed to this instance.
989 format: int64
990 type: integer
991 acceleratorType:
992 description: 'Immutable. Full URL, partial URI, or short
993 name of the accelerator type resource to expose to
994 this instance. See [Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
995 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
996 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
997 * `nvidia-tesla-k80` **Auto Zone Exception**: If you
998 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
999 feature, you must use the short name of the accelerator
1000 type resource, for example, `nvidia-tesla-k80`.'
1001 type: string
1002 type: object
1003 type: array
1004 diskConfig:
1005 description: Immutable. Optional. Disk option config settings.
1006 properties:
1007 bootDiskSizeGb:
1008 description: Immutable. Optional. Size in GB of the boot
1009 disk (default is 500GB).
1010 format: int64
1011 type: integer
1012 bootDiskType:
1013 description: 'Immutable. Optional. Type of the boot disk
1014 (default is "pd-standard"). Valid values: "pd-balanced"
1015 (Persistent Disk Balanced Solid State Drive), "pd-ssd"
1016 (Persistent Disk Solid State Drive), or "pd-standard"
1017 (Persistent Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
1018 type: string
1019 localSsdInterface:
1020 description: 'Immutable. Optional. Interface type of local
1021 SSDs (default is "scsi"). Valid values: "scsi" (Small
1022 Computer System Interface), "nvme" (Non-Volatile Memory
1023 Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
1024 type: string
1025 numLocalSsds:
1026 description: Immutable. Optional. Number of attached SSDs,
1027 from 0 to 4 (default is 0). If SSDs are not attached,
1028 the boot disk is used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
1029 data. If one or more SSDs are attached, this runtime
1030 bulk data is spread across them, and the boot disk contains
1031 only basic config and installed binaries.
1032 format: int64
1033 type: integer
1034 type: object
1035 imageRef:
1036 description: Immutable.
1037 oneOf:
1038 - not:
1039 required:
1040 - external
1041 required:
1042 - name
1043 - not:
1044 anyOf:
1045 - required:
1046 - name
1047 - required:
1048 - namespace
1049 required:
1050 - external
1051 properties:
1052 external:
1053 description: |-
1054 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
1055
1056 Allowed value: The `selfLink` field of a `ComputeImage` resource.
1057 type: string
1058 name:
1059 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1060 type: string
1061 namespace:
1062 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1063 type: string
1064 type: object
1065 machineType:
1066 description: 'Immutable. Optional. The Compute Engine machine
1067 type used for cluster instances. A full URL, partial URI,
1068 or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1069 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1070 * `n1-standard-2` **Auto Zone Exception**: If you are using
1071 the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1072 feature, you must use the short name of the machine type
1073 resource, for example, `n1-standard-2`.'
1074 type: string
1075 minCpuPlatform:
1076 description: Immutable. Optional. Specifies the minimum cpu
1077 platform for the Instance Group. See [Dataproc -> Minimum
1078 CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
1079 type: string
1080 numInstances:
1081 description: Immutable. Optional. The number of VM instances
1082 in the instance group. For [HA cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
1083 [master_config](#FIELDS.master_config) groups, **must be
1084 set to 3**. For standard cluster [master_config](#FIELDS.master_config)
1085 groups, **must be set to 1**.
1086 format: int64
1087 type: integer
1088 preemptibility:
1089 description: 'Immutable. Optional. Specifies the preemptibility
1090 of the instance group. The default value for master and
1091 worker groups is `NON_PREEMPTIBLE`. This default cannot
1092 be changed. The default value for secondary instances is
1093 `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
1094 NON_PREEMPTIBLE, PREEMPTIBLE'
1095 type: string
1096 type: object
1097 type: object
1098 location:
1099 description: Immutable. The location for the resource, usually a GCP
1100 region.
1101 type: string
1102 projectRef:
1103 description: Immutable. The Project that this resource belongs to.
1104 oneOf:
1105 - not:
1106 required:
1107 - external
1108 required:
1109 - name
1110 - not:
1111 anyOf:
1112 - required:
1113 - name
1114 - required:
1115 - namespace
1116 required:
1117 - external
1118 properties:
1119 external:
1120 description: |-
1121 Required. The Google Cloud Platform project ID that the cluster belongs to.
1122
1123 Allowed value: The Google Cloud resource name of a `Project` resource (format: `projects/{{name}}`).
1124 type: string
1125 name:
1126 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1127 type: string
1128 namespace:
1129 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1130 type: string
1131 type: object
1132 resourceID:
1133 description: Immutable. Optional. The name of the resource. Used for
1134 creation and acquisition. When unset, the value of `metadata.name`
1135 is used as the default.
1136 type: string
1137 virtualClusterConfig:
1138 description: Immutable. Optional. The virtual cluster config is used
1139 when creating a Dataproc cluster that does not directly control
1140 the underlying compute resources, for example, when creating a [Dataproc-on-GKE
1141 cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke).
1142 Dataproc may set default values, and values may change when clusters
1143 are updated. Exactly one of config or virtual_cluster_config must
1144 be specified.
1145 properties:
1146 auxiliaryServicesConfig:
1147 description: Immutable. Optional. Configuration of auxiliary services
1148 used by this cluster.
1149 properties:
1150 metastoreConfig:
1151 description: Immutable. Optional. The Hive Metastore configuration
1152 for this workload.
1153 properties:
1154 dataprocMetastoreServiceRef:
1155 description: Immutable.
1156 oneOf:
1157 - not:
1158 required:
1159 - external
1160 required:
1161 - name
1162 - not:
1163 anyOf:
1164 - required:
1165 - name
1166 - required:
1167 - namespace
1168 required:
1169 - external
1170 properties:
1171 external:
1172 description: 'Required. Resource name of an existing
1173 Dataproc Metastore service. Example: * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`'
1174 type: string
1175 name:
1176 description: |-
1177 [WARNING] DataprocMetastoreService not yet supported in Config Connector, use 'external' field to reference existing resources.
1178 Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
1179 type: string
1180 namespace:
1181 description: 'Namespace of the referent. More info:
1182 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1183 type: string
1184 type: object
1185 required:
1186 - dataprocMetastoreServiceRef
1187 type: object
1188 sparkHistoryServerConfig:
1189 description: Immutable. Optional. The Spark History Server
1190 configuration for the workload.
1191 properties:
1192 dataprocClusterRef:
1193 description: Immutable.
1194 oneOf:
1195 - not:
1196 required:
1197 - external
1198 required:
1199 - name
1200 - not:
1201 anyOf:
1202 - required:
1203 - name
1204 - required:
1205 - namespace
1206 required:
1207 - external
1208 properties:
1209 external:
1210 description: |-
1211 Optional. Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload. Example: * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
1212
1213 Allowed value: The `selfLink` field of a `DataprocCluster` resource.
1214 type: string
1215 name:
1216 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1217 type: string
1218 namespace:
1219 description: 'Namespace of the referent. More info:
1220 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1221 type: string
1222 type: object
1223 type: object
1224 type: object
1225 kubernetesClusterConfig:
1226 description: Immutable. Required. The configuration for running
1227 the Dataproc cluster on Kubernetes.
1228 properties:
1229 gkeClusterConfig:
1230 description: Immutable. Required. The configuration for running
1231 the Dataproc cluster on GKE.
1232 properties:
1233 gkeClusterTargetRef:
1234 description: Immutable.
1235 oneOf:
1236 - not:
1237 required:
1238 - external
1239 required:
1240 - name
1241 - not:
1242 anyOf:
1243 - required:
1244 - name
1245 - required:
1246 - namespace
1247 required:
1248 - external
1249 properties:
1250 external:
1251 description: |-
1252 Optional. A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional). Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
1253
1254 Allowed value: The `selfLink` field of a `ContainerCluster` resource.
1255 type: string
1256 name:
1257 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1258 type: string
1259 namespace:
1260 description: 'Namespace of the referent. More info:
1261 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1262 type: string
1263 type: object
1264 nodePoolTarget:
1265 description: Immutable. Optional. GKE node pools where
1266 workloads will be scheduled. At least one node pool
1267 must be assigned the `DEFAULT` GkeNodePoolTarget.Role.
1268 If a `GkeNodePoolTarget` is not specified, Dataproc
1269 constructs a `DEFAULT` `GkeNodePoolTarget`. Each role
1270 can be given to only one `GkeNodePoolTarget`. All node
1271 pools must have the same location settings.
1272 items:
1273 properties:
1274 nodePoolConfig:
1275 description: Immutable. Input only. The configuration
1276 for the GKE node pool. If specified, Dataproc
1277 attempts to create a node pool with the specified
1278 shape. If one with the same name already exists,
1279 it is verified against all specified fields. If
1280 a field differs, the virtual cluster creation
1281 will fail. If omitted, any node pool with the
1282 specified name is used. If a node pool with the
1283 specified name does not exist, Dataproc creates
1284 a node pool with default values. This is an input
1285 only field. It will not be returned by the API.
1286 properties:
1287 autoscaling:
1288 description: Immutable. Optional. The autoscaler
1289 configuration for this node pool. The autoscaler
1290 is enabled only when a valid configuration
1291 is present.
1292 properties:
1293 maxNodeCount:
1294 description: Immutable. The maximum number
1295 of nodes in the node pool. Must be >=
1296 min_node_count, and must be > 0. **Note:**
1297 Quota must be sufficient to scale up the
1298 cluster.
1299 format: int64
1300 type: integer
1301 minNodeCount:
1302 description: Immutable. The minimum number
1303 of nodes in the node pool. Must be >=
1304 0 and <= max_node_count.
1305 format: int64
1306 type: integer
1307 type: object
1308 config:
1309 description: Immutable. Optional. The node pool
1310 configuration.
1311 properties:
1312 accelerators:
1313 description: Immutable. Optional. A list
1314 of [hardware accelerators](https://cloud.google.com/compute/docs/gpus)
1315 to attach to each node.
1316 items:
1317 properties:
1318 acceleratorCount:
1319 description: Immutable. The number
1320 of accelerator cards exposed to
1321 an instance.
1322 format: int64
1323 type: integer
1324 acceleratorType:
1325 description: Immutable. The accelerator
1326 type resource name (see GPUs
1327 on Compute Engine).
1328 type: string
1329 gpuPartitionSize:
1330 description: Immutable. Size of partitions
1331 to create on the GPU. Valid values
1332 are described in the NVIDIA [mig
1333 user guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning).
1334 type: string
1335 type: object
1336 type: array
1337 bootDiskKmsKey:
1338 description: 'Immutable. Optional. The [Customer
1339 Managed Encryption Key (CMEK)](https://cloud.google.com/kubernetes-engine/docs/how-to/using-cmek)
1340 used to encrypt the boot disk attached
1341 to each node in the node pool. Specify
1342 the key using the following format: `projects/KEY_PROJECT_ID/locations/LOCATION/keyRings/RING_NAME/cryptoKeys/KEY_NAME`.'
1343 type: string
1344 ephemeralStorageConfig:
1345 description: Immutable. Optional. Parameters
1346 for the ephemeral storage filesystem.
1347 If unspecified, ephemeral storage is backed
1348 by the boot disk.
1349 properties:
1350 localSsdCount:
1351 description: Immutable. Number of local
1352 SSDs to use to back ephemeral storage.
1353 Uses NVMe interfaces. Each local SSD
1354 is 375 GB in size. If zero, it means
1355 to disable using local SSDs as ephemeral
1356 storage.
1357 format: int64
1358 type: integer
1359 type: object
1360 localSsdCount:
1361 description: Immutable. Optional. The number
1362 of local SSD disks to attach to the node,
1363 which is limited by the maximum number
1364 of disks allowable per zone (see [Adding
1365 Local SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
1366 format: int64
1367 type: integer
1368 machineType:
1369 description: Immutable. Optional. The name
1370 of a Compute Engine [machine type](https://cloud.google.com/compute/docs/machine-types).
1371 type: string
1372 minCpuPlatform:
1373 description: Immutable. Optional. [Minimum
1374 CPU platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
1375 to be used by this instance. The instance
1376 may be scheduled on the specified or a
1377 newer CPU platform. Specify the friendly
1378 names of CPU platforms, such as "Intel
1379 Haswell" or "Intel Sandy Bridge".
1380 type: string
1381 preemptible:
1382 description: Immutable. Optional. Whether
1383 the nodes are created as legacy [preemptible
1384 VM instances](https://cloud.google.com/compute/docs/instances/preemptible).
1385 Also see Spot VMs, preemptible VM instances
1386 without a maximum lifetime. Legacy and
1387 Spot preemptible nodes cannot be used
1388 in a node pool with the `CONTROLLER`
1389 [role](/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
1390 or in the DEFAULT node pool if the CONTROLLER
1391 role is not assigned (the DEFAULT node
1392 pool will assume the CONTROLLER role).
1393 type: boolean
1394 spot:
1395 description: Immutable. Optional. Whether
1396 the nodes are created as [Spot VM
1397 instances](https://cloud.google.com/compute/docs/instances/spot).
1398 Spot VMs are the latest update to legacy
1399 preemptible VMs. Spot VMs do not have
1400 a maximum lifetime. Legacy and Spot preemptible
1401 nodes cannot be used in a node pool with
1402 the `CONTROLLER` [role](/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
1403 or in the DEFAULT node pool if the CONTROLLER
1404 role is not assigned (the DEFAULT node
1405 pool will assume the CONTROLLER role).
1406 type: boolean
1407 type: object
1408 locations:
1409 description: Immutable. Optional. The list of
1410 Compute Engine [zones](https://cloud.google.com/compute/docs/zones#available)
1411 where node pool nodes associated with a Dataproc
1412 on GKE virtual cluster will be located. **Note:**
1413 All node pools associated with a virtual cluster
1414 must be located in the same region as the
1415 virtual cluster, and they must be located
1416 in the same zone within that region. If a
1417 location is not specified during node pool
1418 creation, Dataproc on GKE will choose the
1419 zone.
1420 items:
1421 type: string
1422 type: array
1423 type: object
1424 nodePoolRef:
1425 description: Immutable.
1426 oneOf:
1427 - not:
1428 required:
1429 - external
1430 required:
1431 - name
1432 - not:
1433 anyOf:
1434 - required:
1435 - name
1436 - required:
1437 - namespace
1438 required:
1439 - external
1440 properties:
1441 external:
1442 description: |-
1443 Required. The target GKE node pool. Format: 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
1444
1445 Allowed value: The `selfLink` field of a `ContainerNodePool` resource.
1446 type: string
1447 name:
1448 description: 'Name of the referent. More info:
1449 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1450 type: string
1451 namespace:
1452 description: 'Namespace of the referent. More
1453 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1454 type: string
1455 type: object
1456 roles:
1457 description: Immutable. Required. The roles associated
1458 with the GKE node pool.
1459 items:
1460 type: string
1461 type: array
1462 required:
1463 - nodePoolRef
1464 - roles
1465 type: object
1466 type: array
1467 type: object
1468 kubernetesNamespace:
1469 description: Immutable. Optional. A namespace within the Kubernetes
1470 cluster to deploy into. If this namespace does not exist,
1471 it is created. If it exists, Dataproc verifies that another
1472 Dataproc VirtualCluster is not installed into it. If not
1473 specified, the name of the Dataproc Cluster is used.
1474 type: string
1475 kubernetesSoftwareConfig:
1476 description: Immutable. Optional. The software configuration
1477 for this Dataproc cluster running on Kubernetes.
1478 properties:
1479 componentVersion:
1480 additionalProperties:
1481 type: string
1482 description: Immutable. The components that should be
1483 installed in this Dataproc cluster. The key must be
1484 a string from the KubernetesComponent enumeration. The
1485 value is the version of the software to be installed.
1486 At least one entry must be specified.
1487 type: object
1488 properties:
1489 additionalProperties:
1490 type: string
1491 description: 'Immutable. The properties to set on daemon
1492 config files. Property keys are specified in `prefix:property`
1493 format, for example `spark:spark.kubernetes.container.image`.
1494 The following are supported prefixes and their mappings:
1495 * spark: `spark-defaults.conf` For more information,
1496 see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).'
1497 type: object
1498 type: object
1499 required:
1500 - gkeClusterConfig
1501 type: object
1502 stagingBucketRef:
1503 description: Immutable.
1504 oneOf:
1505 - not:
1506 required:
1507 - external
1508 required:
1509 - name
1510 - not:
1511 anyOf:
1512 - required:
1513 - name
1514 - required:
1515 - namespace
1516 required:
1517 - external
1518 properties:
1519 external:
1520 description: |-
1521 Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see [Dataproc staging and temp buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). **This field requires a Cloud Storage bucket name, not a `gs://...` URI to a Cloud Storage bucket.**
1522
1523 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
1524 type: string
1525 name:
1526 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1527 type: string
1528 namespace:
1529 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1530 type: string
1531 type: object
1532 required:
1533 - kubernetesClusterConfig
1534 type: object
1535 required:
1536 - location
1537 type: object
1538 status:
1539 properties:
1540 clusterUuid:
1541 description: Output only. A cluster UUID (Universally Unique Identifier).
1542 Dataproc generates this value when it creates the cluster.
1543 type: string
1544 conditions:
1545 description: Conditions represent the latest available observation
1546 of the resource's current state.
1547 items:
1548 properties:
1549 lastTransitionTime:
1550 description: Last time the condition transitioned from one status
1551 to another.
1552 type: string
1553 message:
1554 description: Human-readable message indicating details about
1555 last transition.
1556 type: string
1557 reason:
1558 description: Unique, one-word, CamelCase reason for the condition's
1559 last transition.
1560 type: string
1561 status:
1562 description: Status is the status of the condition. Can be True,
1563 False, Unknown.
1564 type: string
1565 type:
1566 description: Type is the type of the condition.
1567 type: string
1568 type: object
1569 type: array
1570 config:
1571 properties:
# Object with a single property, httpPorts: a map whose values are strings
# (declared through additionalProperties).
1572 endpointConfig:
1573 properties:
1574 httpPorts:
1575 additionalProperties:
1576 type: string
1577 description: Output only. The map of port descriptions to
1578 URLs. Will only be populated if enable_http_port_access
1579 is true.
1580 type: object
1581 type: object
# Object with a single property, idleStartTime: a string constrained by
# `format: date-time`.
1582 lifecycleConfig:
1583 properties:
1584 idleStartTime:
1585 description: Output only. The time when cluster became idle
1586 (most recent job finished) and became eligible for deletion
1587 due to idleness (see JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1588 format: date-time
1589 type: string
1590 type: object
# Output-only fields reported under masterConfig: instanceNames (string array),
# instanceReferences (object array), isPreemptible (boolean) and
# managedGroupConfig (object). secondaryWorkerConfig and workerConfig in this
# file declare the same four properties.
1591 masterConfig:
1592 properties:
1593 instanceNames:
1594 description: Output only. The list of instance names. Dataproc
1595 derives the names from `cluster_name`, `num_instances`,
1596 and the instance group.
1597 items:
1598 type: string
1599 type: array
# Each reference is an object with four string fields: instanceId,
# instanceName, publicEciesKey and publicKey.
1600 instanceReferences:
1601 description: Output only. List of references to Compute Engine
1602 instances.
1603 items:
1604 properties:
1605 instanceId:
1606 description: The unique identifier of the Compute Engine
1607 instance.
1608 type: string
1609 instanceName:
1610 description: The user-friendly name of the Compute Engine
1611 instance.
1612 type: string
1613 publicEciesKey:
1614 description: The public ECIES key used for sharing data
1615 with this instance.
1616 type: string
1617 publicKey:
1618 description: The public RSA key used for sharing data
1619 with this instance.
1620 type: string
1621 type: object
1622 type: array
1623 isPreemptible:
1624 description: Output only. Specifies that this instance group
1625 contains preemptible instances.
1626 type: boolean
# Two string fields: instanceGroupManagerName and instanceTemplateName.
1627 managedGroupConfig:
1628 description: Output only. The config for Compute Engine Instance
1629 Group Manager that manages this group. This is only used
1630 for preemptible instance groups.
1631 properties:
1632 instanceGroupManagerName:
1633 description: Output only. The name of the Instance Group
1634 Manager for this group.
1635 type: string
1636 instanceTemplateName:
1637 description: Output only. The name of the Instance Template
1638 used for the Managed Instance Group.
1639 type: string
1640 type: object
1641 type: object
# Output-only fields reported under secondaryWorkerConfig. The property set
# matches masterConfig and workerConfig in this file: instanceNames,
# instanceReferences, isPreemptible and managedGroupConfig.
1642 secondaryWorkerConfig:
1643 properties:
1644 instanceNames:
1645 description: Output only. The list of instance names. Dataproc
1646 derives the names from `cluster_name`, `num_instances`,
1647 and the instance group.
1648 items:
1649 type: string
1650 type: array
# Array of objects; every field of an item is a string.
1651 instanceReferences:
1652 description: Output only. List of references to Compute Engine
1653 instances.
1654 items:
1655 properties:
1656 instanceId:
1657 description: The unique identifier of the Compute Engine
1658 instance.
1659 type: string
1660 instanceName:
1661 description: The user-friendly name of the Compute Engine
1662 instance.
1663 type: string
1664 publicEciesKey:
1665 description: The public ECIES key used for sharing data
1666 with this instance.
1667 type: string
1668 publicKey:
1669 description: The public RSA key used for sharing data
1670 with this instance.
1671 type: string
1672 type: object
1673 type: array
1674 isPreemptible:
1675 description: Output only. Specifies that this instance group
1676 contains preemptible instances.
1677 type: boolean
# Object naming the instance group manager and the instance template.
1678 managedGroupConfig:
1679 description: Output only. The config for Compute Engine Instance
1680 Group Manager that manages this group. This is only used
1681 for preemptible instance groups.
1682 properties:
1683 instanceGroupManagerName:
1684 description: Output only. The name of the Instance Group
1685 Manager for this group.
1686 type: string
1687 instanceTemplateName:
1688 description: Output only. The name of the Instance Template
1689 used for the Managed Instance Group.
1690 type: string
1691 type: object
1692 type: object
# Output-only fields reported under workerConfig. Same shape as masterConfig
# and secondaryWorkerConfig in this file: a string array, an array of
# instance-reference objects, a boolean flag and a managed-group object.
1693 workerConfig:
1694 properties:
1695 instanceNames:
1696 description: Output only. The list of instance names. Dataproc
1697 derives the names from `cluster_name`, `num_instances`,
1698 and the instance group.
1699 items:
1700 type: string
1701 type: array
# Items expose instanceId, instanceName, publicEciesKey and publicKey,
# all typed as string.
1702 instanceReferences:
1703 description: Output only. List of references to Compute Engine
1704 instances.
1705 items:
1706 properties:
1707 instanceId:
1708 description: The unique identifier of the Compute Engine
1709 instance.
1710 type: string
1711 instanceName:
1712 description: The user-friendly name of the Compute Engine
1713 instance.
1714 type: string
1715 publicEciesKey:
1716 description: The public ECIES key used for sharing data
1717 with this instance.
1718 type: string
1719 publicKey:
1720 description: The public RSA key used for sharing data
1721 with this instance.
1722 type: string
1723 type: object
1724 type: array
1725 isPreemptible:
1726 description: Output only. Specifies that this instance group
1727 contains preemptible instances.
1728 type: boolean
# String-only object: instanceGroupManagerName and instanceTemplateName.
1729 managedGroupConfig:
1730 description: Output only. The config for Compute Engine Instance
1731 Group Manager that manages this group. This is only used
1732 for preemptible instance groups.
1733 properties:
1734 instanceGroupManagerName:
1735 description: Output only. The name of the Instance Group
1736 Manager for this group.
1737 type: string
1738 instanceTemplateName:
1739 description: Output only. The name of the Instance Template
1740 used for the Managed Instance Group.
1741 type: string
1742 type: object
1743 type: object
1744 type: object
# Object holding two maps, hdfsMetrics and yarnMetrics. Both declare string
# values through additionalProperties; no metric keys are enumerated here.
1745 metrics:
1746 description: 'Output only. Contains cluster daemon metrics such as
1747 HDFS and YARN stats. **Beta Feature**: This report is available
1748 for testing purposes only. It may be changed before final release.'
1749 properties:
1750 hdfsMetrics:
1751 additionalProperties:
1752 type: string
1753 description: The HDFS metrics.
1754 type: object
1755 yarnMetrics:
1756 additionalProperties:
1757 type: string
1758 description: The YARN metrics.
1759 type: object
1760 type: object
# Integer field; per its description it is meant to be compared with
# metadata.generation to tell whether the reported status is current.
1761 observedGeneration:
1762 description: ObservedGeneration is the generation of the resource
1763 that was most recently observed by the Config Connector controller.
1764 If this is equal to metadata.generation, then that means that the
1765 current reported status reflects the most recent desired state of
1766 the resource.
1767 type: integer
# Nested cluster-status object (distinct from the schema-level `status`
# property that contains it). Four string fields: detail, state,
# stateStartTime (format: date-time) and substate. The allowed state and
# substate values are listed only in the descriptions; no enum is declared.
1768 status:
1769 description: Output only. Cluster status.
1770 properties:
1771 detail:
1772 description: Optional. Output only. Details of cluster's state.
1773 type: string
1774 state:
1775 description: 'Output only. The cluster''s state. Possible values:
1776 UNKNOWN, CREATING, RUNNING, ERROR, DELETING, UPDATING, STOPPING,
1777 STOPPED, STARTING'
1778 type: string
1779 stateStartTime:
1780 description: Output only. Time when this state was entered (see
1781 JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1782 format: date-time
1783 type: string
1784 substate:
1785 description: 'Output only. Additional state information that includes
1786 status reported by the agent. Possible values: UNSPECIFIED,
1787 UNHEALTHY, STALE_STATUS'
1788 type: string
1789 type: object
# Array of status objects. Each item declares the same four string fields as
# the sibling `status` property: detail, state, stateStartTime
# (format: date-time) and substate.
1790 statusHistory:
1791 description: Output only. The previous cluster status.
1792 items:
1793 properties:
1794 detail:
1795 description: Optional. Output only. Details of cluster's state.
1796 type: string
1797 state:
1798 description: 'Output only. The cluster''s state. Possible values:
1799 UNKNOWN, CREATING, RUNNING, ERROR, DELETING, UPDATING, STOPPING,
1800 STOPPED, STARTING'
1801 type: string
1802 stateStartTime:
1803 description: Output only. Time when this state was entered (see
1804 JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1805 format: date-time
1806 type: string
1807 substate:
1808 description: 'Output only. Additional state information that
1809 includes status reported by the agent. Possible values: UNSPECIFIED,
1810 UNHEALTHY, STALE_STATUS'
1811 type: string
1812 type: object
1813 type: array
1814 type: object
1815 required:
1816 - spec
1817 type: object
1818 served: true
1819 storage: true
1820 subresources:
1821 status: {}
# Top-level status stanza of the CustomResourceDefinition object itself (not
# the schema property of the same name). It is written with empty values:
# blank accepted kind/plural and empty conditions and storedVersions lists.
1822status:
1823 acceptedNames:
1824 kind: ""
1825 plural: ""
1826 conditions: []
1827 storedVersions: []
View as plain text