1apiVersion: apiextensions.k8s.io/v1
2kind: CustomResourceDefinition
3metadata:
4 annotations:
5 cnrm.cloud.google.com/version: 0.0.0-dev
6 creationTimestamp: null
7 labels:
8 cnrm.cloud.google.com/dcl2crd: "true"
9 cnrm.cloud.google.com/managed-by-kcc: "true"
10 cnrm.cloud.google.com/stability-level: stable
11 cnrm.cloud.google.com/system: "true"
12 name: dataprocclusters.dataproc.cnrm.cloud.google.com
13spec:
14 group: dataproc.cnrm.cloud.google.com
15 names:
16 categories:
17 - gcp
18 kind: DataprocCluster
19 plural: dataprocclusters
20 shortNames:
21 - gcpdataproccluster
22 - gcpdataprocclusters
23 singular: dataproccluster
24 preserveUnknownFields: false
25 scope: Namespaced
26 versions:
27 - additionalPrinterColumns:
28 - jsonPath: .metadata.creationTimestamp
29 name: Age
30 type: date
31 - description: When 'True', the most recent reconcile of the resource succeeded
32 jsonPath: .status.conditions[?(@.type=='Ready')].status
33 name: Ready
34 type: string
35 - description: The reason for the value in 'Ready'
36 jsonPath: .status.conditions[?(@.type=='Ready')].reason
37 name: Status
38 type: string
39 - description: The last transition time for the value in 'Status'
40 jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
41 name: Status Age
42 type: date
43 name: v1beta1
44 schema:
45 openAPIV3Schema:
46 properties:
47 apiVersion:
48 description: 'apiVersion defines the versioned schema of this representation
49 of an object. Servers should convert recognized schemas to the latest
50 internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
51 type: string
52 kind:
53 description: 'kind is a string value representing the REST resource this
54 object represents. Servers may infer this from the endpoint the client
55 submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
56 type: string
57 metadata:
58 type: object
59 spec:
60 properties:
61 config:
62 description: Immutable. The cluster config. Note that Dataproc may
63 set default values, and values may change when clusters are updated.
64 properties:
65 autoscalingConfig:
66 description: Immutable. Optional. Autoscaling config for the policy
67 associated with the cluster. Cluster does not autoscale if this
68 field is unset.
69 properties:
70 policyRef:
71 description: Immutable.
72 oneOf:
73 - not:
74 required:
75 - external
76 required:
77 - name
78 - not:
79 anyOf:
80 - required:
81 - name
82 - required:
83 - namespace
84 required:
85 - external
86 properties:
87 external:
88 description: |-
89 Optional. The autoscaling policy used by the cluster. Only resource names including projectid and location (region) are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` Note that the policy must be in the same project and Dataproc region.
90
91 Allowed value: The Google Cloud resource name of a `DataprocAutoscalingPolicy` resource (format: `projects/{{project}}/locations/{{location}}/autoscalingPolicies/{{name}}`).
92 type: string
93 name:
94 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
95 type: string
96 namespace:
97 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
98 type: string
99 type: object
100 type: object
101 dataprocMetricConfig:
102 description: Immutable. Optional. The config for Dataproc metrics.
103 properties:
104 metrics:
105 description: Immutable. Required. Metrics sources to enable.
106 items:
107 properties:
108 metricOverrides:
109 description: 'Immutable. Optional. Specify one or more
110 [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
111 to collect for the metric source (for the `SPARK`
112 metric source, any [Spark metric](https://spark.apache.org/docs/latest/monitoring.html#metrics)
113 can be specified). Provide metrics in the following
114 format: `METRIC_SOURCE:INSTANCE:GROUP:METRIC` Use
115 camelcase as appropriate. Examples: ``` yarn:ResourceManager:QueueMetrics:AppsCompleted
116 spark:driver:DAGScheduler:job.allJobs sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed
117 hiveserver2:JVM:Memory:NonHeapMemoryUsage.used ```
118 Notes: * Only the specified overridden metrics will
119 be collected for the metric source. For example, if
120 one or more `spark:executor` metrics are listed as
121 metric overrides, other `SPARK` metrics will not be
122 collected. The collection of the default metrics for
123 other OSS metric sources is unaffected. For example,
124 if both `SPARK` and `YARN` metric sources are enabled,
125 and overrides are provided for Spark metrics only,
126 all default YARN metrics will be collected.'
127 items:
128 type: string
129 type: array
130 metricSource:
131 description: 'Immutable. Required. Default metrics are
132 collected unless `metricOverrides` are specified for
133 the metric source (see [Available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
134 for more information). Possible values: METRIC_SOURCE_UNSPECIFIED,
135 MONITORING_AGENT_DEFAULTS, HDFS, SPARK, YARN, SPARK_HISTORY_SERVER,
136 HIVESERVER2'
137 type: string
138 required:
139 - metricSource
140 type: object
141 type: array
142 required:
143 - metrics
144 type: object
145 encryptionConfig:
146 description: Immutable. Optional. Encryption settings for the
147 cluster.
148 properties:
149 gcePdKmsKeyRef:
150 description: Immutable.
151 oneOf:
152 - not:
153 required:
154 - external
155 required:
156 - name
157 - not:
158 anyOf:
159 - required:
160 - name
161 - required:
162 - namespace
163 required:
164 - external
165 properties:
166 external:
167 description: |-
168 Optional. The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
169
170 Allowed value: The `selfLink` field of a `KMSCryptoKey` resource.
171 type: string
172 name:
173 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
174 type: string
175 namespace:
176 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
177 type: string
178 type: object
179 type: object
180 endpointConfig:
181 description: Immutable. Optional. Port/endpoint configuration
182 for this cluster
183 properties:
184 enableHttpPortAccess:
185 description: Immutable. Optional. If true, enable http access
186 to specific ports on the cluster from external sources.
187 Defaults to false.
188 type: boolean
189 type: object
190 gceClusterConfig:
191 description: Immutable. Optional. The shared Compute Engine config
192 settings for all instances in a cluster.
193 properties:
194 confidentialInstanceConfig:
195 description: Immutable. Optional. Confidential Instance Config
196 for clusters using [Confidential VMs](https://cloud.google.com/compute/confidential-vm/docs).
197 properties:
198 enableConfidentialCompute:
199 description: Immutable. Optional. Defines whether the
200 instance should have confidential compute enabled.
201 type: boolean
202 type: object
203 internalIPOnly:
204 description: Immutable. Optional. If true, all instances in
205 the cluster will only have internal IP addresses. By default,
206 clusters are not restricted to internal IP addresses, and
207 will have ephemeral external IP addresses assigned to each
208 instance. This `internal_ip_only` restriction can only be
209 enabled for subnetwork enabled networks, and all off-cluster
210 dependencies must be configured to be accessible without
211 external IP addresses.
212 type: boolean
213 metadata:
214 additionalProperties:
215 type: string
216 description: Immutable. The Compute Engine metadata entries
217 to add to all instances (see [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
218 type: object
219 networkRef:
220 description: Immutable.
221 oneOf:
222 - not:
223 required:
224 - external
225 required:
226 - name
227 - not:
228 anyOf:
229 - required:
230 - name
231 - required:
232 - namespace
233 required:
234 - external
235 properties:
236 external:
237 description: |-
238 Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither `network_uri` nor `subnetwork_uri` is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for more information). A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default` * `projects/[project_id]/regions/global/default` * `default`
239
240 Allowed value: The `selfLink` field of a `ComputeNetwork` resource.
241 type: string
242 name:
243 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
244 type: string
245 namespace:
246 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
247 type: string
248 type: object
249 nodeGroupAffinity:
250 description: Immutable. Optional. Node Group Affinity for
251 sole-tenant clusters.
252 properties:
253 nodeGroupRef:
254 description: Immutable.
255 oneOf:
256 - not:
257 required:
258 - external
259 required:
260 - name
261 - not:
262 anyOf:
263 - required:
264 - name
265 - required:
266 - namespace
267 required:
268 - external
269 properties:
270 external:
271 description: |-
272 Required. The URI of a sole-tenant [node group resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on. A full URL, partial URI, or node group name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `node-group-1`
273
274 Allowed value: The `selfLink` field of a `ComputeNodeGroup` resource.
275 type: string
276 name:
277 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
278 type: string
279 namespace:
280 description: 'Namespace of the referent. More info:
281 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
282 type: string
283 type: object
284 required:
285 - nodeGroupRef
286 type: object
287 privateIPv6GoogleAccess:
288 description: 'Immutable. Optional. The type of IPv6 access
289 for a cluster. Possible values: PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED,
290 INHERIT_FROM_SUBNETWORK, OUTBOUND, BIDIRECTIONAL'
291 type: string
292 reservationAffinity:
293 description: Immutable. Optional. Reservation Affinity for
294 consuming Zonal reservation.
295 properties:
296 consumeReservationType:
297 description: 'Immutable. Optional. Type of reservation
298 to consume. Possible values: TYPE_UNSPECIFIED, NO_RESERVATION,
299 ANY_RESERVATION, SPECIFIC_RESERVATION'
300 type: string
301 key:
302 description: Immutable. Optional. Corresponds to the label
303 key of reservation resource.
304 type: string
305 values:
306 description: Immutable. Optional. Corresponds to the label
307 values of reservation resource.
308 items:
309 type: string
310 type: array
311 type: object
312 serviceAccountRef:
313 description: Immutable.
314 oneOf:
315 - not:
316 required:
317 - external
318 required:
319 - name
320 - not:
321 anyOf:
322 - required:
323 - name
324 - required:
325 - namespace
326 required:
327 - external
328 properties:
329 external:
330 description: |-
331 Optional. The [Dataproc service account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc) (also see [VM Data Plane identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity)) used by Dataproc cluster VM instances to access Google Cloud Platform services. If not specified, the [Compute Engine default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) is used.
332
333 Allowed value: The `email` field of an `IAMServiceAccount` resource.
334 type: string
335 name:
336 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
337 type: string
338 namespace:
339 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
340 type: string
341 type: object
342 serviceAccountScopes:
343 description: 'Immutable. Optional. The URIs of service account
344 scopes to be included in Compute Engine instances. The following
345 base set of scopes is always included: * https://www.googleapis.com/auth/cloud.useraccounts.readonly
346 * https://www.googleapis.com/auth/devstorage.read_write
347 * https://www.googleapis.com/auth/logging.write If no scopes
348 are specified, the following defaults are also provided:
349 * https://www.googleapis.com/auth/bigquery * https://www.googleapis.com/auth/bigtable.admin.table
350 * https://www.googleapis.com/auth/bigtable.data * https://www.googleapis.com/auth/devstorage.full_control'
351 items:
352 type: string
353 type: array
354 shieldedInstanceConfig:
355 description: Immutable. Optional. Shielded Instance Config
356 for clusters using [Compute Engine Shielded VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
357 properties:
358 enableIntegrityMonitoring:
359 description: Immutable. Optional. Defines whether instances
360 have integrity monitoring enabled.
361 type: boolean
362 enableSecureBoot:
363 description: Immutable. Optional. Defines whether instances
364 have Secure Boot enabled.
365 type: boolean
366 enableVtpm:
367 description: Immutable. Optional. Defines whether instances
368 have the vTPM enabled.
369 type: boolean
370 type: object
371 subnetworkRef:
372 description: Immutable.
373 oneOf:
374 - not:
375 required:
376 - external
377 required:
378 - name
379 - not:
380 anyOf:
381 - required:
382 - name
383 - required:
384 - namespace
385 required:
386 - external
387 properties:
388 external:
389 description: |-
390 Optional. The Compute Engine subnetwork to be used for machine communications. Cannot be specified with network_uri. A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/subnetworks/sub0` * `projects/[project_id]/regions/us-east1/subnetworks/sub0` * `sub0`
391
392 Allowed value: The `selfLink` field of a `ComputeSubnetwork` resource.
393 type: string
394 name:
395 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
396 type: string
397 namespace:
398 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
399 type: string
400 type: object
401 tags:
402 description: Immutable. The Compute Engine tags to add to
403 all instances (see [Tagging instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
404 items:
405 type: string
406 type: array
407 zone:
408 description: 'Immutable. Optional. The zone where the Compute
409 Engine cluster will be located. On a create request, it
410 is required in the "global" region. If omitted in a non-global
411 Dataproc region, the service will pick a zone in the corresponding
412 Compute Engine region. On a get request, zone will always
413 be present. A full URL, partial URI, or short name are valid.
414 Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
415 * `projects/[project_id]/zones/[zone]` * `us-central1-f`'
416 type: string
417 type: object
418 initializationActions:
419 description: 'Immutable. Optional. Commands to execute on each
420 node after config is completed. By default, executables are
421 run on master and all worker nodes. You can test a node''s `role`
422 metadata to run an executable on a master or worker node, as
423 shown below using `curl` (you can also use `wget`): ROLE=$(curl
424 -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
425 if [[ "${ROLE}" == ''Master'' ]]; then ... master specific actions
426 ... else ... worker specific actions ... fi'
427 items:
428 properties:
429 executableFile:
430 description: Immutable. Required. Cloud Storage URI of executable
431 file.
432 type: string
433 executionTimeout:
434 description: Immutable. Optional. Amount of time executable
435 has to complete. Default is 10 minutes (see JSON representation
436 of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
437 Cluster creation fails with an explanatory error message
438 (the name of the executable that caused the error and
439 the exceeded timeout period) if the executable is not
440 completed at end of the timeout period.
441 type: string
442 required:
443 - executableFile
444 type: object
445 type: array
446 lifecycleConfig:
447 description: Immutable. Optional. Lifecycle setting for the cluster.
448 properties:
449 autoDeleteTime:
450 description: Immutable. Optional. The time when cluster will
451 be auto-deleted (see JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
452 format: date-time
453 type: string
454 autoDeleteTtl:
455 description: Immutable. Optional. The lifetime duration of
456 cluster. The cluster will be auto-deleted at the end of
457 this period. Minimum value is 10 minutes; maximum value
458 is 14 days (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
459 type: string
460 idleDeleteTtl:
461 description: Immutable. Optional. The duration to keep the
462 cluster alive while idling (when no jobs are running). Passing
463 this threshold will cause the cluster to be deleted. Minimum
464 value is 5 minutes; maximum value is 14 days (see JSON representation
465 of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
466 type: string
467 type: object
468 masterConfig:
469 description: Immutable. Optional. The Compute Engine config settings
470 for the master instance in a cluster.
471 properties:
472 accelerators:
473 description: Immutable. Optional. The Compute Engine accelerator
474 configuration for these instances.
475 items:
476 properties:
477 acceleratorCount:
478 description: Immutable. The number of the accelerator
479 cards of this type exposed to this instance.
480 format: int64
481 type: integer
482 acceleratorType:
483 description: 'Immutable. Full URL, partial URI, or short
484 name of the accelerator type resource to expose to
485 this instance. See [Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
486 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
487 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
488 * `nvidia-tesla-k80` **Auto Zone Exception**: If you
489 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
490 feature, you must use the short name of the accelerator
491 type resource, for example, `nvidia-tesla-k80`.'
492 type: string
493 type: object
494 type: array
495 diskConfig:
496 description: Immutable. Optional. Disk option config settings.
497 properties:
498 bootDiskSizeGb:
499 description: Immutable. Optional. Size in GB of the boot
500 disk (default is 500GB).
501 format: int64
502 type: integer
503 bootDiskType:
504 description: 'Immutable. Optional. Type of the boot disk
505 (default is "pd-standard"). Valid values: "pd-balanced"
506 (Persistent Disk Balanced Solid State Drive), "pd-ssd"
507 (Persistent Disk Solid State Drive), or "pd-standard"
508 (Persistent Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
509 type: string
510 localSsdInterface:
511 description: 'Immutable. Optional. Interface type of local
512 SSDs (default is "scsi"). Valid values: "scsi" (Small
513 Computer System Interface), "nvme" (Non-Volatile Memory
514 Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
515 type: string
516 numLocalSsds:
517 description: Immutable. Optional. Number of attached SSDs,
518 from 0 to 4 (default is 0). If SSDs are not attached,
519 the boot disk is used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
520 data. If one or more SSDs are attached, this runtime
521 bulk data is spread across them, and the boot disk contains
522 only basic config and installed binaries.
523 format: int64
524 type: integer
525 type: object
526 imageRef:
527 description: Immutable.
528 oneOf:
529 - not:
530 required:
531 - external
532 required:
533 - name
534 - not:
535 anyOf:
536 - required:
537 - name
538 - required:
539 - namespace
540 required:
541 - external
542 properties:
543 external:
544 description: |-
545 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
546
547 Allowed value: The `selfLink` field of a `ComputeImage` resource.
548 type: string
549 name:
550 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
551 type: string
552 namespace:
553 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
554 type: string
555 type: object
556 machineType:
557 description: 'Immutable. Optional. The Compute Engine machine
558 type used for cluster instances. A full URL, partial URI,
559 or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
560 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
561 * `n1-standard-2` **Auto Zone Exception**: If you are using
562 the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
563 feature, you must use the short name of the machine type
564 resource, for example, `n1-standard-2`.'
565 type: string
566 minCpuPlatform:
567 description: Immutable. Optional. Specifies the minimum cpu
568 platform for the Instance Group. See [Dataproc -> Minimum
569 CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
570 type: string
571 numInstances:
572 description: Immutable. Optional. The number of VM instances
573 in the instance group. For [HA cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
574 [master_config](#FIELDS.master_config) groups, **must be
575 set to 3**. For standard cluster [master_config](#FIELDS.master_config)
576 groups, **must be set to 1**.
577 format: int64
578 type: integer
579 preemptibility:
580 description: 'Immutable. Optional. Specifies the preemptibility
581 of the instance group. The default value for master and
582 worker groups is `NON_PREEMPTIBLE`. This default cannot
583 be changed. The default value for secondary instances is
584 `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
585 NON_PREEMPTIBLE, PREEMPTIBLE'
586 type: string
587 type: object
588 metastoreConfig:
589 description: Immutable. Optional. Metastore configuration.
590 properties:
591 dataprocMetastoreServiceRef:
592 description: Immutable.
593 oneOf:
594 - not:
595 required:
596 - external
597 required:
598 - name
599 - not:
600 anyOf:
601 - required:
602 - name
603 - required:
604 - namespace
605 required:
606 - external
607 properties:
608 external:
609 description: 'Required. Resource name of an existing Dataproc
610 Metastore service. Example: * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`'
611 type: string
612 name:
613 description: |-
614 [WARNING] DataprocMetastoreService not yet supported in Config Connector, use 'external' field to reference existing resources.
615 Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
616 type: string
617 namespace:
618 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
619 type: string
620 type: object
621 required:
622 - dataprocMetastoreServiceRef
623 type: object
624 secondaryWorkerConfig:
625 description: Immutable. Optional. The Compute Engine config settings
626 for additional worker instances in a cluster.
627 properties:
628 accelerators:
629 description: Immutable. Optional. The Compute Engine accelerator
630 configuration for these instances.
631 items:
632 properties:
633 acceleratorCount:
634 description: Immutable. The number of the accelerator
635 cards of this type exposed to this instance.
636 format: int64
637 type: integer
638 acceleratorType:
639 description: 'Immutable. Full URL, partial URI, or short
640 name of the accelerator type resource to expose to
641 this instance. See [Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
642 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
643 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
644 * `nvidia-tesla-k80` **Auto Zone Exception**: If you
645 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
646 feature, you must use the short name of the accelerator
647 type resource, for example, `nvidia-tesla-k80`.'
648 type: string
649 type: object
650 type: array
651 diskConfig:
652 description: Immutable. Optional. Disk option config settings.
653 properties:
654 bootDiskSizeGb:
655 description: Immutable. Optional. Size in GB of the boot
656 disk (default is 500GB).
657 format: int64
658 type: integer
659 bootDiskType:
660 description: 'Immutable. Optional. Type of the boot disk
661 (default is "pd-standard"). Valid values: "pd-balanced"
662 (Persistent Disk Balanced Solid State Drive), "pd-ssd"
663 (Persistent Disk Solid State Drive), or "pd-standard"
664 (Persistent Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
665 type: string
666 localSsdInterface:
667 description: 'Immutable. Optional. Interface type of local
668 SSDs (default is "scsi"). Valid values: "scsi" (Small
669 Computer System Interface), "nvme" (Non-Volatile Memory
670 Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
671 type: string
672 numLocalSsds:
673 description: Immutable. Optional. Number of attached SSDs,
674 from 0 to 4 (default is 0). If SSDs are not attached,
675 the boot disk is used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
676 data. If one or more SSDs are attached, this runtime
677 bulk data is spread across them, and the boot disk contains
678 only basic config and installed binaries.
679 format: int64
680 type: integer
681 type: object
682 imageRef:
683 description: Immutable.
684 oneOf:
685 - not:
686 required:
687 - external
688 required:
689 - name
690 - not:
691 anyOf:
692 - required:
693 - name
694 - required:
695 - namespace
696 required:
697 - external
698 properties:
699 external:
700 description: |-
701 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
702
703 Allowed value: The `selfLink` field of a `ComputeImage` resource.
704 type: string
705 name:
706 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
707 type: string
708 namespace:
709 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
710 type: string
711 type: object
712 machineType:
713 description: 'Immutable. Optional. The Compute Engine machine
714 type used for cluster instances. A full URL, partial URI,
715 or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
716 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
717 * `n1-standard-2` **Auto Zone Exception**: If you are using
718 the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
719 feature, you must use the short name of the machine type
720 resource, for example, `n1-standard-2`.'
721 type: string
722 minCpuPlatform:
723 description: Immutable. Optional. Specifies the minimum cpu
724 platform for the Instance Group. See [Dataproc -> Minimum
725 CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
726 type: string
727 numInstances:
728 description: Immutable. Optional. The number of VM instances
729 in the instance group. For [HA cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
730 [master_config](#FIELDS.master_config) groups, **must be
731 set to 3**. For standard cluster [master_config](#FIELDS.master_config)
732 groups, **must be set to 1**.
733 format: int64
734 type: integer
735 preemptibility:
736 description: 'Immutable. Optional. Specifies the preemptibility
737 of the instance group. The default value for master and
738 worker groups is `NON_PREEMPTIBLE`. This default cannot
739 be changed. The default value for secondary instances is
740 `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
741 NON_PREEMPTIBLE, PREEMPTIBLE'
742 type: string
743 type: object
744 securityConfig:
745 description: Immutable. Optional. Security settings for the cluster.
746 properties:
747 identityConfig:
748 description: Immutable. Optional. Identity related configuration,
749 including service account based secure multi-tenancy user
750 mappings.
751 properties:
752 userServiceAccountMapping:
753 additionalProperties:
754 type: string
755 description: Immutable. Required. Map of user to service
756 account.
757 type: object
758 required:
759 - userServiceAccountMapping
760 type: object
761 kerberosConfig:
762 description: Immutable. Optional. Kerberos related configuration.
763 properties:
764 crossRealmTrustAdminServer:
765 description: Immutable. Optional. The admin server (IP
766 or hostname) for the remote trusted realm in a cross
767 realm trust relationship.
768 type: string
769 crossRealmTrustKdc:
770 description: Immutable. Optional. The KDC (IP or hostname)
771 for the remote trusted realm in a cross realm trust
772 relationship.
773 type: string
774 crossRealmTrustRealm:
775 description: Immutable. Optional. The remote realm the
776 Dataproc on-cluster KDC will trust, should the user
777 enable cross realm trust.
778 type: string
779 crossRealmTrustSharedPassword:
780 description: Immutable. Optional. The Cloud Storage URI
781 of a KMS encrypted file containing the shared password
782 between the on-cluster Kerberos realm and the remote
783 trusted realm, in a cross realm trust relationship.
784 type: string
785 enableKerberos:
786 description: 'Immutable. Optional. Flag to indicate whether
787 to Kerberize the cluster (default: false). Set this
788 field to true to enable Kerberos on a cluster.'
789 type: boolean
790 kdcDbKey:
791 description: Immutable. Optional. The Cloud Storage URI
792 of a KMS encrypted file containing the master key of
793 the KDC database.
794 type: string
795 keyPassword:
796 description: Immutable. Optional. The Cloud Storage URI
797 of a KMS encrypted file containing the password to the
798 user provided key. For the self-signed certificate,
799 this password is generated by Dataproc.
800 type: string
801 keystore:
802 description: Immutable. Optional. The Cloud Storage URI
803 of the keystore file used for SSL encryption. If not
804 provided, Dataproc will provide a self-signed certificate.
805 type: string
806 keystorePassword:
807 description: Immutable. Optional. The Cloud Storage URI
808 of a KMS encrypted file containing the password to the
809 user provided keystore. For the self-signed certificate,
810 this password is generated by Dataproc.
811 type: string
812 kmsKeyRef:
813 description: Immutable.
814 oneOf:
815 - not:
816 required:
817 - external
818 required:
819 - name
820 - not:
821 anyOf:
822 - required:
823 - name
824 - required:
825 - namespace
826 required:
827 - external
828 properties:
829 external:
830 description: |-
831 Optional. The uri of the KMS key used to encrypt various sensitive files.
832
833 Allowed value: The `selfLink` field of a `KMSCryptoKey` resource.
834 type: string
835 name:
836 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
837 type: string
838 namespace:
839 description: 'Namespace of the referent. More info:
840 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
841 type: string
842 type: object
843 realm:
844 description: Immutable. Optional. The name of the on-cluster
845 Kerberos realm. If not specified, the uppercased domain
846 of hostnames will be the realm.
847 type: string
848 rootPrincipalPassword:
849 description: Immutable. Optional. The Cloud Storage URI
850 of a KMS encrypted file containing the root principal
851 password.
852 type: string
853 tgtLifetimeHours:
854 description: Immutable. Optional. The lifetime of the
855 ticket granting ticket, in hours. If not specified,
856 or user specifies 0, then default value 10 will be used.
857 format: int64
858 type: integer
859 truststore:
860 description: Immutable. Optional. The Cloud Storage URI
861 of the truststore file used for SSL encryption. If not
862 provided, Dataproc will provide a self-signed certificate.
863 type: string
864 truststorePassword:
865 description: Immutable. Optional. The Cloud Storage URI
866 of a KMS encrypted file containing the password to the
867 user provided truststore. For the self-signed certificate,
868 this password is generated by Dataproc.
869 type: string
870 type: object
871 type: object
872 softwareConfig:
873 description: Immutable. Optional. The config settings for software
874 inside the cluster.
875 properties:
876 imageVersion:
877 description: Immutable. Optional. The version of software
878 inside the cluster. It must be one of the supported [Dataproc
879 Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
880 such as "1.2" (including a subminor version, such as "1.2.29"),
881 or the ["preview" version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
882 If unspecified, it defaults to the latest Debian version.
883 type: string
884 optionalComponents:
885 description: Immutable. Optional. The set of components to
886 activate on the cluster.
887 items:
888 type: string
889 type: array
890 properties:
891 additionalProperties:
892 type: string
893 description: 'Immutable. Optional. The properties to set on
894 daemon config files. Property keys are specified in `prefix:property`
895 format, for example `core:hadoop.tmp.dir`. The following
896 are supported prefixes and their mappings: * capacity-scheduler:
897 `capacity-scheduler.xml` * core: `core-site.xml` * distcp:
898 `distcp-default.xml` * hdfs: `hdfs-site.xml` * hive: `hive-site.xml`
899 * mapred: `mapred-site.xml` * pig: `pig.properties` * spark:
900 `spark-defaults.conf` * yarn: `yarn-site.xml` For more information,
901 see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).'
902 type: object
903 type: object
904 stagingBucketRef:
905 description: Immutable.
906 oneOf:
907 - not:
908 required:
909 - external
910 required:
911 - name
912 - not:
913 anyOf:
914 - required:
915 - name
916 - required:
917 - namespace
918 required:
919 - external
920 properties:
921 external:
922 description: |-
923 Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see [Dataproc staging bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). **This field requires a Cloud Storage bucket name, not a URI to a Cloud Storage bucket.**
924
925 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
926 type: string
927 name:
928 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
929 type: string
930 namespace:
931 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
932 type: string
933 type: object
934 tempBucketRef:
935 description: Immutable.
936 oneOf:
937 - not:
938 required:
939 - external
940 required:
941 - name
942 - not:
943 anyOf:
944 - required:
945 - name
946 - required:
947 - namespace
948 required:
949 - external
950 properties:
951 external:
952 description: |-
953 Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. If you do not specify a temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's temp bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket. The default bucket has a TTL of 90 days, but you can use any TTL (or none) if you specify a bucket. **This field requires a Cloud Storage bucket name, not a URI to a Cloud Storage bucket.**
954
955 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
956 type: string
957 name:
958 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
959 type: string
960 namespace:
961 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
962 type: string
963 type: object
964 workerConfig:
965 description: Immutable. Optional. The Compute Engine config settings
966 for worker instances in a cluster.
967 properties:
968 accelerators:
969 description: Immutable. Optional. The Compute Engine accelerator
970 configuration for these instances.
971 items:
972 properties:
973 acceleratorCount:
974 description: Immutable. The number of the accelerator
975 cards of this type exposed to this instance.
976 format: int64
977 type: integer
978 acceleratorType:
979 description: 'Immutable. Full URL, partial URI, or short
980 name of the accelerator type resource to expose to
981 this instance. See [Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
982 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
983 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
984 * `nvidia-tesla-k80` **Auto Zone Exception**: If you
985 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
986 feature, you must use the short name of the accelerator
987 type resource, for example, `nvidia-tesla-k80`.'
988 type: string
989 type: object
990 type: array
991 diskConfig:
992 description: Immutable. Optional. Disk option config settings.
993 properties:
994 bootDiskSizeGb:
995 description: Immutable. Optional. Size in GB of the boot
996 disk (default is 500GB).
997 format: int64
998 type: integer
999 bootDiskType:
1000 description: 'Immutable. Optional. Type of the boot disk
1001 (default is "pd-standard"). Valid values: "pd-balanced"
1002 (Persistent Disk Balanced Solid State Drive), "pd-ssd"
1003 (Persistent Disk Solid State Drive), or "pd-standard"
1004 (Persistent Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
1005 type: string
1006 localSsdInterface:
1007 description: 'Immutable. Optional. Interface type of local
1008 SSDs (default is "scsi"). Valid values: "scsi" (Small
1009 Computer System Interface), "nvme" (Non-Volatile Memory
1010 Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
1011 type: string
1012 numLocalSsds:
1013 description: Immutable. Optional. Number of attached SSDs,
1014 from 0 to 4 (default is 0). If SSDs are not attached,
1015 the boot disk is used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
1016 data. If one or more SSDs are attached, this runtime
1017 bulk data is spread across them, and the boot disk contains
1018 only basic config and installed binaries.
1019 format: int64
1020 type: integer
1021 type: object
1022 imageRef:
1023 description: Immutable.
1024 oneOf:
1025 - not:
1026 required:
1027 - external
1028 required:
1029 - name
1030 - not:
1031 anyOf:
1032 - required:
1033 - name
1034 - required:
1035 - namespace
1036 required:
1037 - external
1038 properties:
1039 external:
1040 description: |-
1041 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
1042
1043 Allowed value: The `selfLink` field of a `ComputeImage` resource.
1044 type: string
1045 name:
1046 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1047 type: string
1048 namespace:
1049 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1050 type: string
1051 type: object
1052 machineType:
1053 description: 'Immutable. Optional. The Compute Engine machine
1054 type used for cluster instances. A full URL, partial URI,
1055 or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1056 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1057 * `n1-standard-2` **Auto Zone Exception**: If you are using
1058 the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1059 feature, you must use the short name of the machine type
1060 resource, for example, `n1-standard-2`.'
1061 type: string
1062 minCpuPlatform:
1063 description: Immutable. Optional. Specifies the minimum cpu
1064 platform for the Instance Group. See [Dataproc -> Minimum
1065 CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
1066 type: string
1067 numInstances:
1068 description: Immutable. Optional. The number of VM instances
1069 in the instance group. For [HA cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
1070 [master_config](#FIELDS.master_config) groups, **must be
1071 set to 3**. For standard cluster [master_config](#FIELDS.master_config)
1072 groups, **must be set to 1**.
1073 format: int64
1074 type: integer
1075 preemptibility:
1076 description: 'Immutable. Optional. Specifies the preemptibility
1077 of the instance group. The default value for master and
1078 worker groups is `NON_PREEMPTIBLE`. This default cannot
1079 be changed. The default value for secondary instances is
1080 `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
1081 NON_PREEMPTIBLE, PREEMPTIBLE'
1082 type: string
1083 type: object
1084 type: object
1085 location:
1086 description: Immutable. The location for the resource, usually a GCP
1087 region.
1088 type: string
1089 projectRef:
1090 description: Immutable. The Project that this resource belongs to.
1091 oneOf:
1092 - not:
1093 required:
1094 - external
1095 required:
1096 - name
1097 - not:
1098 anyOf:
1099 - required:
1100 - name
1101 - required:
1102 - namespace
1103 required:
1104 - external
1105 properties:
1106 external:
1107 description: |-
1108 Required. The Google Cloud Platform project ID that the cluster belongs to.
1109
1110 Allowed value: The Google Cloud resource name of a `Project` resource (format: `projects/{{name}}`).
1111 type: string
1112 name:
1113 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1114 type: string
1115 namespace:
1116 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1117 type: string
1118 type: object
1119 resourceID:
1120 description: Immutable. Optional. The name of the resource. Used for
1121 creation and acquisition. When unset, the value of `metadata.name`
1122 is used as the default.
1123 type: string
1124 virtualClusterConfig:
1125 description: Immutable. Optional. The virtual cluster config is used
1126 when creating a Dataproc cluster that does not directly control
1127 the underlying compute resources, for example, when creating a [Dataproc-on-GKE
1128 cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke).
1129 Dataproc may set default values, and values may change when clusters
1130 are updated. Exactly one of config or virtual_cluster_config must
1131 be specified.
1132 properties:
1133 auxiliaryServicesConfig:
1134 description: Immutable. Optional. Configuration of auxiliary services
1135 used by this cluster.
1136 properties:
1137 metastoreConfig:
1138 description: Immutable. Optional. The Hive Metastore configuration
1139 for this workload.
1140 properties:
1141 dataprocMetastoreServiceRef:
1142 description: Immutable.
1143 oneOf:
1144 - not:
1145 required:
1146 - external
1147 required:
1148 - name
1149 - not:
1150 anyOf:
1151 - required:
1152 - name
1153 - required:
1154 - namespace
1155 required:
1156 - external
1157 properties:
1158 external:
1159 description: 'Required. Resource name of an existing
1160 Dataproc Metastore service. Example: * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`'
1161 type: string
1162 name:
1163 description: |-
1164 [WARNING] DataprocMetastoreService not yet supported in Config Connector, use 'external' field to reference existing resources.
1165 Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
1166 type: string
1167 namespace:
1168 description: 'Namespace of the referent. More info:
1169 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1170 type: string
1171 type: object
1172 required:
1173 - dataprocMetastoreServiceRef
1174 type: object
1175 sparkHistoryServerConfig:
1176 description: Immutable. Optional. The Spark History Server
1177 configuration for the workload.
1178 properties:
1179 dataprocClusterRef:
1180 description: Immutable.
1181 oneOf:
1182 - not:
1183 required:
1184 - external
1185 required:
1186 - name
1187 - not:
1188 anyOf:
1189 - required:
1190 - name
1191 - required:
1192 - namespace
1193 required:
1194 - external
1195 properties:
1196 external:
1197 description: |-
1198 Optional. Resource name of an existing Dataproc Cluster to act as a Spark History Server for the workload. Example: * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
1199
1200 Allowed value: The `selfLink` field of a `DataprocCluster` resource.
1201 type: string
1202 name:
1203 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1204 type: string
1205 namespace:
1206 description: 'Namespace of the referent. More info:
1207 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1208 type: string
1209 type: object
1210 type: object
1211 type: object
1212 kubernetesClusterConfig:
1213 description: Immutable. Required. The configuration for running
1214 the Dataproc cluster on Kubernetes.
1215 properties:
1216 gkeClusterConfig:
1217 description: Immutable. Required. The configuration for running
1218 the Dataproc cluster on GKE.
1219 properties:
1220 gkeClusterTargetRef:
1221 description: Immutable.
1222 oneOf:
1223 - not:
1224 required:
1225 - external
1226 required:
1227 - name
1228 - not:
1229 anyOf:
1230 - required:
1231 - name
1232 - required:
1233 - namespace
1234 required:
1235 - external
1236 properties:
1237 external:
1238 description: |-
1239 Optional. A target GKE cluster to deploy to. It must be in the same project and region as the Dataproc cluster (the GKE cluster can be zonal or regional). Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
1240
1241 Allowed value: The `selfLink` field of a `ContainerCluster` resource.
1242 type: string
1243 name:
1244 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1245 type: string
1246 namespace:
1247 description: 'Namespace of the referent. More info:
1248 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1249 type: string
1250 type: object
1251 nodePoolTarget:
1252 description: Immutable. Optional. GKE node pools where
1253 workloads will be scheduled. At least one node pool
1254 must be assigned the `DEFAULT` GkeNodePoolTarget.Role.
1255 If a `GkeNodePoolTarget` is not specified, Dataproc
1256 constructs a `DEFAULT` `GkeNodePoolTarget`. Each role
1257 can be given to only one `GkeNodePoolTarget`. All node
1258 pools must have the same location settings.
1259 items:
1260 properties:
1261 nodePoolConfig:
1262 description: Immutable. Input only. The configuration
1263 for the GKE node pool. If specified, Dataproc
1264 attempts to create a node pool with the specified
1265 shape. If one with the same name already exists,
1266 it is verified against all specified fields. If
1267 a field differs, the virtual cluster creation
1268 will fail. If omitted, any node pool with the
1269 specified name is used. If a node pool with the
1270 specified name does not exist, Dataproc creates
1271 a node pool with default values. This is an input
1272 only field. It will not be returned by the API.
1273 properties:
1274 autoscaling:
1275 description: Immutable. Optional. The autoscaler
1276 configuration for this node pool. The autoscaler
1277 is enabled only when a valid configuration
1278 is present.
1279 properties:
1280 maxNodeCount:
1281 description: Immutable. The maximum number
1282 of nodes in the node pool. Must be >=
1283 min_node_count, and must be > 0. **Note:**
1284 Quota must be sufficient to scale up the
1285 cluster.
1286 format: int64
1287 type: integer
1288 minNodeCount:
1289 description: Immutable. The minimum number
1290 of nodes in the node pool. Must be >=
1291 0 and <= max_node_count.
1292 format: int64
1293 type: integer
1294 type: object
1295 config:
1296 description: Immutable. Optional. The node pool
1297 configuration.
1298 properties:
1299 accelerators:
1300 description: Immutable. Optional. A list
1301 of [hardware accelerators](https://cloud.google.com/compute/docs/gpus)
1302 to attach to each node.
1303 items:
1304 properties:
1305 acceleratorCount:
1306 description: Immutable. The number
1307 of accelerator cards exposed to
1308 an instance.
1309 format: int64
1310 type: integer
1311 acceleratorType:
1312 description: Immutable. The accelerator
1313 type resource name (see GPUs
1314 on Compute Engine).
1315 type: string
1316 gpuPartitionSize:
1317 description: Immutable. Size of partitions
1318 to create on the GPU. Valid values
1319 are described in the NVIDIA [mig
1320 user guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning).
1321 type: string
1322 type: object
1323 type: array
1324 bootDiskKmsKey:
1325 description: 'Immutable. Optional. The [Customer
1326 Managed Encryption Key (CMEK)](https://cloud.google.com/kubernetes-engine/docs/how-to/using-cmek)
1327 used to encrypt the boot disk attached
1328 to each node in the node pool. Specify
1329 the key using the following format: `projects/KEY_PROJECT_ID/locations/LOCATION/keyRings/RING_NAME/cryptoKeys/KEY_NAME`.'
1330 type: string
1331 ephemeralStorageConfig:
1332 description: Immutable. Optional. Parameters
1333 for the ephemeral storage filesystem.
1334 If unspecified, ephemeral storage is backed
1335 by the boot disk.
1336 properties:
1337 localSsdCount:
1338 description: Immutable. Number of local
1339 SSDs to use to back ephemeral storage.
1340 Uses NVMe interfaces. Each local SSD
1341 is 375 GB in size. If zero, it means
1342 to disable using local SSDs as ephemeral
1343 storage.
1344 format: int64
1345 type: integer
1346 type: object
1347 localSsdCount:
1348 description: Immutable. Optional. The number
1349 of local SSD disks to attach to the node,
1350 which is limited by the maximum number
1351 of disks allowable per zone (see [Adding
1352 Local SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
1353 format: int64
1354 type: integer
1355 machineType:
1356 description: Immutable. Optional. The name
1357 of a Compute Engine [machine type](https://cloud.google.com/compute/docs/machine-types).
1358 type: string
1359 minCpuPlatform:
1360 description: Immutable. Optional. [Minimum
1361 CPU platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
1362 to be used by this instance. The instance
1363 may be scheduled on the specified or a
1364 newer CPU platform. Specify the friendly
1365 names of CPU platforms, such as "Intel
1366 Haswell" or "Intel Sandy Bridge".
1367 type: string
1368 preemptible:
1369 description: Immutable. Optional. Whether
1370 the nodes are created as legacy [preemptible
1371 VM instances](https://cloud.google.com/compute/docs/instances/preemptible).
1372 Also see Spot VMs, preemptible VM instances
1373 without a maximum lifetime. Legacy and
1374 Spot preemptible nodes cannot be used
1375 in a node pool with the `CONTROLLER`
1376 [role](/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
1377 or in the DEFAULT node pool if the CONTROLLER
1378 role is not assigned (the DEFAULT node
1379 pool will assume the CONTROLLER role).
1380 type: boolean
1381 spot:
1382 description: Immutable. Optional. Whether
1383 the nodes are created as [Spot VM
1384 instances](https://cloud.google.com/compute/docs/instances/spot).
1385 Spot VMs are the latest update to legacy
1386 preemptible VMs. Spot VMs do not have
1387 a maximum lifetime. Legacy and Spot preemptible
1388 nodes cannot be used in a node pool with
1389 the `CONTROLLER` [role](/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
1390 or in the DEFAULT node pool if the CONTROLLER
1391 role is not assigned (the DEFAULT node
1392 pool will assume the CONTROLLER role).
1393 type: boolean
1394 type: object
1395 locations:
1396 description: Immutable. Optional. The list of
1397 Compute Engine [zones](https://cloud.google.com/compute/docs/zones#available)
1398 where node pool nodes associated with a Dataproc
1399 on GKE virtual cluster will be located. **Note:**
1400 All node pools associated with a virtual cluster
1401 must be located in the same region as the
1402 virtual cluster, and they must be located
1403 in the same zone within that region. If a
1404 location is not specified during node pool
1405 creation, Dataproc on GKE will choose the
1406 zone.
1407 items:
1408 type: string
1409 type: array
1410 type: object
1411 nodePoolRef:
1412 description: Immutable.
1413 oneOf:
1414 - not:
1415 required:
1416 - external
1417 required:
1418 - name
1419 - not:
1420 anyOf:
1421 - required:
1422 - name
1423 - required:
1424 - namespace
1425 required:
1426 - external
1427 properties:
1428 external:
1429 description: |-
1430 Required. The target GKE node pool. Format: 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
1431
1432 Allowed value: The `selfLink` field of a `ContainerNodePool` resource.
1433 type: string
1434 name:
1435 description: 'Name of the referent. More info:
1436 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1437 type: string
1438 namespace:
1439 description: 'Namespace of the referent. More
1440 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1441 type: string
1442 type: object
1443 roles:
1444 description: Immutable. Required. The roles associated
1445 with the GKE node pool.
1446 items:
1447 type: string
1448 type: array
1449 required:
1450 - nodePoolRef
1451 - roles
1452 type: object
1453 type: array
1454 type: object
1455 kubernetesNamespace:
1456 description: Immutable. Optional. A namespace within the Kubernetes
1457 cluster to deploy into. If this namespace does not exist,
1458 it is created. If it exists, Dataproc verifies that another
1459 Dataproc VirtualCluster is not installed into it. If not
1460 specified, the name of the Dataproc Cluster is used.
1461 type: string
1462 kubernetesSoftwareConfig:
1463 description: Immutable. Optional. The software configuration
1464 for this Dataproc cluster running on Kubernetes.
1465 properties:
1466 componentVersion:
1467 additionalProperties:
1468 type: string
1469 description: Immutable. The components that should be
1470 installed in this Dataproc cluster. The key must be
1471 a string from the KubernetesComponent enumeration. The
1472 value is the version of the software to be installed.
1473 At least one entry must be specified.
1474 type: object
1475 properties:
1476 additionalProperties:
1477 type: string
1478 description: 'Immutable. The properties to set on daemon
1479 config files. Property keys are specified in `prefix:property`
1480 format, for example `spark:spark.kubernetes.container.image`.
1481 The following are supported prefixes and their mappings:
1482 * spark: `spark-defaults.conf` For more information,
1483 see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).'
1484 type: object
1485 type: object
1486 required:
1487 - gkeClusterConfig
1488 type: object
1489 stagingBucketRef:
1490 description: Immutable.
1491 oneOf:
1492 - not:
1493 required:
1494 - external
1495 required:
1496 - name
1497 - not:
1498 anyOf:
1499 - required:
1500 - name
1501 - required:
1502 - namespace
1503 required:
1504 - external
1505 properties:
1506 external:
1507 description: |-
1508 Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see [Dataproc staging and temp buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). **This field requires a Cloud Storage bucket name, not a `gs://...` URI to a Cloud Storage bucket.**
1509
1510 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
1511 type: string
1512 name:
1513 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1514 type: string
1515 namespace:
1516 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1517 type: string
1518 type: object
1519 required:
1520 - kubernetesClusterConfig
1521 type: object
1522 required:
1523 - location
1524 type: object
1525 status:
1526 properties:
1527 clusterUuid:
1528 description: Output only. A cluster UUID (Universally Unique Identifier).
1529 Dataproc generates this value when it creates the cluster.
1530 type: string
1531 conditions:
1532 description: Conditions represent the latest available observation
1533 of the resource's current state.
1534 items:
1535 properties:
1536 lastTransitionTime:
1537 description: Last time the condition transitioned from one status
1538 to another.
1539 type: string
1540 message:
1541 description: Human-readable message indicating details about
1542 last transition.
1543 type: string
1544 reason:
1545 description: Unique, one-word, CamelCase reason for the condition's
1546 last transition.
1547 type: string
1548 status:
1549 description: Status is the status of the condition. Can be True,
1550 False, Unknown.
1551 type: string
1552 type:
1553 description: Type is the type of the condition.
1554 type: string
1555 type: object
1556 type: array
1557 config:
1558 properties:
1559 endpointConfig:
1560 properties:
1561 httpPorts:
1562 additionalProperties:
1563 type: string
1564 description: Output only. The map of port descriptions to
1565 URLs. Will only be populated if enable_http_port_access
1566 is true.
1567 type: object
1568 type: object
1569 lifecycleConfig:
1570 properties:
1571 idleStartTime:
1572 description: Output only. The time when cluster became idle
1573 (most recent job finished) and became eligible for deletion
1574 due to idleness (see JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1575 format: date-time
1576 type: string
1577 type: object
# masterConfig: observed state of one instance group. It declares the same
# four fields, with the same types and descriptions, as secondaryWorkerConfig
# and workerConfig elsewhere in this schema: instanceNames,
# instanceReferences, isPreemptible and managedGroupConfig.
# NOTE(review): indentation is lost in this copy; nesting is inferred from key
# order - confirm against the generated manifest.
1578 masterConfig:
1579 properties:
# instanceNames: array of strings.
1580 instanceNames:
1581 description: Output only. The list of instance names. Dataproc
1582 derives the names from `cluster_name`, `num_instances`,
1583 and the instance group.
1584 items:
1585 type: string
1586 type: array
# instanceReferences: array of objects; each item carries four string fields
# (instanceId, instanceName, publicEciesKey, publicKey).
1587 instanceReferences:
1588 description: Output only. List of references to Compute Engine
1589 instances.
1590 items:
1591 properties:
1592 instanceId:
1593 description: The unique identifier of the Compute Engine
1594 instance.
1595 type: string
1596 instanceName:
1597 description: The user-friendly name of the Compute Engine
1598 instance.
1599 type: string
1600 publicEciesKey:
1601 description: The public ECIES key used for sharing data
1602 with this instance.
1603 type: string
1604 publicKey:
1605 description: The public RSA key used for sharing data
1606 with this instance.
1607 type: string
1608 type: object
1609 type: array
# isPreemptible: boolean flag.
1610 isPreemptible:
1611 description: Output only. Specifies that this instance group
1612 contains preemptible instances.
1613 type: boolean
# managedGroupConfig: object with two string fields naming the Instance Group
# Manager and the Instance Template.
1614 managedGroupConfig:
1615 description: Output only. The config for Compute Engine Instance
1616 Group Manager that manages this group. This is only used
1617 for preemptible instance groups.
1618 properties:
1619 instanceGroupManagerName:
1620 description: Output only. The name of the Instance Group
1621 Manager for this group.
1622 type: string
1623 instanceTemplateName:
1624 description: Output only. The name of the Instance Template
1625 used for the Managed Instance Group.
1626 type: string
1627 type: object
1628 type: object
# secondaryWorkerConfig: observed state of one instance group. It declares the
# same four fields, with the same types and descriptions, as masterConfig and
# workerConfig elsewhere in this schema: instanceNames, instanceReferences,
# isPreemptible and managedGroupConfig.
# NOTE(review): indentation is lost in this copy; nesting is inferred from key
# order - confirm against the generated manifest.
1629 secondaryWorkerConfig:
1630 properties:
# instanceNames: array of strings.
1631 instanceNames:
1632 description: Output only. The list of instance names. Dataproc
1633 derives the names from `cluster_name`, `num_instances`,
1634 and the instance group.
1635 items:
1636 type: string
1637 type: array
# instanceReferences: array of objects; each item carries four string fields
# (instanceId, instanceName, publicEciesKey, publicKey).
1638 instanceReferences:
1639 description: Output only. List of references to Compute Engine
1640 instances.
1641 items:
1642 properties:
1643 instanceId:
1644 description: The unique identifier of the Compute Engine
1645 instance.
1646 type: string
1647 instanceName:
1648 description: The user-friendly name of the Compute Engine
1649 instance.
1650 type: string
1651 publicEciesKey:
1652 description: The public ECIES key used for sharing data
1653 with this instance.
1654 type: string
1655 publicKey:
1656 description: The public RSA key used for sharing data
1657 with this instance.
1658 type: string
1659 type: object
1660 type: array
# isPreemptible: boolean flag.
1661 isPreemptible:
1662 description: Output only. Specifies that this instance group
1663 contains preemptible instances.
1664 type: boolean
# managedGroupConfig: object with two string fields naming the Instance Group
# Manager and the Instance Template.
1665 managedGroupConfig:
1666 description: Output only. The config for Compute Engine Instance
1667 Group Manager that manages this group. This is only used
1668 for preemptible instance groups.
1669 properties:
1670 instanceGroupManagerName:
1671 description: Output only. The name of the Instance Group
1672 Manager for this group.
1673 type: string
1674 instanceTemplateName:
1675 description: Output only. The name of the Instance Template
1676 used for the Managed Instance Group.
1677 type: string
1678 type: object
1679 type: object
# workerConfig: observed state of one instance group. It declares the same
# four fields, with the same types and descriptions, as masterConfig and
# secondaryWorkerConfig elsewhere in this schema: instanceNames,
# instanceReferences, isPreemptible and managedGroupConfig.
# NOTE(review): indentation is lost in this copy; nesting is inferred from key
# order - confirm against the generated manifest.
1680 workerConfig:
1681 properties:
# instanceNames: array of strings.
1682 instanceNames:
1683 description: Output only. The list of instance names. Dataproc
1684 derives the names from `cluster_name`, `num_instances`,
1685 and the instance group.
1686 items:
1687 type: string
1688 type: array
# instanceReferences: array of objects; each item carries four string fields
# (instanceId, instanceName, publicEciesKey, publicKey).
1689 instanceReferences:
1690 description: Output only. List of references to Compute Engine
1691 instances.
1692 items:
1693 properties:
1694 instanceId:
1695 description: The unique identifier of the Compute Engine
1696 instance.
1697 type: string
1698 instanceName:
1699 description: The user-friendly name of the Compute Engine
1700 instance.
1701 type: string
1702 publicEciesKey:
1703 description: The public ECIES key used for sharing data
1704 with this instance.
1705 type: string
1706 publicKey:
1707 description: The public RSA key used for sharing data
1708 with this instance.
1709 type: string
1710 type: object
1711 type: array
# isPreemptible: boolean flag.
1712 isPreemptible:
1713 description: Output only. Specifies that this instance group
1714 contains preemptible instances.
1715 type: boolean
# managedGroupConfig: object with two string fields naming the Instance Group
# Manager and the Instance Template.
1716 managedGroupConfig:
1717 description: Output only. The config for Compute Engine Instance
1718 Group Manager that manages this group. This is only used
1719 for preemptible instance groups.
1720 properties:
1721 instanceGroupManagerName:
1722 description: Output only. The name of the Instance Group
1723 Manager for this group.
1724 type: string
1725 instanceTemplateName:
1726 description: Output only. The name of the Instance Template
1727 used for the Managed Instance Group.
1728 type: string
1729 type: object
1730 type: object
# NOTE(review): this third consecutive `type: object` presumably closes the
# enclosing `config` object rather than workerConfig - confirm.
1731 type: object
# metrics: object holding two free-form maps with string values, hdfsMetrics
# and yarnMetrics. The description marks the report as a Beta feature that may
# change before final release.
# NOTE(review): indentation is lost in this copy; nesting is inferred.
1732 metrics:
1733 description: 'Output only. Contains cluster daemon metrics such as
1734 HDFS and YARN stats. **Beta Feature**: This report is available
1735 for testing purposes only. It may be changed before final release.'
1736 properties:
# hdfsMetrics: arbitrary keys, string values (additionalProperties).
1737 hdfsMetrics:
1738 additionalProperties:
1739 type: string
1740 description: The HDFS metrics.
1741 type: object
# yarnMetrics: arbitrary keys, string values (additionalProperties).
1742 yarnMetrics:
1743 additionalProperties:
1744 type: string
1745 description: The YARN metrics.
1746 type: object
1747 type: object
# observedGeneration: integer. Per the description, when it equals
# metadata.generation the reported status reflects the most recent desired
# state of the resource.
1748 observedGeneration:
1749 description: ObservedGeneration is the generation of the resource
1750 that was most recently observed by the Config Connector controller.
1751 If this is equal to metadata.generation, then that means that the
1752 current reported status reflects the most recent desired state of
1753 the resource.
1754 type: integer
# status: the cluster's own status object (a property named `status` inside
# the resource's status schema). It has four string fields: detail, state,
# stateStartTime and substate.
# NOTE(review): indentation is lost in this copy; nesting is inferred.
1755 status:
1756 description: Output only. Cluster status.
1757 properties:
# detail: free-form string.
1758 detail:
1759 description: Optional. Output only. Details of cluster's state.
1760 type: string
# state: the possible values are listed only in the description text; the
# schema declares a plain string with no `enum` constraint.
1761 state:
1762 description: 'Output only. The cluster''s state. Possible values:
1763 UNKNOWN, CREATING, RUNNING, ERROR, DELETING, UPDATING, STOPPING,
1764 STOPPED, STARTING'
1765 type: string
# stateStartTime: string validated as `date-time`.
1766 stateStartTime:
1767 description: Output only. Time when this state was entered (see
1768 JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1769 format: date-time
1770 type: string
# substate: as with `state`, the possible values appear only in the
# description; no `enum` is declared.
1771 substate:
1772 description: 'Output only. Additional state information that includes
1773 status reported by the agent. Possible values: UNSPECIFIED,
1774 UNHEALTHY, STALE_STATUS'
1775 type: string
1776 type: object
# statusHistory: array of previous cluster statuses. Each item declares four
# string fields: detail, state, stateStartTime (format date-time) and
# substate. Possible values for state/substate are listed in the descriptions
# only; no `enum` is declared.
# NOTE(review): indentation is lost in this copy; nesting is inferred.
1777 statusHistory:
1778 description: Output only. The previous cluster status.
1779 items:
1780 properties:
1781 detail:
1782 description: Optional. Output only. Details of cluster's state.
1783 type: string
1784 state:
1785 description: 'Output only. The cluster''s state. Possible values:
1786 UNKNOWN, CREATING, RUNNING, ERROR, DELETING, UPDATING, STOPPING,
1787 STOPPED, STARTING'
1788 type: string
1789 stateStartTime:
1790 description: Output only. Time when this state was entered (see
1791 JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1792 format: date-time
1793 type: string
1794 substate:
1795 description: 'Output only. Additional state information that
1796 includes status reported by the agent. Possible values: UNSPECIFIED,
1797 UNHEALTHY, STALE_STATUS'
1798 type: string
1799 type: object
1800 type: array
# NOTE(review): this final `type: object` presumably closes the enclosing
# `status` property schema rather than statusHistory - confirm.
1801 type: object
1802 required:
1803 - spec
1804 type: object
1805 served: true
1806 storage: true
1807 subresources:
1808 status: {}
# Top-level `status` of the CustomResourceDefinition object itself (a sibling
# of `metadata` and `spec`), not the `status` property defined inside the
# schema. Every field is empty here: blank accepted names, no conditions, no
# stored versions.
# NOTE(review): presumably filled in by the API server once the CRD is
# registered - confirm.
1809status:
1810 acceptedNames:
1811 kind: ""
1812 plural: ""
1813 conditions: []
1814 storedVersions: []
# View as plain text  (appears to be link text from the web viewer this copy was taken from; not part of the manifest)