1apiVersion: apiextensions.k8s.io/v1
2kind: CustomResourceDefinition
3metadata:
4 annotations:
5 cnrm.cloud.google.com/version: 0.0.0-dev
6 creationTimestamp: null
7 labels:
8 cnrm.cloud.google.com/dcl2crd: "true"
9 cnrm.cloud.google.com/managed-by-kcc: "true"
10 cnrm.cloud.google.com/stability-level: stable
11 cnrm.cloud.google.com/system: "true"
12 name: dataprocworkflowtemplates.dataproc.cnrm.cloud.google.com
13spec:
14 group: dataproc.cnrm.cloud.google.com
15 names:
16 categories:
17 - gcp
18 kind: DataprocWorkflowTemplate
19 plural: dataprocworkflowtemplates
20 shortNames:
21 - gcpdataprocworkflowtemplate
22 - gcpdataprocworkflowtemplates
23 singular: dataprocworkflowtemplate
24 preserveUnknownFields: false
25 scope: Namespaced
26 versions:
27 - additionalPrinterColumns:
28 - jsonPath: .metadata.creationTimestamp
29 name: Age
30 type: date
31 - description: When 'True', the most recent reconcile of the resource succeeded
32 jsonPath: .status.conditions[?(@.type=='Ready')].status
33 name: Ready
34 type: string
35 - description: The reason for the value in 'Ready'
36 jsonPath: .status.conditions[?(@.type=='Ready')].reason
37 name: Status
38 type: string
39 - description: The last transition time for the value in 'Status'
40 jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
41 name: Status Age
42 type: date
43 name: v1beta1
44 schema:
45 openAPIV3Schema:
46 properties:
47 apiVersion:
48 description: 'apiVersion defines the versioned schema of this representation
49 of an object. Servers should convert recognized schemas to the latest
50 internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
51 type: string
52 kind:
53 description: 'kind is a string value representing the REST resource this
54 object represents. Servers may infer this from the endpoint the client
55 submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
56 type: string
57 metadata:
58 type: object
59 spec:
60 properties:
61 dagTimeout:
62 description: Immutable. Optional. Timeout duration for the DAG of
63 jobs, expressed in seconds (see [JSON representation of duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
64 The timeout duration must be from 10 minutes ("600s") to 24 hours
65 ("86400s"). The timer begins when the first job is submitted. If
66 the workflow is running at the end of the timeout period, any remaining
67 jobs are cancelled, the workflow is ended, and if the workflow was
68 running on a [managed cluster](/dataproc/docs/concepts/workflows/using-workflows#configuring_or_selecting_a_cluster),
69 the cluster is deleted.
70 type: string
71 jobs:
72 description: Immutable. Required. The Directed Acyclic Graph of Jobs
73 to submit.
74 items:
75 properties:
76 hadoopJob:
77 description: Immutable. Optional. Job is a Hadoop job.
78 properties:
79 archiveUris:
80 description: 'Immutable. Optional. HCFS URIs of archives
81 to be extracted in the working directory of Hadoop drivers
82 and tasks. Supported file types: .jar, .tar, .tar.gz,
83 .tgz, or .zip.'
84 items:
85 type: string
86 type: array
87 args:
88 description: Immutable. Optional. The arguments to pass
89 to the driver. Do not include arguments, such as `-libjars`
90 or `-Dfoo=bar`, that can be set as job properties, since
91 a collision may occur that causes an incorrect job submission.
92 items:
93 type: string
94 type: array
95 fileUris:
96 description: Immutable. Optional. HCFS (Hadoop Compatible
97 Filesystem) URIs of files to be copied to the working
98 directory of Hadoop drivers and distributed tasks. Useful
99 for naively parallel tasks.
100 items:
101 type: string
102 type: array
103 jarFileUris:
104 description: Immutable. Optional. Jar file URIs to add to
105 the CLASSPATHs of the Hadoop driver and tasks.
106 items:
107 type: string
108 type: array
109 loggingConfig:
110 description: Immutable. Optional. The runtime log config
111 for job execution.
112 properties:
113 driverLogLevels:
114 additionalProperties:
115 type: string
116 description: 'Immutable. The per-package log levels
117 for the driver. This may include "root" package name
118 to configure rootLogger. Examples: ''com.google =
119 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
120 type: object
121 type: object
122 mainClass:
123 description: Immutable. The name of the driver's main class.
124 The jar file containing the class must be in the default
125 CLASSPATH or specified in `jar_file_uris`.
126 type: string
127 mainJarFileUri:
128 description: 'Immutable. The HCFS URI of the jar file containing
129 the main class. Examples: ''gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar''
130 ''hdfs:/tmp/test-samples/custom-wordcount.jar'' ''file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'''
131 type: string
132 properties:
133 additionalProperties:
134 type: string
135 description: Immutable. Optional. A mapping of property
136 names to values, used to configure Hadoop. Properties
137 that conflict with values set by the Dataproc API may
138 be overwritten. Can include properties set in /etc/hadoop/conf/*-site
139 and classes in user code.
140 type: object
141 type: object
142 hiveJob:
143 description: Immutable. Optional. Job is a Hive job.
144 properties:
145 continueOnFailure:
146 description: Immutable. Optional. Whether to continue executing
147 queries if a query fails. The default value is `false`.
148 Setting to `true` can be useful when executing independent
149 parallel queries.
150 type: boolean
151 jarFileUris:
152 description: Immutable. Optional. HCFS URIs of jar files
153 to add to the CLASSPATH of the Hive server and Hadoop
154 MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
155 items:
156 type: string
157 type: array
158 properties:
159 additionalProperties:
160 type: string
161 description: Immutable. Optional. A mapping of property
162 names and values, used to configure Hive. Properties that
163 conflict with values set by the Dataproc API may be overwritten.
164 Can include properties set in /etc/hadoop/conf/*-site.xml,
165 /etc/hive/conf/hive-site.xml, and classes in user code.
166 type: object
167 queryFileUri:
168 description: Immutable. The HCFS URI of the script that
169 contains Hive queries.
170 type: string
171 queryList:
172 description: Immutable. A list of queries.
173 properties:
174 queries:
175 description: 'Immutable. Required. The queries to execute.
176 You do not need to end a query expression with a semicolon.
177 Multiple queries can be specified in one string by
178 separating each with a semicolon. Here is an example
179 of a Dataproc API snippet that uses a QueryList to
180 specify a HiveJob: "hiveJob": { "queryList": { "queries":
181 [ "query1", "query2", "query3;query4", ] } }'
182 items:
183 type: string
184 type: array
185 required:
186 - queries
187 type: object
188 scriptVariables:
189 additionalProperties:
190 type: string
191 description: 'Immutable. Optional. Mapping of query variable
192 names to values (equivalent to the Hive command: `SET
193 name="value";`).'
194 type: object
195 type: object
196 labels:
197 additionalProperties:
198 type: string
199 description: 'Immutable. Optional. The labels to associate with
200 this job. Label keys must be between 1 and 63 characters long,
201 and must conform to the following regular expression: [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62}
202 Label values must be between 1 and 63 characters long, and
203 must conform to the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
204 No more than 32 labels can be associated with a given job.'
205 type: object
206 pigJob:
207 description: Immutable. Optional. Job is a Pig job.
208 properties:
209 continueOnFailure:
210 description: Immutable. Optional. Whether to continue executing
211 queries if a query fails. The default value is `false`.
212 Setting to `true` can be useful when executing independent
213 parallel queries.
214 type: boolean
215 jarFileUris:
216 description: Immutable. Optional. HCFS URIs of jar files
217 to add to the CLASSPATH of the Pig Client and Hadoop MapReduce
218 (MR) tasks. Can contain Pig UDFs.
219 items:
220 type: string
221 type: array
222 loggingConfig:
223 description: Immutable. Optional. The runtime log config
224 for job execution.
225 properties:
226 driverLogLevels:
227 additionalProperties:
228 type: string
229 description: 'Immutable. The per-package log levels
230 for the driver. This may include "root" package name
231 to configure rootLogger. Examples: ''com.google =
232 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
233 type: object
234 type: object
235 properties:
236 additionalProperties:
237 type: string
238 description: Immutable. Optional. A mapping of property
239 names to values, used to configure Pig. Properties that
240 conflict with values set by the Dataproc API may be overwritten.
241 Can include properties set in /etc/hadoop/conf/*-site.xml,
242 /etc/pig/conf/pig.properties, and classes in user code.
243 type: object
244 queryFileUri:
245 description: Immutable. The HCFS URI of the script that
246 contains the Pig queries.
247 type: string
248 queryList:
249 description: Immutable. A list of queries.
250 properties:
251 queries:
252 description: 'Immutable. Required. The queries to execute.
253 You do not need to end a query expression with a semicolon.
254 Multiple queries can be specified in one string by
255 separating each with a semicolon. Here is an example
256 of a Dataproc API snippet that uses a QueryList to
257 specify a HiveJob: "hiveJob": { "queryList": { "queries":
258 [ "query1", "query2", "query3;query4", ] } }'
259 items:
260 type: string
261 type: array
262 required:
263 - queries
264 type: object
265 scriptVariables:
266 additionalProperties:
267 type: string
268 description: 'Immutable. Optional. Mapping of query variable
269 names to values (equivalent to the Pig command: `name=[value]`).'
270 type: object
271 type: object
272 prerequisiteStepIds:
273 description: Immutable. Optional. The optional list of prerequisite
274 job step_ids. If not specified, the job will start at the
275 beginning of workflow.
276 items:
277 type: string
278 type: array
279 prestoJob:
280 description: Immutable. Optional. Job is a Presto job.
281 properties:
282 clientTags:
283 description: Immutable. Optional. Presto client tags to
284 attach to this query.
285 items:
286 type: string
287 type: array
288 continueOnFailure:
289 description: Immutable. Optional. Whether to continue executing
290 queries if a query fails. The default value is `false`.
291 Setting to `true` can be useful when executing independent
292 parallel queries.
293 type: boolean
294 loggingConfig:
295 description: Immutable. Optional. The runtime log config
296 for job execution.
297 properties:
298 driverLogLevels:
299 additionalProperties:
300 type: string
301 description: 'Immutable. The per-package log levels
302 for the driver. This may include "root" package name
303 to configure rootLogger. Examples: ''com.google =
304 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
305 type: object
306 type: object
307 outputFormat:
308 description: Immutable. Optional. The format in which query
309 output will be displayed. See the Presto documentation
310 for supported output formats.
311 type: string
312 properties:
313 additionalProperties:
314 type: string
315 description: Immutable. Optional. A mapping of property
316 names to values. Used to set Presto [session properties](https://prestodb.io/docs/current/sql/set-session.html).
317 Equivalent to using the --session flag in the Presto CLI.
318 type: object
319 queryFileUri:
320 description: Immutable. The HCFS URI of the script that
321 contains SQL queries.
322 type: string
323 queryList:
324 description: Immutable. A list of queries.
325 properties:
326 queries:
327 description: 'Immutable. Required. The queries to execute.
328 You do not need to end a query expression with a semicolon.
329 Multiple queries can be specified in one string by
330 separating each with a semicolon. Here is an example
331 of a Dataproc API snippet that uses a QueryList to
332 specify a HiveJob: "hiveJob": { "queryList": { "queries":
333 [ "query1", "query2", "query3;query4", ] } }'
334 items:
335 type: string
336 type: array
337 required:
338 - queries
339 type: object
340 type: object
341 pysparkJob:
342 description: Immutable. Optional. Job is a PySpark job.
343 properties:
344 archiveUris:
345 description: 'Immutable. Optional. HCFS URIs of archives
346 to be extracted into the working directory of each executor.
347 Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.'
348 items:
349 type: string
350 type: array
351 args:
352 description: Immutable. Optional. The arguments to pass
353 to the driver. Do not include arguments, such as `--conf`,
354 that can be set as job properties, since a collision may
355 occur that causes an incorrect job submission.
356 items:
357 type: string
358 type: array
359 fileUris:
360 description: Immutable. Optional. HCFS URIs of files to
361 be placed in the working directory of each executor. Useful
362 for naively parallel tasks.
363 items:
364 type: string
365 type: array
366 jarFileUris:
367 description: Immutable. Optional. HCFS URIs of jar files
368 to add to the CLASSPATHs of the Python driver and tasks.
369 items:
370 type: string
371 type: array
372 loggingConfig:
373 description: Immutable. Optional. The runtime log config
374 for job execution.
375 properties:
376 driverLogLevels:
377 additionalProperties:
378 type: string
379 description: 'Immutable. The per-package log levels
380 for the driver. This may include "root" package name
381 to configure rootLogger. Examples: ''com.google =
382 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
383 type: object
384 type: object
385 mainPythonFileUri:
386 description: Immutable. Required. The HCFS URI of the main
387 Python file to use as the driver. Must be a .py file.
388 type: string
389 properties:
390 additionalProperties:
391 type: string
392 description: Immutable. Optional. A mapping of property
393 names to values, used to configure PySpark. Properties
394 that conflict with values set by the Dataproc API may
395 be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf
396 and classes in user code.
397 type: object
398 pythonFileUris:
399 description: 'Immutable. Optional. HCFS file URIs of Python
400 files to pass to the PySpark framework. Supported file
401 types: .py, .egg, and .zip.'
402 items:
403 type: string
404 type: array
405 required:
406 - mainPythonFileUri
407 type: object
408 scheduling:
409 description: Immutable. Optional. Job scheduling configuration.
410 properties:
411 maxFailuresPerHour:
412 description: Immutable. Optional. Maximum number of times
413 per hour a driver may be restarted as a result of driver
414 exiting with non-zero code before job is reported failed.
415 A job may be reported as thrashing if driver exits with
416 non-zero code 4 times within 10 minute window. Maximum
417 value is 10.
418 format: int64
419 type: integer
420 maxFailuresTotal:
421 description: Immutable. Optional. Maximum number of times
422 in total a driver may be restarted as a result of driver
423 exiting with non-zero code before job is reported failed.
424 Maximum value is 240.
425 format: int64
426 type: integer
427 type: object
428 sparkJob:
429 description: Immutable. Optional. Job is a Spark job.
430 properties:
431 archiveUris:
432 description: 'Immutable. Optional. HCFS URIs of archives
433 to be extracted into the working directory of each executor.
434 Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.'
435 items:
436 type: string
437 type: array
438 args:
439 description: Immutable. Optional. The arguments to pass
440 to the driver. Do not include arguments, such as `--conf`,
441 that can be set as job properties, since a collision may
442 occur that causes an incorrect job submission.
443 items:
444 type: string
445 type: array
446 fileUris:
447 description: Immutable. Optional. HCFS URIs of files to
448 be placed in the working directory of each executor. Useful
449 for naively parallel tasks.
450 items:
451 type: string
452 type: array
453 jarFileUris:
454 description: Immutable. Optional. HCFS URIs of jar files
455 to add to the CLASSPATHs of the Spark driver and tasks.
456 items:
457 type: string
458 type: array
459 loggingConfig:
460 description: Immutable. Optional. The runtime log config
461 for job execution.
462 properties:
463 driverLogLevels:
464 additionalProperties:
465 type: string
466 description: 'Immutable. The per-package log levels
467 for the driver. This may include "root" package name
468 to configure rootLogger. Examples: ''com.google =
469 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
470 type: object
471 type: object
472 mainClass:
473 description: Immutable. The name of the driver's main class.
474 The jar file that contains the class must be in the default
475 CLASSPATH or specified in `jar_file_uris`.
476 type: string
477 mainJarFileUri:
478 description: Immutable. The HCFS URI of the jar file that
479 contains the main class.
480 type: string
481 properties:
482 additionalProperties:
483 type: string
484 description: Immutable. Optional. A mapping of property
485 names to values, used to configure Spark. Properties that
486 conflict with values set by the Dataproc API may be overwritten.
487 Can include properties set in /etc/spark/conf/spark-defaults.conf
488 and classes in user code.
489 type: object
490 type: object
491 sparkRJob:
492 description: Immutable. Optional. Job is a SparkR job.
493 properties:
494 archiveUris:
495 description: 'Immutable. Optional. HCFS URIs of archives
496 to be extracted into the working directory of each executor.
497 Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.'
498 items:
499 type: string
500 type: array
501 args:
502 description: Immutable. Optional. The arguments to pass
503 to the driver. Do not include arguments, such as `--conf`,
504 that can be set as job properties, since a collision may
505 occur that causes an incorrect job submission.
506 items:
507 type: string
508 type: array
509 fileUris:
510 description: Immutable. Optional. HCFS URIs of files to
511 be placed in the working directory of each executor. Useful
512 for naively parallel tasks.
513 items:
514 type: string
515 type: array
516 loggingConfig:
517 description: Immutable. Optional. The runtime log config
518 for job execution.
519 properties:
520 driverLogLevels:
521 additionalProperties:
522 type: string
523 description: 'Immutable. The per-package log levels
524 for the driver. This may include "root" package name
525 to configure rootLogger. Examples: ''com.google =
526 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
527 type: object
528 type: object
529 mainRFileUri:
530 description: Immutable. Required. The HCFS URI of the main
531 R file to use as the driver. Must be a .R file.
532 type: string
533 properties:
534 additionalProperties:
535 type: string
536 description: Immutable. Optional. A mapping of property
537 names to values, used to configure SparkR. Properties
538 that conflict with values set by the Dataproc API may
539 be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf
540 and classes in user code.
541 type: object
542 required:
543 - mainRFileUri
544 type: object
545 sparkSqlJob:
546 description: Immutable. Optional. Job is a SparkSql job.
547 properties:
548 jarFileUris:
549 description: Immutable. Optional. HCFS URIs of jar files
550 to be added to the Spark CLASSPATH.
551 items:
552 type: string
553 type: array
554 loggingConfig:
555 description: Immutable. Optional. The runtime log config
556 for job execution.
557 properties:
558 driverLogLevels:
559 additionalProperties:
560 type: string
561 description: 'Immutable. The per-package log levels
562 for the driver. This may include "root" package name
563 to configure rootLogger. Examples: ''com.google =
564 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
565 type: object
566 type: object
567 properties:
568 additionalProperties:
569 type: string
570 description: Immutable. Optional. A mapping of property
571 names to values, used to configure Spark SQL's SparkConf.
572 Properties that conflict with values set by the Dataproc
573 API may be overwritten.
574 type: object
575 queryFileUri:
576 description: Immutable. The HCFS URI of the script that
577 contains SQL queries.
578 type: string
579 queryList:
580 description: Immutable. A list of queries.
581 properties:
582 queries:
583 description: 'Immutable. Required. The queries to execute.
584 You do not need to end a query expression with a semicolon.
585 Multiple queries can be specified in one string by
586 separating each with a semicolon. Here is an example
587 of a Dataproc API snippet that uses a QueryList to
588 specify a HiveJob: "hiveJob": { "queryList": { "queries":
589 [ "query1", "query2", "query3;query4", ] } }'
590 items:
591 type: string
592 type: array
593 required:
594 - queries
595 type: object
596 scriptVariables:
597 additionalProperties:
598 type: string
599 description: 'Immutable. Optional. Mapping of query variable
600 names to values (equivalent to the Spark SQL command:
601 `SET name="value";`).'
602 type: object
603 type: object
604 stepId:
605 description: Immutable. Required. The step id. The id must be
606 unique among all jobs within the template. The step id is
607 used as prefix for job id, as job `goog-dataproc-workflow-step-id`
608 label, and in prerequisiteStepIds field from other steps.
609 The id must contain only letters (a-z, A-Z), numbers (0-9),
610 underscores (_), and hyphens (-). Cannot begin or end with
611 underscore or hyphen. Must consist of between 3 and 50 characters.
612 type: string
613 required:
614 - stepId
615 type: object
616 type: array
617 location:
618 description: Immutable. The location for the resource
619 type: string
620 parameters:
621 description: Immutable. Optional. Template parameters whose values
622 are substituted into the template. Values for parameters must be
623 provided when the template is instantiated.
624 items:
625 properties:
626 description:
627 description: Immutable. Optional. Brief description of the parameter.
628 Must not exceed 1024 characters.
629 type: string
630 fields:
631 description: 'Immutable. Required. Paths to all fields that
632 the parameter replaces. A field is allowed to appear in at
633 most one parameter''s list of field paths. A field path is
634 similar in syntax to a google.protobuf.FieldMask. For example,
635 a field path that references the zone field of a workflow
636 template''s cluster selector would be specified as `placement.clusterSelector.zone`.
637 Also, field paths can reference fields using the following
638 syntax: * Values in maps can be referenced by key: * labels[''key'']
639 * placement.clusterSelector.clusterLabels[''key''] * placement.managedCluster.labels[''key'']
640 * placement.clusterSelector.clusterLabels[''key''] * jobs[''step-id''].labels[''key'']
641 * Jobs in the jobs list can be referenced by step-id: * jobs[''step-id''].hadoopJob.mainJarFileUri
642 * jobs[''step-id''].hiveJob.queryFileUri * jobs[''step-id''].pySparkJob.mainPythonFileUri
643 * jobs[''step-id''].hadoopJob.jarFileUris[0] * jobs[''step-id''].hadoopJob.archiveUris[0]
644 * jobs[''step-id''].hadoopJob.fileUris[0] * jobs[''step-id''].pySparkJob.pythonFileUris[0]
645 * Items in repeated fields can be referenced by a zero-based
646 index: * jobs[''step-id''].sparkJob.args[0] * Other examples:
647 * jobs[''step-id''].hadoopJob.properties[''key''] * jobs[''step-id''].hadoopJob.args[0]
648 * jobs[''step-id''].hiveJob.scriptVariables[''key''] * jobs[''step-id''].hadoopJob.mainJarFileUri
649 * placement.clusterSelector.zone It may not be possible to
650 parameterize maps and repeated fields in their entirety since
651 only individual map values and individual items in repeated
652 fields can be referenced. For example, the following field
653 paths are invalid: - placement.clusterSelector.clusterLabels
654 - jobs[''step-id''].sparkJob.args'
655 items:
656 type: string
657 type: array
658 name:
659 description: Immutable. Required. Parameter name. The parameter
660 name is used as the key, and paired with the parameter value,
661 which are passed to the template when the template is instantiated.
662 The name must contain only capital letters (A-Z), numbers
663 (0-9), and underscores (_), and must not start with a number.
664 The maximum length is 40 characters.
665 type: string
666 validation:
667 description: Immutable. Optional. Validation rules to be applied
668 to this parameter's value.
669 properties:
670 regex:
671 description: Immutable. Validation based on regular expressions.
672 properties:
673 regexes:
674 description: Immutable. Required. RE2 regular expressions
675 used to validate the parameter's value. The value
676 must match the regex in its entirety (substring matches
677 are not sufficient).
678 items:
679 type: string
680 type: array
681 required:
682 - regexes
683 type: object
684 values:
685 description: Immutable. Validation based on a list of allowed
686 values.
687 properties:
688 values:
689 description: Immutable. Required. List of allowed values
690 for the parameter.
691 items:
692 type: string
693 type: array
694 required:
695 - values
696 type: object
697 type: object
698 required:
699 - fields
700 - name
701 type: object
702 type: array
703 placement:
704 description: Immutable. Required. WorkflowTemplate scheduling information.
705 properties:
706 clusterSelector:
707 description: Immutable. Optional. A selector that chooses target
708 cluster for jobs based on metadata. The selector is evaluated
709 at the time each job is submitted.
710 properties:
711 clusterLabels:
712 additionalProperties:
713 type: string
714 description: Immutable. Required. The cluster labels. Cluster
715 must have all labels to match.
716 type: object
717 zone:
718 description: Immutable. Optional. The zone where workflow
719 process executes. This parameter does not affect the selection
720 of the cluster. If unspecified, the zone of the first cluster
721 matching the selector is used.
722 type: string
723 required:
724 - clusterLabels
725 type: object
726 managedCluster:
727 description: Immutable. A cluster that is managed by the workflow.
728 properties:
729 clusterName:
730 description: Immutable. Required. The cluster name prefix.
731 A unique cluster name will be formed by appending a random
732 suffix. The name must contain only lower-case letters (a-z),
733 numbers (0-9), and hyphens (-). Must begin with a letter.
734 Cannot begin or end with hyphen. Must consist of between
735 2 and 35 characters.
736 type: string
737 config:
738 description: Immutable. Required. The cluster configuration.
739 properties:
740 autoscalingConfig:
741 description: Immutable. Optional. Autoscaling config for
742 the policy associated with the cluster. Cluster does
743 not autoscale if this field is unset.
744 properties:
745 policyRef:
746 description: Immutable.
747 oneOf:
748 - not:
749 required:
750 - external
751 required:
752 - name
753 - not:
754 anyOf:
755 - required:
756 - name
757 - required:
758 - namespace
759 required:
760 - external
761 properties:
762 external:
763 description: |-
764 Optional. The autoscaling policy used by the cluster. Only resource names including projectid and location (region) are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` Note that the policy must be in the same project and Dataproc region.
765
766 Allowed value: The Google Cloud resource name of a `DataprocAutoscalingPolicy` resource (format: `projects/{{project}}/locations/{{location}}/autoscalingPolicies/{{name}}`).
767 type: string
768 name:
769 description: 'Name of the referent. More info:
770 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
771 type: string
772 namespace:
773 description: 'Namespace of the referent. More
774 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
775 type: string
776 type: object
777 type: object
778 encryptionConfig:
779 description: Immutable. Optional. Encryption settings
780 for the cluster.
781 properties:
782 gcePdKmsKeyRef:
783 description: Immutable.
784 oneOf:
785 - not:
786 required:
787 - external
788 required:
789 - name
790 - not:
791 anyOf:
792 - required:
793 - name
794 - required:
795 - namespace
796 required:
797 - external
798 properties:
799 external:
800 description: |-
801 Optional. The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
802
803 Allowed value: The `selfLink` field of a `KMSCryptoKey` resource.
804 type: string
805 name:
806 description: 'Name of the referent. More info:
807 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
808 type: string
809 namespace:
810 description: 'Namespace of the referent. More
811 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
812 type: string
813 type: object
814 type: object
815 endpointConfig:
816 description: Immutable. Optional. Port/endpoint configuration
817 for this cluster
818 properties:
819 enableHttpPortAccess:
820 description: Immutable. Optional. If true, enable
821 http access to specific ports on the cluster from
822 external sources. Defaults to false.
823 type: boolean
824 type: object
825 gceClusterConfig:
826 description: Immutable. Optional. The shared Compute Engine
827 config settings for all instances in a cluster.
828 properties:
829 internalIPOnly:
830 description: Immutable. Optional. If true, all instances
831 in the cluster will only have internal IP addresses.
832 By default, clusters are not restricted to internal
833 IP addresses, and will have ephemeral external IP
834 addresses assigned to each instance. This `internal_ip_only`
835 restriction can only be enabled for subnetwork enabled
836 networks, and all off-cluster dependencies must
837 be configured to be accessible without external
838 IP addresses.
839 type: boolean
840 metadata:
841 additionalProperties:
842 type: string
843 description: Immutable. The Compute Engine metadata
844 entries to add to all instances (see [Project and
845 instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
846 type: object
847 networkRef:
848 description: Immutable.
849 oneOf:
850 - not:
851 required:
852 - external
853 required:
854 - name
855 - not:
856 anyOf:
857 - required:
858 - name
859 - required:
860 - namespace
861 required:
862 - external
863 properties:
864 external:
865 description: |-
866 Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither `network_uri` nor `subnetwork_uri` is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for more information). A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default` * `projects/[project_id]/regions/global/default` * `default`
867
868 Allowed value: The `selfLink` field of a `ComputeNetwork` resource.
869 type: string
870 name:
871 description: 'Name of the referent. More info:
872 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
873 type: string
874 namespace:
875 description: 'Namespace of the referent. More
876 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
877 type: string
878 type: object
879 nodeGroupAffinity:
880 description: Immutable. Optional. Node Group Affinity
881 for sole-tenant clusters.
882 properties:
883 nodeGroupRef:
884 description: Immutable.
885 oneOf:
886 - not:
887 required:
888 - external
889 required:
890 - name
891 - not:
892 anyOf:
893 - required:
894 - name
895 - required:
896 - namespace
897 required:
898 - external
899 properties:
900 external:
901 description: |-
902 Required. The URI of a sole-tenant [node group resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on. A full URL, partial URI, or node group name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `node-group-1`
903
904 Allowed value: The `selfLink` field of a `ComputeNodeGroup` resource.
905 type: string
906 name:
907 description: 'Name of the referent. More info:
908 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
909 type: string
910 namespace:
911 description: 'Namespace of the referent. More
912 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
913 type: string
914 type: object
915 required:
916 - nodeGroupRef
917 type: object
918 privateIPv6GoogleAccess:
919 description: 'Immutable. Optional. The type of IPv6
920 access for a cluster. Possible values: PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED,
921 INHERIT_FROM_SUBNETWORK, OUTBOUND, BIDIRECTIONAL'
922 type: string
923 reservationAffinity:
924 description: Immutable. Optional. Reservation Affinity
925 for consuming Zonal reservation.
926 properties:
927 consumeReservationType:
928 description: 'Immutable. Optional. Type of reservation
929 to consume. Possible values: TYPE_UNSPECIFIED,
930 NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION'
931 type: string
932 key:
933 description: Immutable. Optional. Corresponds
934 to the label key of reservation resource.
935 type: string
936 values:
937 description: Immutable. Optional. Corresponds
938 to the label values of reservation resource.
939 items:
940 type: string
941 type: array
942 type: object
943 serviceAccountRef:
944 description: Immutable.
945 oneOf:
946 - not:
947 required:
948 - external
949 required:
950 - name
951 - not:
952 anyOf:
953 - required:
954 - name
955 - required:
956 - namespace
957 required:
958 - external
959 properties:
960 external:
961 description: |-
962 Optional. The [Dataproc service account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc) (also see [VM Data Plane identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity)) used by Dataproc cluster VM instances to access Google Cloud Platform services. If not specified, the [Compute Engine default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) is used.
963
964 Allowed value: The `email` field of an `IAMServiceAccount` resource.
965 type: string
966 name:
967 description: 'Name of the referent. More info:
968 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
969 type: string
970 namespace:
971 description: 'Namespace of the referent. More
972 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
973 type: string
974 type: object
975 serviceAccountScopes:
976 description: 'Immutable. Optional. The URIs of service
977 account scopes to be included in Compute Engine
978 instances. The following base set of scopes is always
979 included: * https://www.googleapis.com/auth/cloud.useraccounts.readonly
980 * https://www.googleapis.com/auth/devstorage.read_write
981 * https://www.googleapis.com/auth/logging.write
982 If no scopes are specified, the following defaults
983 are also provided: * https://www.googleapis.com/auth/bigquery
984 * https://www.googleapis.com/auth/bigtable.admin.table
985 * https://www.googleapis.com/auth/bigtable.data
986 * https://www.googleapis.com/auth/devstorage.full_control'
987 items:
988 type: string
989 type: array
990 shieldedInstanceConfig:
991 description: Immutable. Optional. Shielded Instance
992 Config for clusters using Compute Engine Shielded
993 VMs.
994 properties:
995 enableIntegrityMonitoring:
996 description: Immutable. Optional. Defines whether
997 instances have integrity monitoring enabled.
998 Integrity monitoring compares the most recent
999 boot measurements to the integrity policy baseline
1000 and returns a pair of pass/fail results depending
1001 on whether they match or not.
1002 type: boolean
1003 enableSecureBoot:
1004 description: Immutable. Optional. Defines whether
1005 the instances have Secure Boot enabled. Secure
1006 Boot helps ensure that the system only runs
1007 authentic software by verifying the digital
1008 signature of all boot components, and halting
1009 the boot process if signature verification fails.
1010 type: boolean
1011 enableVtpm:
1012 description: Immutable. Optional. Defines whether
1013 the instances have the vTPM enabled. Virtual
1014 Trusted Platform Module protects objects like
1015 keys, certificates and enables Measured Boot
1016 by performing the measurements needed to create
1017 a known good boot baseline, called the integrity
1018 policy baseline.
1019 type: boolean
1020 type: object
1021 subnetworkRef:
1022 description: Immutable.
1023 oneOf:
1024 - not:
1025 required:
1026 - external
1027 required:
1028 - name
1029 - not:
1030 anyOf:
1031 - required:
1032 - name
1033 - required:
1034 - namespace
1035 required:
1036 - external
1037 properties:
1038 external:
1039 description: |-
1040 Optional. The Compute Engine subnetwork to be used for machine communications. Cannot be specified with network_uri. A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/subnetworks/sub0` * `projects/[project_id]/regions/us-east1/subnetworks/sub0` * `sub0`
1041
1042 Allowed value: The `selfLink` field of a `ComputeSubnetwork` resource.
1043 type: string
1044 name:
1045 description: 'Name of the referent. More info:
1046 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1047 type: string
1048 namespace:
1049 description: 'Namespace of the referent. More
1050 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1051 type: string
1052 type: object
1053 tags:
1054 description: Immutable. The Compute Engine tags to
1055 add to all instances (see [Tagging instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
1056 items:
1057 type: string
1058 type: array
1059 zone:
1060 description: 'Immutable. Optional. The zone where
1061 the Compute Engine cluster will be located. On a
1062 create request, it is required in the "global" region.
1063 If omitted in a non-global Dataproc region, the
1064 service will pick a zone in the corresponding Compute
1065 Engine region. On a get request, zone will always
1066 be present. A full URL, partial URI, or short name
1067 are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
1068 * `projects/[project_id]/zones/[zone]` * `us-central1-f`'
1069 type: string
1070 type: object
1071 initializationActions:
1072 description: 'Immutable. Optional. Commands to execute
1073 on each node after config is completed. By default,
1074 executables are run on master and all worker nodes.
1075 You can test a node''s `role` metadata to run an executable
1076 on a master or worker node, as shown below using `curl`
1077 (you can also use `wget`): ROLE=$(curl -H Metadata-Flavor:Google
1078 http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
1079 if [[ "${ROLE}" == ''Master'' ]]; then ... master specific
1080 actions ... else ... worker specific actions ... fi'
1081 items:
1082 properties:
1083 executableFile:
1084 description: Immutable. Required. Cloud Storage
1085 URI of executable file.
1086 type: string
1087 executionTimeout:
1088 description: Immutable. Optional. Amount of time
1089 executable has to complete. Default is 10 minutes
1090 (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1091 Cluster creation fails with an explanatory error
1092 message (the name of the executable that caused
1093 the error and the exceeded timeout period) if
1094 the executable is not completed at end of the
1095 timeout period.
1096 type: string
1097 type: object
1098 type: array
1099 lifecycleConfig:
1100 description: Immutable. Optional. Lifecycle setting for
1101 the cluster.
1102 properties:
1103 autoDeleteTime:
1104 description: Immutable. Optional. The time when cluster
1105 will be auto-deleted (see JSON representation of
1106 [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1107 format: date-time
1108 type: string
1109 autoDeleteTtl:
1110 description: Immutable. Optional. The lifetime duration
1111 of cluster. The cluster will be auto-deleted at
1112 the end of this period. Minimum value is 10 minutes;
1113 maximum value is 14 days (see JSON representation
1114 of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1115 type: string
1116 idleDeleteTtl:
1117 description: Immutable. Optional. The duration to
1118 keep the cluster alive while idling (when no jobs
1119 are running). Passing this threshold will cause
1120 the cluster to be deleted. Minimum value is 5 minutes;
1121 maximum value is 14 days (see JSON representation
1122 of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1123 type: string
1124 type: object
1125 masterConfig:
1126 description: Immutable. Optional. The Compute Engine config
1127 settings for the master instance in a cluster.
1128 properties:
1129 accelerators:
1130 description: Immutable. Optional. The Compute Engine
1131 accelerator configuration for these instances.
1132 items:
1133 properties:
1134 acceleratorCount:
1135 description: Immutable. The number of the accelerator
1136 cards of this type exposed to this instance.
1137 format: int64
1138 type: integer
1139 acceleratorType:
1140 description: 'Immutable. Full URL, partial URI,
1141 or short name of the accelerator type resource
1142 to expose to this instance. See [Compute Engine
1143 AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
1144 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1145 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1146 * `nvidia-tesla-k80` **Auto Zone Exception**:
1147 If you are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1148 feature, you must use the short name of the
1149 accelerator type resource, for example, `nvidia-tesla-k80`.'
1150 type: string
1151 type: object
1152 type: array
1153 diskConfig:
1154 description: Immutable. Optional. Disk option config
1155 settings.
1156 properties:
1157 bootDiskSizeGb:
1158 description: Immutable. Optional. Size in GB of
1159 the boot disk (default is 500GB).
1160 format: int64
1161 type: integer
1162 bootDiskType:
1163 description: 'Immutable. Optional. Type of the
1164 boot disk (default is "pd-standard"). Valid
1165 values: "pd-balanced" (Persistent Disk Balanced
1166 Solid State Drive), "pd-ssd" (Persistent Disk
1167 Solid State Drive), or "pd-standard" (Persistent
1168 Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
1169 type: string
1170 numLocalSsds:
1171 description: Immutable. Optional. Number of attached
1172 SSDs, from 0 to 4 (default is 0). If SSDs are
1173 not attached, the boot disk is used to store
1174 runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
1175 data. If one or more SSDs are attached, this
1176 runtime bulk data is spread across them, and
1177 the boot disk contains only basic config and
1178 installed binaries.
1179 format: int64
1180 type: integer
1181 type: object
1182 imageRef:
1183 description: Immutable.
1184 oneOf:
1185 - not:
1186 required:
1187 - external
1188 required:
1189 - name
1190 - not:
1191 anyOf:
1192 - required:
1193 - name
1194 - required:
1195 - namespace
1196 required:
1197 - external
1198 properties:
1199 external:
1200 description: |-
1201 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
1202
1203 Allowed value: The `selfLink` field of a `ComputeImage` resource.
1204 type: string
1205 name:
1206 description: 'Name of the referent. More info:
1207 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1208 type: string
1209 namespace:
1210 description: 'Namespace of the referent. More
1211 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1212 type: string
1213 type: object
1214 machineType:
1215 description: 'Immutable. Optional. The Compute Engine
1216 machine type used for cluster instances. A full
1217 URL, partial URI, or short name are valid. Examples:
1218 * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1219 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1220 * `n1-standard-2` **Auto Zone Exception**: If you
1221 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1222 feature, you must use the short name of the machine
1223 type resource, for example, `n1-standard-2`.'
1224 type: string
1225 minCpuPlatform:
1226 description: Immutable. Optional. Specifies the minimum
1227 cpu platform for the Instance Group. See [Dataproc
1228 -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
1229 type: string
1230 numInstances:
1231 description: Immutable. Optional. The number of VM
1232 instances in the instance group. For [HA cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
1233 [master_config](#FIELDS.master_config) groups, **must
1234 be set to 3**. For standard cluster [master_config](#FIELDS.master_config)
1235 groups, **must be set to 1**.
1236 format: int64
1237 type: integer
1238 preemptibility:
1239 description: 'Immutable. Optional. Specifies the preemptibility
1240 of the instance group. The default value for master
1241 and worker groups is `NON_PREEMPTIBLE`. This default
1242 cannot be changed. The default value for secondary
1243 instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
1244 NON_PREEMPTIBLE, PREEMPTIBLE'
1245 type: string
1246 type: object
1247 secondaryWorkerConfig:
1248 description: Immutable. Optional. The Compute Engine config
1249 settings for additional worker instances in a cluster.
1250 properties:
1251 accelerators:
1252 description: Immutable. Optional. The Compute Engine
1253 accelerator configuration for these instances.
1254 items:
1255 properties:
1256 acceleratorCount:
1257 description: Immutable. The number of the accelerator
1258 cards of this type exposed to this instance.
1259 format: int64
1260 type: integer
1261 acceleratorType:
1262 description: 'Immutable. Full URL, partial URI,
1263 or short name of the accelerator type resource
1264 to expose to this instance. See [Compute Engine
1265 AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
1266 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1267 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1268 * `nvidia-tesla-k80` **Auto Zone Exception**:
1269 If you are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1270 feature, you must use the short name of the
1271 accelerator type resource, for example, `nvidia-tesla-k80`.'
1272 type: string
1273 type: object
1274 type: array
1275 diskConfig:
1276 description: Immutable. Optional. Disk option config
1277 settings.
1278 properties:
1279 bootDiskSizeGb:
1280 description: Immutable. Optional. Size in GB of
1281 the boot disk (default is 500GB).
1282 format: int64
1283 type: integer
1284 bootDiskType:
1285 description: 'Immutable. Optional. Type of the
1286 boot disk (default is "pd-standard"). Valid
1287 values: "pd-balanced" (Persistent Disk Balanced
1288 Solid State Drive), "pd-ssd" (Persistent Disk
1289 Solid State Drive), or "pd-standard" (Persistent
1290 Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
1291 type: string
1292 numLocalSsds:
1293 description: Immutable. Optional. Number of attached
1294 SSDs, from 0 to 4 (default is 0). If SSDs are
1295 not attached, the boot disk is used to store
1296 runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
1297 data. If one or more SSDs are attached, this
1298 runtime bulk data is spread across them, and
1299 the boot disk contains only basic config and
1300 installed binaries.
1301 format: int64
1302 type: integer
1303 type: object
1304 imageRef:
1305 description: Immutable.
1306 oneOf:
1307 - not:
1308 required:
1309 - external
1310 required:
1311 - name
1312 - not:
1313 anyOf:
1314 - required:
1315 - name
1316 - required:
1317 - namespace
1318 required:
1319 - external
1320 properties:
1321 external:
1322 description: |-
1323 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
1324
1325 Allowed value: The `selfLink` field of a `ComputeImage` resource.
1326 type: string
1327 name:
1328 description: 'Name of the referent. More info:
1329 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1330 type: string
1331 namespace:
1332 description: 'Namespace of the referent. More
1333 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1334 type: string
1335 type: object
1336 machineType:
1337 description: 'Immutable. Optional. The Compute Engine
1338 machine type used for cluster instances. A full
1339 URL, partial URI, or short name are valid. Examples:
1340 * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1341 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1342 * `n1-standard-2` **Auto Zone Exception**: If you
1343 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1344 feature, you must use the short name of the machine
1345 type resource, for example, `n1-standard-2`.'
1346 type: string
1347 minCpuPlatform:
1348 description: Immutable. Optional. Specifies the minimum
1349 cpu platform for the Instance Group. See [Dataproc
1350 -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
1351 type: string
1352 numInstances:
1353 description: Immutable. Optional. The number of VM
1354 instances in the instance group. For [HA cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
1355 [master_config](#FIELDS.master_config) groups, **must
1356 be set to 3**. For standard cluster [master_config](#FIELDS.master_config)
1357 groups, **must be set to 1**.
1358 format: int64
1359 type: integer
1360 preemptibility:
1361 description: 'Immutable. Optional. Specifies the preemptibility
1362 of the instance group. The default value for master
1363 and worker groups is `NON_PREEMPTIBLE`. This default
1364 cannot be changed. The default value for secondary
1365 instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
1366 NON_PREEMPTIBLE, PREEMPTIBLE'
1367 type: string
1368 type: object
1369 securityConfig:
1370 description: Immutable. Optional. Security settings for
1371 the cluster.
1372 properties:
1373 kerberosConfig:
1374 description: Immutable. Optional. Kerberos related
1375 configuration.
1376 properties:
1377 crossRealmTrustAdminServer:
1378 description: Immutable. Optional. The admin server
1379 (IP or hostname) for the remote trusted realm
1380 in a cross realm trust relationship.
1381 type: string
1382 crossRealmTrustKdc:
1383 description: Immutable. Optional. The KDC (IP
1384 or hostname) for the remote trusted realm in
1385 a cross realm trust relationship.
1386 type: string
1387 crossRealmTrustRealm:
1388 description: Immutable. Optional. The remote realm
1389 the Dataproc on-cluster KDC will trust, should
1390 the user enable cross realm trust.
1391 type: string
1392 crossRealmTrustSharedPassword:
1393 description: Immutable. Optional. The Cloud Storage
1394 URI of a KMS encrypted file containing the shared
1395 password between the on-cluster Kerberos realm
1396 and the remote trusted realm, in a cross realm
1397 trust relationship.
1398 type: string
1399 enableKerberos:
1400 description: 'Immutable. Optional. Flag to indicate
1401 whether to Kerberize the cluster (default: false).
1402 Set this field to true to enable Kerberos on
1403 a cluster.'
1404 type: boolean
1405 kdcDbKey:
1406 description: Immutable. Optional. The Cloud Storage
1407 URI of a KMS encrypted file containing the master
1408 key of the KDC database.
1409 type: string
1410 keyPassword:
1411 description: Immutable. Optional. The Cloud Storage
1412 URI of a KMS encrypted file containing the password
1413 to the user provided key. For the self-signed
1414 certificate, this password is generated by Dataproc.
1415 type: string
1416 keystore:
1417 description: Immutable. Optional. The Cloud Storage
1418 URI of the keystore file used for SSL encryption.
1419 If not provided, Dataproc will provide a self-signed
1420 certificate.
1421 type: string
1422 keystorePassword:
1423 description: Immutable. Optional. The Cloud Storage
1424 URI of a KMS encrypted file containing the password
1425 to the user provided keystore. For the self-signed
1426 certificate, this password is generated by Dataproc.
1427 type: string
1428 kmsKeyRef:
1429 description: Immutable.
1430 oneOf:
1431 - not:
1432 required:
1433 - external
1434 required:
1435 - name
1436 - not:
1437 anyOf:
1438 - required:
1439 - name
1440 - required:
1441 - namespace
1442 required:
1443 - external
1444 properties:
1445 external:
1446 description: |-
1447 Optional. The URI of the KMS key used to encrypt various sensitive files.
1448
1449 Allowed value: The `selfLink` field of a `KMSCryptoKey` resource.
1450 type: string
1451 name:
1452 description: 'Name of the referent. More info:
1453 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1454 type: string
1455 namespace:
1456 description: 'Namespace of the referent. More
1457 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1458 type: string
1459 type: object
1460 realm:
1461 description: Immutable. Optional. The name of
1462 the on-cluster Kerberos realm. If not specified,
1463 the uppercased domain of hostnames will be the
1464 realm.
1465 type: string
1466 rootPrincipalPassword:
1467 description: Immutable. Optional. The Cloud Storage
1468 URI of a KMS encrypted file containing the root
1469 principal password.
1470 type: string
1471 tgtLifetimeHours:
1472 description: Immutable. Optional. The lifetime
1473 of the ticket granting ticket, in hours. If
1474 not specified, or user specifies 0, then default
1475 value 10 will be used.
1476 format: int64
1477 type: integer
1478 truststore:
1479 description: Immutable. Optional. The Cloud Storage
1480 URI of the truststore file used for SSL encryption.
1481 If not provided, Dataproc will provide a self-signed
1482 certificate.
1483 type: string
1484 truststorePassword:
1485 description: Immutable. Optional. The Cloud Storage
1486 URI of a KMS encrypted file containing the password
1487 to the user provided truststore. For the self-signed
1488 certificate, this password is generated by Dataproc.
1489 type: string
1490 type: object
1491 type: object
1492 softwareConfig:
1493 description: Immutable. Optional. The config settings
1494 for software inside the cluster.
1495 properties:
1496 imageVersion:
1497 description: Immutable. Optional. The version of software
1498 inside the cluster. It must be one of the supported
1499 [Dataproc Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
1500 such as "1.2" (including a subminor version, such
1501 as "1.2.29"), or the ["preview" version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
1502 If unspecified, it defaults to the latest Debian
1503 version.
1504 type: string
1505 optionalComponents:
1506 description: Immutable. Optional. The set of components
1507 to activate on the cluster.
1508 items:
1509 type: string
1510 type: array
1511 properties:
1512 additionalProperties:
1513 type: string
1514 description: 'Immutable. Optional. The properties
1515 to set on daemon config files. Property keys are
1516 specified in `prefix:property` format, for example
1517 `core:hadoop.tmp.dir`. The following are supported
1518 prefixes and their mappings: * capacity-scheduler:
1519 `capacity-scheduler.xml` * core: `core-site.xml`
1520 * distcp: `distcp-default.xml` * hdfs: `hdfs-site.xml`
1521 * hive: `hive-site.xml` * mapred: `mapred-site.xml`
1522 * pig: `pig.properties` * spark: `spark-defaults.conf`
1523 * yarn: `yarn-site.xml` For more information, see
1524 [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).'
1525 type: object
1526 type: object
1527 stagingBucketRef:
1528 description: Immutable.
1529 oneOf:
1530 - not:
1531 required:
1532 - external
1533 required:
1534 - name
1535 - not:
1536 anyOf:
1537 - required:
1538 - name
1539 - required:
1540 - namespace
1541 required:
1542 - external
1543 properties:
1544 external:
1545 description: |-
1546 Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see [Dataproc staging bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). **This field requires a Cloud Storage bucket name, not a URI to a Cloud Storage bucket.**
1547
1548 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
1549 type: string
1550 name:
1551 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1552 type: string
1553 namespace:
1554 description: 'Namespace of the referent. More info:
1555 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1556 type: string
1557 type: object
1558 tempBucketRef:
1559 description: Immutable.
1560 oneOf:
1561 - not:
1562 required:
1563 - external
1564 required:
1565 - name
1566 - not:
1567 anyOf:
1568 - required:
1569 - name
1570 - required:
1571 - namespace
1572 required:
1573 - external
1574 properties:
1575 external:
1576 description: |-
1577 Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. If you do not specify a temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's temp bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket. The default bucket has a TTL of 90 days, but you can use any TTL (or none) if you specify a bucket. **This field requires a Cloud Storage bucket name, not a URI to a Cloud Storage bucket.**
1578
1579 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
1580 type: string
1581 name:
1582 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1583 type: string
1584 namespace:
1585 description: 'Namespace of the referent. More info:
1586 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1587 type: string
1588 type: object
1589 workerConfig:
1590 description: Immutable. Optional. The Compute Engine config
1591 settings for worker instances in a cluster.
1592 properties:
1593 accelerators:
1594 description: Immutable. Optional. The Compute Engine
1595 accelerator configuration for these instances.
1596 items:
1597 properties:
1598 acceleratorCount:
1599 description: Immutable. The number of the accelerator
1600 cards of this type exposed to this instance.
1601 format: int64
1602 type: integer
1603 acceleratorType:
1604 description: 'Immutable. Full URL, partial URI,
1605 or short name of the accelerator type resource
1606 to expose to this instance. See [Compute Engine
1607 AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
1608 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1609 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1610 * `nvidia-tesla-k80` **Auto Zone Exception**:
1611 If you are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1612 feature, you must use the short name of the
1613 accelerator type resource, for example, `nvidia-tesla-k80`.'
1614 type: string
1615 type: object
1616 type: array
1617 diskConfig:
1618 description: Immutable. Optional. Disk option config
1619 settings.
1620 properties:
1621 bootDiskSizeGb:
1622 description: Immutable. Optional. Size in GB of
1623 the boot disk (default is 500GB).
1624 format: int64
1625 type: integer
1626 bootDiskType:
1627 description: 'Immutable. Optional. Type of the
1628 boot disk (default is "pd-standard"). Valid
1629 values: "pd-balanced" (Persistent Disk Balanced
1630 Solid State Drive), "pd-ssd" (Persistent Disk
1631 Solid State Drive), or "pd-standard" (Persistent
1632 Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
1633 type: string
1634 numLocalSsds:
1635 description: Immutable. Optional. Number of attached
1636 SSDs, from 0 to 4 (default is 0). If SSDs are
1637 not attached, the boot disk is used to store
1638 runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
1639 data. If one or more SSDs are attached, this
1640 runtime bulk data is spread across them, and
1641 the boot disk contains only basic config and
1642 installed binaries.
1643 format: int64
1644 type: integer
1645 type: object
1646 imageRef:
1647 description: Immutable.
1648 oneOf:
1649 - not:
1650 required:
1651 - external
1652 required:
1653 - name
1654 - not:
1655 anyOf:
1656 - required:
1657 - name
1658 - required:
1659 - namespace
1660 required:
1661 - external
1662 properties:
1663 external:
1664 description: |-
1665 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
1666
1667 Allowed value: The `selfLink` field of a `ComputeImage` resource.
1668 type: string
1669 name:
1670 description: 'Name of the referent. More info:
1671 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1672 type: string
1673 namespace:
1674 description: 'Namespace of the referent. More
1675 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1676 type: string
1677 type: object
1678 machineType:
1679 description: 'Immutable. Optional. The Compute Engine
1680 machine type used for cluster instances. A full
1681 URL, partial URI, or short name are valid. Examples:
1682 * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1683 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1684 * `n1-standard-2` **Auto Zone Exception**: If you
1685 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1686 feature, you must use the short name of the machine
1687 type resource, for example, `n1-standard-2`.'
1688 type: string
1689 minCpuPlatform:
1690 description: Immutable. Optional. Specifies the minimum
1691 CPU platform for the Instance Group. See [Dataproc
1692 -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
1693 type: string
1694 numInstances:
1695 description: Immutable. Optional. The number of VM
1696 instances in the instance group. For [HA cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
1697 `masterConfig` groups, **must
1698 be set to 3**. For standard cluster `masterConfig`
1699 groups, **must be set to 1**.
1700 format: int64
1701 type: integer
1702 preemptibility:
1703 description: 'Immutable. Optional. Specifies the preemptibility
1704 of the instance group. The default value for master
1705 and worker groups is `NON_PREEMPTIBLE`. This default
1706 cannot be changed. The default value for secondary
1707 instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
1708 NON_PREEMPTIBLE, PREEMPTIBLE'
1709 type: string
1710 type: object
1711 type: object
1712 labels:
1713 additionalProperties:
1714 type: string
1715 description: 'Immutable. Optional. The labels to associate
1716 with this cluster. Label keys must be between 1 and 63 characters
1717 long, and must conform to the following PCRE regular expression:
1718 `[\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62}`. Label values must be between 1 and 63 characters
1719 long, and must conform to the following PCRE regular expression:
1720 `[\p{Ll}\p{Lo}\p{N}_-]{0,63}`. No more than 32 labels can be associated
1721 with a given cluster.'
1722 type: object
1723 required:
1724 - clusterName
1725 - config
1726 type: object
1727 type: object
1728 projectRef:
1729 description: Immutable. The Project that this resource belongs to.
1730 oneOf:
1731 - not:
1732 required:
1733 - external
1734 required:
1735 - name
1736 - not:
1737 anyOf:
1738 - required:
1739 - name
1740 - required:
1741 - namespace
1742 required:
1743 - external
1744 properties:
1745 external:
1746 description: |-
1747 The project for the resource
1748
1749 Allowed value: The Google Cloud resource name of a `Project` resource (format: `projects/{{name}}`).
1750 type: string
1751 name:
1752 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1753 type: string
1754 namespace:
1755 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1756 type: string
1757 type: object
1758 resourceID:
1759 description: Immutable. Optional. The name of the resource. Used for
1760 creation and acquisition. When unset, the value of `metadata.name`
1761 is used as the default.
1762 type: string
1763 required:
1764 - jobs
1765 - location
1766 - placement
1767 type: object
1768 status:
1769 properties:
1770 conditions:
1771 description: Conditions represent the latest available observation
1772 of the resource's current state.
1773 items:
1774 properties:
1775 lastTransitionTime:
1776 description: Last time the condition transitioned from one status
1777 to another.
1778 type: string
1779 message:
1780 description: Human-readable message indicating details about
1781 last transition.
1782 type: string
1783 reason:
1784 description: Unique, one-word, CamelCase reason for the condition's
1785 last transition.
1786 type: string
1787 status:
1788 description: Status is the status of the condition. Can be True,
1789 False, Unknown.
1790 type: string
1791 type:
1792 description: Type is the type of the condition.
1793 type: string
1794 type: object
1795 type: array
1796 createTime:
1797 description: Output only. The time the template was created.
1798 format: date-time
1799 type: string
1800 observedGeneration:
1801 description: ObservedGeneration is the generation of the resource
1802 that was most recently observed by the Config Connector controller.
1803 If this is equal to metadata.generation, then that means that the
1804 current reported status reflects the most recent desired state of
1805 the resource.
1806 type: integer
1807 placement:
1808 properties:
1809 managedCluster:
1810 properties:
1811 config:
1812 properties:
1813 endpointConfig:
1814 properties:
1815 httpPorts:
1816 additionalProperties:
1817 type: string
1818 description: Output only. The map of port descriptions
1819 to URLs. Will only be populated if enable_http_port_access
1820 is true.
1821 type: object
1822 type: object
1823 lifecycleConfig:
1824 properties:
1825 idleStartTime:
1826 description: Output only. The time when the cluster became
1827 idle (most recent job finished) and became eligible
1828 for deletion due to idleness (see JSON representation
1829 of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1830 format: date-time
1831 type: string
1832 type: object
1833 masterConfig:
1834 properties:
1835 instanceNames:
1836 description: Output only. The list of instance names.
1837 Dataproc derives the names from `cluster_name`,
1838 `num_instances`, and the instance group.
1839 items:
1840 type: string
1841 type: array
1842 isPreemptible:
1843 description: Output only. Specifies that this instance
1844 group contains preemptible instances.
1845 type: boolean
1846 managedGroupConfig:
1847 description: Output only. The config for Compute Engine
1848 Instance Group Manager that manages this group.
1849 This is only used for preemptible instance groups.
1850 properties:
1851 instanceGroupManagerName:
1852 description: Output only. The name of the Instance
1853 Group Manager for this group.
1854 type: string
1855 instanceTemplateName:
1856 description: Output only. The name of the Instance
1857 Template used for the Managed Instance Group.
1858 type: string
1859 type: object
1860 type: object
1861 secondaryWorkerConfig:
1862 properties:
1863 instanceNames:
1864 description: Output only. The list of instance names.
1865 Dataproc derives the names from `cluster_name`,
1866 `num_instances`, and the instance group.
1867 items:
1868 type: string
1869 type: array
1870 isPreemptible:
1871 description: Output only. Specifies that this instance
1872 group contains preemptible instances.
1873 type: boolean
1874 managedGroupConfig:
1875 description: Output only. The config for Compute Engine
1876 Instance Group Manager that manages this group.
1877 This is only used for preemptible instance groups.
1878 properties:
1879 instanceGroupManagerName:
1880 description: Output only. The name of the Instance
1881 Group Manager for this group.
1882 type: string
1883 instanceTemplateName:
1884 description: Output only. The name of the Instance
1885 Template used for the Managed Instance Group.
1886 type: string
1887 type: object
1888 type: object
1889 workerConfig:
1890 properties:
1891 instanceNames:
1892 description: Output only. The list of instance names.
1893 Dataproc derives the names from `cluster_name`,
1894 `num_instances`, and the instance group.
1895 items:
1896 type: string
1897 type: array
1898 isPreemptible:
1899 description: Output only. Specifies that this instance
1900 group contains preemptible instances.
1901 type: boolean
1902 managedGroupConfig:
1903 description: Output only. The config for Compute Engine
1904 Instance Group Manager that manages this group.
1905 This is only used for preemptible instance groups.
1906 properties:
1907 instanceGroupManagerName:
1908 description: Output only. The name of the Instance
1909 Group Manager for this group.
1910 type: string
1911 instanceTemplateName:
1912 description: Output only. The name of the Instance
1913 Template used for the Managed Instance Group.
1914 type: string
1915 type: object
1916 type: object
1917 type: object
1918 type: object
1919 type: object
1920 updateTime:
1921 description: Output only. The time the template was last updated.
1922 format: date-time
1923 type: string
1924 version:
1925 description: Output only. The current version of this workflow template.
1926 format: int64
1927 type: integer
1928 type: object
1929 required:
1930 - spec
1931 type: object
1932 served: true
1933 storage: true
1934 subresources:
1935 status: {}
1936status:
1937 acceptedNames:
1938 kind: ""
1939 plural: ""
1940 conditions: []
1941 storedVersions: []
View as plain text