1# Copyright 2020 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15apiVersion: apiextensions.k8s.io/v1
16kind: CustomResourceDefinition
17metadata:
18 annotations:
19 cnrm.cloud.google.com/version: 1.106.0
20 creationTimestamp: null
21 labels:
22 cnrm.cloud.google.com/dcl2crd: "true"
23 cnrm.cloud.google.com/managed-by-kcc: "true"
24 cnrm.cloud.google.com/stability-level: stable
25 cnrm.cloud.google.com/system: "true"
26 name: dataprocworkflowtemplates.dataproc.cnrm.cloud.google.com
27spec:
28 group: dataproc.cnrm.cloud.google.com
29 names:
30 categories:
31 - gcp
32 kind: DataprocWorkflowTemplate
33 plural: dataprocworkflowtemplates
34 shortNames:
35 - gcpdataprocworkflowtemplate
36 - gcpdataprocworkflowtemplates
37 singular: dataprocworkflowtemplate
38 scope: Namespaced
39 versions:
40 - additionalPrinterColumns:
41 - jsonPath: .metadata.creationTimestamp
42 name: Age
43 type: date
44 - description: When 'True', the most recent reconcile of the resource succeeded
45 jsonPath: .status.conditions[?(@.type=='Ready')].status
46 name: Ready
47 type: string
48 - description: The reason for the value in 'Ready'
49 jsonPath: .status.conditions[?(@.type=='Ready')].reason
50 name: Status
51 type: string
52 - description: The last transition time for the value in 'Status'
53 jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
54 name: Status Age
55 type: date
56 name: v1beta1
57 schema:
58 openAPIV3Schema:
59 properties:
60 apiVersion:
61 description: 'apiVersion defines the versioned schema of this representation
62 of an object. Servers should convert recognized schemas to the latest
63 internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
64 type: string
65 kind:
66 description: 'kind is a string value representing the REST resource this
67 object represents. Servers may infer this from the endpoint the client
68 submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
69 type: string
70 metadata:
71 type: object
72 spec:
73 properties:
74 dagTimeout:
75 description: Immutable. Optional. Timeout duration for the DAG of
76 jobs, expressed in seconds (see [JSON representation of duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
77 The timeout duration must be from 10 minutes ("600s") to 24 hours
78 ("86400s"). The timer begins when the first job is submitted. If
79 the workflow is running at the end of the timeout period, any remaining
80 jobs are cancelled, the workflow is ended, and if the workflow was
81 running on a [managed cluster](/dataproc/docs/concepts/workflows/using-workflows#configuring_or_selecting_a_cluster),
82 the cluster is deleted.
83 type: string
84 jobs:
85 description: Immutable. Required. The Directed Acyclic Graph of Jobs
86 to submit.
87 items:
88 properties:
89 hadoopJob:
90 description: Immutable. Optional. Job is a Hadoop job.
91 properties:
92 archiveUris:
93 description: 'Immutable. Optional. HCFS URIs of archives
94 to be extracted in the working directory of Hadoop drivers
95 and tasks. Supported file types: .jar, .tar, .tar.gz,
96 .tgz, or .zip.'
97 items:
98 type: string
99 type: array
100 args:
101 description: Immutable. Optional. The arguments to pass
102 to the driver. Do not include arguments, such as `-libjars`
103 or `-Dfoo=bar`, that can be set as job properties, since
104 a collision may occur that causes an incorrect job submission.
105 items:
106 type: string
107 type: array
108 fileUris:
109 description: Immutable. Optional. HCFS (Hadoop Compatible
110 Filesystem) URIs of files to be copied to the working
111 directory of Hadoop drivers and distributed tasks. Useful
112 for naively parallel tasks.
113 items:
114 type: string
115 type: array
116 jarFileUris:
117 description: Immutable. Optional. Jar file URIs to add to
118 the CLASSPATHs of the Hadoop driver and tasks.
119 items:
120 type: string
121 type: array
122 loggingConfig:
123 description: Immutable. Optional. The runtime log config
124 for job execution.
125 properties:
126 driverLogLevels:
127 additionalProperties:
128 type: string
129 description: 'Immutable. The per-package log levels
130 for the driver. This may include "root" package name
131 to configure rootLogger. Examples: ''com.google =
132 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
133 type: object
134 type: object
135 mainClass:
136 description: Immutable. The name of the driver's main class.
137 The jar file containing the class must be in the default
138 CLASSPATH or specified in `jar_file_uris`.
139 type: string
140 mainJarFileUri:
141 description: 'Immutable. The HCFS URI of the jar file containing
142 the main class. Examples: ''gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar''
143 ''hdfs:/tmp/test-samples/custom-wordcount.jar'' ''file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'''
144 type: string
145 properties:
146 additionalProperties:
147 type: string
148 description: Immutable. Optional. A mapping of property
149 names to values, used to configure Hadoop. Properties
150 that conflict with values set by the Dataproc API may
151 be overwritten. Can include properties set in /etc/hadoop/conf/*-site
152 and classes in user code.
153 type: object
154 type: object
155 hiveJob:
156 description: Immutable. Optional. Job is a Hive job.
157 properties:
158 continueOnFailure:
159 description: Immutable. Optional. Whether to continue executing
160 queries if a query fails. The default value is `false`.
161 Setting to `true` can be useful when executing independent
162 parallel queries.
163 type: boolean
164 jarFileUris:
165 description: Immutable. Optional. HCFS URIs of jar files
166 to add to the CLASSPATH of the Hive server and Hadoop
167 MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
168 items:
169 type: string
170 type: array
171 properties:
172 additionalProperties:
173 type: string
174 description: Immutable. Optional. A mapping of property
175 names and values, used to configure Hive. Properties that
176 conflict with values set by the Dataproc API may be overwritten.
177 Can include properties set in /etc/hadoop/conf/*-site.xml,
178 /etc/hive/conf/hive-site.xml, and classes in user code.
179 type: object
180 queryFileUri:
181 description: Immutable. The HCFS URI of the script that
182 contains Hive queries.
183 type: string
184 queryList:
185 description: Immutable. A list of queries.
186 properties:
187 queries:
188 description: 'Immutable. Required. The queries to execute.
189 You do not need to end a query expression with a semicolon.
190 Multiple queries can be specified in one string by
191 separating each with a semicolon. Here is an example
192 of a Dataproc API snippet that uses a QueryList to
193 specify a HiveJob: "hiveJob": { "queryList": { "queries":
194 [ "query1", "query2", "query3;query4", ] } }'
195 items:
196 type: string
197 type: array
198 required:
199 - queries
200 type: object
201 scriptVariables:
202 additionalProperties:
203 type: string
204 description: 'Immutable. Optional. Mapping of query variable
205 names to values (equivalent to the Hive command: `SET
206 name="value";`).'
207 type: object
208 type: object
209 labels:
210 additionalProperties:
211 type: string
212 description: 'Immutable. Optional. The labels to associate with
213 this job. Label keys must be between 1 and 63 characters long,
214 and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62}
215 Label values must be between 1 and 63 characters long, and
216 must conform to the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
217 No more than 32 labels can be associated with a given job.'
218 type: object
219 pigJob:
220 description: Immutable. Optional. Job is a Pig job.
221 properties:
222 continueOnFailure:
223 description: Immutable. Optional. Whether to continue executing
224 queries if a query fails. The default value is `false`.
225 Setting to `true` can be useful when executing independent
226 parallel queries.
227 type: boolean
228 jarFileUris:
229 description: Immutable. Optional. HCFS URIs of jar files
230 to add to the CLASSPATH of the Pig Client and Hadoop MapReduce
231 (MR) tasks. Can contain Pig UDFs.
232 items:
233 type: string
234 type: array
235 loggingConfig:
236 description: Immutable. Optional. The runtime log config
237 for job execution.
238 properties:
239 driverLogLevels:
240 additionalProperties:
241 type: string
242 description: 'Immutable. The per-package log levels
243 for the driver. This may include "root" package name
244 to configure rootLogger. Examples: ''com.google =
245 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
246 type: object
247 type: object
248 properties:
249 additionalProperties:
250 type: string
251 description: Immutable. Optional. A mapping of property
252 names to values, used to configure Pig. Properties that
253 conflict with values set by the Dataproc API may be overwritten.
254 Can include properties set in /etc/hadoop/conf/*-site.xml,
255 /etc/pig/conf/pig.properties, and classes in user code.
256 type: object
257 queryFileUri:
258 description: Immutable. The HCFS URI of the script that
259 contains the Pig queries.
260 type: string
261 queryList:
262 description: Immutable. A list of queries.
263 properties:
264 queries:
265 description: 'Immutable. Required. The queries to execute.
266 You do not need to end a query expression with a semicolon.
267 Multiple queries can be specified in one string by
268 separating each with a semicolon. Here is an example
269 of a Dataproc API snippet that uses a QueryList to
270 specify a HiveJob: "hiveJob": { "queryList": { "queries":
271 [ "query1", "query2", "query3;query4", ] } }'
272 items:
273 type: string
274 type: array
275 required:
276 - queries
277 type: object
278 scriptVariables:
279 additionalProperties:
280 type: string
281 description: 'Immutable. Optional. Mapping of query variable
282 names to values (equivalent to the Pig command: `name=[value]`).'
283 type: object
284 type: object
285 prerequisiteStepIds:
286 description: Immutable. Optional. The optional list of prerequisite
287 job step_ids. If not specified, the job will start at the
288 beginning of workflow.
289 items:
290 type: string
291 type: array
292 prestoJob:
293 description: Immutable. Optional. Job is a Presto job.
294 properties:
295 clientTags:
296 description: Immutable. Optional. Presto client tags to
297 attach to this query
298 items:
299 type: string
300 type: array
301 continueOnFailure:
302 description: Immutable. Optional. Whether to continue executing
303 queries if a query fails. The default value is `false`.
304 Setting to `true` can be useful when executing independent
305 parallel queries.
306 type: boolean
307 loggingConfig:
308 description: Immutable. Optional. The runtime log config
309 for job execution.
310 properties:
311 driverLogLevels:
312 additionalProperties:
313 type: string
314 description: 'Immutable. The per-package log levels
315 for the driver. This may include "root" package name
316 to configure rootLogger. Examples: ''com.google =
317 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
318 type: object
319 type: object
320 outputFormat:
321 description: Immutable. Optional. The format in which query
322 output will be displayed. See the Presto documentation
323 for supported output formats
324 type: string
325 properties:
326 additionalProperties:
327 type: string
328 description: Immutable. Optional. A mapping of property
329 names to values. Used to set Presto [session properties](https://prestodb.io/docs/current/sql/set-session.html).
330 Equivalent to using the --session flag in the Presto CLI.
331 type: object
332 queryFileUri:
333 description: Immutable. The HCFS URI of the script that
334 contains SQL queries.
335 type: string
336 queryList:
337 description: Immutable. A list of queries.
338 properties:
339 queries:
340 description: 'Immutable. Required. The queries to execute.
341 You do not need to end a query expression with a semicolon.
342 Multiple queries can be specified in one string by
343 separating each with a semicolon. Here is an example
344 of a Dataproc API snippet that uses a QueryList to
345 specify a HiveJob: "hiveJob": { "queryList": { "queries":
346 [ "query1", "query2", "query3;query4", ] } }'
347 items:
348 type: string
349 type: array
350 required:
351 - queries
352 type: object
353 type: object
354 pysparkJob:
355 description: Immutable. Optional. Job is a PySpark job.
356 properties:
357 archiveUris:
358 description: 'Immutable. Optional. HCFS URIs of archives
359 to be extracted into the working directory of each executor.
360 Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.'
361 items:
362 type: string
363 type: array
364 args:
365 description: Immutable. Optional. The arguments to pass
366 to the driver. Do not include arguments, such as `--conf`,
367 that can be set as job properties, since a collision may
368 occur that causes an incorrect job submission.
369 items:
370 type: string
371 type: array
372 fileUris:
373 description: Immutable. Optional. HCFS URIs of files to
374 be placed in the working directory of each executor. Useful
375 for naively parallel tasks.
376 items:
377 type: string
378 type: array
379 jarFileUris:
380 description: Immutable. Optional. HCFS URIs of jar files
381 to add to the CLASSPATHs of the Python driver and tasks.
382 items:
383 type: string
384 type: array
385 loggingConfig:
386 description: Immutable. Optional. The runtime log config
387 for job execution.
388 properties:
389 driverLogLevels:
390 additionalProperties:
391 type: string
392 description: 'Immutable. The per-package log levels
393 for the driver. This may include "root" package name
394 to configure rootLogger. Examples: ''com.google =
395 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
396 type: object
397 type: object
398 mainPythonFileUri:
399 description: Immutable. Required. The HCFS URI of the main
400 Python file to use as the driver. Must be a .py file.
401 type: string
402 properties:
403 additionalProperties:
404 type: string
405 description: Immutable. Optional. A mapping of property
406 names to values, used to configure PySpark. Properties
407 that conflict with values set by the Dataproc API may
408 be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf
409 and classes in user code.
410 type: object
411 pythonFileUris:
412 description: 'Immutable. Optional. HCFS file URIs of Python
413 files to pass to the PySpark framework. Supported file
414 types: .py, .egg, and .zip.'
415 items:
416 type: string
417 type: array
418 required:
419 - mainPythonFileUri
420 type: object
421 scheduling:
422 description: Immutable. Optional. Job scheduling configuration.
423 properties:
424 maxFailuresPerHour:
425 description: Immutable. Optional. Maximum number of times
426 per hour a driver may be restarted as a result of driver
427 exiting with non-zero code before job is reported failed.
428 A job may be reported as thrashing if driver exits with
429 non-zero code 4 times within 10 minute window. Maximum
430 value is 10.
431 format: int64
432 type: integer
433 maxFailuresTotal:
434 description: Immutable. Optional. Maximum number of times
435 in total a driver may be restarted as a result of driver
436 exiting with non-zero code before job is reported failed.
437 Maximum value is 240.
438 format: int64
439 type: integer
440 type: object
441 sparkJob:
442 description: Immutable. Optional. Job is a Spark job.
443 properties:
444 archiveUris:
445 description: 'Immutable. Optional. HCFS URIs of archives
446 to be extracted into the working directory of each executor.
447 Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.'
448 items:
449 type: string
450 type: array
451 args:
452 description: Immutable. Optional. The arguments to pass
453 to the driver. Do not include arguments, such as `--conf`,
454 that can be set as job properties, since a collision may
455 occur that causes an incorrect job submission.
456 items:
457 type: string
458 type: array
459 fileUris:
460 description: Immutable. Optional. HCFS URIs of files to
461 be placed in the working directory of each executor. Useful
462 for naively parallel tasks.
463 items:
464 type: string
465 type: array
466 jarFileUris:
467 description: Immutable. Optional. HCFS URIs of jar files
468 to add to the CLASSPATHs of the Spark driver and tasks.
469 items:
470 type: string
471 type: array
472 loggingConfig:
473 description: Immutable. Optional. The runtime log config
474 for job execution.
475 properties:
476 driverLogLevels:
477 additionalProperties:
478 type: string
479 description: 'Immutable. The per-package log levels
480 for the driver. This may include "root" package name
481 to configure rootLogger. Examples: ''com.google =
482 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
483 type: object
484 type: object
485 mainClass:
486 description: Immutable. The name of the driver's main class.
487 The jar file that contains the class must be in the default
488 CLASSPATH or specified in `jar_file_uris`.
489 type: string
490 mainJarFileUri:
491 description: Immutable. The HCFS URI of the jar file that
492 contains the main class.
493 type: string
494 properties:
495 additionalProperties:
496 type: string
497 description: Immutable. Optional. A mapping of property
498 names to values, used to configure Spark. Properties that
499 conflict with values set by the Dataproc API may be overwritten.
500 Can include properties set in /etc/spark/conf/spark-defaults.conf
501 and classes in user code.
502 type: object
503 type: object
504 sparkRJob:
505 description: Immutable. Optional. Job is a SparkR job.
506 properties:
507 archiveUris:
508 description: 'Immutable. Optional. HCFS URIs of archives
509 to be extracted into the working directory of each executor.
510 Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.'
511 items:
512 type: string
513 type: array
514 args:
515 description: Immutable. Optional. The arguments to pass
516 to the driver. Do not include arguments, such as `--conf`,
517 that can be set as job properties, since a collision may
518 occur that causes an incorrect job submission.
519 items:
520 type: string
521 type: array
522 fileUris:
523 description: Immutable. Optional. HCFS URIs of files to
524 be placed in the working directory of each executor. Useful
525 for naively parallel tasks.
526 items:
527 type: string
528 type: array
529 loggingConfig:
530 description: Immutable. Optional. The runtime log config
531 for job execution.
532 properties:
533 driverLogLevels:
534 additionalProperties:
535 type: string
536 description: 'Immutable. The per-package log levels
537 for the driver. This may include "root" package name
538 to configure rootLogger. Examples: ''com.google =
539 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
540 type: object
541 type: object
542 mainRFileUri:
543 description: Immutable. Required. The HCFS URI of the main
544 R file to use as the driver. Must be a .R file.
545 type: string
546 properties:
547 additionalProperties:
548 type: string
549 description: Immutable. Optional. A mapping of property
550 names to values, used to configure SparkR. Properties
551 that conflict with values set by the Dataproc API may
552 be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf
553 and classes in user code.
554 type: object
555 required:
556 - mainRFileUri
557 type: object
558 sparkSqlJob:
559 description: Immutable. Optional. Job is a SparkSql job.
560 properties:
561 jarFileUris:
562 description: Immutable. Optional. HCFS URIs of jar files
563 to be added to the Spark CLASSPATH.
564 items:
565 type: string
566 type: array
567 loggingConfig:
568 description: Immutable. Optional. The runtime log config
569 for job execution.
570 properties:
571 driverLogLevels:
572 additionalProperties:
573 type: string
574 description: 'Immutable. The per-package log levels
575 for the driver. This may include "root" package name
576 to configure rootLogger. Examples: ''com.google =
577 FATAL'', ''root = INFO'', ''org.apache = DEBUG'''
578 type: object
579 type: object
580 properties:
581 additionalProperties:
582 type: string
583 description: Immutable. Optional. A mapping of property
584 names to values, used to configure Spark SQL's SparkConf.
585 Properties that conflict with values set by the Dataproc
586 API may be overwritten.
587 type: object
588 queryFileUri:
589 description: Immutable. The HCFS URI of the script that
590 contains SQL queries.
591 type: string
592 queryList:
593 description: Immutable. A list of queries.
594 properties:
595 queries:
596 description: 'Immutable. Required. The queries to execute.
597 You do not need to end a query expression with a semicolon.
598 Multiple queries can be specified in one string by
599 separating each with a semicolon. Here is an example
600 of a Dataproc API snippet that uses a QueryList to
601 specify a HiveJob: "hiveJob": { "queryList": { "queries":
602 [ "query1", "query2", "query3;query4", ] } }'
603 items:
604 type: string
605 type: array
606 required:
607 - queries
608 type: object
609 scriptVariables:
610 additionalProperties:
611 type: string
612 description: 'Immutable. Optional. Mapping of query variable
613 names to values (equivalent to the Spark SQL command:
614 SET `name="value";`).'
615 type: object
616 type: object
617 stepId:
618 description: Immutable. Required. The step id. The id must be
619 unique among all jobs within the template. The step id is
620 used as prefix for job id, as job `goog-dataproc-workflow-step-id`
621 label, and in prerequisiteStepIds field from other steps.
622 The id must contain only letters (a-z, A-Z), numbers (0-9),
623 underscores (_), and hyphens (-). Cannot begin or end with
624 underscore or hyphen. Must consist of between 3 and 50 characters.
625 type: string
626 required:
627 - stepId
628 type: object
629 type: array
630 location:
631 description: Immutable. The location for the resource
632 type: string
633 parameters:
634 description: Immutable. Optional. Template parameters whose values
635 are substituted into the template. Values for parameters must be
636 provided when the template is instantiated.
637 items:
638 properties:
639 description:
640 description: Immutable. Optional. Brief description of the parameter.
641 Must not exceed 1024 characters.
642 type: string
643 fields:
644 description: 'Immutable. Required. Paths to all fields that
645 the parameter replaces. A field is allowed to appear in at
646 most one parameter''s list of field paths. A field path is
647 similar in syntax to a google.protobuf.FieldMask. For example,
648 a field path that references the zone field of a workflow
649 template''s cluster selector would be specified as `placement.clusterSelector.zone`.
650 Also, field paths can reference fields using the following
651 syntax: * Values in maps can be referenced by key: * labels[''key'']
652 * placement.clusterSelector.clusterLabels[''key''] * placement.managedCluster.labels[''key'']
653 * placement.clusterSelector.clusterLabels[''key''] * jobs[''step-id''].labels[''key'']
654 * Jobs in the jobs list can be referenced by step-id: * jobs[''step-id''].hadoopJob.mainJarFileUri
655 * jobs[''step-id''].hiveJob.queryFileUri * jobs[''step-id''].pySparkJob.mainPythonFileUri
656 * jobs[''step-id''].hadoopJob.jarFileUris[0] * jobs[''step-id''].hadoopJob.archiveUris[0]
657 * jobs[''step-id''].hadoopJob.fileUris[0] * jobs[''step-id''].pySparkJob.pythonFileUris[0]
658 * Items in repeated fields can be referenced by a zero-based
659 index: * jobs[''step-id''].sparkJob.args[0] * Other examples:
660 * jobs[''step-id''].hadoopJob.properties[''key''] * jobs[''step-id''].hadoopJob.args[0]
661 * jobs[''step-id''].hiveJob.scriptVariables[''key''] * jobs[''step-id''].hadoopJob.mainJarFileUri
662 * placement.clusterSelector.zone It may not be possible to
663 parameterize maps and repeated fields in their entirety since
664 only individual map values and individual items in repeated
665 fields can be referenced. For example, the following field
666 paths are invalid: - placement.clusterSelector.clusterLabels
667 - jobs[''step-id''].sparkJob.args'
668 items:
669 type: string
670 type: array
671 name:
672 description: Immutable. Required. Parameter name. The parameter
673 name is used as the key, and paired with the parameter value,
674 which are passed to the template when the template is instantiated.
675 The name must contain only capital letters (A-Z), numbers
676 (0-9), and underscores (_), and must not start with a number.
677 The maximum length is 40 characters.
678 type: string
679 validation:
680 description: Immutable. Optional. Validation rules to be applied
681 to this parameter's value.
682 properties:
683 regex:
684 description: Immutable. Validation based on regular expressions.
685 properties:
686 regexes:
687 description: Immutable. Required. RE2 regular expressions
688 used to validate the parameter's value. The value
689 must match the regex in its entirety (substring matches
690 are not sufficient).
691 items:
692 type: string
693 type: array
694 required:
695 - regexes
696 type: object
697 values:
698 description: Immutable. Validation based on a list of allowed
699 values.
700 properties:
701 values:
702 description: Immutable. Required. List of allowed values
703 for the parameter.
704 items:
705 type: string
706 type: array
707 required:
708 - values
709 type: object
710 type: object
711 required:
712 - fields
713 - name
714 type: object
715 type: array
716 placement:
717 description: Immutable. Required. WorkflowTemplate scheduling information.
718 properties:
719 clusterSelector:
720 description: Immutable. Optional. A selector that chooses target
721 cluster for jobs based on metadata. The selector is evaluated
722 at the time each job is submitted.
723 properties:
724 clusterLabels:
725 additionalProperties:
726 type: string
727 description: Immutable. Required. The cluster labels. Cluster
728 must have all labels to match.
729 type: object
730 zone:
731 description: Immutable. Optional. The zone where workflow
732 process executes. This parameter does not affect the selection
733 of the cluster. If unspecified, the zone of the first cluster
734 matching the selector is used.
735 type: string
736 required:
737 - clusterLabels
738 type: object
739 managedCluster:
740 description: Immutable. A cluster that is managed by the workflow.
741 properties:
742 clusterName:
743 description: Immutable. Required. The cluster name prefix.
744 A unique cluster name will be formed by appending a random
745 suffix. The name must contain only lower-case letters (a-z),
746 numbers (0-9), and hyphens (-). Must begin with a letter.
747 Cannot begin or end with hyphen. Must consist of between
748 2 and 35 characters.
749 type: string
750 config:
751 description: Immutable. Required. The cluster configuration.
752 properties:
753 autoscalingConfig:
754 description: Immutable. Optional. Autoscaling config for
755 the policy associated with the cluster. Cluster does
756 not autoscale if this field is unset.
757 properties:
758 policyRef:
759 description: Immutable.
760 oneOf:
761 - not:
762 required:
763 - external
764 required:
765 - name
766 - not:
767 anyOf:
768 - required:
769 - name
770 - required:
771 - namespace
772 required:
773 - external
774 properties:
775 external:
776 description: |-
777 Optional. The autoscaling policy used by the cluster. Only resource names including projectid and location (region) are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` Note that the policy must be in the same project and Dataproc region.
778
779 Allowed value: The Google Cloud resource name of a `DataprocAutoscalingPolicy` resource (format: `projects/{{project}}/locations/{{location}}/autoscalingPolicies/{{name}}`).
780 type: string
781 name:
782 description: 'Name of the referent. More info:
783 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
784 type: string
785 namespace:
786 description: 'Namespace of the referent. More
787 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
788 type: string
789 type: object
790 type: object
791 encryptionConfig:
792 description: Immutable. Optional. Encryption settings
793 for the cluster.
794 properties:
795 gcePdKmsKeyRef:
796 description: Immutable.
797 oneOf:
798 - not:
799 required:
800 - external
801 required:
802 - name
803 - not:
804 anyOf:
805 - required:
806 - name
807 - required:
808 - namespace
809 required:
810 - external
811 properties:
812 external:
813 description: |-
814 Optional. The Cloud KMS key name to use for PD disk encryption for all instances in the cluster.
815
816 Allowed value: The `selfLink` field of a `KMSCryptoKey` resource.
817 type: string
818 name:
819 description: 'Name of the referent. More info:
820 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
821 type: string
822 namespace:
823 description: 'Namespace of the referent. More
824 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
825 type: string
826 type: object
827 type: object
828 endpointConfig:
829 description: Immutable. Optional. Port/endpoint configuration
830 for this cluster
831 properties:
832 enableHttpPortAccess:
833 description: Immutable. Optional. If true, enable
834 http access to specific ports on the cluster from
835 external sources. Defaults to false.
836 type: boolean
837 type: object
838 gceClusterConfig:
839 description: Immutable. Optional. The shared Compute Engine
840 config settings for all instances in a cluster.
841 properties:
842 internalIPOnly:
843 description: Immutable. Optional. If true, all instances
844 in the cluster will only have internal IP addresses.
845 By default, clusters are not restricted to internal
846 IP addresses, and will have ephemeral external IP
847 addresses assigned to each instance. This `internal_ip_only`
848 restriction can only be enabled for subnetwork enabled
849 networks, and all off-cluster dependencies must
850 be configured to be accessible without external
851 IP addresses.
852 type: boolean
853 metadata:
854 additionalProperties:
855 type: string
856 description: Immutable. The Compute Engine metadata
857 entries to add to all instances (see [Project and
858 instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
859 type: object
860 networkRef:
861 description: Immutable.
862 oneOf:
863 - not:
864 required:
865 - external
866 required:
867 - name
868 - not:
869 anyOf:
870 - required:
871 - name
872 - required:
873 - namespace
874 required:
875 - external
876 properties:
877 external:
878 description: |-
879 Optional. The Compute Engine network to be used for machine communications. Cannot be specified with subnetwork_uri. If neither `network_uri` nor `subnetwork_uri` is specified, the "default" network of the project is used, if it exists. Cannot be a "Custom Subnet Network" (see [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for more information). A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default` * `projects/[project_id]/regions/global/default` * `default`
880
881 Allowed value: The `selfLink` field of a `ComputeNetwork` resource.
882 type: string
883 name:
884 description: 'Name of the referent. More info:
885 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
886 type: string
887 namespace:
888 description: 'Namespace of the referent. More
889 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
890 type: string
891 type: object
892 nodeGroupAffinity:
893 description: Immutable. Optional. Node Group Affinity
894 for sole-tenant clusters.
895 properties:
896 nodeGroupRef:
897 description: Immutable.
898 oneOf:
899 - not:
900 required:
901 - external
902 required:
903 - name
904 - not:
905 anyOf:
906 - required:
907 - name
908 - required:
909 - namespace
910 required:
911 - external
912 properties:
913 external:
914 description: |-
915 Required. The URI of a sole-tenant [node group resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) that the cluster will be created on. A full URL, partial URI, or node group name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1` * `node-group-1`
916
917 Allowed value: The `selfLink` field of a `ComputeNodeGroup` resource.
918 type: string
919 name:
920 description: 'Name of the referent. More info:
921 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
922 type: string
923 namespace:
924 description: 'Namespace of the referent. More
925 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
926 type: string
927 type: object
928 required:
929 - nodeGroupRef
930 type: object
931 privateIPv6GoogleAccess:
932 description: 'Immutable. Optional. The type of IPv6
933 access for a cluster. Possible values: PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED,
934 INHERIT_FROM_SUBNETWORK, OUTBOUND, BIDIRECTIONAL'
935 type: string
936 reservationAffinity:
937 description: Immutable. Optional. Reservation Affinity
938 for consuming Zonal reservation.
939 properties:
940 consumeReservationType:
941 description: 'Immutable. Optional. Type of reservation
942 to consume. Possible values: TYPE_UNSPECIFIED,
943 NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION'
944 type: string
945 key:
946 description: Immutable. Optional. Corresponds
947 to the label key of reservation resource.
948 type: string
949 values:
950 description: Immutable. Optional. Corresponds
951 to the label values of reservation resource.
952 items:
953 type: string
954 type: array
955 type: object
956 serviceAccountRef:
957 description: Immutable.
958 oneOf:
959 - not:
960 required:
961 - external
962 required:
963 - name
964 - not:
965 anyOf:
966 - required:
967 - name
968 - required:
969 - namespace
970 required:
971 - external
972 properties:
973 external:
974 description: |-
975 Optional. The [Dataproc service account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc) (also see [VM Data Plane identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity)) used by Dataproc cluster VM instances to access Google Cloud Platform services. If not specified, the [Compute Engine default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) is used.
976
977 Allowed value: The `email` field of an `IAMServiceAccount` resource.
978 type: string
979 name:
980 description: 'Name of the referent. More info:
981 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
982 type: string
983 namespace:
984 description: 'Namespace of the referent. More
985 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
986 type: string
987 type: object
988 serviceAccountScopes:
989 description: 'Immutable. Optional. The URIs of service
990 account scopes to be included in Compute Engine
991 instances. The following base set of scopes is always
992 included: * https://www.googleapis.com/auth/cloud.useraccounts.readonly
993 * https://www.googleapis.com/auth/devstorage.read_write
994 * https://www.googleapis.com/auth/logging.write
995 If no scopes are specified, the following defaults
996 are also provided: * https://www.googleapis.com/auth/bigquery
997 * https://www.googleapis.com/auth/bigtable.admin.table
998 * https://www.googleapis.com/auth/bigtable.data
999 * https://www.googleapis.com/auth/devstorage.full_control'
1000 items:
1001 type: string
1002 type: array
1003 shieldedInstanceConfig:
1004 description: Immutable. Optional. Shielded Instance
1005 Config for clusters using Compute Engine Shielded
1006 VMs.
1007 properties:
1008 enableIntegrityMonitoring:
1009 description: Immutable. Optional. Defines whether
1010 instances have integrity monitoring enabled.
1011 Integrity monitoring compares the most recent
1012 boot measurements to the integrity policy baseline
1013 and returns a pair of pass/fail results depending
1014 on whether they match or not.
1015 type: boolean
1016 enableSecureBoot:
1017 description: Immutable. Optional. Defines whether
1018 the instances have Secure Boot enabled. Secure
1019 Boot helps ensure that the system only runs
1020 authentic software by verifying the digital
1021 signature of all boot components, and halting
1022 the boot process if signature verification fails.
1023 type: boolean
1024 enableVtpm:
1025 description: Immutable. Optional. Defines whether
1026 the instances have the vTPM enabled. Virtual
1027 Trusted Platform Module protects objects like
1028 keys and certificates, and enables Measured Boot
1029 by performing the measurements needed to create
1030 a known good boot baseline, called the integrity
1031 policy baseline.
1032 type: boolean
1033 type: object
1034 subnetworkRef:
1035 description: Immutable.
1036 oneOf:
1037 - not:
1038 required:
1039 - external
1040 required:
1041 - name
1042 - not:
1043 anyOf:
1044 - required:
1045 - name
1046 - required:
1047 - namespace
1048 required:
1049 - external
1050 properties:
1051 external:
1052 description: |-
1053 Optional. The Compute Engine subnetwork to be used for machine communications. Cannot be specified with network_uri. A full URL, partial URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/subnetworks/sub0` * `projects/[project_id]/regions/us-east1/subnetworks/sub0` * `sub0`
1054
1055 Allowed value: The `selfLink` field of a `ComputeSubnetwork` resource.
1056 type: string
1057 name:
1058 description: 'Name of the referent. More info:
1059 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1060 type: string
1061 namespace:
1062 description: 'Namespace of the referent. More
1063 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1064 type: string
1065 type: object
1066 tags:
1067 description: Immutable. The Compute Engine tags to
1068 add to all instances (see [Tagging instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
1069 items:
1070 type: string
1071 type: array
1072 zone:
1073 description: 'Immutable. Optional. The zone where
1074 the Compute Engine cluster will be located. On a
1075 create request, it is required in the "global" region.
1076 If omitted in a non-global Dataproc region, the
1077 service will pick a zone in the corresponding Compute
1078 Engine region. On a get request, zone will always
1079 be present. A full URL, partial URI, or short name
1080 are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
1081 * `projects/[project_id]/zones/[zone]` * `us-central1-f`'
1082 type: string
1083 type: object
1084 initializationActions:
1085 description: 'Immutable. Optional. Commands to execute
1086 on each node after config is completed. By default,
1087 executables are run on master and all worker nodes.
1088 You can test a node''s `role` metadata to run an executable
1089 on a master or worker node, as shown below using `curl`
1090 (you can also use `wget`): ROLE=$(curl -H Metadata-Flavor:Google
1091 http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
1092 if [[ "${ROLE}" == ''Master'' ]]; then ... master specific
1093 actions ... else ... worker specific actions ... fi'
1094 items:
1095 properties:
1096 executableFile:
1097 description: Immutable. Required. Cloud Storage
1098 URI of executable file.
1099 type: string
1100 executionTimeout:
1101 description: Immutable. Optional. Amount of time
1102 executable has to complete. Default is 10 minutes
1103 (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1104 Cluster creation fails with an explanatory error
1105 message (the name of the executable that caused
1106 the error and the exceeded timeout period) if
1107 the executable is not completed at end of the
1108 timeout period.
1109 type: string
1110 type: object
1111 type: array
1112 lifecycleConfig:
1113 description: Immutable. Optional. Lifecycle setting for
1114 the cluster.
1115 properties:
1116 autoDeleteTime:
1117 description: Immutable. Optional. The time when cluster
1118 will be auto-deleted (see JSON representation of
1119 [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1120 format: date-time
1121 type: string
1122 autoDeleteTtl:
1123 description: Immutable. Optional. The lifetime duration
1124 of cluster. The cluster will be auto-deleted at
1125 the end of this period. Minimum value is 10 minutes;
1126 maximum value is 14 days (see JSON representation
1127 of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1128 type: string
1129 idleDeleteTtl:
1130 description: Immutable. Optional. The duration to
1131 keep the cluster alive while idling (when no jobs
1132 are running). Passing this threshold will cause
1133 the cluster to be deleted. Minimum value is 5 minutes;
1134 maximum value is 14 days (see JSON representation
1135 of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1136 type: string
1137 type: object
1138 masterConfig:
1139 description: Immutable. Optional. The Compute Engine config
1140 settings for the master instance in a cluster.
1141 properties:
1142 accelerators:
1143 description: Immutable. Optional. The Compute Engine
1144 accelerator configuration for these instances.
1145 items:
1146 properties:
1147 acceleratorCount:
1148 description: Immutable. The number of the accelerator
1149 cards of this type exposed to this instance.
1150 format: int64
1151 type: integer
1152 acceleratorType:
1153 description: 'Immutable. Full URL, partial URI,
1154 or short name of the accelerator type resource
1155 to expose to this instance. See [Compute Engine
1156 AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
1157 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1158 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1159 * `nvidia-tesla-k80` **Auto Zone Exception**:
1160 If you are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1161 feature, you must use the short name of the
1162 accelerator type resource, for example, `nvidia-tesla-k80`.'
1163 type: string
1164 type: object
1165 type: array
1166 diskConfig:
1167 description: Immutable. Optional. Disk option config
1168 settings.
1169 properties:
1170 bootDiskSizeGb:
1171 description: Immutable. Optional. Size in GB of
1172 the boot disk (default is 500GB).
1173 format: int64
1174 type: integer
1175 bootDiskType:
1176 description: 'Immutable. Optional. Type of the
1177 boot disk (default is "pd-standard"). Valid
1178 values: "pd-balanced" (Persistent Disk Balanced
1179 Solid State Drive), "pd-ssd" (Persistent Disk
1180 Solid State Drive), or "pd-standard" (Persistent
1181 Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
1182 type: string
1183 numLocalSsds:
1184 description: Immutable. Optional. Number of attached
1185 SSDs, from 0 to 4 (default is 0). If SSDs are
1186 not attached, the boot disk is used to store
1187 runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
1188 data. If one or more SSDs are attached, this
1189 runtime bulk data is spread across them, and
1190 the boot disk contains only basic config and
1191 installed binaries.
1192 format: int64
1193 type: integer
1194 type: object
1195 imageRef:
1196 description: Immutable.
1197 oneOf:
1198 - not:
1199 required:
1200 - external
1201 required:
1202 - name
1203 - not:
1204 anyOf:
1205 - required:
1206 - name
1207 - required:
1208 - namespace
1209 required:
1210 - external
1211 properties:
1212 external:
1213 description: |-
1214 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
1215
1216 Allowed value: The `selfLink` field of a `ComputeImage` resource.
1217 type: string
1218 name:
1219 description: 'Name of the referent. More info:
1220 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1221 type: string
1222 namespace:
1223 description: 'Namespace of the referent. More
1224 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1225 type: string
1226 type: object
1227 machineType:
1228 description: 'Immutable. Optional. The Compute Engine
1229 machine type used for cluster instances. A full
1230 URL, partial URI, or short name are valid. Examples:
1231 * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1232 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1233 * `n1-standard-2` **Auto Zone Exception**: If you
1234 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1235 feature, you must use the short name of the machine
1236 type resource, for example, `n1-standard-2`.'
1237 type: string
1238 minCpuPlatform:
1239 description: Immutable. Optional. Specifies the minimum
1240 cpu platform for the Instance Group. See [Dataproc
1241 -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
1242 type: string
1243 numInstances:
1244 description: Immutable. Optional. The number of VM
1245 instances in the instance group. For [HA cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
1246 [master_config](#FIELDS.master_config) groups, **must
1247 be set to 3**. For standard cluster [master_config](#FIELDS.master_config)
1248 groups, **must be set to 1**.
1249 format: int64
1250 type: integer
1251 preemptibility:
1252 description: 'Immutable. Optional. Specifies the preemptibility
1253 of the instance group. The default value for master
1254 and worker groups is `NON_PREEMPTIBLE`. This default
1255 cannot be changed. The default value for secondary
1256 instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
1257 NON_PREEMPTIBLE, PREEMPTIBLE'
1258 type: string
1259 type: object
1260 secondaryWorkerConfig:
1261 description: Immutable. Optional. The Compute Engine config
1262 settings for additional worker instances in a cluster.
1263 properties:
1264 accelerators:
1265 description: Immutable. Optional. The Compute Engine
1266 accelerator configuration for these instances.
1267 items:
1268 properties:
1269 acceleratorCount:
1270 description: Immutable. The number of the accelerator
1271 cards of this type exposed to this instance.
1272 format: int64
1273 type: integer
1274 acceleratorType:
1275 description: 'Immutable. Full URL, partial URI,
1276 or short name of the accelerator type resource
1277 to expose to this instance. See [Compute Engine
1278 AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
1279 Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1280 * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
1281 * `nvidia-tesla-k80` **Auto Zone Exception**:
1282 If you are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1283 feature, you must use the short name of the
1284 accelerator type resource, for example, `nvidia-tesla-k80`.'
1285 type: string
1286 type: object
1287 type: array
1288 diskConfig:
1289 description: Immutable. Optional. Disk option config
1290 settings.
1291 properties:
1292 bootDiskSizeGb:
1293 description: Immutable. Optional. Size in GB of
1294 the boot disk (default is 500GB).
1295 format: int64
1296 type: integer
1297 bootDiskType:
1298 description: 'Immutable. Optional. Type of the
1299 boot disk (default is "pd-standard"). Valid
1300 values: "pd-balanced" (Persistent Disk Balanced
1301 Solid State Drive), "pd-ssd" (Persistent Disk
1302 Solid State Drive), or "pd-standard" (Persistent
1303 Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
1304 type: string
1305 numLocalSsds:
1306 description: Immutable. Optional. Number of attached
1307 SSDs, from 0 to 4 (default is 0). If SSDs are
1308 not attached, the boot disk is used to store
1309 runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
1310 data. If one or more SSDs are attached, this
1311 runtime bulk data is spread across them, and
1312 the boot disk contains only basic config and
1313 installed binaries.
1314 format: int64
1315 type: integer
1316 type: object
1317 imageRef:
1318 description: Immutable.
1319 oneOf:
1320 - not:
1321 required:
1322 - external
1323 required:
1324 - name
1325 - not:
1326 anyOf:
1327 - required:
1328 - name
1329 - required:
1330 - namespace
1331 required:
1332 - external
1333 properties:
1334 external:
1335 description: |-
1336 Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.
1337
1338 Allowed value: The `selfLink` field of a `ComputeImage` resource.
1339 type: string
1340 name:
1341 description: 'Name of the referent. More info:
1342 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1343 type: string
1344 namespace:
1345 description: 'Namespace of the referent. More
1346 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1347 type: string
1348 type: object
1349 machineType:
1350 description: 'Immutable. Optional. The Compute Engine
1351 machine type used for cluster instances. A full
1352 URL, partial URI, or short name are valid. Examples:
1353 * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1354 * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
1355 * `n1-standard-2` **Auto Zone Exception**: If you
1356 are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
1357 feature, you must use the short name of the machine
1358 type resource, for example, `n1-standard-2`.'
1359 type: string
1360 minCpuPlatform:
1361 description: Immutable. Optional. Specifies the minimum
1362 cpu platform for the Instance Group. See [Dataproc
1363 -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
1364 type: string
1365 numInstances:
1366 description: Immutable. Optional. The number of VM
1367 instances in the instance group. For [HA cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
1368 [master_config](#FIELDS.master_config) groups, **must
1369 be set to 3**. For standard cluster [master_config](#FIELDS.master_config)
1370 groups, **must be set to 1**.
1371 format: int64
1372 type: integer
1373 preemptibility:
1374 description: 'Immutable. Optional. Specifies the preemptibility
1375 of the instance group. The default value for master
1376 and worker groups is `NON_PREEMPTIBLE`. This default
1377 cannot be changed. The default value for secondary
1378 instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
1379 NON_PREEMPTIBLE, PREEMPTIBLE'
1380 type: string
1381 type: object
1382 securityConfig:
1383 description: Immutable. Optional. Security settings for
1384 the cluster.
1385 properties:
1386 kerberosConfig:
1387 description: Immutable. Optional. Kerberos related
1388 configuration.
1389 properties:
1390 crossRealmTrustAdminServer:
1391 description: Immutable. Optional. The admin server
1392 (IP or hostname) for the remote trusted realm
1393 in a cross realm trust relationship.
1394 type: string
1395 crossRealmTrustKdc:
1396 description: Immutable. Optional. The KDC (IP
1397 or hostname) for the remote trusted realm in
1398 a cross realm trust relationship.
1399 type: string
1400 crossRealmTrustRealm:
1401 description: Immutable. Optional. The remote realm
1402 the Dataproc on-cluster KDC will trust, should
1403 the user enable cross realm trust.
1404 type: string
1405 crossRealmTrustSharedPassword:
1406 description: Immutable. Optional. The Cloud Storage
1407 URI of a KMS encrypted file containing the shared
1408 password between the on-cluster Kerberos realm
1409 and the remote trusted realm, in a cross realm
1410 trust relationship.
1411 type: string
1412 enableKerberos:
1413 description: 'Immutable. Optional. Flag to indicate
1414 whether to Kerberize the cluster (default: false).
1415 Set this field to true to enable Kerberos on
1416 a cluster.'
1417 type: boolean
1418 kdcDbKey:
1419 description: Immutable. Optional. The Cloud Storage
1420 URI of a KMS encrypted file containing the master
1421 key of the KDC database.
1422 type: string
1423 keyPassword:
1424 description: Immutable. Optional. The Cloud Storage
1425 URI of a KMS encrypted file containing the password
1426 to the user provided key. For the self-signed
1427 certificate, this password is generated by Dataproc.
1428 type: string
1429 keystore:
1430 description: Immutable. Optional. The Cloud Storage
1431 URI of the keystore file used for SSL encryption.
1432 If not provided, Dataproc will provide a self-signed
1433 certificate.
1434 type: string
1435 keystorePassword:
1436 description: Immutable. Optional. The Cloud Storage
1437 URI of a KMS encrypted file containing the password
1438 to the user provided keystore. For the self-signed
1439 certificate, this password is generated by Dataproc.
1440 type: string
1441 kmsKeyRef:
1442 description: Immutable.
1443 oneOf:
1444 - not:
1445 required:
1446 - external
1447 required:
1448 - name
1449 - not:
1450 anyOf:
1451 - required:
1452 - name
1453 - required:
1454 - namespace
1455 required:
1456 - external
1457 properties:
1458 external:
1459 description: |-
1460 Optional. The URI of the KMS key used to encrypt various sensitive files.
1461
1462 Allowed value: The `selfLink` field of a `KMSCryptoKey` resource.
1463 type: string
1464 name:
1465 description: 'Name of the referent. More info:
1466 https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1467 type: string
1468 namespace:
1469 description: 'Namespace of the referent. More
1470 info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1471 type: string
1472 type: object
1473 realm:
1474 description: Immutable. Optional. The name of
1475 the on-cluster Kerberos realm. If not specified,
1476 the uppercased domain of hostnames will be the
1477 realm.
1478 type: string
1479 rootPrincipalPassword:
1480 description: Immutable. Optional. The Cloud Storage
1481 URI of a KMS encrypted file containing the root
1482 principal password.
1483 type: string
1484 tgtLifetimeHours:
1485 description: Immutable. Optional. The lifetime
1486 of the ticket granting ticket, in hours. If
1487 not specified, or user specifies 0, then default
1488 value 10 will be used.
1489 format: int64
1490 type: integer
1491 truststore:
1492 description: Immutable. Optional. The Cloud Storage
1493 URI of the truststore file used for SSL encryption.
1494 If not provided, Dataproc will provide a self-signed
1495 certificate.
1496 type: string
1497 truststorePassword:
1498 description: Immutable. Optional. The Cloud Storage
1499 URI of a KMS encrypted file containing the password
1500 to the user provided truststore. For the self-signed
1501 certificate, this password is generated by Dataproc.
1502 type: string
1503 type: object
1504 type: object
1505 softwareConfig:
1506 description: Immutable. Optional. The config settings
1507 for software inside the cluster.
1508 properties:
1509 imageVersion:
1510 description: Immutable. Optional. The version of software
1511 inside the cluster. It must be one of the supported
1512 [Dataproc Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
1513 such as "1.2" (including a subminor version, such
1514 as "1.2.29"), or the ["preview" version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
1515 If unspecified, it defaults to the latest Debian
1516 version.
1517 type: string
1518 optionalComponents:
1519 description: Immutable. Optional. The set of components
1520 to activate on the cluster.
1521 items:
1522 type: string
1523 type: array
1524 properties:
1525 additionalProperties:
1526 type: string
1527 description: 'Immutable. Optional. The properties
1528 to set on daemon config files. Property keys are
1529 specified in `prefix:property` format, for example
1530 `core:hadoop.tmp.dir`. The following are supported
1531 prefixes and their mappings: * capacity-scheduler:
1532 `capacity-scheduler.xml` * core: `core-site.xml`
1533 * distcp: `distcp-default.xml` * hdfs: `hdfs-site.xml`
1534 * hive: `hive-site.xml` * mapred: `mapred-site.xml`
1535 * pig: `pig.properties` * spark: `spark-defaults.conf`
1536 * yarn: `yarn-site.xml` For more information, see
1537 [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).'
1538 type: object
1539 type: object
1540 stagingBucketRef:
1541 description: Immutable.
1542 oneOf:
1543 - not:
1544 required:
1545 - external
1546 required:
1547 - name
1548 - not:
1549 anyOf:
1550 - required:
1551 - name
1552 - required:
1553 - namespace
1554 required:
1555 - external
1556 properties:
1557 external:
1558 description: |-
1559 Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see [Dataproc staging bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). **This field requires a Cloud Storage bucket name, not a URI to a Cloud Storage bucket.**
1560
1561 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
1562 type: string
1563 name:
1564 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1565 type: string
1566 namespace:
1567 description: 'Namespace of the referent. More info:
1568 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1569 type: string
1570 type: object
1571 tempBucketRef:
1572 description: Immutable.
1573 oneOf:
1574 - not:
1575 required:
1576 - external
1577 required:
1578 - name
1579 - not:
1580 anyOf:
1581 - required:
1582 - name
1583 - required:
1584 - namespace
1585 required:
1586 - external
1587 properties:
1588 external:
1589 description: |-
1590 Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. If you do not specify a temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's temp bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket. The default bucket has a TTL of 90 days, but you can use any TTL (or none) if you specify a bucket. **This field requires a Cloud Storage bucket name, not a URI to a Cloud Storage bucket.**
1591
1592 Allowed value: The Google Cloud resource name of a `StorageBucket` resource (format: `{{name}}`).
1593 type: string
1594 name:
1595 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
1596 type: string
1597 namespace:
1598 description: 'Namespace of the referent. More info:
1599 https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
1600 type: string
1601 type: object
# Schema for the Compute Engine settings of the worker instance group.
# Every field description in this subtree is marked Immutable.
workerConfig:
  description: Immutable. Optional. The Compute Engine config
    settings for worker instances in a cluster.
  properties:
    # List of accelerator entries; each entry pairs a count with a type.
    accelerators:
      description: Immutable. Optional. The Compute Engine
        accelerator configuration for these instances.
      items:
        properties:
          acceleratorCount:
            description: Immutable. The number of the accelerator
              cards of this type exposed to this instance.
            format: int64
            type: integer
          acceleratorType:
            description: 'Immutable. Full URL, partial URI,
              or short name of the accelerator type resource
              to expose to this instance. See [Compute Engine
              AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
              Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
              * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
              * `nvidia-tesla-k80` **Auto Zone Exception**:
              If you are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
              feature, you must use the short name of the
              accelerator type resource, for example, `nvidia-tesla-k80`.'
            type: string
        type: object
      type: array
    # Boot disk size/type and local SSD count.
    diskConfig:
      description: Immutable. Optional. Disk option config
        settings.
      properties:
        bootDiskSizeGb:
          description: Immutable. Optional. Size in GB of
            the boot disk (default is 500GB).
          format: int64
          type: integer
        bootDiskType:
          description: 'Immutable. Optional. Type of the
            boot disk (default is "pd-standard"). Valid
            values: "pd-balanced" (Persistent Disk Balanced
            Solid State Drive), "pd-ssd" (Persistent Disk
            Solid State Drive), or "pd-standard" (Persistent
            Disk Hard Disk Drive). See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).'
          type: string
        numLocalSsds:
          description: Immutable. Optional. Number of attached
            SSDs, from 0 to 4 (default is 0). If SSDs are
            not attached, the boot disk is used to store
            runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
            data. If one or more SSDs are attached, this
            runtime bulk data is spread across them, and
            the boot disk contains only basic config and
            installed binaries.
          format: int64
          type: integer
      type: object
    # Image reference. The oneOf below accepts exactly one of two shapes:
    #   1. `name` is required and `external` must be absent;
    #   2. `external` is required and neither `name` nor `namespace` may be set.
    imageRef:
      description: Immutable.
      oneOf:
      - not:
          required:
          - external
        required:
        - name
      - not:
          anyOf:
          - required:
            - name
          - required:
            - namespace
        required:
        - external
      properties:
        external:
          description: |-
            Optional. The Compute Engine image resource used for cluster instances. The URI can represent an image or image family. Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]` * `projects/[project_id]/global/images/[image-id]` * `image-id` Image family examples. Dataproc will use the most recent image from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]` * `projects/[project_id]/global/images/family/[custom-image-family-name]` If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version` or the system default.

            Allowed value: The `selfLink` field of a `ComputeImage` resource.
          type: string
        name:
          description: 'Name of the referent. More info:
            https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
          type: string
        namespace:
          description: 'Namespace of the referent. More
            info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
          type: string
      type: object
    machineType:
      description: 'Immutable. Optional. The Compute Engine
        machine type used for cluster instances. A full
        URL, partial URI, or short name are valid. Examples:
        * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
        * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
        * `n1-standard-2` **Auto Zone Exception**: If you
        are using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
        feature, you must use the short name of the machine
        type resource, for example, `n1-standard-2`.'
      type: string
    minCpuPlatform:
      description: Immutable. Optional. Specifies the minimum
        cpu platform for the Instance Group. See [Dataproc
        -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
      type: string
    # NOTE(review): the description below talks about master_config groups
    # even though this property sits under workerConfig; the wording is kept
    # verbatim - confirm against the Dataproc API reference before rewording.
    numInstances:
      description: Immutable. Optional. The number of VM
        instances in the instance group. For [HA cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
        [master_config](#FIELDS.master_config) groups, **must
        be set to 3**. For standard cluster [master_config](#FIELDS.master_config)
        groups, **must be set to 1**.
      format: int64
      type: integer
    # Declared as a free-form string; the allowed values are only listed in
    # the description, not enforced with an `enum`.
    preemptibility:
      description: 'Immutable. Optional. Specifies the preemptibility
        of the instance group. The default value for master
        and worker groups is `NON_PREEMPTIBLE`. This default
        cannot be changed. The default value for secondary
        instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
        NON_PREEMPTIBLE, PREEMPTIBLE'
      type: string
  type: object
1724 type: object
# String-to-string map of labels for the managed cluster.
# The key/value rules are stated in the description only; the schema itself
# enforces just "object whose values are strings".
labels:
  additionalProperties:
    type: string
  description: 'Immutable. Optional. The labels to associate
    with this cluster. Label keys must be between 1 and 63 characters
    long, and must conform to the following PCRE regular expression:
    [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62} Label values must be
    between 1 and 63 characters long, and must conform to the following
    PCRE regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63} No more than
    32 labels can be associated with a given cluster.'
  type: object
1736 required:
1737 - clusterName
1738 - config
1739 type: object
1740 type: object
# Reference to the Project that owns this resource.
# The oneOf below accepts exactly one of two shapes:
#   1. `name` is required and `external` must be absent;
#   2. `external` is required and neither `name` nor `namespace` may be set.
projectRef:
  description: Immutable. The Project that this resource belongs to.
  oneOf:
  - not:
      required:
      - external
    required:
    - name
  - not:
      anyOf:
      - required:
        - name
      - required:
        - namespace
    required:
    - external
  properties:
    external:
      description: |-
        The project for the resource

        Allowed value: The Google Cloud resource name of a `Project` resource (format: `projects/{{name}}`).
      type: string
    name:
      description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
      type: string
    namespace:
      description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
      type: string
  type: object
# Optional explicit resource name; per the description, `metadata.name` is
# the fallback when this field is unset.
resourceID:
  description: Immutable. Optional. The name of the resource. Used for
    creation and acquisition. When unset, the value of `metadata.name`
    is used as the default.
  type: string
1776 required:
1777 - jobs
1778 - location
1779 - placement
1780 type: object
# Schema of the resource's `status` object: reconcile conditions plus the
# fields whose descriptions are marked "Output only".
status:
  properties:
    # Array of condition objects; every field of a condition is a string.
    conditions:
      description: Conditions represent the latest available observation
        of the resource's current state.
      items:
        properties:
          lastTransitionTime:
            description: Last time the condition transitioned from one status
              to another.
            type: string
          message:
            description: Human-readable message indicating details about
              last transition.
            type: string
          reason:
            description: Unique, one-word, CamelCase reason for the condition's
              last transition.
            type: string
          status:
            description: Status is the status of the condition. Can be True,
              False, Unknown.
            type: string
          type:
            description: Type is the type of the condition.
            type: string
        type: object
      type: array
    createTime:
      description: Output only. The time template was created.
      format: date-time
      type: string
    observedGeneration:
      description: ObservedGeneration is the generation of the resource
        that was most recently observed by the Config Connector controller.
        If this is equal to metadata.generation, then that means that the
        current reported status reflects the most recent desired state of
        the resource.
      type: integer
    # Output-only counterparts of fields under placement.managedCluster.config.
    placement:
      properties:
        managedCluster:
          properties:
            config:
              properties:
                endpointConfig:
                  properties:
                    httpPorts:
                      additionalProperties:
                        type: string
                      description: Output only. The map of port descriptions
                        to URLs. Will only be populated if enable_http_port_access
                        is true.
                      type: object
                  type: object
                lifecycleConfig:
                  properties:
                    idleStartTime:
                      description: Output only. The time when cluster became
                        idle (most recent job finished) and became eligible
                        for deletion due to idleness (see JSON representation
                        of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
                      format: date-time
                      type: string
                  type: object
                # masterConfig, secondaryWorkerConfig and workerConfig below
                # declare the same three output-only properties.
                masterConfig:
                  properties:
                    instanceNames:
                      description: Output only. The list of instance names.
                        Dataproc derives the names from `cluster_name`,
                        `num_instances`, and the instance group.
                      items:
                        type: string
                      type: array
                    isPreemptible:
                      description: Output only. Specifies that this instance
                        group contains preemptible instances.
                      type: boolean
                    managedGroupConfig:
                      description: Output only. The config for Compute Engine
                        Instance Group Manager that manages this group.
                        This is only used for preemptible instance groups.
                      properties:
                        instanceGroupManagerName:
                          description: Output only. The name of the Instance
                            Group Manager for this group.
                          type: string
                        instanceTemplateName:
                          description: Output only. The name of the Instance
                            Template used for the Managed Instance Group.
                          type: string
                      type: object
                  type: object
                secondaryWorkerConfig:
                  properties:
                    instanceNames:
                      description: Output only. The list of instance names.
                        Dataproc derives the names from `cluster_name`,
                        `num_instances`, and the instance group.
                      items:
                        type: string
                      type: array
                    isPreemptible:
                      description: Output only. Specifies that this instance
                        group contains preemptible instances.
                      type: boolean
                    managedGroupConfig:
                      description: Output only. The config for Compute Engine
                        Instance Group Manager that manages this group.
                        This is only used for preemptible instance groups.
                      properties:
                        instanceGroupManagerName:
                          description: Output only. The name of the Instance
                            Group Manager for this group.
                          type: string
                        instanceTemplateName:
                          description: Output only. The name of the Instance
                            Template used for the Managed Instance Group.
                          type: string
                      type: object
                  type: object
                workerConfig:
                  properties:
                    instanceNames:
                      description: Output only. The list of instance names.
                        Dataproc derives the names from `cluster_name`,
                        `num_instances`, and the instance group.
                      items:
                        type: string
                      type: array
                    isPreemptible:
                      description: Output only. Specifies that this instance
                        group contains preemptible instances.
                      type: boolean
                    managedGroupConfig:
                      description: Output only. The config for Compute Engine
                        Instance Group Manager that manages this group.
                        This is only used for preemptible instance groups.
                      properties:
                        instanceGroupManagerName:
                          description: Output only. The name of the Instance
                            Group Manager for this group.
                          type: string
                        instanceTemplateName:
                          description: Output only. The name of the Instance
                            Template used for the Managed Instance Group.
                          type: string
                      type: object
                  type: object
              type: object
          type: object
      type: object
    updateTime:
      description: Output only. The time template was last updated.
      format: date-time
      type: string
    version:
      description: Output only. The current version of this workflow template.
      format: int64
      type: integer
  type: object
1942 required:
1943 - spec
1944 type: object
1945 served: true
1946 storage: true
1947 subresources:
1948 status: {}
# Top-level status stanza of the CustomResourceDefinition object itself.
# All values are empty in this manifest.
# NOTE(review): presumably filled in by the API server once the CRD is
# installed - confirm before relying on it.
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
View as plain text