# CustomResourceDefinition object header: API version, kind, and metadata for
# the CRD named vertexaiendpoints.vertexai.cnrm.cloud.google.com.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    cnrm.cloud.google.com/version: 0.0.0-dev
  creationTimestamp: null
  labels:
    # Label values are quoted so they remain strings instead of YAML booleans.
    cnrm.cloud.google.com/managed-by-kcc: "true"
    cnrm.cloud.google.com/stability-level: alpha
    cnrm.cloud.google.com/system: "true"
    cnrm.cloud.google.com/tf2crd: "true"
  name: vertexaiendpoints.vertexai.cnrm.cloud.google.com
# CRD spec: declares the namespaced VertexAIEndpoint kind in the
# vertexai.cnrm.cloud.google.com group, served and stored as v1alpha1.
spec:
  group: vertexai.cnrm.cloud.google.com
  names:
    categories:
    - gcp
    kind: VertexAIEndpoint
    plural: vertexaiendpoints
    shortNames:
    - gcpvertexaiendpoint
    - gcpvertexaiendpoints
    singular: vertexaiendpoint
  preserveUnknownFields: false
  scope: Namespaced
  versions:
  # Extra columns for table output; the last three read the 'Ready' condition.
  - additionalPrinterColumns:
    - jsonPath: .metadata.creationTimestamp
      name: Age
      type: date
    - description: When 'True', the most recent reconcile of the resource succeeded
      jsonPath: .status.conditions[?(@.type=='Ready')].status
      name: Ready
      type: string
    - description: The reason for the value in 'Ready'
      jsonPath: .status.conditions[?(@.type=='Ready')].reason
      name: Status
      type: string
    - description: The last transition time for the value in 'Status'
      jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
      name: Status Age
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        properties:
          apiVersion:
            description: 'apiVersion defines the versioned schema of this representation
              of an object. Servers should convert recognized schemas to the latest
              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
            type: string
          kind:
            description: 'kind is a string value representing the REST resource this
              object represents. Servers may infer this from the endpoint the client
              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
            type: string
          metadata:
            type: object
          # Desired state; displayName, location and projectRef are required.
          spec:
            properties:
              description:
                description: The description of the Endpoint.
                type: string
              displayName:
                description: Required. The display name of the Endpoint. The name
                  can be up to 128 characters long and can consist of any UTF-8 characters.
                type: string
              encryptionSpec:
                description: Immutable. Customer-managed encryption key spec for an
                  Endpoint. If set, this Endpoint and all sub-resources of this Endpoint
                  will be secured by this key.
                properties:
                  kmsKeyName:
                    description: 'Immutable. Required. The Cloud KMS resource identifier
                      of the customer managed encryption key used to protect a resource.
                      Has the form: ''projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key''.
                      The key needs to be in the same region as where the compute
                      resource is created.'
                    type: string
                required:
                - kmsKeyName
                type: object
              location:
                description: Immutable. The location for the resource.
                type: string
              network:
                description: 'Immutable. The full name of the Google Compute Engine
                  [network](https://cloud.google.com//compute/docs/networks-and-firewalls#networks)
                  to which the Endpoint should be peered. Private services access
                  must already be configured for the network. If left unspecified,
                  the Endpoint is not peered with any network. Only one of the fields,
                  network or enable_private_service_connect, can be set. [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
                  ''projects/{project}/global/networks/{network}''. Where ''{project}''
                  is a project number, as in ''12345'', and ''{network}'' is network
                  name.'
                type: string
              projectRef:
                description: The project that this resource belongs to.
                # Exactly one form is accepted: `name` without `external`, or
                # `external` without `name`/`namespace`.
                oneOf:
                - not:
                    required:
                    - external
                  required:
                  - name
                - not:
                    anyOf:
                    - required:
                      - name
                    - required:
                      - namespace
                  required:
                  - external
                properties:
                  external:
                    description: 'Allowed value: The `name` field of a `Project` resource.'
                    type: string
                  name:
                    description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
                    type: string
                  namespace:
                    description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
                    type: string
                type: object
              region:
                description: Immutable. The region for the resource.
                type: string
              resourceID:
                description: Immutable. Optional. The name of the resource. Used for
                  creation and acquisition. When unset, the value of `metadata.name`
                  is used as the default.
                type: string
            required:
            - displayName
            - location
            - projectRef
            type: object
          # Observed state: conditions, observedGeneration, and Endpoint fields
          # whose descriptions mark them as output only.
          status:
            properties:
              conditions:
                description: Conditions represent the latest available observation
                  of the resource's current state.
                items:
                  properties:
                    lastTransitionTime:
                      description: Last time the condition transitioned from one status
                        to another.
                      type: string
                    message:
                      description: Human-readable message indicating details about
                        last transition.
                      type: string
                    reason:
                      description: Unique, one-word, CamelCase reason for the condition's
                        last transition.
                      type: string
                    status:
                      description: Status is the status of the condition. Can be True,
                        False, Unknown.
                      type: string
                    type:
                      description: Type is the type of the condition.
                      type: string
                  type: object
                type: array
              createTime:
                description: Output only. Timestamp when this Endpoint was created.
                type: string
              deployedModels:
                description: Output only. The models deployed in this Endpoint. To
                  add or remove DeployedModels use EndpointService.DeployModel and
                  EndpointService.UndeployModel respectively. Models can also be deployed
                  and undeployed using the [Cloud Console](https://console.cloud.google.com/vertex-ai/).
                items:
                  properties:
                    automaticResources:
                      description: A description of resources that to large degree
                        are decided by Vertex AI, and require only a modest additional
                        configuration.
                      items:
                        properties:
                          maxReplicaCount:
                            description: The maximum number of replicas this DeployedModel
                              may be deployed on when the traffic against it increases.
                              If the requested value is too large, the deployment
                              will error, but if deployment succeeds then the ability
                              to scale the model to that many replicas is guaranteed
                              (barring service outages). If traffic against the DeployedModel
                              increases beyond what its replicas at maximum may handle,
                              a portion of the traffic will be dropped. If this value
                              is not provided, a no upper bound for scaling under
                              heavy traffic will be assume, though Vertex AI may be
                              unable to scale beyond certain replica number.
                            type: integer
                          minReplicaCount:
                            description: The minimum number of replicas this DeployedModel
                              will be always deployed on. If traffic against it increases,
                              it may dynamically be deployed onto more replicas up
                              to max_replica_count, and as traffic decreases, some
                              of these extra replicas may be freed. If the requested
                              value is too large, the deployment will error.
                            type: integer
                        type: object
                      type: array
                    createTime:
                      description: Output only. Timestamp when the DeployedModel was
                        created.
                      type: string
                    dedicatedResources:
                      description: A description of resources that are dedicated to
                        the DeployedModel, and that need a higher degree of manual
                        configuration.
                      items:
                        properties:
                          autoscalingMetricSpecs:
                            description: The metric specifications that overrides
                              a resource utilization metric (CPU utilization, accelerator's
                              duty cycle, and so on) target value (default to 60 if
                              not set). At most one entry is allowed per metric. If
                              machine_spec.accelerator_count is above 0, the autoscaling
                              will be based on both CPU utilization and accelerator's
                              duty cycle metrics and scale up when either metrics
                              exceeds its target value while scale down if both metrics
                              are under their target value. The default target value
                              is 60 for both metrics. If machine_spec.accelerator_count
                              is 0, the autoscaling will be based on CPU utilization
                              metric only with default target value 60 if not explicitly
                              set. For example, in the case of Online Prediction,
                              if you want to override target CPU utilization to 80,
                              you should set autoscaling_metric_specs.metric_name
                              to 'aiplatform.googleapis.com/prediction/online/cpu/utilization'
                              and autoscaling_metric_specs.target to '80'.
                            items:
                              properties:
                                metricName:
                                  description: 'The resource metric name. Supported
                                    metrics: * For Online Prediction: * ''aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle''
                                    * ''aiplatform.googleapis.com/prediction/online/cpu/utilization''.'
                                  type: string
                                target:
                                  description: The target resource utilization in
                                    percentage (1% - 100%) for the given metric; once
                                    the real usage deviates from the target by a certain
                                    percentage, the machine replicas change. The default
                                    value is 60 (representing 60%) if not provided.
                                  type: integer
                              type: object
                            type: array
                          machineSpec:
                            description: The specification of a single machine used
                              by the prediction.
                            items:
                              properties:
                                acceleratorCount:
                                  description: The number of accelerators to attach
                                    to the machine.
                                  type: integer
                                acceleratorType:
                                  description: The type of accelerator(s) that may
                                    be attached to the machine as per accelerator_count.
                                    See possible values [here](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType).
                                  type: string
                                machineType:
                                  description: 'The type of the machine. See the [list
                                    of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types)
                                    See the [list of machine types supported for custom
                                    training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
                                    For DeployedModel this field is optional, and
                                    the default value is ''n1-standard-2''. For BatchPredictionJob
                                    or as part of WorkerPoolSpec this field is required.
                                    TODO(rsurowka): Try to better unify the required
                                    vs optional.'
                                  type: string
                              type: object
                            type: array
                          maxReplicaCount:
                            description: The maximum number of replicas this DeployedModel
                              may be deployed on when the traffic against it increases.
                              If the requested value is too large, the deployment
                              will error, but if deployment succeeds then the ability
                              to scale the model to that many replicas is guaranteed
                              (barring service outages). If traffic against the DeployedModel
                              increases beyond what its replicas at maximum may handle,
                              a portion of the traffic will be dropped. If this value
                              is not provided, will use min_replica_count as the default
                              value. The value of this field impacts the charge against
                              Vertex CPU and GPU quotas. Specifically, you will be
                              charged for max_replica_count * number of cores in the
                              selected machine type) and (max_replica_count * number
                              of GPUs per replica in the selected machine type).
                            type: integer
                          minReplicaCount:
                            description: The minimum number of machine replicas this
                              DeployedModel will be always deployed on. This value
                              must be greater than or equal to 1. If traffic against
                              the DeployedModel increases, it may dynamically be deployed
                              onto more replicas, and as traffic decreases, some of
                              these extra replicas may be freed.
                            type: integer
                        type: object
                      type: array
                    displayName:
                      description: The display name of the DeployedModel. If not provided
                        upon creation, the Model's display_name is used.
                      type: string
                    enableAccessLogging:
                      description: These logs are like standard server access logs,
                        containing information like timestamp and latency for each
                        prediction request. Note that Stackdriver logs may incur a
                        cost, especially if your project receives prediction requests
                        at a high queries per second rate (QPS). Estimate your costs
                        before enabling this option.
                      type: boolean
                    enableContainerLogging:
                      description: If true, the container of the DeployedModel instances
                        will send 'stderr' and 'stdout' streams to Stackdriver Logging.
                        Only supported for custom-trained Models and AutoML Tabular
                        Models.
                      type: boolean
                    id:
                      description: The ID of the DeployedModel. If not provided upon
                        deployment, Vertex AI will generate a value for this ID. This
                        value should be 1-10 characters, and valid characters are
                        /[0-9]/.
                      type: string
                    model:
                      description: The name of the Model that this is the deployment
                        of. Note that the Model may be in a different location than
                        the DeployedModel's Endpoint.
                      type: string
                    modelVersionId:
                      description: Output only. The version ID of the model that is
                        deployed.
                      type: string
                    privateEndpoints:
                      description: Output only. Provide paths for users to send predict/explain/health
                        requests directly to the deployed model services running on
                        Cloud via private services access. This field is populated
                        if network is configured.
                      items:
                        properties:
                          explainHttpUri:
                            description: Output only. Http(s) path to send explain
                              requests.
                            type: string
                          healthHttpUri:
                            description: Output only. Http(s) path to send health
                              check requests.
                            type: string
                          predictHttpUri:
                            description: Output only. Http(s) path to send prediction
                              requests.
                            type: string
                          serviceAttachment:
                            description: Output only. The name of the service attachment
                              resource. Populated if private service connect is enabled.
                            type: string
                        type: object
                      type: array
                    serviceAccount:
                      description: The service account that the DeployedModel's container
                        runs as. Specify the email address of the service account.
                        If this service account is not specified, the container runs
                        as a service account that doesn't have access to the resource
                        project. Users deploying the Model must have the 'iam.serviceAccounts.actAs'
                        permission on this service account.
                      type: string
                    sharedResources:
                      description: 'The resource name of the shared DeploymentResourcePool
                        to deploy on. Format: projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}.'
                      type: string
                  type: object
                type: array
              etag:
                description: Used to perform consistent read-modify-write updates.
                  If not set, a blind "overwrite" update happens.
                type: string
              modelDeploymentMonitoringJob:
                description: 'Output only. Resource name of the Model Monitoring job
                  associated with this Endpoint if monitoring is enabled by CreateModelDeploymentMonitoringJob.
                  Format: ''projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}''.'
                type: string
              observedGeneration:
                description: ObservedGeneration is the generation of the resource
                  that was most recently observed by the Config Connector controller.
                  If this is equal to metadata.generation, then that means that the
                  current reported status reflects the most recent desired state of
                  the resource.
                type: integer
              updateTime:
                description: Output only. Timestamp when this Endpoint was last updated.
                type: string
            type: object
        required:
        - spec
        type: object
    served: true
    storage: true
    # Enables the /status subresource for this version.
    subresources:
      status: {}
# CRD status stanza with empty placeholder values: no accepted names,
# conditions, or stored versions are recorded in this manifest.
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []