1# Copyright 2020 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15apiVersion: apiextensions.k8s.io/v1
16kind: CustomResourceDefinition
17metadata:
18 annotations:
19 cnrm.cloud.google.com/version: 1.106.0
20 creationTimestamp: null
21 labels:
22 cnrm.cloud.google.com/managed-by-kcc: "true"
23 cnrm.cloud.google.com/stability-level: alpha
24 cnrm.cloud.google.com/system: "true"
25 cnrm.cloud.google.com/tf2crd: "true"
26 name: vertexaiendpoints.vertexai.cnrm.cloud.google.com
27spec:
28 group: vertexai.cnrm.cloud.google.com
29 names:
30 categories:
31 - gcp
32 kind: VertexAIEndpoint
33 plural: vertexaiendpoints
34 shortNames:
35 - gcpvertexaiendpoint
36 - gcpvertexaiendpoints
37 singular: vertexaiendpoint
38 scope: Namespaced
39 versions:
40 - additionalPrinterColumns:
41 - jsonPath: .metadata.creationTimestamp
42 name: Age
43 type: date
44 - description: When 'True', the most recent reconcile of the resource succeeded
45 jsonPath: .status.conditions[?(@.type=='Ready')].status
46 name: Ready
47 type: string
48 - description: The reason for the value in 'Ready'
49 jsonPath: .status.conditions[?(@.type=='Ready')].reason
50 name: Status
51 type: string
52 - description: The last transition time for the value in 'Status'
53 jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
54 name: Status Age
55 type: date
56 name: v1alpha1
57 schema:
58 openAPIV3Schema:
59 properties:
60 apiVersion:
61 description: 'apiVersion defines the versioned schema of this representation
62 of an object. Servers should convert recognized schemas to the latest
63 internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
64 type: string
65 kind:
66 description: 'kind is a string value representing the REST resource this
67 object represents. Servers may infer this from the endpoint the client
68 submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
69 type: string
70 metadata:
71 type: object
72 spec:
73 properties:
74 description:
75 description: The description of the Endpoint.
76 type: string
77 displayName:
78 description: Required. The display name of the Endpoint. The name
79 can be up to 128 characters long and can consist of any UTF-8 characters.
80 type: string
81 encryptionSpec:
82 description: Immutable. Customer-managed encryption key spec for an
83 Endpoint. If set, this Endpoint and all sub-resources of this Endpoint
84 will be secured by this key.
85 properties:
86 kmsKeyName:
87 description: 'Immutable. Required. The Cloud KMS resource identifier
88 of the customer managed encryption key used to protect a resource.
89 Has the form: ''projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key''.
90 The key needs to be in the same region as where the compute
91 resource is created.'
92 type: string
93 required:
94 - kmsKeyName
95 type: object
96 location:
97 description: Immutable. The location for the resource.
98 type: string
99 network:
100 description: 'Immutable. The full name of the Google Compute Engine
101 [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks)
102 to which the Endpoint should be peered. Private services access
103 must already be configured for the network. If left unspecified,
104 the Endpoint is not peered with any network. Only one of the fields,
105 network or enable_private_service_connect, can be set. [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
106 ''projects/{project}/global/networks/{network}''. Where ''{project}''
107 is a project number, as in ''12345'', and ''{network}'' is network
108 name.'
109 type: string
110 projectRef:
111 description: The project that this resource belongs to.
112 oneOf:
113 - not:
114 required:
115 - external
116 required:
117 - name
118 - not:
119 anyOf:
120 - required:
121 - name
122 - required:
123 - namespace
124 required:
125 - external
126 properties:
127 external:
128 description: 'Allowed value: The `name` field of a `Project` resource.'
129 type: string
130 name:
131 description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
132 type: string
133 namespace:
134 description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
135 type: string
136 type: object
137 region:
138 description: Immutable. The region for the resource.
139 type: string
140 resourceID:
141 description: Immutable. Optional. The name of the resource. Used for
142 creation and acquisition. When unset, the value of `metadata.name`
143 is used as the default.
144 type: string
145 required:
146 - displayName
147 - location
148 - projectRef
149 type: object
150 status:
151 properties:
152 conditions:
153 description: Conditions represent the latest available observation
154 of the resource's current state.
155 items:
156 properties:
157 lastTransitionTime:
158 description: Last time the condition transitioned from one status
159 to another.
160 type: string
161 message:
162 description: Human-readable message indicating details about
163 last transition.
164 type: string
165 reason:
166 description: Unique, one-word, CamelCase reason for the condition's
167 last transition.
168 type: string
169 status:
170 description: Status is the status of the condition. Can be True,
171 False, Unknown.
172 type: string
173 type:
174 description: Type is the type of the condition.
175 type: string
176 type: object
177 type: array
178 createTime:
179 description: Output only. Timestamp when this Endpoint was created.
180 type: string
181 deployedModels:
182 description: Output only. The models deployed in this Endpoint. To
183 add or remove DeployedModels use EndpointService.DeployModel and
184 EndpointService.UndeployModel respectively. Models can also be deployed
185 and undeployed using the [Cloud Console](https://console.cloud.google.com/vertex-ai/).
186 items:
187 properties:
188 automaticResources:
189 description: A description of resources that are to a large
190 degree decided by Vertex AI, and require only modest additional
191 configuration.
192 items:
193 properties:
194 maxReplicaCount:
195 description: The maximum number of replicas this DeployedModel
196 may be deployed on when the traffic against it increases.
197 If the requested value is too large, the deployment
198 will error, but if deployment succeeds then the ability
199 to scale the model to that many replicas is guaranteed
200 (barring service outages). If traffic against the DeployedModel
201 increases beyond what its replicas at maximum may handle,
202 a portion of the traffic will be dropped. If this value
203 is not provided, no upper bound for scaling under
204 heavy traffic will be assumed, though Vertex AI may be
205 unable to scale beyond a certain replica number.
206 type: integer
207 minReplicaCount:
208 description: The minimum number of replicas this DeployedModel
209 will always be deployed on. If traffic against it increases,
210 it may dynamically be deployed onto more replicas up
211 to max_replica_count, and as traffic decreases, some
212 of these extra replicas may be freed. If the requested
213 value is too large, the deployment will error.
214 type: integer
215 type: object
216 type: array
217 createTime:
218 description: Output only. Timestamp when the DeployedModel was
219 created.
220 type: string
221 dedicatedResources:
222 description: A description of resources that are dedicated to
223 the DeployedModel, and that need a higher degree of manual
224 configuration.
225 items:
226 properties:
227 autoscalingMetricSpecs:
228 description: The metric specifications that override
229 a resource utilization metric (CPU utilization, accelerator's
230 duty cycle, and so on) target value (defaults to 60 if
231 not set). At most one entry is allowed per metric. If
232 machine_spec.accelerator_count is above 0, the autoscaling
233 will be based on both CPU utilization and accelerator's
234 duty cycle metrics and scale up when either metric
235 exceeds its target value while scaling down if both metrics
236 are under their target value. The default target value
237 is 60 for both metrics. If machine_spec.accelerator_count
238 is 0, the autoscaling will be based on CPU utilization
239 metric only with default target value 60 if not explicitly
240 set. For example, in the case of Online Prediction,
241 if you want to override target CPU utilization to 80,
242 you should set autoscaling_metric_specs.metric_name
243 to 'aiplatform.googleapis.com/prediction/online/cpu/utilization'
244 and autoscaling_metric_specs.target to '80'.
245 items:
246 properties:
247 metricName:
248 description: 'The resource metric name. Supported
249 metrics: * For Online Prediction: * ''aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle''
250 * ''aiplatform.googleapis.com/prediction/online/cpu/utilization''.'
251 type: string
252 target:
253 description: The target resource utilization in
254 percentage (1% - 100%) for the given metric; once
255 the real usage deviates from the target by a certain
256 percentage, the machine replicas change. The default
257 value is 60 (representing 60%) if not provided.
258 type: integer
259 type: object
260 type: array
261 machineSpec:
262 description: The specification of a single machine used
263 by the prediction.
264 items:
265 properties:
266 acceleratorCount:
267 description: The number of accelerators to attach
268 to the machine.
269 type: integer
270 acceleratorType:
271 description: The type of accelerator(s) that may
272 be attached to the machine as per accelerator_count.
273 See possible values [here](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType).
274 type: string
275 machineType:
276 description: 'The type of the machine. See the [list
277 of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
278 See the [list of machine types supported for custom
279 training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
280 For DeployedModel this field is optional, and
281 the default value is ''n1-standard-2''. For BatchPredictionJob
282 or as part of WorkerPoolSpec this field is required.
283 TODO(rsurowka): Try to better unify the required
284 vs optional.'
285 type: string
286 type: object
287 type: array
288 maxReplicaCount:
289 description: The maximum number of replicas this DeployedModel
290 may be deployed on when the traffic against it increases.
291 If the requested value is too large, the deployment
292 will error, but if deployment succeeds then the ability
293 to scale the model to that many replicas is guaranteed
294 (barring service outages). If traffic against the DeployedModel
295 increases beyond what its replicas at maximum may handle,
296 a portion of the traffic will be dropped. If this value
297 is not provided, will use min_replica_count as the default
298 value. The value of this field impacts the charge against
299 Vertex CPU and GPU quotas. Specifically, you will be
300 charged for (max_replica_count * number of cores in the
301 selected machine type) and (max_replica_count * number
302 of GPUs per replica in the selected machine type).
303 type: integer
304 minReplicaCount:
305 description: The minimum number of machine replicas this
306 DeployedModel will always be deployed on. This value
307 must be greater than or equal to 1. If traffic against
308 the DeployedModel increases, it may dynamically be deployed
309 onto more replicas, and as traffic decreases, some of
310 these extra replicas may be freed.
311 type: integer
312 type: object
313 type: array
314 displayName:
315 description: The display name of the DeployedModel. If not provided
316 upon creation, the Model's display_name is used.
317 type: string
318 enableAccessLogging:
319 description: These logs are like standard server access logs,
320 containing information like timestamp and latency for each
321 prediction request. Note that Stackdriver logs may incur a
322 cost, especially if your project receives prediction requests
323 at a high queries per second rate (QPS). Estimate your costs
324 before enabling this option.
325 type: boolean
326 enableContainerLogging:
327 description: If true, the container of the DeployedModel instances
328 will send 'stderr' and 'stdout' streams to Stackdriver Logging.
329 Only supported for custom-trained Models and AutoML Tabular
330 Models.
331 type: boolean
332 id:
333 description: The ID of the DeployedModel. If not provided upon
334 deployment, Vertex AI will generate a value for this ID. This
335 value should be 1-10 characters, and valid characters are
336 /[0-9]/.
337 type: string
338 model:
339 description: The name of the Model that this is the deployment
340 of. Note that the Model may be in a different location than
341 the DeployedModel's Endpoint.
342 type: string
343 modelVersionId:
344 description: Output only. The version ID of the model that is
345 deployed.
346 type: string
347 privateEndpoints:
348 description: Output only. Provide paths for users to send predict/explain/health
349 requests directly to the deployed model services running on
350 Cloud via private services access. This field is populated
351 if network is configured.
352 items:
353 properties:
354 explainHttpUri:
355 description: Output only. Http(s) path to send explain
356 requests.
357 type: string
358 healthHttpUri:
359 description: Output only. Http(s) path to send health
360 check requests.
361 type: string
362 predictHttpUri:
363 description: Output only. Http(s) path to send prediction
364 requests.
365 type: string
366 serviceAttachment:
367 description: Output only. The name of the service attachment
368 resource. Populated if private service connect is enabled.
369 type: string
370 type: object
371 type: array
372 serviceAccount:
373 description: The service account that the DeployedModel's container
374 runs as. Specify the email address of the service account.
375 If this service account is not specified, the container runs
376 as a service account that doesn't have access to the resource
377 project. Users deploying the Model must have the 'iam.serviceAccounts.actAs'
378 permission on this service account.
379 type: string
380 sharedResources:
381 description: 'The resource name of the shared DeploymentResourcePool
382 to deploy on. Format: projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}.'
383 type: string
384 type: object
385 type: array
386 etag:
387 description: Used to perform consistent read-modify-write updates.
388 If not set, a blind "overwrite" update happens.
389 type: string
390 modelDeploymentMonitoringJob:
391 description: 'Output only. Resource name of the Model Monitoring job
392 associated with this Endpoint if monitoring is enabled by CreateModelDeploymentMonitoringJob.
393 Format: ''projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}''.'
394 type: string
395 observedGeneration:
396 description: ObservedGeneration is the generation of the resource
397 that was most recently observed by the Config Connector controller.
398 If this is equal to metadata.generation, then that means that the
399 current reported status reflects the most recent desired state of
400 the resource.
401 type: integer
402 updateTime:
403 description: Output only. Timestamp when this Endpoint was last updated.
404 type: string
405 type: object
406 required:
407 - spec
408 type: object
409 served: true
410 storage: true
411 subresources:
412 status: {}
413status:
414 acceptedNames:
415 kind: ""
416 plural: ""
417 conditions: []
418 storedVersions: []
View as plain text