# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    cnrm.cloud.google.com/version: 1.106.0
  creationTimestamp: null
  labels:
    cnrm.cloud.google.com/managed-by-kcc: "true"
    cnrm.cloud.google.com/stability-level: alpha
    cnrm.cloud.google.com/system: "true"
    cnrm.cloud.google.com/tf2crd: "true"
  name: vertexaiendpoints.vertexai.cnrm.cloud.google.com
spec:
  group: vertexai.cnrm.cloud.google.com
  names:
    categories:
    - gcp
    kind: VertexAIEndpoint
    plural: vertexaiendpoints
    shortNames:
    - gcpvertexaiendpoint
    - gcpvertexaiendpoints
    singular: vertexaiendpoint
  scope: Namespaced
  versions:
  - additionalPrinterColumns:
    - jsonPath: .metadata.creationTimestamp
      name: Age
      type: date
    - description: When 'True', the most recent reconcile of the resource succeeded
      jsonPath: .status.conditions[?(@.type=='Ready')].status
      name: Ready
      type: string
    - description: The reason for the value in 'Ready'
      jsonPath: .status.conditions[?(@.type=='Ready')].reason
      name: Status
      type: string
    - description: The last transition time for the value in 'Status'
      jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
      name: Status Age
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        properties:
          apiVersion:
            description: 'apiVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
            type: string
          kind:
            description: 'kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
            type: string
          metadata:
            type: object
          spec:
            properties:
              description:
                description: The description of the Endpoint.
                type: string
              displayName:
                description: Required. The display name of the Endpoint. The name can be up to 128 characters long and can consist of any UTF-8 characters.
                type: string
              encryptionSpec:
                description: Immutable. Customer-managed encryption key spec for an Endpoint. If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key.
                properties:
                  kmsKeyName:
                    description: 'Immutable. Required. The Cloud KMS resource identifier of the customer managed encryption key used to protect a resource. Has the form: ''projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key''. The key needs to be in the same region as where the compute resource is created.'
                    type: string
                required:
                - kmsKeyName
                type: object
              location:
                description: Immutable. The location for the resource.
                type: string
              network:
                description: 'Immutable. The full name of the Google Compute Engine [network](https://cloud.google.com//compute/docs/networks-and-firewalls#networks) to which the Endpoint should be peered. Private services access must already be configured for the network. If left unspecified, the Endpoint is not peered with any network. Only one of the fields, network or enable_private_service_connect, can be set. [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert): ''projects/{project}/global/networks/{network}''. Where ''{project}'' is a project number, as in ''12345'', and ''{network}'' is a network name.'
                type: string
              projectRef:
                description: The project that this resource belongs to.
                oneOf:
                - not:
                    required:
                    - external
                  required:
                  - name
                - not:
                    anyOf:
                    - required:
                      - name
                    - required:
                      - namespace
                  required:
                  - external
                properties:
                  external:
                    description: 'Allowed value: The `name` field of a `Project` resource.'
                    type: string
                  name:
                    description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
                    type: string
                  namespace:
                    description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
                    type: string
                type: object
              region:
                description: Immutable. The region for the resource.
                type: string
              resourceID:
                description: Immutable. Optional. The name of the resource. Used for creation and acquisition. When unset, the value of `metadata.name` is used as the default.
                type: string
            required:
            - displayName
            - location
            - projectRef
            type: object
          status:
            properties:
              conditions:
                description: Conditions represent the latest available observation of the resource's current state.
                items:
                  properties:
                    lastTransitionTime:
                      description: Last time the condition transitioned from one status to another.
                      type: string
                    message:
                      description: Human-readable message indicating details about last transition.
                      type: string
                    reason:
                      description: Unique, one-word, CamelCase reason for the condition's last transition.
                      type: string
                    status:
                      description: Status is the status of the condition. Can be True, False, Unknown.
                      type: string
                    type:
                      description: Type is the type of the condition.
                      type: string
                  type: object
                type: array
              createTime:
                description: Output only. Timestamp when this Endpoint was created.
                type: string
              deployedModels:
                description: Output only. The models deployed in this Endpoint. To add or remove DeployedModels use EndpointService.DeployModel and EndpointService.UndeployModel respectively. Models can also be deployed and undeployed using the [Cloud Console](https://console.cloud.google.com/vertex-ai/).
                items:
                  properties:
                    automaticResources:
                      description: A description of resources that to a large degree are decided by Vertex AI, and require only a modest additional configuration.
                      items:
                        properties:
                          maxReplicaCount:
                            description: The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, no upper bound for scaling under heavy traffic will be assumed, though Vertex AI may be unable to scale beyond a certain replica number.
                            type: integer
                          minReplicaCount:
                            description: The minimum number of replicas this DeployedModel will always be deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas up to max_replica_count, and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error.
                            type: integer
                        type: object
                      type: array
                    createTime:
                      description: Output only. Timestamp when the DeployedModel was created.
                      type: string
                    dedicatedResources:
                      description: A description of resources that are dedicated to the DeployedModel, and that need a higher degree of manual configuration.
                      items:
                        properties:
                          autoscalingMetricSpecs:
                            description: The metric specifications that override a resource utilization metric (CPU utilization, accelerator's duty cycle, and so on) target value (defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both CPU utilization and accelerator's duty cycle metrics, and will scale up when either metric exceeds its target value and scale down if both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to 'aiplatform.googleapis.com/prediction/online/cpu/utilization' and autoscaling_metric_specs.target to '80'.
                            items:
                              properties:
                                metricName:
                                  description: 'The resource metric name. Supported metrics: * For Online Prediction: * ''aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle'' * ''aiplatform.googleapis.com/prediction/online/cpu/utilization''.'
                                  type: string
                                target:
                                  description: The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
                                  type: integer
                              type: object
                            type: array
                          machineSpec:
                            description: The specification of a single machine used by the prediction.
                            items:
                              properties:
                                acceleratorCount:
                                  description: The number of accelerators to attach to the machine.
                                  type: integer
                                acceleratorType:
                                  description: The type of accelerator(s) that may be attached to the machine as per accelerator_count. See possible values [here](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType).
                                  type: string
                                machineType:
                                  description: 'The type of the machine. See the [list of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types). See the [list of machine types supported for custom training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). For DeployedModel this field is optional, and the default value is ''n1-standard-2''. For BatchPredictionJob or as part of WorkerPoolSpec this field is required. TODO(rsurowka): Try to better unify the required vs optional.'
                                  type: string
                              type: object
                            type: array
                          maxReplicaCount:
                            description: The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count will be used as the default value. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
                            type: integer
                          minReplicaCount:
                            description: The minimum number of machine replicas this DeployedModel will always be deployed on. This value must be greater than or equal to 1. If traffic against the DeployedModel increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
                            type: integer
                        type: object
                      type: array
                    displayName:
                      description: The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used.
                      type: string
                    enableAccessLogging:
                      description: If true, online prediction access logs are sent to Stackdriver Logging. These logs are like standard server access logs, containing information like timestamp and latency for each prediction request. Note that Stackdriver logs may incur a cost, especially if your project receives prediction requests at a high queries per second rate (QPS). Estimate your costs before enabling this option.
                      type: boolean
                    enableContainerLogging:
                      description: If true, the container of the DeployedModel instances will send 'stderr' and 'stdout' streams to Stackdriver Logging. Only supported for custom-trained Models and AutoML Tabular Models.
                      type: boolean
                    id:
                      description: The ID of the DeployedModel. If not provided upon deployment, Vertex AI will generate a value for this ID. This value should be 1-10 characters, and valid characters are /[0-9]/.
                      type: string
                    model:
                      description: The name of the Model that this is the deployment of. Note that the Model may be in a different location than the DeployedModel's Endpoint.
                      type: string
                    modelVersionId:
                      description: Output only. The version ID of the model that is deployed.
                      type: string
                    privateEndpoints:
                      description: Output only. Provides paths for users to send predict/explain/health requests directly to the deployed model services running on Cloud via private services access. This field is populated if network is configured.
                      items:
                        properties:
                          explainHttpUri:
                            description: Output only. Http(s) path to send explain requests.
                            type: string
                          healthHttpUri:
                            description: Output only. Http(s) path to send health check requests.
                            type: string
                          predictHttpUri:
                            description: Output only. Http(s) path to send prediction requests.
                            type: string
                          serviceAttachment:
                            description: Output only. The name of the service attachment resource. Populated if private service connect is enabled.
                            type: string
                        type: object
                      type: array
                    serviceAccount:
                      description: The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the 'iam.serviceAccounts.actAs' permission on this service account.
                      type: string
                    sharedResources:
                      description: 'The resource name of the shared DeploymentResourcePool to deploy on. Format: projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}.'
                      type: string
                  type: object
                type: array
              etag:
                description: Used to perform consistent read-modify-write updates. If not set, a blind "overwrite" update happens.
                type: string
              modelDeploymentMonitoringJob:
                description: 'Output only. Resource name of the Model Monitoring job associated with this Endpoint if monitoring is enabled by CreateModelDeploymentMonitoringJob. Format: ''projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}''.'
                type: string
              observedGeneration:
                description: ObservedGeneration is the generation of the resource that was most recently observed by the Config Connector controller. If this is equal to metadata.generation, then that means that the current reported status reflects the most recent desired state of the resource.
                type: integer
              updateTime:
                description: Output only. Timestamp when this Endpoint was last updated.
                type: string
            type: object
        required:
        - spec
        type: object
    served: true
    storage: true
    subresources:
      status: {}
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
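# ---
# Example usage, kept as a comment so this generated CRD manifest itself is
# unchanged. This is a minimal sketch of a VertexAIEndpoint resource that
# satisfies the required fields declared above (displayName, location,
# projectRef); the metadata name, display name, region, and project ID below
# are illustrative placeholders, not values taken from this file.
#
# apiVersion: vertexai.cnrm.cloud.google.com/v1alpha1
# kind: VertexAIEndpoint
# metadata:
#   name: vertexaiendpoint-sample        # placeholder resource name
# spec:
#   displayName: sample-endpoint         # required
#   location: us-central1                # required, immutable; placeholder region
#   projectRef:
#     external: my-project-id            # placeholder project
#   description: An Endpoint managed declaratively through Config Connector.
#
# Applying such a manifest (for example with 'kubectl apply -f') asks the
# Config Connector controller to create and reconcile the corresponding
# Vertex AI Endpoint in the referenced project.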