# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    cnrm.cloud.google.com/version: 1.106.0
  creationTimestamp: null
  labels:
    cnrm.cloud.google.com/managed-by-kcc: "true"
    cnrm.cloud.google.com/stability-level: alpha
    cnrm.cloud.google.com/system: "true"
    cnrm.cloud.google.com/tf2crd: "true"
  name: vertexaiendpoints.vertexai.cnrm.cloud.google.com
spec:
  group: vertexai.cnrm.cloud.google.com
  names:
    categories:
    - gcp
    kind: VertexAIEndpoint
    plural: vertexaiendpoints
    shortNames:
    - gcpvertexaiendpoint
    - gcpvertexaiendpoints
    singular: vertexaiendpoint
  scope: Namespaced
  versions:
  - additionalPrinterColumns:
    - jsonPath: .metadata.creationTimestamp
      name: Age
      type: date
    - description: When 'True', the most recent reconcile of the resource succeeded
      jsonPath: .status.conditions[?(@.type=='Ready')].status
      name: Ready
      type: string
    - description: The reason for the value in 'Ready'
      jsonPath: .status.conditions[?(@.type=='Ready')].reason
      name: Status
      type: string
    - description: The last transition time for the value in 'Status'
      jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
      name: Status Age
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        properties:
          apiVersion:
            description: 'apiVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
            type: string
          kind:
            description: 'kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
            type: string
          metadata:
            type: object
          spec:
            properties:
              description:
                description: The description of the Endpoint.
                type: string
              displayName:
                description: Required. The display name of the Endpoint. The name can be up to 128 characters long and can consist of any UTF-8 characters.
                type: string
              encryptionSpec:
                description: Immutable. Customer-managed encryption key spec for an Endpoint. If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key.
                properties:
                  kmsKeyName:
                    description: 'Immutable. Required. The Cloud KMS resource identifier of the customer managed encryption key used to protect a resource. Has the form: ''projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key''. The key needs to be in the same region as where the compute resource is created.'
                    type: string
                required:
                - kmsKeyName
                type: object
              location:
                description: Immutable. The location for the resource.
                type: string
              network:
                description: 'Immutable. The full name of the Google Compute Engine [network](https://cloud.google.com//compute/docs/networks-and-firewalls#networks) to which the Endpoint should be peered. Private services access must already be configured for the network. If left unspecified, the Endpoint is not peered with any network. Only one of the fields, network or enable_private_service_connect, can be set. [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert): ''projects/{project}/global/networks/{network}''. Where ''{project}'' is a project number, as in ''12345'', and ''{network}'' is a network name.'
                type: string
              projectRef:
                description: The project that this resource belongs to.
                oneOf:
                - not:
                    required:
                    - external
                  required:
                  - name
                - not:
                    anyOf:
                    - required:
                      - name
                    - required:
                      - namespace
                  required:
                  - external
                properties:
                  external:
                    description: 'Allowed value: The `name` field of a `Project` resource.'
                    type: string
                  name:
                    description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
                    type: string
                  namespace:
                    description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
                    type: string
                type: object
              region:
                description: Immutable. The region for the resource.
                type: string
              resourceID:
                description: Immutable. Optional. The name of the resource. Used for creation and acquisition. When unset, the value of `metadata.name` is used as the default.
                type: string
            required:
            - displayName
            - location
            - projectRef
            type: object
          status:
            properties:
              conditions:
                description: Conditions represent the latest available observation of the resource's current state.
                items:
                  properties:
                    lastTransitionTime:
                      description: Last time the condition transitioned from one status to another.
                      type: string
                    message:
                      description: Human-readable message indicating details about last transition.
                      type: string
                    reason:
                      description: Unique, one-word, CamelCase reason for the condition's last transition.
                      type: string
                    status:
                      description: Status is the status of the condition. Can be True, False, Unknown.
                      type: string
                    type:
                      description: Type is the type of the condition.
                      type: string
                  type: object
                type: array
              createTime:
                description: Output only. Timestamp when this Endpoint was created.
                type: string
              deployedModels:
                description: Output only. The models deployed in this Endpoint. To add or remove DeployedModels use EndpointService.DeployModel and EndpointService.UndeployModel respectively. Models can also be deployed and undeployed using the [Cloud Console](https://console.cloud.google.com/vertex-ai/).
                items:
                  properties:
                    automaticResources:
                      description: A description of resources that to a large degree are decided by Vertex AI, and require only a modest additional configuration.
                      items:
                        properties:
                          maxReplicaCount:
                            description: The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, no upper bound for scaling under heavy traffic will be assumed, though Vertex AI may be unable to scale beyond a certain replica number.
                            type: integer
                          minReplicaCount:
                            description: The minimum number of replicas this DeployedModel will always be deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas up to max_replica_count, and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error.
                            type: integer
                        type: object
                      type: array
                    createTime:
                      description: Output only. Timestamp when the DeployedModel was created.
                      type: string
                    dedicatedResources:
                      description: A description of resources that are dedicated to the DeployedModel, and that need a higher degree of manual configuration.
                      items:
                        properties:
                          autoscalingMetricSpecs:
                            description: The metric specifications that override a resource utilization metric (CPU utilization, accelerator's duty cycle, and so on) target value (defaults to 60 if not set). At most one entry is allowed per metric. If machine_spec.accelerator_count is above 0, the autoscaling will be based on both CPU utilization and accelerator's duty cycle metrics, and will scale up when either metric exceeds its target value and scale down if both metrics are under their target values. The default target value is 60 for both metrics. If machine_spec.accelerator_count is 0, the autoscaling will be based on the CPU utilization metric only, with a default target value of 60 if not explicitly set. For example, in the case of Online Prediction, if you want to override target CPU utilization to 80, you should set autoscaling_metric_specs.metric_name to 'aiplatform.googleapis.com/prediction/online/cpu/utilization' and autoscaling_metric_specs.target to '80'.
                            items:
                              properties:
                                metricName:
                                  description: 'The resource metric name. Supported metrics: * For Online Prediction: * ''aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle'' * ''aiplatform.googleapis.com/prediction/online/cpu/utilization''.'
                                  type: string
                                target:
                                  description: The target resource utilization in percentage (1% - 100%) for the given metric; once the real usage deviates from the target by a certain percentage, the machine replicas change. The default value is 60 (representing 60%) if not provided.
                                  type: integer
                              type: object
                            type: array
                          machineSpec:
                            description: The specification of a single machine used by the prediction.
                            items:
                              properties:
                                acceleratorCount:
                                  description: The number of accelerators to attach to the machine.
                                  type: integer
                                acceleratorType:
                                  description: The type of accelerator(s) that may be attached to the machine as per accelerator_count. See possible values [here](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType).
                                  type: string
                                machineType:
                                  description: 'The type of the machine. See the [list of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types). See the [list of machine types supported for custom training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). For DeployedModel this field is optional, and the default value is ''n1-standard-2''. For BatchPredictionJob or as part of WorkerPoolSpec this field is required. TODO(rsurowka): Try to better unify the required vs optional.'
                                  type: string
                              type: object
                            type: array
                          maxReplicaCount:
                            description: The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, min_replica_count will be used as the default value. The value of this field impacts the charge against Vertex CPU and GPU quotas. Specifically, you will be charged for (max_replica_count * number of cores in the selected machine type) and (max_replica_count * number of GPUs per replica in the selected machine type).
                            type: integer
                          minReplicaCount:
                            description: The minimum number of machine replicas this DeployedModel will always be deployed on. This value must be greater than or equal to 1. If traffic against the DeployedModel increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
                            type: integer
                        type: object
                      type: array
                    displayName:
                      description: The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used.
                      type: string
                    enableAccessLogging:
                      description: If true, online prediction access logs are sent to Stackdriver Logging. These logs are like standard server access logs, containing information like timestamp and latency for each prediction request. Note that Stackdriver logs may incur a cost, especially if your project receives prediction requests at a high queries per second rate (QPS). Estimate your costs before enabling this option.
                      type: boolean
                    enableContainerLogging:
                      description: If true, the container of the DeployedModel instances will send 'stderr' and 'stdout' streams to Stackdriver Logging. Only supported for custom-trained Models and AutoML Tabular Models.
                      type: boolean
                    id:
                      description: The ID of the DeployedModel. If not provided upon deployment, Vertex AI will generate a value for this ID. This value should be 1-10 characters, and valid characters are /[0-9]/.
                      type: string
                    model:
                      description: The name of the Model that this is the deployment of. Note that the Model may be in a different location than the DeployedModel's Endpoint.
                      type: string
                    modelVersionId:
                      description: Output only. The version ID of the model that is deployed.
                      type: string
                    privateEndpoints:
                      description: Output only. Provides paths for users to send predict/explain/health requests directly to the deployed model services running on Cloud via private services access. This field is populated if network is configured.
                      items:
                        properties:
                          explainHttpUri:
                            description: Output only. Http(s) path to send explain requests.
                            type: string
                          healthHttpUri:
                            description: Output only. Http(s) path to send health check requests.
                            type: string
                          predictHttpUri:
                            description: Output only. Http(s) path to send prediction requests.
                            type: string
                          serviceAttachment:
                            description: Output only. The name of the service attachment resource. Populated if private service connect is enabled.
                            type: string
                        type: object
                      type: array
                    serviceAccount:
                      description: The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the 'iam.serviceAccounts.actAs' permission on this service account.
                      type: string
                    sharedResources:
                      description: 'The resource name of the shared DeploymentResourcePool to deploy on. Format: projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}.'
                      type: string
                  type: object
                type: array
              etag:
                description: Used to perform consistent read-modify-write updates. If not set, a blind "overwrite" update happens.
                type: string
              modelDeploymentMonitoringJob:
                description: 'Output only. Resource name of the Model Monitoring job associated with this Endpoint if monitoring is enabled by CreateModelDeploymentMonitoringJob. Format: ''projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}''.'
                type: string
              observedGeneration:
                description: ObservedGeneration is the generation of the resource that was most recently observed by the Config Connector controller. If this is equal to metadata.generation, then that means that the current reported status reflects the most recent desired state of the resource.
                type: integer
              updateTime:
                description: Output only. Timestamp when this Endpoint was last updated.
                type: string
            type: object
        required:
        - spec
        type: object
    served: true
    storage: true
    subresources:
      status: {}
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
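# ---
# Example usage, kept as a comment so this generated CRD manifest itself is
# unchanged. This is a minimal sketch of a VertexAIEndpoint resource that
# satisfies the required fields declared above (displayName, location,
# projectRef); the metadata name, display name, region, and project ID below
# are illustrative placeholders, not values taken from this file.
#
# apiVersion: vertexai.cnrm.cloud.google.com/v1alpha1
# kind: VertexAIEndpoint
# metadata:
#   name: vertexaiendpoint-sample        # placeholder resource name
# spec:
#   displayName: sample-endpoint         # required
#   location: us-central1                # required, immutable; placeholder region
#   projectRef:
#     external: my-project-id            # placeholder project
#   description: An Endpoint managed declaratively through Config Connector.
#
# Applying such a manifest (for example with 'kubectl apply -f') asks the
# Config Connector controller to create and reconcile the corresponding
# Vertex AI Endpoint in the referenced project.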