vertexai_v1alpha1_vertexaiendpoint.yaml

Documentation: github.com/GoogleCloudPlatform/k8s-config-connector/crds

     1# Copyright 2020 Google LLC
     2#
     3# Licensed under the Apache License, Version 2.0 (the "License");
     4# you may not use this file except in compliance with the License.
     5# You may obtain a copy of the License at
     6#
     7#     http://www.apache.org/licenses/LICENSE-2.0
     8#
     9# Unless required by applicable law or agreed to in writing, software
    10# distributed under the License is distributed on an "AS IS" BASIS,
    11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12# See the License for the specific language governing permissions and
    13# limitations under the License.
    14
    15apiVersion: apiextensions.k8s.io/v1
    16kind: CustomResourceDefinition
      # Metadata of the CRD object itself (not of VertexAIEndpoint instances).
    17metadata:
    18  annotations:
          # Version string stamped on this CRD; presumably the Config Connector
          # release that produced it -- TODO confirm.
    19    cnrm.cloud.google.com/version: 1.106.0
    20  creationTimestamp: null
    21  labels:
          # Boolean-looking label values are quoted so they remain strings.
    22    cnrm.cloud.google.com/managed-by-kcc: "true"
          # Matches the alpha API version (v1alpha1) declared under spec.versions.
    23    cnrm.cloud.google.com/stability-level: alpha
    24    cnrm.cloud.google.com/system: "true"
    25    cnrm.cloud.google.com/tf2crd: "true"
        # The name is the plural resource name joined to the API group by a dot.
    26  name: vertexaiendpoints.vertexai.cnrm.cloud.google.com
    27spec:
        # API group and resource names under which the custom resource is served.
    28  group: vertexai.cnrm.cloud.google.com
    29  names:
          # Category grouping; this resource is listed under `gcp`.
    30    categories:
    31    - gcp
    32    kind: VertexAIEndpoint
    33    plural: vertexaiendpoints
          # Short aliases declared for the resource (singular and plural forms).
    34    shortNames:
    35    - gcpvertexaiendpoint
    36    - gcpvertexaiendpoints
    37    singular: vertexaiendpoint
        # Instances are namespaced rather than cluster-scoped.
    38  scope: Namespaced
    39  versions:
        # Extra columns for tabular listings. Three of the four columns are read
        # from the `Ready` entry of .status.conditions through a JSONPath filter.
    40  - additionalPrinterColumns:
          # Age: derived from the object's creation timestamp.
    41    - jsonPath: .metadata.creationTimestamp
    42      name: Age
    43      type: date
          # Ready: the `status` field of the Ready condition.
    44    - description: When 'True', the most recent reconcile of the resource succeeded
    45      jsonPath: .status.conditions[?(@.type=='Ready')].status
    46      name: Ready
    47      type: string
          # Status: the `reason` field of the Ready condition.
    48    - description: The reason for the value in 'Ready'
    49      jsonPath: .status.conditions[?(@.type=='Ready')].reason
    50      name: Status
    51      type: string
          # Status Age: the `lastTransitionTime` field of the Ready condition.
    52    - description: The last transition time for the value in 'Status'
    53      jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
    54      name: Status Age
    55      type: date
    56    name: v1alpha1
          # Validation schema applied to v1alpha1 objects.
    57    schema:
    58      openAPIV3Schema:
    59        properties:
                # Standard Kubernetes type fields; both are plain strings.
    60          apiVersion:
    61            description: 'apiVersion defines the versioned schema of this representation
    62              of an object. Servers should convert recognized schemas to the latest
    63              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
    64            type: string
    65          kind:
    66            description: 'kind is a string value representing the REST resource this
    67              object represents. Servers may infer this from the endpoint the client
    68              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
    69            type: string
                # Object metadata is only constrained to be an object here.
    70          metadata:
    71            type: object
    72          spec:
                  # Desired-state fields of the Endpoint. The `required` list that
                  # closes this mapping names displayName, location and projectRef.
    73            properties:
    74              description:
    75                description: The description of the Endpoint.
    76                type: string
    77              displayName:
    78                description: Required. The display name of the Endpoint. The name
    79                  can be up to 128 characters long and can consist of any UTF-8 characters.
    80                type: string
                    # Optional object; when present, kmsKeyName must be supplied.
    81              encryptionSpec:
    82                description: Immutable. Customer-managed encryption key spec for an
    83                  Endpoint. If set, this Endpoint and all sub-resources of this Endpoint
    84                  will be secured by this key.
    85                properties:
    86                  kmsKeyName:
    87                    description: 'Immutable. Required. The Cloud KMS resource identifier
    88                      of the customer managed encryption key used to protect a resource.
    89                      Has the form: ''projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key''.
    90                      The key needs to be in the same region as where the compute
    91                      resource is created.'
    92                    type: string
    93                required:
    94                - kmsKeyName
    95                type: object
    96              location:
    97                description: Immutable. The location for the resource.
    98                type: string
    99              network:
                      # Optional VPC network to peer the Endpoint with, given as a
                      # full resource path. The description is single-quoted, so
                      # literal apostrophes inside it are written doubled ('').
   100                description: 'Immutable. The full name of the Google Compute Engine
   101                  [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks)
   102                  to which the Endpoint should be peered. Private services access
   103                  must already be configured for the network. If left unspecified,
   104                  the Endpoint is not peered with any network. Only one of the fields,
   105                  network or enable_private_service_connect, can be set. [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
   106                  ''projects/{project}/global/networks/{network}''. Where ''{project}''
   107                  is a project number, as in ''12345'', and ''{network}'' is network
   108                  name.'
   109                type: string
   110              projectRef:
   111                description: The project that this resource belongs to.
                      # Exactly one of the two shapes below must match.
   112                oneOf:
                      # Shape 1: `name` is set and `external` is absent.
   113                - not:
   114                    required:
   115                    - external
   116                  required:
   117                  - name
                      # Shape 2: `external` is set and neither `name` nor
                      # `namespace` is present.
   118                - not:
   119                    anyOf:
   120                    - required:
   121                      - name
   122                    - required:
   123                      - namespace
   124                  required:
   125                  - external
   126                properties:
   127                  external:
   128                    description: 'Allowed value: The `name` field of a `Project` resource.'
   129                    type: string
   130                  name:
   131                    description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
   132                    type: string
   133                  namespace:
   134                    description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
   135                    type: string
   136                type: object
   137              region:
                      # NOTE(review): `region` sits alongside `location`, but only
                      # `location` appears in the required list below.
   138                description: Immutable. The region for the resource.
   139                type: string
   140              resourceID:
   141                description: Immutable. Optional. The name of the resource. Used for
   142                  creation and acquisition. When unset, the value of `metadata.name`
   143                  is used as the default.
   144                type: string
                  # Fields that every VertexAIEndpoint spec must provide.
   145            required:
   146            - displayName
   147            - location
   148            - projectRef
   149            type: object
   150          status:
                  # Observed-state fields of the resource.
   151            properties:
                    # List of condition objects; every field of a condition is a
                    # string. The printer columns read the entry whose type is
                    # 'Ready'.
   152              conditions:
   153                description: Conditions represent the latest available observation
   154                  of the resource's current state.
   155                items:
   156                  properties:
                          # Typed as a plain string here, although the 'Status Age'
                          # printer column renders it as a date.
   157                    lastTransitionTime:
   158                      description: Last time the condition transitioned from one status
   159                        to another.
   160                      type: string
   161                    message:
   162                      description: Human-readable message indicating details about
   163                        last transition.
   164                      type: string
   165                    reason:
   166                      description: Unique, one-word, CamelCase reason for the condition's
   167                        last transition.
   168                      type: string
   169                    status:
   170                      description: Status is the status of the condition. Can be True,
   171                        False, Unknown.
   172                      type: string
   173                    type:
   174                      description: Type is the type of the condition.
   175                      type: string
   176                  type: object
   177                type: array
   178              createTime:
   179                description: Output only. Timestamp when this Endpoint was created.
   180                type: string
                    # Array of per-model deployment records; the item schema follows
                    # under `items.properties`.
   181              deployedModels:
   182                description: Output only. The models deployed in this Endpoint. To
   183                  add or remove DeployedModels use EndpointService.DeployModel and
   184                  EndpointService.UndeployModel respectively. Models can also be deployed
   185                  and undeployed using the [Cloud Console](https://console.cloud.google.com/vertex-ai/).
   186                items:
   187                  properties:
   188                    automaticResources:
                            # Modelled as an array of objects, each carrying optional
                            # integer replica bounds.
   189                      description: A description of resources that to a large degree
   190                        are decided by Vertex AI, and require only a modest additional
   191                        configuration.
   192                      items:
   193                        properties:
   194                          maxReplicaCount:
   195                            description: The maximum number of replicas this DeployedModel
   196                              may be deployed on when the traffic against it increases.
   197                              If the requested value is too large, the deployment
   198                              will error, but if deployment succeeds then the ability
   199                              to scale the model to that many replicas is guaranteed
   200                              (barring service outages). If traffic against the DeployedModel
   201                              increases beyond what its replicas at maximum may handle,
   202                              a portion of the traffic will be dropped. If this value
   203                              is not provided, no upper bound for scaling under
   204                              heavy traffic will be assumed, though Vertex AI may be
   205                              unable to scale beyond a certain replica number.
   206                            type: integer
   207                          minReplicaCount:
   208                            description: The minimum number of replicas this DeployedModel
   209                              will be always deployed on. If traffic against it increases,
   210                              it may dynamically be deployed onto more replicas up
   211                              to max_replica_count, and as traffic decreases, some
   212                              of these extra replicas may be freed. If the requested
   213                              value is too large, the deployment will error.
   214                            type: integer
   215                        type: object
   216                      type: array
                          # Creation timestamp of the individual DeployedModel, as a string.
   217                    createTime:
   218                      description: Output only. Timestamp when the DeployedModel was
   219                        created.
   220                      type: string
   221                    dedicatedResources:
                            # Modelled as an array of objects holding autoscaling metric
                            # overrides, the machine spec and replica bounds.
   222                      description: A description of resources that are dedicated to
   223                        the DeployedModel, and that need a higher degree of manual
   224                        configuration.
   225                      items:
   226                        properties:
                                # Each entry pairs a metric name (string) with an integer
                                # target percentage.
   227                          autoscalingMetricSpecs:
   228                            description: The metric specifications that override
   229                              a resource utilization metric (CPU utilization, accelerator's
   230                              duty cycle, and so on) target value (default to 60 if
   231                              not set). At most one entry is allowed per metric. If
   232                              machine_spec.accelerator_count is above 0, the autoscaling
   233                              will be based on both CPU utilization and accelerator's
   234                              duty cycle metrics and scale up when either metric
   235                              exceeds its target value while scale down if both metrics
   236                              are under their target value. The default target value
   237                              is 60 for both metrics. If machine_spec.accelerator_count
   238                              is 0, the autoscaling will be based on CPU utilization
   239                              metric only with default target value 60 if not explicitly
   240                              set. For example, in the case of Online Prediction,
   241                              if you want to override target CPU utilization to 80,
   242                              you should set autoscaling_metric_specs.metric_name
   243                              to 'aiplatform.googleapis.com/prediction/online/cpu/utilization'
   244                              and autoscaling_metric_specs.target to '80'.
   245                            items:
   246                              properties:
   247                                metricName:
   248                                  description: 'The resource metric name. Supported
   249                                    metrics: * For Online Prediction: * ''aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle''
   250                                    * ''aiplatform.googleapis.com/prediction/online/cpu/utilization''.'
   251                                  type: string
   252                                target:
   253                                  description: The target resource utilization in
   254                                    percentage (1% - 100%) for the given metric; once
   255                                    the real usage deviates from the target by a certain
   256                                    percentage, the machine replicas change. The default
   257                                    value is 60 (representing 60%) if not provided.
   258                                  type: integer
   259                              type: object
   260                            type: array
   261                          machineSpec:
                                  # Modelled as an array of objects: accelerator count
                                  # (integer), accelerator type and machine type (strings).
   262                            description: The specification of a single machine used
   263                              by the prediction.
   264                            items:
   265                              properties:
   266                                acceleratorCount:
   267                                  description: The number of accelerators to attach
   268                                    to the machine.
   269                                  type: integer
   270                                acceleratorType:
   271                                  description: The type of accelerator(s) that may
   272                                    be attached to the machine as per accelerator_count.
   273                                    See possible values [here](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType).
   274                                  type: string
   275                                machineType:
                                        # NOTE(review): the description below ends with an
                                        # upstream 'TODO(rsurowka)' note that is exposed to
                                        # readers of this schema; consider fixing it at the
                                        # generator source.
   276                                  description: 'The type of the machine. See the [list
   277                                    of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types)
   278                                    See the [list of machine types supported for custom
   279                                    training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
   280                                    For DeployedModel this field is optional, and
   281                                    the default value is ''n1-standard-2''. For BatchPredictionJob
   282                                    or as part of WorkerPoolSpec this field is required.
   283                                    TODO(rsurowka): Try to better unify the required
   284                                    vs optional.'
   285                                  type: string
   286                              type: object
   287                            type: array
   288                          maxReplicaCount:
                                  # Integer upper replica bound for dedicated resources;
                                  # the description states it defaults to min_replica_count.
   289                            description: The maximum number of replicas this DeployedModel
   290                              may be deployed on when the traffic against it increases.
   291                              If the requested value is too large, the deployment
   292                              will error, but if deployment succeeds then the ability
   293                              to scale the model to that many replicas is guaranteed
   294                              (barring service outages). If traffic against the DeployedModel
   295                              increases beyond what its replicas at maximum may handle,
   296                              a portion of the traffic will be dropped. If this value
   297                              is not provided, will use min_replica_count as the default
   298                              value. The value of this field impacts the charge against
   299                              Vertex CPU and GPU quotas. Specifically, you will be
   300                              charged for (max_replica_count * number of cores in the
   301                              selected machine type) and (max_replica_count * number
   302                              of GPUs per replica in the selected machine type).
   303                            type: integer
   304                          minReplicaCount:
   305                            description: The minimum number of machine replicas this
   306                              DeployedModel will be always deployed on. This value
   307                              must be greater than or equal to 1. If traffic against
   308                              the DeployedModel increases, it may dynamically be deployed
   309                              onto more replicas, and as traffic decreases, some of
   310                              these extra replicas may be freed.
   311                            type: integer
   312                        type: object
   313                      type: array
   314                    displayName:
   315                      description: The display name of the DeployedModel. If not provided
   316                        upon creation, the Model's display_name is used.
   317                      type: string
                          # The two logging switches below are booleans.
   318                    enableAccessLogging:
   319                      description: These logs are like standard server access logs,
   320                        containing information like timestamp and latency for each
   321                        prediction request. Note that Stackdriver logs may incur a
   322                        cost, especially if your project receives prediction requests
   323                        at a high queries per second rate (QPS). Estimate your costs
   324                        before enabling this option.
   325                      type: boolean
   326                    enableContainerLogging:
   327                      description: If true, the container of the DeployedModel instances
   328                        will send 'stderr' and 'stdout' streams to Stackdriver Logging.
   329                        Only supported for custom-trained Models and AutoML Tabular
   330                        Models.
   331                      type: boolean
                          # Typed as a string even though the description restricts it
                          # to digits.
   332                    id:
   333                      description: The ID of the DeployedModel. If not provided upon
   334                        deployment, Vertex AI will generate a value for this ID. This
   335                        value should be 1-10 characters, and valid characters are
   336                        /[0-9]/.
   337                      type: string
   338                    model:
   339                      description: The name of the Model that this is the deployment
   340                        of. Note that the Model may be in a different location than
   341                        the DeployedModel's Endpoint.
   342                      type: string
   343                    modelVersionId:
   344                      description: Output only. The version ID of the model that is
   345                        deployed.
   346                      type: string
   347                    privateEndpoints:
                            # Array of objects carrying three HTTP(S) URIs plus the
                            # service attachment name; all four fields are strings.
   348                      description: Output only. Provide paths for users to send predict/explain/health
   349                        requests directly to the deployed model services running on
   350                        Cloud via private services access. This field is populated
   351                        if network is configured.
   352                      items:
   353                        properties:
   354                          explainHttpUri:
   355                            description: Output only. Http(s) path to send explain
   356                              requests.
   357                            type: string
   358                          healthHttpUri:
   359                            description: Output only. Http(s) path to send health
   360                              check requests.
   361                            type: string
   362                          predictHttpUri:
   363                            description: Output only. Http(s) path to send prediction
   364                              requests.
   365                            type: string
   366                          serviceAttachment:
   367                            description: Output only. The name of the service attachment
   368                              resource. Populated if private service connect is enabled.
   369                            type: string
   370                        type: object
   371                      type: array
   372                    serviceAccount:
   373                      description: The service account that the DeployedModel's container
   374                        runs as. Specify the email address of the service account.
   375                        If this service account is not specified, the container runs
   376                        as a service account that doesn't have access to the resource
   377                        project. Users deploying the Model must have the 'iam.serviceAccounts.actAs'
   378                        permission on this service account.
   379                      type: string
   380                    sharedResources:
   381                      description: 'The resource name of the shared DeploymentResourcePool
   382                        to deploy on. Format: projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}.'
   383                      type: string
                        # Closes the deployedModels item schema and the array itself.
   384                  type: object
   385                type: array
   386              etag:
   387                description: Used to perform consistent read-modify-write updates.
   388                  If not set, a blind "overwrite" update happens.
   389                type: string
   390              modelDeploymentMonitoringJob:
   391                description: 'Output only. Resource name of the Model Monitoring job
   392                  associated with this Endpoint if monitoring is enabled by CreateModelDeploymentMonitoringJob.
   393                  Format: ''projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}''.'
   394                type: string
                    # Integer generation counter, to be compared with metadata.generation.
   395              observedGeneration:
   396                description: ObservedGeneration is the generation of the resource
   397                  that was most recently observed by the Config Connector controller.
   398                  If this is equal to metadata.generation, then that means that the
   399                  current reported status reflects the most recent desired state of
   400                  the resource.
   401                type: integer
   402              updateTime:
   403                description: Output only. Timestamp when this Endpoint was last updated.
   404                type: string
                  # Closes the status schema.
   405            type: object
   406        required:
              # `spec` is the only top-level field the schema requires.
   407        - spec
   408        type: object
          # v1alpha1 is both served and marked as the storage version.
   409    served: true
   410    storage: true
          # Declares the status subresource with an empty configuration.
   411    subresources:
   412      status: {}
   413status:
        # Status of the CRD object itself; every field is an empty placeholder
        # in this manifest.
   414  acceptedNames:
   415    kind: ""
   416    plural: ""
   417  conditions: []
   418  storedVersions: []
View as plain text