# File: apiextensions.k8s.io_v1_customresourcedefinition_vertexaiendpoints.vertexai.cnrm.cloud.google.com.yaml
#
# Documentation: github.com/GoogleCloudPlatform/k8s-config-connector/config/crds/resources

     1apiVersion: apiextensions.k8s.io/v1
     2kind: CustomResourceDefinition
     3metadata:
     4  annotations:
     5    cnrm.cloud.google.com/version: 0.0.0-dev
     6  creationTimestamp: null
     7  labels:
     8    cnrm.cloud.google.com/managed-by-kcc: "true"
     9    cnrm.cloud.google.com/stability-level: alpha
    10    cnrm.cloud.google.com/system: "true"
    11    cnrm.cloud.google.com/tf2crd: "true"
    12  name: vertexaiendpoints.vertexai.cnrm.cloud.google.com
    13spec:
    14  group: vertexai.cnrm.cloud.google.com
    15  names:
    16    categories:
    17    - gcp
    18    kind: VertexAIEndpoint
    19    plural: vertexaiendpoints
    20    shortNames:
    21    - gcpvertexaiendpoint
    22    - gcpvertexaiendpoints
    23    singular: vertexaiendpoint
    24  preserveUnknownFields: false
    25  scope: Namespaced
    26  versions:
    27  - additionalPrinterColumns:
    28    - jsonPath: .metadata.creationTimestamp
    29      name: Age
    30      type: date
    31    - description: When 'True', the most recent reconcile of the resource succeeded
    32      jsonPath: .status.conditions[?(@.type=='Ready')].status
    33      name: Ready
    34      type: string
    35    - description: The reason for the value in 'Ready'
    36      jsonPath: .status.conditions[?(@.type=='Ready')].reason
    37      name: Status
    38      type: string
    39    - description: The last transition time for the value in 'Status'
    40      jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
    41      name: Status Age
    42      type: date
    43    name: v1alpha1
    44    schema:
    45      openAPIV3Schema:
    46        properties:
    47          apiVersion:
    48            description: 'apiVersion defines the versioned schema of this representation
    49              of an object. Servers should convert recognized schemas to the latest
    50              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
    51            type: string
    52          kind:
    53            description: 'kind is a string value representing the REST resource this
    54              object represents. Servers may infer this from the endpoint the client
    55              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
    56            type: string
    57          metadata:
    58            type: object
    59          spec:
    60            properties:
    61              description:
    62                description: The description of the Endpoint.
    63                type: string
    64              displayName:
    65                description: Required. The display name of the Endpoint. The name
    66                  can be up to 128 characters long and can consist of any UTF-8 characters.
    67                type: string
    68              encryptionSpec:
    69                description: Immutable. Customer-managed encryption key spec for an
    70                  Endpoint. If set, this Endpoint and all sub-resources of this Endpoint
    71                  will be secured by this key.
    72                properties:
    73                  kmsKeyName:
    74                    description: 'Immutable. Required. The Cloud KMS resource identifier
    75                      of the customer managed encryption key used to protect a resource.
    76                      Has the form: ''projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key''.
    77                      The key needs to be in the same region as where the compute
    78                      resource is created.'
    79                    type: string
    80                required:
    81                - kmsKeyName
    82                type: object
    83              location:
    84                description: Immutable. The location for the resource.
    85                type: string
    86              network:
    87                description: 'Immutable. The full name of the Google Compute Engine
    88                  [network](https://cloud.google.com//compute/docs/networks-and-firewalls#networks)
    89                  to which the Endpoint should be peered. Private services access
    90                  must already be configured for the network. If left unspecified,
    91                  the Endpoint is not peered with any network. Only one of the fields,
    92                  network or enable_private_service_connect, can be set. [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
    93                  ''projects/{project}/global/networks/{network}''. Where ''{project}''
    94                  is a project number, as in ''12345'', and ''{network}'' is network
    95                  name.'
    96                type: string
    97              projectRef:
    98                description: The project that this resource belongs to.
    99                oneOf:
   100                - not:
   101                    required:
   102                    - external
   103                  required:
   104                  - name
   105                - not:
   106                    anyOf:
   107                    - required:
   108                      - name
   109                    - required:
   110                      - namespace
   111                  required:
   112                  - external
   113                properties:
   114                  external:
   115                    description: 'Allowed value: The `name` field of a `Project` resource.'
   116                    type: string
   117                  name:
   118                    description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
   119                    type: string
   120                  namespace:
   121                    description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
   122                    type: string
   123                type: object
   124              region:
   125                description: Immutable. The region for the resource.
   126                type: string
   127              resourceID:
   128                description: Immutable. Optional. The name of the resource. Used for
   129                  creation and acquisition. When unset, the value of `metadata.name`
   130                  is used as the default.
   131                type: string
   132            required:
   133            - displayName
   134            - location
   135            - projectRef
   136            type: object
   137          status:
   138            properties:
   139              conditions:
   140                description: Conditions represent the latest available observation
   141                  of the resource's current state.
   142                items:
   143                  properties:
   144                    lastTransitionTime:
   145                      description: Last time the condition transitioned from one status
   146                        to another.
   147                      type: string
   148                    message:
   149                      description: Human-readable message indicating details about
   150                        last transition.
   151                      type: string
   152                    reason:
   153                      description: Unique, one-word, CamelCase reason for the condition's
   154                        last transition.
   155                      type: string
   156                    status:
   157                      description: Status is the status of the condition. Can be True,
   158                        False, Unknown.
   159                      type: string
   160                    type:
   161                      description: Type is the type of the condition.
   162                      type: string
   163                  type: object
   164                type: array
   165              createTime:
   166                description: Output only. Timestamp when this Endpoint was created.
   167                type: string
   168              deployedModels:
   169                description: Output only. The models deployed in this Endpoint. To
   170                  add or remove DeployedModels use EndpointService.DeployModel and
   171                  EndpointService.UndeployModel respectively. Models can also be deployed
   172                  and undeployed using the [Cloud Console](https://console.cloud.google.com/vertex-ai/).
   173                items:
   174                  properties:
   175                    automaticResources:
   176                      description: A description of resources that to large degree
   177                        are decided by Vertex AI, and require only a modest additional
   178                        configuration.
   179                      items:
   180                        properties:
   181                          maxReplicaCount:
   182                            description: The maximum number of replicas this DeployedModel
   183                              may be deployed on when the traffic against it increases.
   184                              If the requested value is too large, the deployment
   185                              will error, but if deployment succeeds then the ability
   186                              to scale the model to that many replicas is guaranteed
   187                              (barring service outages). If traffic against the DeployedModel
   188                              increases beyond what its replicas at maximum may handle,
   189                              a portion of the traffic will be dropped. If this value
   190                              is not provided, a no upper bound for scaling under
   191                              heavy traffic will be assume, though Vertex AI may be
   192                              unable to scale beyond certain replica number.
   193                            type: integer
   194                          minReplicaCount:
   195                            description: The minimum number of replicas this DeployedModel
   196                              will be always deployed on. If traffic against it increases,
   197                              it may dynamically be deployed onto more replicas up
   198                              to max_replica_count, and as traffic decreases, some
   199                              of these extra replicas may be freed. If the requested
   200                              value is too large, the deployment will error.
   201                            type: integer
   202                        type: object
   203                      type: array
   204                    createTime:
   205                      description: Output only. Timestamp when the DeployedModel was
   206                        created.
   207                      type: string
   208                    dedicatedResources:
   209                      description: A description of resources that are dedicated to
   210                        the DeployedModel, and that need a higher degree of manual
   211                        configuration.
   212                      items:
   213                        properties:
   214                          autoscalingMetricSpecs:
   215                            description: The metric specifications that overrides
   216                              a resource utilization metric (CPU utilization, accelerator's
   217                              duty cycle, and so on) target value (default to 60 if
   218                              not set). At most one entry is allowed per metric. If
   219                              machine_spec.accelerator_count is above 0, the autoscaling
   220                              will be based on both CPU utilization and accelerator's
   221                              duty cycle metrics and scale up when either metrics
   222                              exceeds its target value while scale down if both metrics
   223                              are under their target value. The default target value
   224                              is 60 for both metrics. If machine_spec.accelerator_count
   225                              is 0, the autoscaling will be based on CPU utilization
   226                              metric only with default target value 60 if not explicitly
   227                              set. For example, in the case of Online Prediction,
   228                              if you want to override target CPU utilization to 80,
   229                              you should set autoscaling_metric_specs.metric_name
   230                              to 'aiplatform.googleapis.com/prediction/online/cpu/utilization'
   231                              and autoscaling_metric_specs.target to '80'.
   232                            items:
   233                              properties:
   234                                metricName:
   235                                  description: 'The resource metric name. Supported
   236                                    metrics: * For Online Prediction: * ''aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle''
   237                                    * ''aiplatform.googleapis.com/prediction/online/cpu/utilization''.'
   238                                  type: string
   239                                target:
   240                                  description: The target resource utilization in
   241                                    percentage (1% - 100%) for the given metric; once
   242                                    the real usage deviates from the target by a certain
   243                                    percentage, the machine replicas change. The default
   244                                    value is 60 (representing 60%) if not provided.
   245                                  type: integer
   246                              type: object
   247                            type: array
   248                          machineSpec:
   249                            description: The specification of a single machine used
   250                              by the prediction.
   251                            items:
   252                              properties:
   253                                acceleratorCount:
   254                                  description: The number of accelerators to attach
   255                                    to the machine.
   256                                  type: integer
   257                                acceleratorType:
   258                                  description: The type of accelerator(s) that may
   259                                    be attached to the machine as per accelerator_count.
   260                                    See possible values [here](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType).
   261                                  type: string
   262                                machineType:
   263                                  description: 'The type of the machine. See the [list
   264                                    of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types)
   265                                    See the [list of machine types supported for custom
   266                                    training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
   267                                    For DeployedModel this field is optional, and
   268                                    the default value is ''n1-standard-2''. For BatchPredictionJob
   269                                    or as part of WorkerPoolSpec this field is required.
   270                                    TODO(rsurowka): Try to better unify the required
   271                                    vs optional.'
   272                                  type: string
   273                              type: object
   274                            type: array
   275                          maxReplicaCount:
   276                            description: The maximum number of replicas this DeployedModel
   277                              may be deployed on when the traffic against it increases.
   278                              If the requested value is too large, the deployment
   279                              will error, but if deployment succeeds then the ability
   280                              to scale the model to that many replicas is guaranteed
   281                              (barring service outages). If traffic against the DeployedModel
   282                              increases beyond what its replicas at maximum may handle,
   283                              a portion of the traffic will be dropped. If this value
   284                              is not provided, will use min_replica_count as the default
   285                              value. The value of this field impacts the charge against
   286                              Vertex CPU and GPU quotas. Specifically, you will be
   287                              charged for max_replica_count * number of cores in the
   288                              selected machine type) and (max_replica_count * number
   289                              of GPUs per replica in the selected machine type).
   290                            type: integer
   291                          minReplicaCount:
   292                            description: The minimum number of machine replicas this
   293                              DeployedModel will be always deployed on. This value
   294                              must be greater than or equal to 1. If traffic against
   295                              the DeployedModel increases, it may dynamically be deployed
   296                              onto more replicas, and as traffic decreases, some of
   297                              these extra replicas may be freed.
   298                            type: integer
   299                        type: object
   300                      type: array
   301                    displayName:
   302                      description: The display name of the DeployedModel. If not provided
   303                        upon creation, the Model's display_name is used.
   304                      type: string
   305                    enableAccessLogging:
   306                      description: These logs are like standard server access logs,
   307                        containing information like timestamp and latency for each
   308                        prediction request. Note that Stackdriver logs may incur a
   309                        cost, especially if your project receives prediction requests
   310                        at a high queries per second rate (QPS). Estimate your costs
   311                        before enabling this option.
   312                      type: boolean
   313                    enableContainerLogging:
   314                      description: If true, the container of the DeployedModel instances
   315                        will send 'stderr' and 'stdout' streams to Stackdriver Logging.
   316                        Only supported for custom-trained Models and AutoML Tabular
   317                        Models.
   318                      type: boolean
   319                    id:
   320                      description: The ID of the DeployedModel. If not provided upon
   321                        deployment, Vertex AI will generate a value for this ID. This
   322                        value should be 1-10 characters, and valid characters are
   323                        /[0-9]/.
   324                      type: string
   325                    model:
   326                      description: The name of the Model that this is the deployment
   327                        of. Note that the Model may be in a different location than
   328                        the DeployedModel's Endpoint.
   329                      type: string
   330                    modelVersionId:
   331                      description: Output only. The version ID of the model that is
   332                        deployed.
   333                      type: string
   334                    privateEndpoints:
   335                      description: Output only. Provide paths for users to send predict/explain/health
   336                        requests directly to the deployed model services running on
   337                        Cloud via private services access. This field is populated
   338                        if network is configured.
   339                      items:
   340                        properties:
   341                          explainHttpUri:
   342                            description: Output only. Http(s) path to send explain
   343                              requests.
   344                            type: string
   345                          healthHttpUri:
   346                            description: Output only. Http(s) path to send health
   347                              check requests.
   348                            type: string
   349                          predictHttpUri:
   350                            description: Output only. Http(s) path to send prediction
   351                              requests.
   352                            type: string
   353                          serviceAttachment:
   354                            description: Output only. The name of the service attachment
   355                              resource. Populated if private service connect is enabled.
   356                            type: string
   357                        type: object
   358                      type: array
   359                    serviceAccount:
   360                      description: The service account that the DeployedModel's container
   361                        runs as. Specify the email address of the service account.
   362                        If this service account is not specified, the container runs
   363                        as a service account that doesn't have access to the resource
   364                        project. Users deploying the Model must have the 'iam.serviceAccounts.actAs'
   365                        permission on this service account.
   366                      type: string
   367                    sharedResources:
   368                      description: 'The resource name of the shared DeploymentResourcePool
   369                        to deploy on. Format: projects/{project}/locations/{location}/deploymentResourcePools/{deployment_resource_pool}.'
   370                      type: string
   371                  type: object
   372                type: array
   373              etag:
   374                description: Used to perform consistent read-modify-write updates.
   375                  If not set, a blind "overwrite" update happens.
   376                type: string
   377              modelDeploymentMonitoringJob:
   378                description: 'Output only. Resource name of the Model Monitoring job
   379                  associated with this Endpoint if monitoring is enabled by CreateModelDeploymentMonitoringJob.
   380                  Format: ''projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}''.'
   381                type: string
   382              observedGeneration:
   383                description: ObservedGeneration is the generation of the resource
   384                  that was most recently observed by the Config Connector controller.
   385                  If this is equal to metadata.generation, then that means that the
   386                  current reported status reflects the most recent desired state of
   387                  the resource.
   388                type: integer
   389              updateTime:
   390                description: Output only. Timestamp when this Endpoint was last updated.
   391                type: string
   392            type: object
   393        required:
   394        - spec
   395        type: object
   396    served: true
   397    storage: true
   398    subresources:
   399      status: {}
   400status:
   401  acceptedNames:
   402    kind: ""
   403    plural: ""
   404  conditions: []
   405  storedVersions: []
# (source viewer link: View as plain text)