/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"math"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	componentbaseconfig "k8s.io/component-base/config"
)

const (
	// DefaultKubeSchedulerPort is the default port for the scheduler status server.
	// May be overridden by a flag at startup.
	DefaultKubeSchedulerPort = 10259
)

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// KubeSchedulerConfiguration configures a scheduler.
type KubeSchedulerConfiguration struct {
	// TypeMeta contains the API version and kind. In kube-scheduler, after
	// conversion from the versioned KubeSchedulerConfiguration type to this
	// internal type, we set the APIVersion field to the scheme group/version of
	// the type we converted from. This is done in cmd/kube-scheduler in two
	// places: (1) when loading config from a file, (2) generating the default
	// config. Based on the versioned type set in this field, we make decisions;
	// for example (1) during validation to check for usage of removed plugins,
	// (2) writing config to a file, (3) initialising the scheduler.
	metav1.TypeMeta

	// Parallelism defines the amount of parallelism in algorithms for scheduling Pods.
	// Must be greater than 0. Defaults to 16.
	Parallelism int32

	// LeaderElection defines the configuration of leader election client.
	LeaderElection componentbaseconfig.LeaderElectionConfiguration

	// ClientConnection specifies the kubeconfig file and client connection
	// settings to use when communicating with the apiserver.
	ClientConnection componentbaseconfig.ClientConnectionConfiguration

	// DebuggingConfiguration holds configuration for Debugging related features.
	// TODO: We might wanna make this a substruct like Debugging componentbaseconfig.DebuggingConfiguration
	componentbaseconfig.DebuggingConfiguration

	// PercentageOfNodesToScore is the percentage of all nodes that once found feasible
	// for running a pod, the scheduler stops its search for more feasible nodes in
	// the cluster. This helps improve scheduler's performance. Scheduler always tries to find
	// at least "minFeasibleNodesToFind" feasible nodes no matter what the value of this flag is.
	// Example: if the cluster size is 500 nodes and the value of this flag is 30,
	// then scheduler stops finding further feasible nodes once it finds 150 feasible ones.
	// When the value is 0, default percentage (5%--50% based on the size of the cluster) of the
	// nodes will be scored. It is overridden by profile level PercentageOfNodesToScore.
	PercentageOfNodesToScore *int32

	// PodInitialBackoffSeconds is the initial backoff for unschedulable pods.
	// If specified, it must be greater than 0. If this value is null, the default value (1s)
	// will be used.
	// NOTE(review): this internal field is a plain int64 and cannot be null here; the
	// "null" wording presumably describes the versioned type — confirm.
	PodInitialBackoffSeconds int64

	// PodMaxBackoffSeconds is the max backoff for unschedulable pods.
	// If specified, it must be greater than or equal to podInitialBackoffSeconds. If this value is null,
	// the default value (10s) will be used.
	// NOTE(review): this internal field is a plain int64 and cannot be null here; the
	// "null" wording presumably describes the versioned type — confirm.
	PodMaxBackoffSeconds int64

	// Profiles are scheduling profiles that kube-scheduler supports. Pods can
	// choose to be scheduled under a particular profile by setting its associated
	// scheduler name. Pods that don't specify any scheduler name are scheduled
	// with the "default-scheduler" profile, if present here.
	Profiles []KubeSchedulerProfile

	// Extenders are the list of scheduler extenders, each holding the values of how to communicate
	// with the extender. These extenders are shared by all scheduler profiles.
	Extenders []Extender

	// DelayCacheUntilActive specifies when to start caching. If this is true and leader election is enabled,
	// the scheduler will wait to fill informer caches until it is the leader. Doing so will have slower
	// failover with the benefit of lower memory overhead while waiting to become leader.
	// Defaults to false.
	DelayCacheUntilActive bool
}

// KubeSchedulerProfile is a scheduling profile.
type KubeSchedulerProfile struct {
	// SchedulerName is the name of the scheduler associated with this profile.
	// If SchedulerName matches with the pod's "spec.schedulerName", then the pod
	// is scheduled with this profile.
	SchedulerName string

	// PercentageOfNodesToScore is the percentage of all nodes that once found feasible
	// for running a pod, the scheduler stops its search for more feasible nodes in
	// the cluster. This helps improve scheduler's performance. Scheduler always tries to find
	// at least "minFeasibleNodesToFind" feasible nodes no matter what the value of this flag is.
	// Example: if the cluster size is 500 nodes and the value of this flag is 30,
	// then scheduler stops finding further feasible nodes once it finds 150 feasible ones.
	// When the value is 0, default percentage (5%--50% based on the size of the cluster) of the
	// nodes will be scored. It will override global PercentageOfNodesToScore. If it is empty,
	// global PercentageOfNodesToScore will be used.
	PercentageOfNodesToScore *int32

	// Plugins specify the set of plugins that should be enabled or disabled.
	// Enabled plugins are the ones that should be enabled in addition to the
	// default plugins. Disabled plugins are any of the default plugins that
	// should be disabled.
	// When no enabled or disabled plugin is specified for an extension point,
	// default plugins for that extension point will be used if there is any.
	// If a QueueSort plugin is specified, the same QueueSort Plugin and
	// PluginConfig must be specified for all profiles.
	Plugins *Plugins

	// PluginConfig is an optional set of custom plugin arguments for each plugin.
	// Omitting config args for a plugin is equivalent to using the default config
	// for that plugin.
	PluginConfig []PluginConfig
}

// Plugins include multiple extension points. When specified, the list of plugins for
// a particular extension point are the only ones enabled. If an extension point is
// omitted from the config, then the default set of plugins is used for that extension point.
// Enabled plugins are called in the order specified here, after default plugins. If they need to
// be invoked before default plugins, default plugins must be disabled and re-enabled here in desired order.
type Plugins struct {
	// PreEnqueue is a list of plugins that should be invoked before adding pods to the scheduling queue.
	PreEnqueue PluginSet

	// QueueSort is a list of plugins that should be invoked when sorting pods in the scheduling queue.
	QueueSort PluginSet

	// PreFilter is a list of plugins that should be invoked at "PreFilter" extension point of the scheduling framework.
	PreFilter PluginSet

	// Filter is a list of plugins that should be invoked when filtering out nodes that cannot run the Pod.
	Filter PluginSet

	// PostFilter is a list of plugins that are invoked after filtering phase, but only when no feasible nodes were found for the pod.
	PostFilter PluginSet

	// PreScore is a list of plugins that are invoked before scoring.
	PreScore PluginSet

	// Score is a list of plugins that should be invoked when ranking nodes that have passed the filtering phase.
	Score PluginSet

	// Reserve is a list of plugins invoked when reserving/unreserving resources
	// after a node is assigned to run the pod.
	Reserve PluginSet

	// Permit is a list of plugins that control binding of a Pod. These plugins can prevent or delay binding of a Pod.
	Permit PluginSet

	// PreBind is a list of plugins that should be invoked before a pod is bound.
	PreBind PluginSet

	// Bind is a list of plugins that should be invoked at "Bind" extension point of the scheduling framework.
	// The scheduler calls these plugins in order. Scheduler skips the rest of these plugins as soon as one returns success.
	Bind PluginSet

	// PostBind is a list of plugins that should be invoked after a pod is successfully bound.
	PostBind PluginSet

	// MultiPoint is a simplified config field for enabling plugins for all valid extension points.
	MultiPoint PluginSet
}

// PluginSet specifies enabled and disabled plugins for an extension point.
// If an array is empty, missing, or nil, default plugins at that extension point will be used.
type PluginSet struct {
	// Enabled specifies plugins that should be enabled in addition to default plugins.
	// These are called after default plugins and in the same order specified here.
	Enabled []Plugin
	// Disabled specifies default plugins that should be disabled.
	// When all default plugins need to be disabled, an array containing only one "*" should be provided.
	Disabled []Plugin
}

// Plugin specifies a plugin name and its weight when applicable. Weight is used only for Score plugins.
type Plugin struct {
	// Name defines the name of the plugin.
	Name string
	// Weight defines the weight of the plugin, only used for Score plugins.
	Weight int32
}

// PluginConfig specifies arguments that should be passed to a plugin at the time of initialization.
// A plugin that is invoked at multiple extension points is initialized once. Args can have arbitrary structure.
// It is up to the plugin to process these Args.
type PluginConfig struct {
	// Name defines the name of the plugin being configured.
	Name string
	// Args defines the arguments passed to the plugins at the time of initialization. Args can have arbitrary structure.
	Args runtime.Object
}

/*
 * NOTE: The following variables and methods are intentionally left out of the staging mirror.
 */
const (
	// DefaultPercentageOfNodesToScore defines the percentage of nodes of all nodes
	// that once found feasible, the scheduler stops looking for more nodes.
	// A value of 0 means adaptive, meaning the scheduler figures out a proper default.
	DefaultPercentageOfNodesToScore = 0

	// MaxCustomPriorityScore is the max score UtilizationShapePoint expects.
	MaxCustomPriorityScore int64 = 10

	// MaxTotalScore is the maximum total score.
	MaxTotalScore int64 = math.MaxInt64

	// MaxWeight defines the max weight value allowed for custom PriorityPolicy.
	// It is MaxTotalScore divided by MaxCustomPriorityScore.
	MaxWeight = MaxTotalScore / MaxCustomPriorityScore
)

// Names returns the list of enabled plugin names.
230 func (p *Plugins) Names() []string { 231 if p == nil { 232 return nil 233 } 234 extensions := []PluginSet{ 235 p.PreEnqueue, 236 p.PreFilter, 237 p.Filter, 238 p.PostFilter, 239 p.Reserve, 240 p.PreScore, 241 p.Score, 242 p.PreBind, 243 p.Bind, 244 p.PostBind, 245 p.Permit, 246 p.QueueSort, 247 } 248 n := sets.New[string]() 249 for _, e := range extensions { 250 for _, pg := range e.Enabled { 251 n.Insert(pg.Name) 252 } 253 } 254 return sets.List(n) 255 } 256 257 // Extender holds the parameters used to communicate with the extender. If a verb is unspecified/empty, 258 // it is assumed that the extender chose not to provide that extension. 259 type Extender struct { 260 // URLPrefix at which the extender is available 261 URLPrefix string 262 // Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender. 263 FilterVerb string 264 // Verb for the preempt call, empty if not supported. This verb is appended to the URLPrefix when issuing the preempt call to extender. 265 PreemptVerb string 266 // Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender. 267 PrioritizeVerb string 268 // The numeric multiplier for the node scores that the prioritize call generates. 269 // The weight should be a positive integer 270 Weight int64 271 // Verb for the bind call, empty if not supported. This verb is appended to the URLPrefix when issuing the bind call to extender. 272 // If this method is implemented by the extender, it is the extender's responsibility to bind the pod to apiserver. Only one extender 273 // can implement this function. 
274 BindVerb string 275 // EnableHTTPS specifies whether https should be used to communicate with the extender 276 EnableHTTPS bool 277 // TLSConfig specifies the transport layer security config 278 TLSConfig *ExtenderTLSConfig 279 // HTTPTimeout specifies the timeout duration for a call to the extender. Filter timeout fails the scheduling of the pod. Prioritize 280 // timeout is ignored, k8s/other extenders priorities are used to select the node. 281 HTTPTimeout metav1.Duration 282 // NodeCacheCapable specifies that the extender is capable of caching node information, 283 // so the scheduler should only send minimal information about the eligible nodes 284 // assuming that the extender already cached full details of all nodes in the cluster 285 NodeCacheCapable bool 286 // ManagedResources is a list of extended resources that are managed by 287 // this extender. 288 // - A pod will be sent to the extender on the Filter, Prioritize and Bind 289 // (if the extender is the binder) phases iff the pod requests at least 290 // one of the extended resources in this list. If empty or unspecified, 291 // all pods will be sent to this extender. 292 // - If IgnoredByScheduler is set to true for a resource, kube-scheduler 293 // will skip checking the resource in predicates. 294 // +optional 295 ManagedResources []ExtenderManagedResource 296 // Ignorable specifies if the extender is ignorable, i.e. scheduling should not 297 // fail when the extender returns an error or is not reachable. 298 Ignorable bool 299 } 300 301 // ExtenderManagedResource describes the arguments of extended resources 302 // managed by an extender. 303 type ExtenderManagedResource struct { 304 // Name is the extended resource name. 305 Name string 306 // IgnoredByScheduler indicates whether kube-scheduler should ignore this 307 // resource when applying predicates. 
308 IgnoredByScheduler bool 309 } 310 311 // ExtenderTLSConfig contains settings to enable TLS with extender 312 type ExtenderTLSConfig struct { 313 // Server should be accessed without verifying the TLS certificate. For testing only. 314 Insecure bool 315 // ServerName is passed to the server for SNI and is used in the client to check server 316 // certificates against. If ServerName is empty, the hostname used to contact the 317 // server is used. 318 ServerName string 319 320 // Server requires TLS client certificate authentication 321 CertFile string 322 // Server requires TLS client certificate authentication 323 KeyFile string 324 // Trusted root certificates for server 325 CAFile string 326 327 // CertData holds PEM-encoded bytes (typically read from a client certificate file). 328 // CertData takes precedence over CertFile 329 CertData []byte 330 // KeyData holds PEM-encoded bytes (typically read from a client certificate key file). 331 // KeyData takes precedence over KeyFile 332 KeyData []byte `datapolicy:"security-key"` 333 // CAData holds PEM-encoded bytes (typically read from a root certificates bundle). 334 // CAData takes precedence over CAFile 335 CAData []byte 336 } 337