replica_calculator.go

Documentation: k8s.io/kubernetes/pkg/controller/podautoscaler

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package podautoscaler
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"time"
    24  
    25  	autoscaling "k8s.io/api/autoscaling/v2"
    26  	v1 "k8s.io/api/core/v1"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/labels"
    29  	"k8s.io/apimachinery/pkg/util/sets"
    30  	corelisters "k8s.io/client-go/listers/core/v1"
    31  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    32  	metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
    33  )
    34  
const (
	// defaultTestingTolerance is default value for calculating when to
	// scale up/scale down. It has the same role as the tolerance passed to
	// NewReplicaCalculator: a usage ratio within this distance of 1.0 leaves
	// the replica count unchanged.
	// NOTE(review): judging by the name this default is meant for tests; it is
	// not referenced anywhere in this file — confirm against the test files.
	defaultTestingTolerance                     = 0.1
	// defaultTestingCPUInitializationPeriod is a two-minute default for the
	// cpuInitializationPeriod argument (presumably for tests — confirm).
	defaultTestingCPUInitializationPeriod       = 2 * time.Minute
	// defaultTestingDelayOfInitialReadinessStatus is a ten-second default for
	// the delayOfInitialReadinessStatus argument (presumably for tests — confirm).
	defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second
)
    42  
// ReplicaCalculator bundles all needed information to calculate the target amount of replicas
type ReplicaCalculator struct {
	// metricsClient is the source of resource, raw, object and external metrics.
	metricsClient                 metricsclient.MetricsClient
	// podLister lists the pods matching a selector in a namespace.
	podLister                     corelisters.PodLister
	// tolerance is the largest |1 - usageRatio| for which the current replica
	// count is returned unchanged.
	tolerance                     float64
	// cpuInitializationPeriod is forwarded to groupPods, where it bounds the
	// window after pod start in which CPU samples are treated more strictly.
	cpuInitializationPeriod       time.Duration
	// delayOfInitialReadinessStatus is forwarded to groupPods, where it is used
	// to decide whether an unready pod has ever been ready.
	delayOfInitialReadinessStatus time.Duration
}
    51  
    52  // NewReplicaCalculator creates a new ReplicaCalculator and passes all necessary information to the new instance
    53  func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podLister corelisters.PodLister, tolerance float64, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
    54  	return &ReplicaCalculator{
    55  		metricsClient:                 metricsClient,
    56  		podLister:                     podLister,
    57  		tolerance:                     tolerance,
    58  		cpuInitializationPeriod:       cpuInitializationPeriod,
    59  		delayOfInitialReadinessStatus: delayOfInitialReadinessStatus,
    60  	}
    61  }
    62  
// GetResourceReplicas calculates the desired replica count based on a target resource utilization percentage
// of the given resource for pods matching the given selector in the given namespace, and the current replica count
//
// It returns the proposed replica count, the utilization and raw utilization
// reported by metricsclient.GetResourceUtilizationRatio for the first (unpadded)
// calculation, and the timestamp returned by the metrics client.
//
// An error is returned when the metrics cannot be fetched, the pods cannot be
// listed, the selector matches no pods, no metrics remain after dropping the
// ignored and unready pods, a considered container lacks a request for the
// resource, or the utilization ratio cannot be computed.
func (c *ReplicaCalculator) GetResourceReplicas(ctx context.Context, currentReplicas int32, targetUtilization int32, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (replicaCount int32, utilization int32, rawUtilization int64, timestamp time.Time, err error) {
	metrics, timestamp, err := c.metricsClient.GetResourceMetric(ctx, resource, namespace, selector, container)
	if err != nil {
		return 0, 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
	}
	podList, err := c.podLister.Pods(namespace).List(selector)
	if err != nil {
		return 0, 0, 0, time.Time{}, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
	}
	if len(podList) == 0 {
		return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
	}

	// Classify the pods, then drop the samples of ignored and unready pods so
	// that the first ratio is computed only from pods counted as ready.
	readyPodCount, unreadyPods, missingPods, ignoredPods := groupPods(podList, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus)
	removeMetricsForPods(metrics, ignoredPods)
	removeMetricsForPods(metrics, unreadyPods)
	if len(metrics) == 0 {
		return 0, 0, 0, time.Time{}, fmt.Errorf("did not receive metrics for targeted pods (pods might be unready)")
	}

	// Requests are computed for every listed pod (not only the ready ones); a
	// missing request on any considered container aborts the calculation.
	requests, err := calculatePodRequests(podList, container, resource)
	if err != nil {
		return 0, 0, 0, time.Time{}, err
	}

	usageRatio, utilization, rawUtilization, err := metricsclient.GetResourceUtilizationRatio(metrics, requests, targetUtilization)
	if err != nil {
		return 0, 0, 0, time.Time{}, err
	}

	// Unready pods only influence the result when the first ratio asks for a scale-up.
	scaleUpWithUnready := len(unreadyPods) > 0 && usageRatio > 1.0
	if !scaleUpWithUnready && len(missingPods) == 0 {
		if math.Abs(1.0-usageRatio) <= c.tolerance {
			// return the current replicas if the change would be too small
			return currentReplicas, utilization, rawUtilization, timestamp, nil
		}

		// if we don't have any unready or missing pods, we can calculate the new replica count now
		return int32(math.Ceil(usageRatio * float64(readyPodCount))), utilization, rawUtilization, timestamp, nil
	}

	// From here on the metrics map is padded with assumed values that dampen
	// the scaling decision; a ratio of exactly 1.0 adds nothing for missing pods.
	if len(missingPods) > 0 {
		if usageRatio < 1.0 {
			// on a scale-down, treat missing pods as using 100% (all) of the resource request
			// or the utilization target for targets higher than 100%
			fallbackUtilization := int64(max(100, targetUtilization))
			for podName := range missingPods {
				metrics[podName] = metricsclient.PodMetric{Value: requests[podName] * fallbackUtilization / 100}
			}
		} else if usageRatio > 1.0 {
			// on a scale-up, treat missing pods as using 0% of the resource request
			for podName := range missingPods {
				metrics[podName] = metricsclient.PodMetric{Value: 0}
			}
		}
	}

	if scaleUpWithUnready {
		// on a scale-up, treat unready pods as using 0% of the resource request
		for podName := range unreadyPods {
			metrics[podName] = metricsclient.PodMetric{Value: 0}
		}
	}

	// re-run the utilization calculation with our new numbers
	newUsageRatio, _, _, err := metricsclient.GetResourceUtilizationRatio(metrics, requests, targetUtilization)
	if err != nil {
		// Unlike the earlier error paths, this one still reports the utilization
		// values obtained from the first calculation.
		return 0, utilization, rawUtilization, time.Time{}, err
	}

	if math.Abs(1.0-newUsageRatio) <= c.tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
		// return the current replicas if the change would be too small,
		// or if the new usage ratio would cause a change in scale direction
		return currentReplicas, utilization, rawUtilization, timestamp, nil
	}

	// len(metrics) now counts the originally sampled pods plus any padded entries.
	newReplicas := int32(math.Ceil(newUsageRatio * float64(len(metrics))))
	if (newUsageRatio < 1.0 && newReplicas > currentReplicas) || (newUsageRatio > 1.0 && newReplicas < currentReplicas) {
		// return the current replicas if the change of metrics length would cause a change in scale direction
		return currentReplicas, utilization, rawUtilization, timestamp, nil
	}

	// return the result, where the number of replicas considered is
	// however many replicas factored into our calculation
	return newReplicas, utilization, rawUtilization, timestamp, nil
}
   151  
   152  // GetRawResourceReplicas calculates the desired replica count based on a target resource usage (as a raw milli-value)
   153  // for pods matching the given selector in the given namespace, and the current replica count
   154  func (c *ReplicaCalculator) GetRawResourceReplicas(ctx context.Context, currentReplicas int32, targetUsage int64, resource v1.ResourceName, namespace string, selector labels.Selector, container string) (replicaCount int32, usage int64, timestamp time.Time, err error) {
   155  	metrics, timestamp, err := c.metricsClient.GetResourceMetric(ctx, resource, namespace, selector, container)
   156  	if err != nil {
   157  		return 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
   158  	}
   159  
   160  	replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, namespace, selector, resource)
   161  	return replicaCount, usage, timestamp, err
   162  }
   163  
   164  // GetMetricReplicas calculates the desired replica count based on a target metric usage
   165  // (as a milli-value) for pods matching the given selector in the given namespace, and the
   166  // current replica count
   167  func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, namespace string, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
   168  	metrics, timestamp, err := c.metricsClient.GetRawMetric(metricName, namespace, selector, metricSelector)
   169  	if err != nil {
   170  		return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v", metricName, err)
   171  	}
   172  
   173  	replicaCount, usage, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUsage, namespace, selector, v1.ResourceName(""))
   174  	return replicaCount, usage, timestamp, err
   175  }
   176  
   177  // calcPlainMetricReplicas calculates the desired replicas for plain (i.e. non-utilization percentage) metrics.
   178  func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUsage int64, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, usage int64, err error) {
   179  
   180  	podList, err := c.podLister.Pods(namespace).List(selector)
   181  	if err != nil {
   182  		return 0, 0, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
   183  	}
   184  
   185  	if len(podList) == 0 {
   186  		return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count")
   187  	}
   188  
   189  	readyPodCount, unreadyPods, missingPods, ignoredPods := groupPods(podList, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus)
   190  	removeMetricsForPods(metrics, ignoredPods)
   191  	removeMetricsForPods(metrics, unreadyPods)
   192  
   193  	if len(metrics) == 0 {
   194  		return 0, 0, fmt.Errorf("did not receive metrics for targeted pods (pods might be unready)")
   195  	}
   196  
   197  	usageRatio, usage := metricsclient.GetMetricUsageRatio(metrics, targetUsage)
   198  
   199  	scaleUpWithUnready := len(unreadyPods) > 0 && usageRatio > 1.0
   200  
   201  	if !scaleUpWithUnready && len(missingPods) == 0 {
   202  		if math.Abs(1.0-usageRatio) <= c.tolerance {
   203  			// return the current replicas if the change would be too small
   204  			return currentReplicas, usage, nil
   205  		}
   206  
   207  		// if we don't have any unready or missing pods, we can calculate the new replica count now
   208  		return int32(math.Ceil(usageRatio * float64(readyPodCount))), usage, nil
   209  	}
   210  
   211  	if len(missingPods) > 0 {
   212  		if usageRatio < 1.0 {
   213  			// on a scale-down, treat missing pods as using exactly the target amount
   214  			for podName := range missingPods {
   215  				metrics[podName] = metricsclient.PodMetric{Value: targetUsage}
   216  			}
   217  		} else if usageRatio > 1.0 {
   218  			// on a scale-up, treat missing pods as using 0% of the resource request
   219  			for podName := range missingPods {
   220  				metrics[podName] = metricsclient.PodMetric{Value: 0}
   221  			}
   222  		}
   223  	}
   224  
   225  	if scaleUpWithUnready {
   226  		// on a scale-up, treat unready pods as using 0% of the resource request
   227  		for podName := range unreadyPods {
   228  			metrics[podName] = metricsclient.PodMetric{Value: 0}
   229  		}
   230  	}
   231  
   232  	// re-run the usage calculation with our new numbers
   233  	newUsageRatio, _ := metricsclient.GetMetricUsageRatio(metrics, targetUsage)
   234  
   235  	if math.Abs(1.0-newUsageRatio) <= c.tolerance || (usageRatio < 1.0 && newUsageRatio > 1.0) || (usageRatio > 1.0 && newUsageRatio < 1.0) {
   236  		// return the current replicas if the change would be too small,
   237  		// or if the new usage ratio would cause a change in scale direction
   238  		return currentReplicas, usage, nil
   239  	}
   240  
   241  	newReplicas := int32(math.Ceil(newUsageRatio * float64(len(metrics))))
   242  	if (newUsageRatio < 1.0 && newReplicas > currentReplicas) || (newUsageRatio > 1.0 && newReplicas < currentReplicas) {
   243  		// return the current replicas if the change of metrics length would cause a change in scale direction
   244  		return currentReplicas, usage, nil
   245  	}
   246  
   247  	// return the result, where the number of replicas considered is
   248  	// however many replicas factored into our calculation
   249  	return newReplicas, usage, nil
   250  }
   251  
   252  // GetObjectMetricReplicas calculates the desired replica count based on a target metric usage (as a milli-value)
   253  // for the given object in the given namespace, and the current replica count.
   254  func (c *ReplicaCalculator) GetObjectMetricReplicas(currentReplicas int32, targetUsage int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
   255  	usage, _, err = c.metricsClient.GetObjectMetric(metricName, namespace, objectRef, metricSelector)
   256  	if err != nil {
   257  		return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v on %s %s/%s", metricName, objectRef.Kind, namespace, objectRef.Name, err)
   258  	}
   259  
   260  	usageRatio := float64(usage) / float64(targetUsage)
   261  	replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, namespace, selector)
   262  	return replicaCount, usage, timestamp, err
   263  }
   264  
   265  // getUsageRatioReplicaCount calculates the desired replica count based on usageRatio and ready pods count.
   266  // For currentReplicas=0 doesn't take into account ready pods count and tolerance to support scaling to zero pods.
   267  func (c *ReplicaCalculator) getUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, namespace string, selector labels.Selector) (replicaCount int32, timestamp time.Time, err error) {
   268  	if currentReplicas != 0 {
   269  		if math.Abs(1.0-usageRatio) <= c.tolerance {
   270  			// return the current replicas if the change would be too small
   271  			return currentReplicas, timestamp, nil
   272  		}
   273  		readyPodCount := int64(0)
   274  		readyPodCount, err = c.getReadyPodsCount(namespace, selector)
   275  		if err != nil {
   276  			return 0, time.Time{}, fmt.Errorf("unable to calculate ready pods: %s", err)
   277  		}
   278  		replicaCount = int32(math.Ceil(usageRatio * float64(readyPodCount)))
   279  	} else {
   280  		// Scale to zero or n pods depending on usageRatio
   281  		replicaCount = int32(math.Ceil(usageRatio))
   282  	}
   283  
   284  	return replicaCount, timestamp, err
   285  }
   286  
   287  // GetObjectPerPodMetricReplicas calculates the desired replica count based on a target metric usage (as a milli-value)
   288  // for the given object in the given namespace, and the current replica count.
   289  func (c *ReplicaCalculator) GetObjectPerPodMetricReplicas(statusReplicas int32, targetAverageUsage int64, metricName string, namespace string, objectRef *autoscaling.CrossVersionObjectReference, metricSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
   290  	usage, timestamp, err = c.metricsClient.GetObjectMetric(metricName, namespace, objectRef, metricSelector)
   291  	if err != nil {
   292  		return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v on %s %s/%s", metricName, objectRef.Kind, namespace, objectRef.Name, err)
   293  	}
   294  
   295  	replicaCount = statusReplicas
   296  	usageRatio := float64(usage) / (float64(targetAverageUsage) * float64(replicaCount))
   297  	if math.Abs(1.0-usageRatio) > c.tolerance {
   298  		// update number of replicas if change is large enough
   299  		replicaCount = int32(math.Ceil(float64(usage) / float64(targetAverageUsage)))
   300  	}
   301  	usage = int64(math.Ceil(float64(usage) / float64(statusReplicas)))
   302  	return replicaCount, usage, timestamp, nil
   303  }
   304  
   305  // @TODO(mattjmcnaughton) Many different functions in this module use variations
   306  // of this function. Make this function generic, so we don't repeat the same
   307  // logic in multiple places.
   308  func (c *ReplicaCalculator) getReadyPodsCount(namespace string, selector labels.Selector) (int64, error) {
   309  	podList, err := c.podLister.Pods(namespace).List(selector)
   310  	if err != nil {
   311  		return 0, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
   312  	}
   313  
   314  	if len(podList) == 0 {
   315  		return 0, fmt.Errorf("no pods returned by selector while calculating replica count")
   316  	}
   317  
   318  	readyPodCount := 0
   319  
   320  	for _, pod := range podList {
   321  		if pod.Status.Phase == v1.PodRunning && podutil.IsPodReady(pod) {
   322  			readyPodCount++
   323  		}
   324  	}
   325  
   326  	return int64(readyPodCount), nil
   327  }
   328  
   329  // GetExternalMetricReplicas calculates the desired replica count based on a
   330  // target metric value (as a milli-value) for the external metric in the given
   331  // namespace, and the current replica count.
   332  func (c *ReplicaCalculator) GetExternalMetricReplicas(currentReplicas int32, targetUsage int64, metricName, namespace string, metricSelector *metav1.LabelSelector, podSelector labels.Selector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
   333  	metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector)
   334  	if err != nil {
   335  		return 0, 0, time.Time{}, err
   336  	}
   337  	metrics, _, err := c.metricsClient.GetExternalMetric(metricName, namespace, metricLabelSelector)
   338  	if err != nil {
   339  		return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, metricSelector, err)
   340  	}
   341  	usage = 0
   342  	for _, val := range metrics {
   343  		usage = usage + val
   344  	}
   345  
   346  	usageRatio := float64(usage) / float64(targetUsage)
   347  	replicaCount, timestamp, err = c.getUsageRatioReplicaCount(currentReplicas, usageRatio, namespace, podSelector)
   348  	return replicaCount, usage, timestamp, err
   349  }
   350  
   351  // GetExternalPerPodMetricReplicas calculates the desired replica count based on a
   352  // target metric value per pod (as a milli-value) for the external metric in the
   353  // given namespace, and the current replica count.
   354  func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(statusReplicas int32, targetUsagePerPod int64, metricName, namespace string, metricSelector *metav1.LabelSelector) (replicaCount int32, usage int64, timestamp time.Time, err error) {
   355  	metricLabelSelector, err := metav1.LabelSelectorAsSelector(metricSelector)
   356  	if err != nil {
   357  		return 0, 0, time.Time{}, err
   358  	}
   359  	metrics, timestamp, err := c.metricsClient.GetExternalMetric(metricName, namespace, metricLabelSelector)
   360  	if err != nil {
   361  		return 0, 0, time.Time{}, fmt.Errorf("unable to get external metric %s/%s/%+v: %s", namespace, metricName, metricSelector, err)
   362  	}
   363  	usage = 0
   364  	for _, val := range metrics {
   365  		usage = usage + val
   366  	}
   367  
   368  	replicaCount = statusReplicas
   369  	usageRatio := float64(usage) / (float64(targetUsagePerPod) * float64(replicaCount))
   370  	if math.Abs(1.0-usageRatio) > c.tolerance {
   371  		// update number of replicas if the change is large enough
   372  		replicaCount = int32(math.Ceil(float64(usage) / float64(targetUsagePerPod)))
   373  	}
   374  	usage = int64(math.Ceil(float64(usage) / float64(statusReplicas)))
   375  	return replicaCount, usage, timestamp, nil
   376  }
   377  
   378  func groupPods(pods []*v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, unreadyPods, missingPods, ignoredPods sets.String) {
   379  	missingPods = sets.NewString()
   380  	unreadyPods = sets.NewString()
   381  	ignoredPods = sets.NewString()
   382  	for _, pod := range pods {
   383  		if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed {
   384  			ignoredPods.Insert(pod.Name)
   385  			continue
   386  		}
   387  		// Pending pods are ignored.
   388  		if pod.Status.Phase == v1.PodPending {
   389  			unreadyPods.Insert(pod.Name)
   390  			continue
   391  		}
   392  		// Pods missing metrics.
   393  		metric, found := metrics[pod.Name]
   394  		if !found {
   395  			missingPods.Insert(pod.Name)
   396  			continue
   397  		}
   398  		// Unready pods are ignored.
   399  		if resource == v1.ResourceCPU {
   400  			var unready bool
   401  			_, condition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
   402  			if condition == nil || pod.Status.StartTime == nil {
   403  				unready = true
   404  			} else {
   405  				// Pod still within possible initialisation period.
   406  				if pod.Status.StartTime.Add(cpuInitializationPeriod).After(time.Now()) {
   407  					// Ignore sample if pod is unready or one window of metric wasn't collected since last state transition.
   408  					unready = condition.Status == v1.ConditionFalse || metric.Timestamp.Before(condition.LastTransitionTime.Time.Add(metric.Window))
   409  				} else {
   410  					// Ignore metric if pod is unready and it has never been ready.
   411  					unready = condition.Status == v1.ConditionFalse && pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
   412  				}
   413  			}
   414  			if unready {
   415  				unreadyPods.Insert(pod.Name)
   416  				continue
   417  			}
   418  		}
   419  		readyPodCount++
   420  	}
   421  	return
   422  }
   423  
   424  func calculatePodRequests(pods []*v1.Pod, container string, resource v1.ResourceName) (map[string]int64, error) {
   425  	requests := make(map[string]int64, len(pods))
   426  	for _, pod := range pods {
   427  		podSum := int64(0)
   428  		// Calculate all regular containers and restartable init containers requests.
   429  		containers := append([]v1.Container{}, pod.Spec.Containers...)
   430  		for _, c := range pod.Spec.InitContainers {
   431  			if c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyAlways {
   432  				containers = append(containers, c)
   433  			}
   434  		}
   435  		for _, c := range containers {
   436  			if container == "" || container == c.Name {
   437  				if containerRequest, ok := c.Resources.Requests[resource]; ok {
   438  					podSum += containerRequest.MilliValue()
   439  				} else {
   440  					return nil, fmt.Errorf("missing request for %s in container %s of Pod %s", resource, c.Name, pod.ObjectMeta.Name)
   441  				}
   442  			}
   443  		}
   444  		requests[pod.Name] = podSum
   445  	}
   446  	return requests, nil
   447  }
   448  
   449  func removeMetricsForPods(metrics metricsclient.PodMetricsInfo, pods sets.String) {
   450  	for _, pod := range pods.UnsortedList() {
   451  		delete(metrics, pod)
   452  	}
   453  }
   454
View as plain text