...

Source file src/k8s.io/kubernetes/test/e2e/autoscaling/dns_autoscaling.go

Documentation: k8s.io/kubernetes/test/e2e/autoscaling

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package autoscaling
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"strings"
    24  	"time"
    25  
    26  	v1 "k8s.io/api/core/v1"
    27  	"k8s.io/apimachinery/pkg/api/resource"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/labels"
    30  	"k8s.io/apimachinery/pkg/util/wait"
    31  	clientset "k8s.io/client-go/kubernetes"
    32  	"k8s.io/kubernetes/test/e2e/framework"
    33  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    34  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    35  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    36  	admissionapi "k8s.io/pod-security-admission/api"
    37  
    38  	"github.com/onsi/ginkgo/v2"
    39  )
    40  
    41  // This test requires coredns to be installed on the cluster with autoscaling enabled.
    42  // Compare your coredns manifest against the command below
    43  // helm template coredns -n kube-system coredns/coredns --set k8sAppLabelOverride=kube-dns --set fullnameOverride=coredns --set autoscaler.enabled=true
    44  
    45  // Constants used in dns-autoscaling test.
const (
	// DNSdefaultTimeout is the maximum time to wait for DNS pods to reach
	// the expected replica count or for the autoscaler ConfigMap to reappear.
	DNSdefaultTimeout = 5 * time.Minute
	// ClusterAddonLabelKey is the label key used to select cluster addon
	// components (both the DNS deployment and the autoscaler pod carry it).
	ClusterAddonLabelKey = "k8s-app"
	// DNSLabelName is the label value identifying the DNS deployment; coredns
	// installs are expected to override their k8s-app label to "kube-dns".
	DNSLabelName = "kube-dns"
)
    51  
var _ = SIGDescribe("DNS horizontal autoscaling", func() {
	f := framework.NewDefaultFramework("dns-autoscaling")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	var c clientset.Interface
	// previousParams holds the autoscaler ConfigMap data captured before the
	// test mutates it, so each test can restore it in a deferred cleanup.
	var previousParams map[string]string
	// configMapNames maps the detected DNS provider ("kube-dns"/"coredns") to
	// the name of its autoscaler ConfigMap.
	var configMapNames map[string]string
	// originDNSReplicasCount is the DNS deployment's replica count before any
	// scaling parameters are changed; used to wait for recovery on cleanup.
	var originDNSReplicasCount int
	// Three linear-mode parameter sets used to drive the autoscaler to
	// different (distinct) expected replica counts during the tests.
	var DNSParams1 DNSParamsLinear
	var DNSParams2 DNSParamsLinear
	var DNSParams3 DNSParamsLinear

	ginkgo.BeforeEach(func(ctx context.Context) {
		e2eskipper.SkipUnlessProviderIs("gce", "gke")
		c = f.ClientSet

		nodes, err := e2enode.GetReadySchedulableNodes(ctx, c)
		framework.ExpectNoError(err)
		nodeCount := len(nodes.Items)

		ginkgo.By("Collecting original replicas count and DNS scaling params")

		// Check if we are running coredns or kube-dns; the only difference is
		// the name of the autoscaling ConfigMap. The test should behave
		// identically on both DNS providers.
		provider, err := detectDNSProvider(ctx, c)
		framework.ExpectNoError(err)

		originDNSReplicasCount, err = getDNSReplicas(ctx, c)
		framework.ExpectNoError(err)
		configMapNames = map[string]string{
			"kube-dns": "kube-dns-autoscaler",
			"coredns":  "coredns-autoscaler",
		}

		// Save the current autoscaler parameters so the tests can restore them.
		pcm, err := fetchDNSScalingConfigMap(ctx, c, configMapNames[provider])
		framework.Logf("original DNS scaling params: %v", pcm)
		framework.ExpectNoError(err)
		previousParams = pcm.Data

		if nodeCount <= 500 {
			DNSParams1 = DNSParamsLinear{
				nodesPerReplica: 1,
			}
			DNSParams2 = DNSParamsLinear{
				nodesPerReplica: 2,
			}
			DNSParams3 = DNSParamsLinear{
				nodesPerReplica: 3,
				coresPerReplica: 3,
			}
		} else {
			// In large clusters, avoid creating/deleting too many DNS pods,
			// it is supposed to be correctness test, not performance one.
			// The default setup is: 256 cores/replica, 16 nodes/replica.
			// With nodeCount > 500, nodes/13, nodes/14, nodes/15 and nodes/16
			// are different numbers.
			DNSParams1 = DNSParamsLinear{
				nodesPerReplica: 13,
			}
			DNSParams2 = DNSParamsLinear{
				nodesPerReplica: 14,
			}
			DNSParams3 = DNSParamsLinear{
				nodesPerReplica: 15,
				coresPerReplica: 15,
			}
		}
	})

	// This test is separated because it is slow and need to run serially.
	// Will take around 5 minutes to run on a 4 nodes cluster.
	// TODO(upodroid) This test will be removed in 1.33 when kubeup is removed
	f.It(f.WithSerial(), f.WithSlow(), f.WithLabel("KubeUp"), "kube-dns-autoscaler should scale kube-dns pods when cluster size changed", func(ctx context.Context) {
		numNodes, err := e2enode.TotalRegistered(ctx, c)
		framework.ExpectNoError(err)

		configMapNames = map[string]string{
			"kube-dns": "kube-dns-autoscaler",
			"coredns":  "coredns-autoscaler",
		}
		provider, err := detectDNSProvider(ctx, c)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1)))
		framework.ExpectNoError(err)
		// Restore the original parameters and wait for the original replica
		// count to return, regardless of how the test body exits.
		defer func() {
			ginkgo.By("Restoring initial dns autoscaling parameters")
			err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], previousParams))
			framework.ExpectNoError(err)

			ginkgo.By("Wait for number of running and ready kube-dns pods recover")
			label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName}))
			_, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, c, metav1.NamespaceSystem, label, originDNSReplicasCount, DNSdefaultTimeout)
			framework.ExpectNoError(err)
		}()
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		// Record the size of every managed instance group so the cluster can
		// be restored to its original size at the end of the test.
		originalSizes := make(map[string]int)
		for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
			size, err := framework.GroupSize(mig)
			framework.ExpectNoError(err)
			ginkgo.By(fmt.Sprintf("Initial size of %s: %d", mig, size))
			originalSizes[mig] = size
		}

		ginkgo.By("Manually increase cluster size")
		increasedSizes := make(map[string]int)
		for key, val := range originalSizes {
			increasedSizes[key] = val + 1
		}
		setMigSizes(increasedSizes)
		// One node is added per MIG, so the target size is numNodes plus the
		// number of groups.
		err = WaitForClusterSizeFunc(ctx, c,
			func(size int) bool { return size == numNodes+len(originalSizes) }, scaleUpTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams3)))
		framework.ExpectNoError(err)

		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Restoring cluster size")
		setMigSizes(originalSizes)
		err = e2enode.WaitForReadyNodes(ctx, c, numNodes, scaleDownTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Wait for kube-dns scaled to expected number")
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)
	})

	ginkgo.It("kube-dns-autoscaler should scale kube-dns pods in both nonfaulty and faulty scenarios", func(ctx context.Context) {

		configMapNames = map[string]string{
			"kube-dns": "kube-dns-autoscaler",
			"coredns":  "coredns-autoscaler",
		}
		provider, err := detectDNSProvider(ctx, c)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with testing parameters")
		cm := packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1))
		framework.Logf("Updating the following cm: %v", cm)
		err = updateDNSScalingConfigMap(ctx, c, cm)
		framework.ExpectNoError(err)
		// Restore the original autoscaler parameters when the test finishes.
		defer func() {
			ginkgo.By("Restoring initial dns autoscaling parameters")
			err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], previousParams))
			framework.ExpectNoError(err)
		}()
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should scale kube-dns based on changed parameters ---")
		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams3)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should re-create scaling parameters with default value when parameters got deleted ---")
		ginkgo.By("Delete the ConfigMap for autoscaler")
		err = deleteDNSScalingConfigMap(ctx, c, configMapNames[provider])
		framework.ExpectNoError(err)

		ginkgo.By("Wait for the ConfigMap got re-created")
		_, err = waitForDNSConfigMapCreated(ctx, c, DNSdefaultTimeout, configMapNames[provider])
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams2)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns/coredns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams2)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should recover after autoscaler pod got deleted ---")
		ginkgo.By("Delete the autoscaler pod for kube-dns/coredns")
		err = deleteDNSAutoscalerPod(ctx, c)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns/coredns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)
	})
})
   258  
// DNSParamsLinear holds the parameters of the cluster-proportional-autoscaler
// "linear" scaling mode used by this test.
type DNSParamsLinear struct {
	// nodesPerReplica: one DNS replica per this many nodes (0 disables the term).
	nodesPerReplica float64
	// coresPerReplica: one DNS replica per this many cores (0 disables the term).
	coresPerReplica float64
	// min and max bound the replica count (serialized by packLinearParams).
	min             int
	max             int
}
   266  
// getExpectReplicasFunc computes the number of DNS replicas expected for the
// cluster's current state.
type getExpectReplicasFunc func(c clientset.Interface) int
   268  
   269  func getExpectReplicasFuncLinear(ctx context.Context, c clientset.Interface, params *DNSParamsLinear) getExpectReplicasFunc {
   270  	return func(c clientset.Interface) int {
   271  		var replicasFromNodes float64
   272  		var replicasFromCores float64
   273  		nodes, err := e2enode.GetReadyNodesIncludingTainted(ctx, c)
   274  		framework.ExpectNoError(err)
   275  		if params.nodesPerReplica > 0 {
   276  			replicasFromNodes = math.Ceil(float64(len(nodes.Items)) / params.nodesPerReplica)
   277  		}
   278  		if params.coresPerReplica > 0 {
   279  			replicasFromCores = math.Ceil(float64(getSchedulableCores(nodes.Items)) / params.coresPerReplica)
   280  		}
   281  		return int(math.Max(1.0, math.Max(replicasFromNodes, replicasFromCores)))
   282  	}
   283  }
   284  
   285  func getSchedulableCores(nodes []v1.Node) int64 {
   286  	var sc resource.Quantity
   287  	for _, node := range nodes {
   288  		if !node.Spec.Unschedulable {
   289  			sc.Add(node.Status.Allocatable[v1.ResourceCPU])
   290  		}
   291  	}
   292  	return sc.Value()
   293  }
   294  
   295  func detectDNSProvider(ctx context.Context, c clientset.Interface) (string, error) {
   296  	cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, "coredns-autoscaler", metav1.GetOptions{})
   297  	if cm != nil && err == nil {
   298  		return "coredns", nil
   299  	}
   300  
   301  	cm, err = c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, "kube-dns-autoscaler", metav1.GetOptions{})
   302  	if cm != nil && err == nil {
   303  		return "kube-dns", nil
   304  	}
   305  
   306  	return "", fmt.Errorf("the cluster doesn't have kube-dns or coredns autoscaling configured")
   307  }
   308  
   309  func fetchDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMapName string) (*v1.ConfigMap, error) {
   310  	cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, configMapName, metav1.GetOptions{})
   311  	if err != nil {
   312  		return nil, err
   313  	}
   314  	return cm, nil
   315  }
   316  
   317  func deleteDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMapName string) error {
   318  	if err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(ctx, configMapName, metav1.DeleteOptions{}); err != nil {
   319  		return err
   320  	}
   321  	framework.Logf("DNS autoscaling ConfigMap deleted.")
   322  	return nil
   323  }
   324  
   325  func packLinearParams(params *DNSParamsLinear) map[string]string {
   326  	paramsMap := make(map[string]string)
   327  	paramsMap["linear"] = fmt.Sprintf("{\"nodesPerReplica\": %v,\"coresPerReplica\": %v,\"min\": %v,\"max\": %v}",
   328  		params.nodesPerReplica,
   329  		params.coresPerReplica,
   330  		params.min,
   331  		params.max)
   332  	return paramsMap
   333  }
   334  
   335  func packDNSScalingConfigMap(configMapName string, params map[string]string) *v1.ConfigMap {
   336  	configMap := v1.ConfigMap{}
   337  	configMap.ObjectMeta.Name = configMapName
   338  	configMap.ObjectMeta.Namespace = metav1.NamespaceSystem
   339  	configMap.Data = params
   340  	return &configMap
   341  }
   342  
   343  func updateDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMap *v1.ConfigMap) error {
   344  	_, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Update(ctx, configMap, metav1.UpdateOptions{})
   345  	if err != nil {
   346  		return err
   347  	}
   348  	framework.Logf("DNS autoscaling ConfigMap updated.")
   349  	return nil
   350  }
   351  
   352  func getDNSReplicas(ctx context.Context, c clientset.Interface) (int, error) {
   353  	label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName}))
   354  	listOpts := metav1.ListOptions{LabelSelector: label.String()}
   355  	deployments, err := c.AppsV1().Deployments(metav1.NamespaceSystem).List(ctx, listOpts)
   356  	if err != nil {
   357  		return 0, err
   358  	}
   359  	if len(deployments.Items) != 1 {
   360  		return 0, fmt.Errorf("expected 1 DNS deployment, got %v", len(deployments.Items))
   361  	}
   362  
   363  	deployment := deployments.Items[0]
   364  	return int(*(deployment.Spec.Replicas)), nil
   365  }
   366  
   367  func deleteDNSAutoscalerPod(ctx context.Context, c clientset.Interface) error {
   368  	selector, _ := labels.Parse(fmt.Sprintf("%s in (kube-dns-autoscaler, coredns-autoscaler)", ClusterAddonLabelKey))
   369  	listOpts := metav1.ListOptions{LabelSelector: selector.String()}
   370  	pods, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(ctx, listOpts)
   371  	if err != nil {
   372  		return err
   373  	}
   374  	if len(pods.Items) != 1 {
   375  		return fmt.Errorf("expected 1 autoscaler pod, got %v", len(pods.Items))
   376  	}
   377  
   378  	podName := pods.Items[0].Name
   379  	if err := c.CoreV1().Pods(metav1.NamespaceSystem).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil {
   380  		return err
   381  	}
   382  	framework.Logf("DNS autoscaling pod %v deleted.", podName)
   383  	return nil
   384  }
   385  
   386  func waitForDNSReplicasSatisfied(ctx context.Context, c clientset.Interface, getExpected getExpectReplicasFunc, timeout time.Duration) (err error) {
   387  	var current int
   388  	var expected int
   389  	framework.Logf("Waiting up to %v for kube-dns to reach expected replicas", timeout)
   390  	condition := func(ctx context.Context) (bool, error) {
   391  		current, err = getDNSReplicas(ctx, c)
   392  		if err != nil {
   393  			return false, err
   394  		}
   395  		expected = getExpected(c)
   396  		if current != expected {
   397  			framework.Logf("Replicas not as expected: got %v, expected %v", current, expected)
   398  			return false, nil
   399  		}
   400  		return true, nil
   401  	}
   402  
   403  	if err = wait.PollUntilContextTimeout(ctx, 2*time.Second, timeout, false, condition); err != nil {
   404  		return fmt.Errorf("err waiting for DNS replicas to satisfy %v, got %v: %w", expected, current, err)
   405  	}
   406  	framework.Logf("kube-dns reaches expected replicas: %v", expected)
   407  	return nil
   408  }
   409  
   410  func waitForDNSConfigMapCreated(ctx context.Context, c clientset.Interface, timeout time.Duration, configMapName string) (configMap *v1.ConfigMap, err error) {
   411  	framework.Logf("Waiting up to %v for DNS autoscaling ConfigMap to be re-created", timeout)
   412  	condition := func(ctx context.Context) (bool, error) {
   413  		configMap, err = fetchDNSScalingConfigMap(ctx, c, configMapName)
   414  		if err != nil {
   415  			return false, nil
   416  		}
   417  		return true, nil
   418  	}
   419  
   420  	if err = wait.PollUntilContextTimeout(ctx, time.Second, timeout, false, condition); err != nil {
   421  		return nil, fmt.Errorf("err waiting for DNS autoscaling ConfigMap got re-created: %w", err)
   422  	}
   423  	return configMap, nil
   424  }
   425  

View as plain text