1
16
17 package autoscaling
18
19 import (
20 "context"
21 "fmt"
22 "math"
23 "strings"
24 "time"
25
26 v1 "k8s.io/api/core/v1"
27 "k8s.io/apimachinery/pkg/api/resource"
28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29 "k8s.io/apimachinery/pkg/labels"
30 "k8s.io/apimachinery/pkg/util/wait"
31 clientset "k8s.io/client-go/kubernetes"
32 "k8s.io/kubernetes/test/e2e/framework"
33 e2enode "k8s.io/kubernetes/test/e2e/framework/node"
34 e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
35 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
36 admissionapi "k8s.io/pod-security-admission/api"
37
38 "github.com/onsi/ginkgo/v2"
39 )
40
41
42
43
44
45
// Constants shared by the DNS autoscaling specs and their helpers.
const (
	// DNSdefaultTimeout is the upper bound passed to every wait in this file
	// (replica convergence, ConfigMap re-creation, DNS pods becoming ready).
	DNSdefaultTimeout = time.Minute * 5
	// ClusterAddonLabelKey is the label key used to select DNS workloads and
	// the DNS autoscaler pod in the system namespace.
	ClusterAddonLabelKey = "k8s-app"
	// DNSLabelName is the ClusterAddonLabelKey value used to select the DNS
	// deployment and its pods.
	DNSLabelName = "kube-dns"
)
51
// DNS horizontal autoscaling suite: each spec rewrites the DNS autoscaler
// ConfigMap with test parameters and checks that the DNS deployment's replica
// count converges to the value computed by getExpectReplicasFuncLinear.
var _ = SIGDescribe("DNS horizontal autoscaling", func() {
	f := framework.NewDefaultFramework("dns-autoscaling")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	// State populated by BeforeEach and read by the specs below.
	var c clientset.Interface
	var previousParams map[string]string
	var configMapNames map[string]string
	var originDNSReplicasCount int
	var DNSParams1 DNSParamsLinear
	var DNSParams2 DNSParamsLinear
	var DNSParams3 DNSParamsLinear

	ginkgo.BeforeEach(func(ctx context.Context) {
		e2eskipper.SkipUnlessProviderIs("gce", "gke")
		c = f.ClientSet

		nodes, err := e2enode.GetReadySchedulableNodes(ctx, c)
		framework.ExpectNoError(err)
		nodeCount := len(nodes.Items)

		ginkgo.By("Collecting original replicas count and DNS scaling params")

		// Work out which autoscaler ConfigMap (coredns or kube-dns) exists so
		// the matching name can be picked from configMapNames below.
		provider, err := detectDNSProvider(ctx, c)
		framework.ExpectNoError(err)

		originDNSReplicasCount, err = getDNSReplicas(ctx, c)
		framework.ExpectNoError(err)
		configMapNames = map[string]string{
			"kube-dns": "kube-dns-autoscaler",
			"coredns":  "coredns-autoscaler",
		}

		// Remember the original scaling parameters; the specs restore them in
		// their deferred cleanup.
		pcm, err := fetchDNSScalingConfigMap(ctx, c, configMapNames[provider])
		framework.Logf("original DNS scaling params: %v", pcm)
		framework.ExpectNoError(err)
		previousParams = pcm.Data

		// Pick three distinct parameter sets whose per-replica ratios depend on
		// the cluster size.
		if nodeCount <= 500 {
			DNSParams1 = DNSParamsLinear{
				nodesPerReplica: 1,
			}
			DNSParams2 = DNSParamsLinear{
				nodesPerReplica: 2,
			}
			DNSParams3 = DNSParamsLinear{
				nodesPerReplica: 3,
				coresPerReplica: 3,
			}
		} else {
			// NOTE(review): the larger ratios presumably keep the number of DNS
			// pods created/deleted small on big clusters — confirm against the
			// upstream rationale.
			DNSParams1 = DNSParamsLinear{
				nodesPerReplica: 13,
			}
			DNSParams2 = DNSParamsLinear{
				nodesPerReplica: 14,
			}
			DNSParams3 = DNSParamsLinear{
				nodesPerReplica: 15,
				coresPerReplica: 15,
			}
		}
	})

	// Grows every node instance group by one node, checks the DNS replica count
	// follows, then shrinks the cluster back and checks again.
	f.It(f.WithSerial(), f.WithSlow(), f.WithLabel("KubeUp"), "kube-dns-autoscaler should scale kube-dns pods when cluster size changed", func(ctx context.Context) {
		numNodes, err := e2enode.TotalRegistered(ctx, c)
		framework.ExpectNoError(err)

		configMapNames = map[string]string{
			"kube-dns": "kube-dns-autoscaler",
			"coredns":  "coredns-autoscaler",
		}
		provider, err := detectDNSProvider(ctx, c)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1)))
		framework.ExpectNoError(err)
		// Cleanup: put the original parameters back and wait until the DNS pod
		// count recorded in BeforeEach is running and ready again.
		defer func() {
			ginkgo.By("Restoring initial dns autoscaling parameters")
			err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], previousParams))
			framework.ExpectNoError(err)

			ginkgo.By("Wait for number of running and ready kube-dns pods recover")
			label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName}))
			_, err := e2epod.WaitForPodsWithLabelRunningReady(ctx, c, metav1.NamespaceSystem, label, originDNSReplicasCount, DNSdefaultTimeout)
			framework.ExpectNoError(err)
		}()
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		// Record the current size of every instance group listed in the test
		// context so it can be restored later.
		originalSizes := make(map[string]int)
		for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
			size, err := framework.GroupSize(mig)
			framework.ExpectNoError(err)
			ginkgo.By(fmt.Sprintf("Initial size of %s: %d", mig, size))
			originalSizes[mig] = size
		}

		ginkgo.By("Manually increase cluster size")
		increasedSizes := make(map[string]int)
		for key, val := range originalSizes {
			increasedSizes[key] = val + 1
		}
		setMigSizes(increasedSizes)
		// Each group grew by exactly one node, so the expected total is the
		// original node count plus the number of groups.
		err = WaitForClusterSizeFunc(ctx, c,
			func(size int) bool { return size == numNodes+len(originalSizes) }, scaleUpTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams3)))
		framework.ExpectNoError(err)

		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("Restoring cluster size")
		setMigSizes(originalSizes)
		err = e2enode.WaitForReadyNodes(ctx, c, numNodes, scaleDownTimeout)
		framework.ExpectNoError(err)

		// The ConfigMap still holds DNSParams3 here, so the expectation function
		// built from DNSParams3 above is reused as-is.
		ginkgo.By("Wait for kube-dns scaled to expected number")
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)
	})

	// Walks through parameter changes, deletion of the autoscaler ConfigMap and
	// deletion of the autoscaler pod, checking replica convergence after each.
	ginkgo.It("kube-dns-autoscaler should scale kube-dns pods in both nonfaulty and faulty scenarios", func(ctx context.Context) {

		configMapNames = map[string]string{
			"kube-dns": "kube-dns-autoscaler",
			"coredns":  "coredns-autoscaler",
		}
		provider, err := detectDNSProvider(ctx, c)
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with testing parameters")
		cm := packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1))
		framework.Logf("Updating the following cm: %v", cm)
		err = updateDNSScalingConfigMap(ctx, c, cm)
		framework.ExpectNoError(err)
		// Cleanup: put the original parameters back when the spec exits.
		defer func() {
			ginkgo.By("Restoring initial dns autoscaling parameters")
			err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], previousParams))
			framework.ExpectNoError(err)
		}()
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear := getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should scale kube-dns based on changed parameters ---")
		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams3)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams3)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should re-create scaling parameters with default value when parameters got deleted ---")
		ginkgo.By("Delete the ConfigMap for autoscaler")
		err = deleteDNSScalingConfigMap(ctx, c, configMapNames[provider])
		framework.ExpectNoError(err)

		// Only the re-creation is awaited; the re-created ConfigMap's contents
		// are not inspected (the returned object is discarded).
		ginkgo.By("Wait for the ConfigMap got re-created")
		_, err = waitForDNSConfigMapCreated(ctx, c, DNSdefaultTimeout, configMapNames[provider])
		framework.ExpectNoError(err)

		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams2)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns/coredns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams2)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)

		ginkgo.By("--- Scenario: should recover after autoscaler pod got deleted ---")
		ginkgo.By("Delete the autoscaler pod for kube-dns/coredns")
		err = deleteDNSAutoscalerPod(ctx, c)
		framework.ExpectNoError(err)

		// Changing the parameters right after the pod deletion means replicas
		// only converge if an autoscaler pod is running again.
		ginkgo.By("Replace the dns autoscaling parameters with another testing parameters")
		err = updateDNSScalingConfigMap(ctx, c, packDNSScalingConfigMap(configMapNames[provider], packLinearParams(&DNSParams1)))
		framework.ExpectNoError(err)
		ginkgo.By("Wait for kube-dns/coredns scaled to expected number")
		getExpectReplicasLinear = getExpectReplicasFuncLinear(ctx, c, &DNSParams1)
		err = waitForDNSReplicasSatisfied(ctx, c, getExpectReplicasLinear, DNSdefaultTimeout)
		framework.ExpectNoError(err)
	})
})
258
259
// DNSParamsLinear holds the values serialized by packLinearParams into the
// "linear" entry of the DNS autoscaler ConfigMap.
type DNSParamsLinear struct {
	// Per-replica ratios; a value <= 0 is skipped when the expected replica
	// count is computed by getExpectReplicasFuncLinear.
	nodesPerReplica, coresPerReplica float64
	// Replica bounds. They are serialized as-is; no code visible in this file
	// sets them, so they are emitted as 0.
	min, max int
}
266
// getExpectReplicasFunc returns the number of DNS replicas currently expected,
// using the given client to inspect the cluster. waitForDNSReplicasSatisfied
// calls it on every poll iteration.
type getExpectReplicasFunc func(c clientset.Interface) int
268
269 func getExpectReplicasFuncLinear(ctx context.Context, c clientset.Interface, params *DNSParamsLinear) getExpectReplicasFunc {
270 return func(c clientset.Interface) int {
271 var replicasFromNodes float64
272 var replicasFromCores float64
273 nodes, err := e2enode.GetReadyNodesIncludingTainted(ctx, c)
274 framework.ExpectNoError(err)
275 if params.nodesPerReplica > 0 {
276 replicasFromNodes = math.Ceil(float64(len(nodes.Items)) / params.nodesPerReplica)
277 }
278 if params.coresPerReplica > 0 {
279 replicasFromCores = math.Ceil(float64(getSchedulableCores(nodes.Items)) / params.coresPerReplica)
280 }
281 return int(math.Max(1.0, math.Max(replicasFromNodes, replicasFromCores)))
282 }
283 }
284
285 func getSchedulableCores(nodes []v1.Node) int64 {
286 var sc resource.Quantity
287 for _, node := range nodes {
288 if !node.Spec.Unschedulable {
289 sc.Add(node.Status.Allocatable[v1.ResourceCPU])
290 }
291 }
292 return sc.Value()
293 }
294
295 func detectDNSProvider(ctx context.Context, c clientset.Interface) (string, error) {
296 cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, "coredns-autoscaler", metav1.GetOptions{})
297 if cm != nil && err == nil {
298 return "coredns", nil
299 }
300
301 cm, err = c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, "kube-dns-autoscaler", metav1.GetOptions{})
302 if cm != nil && err == nil {
303 return "kube-dns", nil
304 }
305
306 return "", fmt.Errorf("the cluster doesn't have kube-dns or coredns autoscaling configured")
307 }
308
309 func fetchDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMapName string) (*v1.ConfigMap, error) {
310 cm, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Get(ctx, configMapName, metav1.GetOptions{})
311 if err != nil {
312 return nil, err
313 }
314 return cm, nil
315 }
316
317 func deleteDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMapName string) error {
318 if err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(ctx, configMapName, metav1.DeleteOptions{}); err != nil {
319 return err
320 }
321 framework.Logf("DNS autoscaling ConfigMap deleted.")
322 return nil
323 }
324
325 func packLinearParams(params *DNSParamsLinear) map[string]string {
326 paramsMap := make(map[string]string)
327 paramsMap["linear"] = fmt.Sprintf("{\"nodesPerReplica\": %v,\"coresPerReplica\": %v,\"min\": %v,\"max\": %v}",
328 params.nodesPerReplica,
329 params.coresPerReplica,
330 params.min,
331 params.max)
332 return paramsMap
333 }
334
335 func packDNSScalingConfigMap(configMapName string, params map[string]string) *v1.ConfigMap {
336 configMap := v1.ConfigMap{}
337 configMap.ObjectMeta.Name = configMapName
338 configMap.ObjectMeta.Namespace = metav1.NamespaceSystem
339 configMap.Data = params
340 return &configMap
341 }
342
343 func updateDNSScalingConfigMap(ctx context.Context, c clientset.Interface, configMap *v1.ConfigMap) error {
344 _, err := c.CoreV1().ConfigMaps(metav1.NamespaceSystem).Update(ctx, configMap, metav1.UpdateOptions{})
345 if err != nil {
346 return err
347 }
348 framework.Logf("DNS autoscaling ConfigMap updated.")
349 return nil
350 }
351
352 func getDNSReplicas(ctx context.Context, c clientset.Interface) (int, error) {
353 label := labels.SelectorFromSet(labels.Set(map[string]string{ClusterAddonLabelKey: DNSLabelName}))
354 listOpts := metav1.ListOptions{LabelSelector: label.String()}
355 deployments, err := c.AppsV1().Deployments(metav1.NamespaceSystem).List(ctx, listOpts)
356 if err != nil {
357 return 0, err
358 }
359 if len(deployments.Items) != 1 {
360 return 0, fmt.Errorf("expected 1 DNS deployment, got %v", len(deployments.Items))
361 }
362
363 deployment := deployments.Items[0]
364 return int(*(deployment.Spec.Replicas)), nil
365 }
366
367 func deleteDNSAutoscalerPod(ctx context.Context, c clientset.Interface) error {
368 selector, _ := labels.Parse(fmt.Sprintf("%s in (kube-dns-autoscaler, coredns-autoscaler)", ClusterAddonLabelKey))
369 listOpts := metav1.ListOptions{LabelSelector: selector.String()}
370 pods, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(ctx, listOpts)
371 if err != nil {
372 return err
373 }
374 if len(pods.Items) != 1 {
375 return fmt.Errorf("expected 1 autoscaler pod, got %v", len(pods.Items))
376 }
377
378 podName := pods.Items[0].Name
379 if err := c.CoreV1().Pods(metav1.NamespaceSystem).Delete(ctx, podName, metav1.DeleteOptions{}); err != nil {
380 return err
381 }
382 framework.Logf("DNS autoscaling pod %v deleted.", podName)
383 return nil
384 }
385
386 func waitForDNSReplicasSatisfied(ctx context.Context, c clientset.Interface, getExpected getExpectReplicasFunc, timeout time.Duration) (err error) {
387 var current int
388 var expected int
389 framework.Logf("Waiting up to %v for kube-dns to reach expected replicas", timeout)
390 condition := func(ctx context.Context) (bool, error) {
391 current, err = getDNSReplicas(ctx, c)
392 if err != nil {
393 return false, err
394 }
395 expected = getExpected(c)
396 if current != expected {
397 framework.Logf("Replicas not as expected: got %v, expected %v", current, expected)
398 return false, nil
399 }
400 return true, nil
401 }
402
403 if err = wait.PollUntilContextTimeout(ctx, 2*time.Second, timeout, false, condition); err != nil {
404 return fmt.Errorf("err waiting for DNS replicas to satisfy %v, got %v: %w", expected, current, err)
405 }
406 framework.Logf("kube-dns reaches expected replicas: %v", expected)
407 return nil
408 }
409
410 func waitForDNSConfigMapCreated(ctx context.Context, c clientset.Interface, timeout time.Duration, configMapName string) (configMap *v1.ConfigMap, err error) {
411 framework.Logf("Waiting up to %v for DNS autoscaling ConfigMap to be re-created", timeout)
412 condition := func(ctx context.Context) (bool, error) {
413 configMap, err = fetchDNSScalingConfigMap(ctx, c, configMapName)
414 if err != nil {
415 return false, nil
416 }
417 return true, nil
418 }
419
420 if err = wait.PollUntilContextTimeout(ctx, time.Second, timeout, false, condition); err != nil {
421 return nil, fmt.Errorf("err waiting for DNS autoscaling ConfigMap got re-created: %w", err)
422 }
423 return configMap, nil
424 }
425
View as plain text