/* Copyright 2015 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package network import ( "context" "fmt" "strings" "time" "github.com/onsi/ginkgo/v2" v1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" e2edaemonset "k8s.io/kubernetes/test/e2e/framework/daemonset" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eservice "k8s.io/kubernetes/test/e2e/framework/service" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" "k8s.io/kubernetes/test/e2e/network/common" imageutils "k8s.io/kubernetes/test/utils/image" admissionapi "k8s.io/pod-security-admission/api" ) var _ = common.SIGDescribe(feature.TopologyHints, func() { f := framework.NewDefaultFramework("topology-hints") f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged // filled in BeforeEach var c clientset.Interface ginkgo.BeforeEach(func(ctx context.Context) { c = f.ClientSet e2eskipper.SkipUnlessMultizone(ctx, c) }) ginkgo.It("should distribute endpoints evenly", func(ctx context.Context) { portNum := int32(9376) thLabels := map[string]string{labelKey: clientLabelValue} img := imageutils.GetE2EImage(imageutils.Agnhost) ports := []v1.ContainerPort{{ContainerPort: int32(portNum)}} dsConf := e2edaemonset.NewDaemonSet("topology-serve-hostname", img, thLabels, nil, nil, ports, "serve-hostname") ds, err := c.AppsV1().DaemonSets(f.Namespace.Name).Create(ctx, dsConf, metav1.CreateOptions{}) framework.ExpectNoError(err, "error creating DaemonSet") svc := createServiceReportErr(ctx, c, f.Namespace.Name, &v1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "topology-hints", Annotations: map[string]string{ v1.AnnotationTopologyMode: "Auto", }, }, Spec: v1.ServiceSpec{ Selector: thLabels, PublishNotReadyAddresses: true, Ports: []v1.ServicePort{{ Name: "example", Port: 80, TargetPort: intstr.FromInt32(portNum), Protocol: v1.ProtocolTCP, }}, }, }) err = wait.PollWithContext(ctx, 5*time.Second, framework.PodStartTimeout, func(ctx context.Context) (bool, error) { return e2edaemonset.CheckRunningOnAllNodes(ctx, f, ds) }) framework.ExpectNoError(err, "timed out waiting for DaemonSets to be ready") // All Nodes should have same allocatable CPUs. If not, then skip the test. schedulableNodes := map[string]*v1.Node{} for _, nodeName := range e2edaemonset.SchedulableNodes(ctx, c, ds) { schedulableNodes[nodeName] = nil } nodeList, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) framework.ExpectNoError(err, "Error when listing all Nodes") var lastNodeCPU resource.Quantity firstNode := true for i := range nodeList.Items { node := nodeList.Items[i] if _, ok := schedulableNodes[node.Name]; !ok { continue } schedulableNodes[node.Name] = &node nodeCPU, found := node.Status.Allocatable[v1.ResourceCPU] if !found { framework.Failf("Error when getting allocatable CPU of Node '%s'", node.Name) } if firstNode { lastNodeCPU = nodeCPU firstNode = false } else if !nodeCPU.Equal(lastNodeCPU) { e2eskipper.Skipf("Expected Nodes to have equivalent allocatable CPUs, but Node '%s' is different from the previous one. %d not equals %d", node.Name, nodeCPU.Value(), lastNodeCPU.Value()) } } framework.Logf("Waiting for %d endpoints to be tracked in EndpointSlices", len(schedulableNodes)) var finalSlices []discoveryv1.EndpointSlice err = wait.PollWithContext(ctx, 5*time.Second, 3*time.Minute, func(ctx context.Context) (bool, error) { slices, listErr := c.DiscoveryV1().EndpointSlices(f.Namespace.Name).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", discoveryv1.LabelServiceName, svc.Name)}) if listErr != nil { return false, listErr } numEndpoints := 0 for _, slice := range slices.Items { numEndpoints += len(slice.Endpoints) } if len(schedulableNodes) > numEndpoints { framework.Logf("Expected %d endpoints, got %d", len(schedulableNodes), numEndpoints) return false, nil } finalSlices = slices.Items return true, nil }) framework.ExpectNoError(err, "timed out waiting for EndpointSlices to be ready") ginkgo.By("having hints set for each endpoint") for _, slice := range finalSlices { for _, ep := range slice.Endpoints { if ep.Zone == nil { framework.Failf("Expected endpoint in %s to have zone: %v", slice.Name, ep) } if ep.Hints == nil || len(ep.Hints.ForZones) == 0 { framework.Failf("Expected endpoint in %s to have hints: %v", slice.Name, ep) } if len(ep.Hints.ForZones) > 1 { framework.Failf("Expected endpoint in %s to have exactly 1 zone hint, got %d: %v", slice.Name, len(ep.Hints.ForZones), ep) } if *ep.Zone != ep.Hints.ForZones[0].Name { framework.Failf("Expected endpoint in %s to have same zone hint, got %s: %v", slice.Name, *ep.Zone, ep) } } } nodesByZone := map[string]string{} zonesWithNode := map[string]string{} for _, node := range schedulableNodes { if zone, ok := node.Labels[v1.LabelTopologyZone]; ok { nodesByZone[node.Name] = zone zonesWithNode[zone] = node.Name } } podList, err := c.CoreV1().Pods(f.Namespace.Name).List(ctx, metav1.ListOptions{}) framework.ExpectNoError(err) podsByZone := map[string]string{} for _, pod := range podList.Items { if zone, ok := nodesByZone[pod.Spec.NodeName]; ok { podsByZone[pod.Name] = zone } } ginkgo.By("keeping requests in the same zone") for fromZone, nodeName := range zonesWithNode { ginkgo.By("creating a client pod for probing the service from " + fromZone) podName := "curl-from-" + fromZone clientPod := e2epod.NewAgnhostPod(f.Namespace.Name, podName, nil, nil, nil, "serve-hostname") nodeSelection := e2epod.NodeSelection{Name: nodeName} e2epod.SetNodeSelection(&clientPod.Spec, nodeSelection) cmd := fmt.Sprintf(`date; for i in $(seq 1 3000); do sleep 1; echo "Date: $(date) Try: ${i}"; curl -q -s --connect-timeout 2 http://%s:80/ ; echo; done`, svc.Name) clientPod.Spec.Containers[0].Command = []string{"/bin/sh", "-c", cmd} clientPod.Spec.Containers[0].Name = clientPod.Name e2epod.NewPodClient(f).CreateSync(ctx, clientPod) framework.Logf("Ensuring that requests from %s pod on %s node stay in %s zone", clientPod.Name, nodeName, fromZone) var logs string if pollErr := wait.PollWithContext(ctx, 5*time.Second, e2eservice.KubeProxyLagTimeout, func(ctx context.Context) (bool, error) { var err error logs, err = e2epod.GetPodLogs(ctx, c, f.Namespace.Name, clientPod.Name, clientPod.Name) framework.ExpectNoError(err) framework.Logf("Pod client logs: %s", logs) logLines := strings.Split(logs, "\n") if len(logLines) < 6 { framework.Logf("only %d log lines, waiting for at least 6", len(logLines)) return false, nil } consecutiveSameZone := 0 for i := len(logLines) - 1; i > 0; i-- { if logLines[i] == "" || strings.HasPrefix(logLines[i], "Date:") { continue } destZone, ok := podsByZone[logLines[i]] if !ok { framework.Logf("could not determine dest zone from log line: %s", logLines[i]) return false, nil } if fromZone != destZone { framework.Logf("expected request from %s to stay in %s zone, delivered to %s zone", clientPod.Name, fromZone, destZone) return false, nil } consecutiveSameZone++ if consecutiveSameZone >= 5 { return true, nil } } return false, nil }); pollErr != nil { framework.Failf("expected 5 consecutive requests from %s to stay in zone %s within %v, stdout: %v", clientPod.Name, fromZone, e2eservice.KubeProxyLagTimeout, logs) } } }) })