/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
17 package e2enode
18
19 import (
20 "context"
21 "fmt"
22 "time"
23
24 "github.com/onsi/ginkgo/v2"
25 "github.com/onsi/gomega"
26 "github.com/onsi/gomega/gstruct"
27 "github.com/onsi/gomega/types"
28
29 v1 "k8s.io/api/core/v1"
30 "k8s.io/apimachinery/pkg/api/resource"
31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
32 kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
33 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
34 "k8s.io/kubernetes/test/e2e/feature"
35 "k8s.io/kubernetes/test/e2e/framework"
36 e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
37 e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
38 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
39 admissionapi "k8s.io/pod-security-admission/api"
40 "k8s.io/utils/cpuset"
41 )
42
// Serial: the suite mutates the kubelet configuration (and restarts the
// kubelet via updateKubeletConfig), which would disturb any test running
// concurrently on the same node.
var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUManager, func() {
	f := framework.NewDefaultFramework("cpumanager-metrics")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.Context("when querying /metrics", func() {
		// oldCfg caches the node's original kubelet configuration so AfterEach
		// can restore it once the test is done.
		var oldCfg *kubeletconfig.KubeletConfiguration
		// testPod tracks the pod created by an It block (if any) for cleanup.
		var testPod *v1.Pod
		// smtLevel is the number of hardware threads per physical core, as
		// reported by getSMTLevel().
		var smtLevel int

		ginkgo.BeforeEach(func(ctx context.Context) {
			var err error
			// Snapshot the pristine config only on the first run; later
			// invocations reuse it so the restore in AfterEach always goes
			// back to the original, not to a test-modified config.
			if oldCfg == nil {
				oldCfg, err = getCurrentKubeletConfig(ctx)
				framework.ExpectNoError(err)
			}

			fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
			_, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
			smtLevel = getSMTLevel()

			// The full-pcpus-only option is only meaningful on SMT-enabled
			// machines (thread siblings exist); skip otherwise.
			if smtLevel < 2 {
				e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt)
			}

			// The tests allocate up to a full physical core (smtLevel CPUs)
			// on top of the reserved CPU, so require at least 2 cores' worth
			// of allocatable virtual CPUs.
			if cpuAlloc < int64(smtLevel*2) {
				e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < 4", fullCPUsOnlyOpt)
			}

			framework.Logf("SMT level %d", smtLevel)

			// Reconfigure the kubelet with the static policy and the
			// full-pcpus-only option, reserving CPU 0 for the system.
			// NOTE(review): this assumes CPU id 0 exists on the node —
			// true on common hardware, but not checked here.
			cpuPolicyOptions := map[string]string{
				cpumanager.FullPCPUsOnlyOption: "true",
			}
			newCfg := configureCPUManagerInKubelet(oldCfg,
				&cpuManagerKubeletArguments{
					policyName:              string(cpumanager.PolicyStatic),
					reservedSystemCPUs:      cpuset.New(0),
					enableCPUManagerOptions: true,
					options:                 cpuPolicyOptions,
				},
			)
			// Applies the config and restarts the kubelet (true), which also
			// resets the kubelet's metric counters to zero.
			updateKubeletConfig(ctx, f, newCfg, true)
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			// Delete the test pod (if one was created) before restoring the
			// original kubelet configuration.
			if testPod != nil {
				deletePodSyncByName(ctx, f, testPod.Name)
			}
			updateKubeletConfig(ctx, f, oldCfg, true)
		})

		ginkgo.It("should report zero pinning counters after a fresh restart", func(ctx context.Context) {
			// No pod is created here: right after the restart performed in
			// BeforeEach, both pinning counters must exist and read zero.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with no pods running")
			// A fresh kubelet should never report stale pinning data.
			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should report pinning failures when the cpumanager allocation is known to fail", func(ctx context.Context) {
			ginkgo.By("Creating the test pod which will be rejected for SMTAlignmentError")
			// Requesting a single exclusive CPU on an SMT machine violates the
			// full-pcpus-only policy option (smtLevel >= 2 is guaranteed by
			// BeforeEach), so admission is expected to fail.
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("smt-align-err", 1))

			// One admission attempt was made and it failed: both the request
			// counter and the error counter must read exactly 1.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with pod failed to admit")

			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should not report any pinning failures when the cpumanager allocation is expected to succeed", func(ctx context.Context) {
			ginkgo.By("Creating the test pod")
			// Requesting smtLevel CPUs consumes whole physical cores, which
			// satisfies the full-pcpus-only option, so admission must succeed.
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("smt-align-ok", smtLevel))

			// One admission attempt was made and it succeeded: one request,
			// zero errors.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with pod should be admitted")

			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_cpu_manager_pinning_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_cpu_manager_pinning_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})
	})
})
165
166 func getKubeletMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
167 ginkgo.By("getting Kubelet metrics from the metrics API")
168 return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics")
169 }
170
171 func makeGuaranteedCPUExclusiveSleeperPod(name string, cpus int) *v1.Pod {
172 return &v1.Pod{
173 ObjectMeta: metav1.ObjectMeta{
174 Name: name + "-pod",
175 },
176 Spec: v1.PodSpec{
177 RestartPolicy: v1.RestartPolicyNever,
178 Containers: []v1.Container{
179 {
180 Name: name + "-cnt",
181 Image: busyboxImage,
182 Resources: v1.ResourceRequirements{
183 Requests: v1.ResourceList{
184 v1.ResourceCPU: resource.MustParse(fmt.Sprintf("%d", cpus)),
185 v1.ResourceMemory: resource.MustParse("64Mi"),
186 },
187 Limits: v1.ResourceList{
188 v1.ResourceCPU: resource.MustParse(fmt.Sprintf("%d", cpus)),
189 v1.ResourceMemory: resource.MustParse("64Mi"),
190 },
191 },
192 Command: []string{"sh", "-c", "sleep", "1d"},
193 },
194 },
195 },
196 }
197 }
198
199 func timelessSample(value interface{}) types.GomegaMatcher {
200 return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
201
202 "Metric": gstruct.Ignore(),
203 "Value": gomega.BeNumerically("==", value),
204 "Timestamp": gstruct.Ignore(),
205 "Histogram": gstruct.Ignore(),
206 }))
207 }
208