1
16
17 package e2enode
18
19 import (
20 "context"
21 "time"
22
23 "github.com/onsi/ginkgo/v2"
24 "github.com/onsi/gomega"
25 "github.com/onsi/gomega/gstruct"
26 "github.com/onsi/gomega/types"
27
28 v1 "k8s.io/api/core/v1"
29 kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
30 "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
31 "k8s.io/kubernetes/test/e2e/feature"
32 "k8s.io/kubernetes/test/e2e/framework"
33 e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
34 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
35 admissionapi "k8s.io/pod-security-admission/api"
36 )
37
// Serial e2e suite validating the kubelet's topology manager admission
// metrics exposed on /metrics. Each test restarts the kubelet with the
// single-numa-node policy, so counters are expected to start from zero.
var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.TopologyManager, func() {
	f := framework.NewDefaultFramework("topologymanager-metrics")
	// Exclusive-CPU sleeper pods need a privileged namespace.
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.Context("when querying /metrics", func() {
		var oldCfg *kubeletconfig.KubeletConfiguration
		var testPod *v1.Pod
		var cpusNumPerNUMA, coresNumPerNUMA, numaNodes, threadsPerCore int

		ginkgo.BeforeEach(func(ctx context.Context) {
			var err error
			// Snapshot the original kubelet config only once, so AfterEach
			// can always restore the pristine configuration.
			if oldCfg == nil {
				oldCfg, err = getCurrentKubeletConfig(ctx)
				framework.ExpectNoError(err)
			}

			// hostCheck skips the test when the host topology cannot
			// exercise single-NUMA-node alignment.
			numaNodes, coresNumPerNUMA, threadsPerCore = hostCheck()
			cpusNumPerNUMA = coresNumPerNUMA * threadsPerCore

			framework.Logf("numaNodes on the system %d", numaNodes)
			framework.Logf("Cores per NUMA on the system %d", coresNumPerNUMA)
			framework.Logf("Threads per Core on the system %d", threadsPerCore)
			framework.Logf("CPUs per NUMA on the system %d", cpusNumPerNUMA)

			policy := topologymanager.PolicySingleNumaNode
			scope := podScopeTopology

			// Reconfigure and restart the kubelet with the test policy;
			// the restart also resets the topology manager metrics.
			// NOTE(review): the second return value of
			// configureTopologyManagerInKubelet is discarded here — confirm
			// it is not an error/cleanup that should be handled.
			newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, 0)
			updateKubeletConfig(ctx, f, newCfg, true)

		})

		ginkgo.AfterEach(func(ctx context.Context) {
			// Clean up any pod created by the test body, then restore the
			// kubelet configuration captured in BeforeEach.
			if testPod != nil {
				deletePodSyncByName(ctx, f, testPod.Name)
			}
			updateKubeletConfig(ctx, f, oldCfg, true)
		})

		ginkgo.It("should report zero admission counters after a fresh restart", func(ctx context.Context) {

			ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with no pods running")

			// With no pods admitted since the restart, all admission
			// counters must read exactly zero.
			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
				"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
				"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
					"": timelessSample(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			// Consistently guards against counters drifting after the
			// initial match (e.g. unexpected admissions).
			gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should report admission failures when the topology manager alignment is known to fail", func(ctx context.Context) {
			ginkgo.By("Creating the test pod which will be rejected for TopologyAffinity")
			// Requesting one CPU more than a single NUMA node provides makes
			// single-numa-node alignment impossible, forcing an admission error.
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("topology-affinity-err", cpusNumPerNUMA+1))

			ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with pod failed to admit")

			// Exactly one admission attempt, and that attempt failed.
			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
					"": checkMetricValueGreaterThan(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should not report any admission failures when the topology manager alignment is expected to succeed", func(ctx context.Context) {
			ginkgo.By("Creating the test pod")
			// Requesting exactly one NUMA node's worth of CPUs is alignable,
			// so admission should succeed.
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("topology-alignment-ok", cpusNumPerNUMA))

			ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with pod should be admitted")

			// Exactly one admission attempt, and no errors.
			matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(1),
				}),
				"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					"": timelessSample(0),
				}),
				"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
					"": checkMetricValueGreaterThan(0),
				}),
			})

			ginkgo.By("Giving the Kubelet time to start up and produce metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
		})
	})
})
157
158 func hostCheck() (int, int, int) {
159
160
161
162 numaNodes := detectNUMANodes()
163 if numaNodes < minNumaNodes {
164 e2eskipper.Skipf("this test is intended to be run on a multi-node NUMA system")
165 }
166
167 coreCount := detectCoresPerSocket()
168 if coreCount < minCoreCount {
169 e2eskipper.Skipf("this test is intended to be run on a system with at least %d cores per socket", minCoreCount)
170 }
171
172 threadsPerCore := detectThreadPerCore()
173
174 return numaNodes, coreCount, threadsPerCore
175 }
176
177 func checkMetricValueGreaterThan(value interface{}) types.GomegaMatcher {
178 return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
179
180 "Metric": gstruct.Ignore(),
181 "Value": gomega.BeNumerically(">", value),
182 "Timestamp": gstruct.Ignore(),
183 "Histogram": gstruct.Ignore(),
184 }))
185 }
186
View as plain text