1
16
17 package cm
18
19 import (
20 "errors"
21 "fmt"
22 "os"
23 "path"
24 "strings"
25
26 libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
27 v1 "k8s.io/api/core/v1"
28 "k8s.io/apimachinery/pkg/types"
29 utilerrors "k8s.io/apimachinery/pkg/util/errors"
30 utilfeature "k8s.io/apiserver/pkg/util/feature"
31 "k8s.io/klog/v2"
32 v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
33 kubefeatures "k8s.io/kubernetes/pkg/features"
34 )
35
// podCgroupNamePrefix is the literal that starts the last path element of a
// pod cgroup name. IsPodCgroup and GetAllPodsFromCgroups split that element
// on this prefix to recover the pod UID.
const podCgroupNamePrefix = "pod"
39
40
41
42
43 type podContainerManagerImpl struct {
44
45 qosContainersInfo QOSContainersInfo
46
47 subsystems *CgroupSubsystems
48
49
50 cgroupManager CgroupManager
51
52 podPidsLimit int64
53
54 enforceCPULimits bool
55
56
57 cpuCFSQuotaPeriod uint64
58 }
59
60
61 var _ PodContainerManager = &podContainerManagerImpl{}
62
63
64 func (m *podContainerManagerImpl) Exists(pod *v1.Pod) bool {
65 podContainerName, _ := m.GetPodContainerName(pod)
66 return m.cgroupManager.Exists(podContainerName)
67 }
68
69
70
71
72 func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
73
74 alreadyExists := m.Exists(pod)
75 if !alreadyExists {
76 enforceMemoryQoS := false
77 if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
78 libcontainercgroups.IsCgroup2UnifiedMode() {
79 enforceMemoryQoS = true
80 }
81
82 podContainerName, _ := m.GetPodContainerName(pod)
83 containerConfig := &CgroupConfig{
84 Name: podContainerName,
85 ResourceParameters: ResourceConfigForPod(pod, m.enforceCPULimits, m.cpuCFSQuotaPeriod, enforceMemoryQoS),
86 }
87 if m.podPidsLimit > 0 {
88 containerConfig.ResourceParameters.PidsLimit = &m.podPidsLimit
89 }
90 if enforceMemoryQoS {
91 klog.V(4).InfoS("MemoryQoS config for pod", "pod", klog.KObj(pod), "unified", containerConfig.ResourceParameters.Unified)
92 }
93 if err := m.cgroupManager.Create(containerConfig); err != nil {
94 return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)
95 }
96 }
97 return nil
98 }
99
100
101 func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName, string) {
102 podQOS := v1qos.GetPodQOS(pod)
103
104 var parentContainer CgroupName
105 switch podQOS {
106 case v1.PodQOSGuaranteed:
107 parentContainer = m.qosContainersInfo.Guaranteed
108 case v1.PodQOSBurstable:
109 parentContainer = m.qosContainersInfo.Burstable
110 case v1.PodQOSBestEffort:
111 parentContainer = m.qosContainersInfo.BestEffort
112 }
113 podContainer := GetPodCgroupNameSuffix(pod.UID)
114
115
116 cgroupName := NewCgroupName(parentContainer, podContainer)
117
118 cgroupfsName := m.cgroupManager.Name(cgroupName)
119
120 return cgroupName, cgroupfsName
121 }
122
123 func (m *podContainerManagerImpl) GetPodCgroupMemoryUsage(pod *v1.Pod) (uint64, error) {
124 podCgroupName, _ := m.GetPodContainerName(pod)
125 memUsage, err := m.cgroupManager.MemoryUsage(podCgroupName)
126 if err != nil {
127 return 0, err
128 }
129 return uint64(memUsage), nil
130 }
131
132 func (m *podContainerManagerImpl) GetPodCgroupConfig(pod *v1.Pod, resource v1.ResourceName) (*ResourceConfig, error) {
133 podCgroupName, _ := m.GetPodContainerName(pod)
134 return m.cgroupManager.GetCgroupConfig(podCgroupName, resource)
135 }
136
137 func (m *podContainerManagerImpl) SetPodCgroupConfig(pod *v1.Pod, resource v1.ResourceName, resourceConfig *ResourceConfig) error {
138 podCgroupName, _ := m.GetPodContainerName(pod)
139 return m.cgroupManager.SetCgroupConfig(podCgroupName, resource, resourceConfig)
140 }
141
142
143 func (m *podContainerManagerImpl) killOnePid(pid int) error {
144
145
146 p, _ := os.FindProcess(pid)
147 if err := p.Kill(); err != nil {
148
149 if errors.Is(err, os.ErrProcessDone) {
150 klog.V(3).InfoS("Process no longer exists", "pid", pid)
151 return nil
152 }
153 return err
154 }
155 return nil
156 }
157
158
159
160 func (m *podContainerManagerImpl) tryKillingCgroupProcesses(podCgroup CgroupName) error {
161 pidsToKill := m.cgroupManager.Pids(podCgroup)
162
163 if len(pidsToKill) == 0 {
164 return nil
165 }
166
167 var errlist []error
168
169
170 removed := map[int]bool{}
171 for i := 0; i < 5; i++ {
172 if i != 0 {
173 klog.V(3).InfoS("Attempt failed to kill all unwanted process from cgroup, retrying", "attempt", i, "cgroupName", podCgroup)
174 }
175 errlist = []error{}
176 for _, pid := range pidsToKill {
177 if _, ok := removed[pid]; ok {
178 continue
179 }
180 klog.V(3).InfoS("Attempting to kill process from cgroup", "pid", pid, "cgroupName", podCgroup)
181 if err := m.killOnePid(pid); err != nil {
182 klog.V(3).InfoS("Failed to kill process from cgroup", "pid", pid, "cgroupName", podCgroup, "err", err)
183 errlist = append(errlist, err)
184 } else {
185 removed[pid] = true
186 }
187 }
188 if len(errlist) == 0 {
189 klog.V(3).InfoS("Successfully killed all unwanted processes from cgroup", "cgroupName", podCgroup)
190 return nil
191 }
192 }
193 return utilerrors.NewAggregate(errlist)
194 }
195
196
197 func (m *podContainerManagerImpl) Destroy(podCgroup CgroupName) error {
198
199 if err := m.tryKillingCgroupProcesses(podCgroup); err != nil {
200 klog.InfoS("Failed to kill all the processes attached to cgroup", "cgroupName", podCgroup, "err", err)
201 return fmt.Errorf("failed to kill all the processes attached to the %v cgroups : %v", podCgroup, err)
202 }
203
204
205 containerConfig := &CgroupConfig{
206 Name: podCgroup,
207 ResourceParameters: &ResourceConfig{},
208 }
209 if err := m.cgroupManager.Destroy(containerConfig); err != nil {
210 klog.InfoS("Failed to delete cgroup paths", "cgroupName", podCgroup, "err", err)
211 return fmt.Errorf("failed to delete cgroup paths for %v : %v", podCgroup, err)
212 }
213 return nil
214 }
215
216
217 func (m *podContainerManagerImpl) ReduceCPULimits(podCgroup CgroupName) error {
218 return m.cgroupManager.ReduceCPULimits(podCgroup)
219 }
220
221
222 func (m *podContainerManagerImpl) IsPodCgroup(cgroupfs string) (bool, types.UID) {
223
224 cgroupName := m.cgroupManager.CgroupName(cgroupfs)
225 qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
226 basePath := ""
227 for _, qosContainerName := range qosContainersList {
228
229 if len(cgroupName) == len(qosContainerName)+1 {
230 basePath = cgroupName[len(qosContainerName)]
231 }
232 }
233 if basePath == "" {
234 return false, types.UID("")
235 }
236 if !strings.HasPrefix(basePath, podCgroupNamePrefix) {
237 return false, types.UID("")
238 }
239 parts := strings.Split(basePath, podCgroupNamePrefix)
240 if len(parts) != 2 {
241 return false, types.UID("")
242 }
243 return true, types.UID(parts[1])
244 }
245
246
247
248 func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
249
250 foundPods := make(map[types.UID]CgroupName)
251 qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
252
253
254
255
256 for _, val := range m.subsystems.MountPoints {
257 for _, qosContainerName := range qosContainersList {
258
259 qcConversion := m.cgroupManager.Name(qosContainerName)
260 qc := path.Join(val, qcConversion)
261 dirInfo, err := os.ReadDir(qc)
262 if err != nil {
263 if os.IsNotExist(err) {
264 continue
265 }
266 return nil, fmt.Errorf("failed to read the cgroup directory %v : %v", qc, err)
267 }
268 for i := range dirInfo {
269
270 if !dirInfo[i].IsDir() {
271 continue
272 }
273
274
275
276
277 cgroupfsPath := path.Join(qcConversion, dirInfo[i].Name())
278 internalPath := m.cgroupManager.CgroupName(cgroupfsPath)
279
280
281 basePath := internalPath[len(internalPath)-1]
282 if !strings.Contains(basePath, podCgroupNamePrefix) {
283 continue
284 }
285
286 parts := strings.Split(basePath, podCgroupNamePrefix)
287
288 if len(parts) != 2 {
289 klog.InfoS("Pod cgroup manager ignored unexpected cgroup because it is not a pod", "path", cgroupfsPath)
290 continue
291 }
292 podUID := parts[1]
293 foundPods[types.UID(podUID)] = internalPath
294 }
295 }
296 }
297 return foundPods, nil
298 }
299
300
301
302
303
304
305 type podContainerManagerNoop struct {
306 cgroupRoot CgroupName
307 }
308
309
310 var _ PodContainerManager = &podContainerManagerNoop{}
311
312 func (m *podContainerManagerNoop) Exists(_ *v1.Pod) bool {
313 return true
314 }
315
316 func (m *podContainerManagerNoop) EnsureExists(_ *v1.Pod) error {
317 return nil
318 }
319
320 func (m *podContainerManagerNoop) GetPodContainerName(_ *v1.Pod) (CgroupName, string) {
321 return m.cgroupRoot, ""
322 }
323
324 func (m *podContainerManagerNoop) GetPodContainerNameForDriver(_ *v1.Pod) string {
325 return ""
326 }
327
328
329 func (m *podContainerManagerNoop) Destroy(_ CgroupName) error {
330 return nil
331 }
332
333 func (m *podContainerManagerNoop) ReduceCPULimits(_ CgroupName) error {
334 return nil
335 }
336
337 func (m *podContainerManagerNoop) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
338 return nil, nil
339 }
340
341 func (m *podContainerManagerNoop) IsPodCgroup(cgroupfs string) (bool, types.UID) {
342 return false, types.UID("")
343 }
344
345 func (m *podContainerManagerNoop) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) {
346 return 0, nil
347 }
348
349 func (m *podContainerManagerNoop) GetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName) (*ResourceConfig, error) {
350 return nil, nil
351 }
352
353 func (m *podContainerManagerNoop) SetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName, _ *ResourceConfig) error {
354 return nil
355 }
356
View as plain text