/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

//go:generate mockgen -source=types.go -destination=mock_threshold_notifier_test.go -package=eviction NotifierFactory,ThresholdNotifier
package eviction

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

// fsStatsType defines the types of filesystem stats to collect.
type fsStatsType string

const (
	// fsStatsLocalVolumeSource identifies stats for pod local volume sources.
	fsStatsLocalVolumeSource fsStatsType = "localVolumeSource"
	// fsStatsLogs identifies stats for pod logs.
	fsStatsLogs fsStatsType = "logs"
	// fsStatsRoot identifies stats for pod container writable layers.
	fsStatsRoot fsStatsType = "root"
	// fsStatsImages identifies stats for pod container read-only layers.
	fsStatsImages fsStatsType = "images"
)

// Config holds information about how eviction is configured.
type Config struct {
	// PressureTransitionPeriod is the duration the kubelet has to wait before transitioning out of a pressure condition.
	PressureTransitionPeriod time.Duration
	// MaxPodGracePeriodSeconds is the maximum allowed grace period (in seconds) to use when terminating pods
	// in response to a soft eviction threshold being met.
	MaxPodGracePeriodSeconds int64
	// Thresholds define the set of conditions monitored to trigger eviction.
	Thresholds []evictionapi.Threshold
	// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
	KernelMemcgNotification bool
	// PodCgroupRoot is the cgroup which contains all pods.
	PodCgroupRoot string
}

// Manager evaluates when an eviction threshold for node stability has been met on the node.
type Manager interface {
	// Start starts the control loop to monitor eviction thresholds at the specified interval.
	Start(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc, podCleanedUpFunc PodCleanedUpFunc, monitoringInterval time.Duration)

	// IsUnderMemoryPressure returns true if the node is under memory pressure.
	IsUnderMemoryPressure() bool

	// IsUnderDiskPressure returns true if the node is under disk pressure.
	IsUnderDiskPressure() bool

	// IsUnderPIDPressure returns true if the node is under PID pressure.
	IsUnderPIDPressure() bool
}

// DiskInfoProvider is responsible for informing the manager how disk is configured.
type DiskInfoProvider interface {
	// HasDedicatedImageFs returns true if the imagefs is on a separate device from the rootfs.
	HasDedicatedImageFs(ctx context.Context) (bool, error)
}

// ImageGC is responsible for performing garbage collection of unused images.
type ImageGC interface {
	// DeleteUnusedImages deletes unused images.
	DeleteUnusedImages(ctx context.Context) error
}

// ContainerGC is responsible for performing garbage collection of unused containers.
type ContainerGC interface {
	// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
	DeleteAllUnusedContainers(ctx context.Context) error
	// IsContainerFsSeparateFromImageFs checks if container filesystem is split from image filesystem.
	IsContainerFsSeparateFromImageFs(ctx context.Context) bool
}

// KillPodFunc kills a pod.
// The pod status is updated, and then it is killed with the specified grace period.
// This function must block until either the pod is killed or an error is encountered.
// Arguments:
//   - pod: the pod to kill
//   - isEvicted: NOTE(review): presumably reports whether the kill is an eviction;
//     confirm against the implementation
//   - gracePeriodOverride: the grace period override to use instead of what is on the pod spec
//   - fn: callback taking a *v1.PodStatus. NOTE(review): presumably this is how the
//     desired status (i.e. why it's killed) is applied; confirm against the implementation
type KillPodFunc func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error

// MirrorPodFunc returns the mirror pod for the given static pod and
// whether it was known to the pod manager.
type MirrorPodFunc func(*v1.Pod) (*v1.Pod, bool)

// ActivePodsFunc returns pods bound to the kubelet that are active (i.e. non-terminal state).
type ActivePodsFunc func() []*v1.Pod

// PodCleanedUpFunc returns true if all resources associated with a pod have been reclaimed.
type PodCleanedUpFunc func(*v1.Pod) bool

// statsFunc returns the usage stats if known for an input pod.
type statsFunc func(pod *v1.Pod) (statsapi.PodStats, bool)

// rankFunc sorts the pods in eviction order.
type rankFunc func(pods []*v1.Pod, stats statsFunc)

// signalObservation is the observed resource usage.
type signalObservation struct {
	// The resource capacity
	capacity *resource.Quantity
	// The available resource
	available *resource.Quantity
	// Time at which the observation was taken
	time metav1.Time
}

// signalObservations maps a signal to an observed quantity.
type signalObservations map[evictionapi.Signal]signalObservation

// thresholdsObservedAt maps a threshold to a time that it was observed.
type thresholdsObservedAt map[evictionapi.Threshold]time.Time

// nodeConditionsObservedAt maps a node condition to a time that it was observed.
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time

// nodeReclaimFunc is a function that knows how to reclaim a resource from the node without impacting pods.
type nodeReclaimFunc func(ctx context.Context) error

// nodeReclaimFuncs is an ordered list of nodeReclaimFunc.
type nodeReclaimFuncs []nodeReclaimFunc

// CgroupNotifier generates events from cgroup events.
type CgroupNotifier interface {
	// Start causes the CgroupNotifier to begin notifying on the eventCh.
	// eventCh is send-only from the notifier's side and carries empty structs.
	Start(eventCh chan<- struct{})
	// Stop stops all processes and cleans up file descriptors associated with the CgroupNotifier.
	Stop()
}

// NotifierFactory creates CgroupNotifiers.
type NotifierFactory interface {
	// NewCgroupNotifier creates a CgroupNotifier that creates events when the threshold
	// on the attribute in the cgroup specified by the path is crossed.
	NewCgroupNotifier(path, attribute string, threshold int64) (CgroupNotifier, error)
}

// ThresholdNotifier manages CgroupNotifiers based on memory eviction thresholds, and performs a function
// when memory eviction thresholds are crossed.
type ThresholdNotifier interface {
	// Start calls the notifier function when the CgroupNotifier notifies the ThresholdNotifier that an event occurred.
	Start()
	// UpdateThreshold updates the memory cgroup threshold based on the metrics provided.
	// Calling UpdateThreshold with recent metrics allows the ThresholdNotifier to trigger at the
	// eviction threshold more accurately.
	UpdateThreshold(summary *statsapi.Summary) error
	// Description produces a relevant string describing the Memory Threshold Notifier.
	Description() string
}