...

Source file src/k8s.io/kubernetes/pkg/kubelet/cm/container_manager.go

Documentation: k8s.io/kubernetes/pkg/kubelet/cm

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cm
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  
    25  	"k8s.io/apimachinery/pkg/types"
    26  	"k8s.io/apimachinery/pkg/util/sets"
    27  
    28  	// TODO: Migrate kubelet to either use its own internal objects or client library.
    29  	v1 "k8s.io/api/core/v1"
    30  	internalapi "k8s.io/cri-api/pkg/apis"
    31  	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
    32  	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    33  	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
    34  	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
    35  	"k8s.io/kubernetes/pkg/kubelet/config"
    36  	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    37  	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
    38  	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
    39  	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
    40  	"k8s.io/kubernetes/pkg/kubelet/status"
    41  	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
    42  	"k8s.io/utils/cpuset"
    43  )
    44  
// ActivePodsFunc is a callback that returns the pods currently active on the node.
type ActivePodsFunc func() []*v1.Pod
    46  
// ContainerManager manages the containers running on a machine.
type ContainerManager interface {
	// Start runs the container manager's housekeeping.
	// - Ensures that the Docker daemon is in a container.
	// - Creates the system container where all non-containerized processes run.
	Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error

	// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
	// These cgroups include the system and Kubernetes services.
	SystemCgroupsLimit() v1.ResourceList

	// GetNodeConfig returns a NodeConfig that is being used by the container manager.
	GetNodeConfig() NodeConfig

	// Status returns internal Status.
	Status() Status

	// NewPodContainerManager is a factory method which returns a podContainerManager object
	// Returns a noop implementation if qos cgroup hierarchy is not enabled
	NewPodContainerManager() PodContainerManager

	// GetMountedSubsystems returns the mounted cgroup subsystems on the node
	GetMountedSubsystems() *CgroupSubsystems

	// GetQOSContainersInfo returns the names of top level QoS containers
	GetQOSContainersInfo() QOSContainersInfo

	// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
	GetNodeAllocatableReservation() v1.ResourceList

	// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
	GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList

	// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
	// node allocatable (amount of total healthy resources reported by device plugin),
	// and inactive device plugin resources previously registered on the node.
	GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string)

	// UpdateQOSCgroups performs housekeeping updates to ensure that the top
	// level QoS containers have their desired state in a thread-safe way
	UpdateQOSCgroups() error

	// GetResources returns RunContainerOptions with devices, mounts, and env fields populated for
	// extended resources required by container.
	GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)

	// UpdatePluginResources calls Allocate of device plugin handler for potential
	// requests for device plugin resources, and returns an error if fails.
	// Otherwise, it updates allocatableResource in nodeInfo if necessary,
	// to make sure it is at least equal to the pod's requested capacity for
	// any registered device plugin resource
	UpdatePluginResources(*schedulerframework.NodeInfo, *lifecycle.PodAdmitAttributes) error

	// InternalContainerLifecycle returns the InternalContainerLifecycle used by
	// the container manager.
	InternalContainerLifecycle() InternalContainerLifecycle

	// GetPodCgroupRoot returns the cgroup which contains all pods.
	GetPodCgroupRoot() string

	// GetPluginRegistrationHandler returns a plugin registration handler
	// The pluginwatcher's Handlers allow to have a single module for handling
	// registration.
	GetPluginRegistrationHandler() cache.PluginHandler

	// ShouldResetExtendedResourceCapacity returns whether or not the extended resources should be zeroed,
	// due to node recreation.
	ShouldResetExtendedResourceCapacity() bool

	// GetAllocateResourcesPodAdmitHandler returns an instance of a PodAdmitHandler responsible for allocating pod resources.
	GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler

	// GetNodeAllocatableAbsolute returns the absolute value of Node Allocatable which is primarily useful for enforcement.
	GetNodeAllocatableAbsolute() v1.ResourceList

	// PrepareDynamicResources prepares dynamic pod resources
	PrepareDynamicResources(*v1.Pod) error

	// UnprepareDynamicResources unprepares dynamic pod resources
	UnprepareDynamicResources(*v1.Pod) error

	// PodMightNeedToUnprepareResources returns true if the pod with the given UID
	// might need to unprepare resources.
	PodMightNeedToUnprepareResources(UID types.UID) bool

	// Implements the PodResources Provider API
	podresources.CPUsProvider
	podresources.DevicesProvider
	podresources.MemoryProvider
	podresources.DynamicResourcesProvider
}
   136  
// NodeConfig holds the node-level configuration used by the container
// manager, largely derived from the kubelet configuration.
type NodeConfig struct {
	NodeName              types.NodeName
	RuntimeCgroupsName    string
	SystemCgroupsName     string
	KubeletCgroupsName    string
	KubeletOOMScoreAdj    int32
	ContainerRuntime      string
	CgroupsPerQOS         bool
	CgroupRoot            string
	CgroupDriver          string
	KubeletRootDir        string
	ProtectKernelDefaults bool
	// NodeAllocatableConfig is embedded, promoting its reservation and
	// enforcement fields onto NodeConfig.
	NodeAllocatableConfig
	QOSReserved                             map[v1.ResourceName]int64
	CPUManagerPolicy                        string
	CPUManagerPolicyOptions                 map[string]string
	TopologyManagerScope                    string
	CPUManagerReconcilePeriod               time.Duration
	ExperimentalMemoryManagerPolicy         string
	ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation
	PodPidsLimit                            int64
	EnforceCPULimits                        bool
	CPUCFSQuotaPeriod                       time.Duration
	TopologyManagerPolicy                   string
	TopologyManagerPolicyOptions            map[string]string
}
   163  
// NodeAllocatableConfig holds the resource-reservation settings (kube/system
// reserved, reserved CPUs, enforcement targets, and hard eviction thresholds)
// that feed the node allocatable computation.
type NodeAllocatableConfig struct {
	KubeReservedCgroupName   string
	SystemReservedCgroupName string
	ReservedSystemCPUs       cpuset.CPUSet
	EnforceNodeAllocatable   sets.Set[string]
	KubeReserved             v1.ResourceList
	SystemReserved           v1.ResourceList
	HardEvictionThresholds   []evictionapi.Threshold
}
   173  
// Status holds the internal status of the container manager, as reported by
// ContainerManager.Status.
type Status struct {
	// Any soft requirements that were unsatisfied.
	SoftRequirements error
}
   178  
   179  // parsePercentage parses the percentage string to numeric value.
   180  func parsePercentage(v string) (int64, error) {
   181  	if !strings.HasSuffix(v, "%") {
   182  		return 0, fmt.Errorf("percentage expected, got '%s'", v)
   183  	}
   184  	percentage, err := strconv.ParseInt(strings.TrimRight(v, "%"), 10, 0)
   185  	if err != nil {
   186  		return 0, fmt.Errorf("invalid number in percentage '%s'", v)
   187  	}
   188  	if percentage < 0 || percentage > 100 {
   189  		return 0, fmt.Errorf("percentage must be between 0 and 100")
   190  	}
   191  	return percentage, nil
   192  }
   193  
   194  // ParseQOSReserved parses the --qos-reserved option
   195  func ParseQOSReserved(m map[string]string) (*map[v1.ResourceName]int64, error) {
   196  	reservations := make(map[v1.ResourceName]int64)
   197  	for k, v := range m {
   198  		switch v1.ResourceName(k) {
   199  		// Only memory resources are supported.
   200  		case v1.ResourceMemory:
   201  			q, err := parsePercentage(v)
   202  			if err != nil {
   203  				return nil, fmt.Errorf("failed to parse percentage %q for %q resource: %w", v, k, err)
   204  			}
   205  			reservations[v1.ResourceName(k)] = q
   206  		default:
   207  			return nil, fmt.Errorf("cannot reserve %q resource", k)
   208  		}
   209  	}
   210  	return &reservations, nil
   211  }
   212  
   213  func containerDevicesFromResourceDeviceInstances(devs devicemanager.ResourceDeviceInstances) []*podresourcesapi.ContainerDevices {
   214  	var respDevs []*podresourcesapi.ContainerDevices
   215  
   216  	for resourceName, resourceDevs := range devs {
   217  		for devID, dev := range resourceDevs {
   218  			topo := dev.GetTopology()
   219  			if topo == nil {
   220  				// Some device plugin do not report the topology information.
   221  				// This is legal, so we report the devices anyway,
   222  				// let the client decide what to do.
   223  				respDevs = append(respDevs, &podresourcesapi.ContainerDevices{
   224  					ResourceName: resourceName,
   225  					DeviceIds:    []string{devID},
   226  				})
   227  				continue
   228  			}
   229  
   230  			for _, node := range topo.GetNodes() {
   231  				respDevs = append(respDevs, &podresourcesapi.ContainerDevices{
   232  					ResourceName: resourceName,
   233  					DeviceIds:    []string{devID},
   234  					Topology: &podresourcesapi.TopologyInfo{
   235  						Nodes: []*podresourcesapi.NUMANode{
   236  							{
   237  								ID: node.GetID(),
   238  							},
   239  						},
   240  					},
   241  				})
   242  			}
   243  		}
   244  	}
   245  
   246  	return respDevs
   247  }
   248  

View as plain text