...

Source file src/k8s.io/kubernetes/pkg/kubelet/winstats/perfcounter_nodestats.go

Documentation: k8s.io/kubernetes/pkg/kubelet/winstats

     1  //go:build windows
     2  // +build windows
     3  
     4  /*
     5  Copyright 2017 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package winstats
    21  
    22  import (
    23  	"errors"
    24  	"fmt"
    25  	"os"
    26  	"os/exec"
    27  	"runtime"
    28  	"strconv"
    29  	"strings"
    30  	"sync"
    31  	"syscall"
    32  	"time"
    33  	"unsafe"
    34  
    35  	cadvisorapi "github.com/google/cadvisor/info/v1"
    36  	"golang.org/x/sys/windows"
    37  	"golang.org/x/sys/windows/registry"
    38  	"k8s.io/apimachinery/pkg/util/wait"
    39  	"k8s.io/klog/v2"
    40  )
    41  
const (
	// bootIdRegistry is the registry key path that getBootID opens under
	// HKEY_LOCAL_MACHINE to look up the boot identifier.
	bootIdRegistry = `SYSTEM\CurrentControlSet\Control\Session Manager\Memory Management\PrefetchParameters`
	// bootIdKey is the name of the integer value read from bootIdRegistry.
	bootIdKey      = `BootId`
)
    46  
// MemoryStatusEx is the same as Windows structure MEMORYSTATUSEX
// https://msdn.microsoft.com/en-us/library/windows/desktop/aa366770(v=vs.85).aspx
//
// A pointer to this struct is passed directly to GlobalMemoryStatusEx, so the
// field order and widths are expected to mirror the native structure.
type MemoryStatusEx struct {
	// Length is set by the caller to unsafe.Sizeof of this struct before the
	// GlobalMemoryStatusEx call.
	Length               uint32
	MemoryLoad           uint32
	// TotalPhys is the value this package reports as the node's physical
	// memory capacity.
	TotalPhys            uint64
	AvailPhys            uint64
	TotalPageFile        uint64
	AvailPageFile        uint64
	TotalVirtual         uint64
	AvailVirtual         uint64
	AvailExtendedVirtual uint64
}
    60  
var (
	// modkernel32 is a lazily loaded handle to kernel32.dll.
	modkernel32                 = windows.NewLazySystemDLL("kernel32.dll")
	// procGlobalMemoryStatusEx is called by
	// getPhysicallyInstalledSystemMemoryBytes to read the total physical memory.
	procGlobalMemoryStatusEx    = modkernel32.NewProc("GlobalMemoryStatusEx")
	// procGetActiveProcessorCount is called by getActiveProcessorCount to count
	// the processors of a processor group.
	procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount")
)

// allProcessorGroups is the group number ProcessorCount passes to
// getActiveProcessorCount to obtain the cpu count for all processor groups.
const allProcessorGroups = 0xFFFF
    68  
    69  // NewPerfCounterClient creates a client using perf counters
    70  func NewPerfCounterClient() (Client, error) {
    71  	// Initialize the cache
    72  	initCache := cpuUsageCoreNanoSecondsCache{0, 0}
    73  	return newClient(&perfCounterNodeStatsClient{
    74  		cpuUsageCoreNanoSecondsCache: initCache,
    75  	})
    76  }
    77  
// perfCounterNodeStatsClient is a client that provides Windows Stats via PerfCounters
type perfCounterNodeStatsClient struct {
	// nodeMetrics is the most recent sample; collectMetricsData replaces it
	// as a whole on every collection.
	nodeMetrics
	mu sync.RWMutex // mu protects nodeMetrics; it is also held while cpuUsageCoreNanoSecondsCache is updated
	// nodeInfo is filled in by startMonitoring.
	nodeInfo
	// cpuUsageCoreNanoSecondsCache caches the cpu usage for nodes.
	cpuUsageCoreNanoSecondsCache
}
    86  
    87  func (p *perfCounterNodeStatsClient) startMonitoring() error {
    88  	memory, err := getPhysicallyInstalledSystemMemoryBytes()
    89  	if err != nil {
    90  		return err
    91  	}
    92  
    93  	osInfo, err := GetOSInfo()
    94  	if err != nil {
    95  		return err
    96  	}
    97  
    98  	p.nodeInfo = nodeInfo{
    99  		kernelVersion:               osInfo.GetPatchVersion(),
   100  		osImageVersion:              osInfo.ProductName,
   101  		memoryPhysicalCapacityBytes: memory,
   102  		startTime:                   time.Now(),
   103  	}
   104  
   105  	cpuCounter, err := newPerfCounter(cpuQuery)
   106  	if err != nil {
   107  		return err
   108  	}
   109  
   110  	memWorkingSetCounter, err := newPerfCounter(memoryPrivWorkingSetQuery)
   111  	if err != nil {
   112  		return err
   113  	}
   114  
   115  	memCommittedBytesCounter, err := newPerfCounter(memoryCommittedBytesQuery)
   116  	if err != nil {
   117  		return err
   118  	}
   119  
   120  	networkAdapterCounter, err := newNetworkCounters()
   121  	if err != nil {
   122  		return err
   123  	}
   124  
   125  	go wait.Forever(func() {
   126  		p.collectMetricsData(cpuCounter, memWorkingSetCounter, memCommittedBytesCounter, networkAdapterCounter)
   127  	}, perfCounterUpdatePeriod)
   128  
   129  	// Cache the CPU usage every defaultCachePeriod
   130  	go wait.Forever(func() {
   131  		newValue := p.nodeMetrics.cpuUsageCoreNanoSeconds
   132  		p.mu.Lock()
   133  		defer p.mu.Unlock()
   134  		p.cpuUsageCoreNanoSecondsCache = cpuUsageCoreNanoSecondsCache{
   135  			previousValue: p.cpuUsageCoreNanoSecondsCache.latestValue,
   136  			latestValue:   newValue,
   137  		}
   138  	}, defaultCachePeriod)
   139  
   140  	return nil
   141  }
   142  
   143  func (p *perfCounterNodeStatsClient) getMachineInfo() (*cadvisorapi.MachineInfo, error) {
   144  	hostname, err := os.Hostname()
   145  	if err != nil {
   146  		return nil, err
   147  	}
   148  
   149  	systemUUID, err := getSystemUUID()
   150  	if err != nil {
   151  		return nil, err
   152  	}
   153  
   154  	bootId, err := getBootID()
   155  	if err != nil {
   156  		return nil, err
   157  	}
   158  
   159  	return &cadvisorapi.MachineInfo{
   160  		NumCores:       ProcessorCount(),
   161  		MemoryCapacity: p.nodeInfo.memoryPhysicalCapacityBytes,
   162  		MachineID:      hostname,
   163  		SystemUUID:     systemUUID,
   164  		BootID:         bootId,
   165  	}, nil
   166  }
   167  
   168  // runtime.NumCPU() will only return the information for a single Processor Group.
   169  // Since a single group can only hold 64 logical processors, this
   170  // means when there are more they will be divided into multiple groups.
   171  // For the above reason, procGetActiveProcessorCount is used to get the
   172  // cpu count for all processor groups of the windows node.
   173  // more notes for this issue:
   174  // same issue in moby: https://github.com/moby/moby/issues/38935#issuecomment-744638345
   175  // solution in hcsshim: https://github.com/microsoft/hcsshim/blob/master/internal/processorinfo/processor_count.go
   176  func ProcessorCount() int {
   177  	if amount := getActiveProcessorCount(allProcessorGroups); amount != 0 {
   178  		return int(amount)
   179  	}
   180  	return runtime.NumCPU()
   181  }
   182  
   183  func getActiveProcessorCount(groupNumber uint16) int {
   184  	r0, _, _ := syscall.Syscall(procGetActiveProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0)
   185  	return int(r0)
   186  }
   187  
   188  func (p *perfCounterNodeStatsClient) getVersionInfo() (*cadvisorapi.VersionInfo, error) {
   189  	return &cadvisorapi.VersionInfo{
   190  		KernelVersion:      p.nodeInfo.kernelVersion,
   191  		ContainerOsVersion: p.nodeInfo.osImageVersion,
   192  	}, nil
   193  }
   194  
   195  func (p *perfCounterNodeStatsClient) getNodeMetrics() (nodeMetrics, error) {
   196  	p.mu.RLock()
   197  	defer p.mu.RUnlock()
   198  	return p.nodeMetrics, nil
   199  }
   200  
   201  func (p *perfCounterNodeStatsClient) getNodeInfo() nodeInfo {
   202  	return p.nodeInfo
   203  }
   204  
   205  func (p *perfCounterNodeStatsClient) collectMetricsData(cpuCounter, memWorkingSetCounter, memCommittedBytesCounter perfCounter, networkAdapterCounter *networkCounter) {
   206  	cpuValue, err := cpuCounter.getData()
   207  	cpuCores := ProcessorCount()
   208  	if err != nil {
   209  		klog.ErrorS(err, "Unable to get cpu perf counter data")
   210  		return
   211  	}
   212  
   213  	memWorkingSetValue, err := memWorkingSetCounter.getData()
   214  	if err != nil {
   215  		klog.ErrorS(err, "Unable to get memWorkingSet perf counter data")
   216  		return
   217  	}
   218  
   219  	memCommittedBytesValue, err := memCommittedBytesCounter.getData()
   220  	if err != nil {
   221  		klog.ErrorS(err, "Unable to get memCommittedBytes perf counter data")
   222  		return
   223  	}
   224  
   225  	networkAdapterStats, err := networkAdapterCounter.getData()
   226  	if err != nil {
   227  		klog.ErrorS(err, "Unable to get network adapter perf counter data")
   228  		return
   229  	}
   230  
   231  	p.mu.Lock()
   232  	defer p.mu.Unlock()
   233  	p.nodeMetrics = nodeMetrics{
   234  		cpuUsageCoreNanoSeconds:   p.convertCPUValue(cpuCores, cpuValue),
   235  		cpuUsageNanoCores:         p.getCPUUsageNanoCores(),
   236  		memoryPrivWorkingSetBytes: memWorkingSetValue,
   237  		memoryCommittedBytes:      memCommittedBytesValue,
   238  		interfaceStats:            networkAdapterStats,
   239  		timeStamp:                 time.Now(),
   240  	}
   241  }
   242  
   243  func (p *perfCounterNodeStatsClient) convertCPUValue(cpuCores int, cpuValue uint64) uint64 {
   244  	// This converts perf counter data which is cpu percentage for all cores into nanoseconds.
   245  	// The formula is (cpuPercentage / 100.0) * #cores * 1e+9 (nano seconds). More info here:
   246  	// https://github.com/kubernetes/heapster/issues/650
   247  	newValue := p.nodeMetrics.cpuUsageCoreNanoSeconds + uint64((float64(cpuValue)/100.0)*float64(cpuCores)*1e9)
   248  	return newValue
   249  }
   250  
   251  func (p *perfCounterNodeStatsClient) getCPUUsageNanoCores() uint64 {
   252  	cachePeriodSeconds := uint64(defaultCachePeriod / time.Second)
   253  	perfCounterUpdatePeriodSeconds := uint64(perfCounterUpdatePeriod / time.Second)
   254  	cpuUsageNanoCores := ((p.cpuUsageCoreNanoSecondsCache.latestValue - p.cpuUsageCoreNanoSecondsCache.previousValue) * perfCounterUpdatePeriodSeconds) / cachePeriodSeconds
   255  	return cpuUsageNanoCores
   256  }
   257  
   258  func getSystemUUID() (string, error) {
   259  	result, err := exec.Command("wmic", "csproduct", "get", "UUID").Output()
   260  	if err != nil {
   261  		return "", err
   262  	}
   263  	fields := strings.Fields(string(result))
   264  	if len(fields) != 2 {
   265  		return "", fmt.Errorf("received unexpected value retrieving vm uuid: %q", string(result))
   266  	}
   267  	return fields[1], nil
   268  }
   269  
   270  func getPhysicallyInstalledSystemMemoryBytes() (uint64, error) {
   271  	// We use GlobalMemoryStatusEx instead of GetPhysicallyInstalledSystemMemory
   272  	// on Windows node for the following reasons:
   273  	// 1. GetPhysicallyInstalledSystemMemory retrieves the amount of physically
   274  	// installed RAM from the computer's SMBIOS firmware tables.
   275  	// https://msdn.microsoft.com/en-us/library/windows/desktop/cc300158(v=vs.85).aspx
   276  	// On some VM, it is unable to read data from SMBIOS and fails with ERROR_INVALID_DATA.
   277  	// 2. On Linux node, total physical memory is read from MemTotal in /proc/meminfo.
   278  	// GlobalMemoryStatusEx returns the amount of physical memory that is available
   279  	// for the operating system to use. The amount returned by GlobalMemoryStatusEx
   280  	// is closer in parity with Linux
   281  	// https://www.kernel.org/doc/Documentation/filesystems/proc.txt
   282  	var statex MemoryStatusEx
   283  	statex.Length = uint32(unsafe.Sizeof(statex))
   284  	ret, _, _ := procGlobalMemoryStatusEx.Call(uintptr(unsafe.Pointer(&statex)))
   285  
   286  	if ret == 0 {
   287  		return 0, errors.New("unable to read physical memory")
   288  	}
   289  
   290  	return statex.TotalPhys, nil
   291  }
   292  
   293  func getBootID() (string, error) {
   294  	regKey, err := registry.OpenKey(registry.LOCAL_MACHINE, bootIdRegistry, registry.READ)
   295  	if err != nil {
   296  		return "", err
   297  	}
   298  	defer regKey.Close()
   299  	regValue, _, err := regKey.GetIntegerValue(bootIdKey)
   300  	if err != nil {
   301  		return "", err
   302  	}
   303  	return strconv.FormatUint(regValue, 10), nil
   304  }
   305  

View as plain text