# Copyright 2019 The Kubernetes Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. <# .SYNOPSIS Library for configuring Windows nodes and joining them to the cluster. .NOTES This module depends on common.psm1. Some portions copied / adapted from https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1. .EXAMPLE Suggested usage for dev/test: [Net.ServicePointManager]::SecurityProtocol = ` [Net.SecurityProtocolType]::Tls12 Invoke-WebRequest ` https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/k8s-node-setup.psm1 ` -OutFile C:\k8s-node-setup.psm1 Invoke-WebRequest ` https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/configure.ps1 ` -OutFile C:\configure.ps1 Import-Module -Force C:\k8s-node-setup.psm1 # -Force to override existing # Execute functions manually or run configure.ps1. #> # IMPORTANT PLEASE NOTE: # Any time the file structure in the `windows` directory changes, `windows/BUILD` # and `k8s.io/release/lib/releaselib.sh` must be manually updated with the changes. # We HIGHLY recommend not changing the file structure, because consumers of # Kubernetes releases depend on the release structure remaining stable. # TODO: update scripts for these style guidelines: # - Remove {} around variable references unless actually needed for clarity. # - Always use single-quoted strings unless actually interpolating variables # or using escape characters. 
# - Use "approved verbs":
#   https://docs.microsoft.com/en-us/powershell/developer/cmdlet/approved-verbs-for-windows-powershell-commands
# - Document functions using proper syntax:
#   https://technet.microsoft.com/en-us/library/hh847834(v=wps.620).aspx

$GCE_METADATA_SERVER = "169.254.169.254"
# The "management" interface is used by the kubelet and by Windows pods to talk
# to the rest of the Kubernetes cluster *without NAT*. This interface does not
# exist until an initial HNS network has been created on the Windows node - see
# Add_InitialHnsNetwork().
$MGMT_ADAPTER_NAME = "vEthernet (Ethernet*"
$CRICTL_VERSION = 'v1.29.0'
$CRICTL_SHA256 = '9b679305cb05f73e9e4868056e7d48805c47e24d2d38849e64395ff54cf5c701'

Import-Module -Force C:\common.psm1

# Logs $Message to the console, prefixed with "TODO: ".
function Log_Todo {
  param (
    [parameter(Mandatory=$true)] [string]$Message
  )
  Log-Output ('TODO: ' + $Message)
}

# Logs $Message to the console, prefixed with "Not implemented yet: ", as a
# fatal message (Log-Output -Fatal).
function Log_NotImplemented {
  param (
    [parameter(Mandatory=$true)] [string]$Message
  )
  Log-Output -Fatal ('Not implemented yet: ' + $Message)
}

# Looks up the IPv4 /32 route to the GCE metadata server. If the lookup raises
# a CimJobException the current IPv4 routing table is logged as a fatal
# message; otherwise the function is silent and emits nothing.
function Verify_GceMetadataServerRouteIsPresent {
  $route_query = @{
    ErrorAction       = 'Stop'
    AddressFamily     = 'IPv4'
    DestinationPrefix = "${GCE_METADATA_SERVER}/32"
  }
  try {
    # Only the success/failure of the lookup matters; discard the route object.
    $null = Get-NetRoute @route_query
  }
  catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
    $ipv4_routes = Get-NetRoute -AddressFamily IPv4 | Out-String
    Log-Output -Fatal `
        ("GCE metadata server route is not present as expected.`n" +
         "$ipv4_routes")
  }
}

# Checks if the route to the GCE metadata server is present. Returns when the
# route is NOT present or after a timeout has expired.
function WaitFor_GceMetadataServerRouteToBeRemoved {
  $max_wait_seconds = 60
  $poll_seconds = 2
  $route_query = @{
    ErrorAction       = 'Stop'
    AddressFamily     = 'IPv4'
    DestinationPrefix = "${GCE_METADATA_SERVER}/32"
  }

  Log-Output ("Waiting up to ${max_wait_seconds} seconds for GCE metadata server " +
              "route to be removed")
  for ($waited = 0; $waited -lt $max_wait_seconds; $waited += $poll_seconds) {
    try {
      $null = Get-NetRoute @route_query
    }
    catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
      # The lookup failed, which is how a missing route shows up: stop waiting.
      break
    }
    Start-Sleep $poll_seconds
  }
}

# Attempts to add a /32 route to the GCE metadata server on every network
# adapter; failures on individual adapters are ignored.
function Add_GceMetadataServerRoute {
  # Before setting up HNS the Windows VM has a "vEthernet (nat)" interface and
  # a "Ethernet" interface, and the route to the metadata server exists on the
  # Ethernet interface. After adding the HNS network a "vEthernet (Ethernet)"
  # interface is added, and it seems to subsume the routes of the "Ethernet"
  # interface (trying to add routes on the Ethernet interface at this point just
  # results in "New-NetRoute : Element not found" errors). I don't know what's
  # up with that, but since it's hard to know what's the right thing to do here
  # we just try to add the route on all of the network adapters.
  foreach ($adapter in (Get-NetAdapter)) {
    $null = New-NetRoute `
        -ErrorAction Ignore `
        -DestinationPrefix "${GCE_METADATA_SERVER}/32" `
        -InterfaceIndex $adapter.InterfaceIndex
  }
}

# Returns a PowerShell object with Major, Minor, Build and Revision note
# properties describing the Windows version.
function Get_WindowsVersion {
  # Unlike checking `[System.Environment]::OSVersion.Version`, this long-winded
  # approach gets the OS revision/patch number correctly
  # (https://superuser.com/a/1160428/652018).
  $version_key = 'Registry::HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion'
  # Output property name -> registry value name, in output order.
  $registry_values = [ordered]@{
    Major    = 'CurrentMajorVersionNumber'
    Minor    = 'CurrentMinorVersionNumber'
    Build    = 'CurrentBuild'
    Revision = 'UBR'
  }

  $win_ver = New-Object -TypeName PSObject
  foreach ($mapping in $registry_values.GetEnumerator()) {
    $value_name = $mapping.Value
    $value = (Get-ItemProperty -Path $version_key $value_name).$value_name
    $win_ver | Add-Member -MemberType NoteProperty -Name $mapping.Key -Value $value
  }
  return $win_ver
}

# Logs the Windows version, the installed hotfixes and the GCE image metadata.
# Best effort: if gathering any of them throws, nothing is logged and the
# error is swallowed.
function Dump-DebugInfoToConsole {
  try {
    # Everything is collected before the first message is logged.
    $sections = [ordered]@{
      'Windows version'    = (Get_WindowsVersion | Out-String)
      'Installed hotfixes' = "$(Get-Hotfix | Out-String)"
      'GCE Windows image'  = "$(Get-InstanceMetadata 'image' | Out-String)"
    }
    foreach ($section in $sections.GetEnumerator()) {
      Log-Output ("{0}:`n{1}" -f $section.Key, $section.Value)
    }
  }
  catch { }
}

# Configures Windows Defender preferences (only when the Windows-Defender
# feature is installed) and emits the resulting preferences.
function Configure-WindowsDefender {
  $defender = Get-WindowsFeature -Name 'Windows-Defender'
  if (-not $defender.Installed) {
    return
  }

  Log-Output "Configuring Windows Defender preferences"
  Set-MpPreference -SubmitSamplesConsent NeverSend
  Log-Output "Disabling Windows Defender sample submission"
  Set-MpPreference -MAPSReporting Disabled
  Log-Output "Disabling Windows Defender Microsoft Active Protection Service Reporting"
  Log-Output "Defender Preferences"
  Get-MpPreference
}

# Converts the kube-env string in Yaml
#
# Returns: a PowerShell Hashtable object containing the key-value pairs from
# kube-env.
function ConvertFrom_Yaml_KubeEnv {
  param (
    [parameter(Mandatory=$true)] [string]$kube_env_str
  )
  $kube_env_table = @{}
  # A record starts on a line whose first character is not whitespace; any
  # following lines that begin with whitespace are continuations of it.
  $pending_record = $null
  foreach ($line in (${kube_env_str} -split '\r?\n')) {
    if ($line -match '^\S') {
      if ($null -ne $pending_record) {
        Add_KubeEnvRecord -Table $kube_env_table -Record $pending_record
      }
      $pending_record = $line
    }
    elseif ($line -match '^\s') {
      $pending_record += $line
    }
    # Empty lines match neither pattern and are ignored.
  }

  # Handle the last record, if any.
  if ($pending_record) {
    Add_KubeEnvRecord -Table $kube_env_table -Record $pending_record
  }
  return ${kube_env_table}
}

# Splits one "KEY: value" record at its first colon and stores it in $Table,
# trimming spaces and single/double quotes from both ends of the value.
# Records that contain no colon are skipped with a warning instead of calling
# Trim() on a null value.
function Add_KubeEnvRecord {
  param (
    [parameter(Mandatory=$true)] [hashtable]$Table,
    [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Record
  )
  $key, $val = $Record -split ':', 2
  if ($null -eq $val) {
    Write-Warning "Ignoring kube-env line without a ':' separator: ${Record}"
    return
  }
  $Table[$key] = $val.Trim("'", ' ', '"')
}

# Fetches the kube-env from the instance metadata.
#
# Returns: a PowerShell Hashtable object containing the key-value pairs from
# kube-env.
function Fetch-KubeEnv {
  # Testing / debugging:
  # First:
  #   ${kube_env} = Get-InstanceMetadataAttribute 'kube-env'
  # or:
  #   ${kube_env} = [IO.File]::ReadAllText(".\kubeEnv.txt")
  # ${kube_env_table} = ConvertFrom_Yaml_KubeEnv ${kube_env}
  # ${kube_env_table}
  # ${kube_env_table}.GetType()

  # The type of kube_env is a powershell String.
  $kube_env = Get-InstanceMetadataAttribute 'kube-env'
  $kube_env_table = ConvertFrom_Yaml_KubeEnv ${kube_env}

  # Never log credentials: besides certificates and keys, kube-env also
  # carries bearer tokens (e.g. KUBE_PROXY_TOKEN), which must stay out of logs.
  $secret_name_pattern = 'CERT|KEY|TOKEN|PASSWORD|SECRET'
  Log-Output ("Logging kube-env key-value pairs except CERT, KEY, TOKEN, " +
              "PASSWORD and SECRET values")
  foreach ($entry in $kube_env_table.GetEnumerator()) {
    if ($entry.Name -notmatch $secret_name_pattern) {
      Log-Output "$($entry.Name): $($entry.Value)"
    }
  }
  return ${kube_env_table}
}

# Sets the environment variable $Key to $Value at the Machine scope (will
# be present in the environment for all new shells after a reboot).
function Set_MachineEnvironmentVar {
  param (
    [parameter(Mandatory=$true)] [string]$Key,
    [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
  )
  [Environment]::SetEnvironmentVariable($Key, $Value, "Machine")
}

# Sets the environment variable $Key to $Value in the current shell.
function Set_CurrentShellEnvironmentVar {
  param (
    [parameter(Mandatory=$true)] [string]$Key,
    [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
  )
  # Set the process-scoped variable directly rather than building a
  # '$env:KEY = "VALUE"' string for Invoke-Expression: values containing
  # double quotes, '$' or backticks would otherwise be mangled or evaluated as
  # code. As with '$env:KEY = ""', an empty value removes the variable.
  [Environment]::SetEnvironmentVariable($Key, $Value, 'Process')
}

# Sets environment variables used by Kubernetes binaries and by other functions
# in this module. Depends on numerous ${kube_env} keys.
function Set-EnvironmentVars {
  if ($kube_env.ContainsKey('WINDOWS_CONTAINER_RUNTIME_ENDPOINT')) {
    $container_runtime_endpoint = ${kube_env}['WINDOWS_CONTAINER_RUNTIME_ENDPOINT']
  } else {
    Log-Output "ERROR: WINDOWS_CONTAINER_RUNTIME_ENDPOINT not set in kube-env, falling back in CONTAINER_RUNTIME_ENDPOINT"
    $container_runtime_endpoint = ${kube_env}['CONTAINER_RUNTIME_ENDPOINT']
  }

  # Append NODE_DIR to Path only when it is not already one of its entries, so
  # that running this function repeatedly does not keep growing the
  # machine-wide Path.
  $node_dir = ${kube_env}['NODE_DIR']
  $path_value = ${env:Path}
  if (($path_value -split ';') -notcontains $node_dir) {
    $path_value = $path_value + ";" + $node_dir
  }

  # Turning the kube-env values into environment variables is not required but
  # it makes debugging this script easier, and it also makes the syntax a lot
  # easier (${env:K8S_DIR} can be expanded within a string but
  # ${kube_env}['K8S_DIR'] cannot be afaik).
  $env_vars = @{
    "K8S_DIR" = ${kube_env}['K8S_DIR']
    # Typically 'C:\etc\kubernetes\node\bin' (not just 'C:\etc\kubernetes\node')
    "NODE_DIR" = ${kube_env}['NODE_DIR']
    "CNI_DIR" = ${kube_env}['CNI_DIR']
    "CNI_CONFIG_DIR" = ${kube_env}['CNI_CONFIG_DIR']
    "WINDOWS_CNI_STORAGE_PATH" = ${kube_env}['WINDOWS_CNI_STORAGE_PATH']
    "WINDOWS_CNI_VERSION" = ${kube_env}['WINDOWS_CNI_VERSION']
    "CSI_PROXY_STORAGE_PATH" = ${kube_env}['CSI_PROXY_STORAGE_PATH']
    "CSI_PROXY_VERSION" = ${kube_env}['CSI_PROXY_VERSION']
    "CSI_PROXY_FLAGS" = ${kube_env}['CSI_PROXY_FLAGS']
    "ENABLE_CSI_PROXY" = ${kube_env}['ENABLE_CSI_PROXY']
    "PKI_DIR" = ${kube_env}['PKI_DIR']
    "CA_FILE_PATH" = ${kube_env}['CA_FILE_PATH']
    "KUBELET_CONFIG" = ${kube_env}['KUBELET_CONFIG_FILE']
    "BOOTSTRAP_KUBECONFIG" = ${kube_env}['BOOTSTRAP_KUBECONFIG_FILE']
    "KUBECONFIG" = ${kube_env}['KUBECONFIG_FILE']
    "KUBEPROXY_KUBECONFIG" = ${kube_env}['KUBEPROXY_KUBECONFIG_FILE']
    "LOGS_DIR" = ${kube_env}['LOGS_DIR']
    "MANIFESTS_DIR" = ${kube_env}['MANIFESTS_DIR']
    "INFRA_CONTAINER" = ${kube_env}['WINDOWS_INFRA_CONTAINER']
    "WINDOWS_ENABLE_PIGZ" = ${kube_env}['WINDOWS_ENABLE_PIGZ']
    "WINDOWS_ENABLE_HYPERV" = ${kube_env}['WINDOWS_ENABLE_HYPERV']
    "ENABLE_NODE_PROBLEM_DETECTOR" = ${kube_env}['ENABLE_NODE_PROBLEM_DETECTOR']
    "NODEPROBLEMDETECTOR_KUBECONFIG_FILE" = ${kube_env}['WINDOWS_NODEPROBLEMDETECTOR_KUBECONFIG_FILE']
    "ENABLE_AUTH_PROVIDER_GCP" = ${kube_env}['ENABLE_AUTH_PROVIDER_GCP']
    "AUTH_PROVIDER_GCP_STORAGE_PATH" = ${kube_env}['AUTH_PROVIDER_GCP_STORAGE_PATH']
    "AUTH_PROVIDER_GCP_VERSION" = ${kube_env}['AUTH_PROVIDER_GCP_VERSION']
    "AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64" = ${kube_env}['AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64']
    "AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR" = ${kube_env}['AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR']
    "AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE" = ${kube_env}['AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE']

    "Path" = $path_value
    "KUBE_NETWORK" = "l2bridge"
    "KUBELET_CERT_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.crt'
    "KUBELET_KEY_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.key'

    "CONTAINER_RUNTIME_ENDPOINT" = $container_runtime_endpoint

    'LICENSE_DIR' = 'C:\Program Files\Google\Compute Engine\THIRD_PARTY_NOTICES'
  }

  # Set the environment variables in two ways: permanently on the machine (only
  # takes effect after a reboot), and in the current shell.
  foreach ($env_var in $env_vars.GetEnumerator()) {
    $message = "Setting environment variable: " + $env_var.Key + " = " + $env_var.Value
    Log-Output ${message}
    Set_MachineEnvironmentVar $env_var.Key $env_var.Value
    Set_CurrentShellEnvironmentVar $env_var.Key $env_var.Value
  }
}

# Configures various settings and prerequisites needed for the rest of the
# functions in this module and the Kubernetes binaries to operate properly.
function Set-PrerequisiteOptions {
  # Windows updates cause the node to reboot at arbitrary times.
  Log-Output "Disabling Windows Update service"
  & sc.exe config wuauserv start=disabled
  & sc.exe stop wuauserv
  Write-VerboseServiceInfoToConsole -Service 'wuauserv' -Delay 1

  # Use TLS 1.2: needed for Invoke-WebRequest downloads from github.com.
  [Net.ServicePointManager]::SecurityProtocol = `
      [Net.SecurityProtocolType]::Tls12

  Configure-WindowsDefender
}

# Creates directories where other functions in this module will read and write
# data.
# Note: C:\tmp is required for running certain kubernetes tests.
#       C:\var\log is used by kubelet to stored container logs and also
#       hard-coded in the fluentd/stackdriver config for log collection.
function Create-Directories {
  Log-Output "Creating ${env:K8S_DIR} and its subdirectories."
  # One flat list of directories; each one is created individually.
  $directories = @(
    "${env:K8S_DIR}",
    "${env:NODE_DIR}",
    "${env:LOGS_DIR}",
    "${env:CNI_DIR}",
    "${env:CNI_CONFIG_DIR}",
    "${env:MANIFESTS_DIR}",
    "${env:PKI_DIR}",
    "${env:LICENSE_DIR}",
    'C:\tmp',
    'C:\var\log'
  )
  foreach ($dir in $directories) {
    mkdir -Force $dir
  }
}

# Downloads some external helper scripts needed by other functions in this
# module.
function Download-HelperScripts {
  $hns_module = "${env:K8S_DIR}\hns.psm1"
  if (-not (ShouldWrite-File $hns_module)) {
    return
  }
  MustDownload-File `
      -OutFile $hns_module `
      -URLs 'https://storage.googleapis.com/gke-release/winnode/config/sdn/master/hns.psm1'
}

# Downloads the Kubernetes node tarball named by kube-env's
# NODE_BINARY_TAR_URL (a comma-separated list of URLs), unpacks it in a
# temporary directory and moves the node binaries into $env:NODE_DIR and the
# LICENSES into $env:LICENSE_DIR.
#
# Required ${kube_env} keys:
#   NODE_BINARY_TAR_URL
# Optional ${kube_env} keys:
#   NODE_BINARY_TAR_HASH: passed to MustDownload-File as -Hash when present.
function DownloadAndInstall-KubernetesBinaries {
  # Assume that presence of kubelet.exe indicates that the kubernetes binaries
  # were already previously downloaded to this node.
  $kubelet_exe = "${env:NODE_DIR}\kubelet.exe"
  if (-not (ShouldWrite-File $kubelet_exe)) {
    return
  }

  $staging_dir = 'C:\k8s_tmp'
  $null = New-Item -Force -ItemType 'directory' $staging_dir

  $download_urls = ${kube_env}['NODE_BINARY_TAR_URL'].Split(",")
  # The local file is named after the last path segment of the first URL.
  $tarball = "$staging_dir\$(Split-Path -leaf $download_urls[0])"
  $expected_hash = if ($kube_env.ContainsKey('NODE_BINARY_TAR_HASH')) {
    ${kube_env}['NODE_BINARY_TAR_HASH']
  } else {
    $null
  }
  MustDownload-File -Hash $expected_hash -OutFile $tarball -URLs $download_urls

  tar xzvf $tarball -C $staging_dir
  Move-Item -Force "$staging_dir\kubernetes\node\bin\*" "${env:NODE_DIR}\"
  Move-Item -Force `
      "$staging_dir\kubernetes\LICENSES" "${env:LICENSE_DIR}\LICENSES_kubernetes"

  # Clean up the temporary directory
  Remove-Item -Force -Recurse $staging_dir
}

# Downloads the csi-proxy binaries from kube-env's CSI_PROXY_STORAGE_PATH and
# CSI_PROXY_VERSION, and then puts them in a subdirectory of $env:NODE_DIR.
# Note: for now the installation is skipped for non-test clusters. Will be
# installed for all cluster after tests pass.
# Required ${kube_env} keys:
#   CSI_PROXY_STORAGE_PATH and CSI_PROXY_VERSION
function DownloadAndInstall-CSIProxyBinaries {
  if ("${env:ENABLE_CSI_PROXY}" -ne "true") {
    return
  }
  $binary_name = 'csi-proxy.exe'
  $install_path = "${env:NODE_DIR}\$binary_name"
  if (-not (ShouldWrite-File $install_path)) {
    return
  }

  $staging_dir = 'C:\k8s_tmp'
  $null = New-Item -Force -ItemType 'directory' $staging_dir
  $download_url = "${env:CSI_PROXY_STORAGE_PATH}/${env:CSI_PROXY_VERSION}/$binary_name"
  $staged_binary = "$staging_dir\$binary_name"
  MustDownload-File -OutFile $staged_binary -URLs $download_url
  Move-Item -Force $staged_binary $install_path
  # Clean up the temporary directory
  Remove-Item -Force -Recurse $staging_dir
}

# Registers csi-proxy.exe as the 'csiproxy' Windows service (restarting it
# after failures) and starts it. Does nothing unless $env:ENABLE_CSI_PROXY is
# "true".
function Start-CSIProxy {
  if ("${env:ENABLE_CSI_PROXY}" -ne "true") {
    return
  }

  Log-Output "Creating CSI Proxy Service"
  $flags = "-windows-service -log_file=${env:LOGS_DIR}\csi-proxy.log -logtostderr=false ${env:CSI_PROXY_FLAGS}"
  $service_command = "${env:NODE_DIR}\csi-proxy.exe $flags"
  & sc.exe create csiproxy binPath= $service_command
  & sc.exe failure csiproxy reset= 0 actions= restart/10000

  Log-Output "Starting CSI Proxy Service"
  & sc.exe start csiproxy
  Write-VerboseServiceInfoToConsole -Service 'csiproxy' -Delay 1
}

# Converts $IPAddress to its numeric value, treating the first address byte as
# the most significant one, and returns it as a UInt32.
#
# TODO(pjh): this is copied from
# https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
# See if there's a way to fetch or construct the "management subnet" so that
# this is not needed.
function ConvertTo_DecimalIP {
  param(
    [parameter(Mandatory = $true, Position = 0)]
    [Net.IPAddress] $IPAddress
  )

  $address_bytes = $IPAddress.GetAddressBytes()
  $total = 0
  for ($position = 0; $position -lt $address_bytes.Length; $position++) {
    # The byte at position 0 is weighted 256^3, the next one 256^2, and so on.
    $total += $address_bytes[$position] * [Math]::Pow(256, 3 - $position)
  }
  return [UInt32]$total
}

# TODO(pjh): this is copied from
# https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
# See if there's a way to fetch or construct the "management subnet" so that
# this is not needed.
function ConvertTo_DottedDecimalIP {
  param(
    [parameter(Mandatory = $true, Position = 0)]
    [Uint32] $IPAddress
  )

  # Peel off one octet per iteration, most significant first.
  $octets = for ($power = 3; $power -ge 0; $power--) {
    $unit = [Math]::Pow(256, $power)
    $remainder = $IPAddress % $unit
    ($IPAddress - $remainder) / $unit
    $IPAddress = $remainder
  }
  return [String]::Join(".", $octets)
}

# Returns the number of 1 bits in $SubnetMask, i.e. the prefix length of a
# well-formed subnet mask.
#
# TODO(pjh): this is copied from
# https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
# See if there's a way to fetch or construct the "management subnet" so that
# this is not needed.
function ConvertTo_MaskLength {
  param(
    [parameter(Mandatory = $True, Position = 0)]
    [Net.IPAddress] $SubnetMask
  )

  $binary_digits = ($SubnetMask.GetAddressBytes() |
      ForEach-Object { [Convert]::ToString($_, 2) }) -join ''
  # Drop every 0 so that only the 1 bits are left to count.
  return ($binary_digits -replace '0').Length
}

# Returns a network adapter object for the "management" interface via which the
# Windows pods+kubelet will communicate with the rest of the Kubernetes cluster.
#
# This function will fail if Add_InitialHnsNetwork() has not been called first.
function Get_MgmtNetAdapter {
  $net_adapter = Get-NetAdapter | Where-Object Name -like ${MGMT_ADAPTER_NAME}
  if (-not ${net_adapter}) {
    Throw ("Failed to find a suitable network adapter, check your network " +
           "settings.")
  }

  return $net_adapter
}

# Decodes the base64 $Data string and writes it as binary to $File. Does
# nothing if $File already exists and $REDO_STEPS is not set.
function Write_PkiData {
  param (
    [parameter(Mandatory=$true)] [string] $Data,
    [parameter(Mandatory=$true)] [string] $File
  )

  if (-not (ShouldWrite-File $File)) {
    return
  }

  # This command writes out a PEM certificate file, analogous to "base64
  # --decode" on Linux. See https://stackoverflow.com/a/51914136/1230197.
  [IO.File]::WriteAllBytes($File, [Convert]::FromBase64String($Data))
  Log_Todo ("need to set permissions correctly on ${File}; not sure what the " +
            "Windows equivalent of 'umask 077' is")
  # Linux: owned by root, rw by user only.
  #   -rw------- 1 root root 1.2K Oct 12 00:56 ca-certificates.crt
  #   -rw------- 1 root root 1.3K Oct 12 00:56 kubelet.crt
  #   -rw------- 1 root root 1.7K Oct 12 00:56 kubelet.key
  # Windows:
  #   https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
}

# Creates the node PKI files in $env:PKI_DIR and lists that directory.
#
# Required ${kube_env} keys (a missing key is reported with Log-Output -Fatal):
#   CA_CERT
#   KUBELET_CERT
#   KUBELET_KEY
function Create-NodePki {
  Log-Output 'Creating node pki files'

  # kube-env key holding the base64 data -> file it is decoded into.
  $pki_files = @(
    @{ Key = 'CA_CERT';      File = ${env:CA_FILE_PATH} },
    @{ Key = 'KUBELET_CERT'; File = ${env:KUBELET_CERT_PATH} },
    @{ Key = 'KUBELET_KEY';  File = ${env:KUBELET_KEY_PATH} }
  )
  foreach ($pki_file in $pki_files) {
    $key = $pki_file.Key
    if ($kube_env.ContainsKey($key)) {
      [string]$encoded = ${kube_env}[$key]
      Write_PkiData $encoded $pki_file.File
    }
    else {
      Log-Output -Fatal "${key} not present in kube-env"
    }
  }

  Get-ChildItem ${env:PKI_DIR}
}

# Creates the bootstrap kubelet kubeconfig at $env:BOOTSTRAP_KUBECONFIG.
# https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
#
# Create-NodePki() must be called first.
#
# Required ${kube_env} keys:
#   KUBERNETES_MASTER_NAME: the apiserver IP address.
function Write_BootstrapKubeconfig {
  if (-not (ShouldWrite-File ${env:BOOTSTRAP_KUBECONFIG})) {
    return
  }

  # TODO(mtaufen): is user "kubelet" correct? Other examples use e.g.
  #   "system:node:$(hostname)".

  $apiserverAddress = ${kube_env}['KUBERNETES_MASTER_NAME']

  # The upper-case placeholders below are substituted before the file is
  # written.
  $template = @'
apiVersion: v1
kind: Config
users:
- name: kubelet
  user:
    client-certificate: KUBELET_CERT_PATH
    client-key: KUBELET_KEY_PATH
clusters:
- name: local
  cluster:
    server: https://APISERVER_ADDRESS
    certificate-authority: CA_FILE_PATH
contexts:
- context:
    cluster: local
    user: kubelet
  name: service-account-context
current-context: service-account-context
'@
  $kubeconfig = $template.Replace('KUBELET_CERT_PATH', ${env:KUBELET_CERT_PATH})
  $kubeconfig = $kubeconfig.Replace('KUBELET_KEY_PATH', ${env:KUBELET_KEY_PATH})
  $kubeconfig = $kubeconfig.Replace('APISERVER_ADDRESS', ${apiserverAddress})
  $kubeconfig = $kubeconfig.Replace('CA_FILE_PATH', ${env:CA_FILE_PATH})

  New-Item -Force -ItemType file ${env:BOOTSTRAP_KUBECONFIG} | Out-Null
  Set-Content ${env:BOOTSTRAP_KUBECONFIG} $kubeconfig
  Log-Output ("kubelet bootstrap kubeconfig:`n" +
              "$(Get-Content -Raw ${env:BOOTSTRAP_KUBECONFIG})")
}

# Fetches the kubelet kubeconfig from the metadata server and writes it to
# $env:KUBECONFIG.
#
# Create-NodePki() must be called first.
function Write_KubeconfigFromMetadata {
  if (-not (ShouldWrite-File ${env:KUBECONFIG})) {
    return
  }

  $kubeconfig = Get-InstanceMetadataAttribute 'kubeconfig'
  # An empty value is treated like a missing one: writing an empty kubeconfig
  # would leave a useless file behind that later runs would not overwrite.
  if ([string]::IsNullOrEmpty($kubeconfig)) {
    Log-Output `
        "kubeconfig metadata key not found, can't write ${env:KUBECONFIG}" `
        -Fatal
    # Not reached if Log-Output -Fatal ends the script; makes sure nothing is
    # written otherwise.
    return
  }
  Set-Content ${env:KUBECONFIG} $kubeconfig
  Log-Output ("kubelet kubeconfig from metadata (non-bootstrap):`n" +
              "$(Get-Content -Raw ${env:KUBECONFIG})")
}

# Creates the kubelet kubeconfig. Currently this always writes the bootstrap
# kubeconfig at $env:BOOTSTRAP_KUBECONFIG (see Write_BootstrapKubeconfig).
#
# Create-NodePki() must be called first.
#
# Required ${kube_env} keys:
#   KUBERNETES_MASTER_NAME: the apiserver IP address.
function Create-KubeletKubeconfig {
  Write_BootstrapKubeconfig
}

# Creates the kubeconfig user file for applications that communicate with Kubernetes.
#
# Create-NodePki() must be called first.
#
# Required ${kube_env} keys:
#   CA_CERT
#   KUBERNETES_MASTER_NAME
#
# The generated file is logged with the value of $Token replaced by
# '<redacted>' so that the bearer token does not end up in the logs.
function Create-Kubeconfig {
  param (
    [parameter(Mandatory=$true)] [string]$Name,
    [parameter(Mandatory=$true)] [string]$Path,
    [parameter(Mandatory=$true)] [string]$Token
  )
  if (-not (ShouldWrite-File $Path)) {
    return
  }

  # In configure-helper.sh kubelet kubeconfig uses certificate-authority while
  # kubeproxy kubeconfig uses certificate-authority-data, ugh. Does it matter?
  # Use just one or the other for consistency?
  $template = @'
apiVersion: v1
kind: Config
users:
- name: APP_NAME
  user:
    token: APP_TOKEN
clusters:
- name: local
  cluster:
    server: https://APISERVER_ADDRESS
    certificate-authority-data: CA_CERT
contexts:
- context:
    cluster: local
    user: APP_NAME
  name: service-account-context
current-context: service-account-context
'@
  $kubeconfig = $template.Replace('APP_NAME', $Name)
  $kubeconfig = $kubeconfig.Replace('APP_TOKEN', $Token)
  $kubeconfig = $kubeconfig.Replace('CA_CERT', ${kube_env}['CA_CERT'])
  $kubeconfig = $kubeconfig.Replace('APISERVER_ADDRESS', ${kube_env}['KUBERNETES_MASTER_NAME'])

  New-Item -Force -ItemType file $Path | Out-Null
  Set-Content $Path $kubeconfig

  # $Token is Mandatory and therefore never empty, so Replace() is safe here.
  $loggable = (Get-Content -Raw ${Path}).Replace($Token, '<redacted>')
  Log-Output ("${Name} kubeconfig:`n" + $loggable)
}

# Creates the kube-proxy user kubeconfig file at $env:KUBEPROXY_KUBECONFIG.
#
# Create-NodePki() must be called first.
#
# Required ${kube_env} keys:
#   CA_CERT
#   KUBE_PROXY_TOKEN
function Create-KubeproxyKubeconfig {
  Create-Kubeconfig -Name 'kube-proxy' `
    -Path ${env:KUBEPROXY_KUBECONFIG} `
    -Token ${kube_env}['KUBE_PROXY_TOKEN']
}

# Returns the IP alias range configured for this GCE instance.
#
# Throws if the metadata server request fails; the WebClient is disposed
# either way.
function Get_IpAliasRange {
  $url = ("http://${GCE_METADATA_SERVER}/computeMetadata/v1/instance/" +
          "network-interfaces/0/ip-aliases/0")
  $client = New-Object Net.WebClient
  try {
    $client.Headers.Add('Metadata-Flavor', 'Google')
    return ($client.DownloadString($url)).Trim()
  }
  finally {
    $client.Dispose()
  }
}

# Retrieves the pod CIDR and sets it in $env:POD_CIDR.
#
# Retries once per second, without limit, until Get_IpAliasRange returns a
# non-empty value.
function Set-PodCidr {
  while ($true) {
    $pod_cidr = $null
    try {
      $pod_cidr = Get_IpAliasRange
    }
    catch {
      # Catch the failure explicitly instead of inspecting $? afterwards, so
      # the retry also works when errors are raised as exceptions.
      Log-Output "Get_IpAliasRange failed: $($_.Exception.Message)"
    }
    if ($pod_cidr) {
      break
    }
    Log-Output "Retrying Get_IpAliasRange..."
    Start-Sleep -Seconds 1
  }

  Log-Output "fetched pod CIDR (same as IP alias range): ${pod_cidr}"
  Set_MachineEnvironmentVar "POD_CIDR" ${pod_cidr}
  Set_CurrentShellEnvironmentVar "POD_CIDR" ${pod_cidr}
}

# Adds an initial HNS network on the Windows node which forces the creation of
# a virtual switch and the "management" interface that will be used to
# communicate with the rest of the Kubernetes cluster without NAT.
#
# Note that adding the initial HNS network may cause connectivity to the GCE
# metadata server to be lost due to a Windows bug.
# Configure-HostNetworkingService() restores connectivity, look there for
# details.
#
# Download-HelperScripts() must have been called first.
function Add_InitialHnsNetwork {
  $INITIAL_HNS_NETWORK = 'External'

  # This comes from
  # https://github.com/Microsoft/SDN/blob/master/Kubernetes/flannel/l2bridge/start.ps1#L74
  # (or
  # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L206).
  #
  # daschott noted on Slack: "L2bridge networks require an external vSwitch.
  # The first network ("External") with hardcoded values in the script is just
  # a placeholder to create an external vSwitch. This is purely for convenience
  # to be able to remove/modify the actual HNS network ("cbr0") or rejoin the
  # nodes without a network blip. Creating a vSwitch takes time, causes network
  # blips, and it makes it more likely to hit the issue where flanneld is
  # stuck, so we want to do this as rarely as possible."
  $hns_network = Get-HnsNetwork | Where-Object Name -eq $INITIAL_HNS_NETWORK
  if ($hns_network) {
    if ($REDO_STEPS) {
      Log-Output ("Warning: initial '$INITIAL_HNS_NETWORK' HNS network " +
                  "already exists, removing it and recreating it")
      $hns_network | Remove-HnsNetwork
      $hns_network = $null
    }
    else {
      Log-Output ("Skip: initial '$INITIAL_HNS_NETWORK' HNS network " +
                  "already exists, not recreating it")
      return
    }
  }
  Log-Output ("Creating initial HNS network to force creation of " +
              "${MGMT_ADAPTER_NAME} interface")
  # Note: RDP connection will hiccup when running this command.
  New-HNSNetwork `
      -Type "L2Bridge" `
      -AddressPrefix "192.168.255.0/30" `
      -Gateway "192.168.255.1" `
      -Name $INITIAL_HNS_NETWORK `
      -Verbose
}

# Get the network in uint32 for the given cidr.
#
# The host bits of the address are cleared according to the prefix length, so
# "10.64.1.7/24" and "10.64.1.0/24" yield the same network. A value without a
# "/prefix" part is returned as the plain address. Throws if the prefix length
# is not between 0 and 32.
function Get_NetworkDecimal_From_CIDR([string] $cidr) {
  $address, $prefix_text = $cidr.Split('/')
  $decimal_address = ConvertTo_DecimalIP $address
  if ([string]::IsNullOrEmpty($prefix_text)) {
    return $decimal_address
  }

  [int]$prefix_length = $prefix_text
  if (($prefix_length -lt 0) -or ($prefix_length -gt 32)) {
    Throw "Invalid prefix length '${prefix_text}' in CIDR '${cidr}'"
  }
  # Number of addresses in the network; the network address is the largest
  # multiple of it that does not exceed the given address.
  $block_size = [Math]::Pow(2, 32 - $prefix_length)
  return [UInt32]($decimal_address - ($decimal_address % $block_size))
}

# Get gateway ip string (the first address) based on pod cidr.
# For Windows nodes the pod gateway IP address is the first address in the pod
# CIDR for the host.
function Get_Gateway_From_CIDR([string] $cidr) {
  $network = Get_NetworkDecimal_From_CIDR $cidr
  $gateway = ConvertTo_DottedDecimalIP ($network + 1)
  return $gateway
}

# Get endpoint gateway ip string (the second address) based on pod cidr.
# For Windows nodes the pod gateway IP address is the first address in the pod
# CIDR for the host, but from inside containers it's the second address.
function Get_Endpoint_Gateway_From_CIDR([string] $cidr) {
  $network = Get_NetworkDecimal_From_CIDR $cidr
  $gateway = ConvertTo_DottedDecimalIP ($network + 2)
  return $gateway
}

# Get pod IP range start based (the third address) on pod cidr
# We reserve the first two in the cidr range for gateways. Start the cidr
# range from the third so that IPAM does not allocate those IPs to pods.
function Get_PodIP_Range_Start([string] $cidr) {
  # (decimal network address + 3), converted back to dotted-decimal form.
  $network=Get_NetworkDecimal_From_CIDR($cidr)
  $start=ConvertTo_DottedDecimalIP($network+3)
  return $start
}

# Configures HNS on the Windows node to enable Kubernetes networking:
#   - Creates the "management" interface associated with an initial HNS network.
#   - Creates the HNS network $env:KUBE_NETWORK for pod networking.
#   - Creates an HNS endpoint for pod networking.
#   - Adds necessary routes on the management interface.
#   - Verifies that the GCE metadata server connection remains intact.
#
# An existing HNS network / endpoint is reused unless $REDO_STEPS is set (read
# from an enclosing scope), in which case it is removed and recreated.
#
# Prerequisites:
#   $env:POD_CIDR is set (by Set-PodCidr).
#   Download-HelperScripts() has been called.
function Configure-HostNetworkingService {
  Import-Module -Force ${env:K8S_DIR}\hns.psm1

  Add_InitialHnsNetwork

  $pod_gateway = Get_Gateway_From_CIDR(${env:POD_CIDR})
  $pod_endpoint_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})
  Log-Output ("Setting up Windows node HNS networking: " +
              "podCidr = ${env:POD_CIDR}, podGateway = ${pod_gateway}, " +
              "podEndpointGateway = ${pod_endpoint_gateway}")

  $hns_network = Get-HnsNetwork | Where-Object Name -eq ${env:KUBE_NETWORK}
  if ($hns_network) {
    if ($REDO_STEPS) {
      Log-Output ("Warning: ${env:KUBE_NETWORK} HNS network already exists, " +
                  "removing it and recreating it")
      $hns_network | Remove-HnsNetwork
      # Cleared so that the creation branch below runs.
      $hns_network = $null
    }
    else {
      Log-Output "Skip: ${env:KUBE_NETWORK} HNS network already exists"
    }
  }
  # Remembered because the metadata-server route workaround at the end of
  # this function is only needed when a network was actually created.
  $created_hns_network = $false
  if (-not $hns_network) {
    # Note: RDP connection will hiccup when running this command.
    $hns_network = New-HNSNetwork `
        -Type "L2Bridge" `
        -AddressPrefix ${env:POD_CIDR} `
        -Gateway ${pod_gateway} `
        -Name ${env:KUBE_NETWORK} `
        -Verbose
    $created_hns_network = $true
  }
  # This name of endpoint is referred in pkg/proxy/winkernel/proxier.go as part of
  # kube-proxy as well. A health check port for every service that is specified as
  # "externalTrafficPolicy: local" will be added on the endpoint.
  # PLEASE KEEP THEM CONSISTENT!!!
  $endpoint_name = "cbr0"
  $vnic_name = "vEthernet (${endpoint_name})"

  $hns_endpoint = Get-HnsEndpoint | Where-Object Name -eq $endpoint_name
  # Note: we don't expect to ever enter this block currently - while the HNS
  # network does seem to persist across reboots, the HNS endpoints do not.
  if ($hns_endpoint) {
    if ($REDO_STEPS) {
      Log-Output ("Warning: HNS endpoint $endpoint_name already exists, " +
                  "removing it and recreating it")
      $hns_endpoint | Remove-HnsEndpoint
      $hns_endpoint = $null
    }
    else {
      Log-Output "Skip: HNS endpoint $endpoint_name already exists"
    }
  }
  if (-not $hns_endpoint) {
    $hns_endpoint = New-HnsEndpoint `
        -NetworkId ${hns_network}.Id `
        -Name ${endpoint_name} `
        -IPAddress ${pod_endpoint_gateway} `
        -Gateway "0.0.0.0" `
        -Verbose
    # TODO(pjh): find out: why is this always CompartmentId 1?
    Attach-HnsHostEndpoint `
        -EndpointID ${hns_endpoint}.Id `
        -CompartmentID 1 `
        -Verbose
    netsh interface ipv4 set interface "${vnic_name}" forwarding=enabled
  }

  # Best effort: any failure while clearing existing HNS policy lists is
  # deliberately ignored.
  Try {
    Get-HNSPolicyList | Remove-HnsPolicyList
  } Catch { }

  # Add a route from the management NIC to the pod CIDR.
  #
  # When a packet from a Kubernetes service backend arrives on the destination
  # Windows node, the reverse SNAT will be applied and the source address of
  # the packet gets replaced from the pod IP to the service VIP. The packet
  # will then leave the VM and return back through hairpinning.
  #
  # When IP alias is enabled, IP forwarding is disabled for anti-spoofing;
  # the packet with the service VIP will get blocked and be lost. With this
  # route, the packet will be routed to the pod subnetwork, and not leave the
  # VM.
  $mgmt_net_adapter = Get_MgmtNetAdapter
  # -ErrorAction Ignore: errors from New-NetRoute are suppressed here.
  New-NetRoute `
      -ErrorAction Ignore `
      -InterfaceAlias ${mgmt_net_adapter}.ifAlias `
      -DestinationPrefix ${env:POD_CIDR} `
      -NextHop "0.0.0.0" `
      -Verbose

  if ($created_hns_network) {
    # There is an HNS bug where the route to the GCE metadata server will be
    # removed when the HNS network is created:
    # https://github.com/Microsoft/hcsshim/issues/299#issuecomment-425491610.
    # The behavior here is very unpredictable: the route may only be removed
    # after some delay, or it may appear to be removed then you'll add it back
    # but then it will be removed once again. So, we first wait a long
    # unfortunate amount of time to ensure that things have quiesced, then we
    # wait until we're sure the route is really gone before re-adding it again.
    Log-Output "Waiting 45 seconds for host network state to quiesce"
    Start-Sleep 45
    WaitFor_GceMetadataServerRouteToBeRemoved
    Log-Output "Re-adding the GCE metadata server route"
    Add_GceMetadataServerRoute
  }
  Verify_GceMetadataServerRouteIsPresent

  Log-Output "Host network setup complete"
}

# Downloads GetGcePdName.dll into $env:K8S_DIR (when ShouldWrite-File allows
# it) and registers it in the all-users PowerShell profile
# ($PsHome\profile.ps1) so that the module is unblocked and imported by new
# PowerShell sessions.
#
# The profile is only appended to when it does not already reference the DLL
# path, so running this function repeatedly does not accumulate duplicate
# import lines.
function Configure-GcePdTools {
  if (ShouldWrite-File ${env:K8S_DIR}\GetGcePdName.dll) {
    MustDownload-File -OutFile ${env:K8S_DIR}\GetGcePdName.dll `
      -URLs "https://storage.googleapis.com/gke-release/winnode/config/gce-tools/master/GetGcePdName/GetGcePdName.dll"
  }
  if (-not (Test-Path $PsHome\profile.ps1)) {
    New-Item -path $PsHome\profile.ps1 -type file
  }

  # Literal (non-regex) search for the DLL path that the snippet below writes.
  $module_path = "${env:K8S_DIR}\GetGcePdName.dll"
  $already_registered = Select-String -LiteralPath $PsHome\profile.ps1 `
      -SimpleMatch -Quiet -Pattern $module_path
  if ($already_registered) {
    Log-Output "Skip: GetGcePdName.dll is already imported by $PsHome\profile.ps1"
    return
  }

  Add-Content $PsHome\profile.ps1 `
'$modulePath = "K8S_DIR\GetGcePdName.dll"
Unblock-File $modulePath
Import-Module -Name $modulePath'.replace('K8S_DIR', ${env:K8S_DIR})
}

# Setup cni network for containerd.
function Prepare-CniNetworking {
  Configure_Containerd_CniNetworking
}

# Obtain the host dns conf and save it to a file so that kubelet/CNI
# can use it to configure dns suffix search list for pods.
# The value of DNS server is ignored right now because the pod will
# always only use cluster DNS service, but for consistency, we still
# parsed them here in the same format as Linux resolv.conf.
# This function must be called after Configure-HostNetworkingService.
function Configure-HostDnsConf {
  $net_adapter = Get_MgmtNetAdapter
  $server_ips = (Get-DnsClientServerAddress `
          -InterfaceAlias ${net_adapter}.Name).ServerAddresses
  $search_list = (Get-DnsClient).ConnectionSpecificSuffixSearchList
  # One "nameserver <ip>" line per DNS server, followed by a single
  # "search ..." line.
  $conf = ""
  ForEach ($ip in $server_ips)  {
    $conf = $conf + "nameserver $ip`r`n"
  }
  $conf = $conf + "search $search_list"
  # Do not put hostdns.conf into the CNI config directory so as to
  # avoid the container runtime treating it as CNI config.
  $hostdns_conf = "${env:CNI_DIR}\hostdns.conf"
  New-Item -Force -ItemType file ${hostdns_conf} | Out-Null
  Set-Content ${hostdns_conf} $conf
  Log-Output "HOST dns conf:`n$(Get-Content -Raw ${hostdns_conf})"
}

# Fetches the kubelet config from the instance metadata and puts it at
# $env:KUBELET_CONFIG. Does nothing when ShouldWrite-File reports that the
# file should not be (re)written.
function Configure-Kubelet {
  if (-not (ShouldWrite-File ${env:KUBELET_CONFIG})) {
    return
  }

  # The Kubelet config is built by build-kubelet-config() in
  # cluster/gce/util.sh, and stored in the metadata server under the
  # 'kubelet-config' key.
  $kubelet_config = Get-InstanceMetadataAttribute 'kubelet-config'
  Set-Content ${env:KUBELET_CONFIG} $kubelet_config
  Log-Output "Kubelet config:`n$(Get-Content -Raw ${env:KUBELET_CONFIG})"
}

# Sets up the kubelet and kube-proxy arguments and starts them as native
# Windows services.
#
# Logs a fatal error (Log-Output -Fatal) if a kubelet or kube-proxy process
# is already running. Throws (via WaitFor_KubeletAndKubeProxyReady) if the
# services do not reach the Running state in time.
#
# Required ${kube_env} keys:
#   KUBELET_ARGS
#   KUBEPROXY_ARGS
#   CLUSTER_IP_RANGE
function Start-WorkerServices {
  # Compute kubelet args
  $kubelet_args_str = ${kube_env}['KUBELET_ARGS']
  $kubelet_args = $kubelet_args_str.Split(" ")
  Log-Output "kubelet_args from metadata: ${kubelet_args}"

  # To join GCE instances to AD, we need to shorten their names, as NetBIOS name
  # must be <= 15 characters, and GKE generated names are longer than that.
  # To perform the join in an automated way, it's preferable to apply the rename
  # and domain join in the GCESysprep step. However, after sysprep is complete
  # and the machine restarts, kubelet bootstrapping should not use the shortened
  # computer name, and instead use the instance's name by using --hostname-override,
  # otherwise kubelet and kube-proxy will not be able to run properly.
  #
  # Out-String appends a trailing newline to its output; trim it so that the
  # newline is not embedded in the --hostname-override values (and in the
  # service command lines built from them) below.
  $instance_name = "$(Get-InstanceMetadata 'name' | Out-String)".Trim()
  $default_kubelet_args = @(`
      "--pod-infra-container-image=${env:INFRA_CONTAINER}",
      "--hostname-override=${instance_name}"
  )

  $kubelet_args = ${default_kubelet_args} + ${kubelet_args}
  Log-Output 'Using bootstrap kubeconfig for authentication'
  $kubelet_args = (${kubelet_args} +
                   "--bootstrap-kubeconfig=${env:BOOTSTRAP_KUBECONFIG}")
  Log-Output "Final kubelet_args: ${kubelet_args}"

  # Compute kube-proxy args
  $kubeproxy_args_str = ${kube_env}['KUBEPROXY_ARGS']
  $kubeproxy_args = $kubeproxy_args_str.Split(" ")
  Log-Output "kubeproxy_args from metadata: ${kubeproxy_args}"

  # kubeproxy is started on Linux nodes using
  # kube-manifests/kubernetes/gci-trusty/kube-proxy.manifest, which is
  # generated by start-kube-proxy in configure-helper.sh and contains e.g.:
  #   kube-proxy --master=https://35.239.84.171
  #   --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.64.0.0/14
  #   --oom-score-adj=-998 --v=2
  #   --iptables-sync-period=1m --iptables-min-sync-period=10s
  #   --ipvs-sync-period=1m --ipvs-min-sync-period=10s
  # And also with various volumeMounts and "securityContext: privileged: true".
  $default_kubeproxy_args = @(`
      "--kubeconfig=${env:KUBEPROXY_KUBECONFIG}",
      "--cluster-cidr=$(${kube_env}['CLUSTER_IP_RANGE'])",
      "--hostname-override=${instance_name}"
  )

  $kubeproxy_args = ${default_kubeproxy_args} + ${kubeproxy_args}
  Log-Output "Final kubeproxy_args: ${kubeproxy_args}"

  # TODO(pjh): kubelet is emitting these messages:
  # I1023 23:44:11.761915    2468 kubelet.go:274] Adding pod path:
  # C:\etc\kubernetes
  # I1023 23:44:11.775601    2468 file.go:68] Watching path
  # "C:\\etc\\kubernetes"
  # ...
  # E1023 23:44:31.794327    2468 file.go:182] Can't process manifest file
  # "C:\\etc\\kubernetes\\hns.psm1": C:\etc\kubernetes\hns.psm1: couldn't parse
  # as pod(yaml: line 10: did not find expected <document start>), please check
  # config file.
  #
  # Figure out how to change the directory that the kubelet monitors for new
  # pod manifests.

  # We configure the service to restart on failure, after 10s wait. We reset
  # the restart count to 0 each time, so we re-use our restart/10000 action on
  # each failure. Note it currently restarts even when explicitly stopped, you
  # have to delete the service entry to *really* kill it (e.g. `sc.exe delete
  # kubelet`). See issue #72900.
  if (Get-Process | Where-Object Name -eq "kubelet") {
    Log-Output -Fatal `
        "A kubelet process is already running, don't know what to do"
  }
  Log-Output "Creating kubelet service"
  # kube-log-runner.exe wraps the real binary and is passed the log file path
  # via -log-file.
  & sc.exe create kubelet binPath= "${env:NODE_DIR}\kube-log-runner.exe -log-file=${env:LOGS_DIR}\kubelet.log ${env:NODE_DIR}\kubelet.exe ${kubelet_args}" start= demand
  & sc.exe failure kubelet reset= 0 actions= restart/10000
  Log-Output "Starting kubelet service"
  & sc.exe start kubelet

  Log-Output "Waiting 10 seconds for kubelet to stabilize"
  Start-Sleep 10
  Write-VerboseServiceInfoToConsole -Service 'kubelet'

  if (Get-Process | Where-Object Name -eq "kube-proxy") {
    Log-Output -Fatal `
        "A kube-proxy process is already running, don't know what to do"
  }
  Log-Output "Creating kube-proxy service"
  & sc.exe create kube-proxy binPath= "${env:NODE_DIR}\kube-log-runner.exe -log-file=${env:LOGS_DIR}\kube-proxy.log ${env:NODE_DIR}\kube-proxy.exe ${kubeproxy_args}" start= demand
  & sc.exe failure kube-proxy reset= 0 actions= restart/10000
  Log-Output "Starting kube-proxy service"
  & sc.exe start kube-proxy
  Write-VerboseServiceInfoToConsole -Service 'kube-proxy' -Delay 1

  # F1020 23:08:52.000083    9136 server.go:361] unable to load in-cluster
  # configuration, KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT must be
  # defined

  # TODO(pjh): still getting errors like these in kube-proxy log:
  # E1023 04:03:58.143449    4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Endpoints: Get https://35.239.84.171/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  # E1023 04:03:58.150266    4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Service: Get https://35.239.84.171/api/v1/services?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.

  WaitFor_KubeletAndKubeProxyReady
  Verify_GceMetadataServerRouteIsPresent
  Log-Output "Kubernetes components started successfully"
}

# Stop and unregister both kubelet & kube-proxy services.
function Unregister-WorkerServices {
  & sc.exe delete kube-proxy
  & sc.exe delete kubelet
}

# Wait for kubelet and kube-proxy to be ready within 10s.
#
# Polls both services once per second. Returns as soon as both report
# 'Running'; otherwise logs the state of the kube* services and throws after
# the timeout. The final decision is based on the services' state, not on the
# poll counter, so services that become ready on the very last poll are not
# reported as a timeout.
function WaitFor_KubeletAndKubeProxyReady {
  $timeout = 10

  for ($waited = 0; $waited -lt $timeout; $waited++) {
    if (((Get-Service kube-proxy).Status -eq 'Running') -and
        ((Get-Service kubelet).Status -eq 'Running')) {
      return
    }
    Start-Sleep 1
  }

  # One last look after the final sleep before declaring a timeout.
  if (((Get-Service kube-proxy).Status -eq 'Running') -and
      ((Get-Service kubelet).Status -eq 'Running')) {
    return
  }

  # Timeout occurred
  Log-Output "$(Get-Service kube* | Out-String)"
  Throw ("Timeout while waiting ${timeout} seconds for kubelet and kube-proxy services to start")
}

# Runs 'kubectl get nodes'.
# Runs additional verification commands to ensure node successfully joined cluster
# and that it connects to the API Server.
function Verify-WorkerServices {
  # Despite its name, $timeout bounds the number of retries: the check below
  # is attempted at most ($timeout + 1) times, $retryDelayInSeconds apart.
  $timeout = 12
  $retries = 0
  $retryDelayInSeconds = 5
  $node_hostname = (hostname)

  Log-Output ("Testing node connection to API server...")
  while ($true) {
    $retries++
    $nodes_list = & "${env:NODE_DIR}\kubectl.exe" get nodes -o=custom-columns=:.metadata.name -A | Out-String
    $host_status = & "${env:NODE_DIR}\kubectl.exe" get nodes $node_hostname -o=custom-columns=:.status.conditions[4].type | Out-String

    $joined = ($nodes_list -and
               $nodes_list.contains($node_hostname) -and
               $host_status.contains("Ready"))
    # Only sleep when another attempt will follow: not after a successful
    # check and not after the final attempt.
    if ($joined -or ($retries -gt $timeout)) {
      break
    }
    Start-Sleep $retryDelayInSeconds
  }

  # Report the most specific failure based on the last attempt's output.
  If (-Not $nodes_list){
    Throw ("Node: '$(hostname)' failed to connect to API server")

  }ElseIf (-Not $nodes_list.contains($node_hostname)) {
    Throw ("Node: '$(hostname)' failed to join the cluster; NODES: '`n $($nodes_list)'")

  }ElseIf (-Not $host_status.contains("Ready")) {
    Throw ("Node: '$(hostname)' is not in Ready state")
  }

  Log-Output ("Node: $(hostname) successfully joined cluster `n NODES: `n $($nodes_list)")
  Verify_GceMetadataServerRouteIsPresent

}

# Downloads the Windows crictl package and installs its contents (e.g.
# crictl.exe) in $env:NODE_DIR.
#
# Does nothing when ShouldWrite-File reports that crictl.exe should not be
# (re)written. The download is checked against $CRICTL_SHA256.
function DownloadAndInstall-Crictl {
  if (-not (ShouldWrite-File ${env:NODE_DIR}\crictl.exe)) {
    return
  }
  $CRI_TOOLS_GCS_BUCKET = 'k8s-artifacts-cri-tools'
  $url = ('https://storage.googleapis.com/' + $CRI_TOOLS_GCS_BUCKET +
          '/release/' + $CRICTL_VERSION + '/crictl-' + $CRICTL_VERSION +
          '-windows-amd64.tar.gz')
  MustDownload-File `
      -URLs $url `
      -OutFile ${env:NODE_DIR}\crictl.tar.gz `
      -Hash $CRICTL_SHA256 `
      -Algorithm SHA256
  tar xzvf ${env:NODE_DIR}\crictl.tar.gz -C ${env:NODE_DIR}
}

# Sets crictl configuration values.
#
# Currently only the runtime endpoint is configured, and only when
# $env:CONTAINER_RUNTIME_ENDPOINT is non-empty.
function Configure-Crictl {
  if (${env:CONTAINER_RUNTIME_ENDPOINT}) {
    & "${env:NODE_DIR}\crictl.exe" config runtime-endpoint `
        ${env:CONTAINER_RUNTIME_ENDPOINT}
  }
}

# Pulls the infra/pause container image onto the node so that it will be
# immediately available when the kubelet tries to run pods.
# TODO(pjh): downloading the container image may take a few minutes;
# figure out how to run this in the background while performing the rest of
# the node startup steps!
# Pull-InfraContainer must be called AFTER Verify-WorkerServices.
#
# The image is pulled only when no single row of the `crictl images` listing
# contains both the image name and its label; throws if the pull fails.
function Pull-InfraContainer {
  $name, $label = ${env:INFRA_CONTAINER} -split ':',2

  # Match row by row, with the name and label escaped so that they are
  # treated literally. Matching against the whole listing joined into one
  # string would let the name match on one row and the label on another.
  $image_pattern = [regex]::Escape("$name") + '.*' + [regex]::Escape("$label")
  $already_present = @(& crictl images) |
      Where-Object { $_ -match $image_pattern }

  if (-not $already_present) {
    & crictl pull ${env:INFRA_CONTAINER}
    if (!$?) {
      throw "Error running 'crictl pull ${env:INFRA_CONTAINER}'"
    }
  }
  $inspect = "$(& crictl inspecti ${env:INFRA_CONTAINER} | Out-String)"
  Log-Output "Infra/pause container:`n$inspect"
}

# Setup the containerd on the node: optionally install Pigz, then install,
# configure and start containerd (in that order).
function Setup-ContainerRuntime {
  Install-Pigz
  Install_Containerd
  Configure_Containerd
  Start_Containerd
}

# Returns the Installed property of the Windows 'Containers' feature.
function Test-ContainersFeatureInstalled {
  return (Get-WindowsFeature Containers).Installed
}

# After this function returns, the computer must be restarted to complete
# the installation!
function Install-ContainersFeature {
  Log-Output "Installing Windows 'Containers' feature"
  Install-WindowsFeature Containers
}

# Verifies if Hyper-V should be enabled in the node.
# Returns $true only when $env:WINDOWS_ENABLE_HYPERV equals "true".
function Test-ShouldEnableHyperVFeature {
  return "${env:WINDOWS_ENABLE_HYPERV}" -eq "true"
}

# Check if Hyper-V feature is enabled
function Test-HyperVFeatureEnabled {
  return ((Get-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V).State -eq 'Enabled')
}

# After this function returns, the computer must be restarted to complete
# the installation!
function Enable-HyperVFeature {
  Log-Output "Enabling Windows 'HyperV' feature"
  # -NoRestart: the restart is left to the caller (see the note above).
  Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V -All -NoRestart
  Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V-Management-PowerShell -All -NoRestart
}

# Configures the TCP/IP parameters to be in sync with the GCP recommendation.
# Not setting these values correctly can cause network issues for connections
# that live longer than 10 minutes.
# See: https://cloud.google.com/compute/docs/troubleshooting/general-tips#idle-connections
function Set-WindowsTCPParameters {
  $tcpip_key = 'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters'

  # Written in this order; each value is stored as a DWORD.
  $dword_settings = [ordered]@{
    'KeepAliveInterval'         = 1000
    'KeepAliveTime'             = 60000
    'TcpMaxDataRetransmissions' = 10
  }
  foreach ($setting_name in $dword_settings.Keys) {
    Set-ItemProperty -Force -Confirm:$false `
      -Path $tcpip_key `
      -Name $setting_name -Type Dword -Value $dword_settings[$setting_name]
  }

  Log-Output 'TCP/IP Parameters'
  Get-ItemProperty -Path $tcpip_key
}

# Writes a CNI config file under $env:CNI_CONFIG_DIR for containerd.
#
# Prerequisites:
#   $env:POD_CIDR is set (by Set-PodCidr).
#   The "management" interface exists (Configure-HostNetworkingService).
#   The HNS network for pod networking has been configured
#     (Configure-HostNetworkingService).
#   Containerd is installed (Install_Containerd).
#
# Required ${kube_env} keys:
#   DNS_SERVER_IP
#   DNS_DOMAIN
#   SERVICE_CLUSTER_IP_RANGE
function Configure_Containerd_CniNetworking {
  $conf_path = "${env:CNI_CONFIG_DIR}\l2bridge.conf"
  if (-not (ShouldWrite-File ${conf_path})) {
    return
  }

  $mgmt_adapter = Get_MgmtNetAdapter
  $node_ip = ($mgmt_adapter | Get-NetIPAddress -AddressFamily IPv4).IPAddress
  $endpoint_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})

  # Explanation of the CNI config values:
  #   POD_CIDR: the pod CIDR assigned to this node.
  #   POD_GATEWAY: the gateway IP.
  #   MGMT_IP: the IP address assigned to the node's primary network interface
  #     (i.e. the internal IP of the GCE VM).
  #   SERVICE_CIDR: the CIDR used for kubernetes services.
  #   DNS_SERVER_IP: the cluster's DNS server IP address.
  #   DNS_DOMAIN: the cluster's DNS domain, e.g. "cluster.local".
  #
  # OutBoundNAT ExceptionList: No SNAT for CIDRs in the list, the same as default GKE non-masquerade destination ranges listed at https://cloud.google.com/kubernetes-engine/docs/how-to/ip-masquerade-agent#default-non-masq-dests

  New-Item -Force -ItemType file ${conf_path} | Out-Null

  # Template with upper-case placeholders that are substituted one by one
  # below, in this order.
  $cni_config = '{ "cniVersion": "0.2.0", "name": "l2bridge", "type": "sdnbridge", "master": "Ethernet", "capabilities": { "portMappings": true, "dns": true }, "ipam": { "subnet": "POD_CIDR", "routes": [ { "GW": "POD_GATEWAY" } ] }, "dns": { "Nameservers": [ "DNS_SERVER_IP" ], "Search": [ "DNS_DOMAIN" ] }, "AdditionalArgs": [ { "Name": "EndpointPolicy", "Value": { "Type": "OutBoundNAT", "Settings": { "Exceptions": [ "169.254.0.0/16", "10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16", "100.64.0.0/10", "192.0.0.0/24", "192.0.2.0/24", "192.88.99.0/24", "198.18.0.0/15", "198.51.100.0/24", "203.0.113.0/24", "240.0.0.0/4" ] } } }, { "Name": "EndpointPolicy", "Value": { "Type": "SDNRoute", "Settings": { "DestinationPrefix": "SERVICE_CIDR", "NeedEncap": true } } }, { "Name": "EndpointPolicy", "Value": { "Type": "SDNRoute", "Settings": { "DestinationPrefix": "MGMT_IP/32", "NeedEncap": true } } } ] }'
  $cni_config = $cni_config.replace('POD_CIDR', ${env:POD_CIDR})
  $cni_config = $cni_config.replace('POD_GATEWAY', ${endpoint_gateway})
  $cni_config = $cni_config.replace('DNS_SERVER_IP', ${kube_env}['DNS_SERVER_IP'])
  $cni_config = $cni_config.replace('DNS_DOMAIN', ${kube_env}['DNS_DOMAIN'])
  $cni_config = $cni_config.replace('MGMT_IP', ${node_ip})
  $cni_config = $cni_config.replace('SERVICE_CIDR', ${kube_env}['SERVICE_CLUSTER_IP_RANGE'])

  Set-Content ${conf_path} $cni_config
  Log-Output "containerd CNI config:`n$(Get-Content -Raw ${conf_path})"
}

# Download and install containerd and CNI binaries into $env:NODE_DIR.
function Install_Containerd {
  # Assume that presence of containerd.exe indicates that all containerd
  # binaries were already previously downloaded to this node.
  if (-not (ShouldWrite-File ${env:NODE_DIR}\containerd.exe)) {
    return
  }

  $staging_dir = 'C:\containerd_tmp'
  New-Item $staging_dir -ItemType 'directory' -Force | Out-Null

  # TODO(ibrahimab) Change this to a gcs bucket with CI maintained and accessible by community.
  $version = '1.6.2'
  $release_base = "https://github.com/containerd/containerd/releases/download/v${version}/"
  $tar_url = $release_base + "cri-containerd-cni-${version}-windows-amd64.tar.gz"
  $sha_url = $tar_url + ".sha256sum"

  # The checksum file is fetched first; its first space-separated field,
  # upper-cased, is the expected hash of the tarball.
  MustDownload-File -URLs $sha_url -OutFile $staging_dir\sha256sum
  $first_field = $(Get-Content $staging_dir\sha256sum).Split(" ")[0]
  $expected_sha = $first_field.ToUpper()

  MustDownload-File `
      -URLs $tar_url `
      -OutFile $staging_dir\containerd.tar.gz `
      -Hash $expected_sha `
      -Algorithm SHA256

  tar xzvf $staging_dir\containerd.tar.gz -C $staging_dir
  # CNI plugins go to $env:CNI_DIR, everything else to $env:NODE_DIR.
  Move-Item -Force $staging_dir\cni\bin\*.exe "${env:CNI_DIR}\"
  Move-Item -Force $staging_dir\*.exe "${env:NODE_DIR}\"
  Remove-Item -Force -Recurse $staging_dir

  # Exclusion for Defender.
  Add-MpPreference -ExclusionProcess "${env:NODE_DIR}\containerd.exe"
}

# Lookup the path of containerd config if exists, else returns a default.
#
# The path is taken from the "--config" argument in the command line of the
# registered 'containerd' Windows service.
function Get_Containerd_ConfigPath {
  $fallback_path = 'C:\Program Files\containerd\config.toml'

  $containerd_svc = Get-WMIObject -Class Win32_Service -Filter "Name='containerd'"
  if ($containerd_svc -eq $null) {
    return $fallback_path
  }

  $has_config_arg = $containerd_svc.PathName -match ".*\s--config\s*(\S+).*"
  if ($has_config_arg -and $matches.Count -eq 2) {
    return $matches[1]
  }
  return $fallback_path
}

# Generates the containerd config.toml file.
function Configure_Containerd {
  $toml_path = Get_Containerd_ConfigPath

  # Make sure the directory that will hold the config file exists.
  New-Item ([System.IO.Path]::GetDirectoryName($toml_path)) `
      -ItemType 'directory' -Force | Out-Null

  # Template; the upper-case placeholders are substituted below.
  $toml = @"
[plugins.scheduler]
  schedule_delay = '0s'
  startup_delay = '0s'
[plugins.cri]
  sandbox_image = 'INFRA_CONTAINER_IMAGE'
[plugins.cri.containerd]
  snapshotter = 'windows'
  default_runtime_name = 'runhcs-wcow-process'
  disable_snapshot_annotations = true
  discard_unpacked_layers = true
[plugins.cri.cni]
  bin_dir = 'CNI_BIN_DIR'
  conf_dir = 'CNI_CONF_DIR'
"@
  $toml = $toml.replace('INFRA_CONTAINER_IMAGE', ${env:INFRA_CONTAINER})
  $toml = $toml.replace('CNI_BIN_DIR', "${env:CNI_DIR}")
  $toml = $toml.replace('CNI_CONF_DIR', "${env:CNI_CONFIG_DIR}")

  Set-Content ${toml_path} $toml
}

# Register if needed and start containerd service.
function Start_Containerd {
  # Do the registration only if the containerd service does not exist.
  $registered_svc = Get-WMIObject -Class Win32_Service -Filter "Name='containerd'"
  if ($registered_svc -eq $null) {
    Log-Output "Creating containerd service"
    & containerd.exe --register-service --log-file "${env:LOGS_DIR}/containerd.log"
  }

  # Restart-Service is used for the start.
  Log-Output "Starting containerd service"
  Restart-Service containerd
}

# Pigz Resources
$PIGZ_ROOT = 'C:\pigz'
$PIGZ_VERSION = '2.3.1'
$PIGZ_TAR_URL = "https://storage.googleapis.com/gke-release/winnode/pigz/prod/gke_windows/pigz/release/5/20201104-134221/pigz-$PIGZ_VERSION.zip"
$PIGZ_TAR_HASH = '5a6f8f5530acc85ea51797f58c1409e5af6b69e55da243ffc608784cf14fec0cd16f74cc61c564d69e1a267750aecfc1e4c53b5219ff5f893b42a7576306f34c'

# Install Pigz (https://github.com/madler/pigz) into Windows for improved image
# extraction performance.
function Install-Pigz {
  # Opt-in: nothing happens unless $env:WINDOWS_ENABLE_PIGZ equals "true".
  if ("${env:WINDOWS_ENABLE_PIGZ}" -eq "true") {
    # The existence of $PIGZ_ROOT is used as the "already installed" marker.
    if (-not (Test-Path $PIGZ_ROOT)) {
      Log-Output "Installing Pigz $PIGZ_VERSION"
      New-Item -Path $PIGZ_ROOT -ItemType Directory
      # Use the full -URLs parameter name, like every other MustDownload-File
      # call in this module, instead of relying on the "-Url" prefix
      # abbreviation.
      MustDownload-File `
        -URLs $PIGZ_TAR_URL `
        -OutFile "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip" `
        -Hash $PIGZ_TAR_HASH `
        -Algorithm SHA512
      Expand-Archive -Path "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip" `
        -DestinationPath $PIGZ_ROOT
      Remove-Item -Path "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip"
      # Containerd search for unpigz.exe on the first container image
      # pull request after the service is started. If unpigz.exe is in the
      # Windows path it'll use it instead of the default unzipper.
      # See: https://github.com/containerd/containerd/issues/1896
      Add-MachineEnvironmentPath -Path $PIGZ_ROOT
      # Add process exclusion for Windows Defender to boost performance.
      Add-MpPreference -ExclusionProcess "$PIGZ_ROOT\unpigz.exe"
      Log-Output "Installed Pigz $PIGZ_VERSION"
    } else {
      Log-Output "Pigz already installed."
    }
  }
}

# Node Problem Detector Resources
$NPD_SERVICE = "node-problem-detector"
$DEFAULT_NPD_VERSION = '0.8.10-gke0.1'
$DEFAULT_NPD_RELEASE_PATH = 'https://storage.googleapis.com/gke-release/winnode'
$DEFAULT_NPD_HASH = '97ddfe3544da9e02a1cfb55d24f329eb29d606fca7fbbf800415d5de9dbc29a00563f8e0d1919595c8e316fd989d45b09b13c07be528841fc5fd37e21d016a2d'

# Install Node Problem Detector (NPD).
# NPD analyzes the host for problems that can disrupt workloads.
# https://github.com/kubernetes/node-problem-detector
function DownloadAndInstall-NodeProblemDetector {
  # Only the "standalone" mode installs NPD on the node.
  if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -ne "standalone") {
    return
  }
  if (-not (ShouldWrite-File "${env:NODE_DIR}\node-problem-detector.exe")) {
    Log-Output "Node Problem Detector already installed."
    return
  }

  # Version and hash: both come from ${kube_env} when a version is given
  # there, otherwise both fall back to the module defaults.
  $custom_version = ${kube_env}['NODE_PROBLEM_DETECTOR_VERSION']
  if ([string]::IsNullOrEmpty($custom_version)) {
    $version = $DEFAULT_NPD_VERSION
    $tar_hash = $DEFAULT_NPD_HASH
  }
  else {
    $version = ${kube_env}['NODE_PROBLEM_DETECTOR_VERSION']
    $tar_hash = ${kube_env}['NODE_PROBLEM_DETECTOR_TAR_HASH']
  }

  # Release path: ${kube_env} override, else the module default.
  $custom_release_path = ${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH']
  if ([string]::IsNullOrEmpty($custom_release_path)) {
    $release_path = $DEFAULT_NPD_RELEASE_PATH
  }
  else {
    $release_path = ${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH']
  }

  $tar_name = "node-problem-detector-v${version}-windows_amd64.tar.gz"

  Log-Output "Downloading ${tar_name}."

  $install_dir = "${env:K8S_DIR}\node-problem-detector"
  New-Item -Path $install_dir -ItemType Directory -Force -Confirm:$false

  MustDownload-File `
      -URLs "${release_path}/node-problem-detector/${tar_name}" `
      -Hash $tar_hash `
      -Algorithm SHA512 `
      -OutFile "${install_dir}\${tar_name}"

  tar xzvf "${install_dir}\${tar_name}" -C $install_dir
  # Binaries move to $env:NODE_DIR; the emptied bin directory and the tarball
  # are then removed.
  Move-Item "${install_dir}\bin\*" "${env:NODE_DIR}\" -Force -Confirm:$false
  Remove-Item "${install_dir}\bin" -Force -Confirm:$false
  Remove-Item "${install_dir}\${tar_name}" -Force -Confirm:$false
}

# Creates the node-problem-detector user kubeconfig file at
# $env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE (if defined).
#
# Create-NodePki() must be called first.
#
# Required ${kube_env} keys:
#   CA_CERT
#   NODE_PROBLEM_DETECTOR_TOKEN
function Create-NodeProblemDetectorKubeConfig {
  if ([string]::IsNullOrEmpty(${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE})) {
    return
  }
  $kubeconfig_params = @{
    Name  = 'node-problem-detector'
    Path  = ${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE}
    Token = ${kube_env}['NODE_PROBLEM_DETECTOR_TOKEN']
  }
  Create-Kubeconfig @kubeconfig_params
}

# Configures NPD to run with the bundled monitor configs and report against the Kubernetes api server.
function Configure-NodeProblemDetector {
  $npd_bin = "${env:NODE_DIR}\node-problem-detector.exe"
  # Only act in standalone mode and only when the binary is actually present.
  if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -eq "standalone" -and (Test-Path $npd_bin)) {
    $npd_svc = Get-Service -Name $NPD_SERVICE -ErrorAction SilentlyContinue
    # $null goes on the left so the comparison stays a scalar null check even
    # if Get-Service ever returns a collection.
    if ($null -eq $npd_svc) {
      $npd_dir = "${env:K8S_DIR}\node-problem-detector"
      $npd_logs_dir = "${env:LOGS_DIR}\node-problem-detector"

      # Out-Null keeps the DirectoryInfo object out of this function's output.
      New-Item -Path $npd_logs_dir -Type Directory -Force -Confirm:$false | Out-Null

      $flags = ''
      if ([string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS'])) {
        # No custom flags supplied: build the default flag set from the
        # monitor configs under $npd_dir\config.
        $system_log_monitors = @()
        $system_stats_monitors = @()
        $custom_plugin_monitors = @()

        # Custom Plugin Monitors
        $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-kubelet.json")
        $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-kubeproxy.json")
        $custom_plugin_monitors += @("${npd_dir}\config\windows-defender-monitor.json")

        # System Stats Monitors
        $system_stats_monitors += @("${npd_dir}\config\windows-system-stats-monitor.json")

        # NPD Configuration for CRI monitor
        $system_log_monitors += @("${npd_dir}\config\windows-containerd-monitor-filelog.json")
        $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-containerd.json")

        $flags="--v=2 --port=20256 --log_dir=${npd_logs_dir}"
        if ($system_log_monitors.count -gt 0) {
          $flags+=" --config.system-log-monitor={0}" -f ($system_log_monitors -join ",")
        }
        if ($system_stats_monitors.count -gt 0) {
          $flags+=" --config.system-stats-monitor={0}" -f ($system_stats_monitors -join ",")
        }
        if ($custom_plugin_monitors.count -gt 0) {
          $flags+=" --config.custom-plugin-monitor={0}" -f ($custom_plugin_monitors -join ",")
        }
      }
      else {
        $flags = ${kube_env}['NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS']
      }
      # The apiserver override is appended in both cases (default and custom
      # flags).
      $kubernetes_master_name = ${kube_env}['KUBERNETES_MASTER_NAME']
      $flags = "${flags} --apiserver-override=`"https://${kubernetes_master_name}?inClusterConfig=false&auth=${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE}`""

      Log-Output "Creating service: ${NPD_SERVICE}"
      Log-Output "${npd_bin} ${flags}"
      sc.exe create $NPD_SERVICE binpath= "${npd_bin} ${flags}" displayName= "Node Problem Detector"
      sc.exe failure $NPD_SERVICE reset= 30 actions= restart/5000
      sc.exe start $NPD_SERVICE

      Write-VerboseServiceInfoToConsole -Service $NPD_SERVICE
    }
    else {
      Log-Output "${NPD_SERVICE} already configured."
    }
  }
}

# TODO(pjh): move the logging agent code below into a separate
# module; it was put here temporarily to avoid disrupting the file layout in
# the K8s release machinery.

# Fluent Bit logging agent: version, install root, service name and the
# wildcard pattern used to find its process by command line.
$LOGGINGAGENT_VERSION = '1.8.10'
$LOGGINGAGENT_ROOT = 'C:\fluent-bit'
$LOGGINGAGENT_SERVICE = 'fluent-bit'
$LOGGINGAGENT_CMDLINE = '*fluent-bit.exe*'

# Fluent Bit exporter: version, install root, service name, process
# command-line pattern and the SHA256 hash passed to MustDownload-File.
$LOGGINGEXPORTER_VERSION = 'v0.17.0'
$LOGGINGEXPORTER_ROOT = 'C:\flb-exporter'
$LOGGINGEXPORTER_SERVICE = 'flb-exporter'
$LOGGINGEXPORTER_CMDLINE = '*flb-exporter.exe*'
$LOGGINGEXPORTER_HASH = 'c808c9645d84b06b89932bd707d51a9d1d0b451b5a702a5f9b2b4462c8be6502'

# Restart Logging agent or starts it if it is not currently running
#
# When the Stackdriver agent is installed it is restarted instead and the
# Fluent Bit services are left alone. Otherwise the exporter service is
# restarted first, then the agent service.
function Restart-LoggingAgent {
  if (IsStackdriverAgentInstalled) {
      Restart-StackdriverAgent
      return
  }

  Restart-LogService $LOGGINGEXPORTER_SERVICE $LOGGINGEXPORTER_CMDLINE
  Restart-LogService $LOGGINGAGENT_SERVICE $LOGGINGAGENT_CMDLINE
}

# Restarts the service, or starts it if it is not currently
# running. A standard `Restart-Service` may fail because
# the process is sometimes unstoppable, so this function works around it
# by killing the processes.
#
# Parameters:
#   $service  name of the Windows service to restart.
#   $cmdline  wildcard pattern matched (-Like) against process command lines
#             to find the processes to kill when the service does not stop.
#
# Throws if the service has still not reached 'Stopped' after the force kill
# and a further 60 second wait.
function Restart-LogService([string]$service, [string]$cmdline) {
  Stop-Service -NoWait -ErrorAction Ignore $service

  # Wait (if necessary) for service to stop.
  $timeout = 10
  $stopped = (Get-service $service).Status -eq 'Stopped'
  for ($i = 0; $i -lt $timeout -and !($stopped); $i++) {
      Start-Sleep 1
      $stopped = (Get-service $service).Status -eq 'Stopped'
  }

  if ((Get-service $service).Status -ne 'Stopped') {
    # Force kill the processes.
    # Collect the ids first: passing $null to Stop-Process -Id (no matching
    # process) is a parameter binding error, which would abort the restart
    # when errors are treated as terminating.
    $process_ids = @(Get-WmiObject win32_process |
        Where CommandLine -Like $cmdline |
        ForEach-Object { $_.ProcessId })
    if ($process_ids.Count -gt 0) {
      Stop-Process -Force -PassThru -Id $process_ids
    }
    else {
      Log-Output "No process matching ${cmdline} found for ${service} service"
    }

    # Wait until process has stopped.
    $waited = 0
    $log_period = 10
    $timeout = 60
    while ((Get-service $service).Status -ne 'Stopped' -and $waited -lt $timeout) {
      Start-Sleep 1
      $waited++

      if ($waited % $log_period -eq 0) {
        Log-Output "Waiting for ${service} service to stop"
      }
    }

    # Timeout occurred
    # Decide by the service state rather than the counter, so that a service
    # which stops during the very last poll is not reported as a timeout.
    if ((Get-service $service).Status -ne 'Stopped') {
      Throw ("Timeout while waiting for ${service} service to stop")
    }
  }

  Start-Service $service
}

# Check whether the logging agent is installed by whether it's registered as service
#
# Returns $true when Get-Service reports a status for $LOGGINGAGENT_SERVICE,
# $false otherwise (lookup errors are ignored).
function IsLoggingAgentInstalled {
  $logging_status = (Get-Service $LOGGINGAGENT_SERVICE -ErrorAction Ignore).Status
  return -not [string]::IsNullOrEmpty($logging_status)
}

# Installs the logging agent according to https://docs.fluentbit.io/manual/installation/windows#
# Also installs fluent bit stackdriver exporter
#
# Skips installation when either the Stackdriver agent or the Fluent Bit agent
# is already installed.
function Install-LoggingAgent {
  if (IsStackdriverAgentInstalled) {
    # Remove the existing storage.json file if it exists. This is a workaround
    # for the bug where the logging agent cannot start up if the file is
    # corrupted.
    Remove-Item `
        -Force `
        -ErrorAction Ignore `
        ("$STACKDRIVER_ROOT\LoggingAgent\Main\pos\winevtlog.pos\worker0\" +
         "storage.json")
    Log-Output ("Skip: Stackdriver logging agent is already installed")
    return
  }

  if (IsLoggingAgentInstalled) {
    # Note: we should reinstall the agent if $REDO_STEPS is true
    # here, but we don't know how to run the installer without it prompting
    # when logging agent is already installed. We dumped the strings in the
    # installer binary and searched for flags to do this but found nothing. Oh
    # well.
    Log-Output ("Skip: Fluentbit logging agent is already installed")
    return
  }

  DownloadAndInstall-LoggingAgents
  Create-LoggingAgentServices
}

# Downloads the Fluent Bit agent archive and the exporter binary, each only
# when ShouldWrite-File approves writing its target path.
#
# Note: this function changes the current location (cd) while extracting the
# agent and leaves it at C:\ afterwards.
function DownloadAndInstall-LoggingAgents {
  # Install Logging agent if not present
  if (ShouldWrite-File $LOGGINGAGENT_ROOT\td-agent-bit-${LOGGINGAGENT_VERSION}-win64) {
      $install_dir = 'C:\flb-installers'
      $url = ("https://storage.googleapis.com/gke-release/winnode/fluentbit/td-agent-bit-${LOGGINGAGENT_VERSION}-win64.zip")

      Log-Output 'Downloading Logging agent'
      New-Item $install_dir -ItemType 'directory' -Force | Out-Null
      MustDownload-File -OutFile $install_dir\td.zip -URLs $url

      cd $install_dir
      Log-Output 'Extracting Logging agent'
      # Expand-Archive without a destination extracts into .\td, hence the
      # extra path component in the move below.
      Expand-Archive td.zip
      mv .\td\td-agent-bit-${LOGGINGAGENT_VERSION}-win64\ $LOGGINGAGENT_ROOT
      cd C:\
      Remove-Item -Force -Recurse $install_dir
  }

  # Download Logging exporter if needed
  if (ShouldWrite-File $LOGGINGEXPORTER_ROOT\flb-exporter.exe) {
      $url = ("https://storage.googleapis.com/gke-release/winnode/fluentbit-exporter/${LOGGINGEXPORTER_VERSION}/flb-exporter-${LOGGINGEXPORTER_VERSION}.exe")
      Log-Output 'Downloading logging exporter'
      New-Item $LOGGINGEXPORTER_ROOT -ItemType 'directory' -Force | Out-Null
      MustDownload-File `
          -OutFile $LOGGINGEXPORTER_ROOT\flb-exporter.exe `
          -URLs $url `
          -Hash $LOGGINGEXPORTER_HASH `
          -Algorithm SHA256
  }
}

# Registers the Fluent Bit agent and exporter as Windows services with sc.exe
# and gives each a restart-on-failure action. The services are created here
# but not started.
#
# Note: this function changes the current location to $LOGGINGAGENT_ROOT.
function Create-LoggingAgentServices {
  cd $LOGGINGAGENT_ROOT

  Log-Output "Creating service: ${LOGGINGAGENT_SERVICE}"
  sc.exe create $LOGGINGAGENT_SERVICE binpath= "${LOGGINGAGENT_ROOT}\bin\fluent-bit.exe -c \fluent-bit\conf\fluent-bit.conf"
  sc.exe failure $LOGGINGAGENT_SERVICE reset= 30 actions= restart/5000
  Write-VerboseServiceInfoToConsole -Service $LOGGINGAGENT_SERVICE

  Log-Output "Creating service: ${LOGGINGEXPORTER_SERVICE}"
  sc.exe create  $LOGGINGEXPORTER_SERVICE  binpath= "${LOGGINGEXPORTER_ROOT}\flb-exporter.exe --kubernetes-separator=_ --stackdriver-resource-model=k8s --enable-pod-label-discovery --logtostderr --winsvc  --pod-label-dot-replacement=_"
  sc.exe failure $LOGGINGEXPORTER_SERVICE reset= 30 actions= restart/5000
  Write-VerboseServiceInfoToConsole -Service $LOGGINGEXPORTER_SERVICE
}

# Writes the logging configuration file for Logging agent. Restart-LoggingAgent
# should then be called to pick up the new configuration.
#
# When the Stackdriver agent is installed, configuration is delegated to
# Configure-StackdriverAgent. Otherwise $FLUENTBIT_CONFIG and $PARSERS_CONFIG
# are written (ASCII-encoded) under $LOGGINGAGENT_ROOT\conf.
function Configure-LoggingAgent {
  if (IsStackdriverAgentInstalled) {
      Configure-StackdriverAgent
      return
  }

  $fluentbit_config_file = "$LOGGINGAGENT_ROOT\conf\fluent-bit.conf"
  $FLUENTBIT_CONFIG | Out-File -FilePath $fluentbit_config_file -Encoding ASCII
  Log-Output "Wrote logging config to $fluentbit_config_file"

  $fluentbit_parser_file = "$LOGGINGAGENT_ROOT\conf\parsers.conf"
  $PARSERS_CONFIG | Out-File -FilePath $fluentbit_parser_file -Encoding ASCII

  # Create directory for all the log position files.
  New-Item -Type Directory -Path "/var/run/google-fluentbit/pos-files/" -Force | Out-Null

  Log-Output "Wrote logging config to $fluentbit_parser_file"
}

# Fluentbit main config file
$FLUENTBIT_CONFIG = @'
[SERVICE]
    Flush         5
    Grace         120
    Log_Level     info
    Log_File      /var/log/fluentbit.log
    Daemon        off
    Parsers_File  parsers.conf
    HTTP_Server   off
    HTTP_Listen   0.0.0.0
    HTTP_PORT     2020
    plugins_file plugins.conf

    # Storage
    # =======
    # Fluent Bit can use memory and filesystem buffering based mechanisms
    #
    # - https://docs.fluentbit.io/manual/administration/buffering-and-storage
    #
    # storage metrics
    # ---------------
    # publish storage pipeline metrics in '/api/v1/storage'. The metrics are
    # exported only if the 'http_server' option is enabled.
    #
    # storage.metrics on

    # storage.path
    # ------------
    # absolute file system path to store filesystem data buffers (chunks).
    #
    # storage.path /tmp/storage

    # storage.sync
    # ------------
    # configure the synchronization mode used to store the data into the
    # filesystem. It can take the values normal or full.
    #
    # storage.sync normal

    # storage.checksum
    # ----------------
    # enable the data integrity check when writing and reading data from the
    # filesystem. The storage layer uses the CRC32 algorithm.
# # storage.checksum off # storage.backlog.mem_limit # ------------------------- # if storage.path is set, Fluent Bit will look for data chunks that were # not delivered and are still in the storage layer, these are called # backlog data. This option configure a hint of maximum value of memory # to use when processing these records. # # storage.backlog.mem_limit 5M [INPUT] Name winlog Interval_Sec 2 # Channels Setup,Windows PowerShell Channels application,system,security Tag winevt.raw DB /var/run/google-fluentbit/pos-files/winlog.db # Json Log Example: # {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"} [INPUT] Name tail Alias kube_containers Tag kube___ Tag_Regex (?[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?[^_]+)_(?.+)- Mem_Buf_Limit 5MB Skip_Long_Lines On Refresh_Interval 5 Path C:\var\log\containers\*.log DB /var/run/google-fluentbit/pos-files/flb_kube.db [FILTER] Name parser Match kube_* Key_Name log Reserve_Data True Parser docker Parser containerd # Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg # Example: # I0716 02:08:55.559351 3356 log_spam.go:42] Command line arguments: [INPUT] Name tail Alias node-problem-detector Tag node-problem-detector Mem_Buf_Limit 5MB Skip_Long_Lines On Refresh_Interval 5 Path C:\etc\kubernetes\logs\node-problem-detector\*.log.INFO* DB /var/run/google-fluentbit/pos-files/node-problem-detector.db Multiline On Parser_Firstline glog # Example: # I0928 03:15:50.440223 4880 main.go:51] Starting CSI-Proxy Server ... 
[INPUT] Name tail Alias csi-proxy Tag csi-proxy Mem_Buf_Limit 5MB Skip_Long_Lines On Refresh_Interval 5 Path /etc/kubernetes/logs/csi-proxy.log DB /var/run/google-fluentbit/pos-files/csi-proxy.db Multiline On Parser_Firstline glog # I1118 21:26:53.975789 6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed [INPUT] Name tail Alias kube-proxy Tag kube-proxy Mem_Buf_Limit 5MB Skip_Long_Lines On Refresh_Interval 5 Path /etc/kubernetes/logs/kube-proxy.log DB /var/run/google-fluentbit/pos-files/kube-proxy.db Multiline On Parser_Firstline glog # Example: # time="2019-12-10T21:27:59.836946700Z" level=info msg="loading plugin \"io.containerd.grpc.v1.cri\"..." type=io.containerd.grpc.v1 [INPUT] Name tail Alias container-runtime Tag container-runtime Mem_Buf_Limit 5MB Skip_Long_Lines On Refresh_Interval 5 Path /etc/kubernetes/logs/containerd.log DB /var/run/google-fluentbit/pos-files/container-runtime.db # TODO: Add custom parser for containerd logs once format is settled. # Example: # I0204 07:32:30.020537 3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537] [INPUT] Name tail Alias kubelet Tag kubelet Mem_Buf_Limit 5MB Skip_Long_Lines On Refresh_Interval 5 Path /etc/kubernetes/logs/kubelet.log DB /var/run/google-fluentbit/pos-files/kubelet.db Multiline On Parser_Firstline glog [FILTER] Name modify Match * Hard_rename log message [FILTER] Name modify Match winevt.raw Hard_rename Message message [FILTER] Name parser Match kube_* Key_Name message Reserve_Data True Parser glog Parser json [OUTPUT] Name http Match * Host 127.0.0.1 Port 2021 URI /logs header_tag FLUENT-TAG Format msgpack Retry_Limit 2 '@ # Fluentbit parsers config file $PARSERS_CONFIG = @' [PARSER] Name docker Format json Time_Key time Time_Format %Y-%m-%dT%H:%M:%S.%L%z [PARSER] Name containerd Format regex Regex ^(?