...

Text file src/k8s.io/kubernetes/cluster/gce/windows/k8s-node-setup.psm1

Documentation: k8s.io/kubernetes/cluster/gce/windows

     1# Copyright 2019 The Kubernetes Authors.
     2#
     3# Licensed under the Apache License, Version 2.0 (the "License");
     4# you may not use this file except in compliance with the License.
     5# You may obtain a copy of the License at
     6#
     7#     http://www.apache.org/licenses/LICENSE-2.0
     8#
     9# Unless required by applicable law or agreed to in writing, software
    10# distributed under the License is distributed on an "AS IS" BASIS,
    11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12# See the License for the specific language governing permissions and
    13# limitations under the License.
    14
    15<#
    16.SYNOPSIS
    17  Library for configuring Windows nodes and joining them to the cluster.
    18
    19.NOTES
    20  This module depends on common.psm1.
    21
    22  Some portions copied / adapted from
    23  https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1.
    24
    25.EXAMPLE
    26  Suggested usage for dev/test:
    27    [Net.ServicePointManager]::SecurityProtocol = `
    28        [Net.SecurityProtocolType]::Tls12
    29    Invoke-WebRequest `
    30        https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/k8s-node-setup.psm1 `
    31        -OutFile C:\k8s-node-setup.psm1
    32    Invoke-WebRequest `
    33        https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/configure.ps1 `
    34        -OutFile C:\configure.ps1
    35    Import-Module -Force C:\k8s-node-setup.psm1  # -Force to override existing
    36    # Execute functions manually or run configure.ps1.
    37#>
    38
    39# IMPORTANT PLEASE NOTE:
    40# Any time the file structure in the `windows` directory changes, `windows/BUILD`
    41# and `k8s.io/release/lib/releaselib.sh` must be manually updated with the changes.
    42# We HIGHLY recommend not changing the file structure, because consumers of
    43# Kubernetes releases depend on the release structure remaining stable.
    44
    45# TODO: update scripts for these style guidelines:
    46#  - Remove {} around variable references unless actually needed for clarity.
    47#  - Always use single-quoted strings unless actually interpolating variables
    48#    or using escape characters.
    49#  - Use "approved verbs":
    50#    https://docs.microsoft.com/en-us/powershell/developer/cmdlet/approved-verbs-for-windows-powershell-commands
    51#  - Document functions using proper syntax:
    52#    https://technet.microsoft.com/en-us/library/hh847834(v=wps.620).aspx
    53
    54$GCE_METADATA_SERVER = "169.254.169.254"
    55# The "management" interface is used by the kubelet and by Windows pods to talk
    56# to the rest of the Kubernetes cluster *without NAT*. This interface does not
    57# exist until an initial HNS network has been created on the Windows node - see
    58# Add_InitialHnsNetwork().
    59$MGMT_ADAPTER_NAME = "vEthernet (Ethernet*"
    60$CRICTL_VERSION = 'v1.29.0'
    61$CRICTL_SHA256 = '9b679305cb05f73e9e4868056e7d48805c47e24d2d38849e64395ff54cf5c701'
    62
    63Import-Module -Force C:\common.psm1
    64
    65# Writes a TODO with $Message to the console.
    66function Log_Todo {
    67  param (
    68    [parameter(Mandatory=$true)] [string]$Message
    69  )
    70  Log-Output "TODO: ${Message}"
    71}
    72
    73# Writes a not-implemented warning with $Message to the console and exits the
    74# script.
    75function Log_NotImplemented {
    76  param (
    77    [parameter(Mandatory=$true)] [string]$Message
    78  )
    79  Log-Output "Not implemented yet: ${Message}" -Fatal
    80}
    81
    82# Fails and exits if the route to the GCE metadata server is not present,
    83# otherwise does nothing and emits nothing.
    84function Verify_GceMetadataServerRouteIsPresent {
    85  Try {
    86    Get-NetRoute `
    87        -ErrorAction "Stop" `
    88        -AddressFamily IPv4 `
    89        -DestinationPrefix ${GCE_METADATA_SERVER}/32 | Out-Null
    90  } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
    91    Log-Output -Fatal `
    92        ("GCE metadata server route is not present as expected.`n" +
    93         "$(Get-NetRoute -AddressFamily IPv4 | Out-String)")
    94  }
    95}
    96
    97# Checks if the route to the GCE metadata server is present. Returns when the
    98# route is NOT present or after a timeout has expired.
    99function WaitFor_GceMetadataServerRouteToBeRemoved {
   100  $elapsed = 0
   101  $timeout = 60
   102  Log-Output ("Waiting up to ${timeout} seconds for GCE metadata server " +
   103              "route to be removed")
   104  while (${elapsed} -lt ${timeout}) {
   105    Try {
   106      Get-NetRoute `
   107          -ErrorAction "Stop" `
   108          -AddressFamily IPv4 `
   109          -DestinationPrefix ${GCE_METADATA_SERVER}/32 | Out-Null
   110    } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
   111      break
   112    }
   113    $sleeptime = 2
   114    Start-Sleep ${sleeptime}
   115    ${elapsed} += ${sleeptime}
   116  }
   117}
   118
   119# Adds a route to the GCE metadata server to every network interface.
   120function Add_GceMetadataServerRoute {
   121  # Before setting up HNS the Windows VM has a "vEthernet (nat)" interface and
   122  # a "Ethernet" interface, and the route to the metadata server exists on the
   123  # Ethernet interface. After adding the HNS network a "vEthernet (Ethernet)"
   124  # interface is added, and it seems to subsume the routes of the "Ethernet"
   125  # interface (trying to add routes on the Ethernet interface at this point just
   126  # results in "New-NetRoute : Element not found" errors). I don't know what's
   127  # up with that, but since it's hard to know what's the right thing to do here
   128  # we just try to add the route on all of the network adapters.
   129  Get-NetAdapter | ForEach-Object {
   130    $adapter_index = $_.InterfaceIndex
   131    New-NetRoute `
   132        -ErrorAction Ignore `
   133        -DestinationPrefix "${GCE_METADATA_SERVER}/32" `
   134        -InterfaceIndex ${adapter_index} | Out-Null
   135  }
   136}
   137
   138# Returns a PowerShell object representing the Windows version.
   139function Get_WindowsVersion {
   140  # Unlike checking `[System.Environment]::OSVersion.Version`, this long-winded
   141  # approach gets the OS revision/patch number correctly
   142  # (https://superuser.com/a/1160428/652018).
   143  $win_ver = New-Object -TypeName PSObject
   144  $win_ver | Add-Member -MemberType NoteProperty -Name Major -Value $(Get-ItemProperty -Path 'Registry::HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion' CurrentMajorVersionNumber).CurrentMajorVersionNumber
   145  $win_ver | Add-Member -MemberType NoteProperty -Name Minor -Value $(Get-ItemProperty -Path 'Registry::HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion' CurrentMinorVersionNumber).CurrentMinorVersionNumber
   146  $win_ver | Add-Member -MemberType NoteProperty -Name Build -Value $(Get-ItemProperty -Path 'Registry::HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion' CurrentBuild).CurrentBuild
   147  $win_ver | Add-Member -MemberType NoteProperty -Name Revision -Value $(Get-ItemProperty -Path 'Registry::HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion' UBR).UBR
   148  return $win_ver
   149}
   150
   151# Writes debugging information, such as Windows version and patch info, to the
   152# console.
   153function Dump-DebugInfoToConsole {
   154  Try {
   155    $version = Get_WindowsVersion | Out-String
   156    $hotfixes = "$(Get-Hotfix | Out-String)"
   157    $image = "$(Get-InstanceMetadata 'image' | Out-String)"
   158    Log-Output "Windows version:`n$version"
   159    Log-Output "Installed hotfixes:`n$hotfixes"
   160    Log-Output "GCE Windows image:`n$image"
   161  } Catch { }
   162}
   163
   164# Configures Window Defender preferences
   165function Configure-WindowsDefender {
   166  if ((Get-WindowsFeature -Name 'Windows-Defender').Installed) {
   167    Log-Output "Configuring Windows Defender preferences"
   168    Set-MpPreference -SubmitSamplesConsent NeverSend
   169    Log-Output "Disabling Windows Defender sample submission"
   170    Set-MpPreference -MAPSReporting Disabled
   171    Log-Output "Disabling Windows Defender Microsoft Active Protection Service Reporting"
   172
   173    Log-Output "Defender Preferences"
   174    Get-MpPreference
   175  }
   176}
   177
   178# Converts the kube-env string in Yaml
   179#
   180# Returns: a PowerShell Hashtable object containing the key-value pairs from
   181#   kube-env.
   182function ConvertFrom_Yaml_KubeEnv {
   183  param (
   184    [parameter(Mandatory=$true)] [string]$kube_env_str
   185  )
   186  $kube_env_table = @{}
   187  $currentLine = $null
   188  switch -regex (${kube_env_str} -split '\r?\n') {
   189      '^(\S.*)' {
   190          # record start pattern, line that doesn't start with a whitespace
   191          if ($null -ne $currentLine) {
   192              $key, $val = $currentLine -split ":",2
   193              $kube_env_table[$key] = $val.Trim("'", " ", "`"")
   194          }
   195          $currentLine = $matches.1
   196          continue
   197      }
   198
   199      '^(\s+.*)' {
   200          # line that start with whitespace
   201          $currentLine += $matches.1
   202          continue
   203      }
   204  }
   205
   206  # Handle the last line if any
   207  if ($currentLine) {
   208      $key, $val = $currentLine -split ":",2
   209      $kube_env_table[$key] = $val.Trim("'", " ", "`"")
   210  }
   211
   212  return ${kube_env_table}
   213}
   214
   215# Fetches the kube-env from the instance metadata.
   216#
   217# Returns: a PowerShell Hashtable object containing the key-value pairs from
   218#   kube-env.
   219function Fetch-KubeEnv {
   220  # Testing / debugging:
   221  # First:
   222  #   ${kube_env} = Get-InstanceMetadataAttribute 'kube-env'
   223  # or:
   224  #   ${kube_env} = [IO.File]::ReadAllText(".\kubeEnv.txt")
   225  # ${kube_env_table} = ConvertFrom_Yaml_KubeEnv ${kube_env}
   226  # ${kube_env_table}
   227  # ${kube_env_table}.GetType()
   228
   229  # The type of kube_env is a powershell String.
   230  $kube_env = Get-InstanceMetadataAttribute 'kube-env'
   231  $kube_env_table = ConvertFrom_Yaml_KubeEnv ${kube_env}
   232
   233  Log-Output "Logging kube-env key-value pairs except CERT and KEY values"
   234  foreach ($entry in $kube_env_table.GetEnumerator()) {
   235    if ((-not ($entry.Name.contains("CERT"))) -and (-not ($entry.Name.contains("KEY")))) {
   236      Log-Output "$($entry.Name): $($entry.Value)"
   237    }
   238  }
   239  return ${kube_env_table}
   240}
   241
   242# Sets the environment variable $Key to $Value at the Machine scope (will
   243# be present in the environment for all new shells after a reboot).
   244function Set_MachineEnvironmentVar {
   245  param (
   246    [parameter(Mandatory=$true)] [string]$Key,
   247    [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
   248  )
   249  [Environment]::SetEnvironmentVariable($Key, $Value, "Machine")
   250}
   251
   252# Sets the environment variable $Key to $Value in the current shell.
   253function Set_CurrentShellEnvironmentVar {
   254  param (
   255    [parameter(Mandatory=$true)] [string]$Key,
   256    [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
   257  )
   258  $expression = '$env:' + $Key + ' = "' + $Value + '"'
   259  Invoke-Expression ${expression}
   260}
   261
   262# Sets environment variables used by Kubernetes binaries and by other functions
   263# in this module. Depends on numerous ${kube_env} keys.
   264function Set-EnvironmentVars {
   265  if ($kube_env.ContainsKey('WINDOWS_CONTAINER_RUNTIME_ENDPOINT')) {
   266      $container_runtime_endpoint = ${kube_env}['WINDOWS_CONTAINER_RUNTIME_ENDPOINT']
   267  } else {
   268      Log-Output "ERROR: WINDOWS_CONTAINER_RUNTIME_ENDPOINT not set in kube-env, falling back in CONTAINER_RUNTIME_ENDPOINT"
   269      $container_runtime_endpoint = ${kube_env}['CONTAINER_RUNTIME_ENDPOINT']
   270  }
   271  # Turning the kube-env values into environment variables is not required but
   272  # it makes debugging this script easier, and it also makes the syntax a lot
   273  # easier (${env:K8S_DIR} can be expanded within a string but
   274  # ${kube_env}['K8S_DIR'] cannot be afaik).
   275  $env_vars = @{
   276    "K8S_DIR" = ${kube_env}['K8S_DIR']
   277    # Typically 'C:\etc\kubernetes\node\bin' (not just 'C:\etc\kubernetes\node')
   278    "NODE_DIR" = ${kube_env}['NODE_DIR']
   279    "CNI_DIR" = ${kube_env}['CNI_DIR']
   280    "CNI_CONFIG_DIR" = ${kube_env}['CNI_CONFIG_DIR']
   281    "WINDOWS_CNI_STORAGE_PATH" = ${kube_env}['WINDOWS_CNI_STORAGE_PATH']
   282    "WINDOWS_CNI_VERSION" = ${kube_env}['WINDOWS_CNI_VERSION']
   283    "CSI_PROXY_STORAGE_PATH" = ${kube_env}['CSI_PROXY_STORAGE_PATH']
   284    "CSI_PROXY_VERSION" = ${kube_env}['CSI_PROXY_VERSION']
   285    "CSI_PROXY_FLAGS" = ${kube_env}['CSI_PROXY_FLAGS']
   286    "ENABLE_CSI_PROXY" = ${kube_env}['ENABLE_CSI_PROXY']
   287    "PKI_DIR" = ${kube_env}['PKI_DIR']
   288    "CA_FILE_PATH" = ${kube_env}['CA_FILE_PATH']
   289    "KUBELET_CONFIG" = ${kube_env}['KUBELET_CONFIG_FILE']
   290    "BOOTSTRAP_KUBECONFIG" = ${kube_env}['BOOTSTRAP_KUBECONFIG_FILE']
   291    "KUBECONFIG" = ${kube_env}['KUBECONFIG_FILE']
   292    "KUBEPROXY_KUBECONFIG" = ${kube_env}['KUBEPROXY_KUBECONFIG_FILE']
   293    "LOGS_DIR" = ${kube_env}['LOGS_DIR']
   294    "MANIFESTS_DIR" = ${kube_env}['MANIFESTS_DIR']
   295    "INFRA_CONTAINER" = ${kube_env}['WINDOWS_INFRA_CONTAINER']
   296    "WINDOWS_ENABLE_PIGZ" = ${kube_env}['WINDOWS_ENABLE_PIGZ']
   297    "WINDOWS_ENABLE_HYPERV" = ${kube_env}['WINDOWS_ENABLE_HYPERV']
   298    "ENABLE_NODE_PROBLEM_DETECTOR" = ${kube_env}['ENABLE_NODE_PROBLEM_DETECTOR']
   299    "NODEPROBLEMDETECTOR_KUBECONFIG_FILE" = ${kube_env}['WINDOWS_NODEPROBLEMDETECTOR_KUBECONFIG_FILE']
   300    "ENABLE_AUTH_PROVIDER_GCP" = ${kube_env}['ENABLE_AUTH_PROVIDER_GCP']
   301    "AUTH_PROVIDER_GCP_STORAGE_PATH" = ${kube_env}['AUTH_PROVIDER_GCP_STORAGE_PATH']
   302    "AUTH_PROVIDER_GCP_VERSION" = ${kube_env}['AUTH_PROVIDER_GCP_VERSION']
   303    "AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64" = ${kube_env}['AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64']
   304    "AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR" = ${kube_env}['AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR']
   305    "AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE" = ${kube_env}['AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE']
   306
   307    "Path" = ${env:Path} + ";" + ${kube_env}['NODE_DIR']
   308    "KUBE_NETWORK" = "l2bridge".ToLower()
   309    "KUBELET_CERT_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.crt'
   310    "KUBELET_KEY_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.key'
   311
   312    "CONTAINER_RUNTIME_ENDPOINT" = $container_runtime_endpoint
   313
   314    'LICENSE_DIR' = 'C:\Program Files\Google\Compute Engine\THIRD_PARTY_NOTICES'
   315  }
   316
   317  # Set the environment variables in two ways: permanently on the machine (only
   318  # takes effect after a reboot), and in the current shell.
   319  $env_vars.GetEnumerator() | ForEach-Object{
   320    $message = "Setting environment variable: " + $_.key + " = " + $_.value
   321    Log-Output ${message}
   322    Set_MachineEnvironmentVar $_.key $_.value
   323    Set_CurrentShellEnvironmentVar $_.key $_.value
   324  }
   325}
   326
   327# Configures various settings and prerequisites needed for the rest of the
   328# functions in this module and the Kubernetes binaries to operate properly.
   329function Set-PrerequisiteOptions {
   330  # Windows updates cause the node to reboot at arbitrary times.
   331  Log-Output "Disabling Windows Update service"
   332  & sc.exe config wuauserv start=disabled
   333  & sc.exe stop wuauserv
   334  Write-VerboseServiceInfoToConsole -Service 'wuauserv' -Delay 1
   335
   336  # Use TLS 1.2: needed for Invoke-WebRequest downloads from github.com.
   337  [Net.ServicePointManager]::SecurityProtocol = `
   338      [Net.SecurityProtocolType]::Tls12
   339
   340  Configure-WindowsDefender
   341}
   342
   343# Creates directories where other functions in this module will read and write
   344# data.
   345# Note: C:\tmp is required for running certain kubernetes tests.
   346#       C:\var\log is used by kubelet to stored container logs and also
   347#       hard-coded in the fluentd/stackdriver config for log collection.
   348function Create-Directories {
   349  Log-Output "Creating ${env:K8S_DIR} and its subdirectories."
   350  ForEach ($dir in ("${env:K8S_DIR}", "${env:NODE_DIR}", "${env:LOGS_DIR}",
   351    "${env:CNI_DIR}", "${env:CNI_CONFIG_DIR}", "${env:MANIFESTS_DIR}",
   352    "${env:PKI_DIR}", "${env:LICENSE_DIR}"), "C:\tmp", "C:\var\log") {
   353    mkdir -Force $dir
   354  }
   355}
   356
   357# Downloads some external helper scripts needed by other functions in this
   358# module.
   359function Download-HelperScripts {
   360  if (ShouldWrite-File ${env:K8S_DIR}\hns.psm1) {
   361    MustDownload-File `
   362        -OutFile ${env:K8S_DIR}\hns.psm1 `
   363        -URLs 'https://storage.googleapis.com/gke-release/winnode/config/sdn/master/hns.psm1'
   364  }
   365}
   366
   367# Downloads the Kubernetes binaries from kube-env's NODE_BINARY_TAR_URL and
   368# puts them in a subdirectory of $env:K8S_DIR.
   369#
   370# Required ${kube_env} keys:
   371#   NODE_BINARY_TAR_URL
   372function DownloadAndInstall-KubernetesBinaries {
   373  # Assume that presence of kubelet.exe indicates that the kubernetes binaries
   374  # were already previously downloaded to this node.
   375  if (-not (ShouldWrite-File ${env:NODE_DIR}\kubelet.exe)) {
   376    return
   377  }
   378
   379  $tmp_dir = 'C:\k8s_tmp'
   380  New-Item -Force -ItemType 'directory' $tmp_dir | Out-Null
   381
   382  $urls = ${kube_env}['NODE_BINARY_TAR_URL'].Split(",")
   383  $filename = Split-Path -leaf $urls[0]
   384  $hash = $null
   385  if ($kube_env.ContainsKey('NODE_BINARY_TAR_HASH')) {
   386    $hash = ${kube_env}['NODE_BINARY_TAR_HASH']
   387  }
   388  MustDownload-File -Hash $hash -OutFile $tmp_dir\$filename -URLs $urls
   389
   390  tar xzvf $tmp_dir\$filename -C $tmp_dir
   391  Move-Item -Force $tmp_dir\kubernetes\node\bin\* ${env:NODE_DIR}\
   392  Move-Item -Force `
   393      $tmp_dir\kubernetes\LICENSES ${env:LICENSE_DIR}\LICENSES_kubernetes
   394
   395  # Clean up the temporary directory
   396  Remove-Item -Force -Recurse $tmp_dir
   397}
   398
   399# Downloads the csi-proxy binaries from kube-env's CSI_PROXY_STORAGE_PATH and
   400# CSI_PROXY_VERSION, and then puts them in a subdirectory of $env:NODE_DIR.
   401# Note: for now the installation is skipped for non-test clusters. Will be
   402# installed for all cluster after tests pass.
   403# Required ${kube_env} keys:
   404#   CSI_PROXY_STORAGE_PATH and CSI_PROXY_VERSION
   405function DownloadAndInstall-CSIProxyBinaries {
   406  if ("${env:ENABLE_CSI_PROXY}" -eq "true") {
   407    if (ShouldWrite-File ${env:NODE_DIR}\csi-proxy.exe) {
   408      $tmp_dir = 'C:\k8s_tmp'
   409      New-Item -Force -ItemType 'directory' $tmp_dir | Out-Null
   410      $filename = 'csi-proxy.exe'
   411      $urls = "${env:CSI_PROXY_STORAGE_PATH}/${env:CSI_PROXY_VERSION}/$filename"
   412      MustDownload-File -OutFile $tmp_dir\$filename -URLs $urls
   413      Move-Item -Force $tmp_dir\$filename ${env:NODE_DIR}\$filename
   414      # Clean up the temporary directory
   415      Remove-Item -Force -Recurse $tmp_dir
   416    }
   417  }
   418}
   419
   420function Start-CSIProxy {
   421  if ("${env:ENABLE_CSI_PROXY}" -eq "true") {
   422    Log-Output "Creating CSI Proxy Service"
   423    $flags = "-windows-service -log_file=${env:LOGS_DIR}\csi-proxy.log -logtostderr=false ${env:CSI_PROXY_FLAGS}"
   424    & sc.exe create csiproxy binPath= "${env:NODE_DIR}\csi-proxy.exe $flags"
   425    & sc.exe failure csiproxy reset= 0 actions= restart/10000
   426    Log-Output "Starting CSI Proxy Service"
   427    & sc.exe start csiproxy
   428    Write-VerboseServiceInfoToConsole -Service 'csiproxy' -Delay 1
   429  }
   430}
   431
   432# TODO(pjh): this is copied from
   433# https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
   434# See if there's a way to fetch or construct the "management subnet" so that
   435# this is not needed.
   436function ConvertTo_DecimalIP
   437{
   438  param(
   439    [parameter(Mandatory = $true, Position = 0)]
   440    [Net.IPAddress] $IPAddress
   441  )
   442
   443  $i = 3; $decimal_ip = 0;
   444  $IPAddress.GetAddressBytes() | % {
   445    $decimal_ip += $_ * [Math]::Pow(256, $i); $i--
   446  }
   447  return [UInt32]$decimal_ip
   448}
   449
   450# TODO(pjh): this is copied from
   451# https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
   452# See if there's a way to fetch or construct the "management subnet" so that
   453# this is not needed.
   454function ConvertTo_DottedDecimalIP
   455{
   456  param(
   457    [parameter(Mandatory = $true, Position = 0)]
   458    [Uint32] $IPAddress
   459  )
   460
   461  $dotted_ip = $(for ($i = 3; $i -gt -1; $i--) {
   462    $remainder = $IPAddress % [Math]::Pow(256, $i)
   463    ($IPAddress - $remainder) / [Math]::Pow(256, $i)
   464    $IPAddress = $remainder
   465  })
   466  return [String]::Join(".", $dotted_ip)
   467}
   468
   469# TODO(pjh): this is copied from
   470# https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
   471# See if there's a way to fetch or construct the "management subnet" so that
   472# this is not needed.
   473function ConvertTo_MaskLength
   474{
   475  param(
   476    [parameter(Mandatory = $True, Position = 0)]
   477    [Net.IPAddress] $SubnetMask
   478  )
   479
   480  $bits = "$($SubnetMask.GetAddressBytes() | % {
   481    [Convert]::ToString($_, 2)
   482  } )" -replace "[\s0]"
   483  return $bits.Length
   484}
   485
   486# Returns a network adapter object for the "management" interface via which the
   487# Windows pods+kubelet will communicate with the rest of the Kubernetes cluster.
   488#
   489# This function will fail if Add_InitialHnsNetwork() has not been called first.
   490function Get_MgmtNetAdapter {
   491  $net_adapter = Get-NetAdapter | Where-Object Name -like ${MGMT_ADAPTER_NAME}
   492  if (-not ${net_adapter}) {
   493    Throw ("Failed to find a suitable network adapter, check your network " +
   494           "settings.")
   495  }
   496
   497  return $net_adapter
   498}
   499
   500# Decodes the base64 $Data string and writes it as binary to $File. Does
   501# nothing if $File already exists and $REDO_STEPS is not set.
   502function Write_PkiData {
   503  param (
   504    [parameter(Mandatory=$true)] [string] $Data,
   505    [parameter(Mandatory=$true)] [string] $File
   506  )
   507
   508  if (-not (ShouldWrite-File $File)) {
   509    return
   510  }
   511
   512  # This command writes out a PEM certificate file, analogous to "base64
   513  # --decode" on Linux. See https://stackoverflow.com/a/51914136/1230197.
   514  [IO.File]::WriteAllBytes($File, [Convert]::FromBase64String($Data))
   515  Log_Todo ("need to set permissions correctly on ${File}; not sure what the " +
   516            "Windows equivalent of 'umask 077' is")
   517  # Linux: owned by root, rw by user only.
   518  #   -rw------- 1 root root 1.2K Oct 12 00:56 ca-certificates.crt
   519  #   -rw------- 1 root root 1.3K Oct 12 00:56 kubelet.crt
   520  #   -rw------- 1 root root 1.7K Oct 12 00:56 kubelet.key
   521  # Windows:
   522  #   https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
   523  #   https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
   524}
   525
   526# Creates the node PKI files in $env:PKI_DIR.
   527#
   528# Required ${kube_env} keys:
   529#   CA_CERT
   530# ${kube_env} keys that can be omitted for nodes that do not use an
   531# authentication plugin:
   532#   KUBELET_CERT
   533#   KUBELET_KEY
   534function Create-NodePki {
   535  Log-Output 'Creating node pki files'
   536
   537  if ($kube_env.ContainsKey('CA_CERT')) {
   538    $CA_CERT_BUNDLE = ${kube_env}['CA_CERT']
   539    Write_PkiData "${CA_CERT_BUNDLE}" ${env:CA_FILE_PATH}
   540  }
   541  else {
   542    Log-Output -Fatal 'CA_CERT not present in kube-env'
   543  }
   544
   545  if ($kube_env.ContainsKey('KUBELET_CERT')) {
   546    $KUBELET_CERT = ${kube_env}['KUBELET_CERT']
   547    Write_PkiData "${KUBELET_CERT}" ${env:KUBELET_CERT_PATH}
   548  }
   549  else {
   550    Log-Output -Fatal 'KUBELET_CERT not present in kube-env'
   551  }
   552  if ($kube_env.ContainsKey('KUBELET_KEY')) {
   553    $KUBELET_KEY = ${kube_env}['KUBELET_KEY']
   554    Write_PkiData "${KUBELET_KEY}" ${env:KUBELET_KEY_PATH}
   555  }
   556  else {
   557    Log-Output -Fatal 'KUBELET_KEY not present in kube-env'
   558  }
   559
   560  Get-ChildItem ${env:PKI_DIR}
   561}
   562
   563# Creates the bootstrap kubelet kubeconfig at $env:BOOTSTRAP_KUBECONFIG.
   564# https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
   565#
   566# Create-NodePki() must be called first.
   567#
   568# Required ${kube_env} keys:
   569#   KUBERNETES_MASTER_NAME: the apiserver IP address.
   570function Write_BootstrapKubeconfig {
   571  if (-not (ShouldWrite-File ${env:BOOTSTRAP_KUBECONFIG})) {
   572    return
   573  }
   574
   575  # TODO(mtaufen): is user "kubelet" correct? Other examples use e.g.
   576  # "system:node:$(hostname)".
   577
   578  $apiserverAddress = ${kube_env}['KUBERNETES_MASTER_NAME']
   579  New-Item -Force -ItemType file ${env:BOOTSTRAP_KUBECONFIG} | Out-Null
   580  Set-Content ${env:BOOTSTRAP_KUBECONFIG} `
   581'apiVersion: v1
   582kind: Config
   583users:
   584- name: kubelet
   585  user:
   586    client-certificate: KUBELET_CERT_PATH
   587    client-key: KUBELET_KEY_PATH
   588clusters:
   589- name: local
   590  cluster:
   591    server: https://APISERVER_ADDRESS
   592    certificate-authority: CA_FILE_PATH
   593contexts:
   594- context:
   595    cluster: local
   596    user: kubelet
   597  name: service-account-context
   598current-context: service-account-context'.`
   599    replace('KUBELET_CERT_PATH', ${env:KUBELET_CERT_PATH}).`
   600    replace('KUBELET_KEY_PATH', ${env:KUBELET_KEY_PATH}).`
   601    replace('APISERVER_ADDRESS', ${apiserverAddress}).`
   602    replace('CA_FILE_PATH', ${env:CA_FILE_PATH})
   603  Log-Output ("kubelet bootstrap kubeconfig:`n" +
   604              "$(Get-Content -Raw ${env:BOOTSTRAP_KUBECONFIG})")
   605}
   606
   607# Fetches the kubelet kubeconfig from the metadata server and writes it to
   608# $env:KUBECONFIG.
   609#
   610# Create-NodePki() must be called first.
   611function Write_KubeconfigFromMetadata {
   612  if (-not (ShouldWrite-File ${env:KUBECONFIG})) {
   613    return
   614  }
   615
   616  $kubeconfig = Get-InstanceMetadataAttribute 'kubeconfig'
   617  if ($kubeconfig -eq $null) {
   618    Log-Output `
   619        "kubeconfig metadata key not found, can't write ${env:KUBECONFIG}" `
   620        -Fatal
   621  }
   622  Set-Content ${env:KUBECONFIG} $kubeconfig
   623  Log-Output ("kubelet kubeconfig from metadata (non-bootstrap):`n" +
   624              "$(Get-Content -Raw ${env:KUBECONFIG})")
   625}
   626
# Creates the kubelet bootstrap kubeconfig at $env:BOOTSTRAP_KUBECONFIG by
# delegating to Write_BootstrapKubeconfig.
#
# NOTE(review): earlier documentation described writing $env:KUBECONFIG for
# nodes that use an authentication plugin; this function only ever takes the
# bootstrap path - confirm whether the other path is still intended.
#
# Create-NodePki() must be called first.
#
# Required ${kube_env} keys:
#   KUBERNETES_MASTER_NAME: the apiserver IP address.
function Create-KubeletKubeconfig {
  Write_BootstrapKubeconfig
}
   637
   638# Creates the kubeconfig user file for applications that communicate with Kubernetes.
   639#
   640# Create-NodePki() must be called first.
   641#
   642# Required ${kube_env} keys:
   643#   CA_CERT
   644#   KUBERNETES_MASTER_NAME
   645function Create-Kubeconfig {
   646  param (
   647    [parameter(Mandatory=$true)] [string]$Name,
   648    [parameter(Mandatory=$true)] [string]$Path,
   649    [parameter(Mandatory=$true)] [string]$Token
   650  )
   651  if (-not (ShouldWrite-File $Path)) {
   652    return
   653  }
   654
   655  New-Item -Force -ItemType file $Path | Out-Null
   656
   657  # In configure-helper.sh kubelet kubeconfig uses certificate-authority while
   658  # kubeproxy kubeconfig uses certificate-authority-data, ugh. Does it matter?
   659  # Use just one or the other for consistency?
   660  Set-Content $Path `
   661'apiVersion: v1
   662kind: Config
   663users:
   664- name: APP_NAME
   665  user:
   666    token: APP_TOKEN
   667clusters:
   668- name: local
   669  cluster:
   670    server: https://APISERVER_ADDRESS
   671    certificate-authority-data: CA_CERT
   672contexts:
   673- context:
   674    cluster: local
   675    user: APP_NAME
   676  name: service-account-context
   677current-context: service-account-context'.`
   678  replace('APP_NAME', $Name).`
   679  replace('APP_TOKEN', $Token).`
   680  replace('CA_CERT', ${kube_env}['CA_CERT']).`
   681  replace('APISERVER_ADDRESS', ${kube_env}['KUBERNETES_MASTER_NAME'])
   682
   683  Log-Output ("${Name} kubeconfig:`n" +
   684              "$(Get-Content -Raw ${Path})")
   685}
   686
   687# Creates the kube-proxy user kubeconfig file at $env:KUBEPROXY_KUBECONFIG.
   688#
   689# Create-NodePki() must be called first.
   690#
   691# Required ${kube_env} keys:
   692#   CA_CERT
   693#   KUBE_PROXY_TOKEN
   694function Create-KubeproxyKubeconfig {
   695  Create-Kubeconfig -Name 'kube-proxy' `
   696    -Path ${env:KUBEPROXY_KUBECONFIG} `
   697    -Token ${kube_env}['KUBE_PROXY_TOKEN']
   698}
   699
   700# Returns the IP alias range configured for this GCE instance.
   701function Get_IpAliasRange {
   702  $url = ("http://${GCE_METADATA_SERVER}/computeMetadata/v1/instance/" +
   703          "network-interfaces/0/ip-aliases/0")
   704  $client = New-Object Net.WebClient
   705  $client.Headers.Add('Metadata-Flavor', 'Google')
   706  return ($client.DownloadString($url)).Trim()
   707}
   708
   709# Retrieves the pod CIDR and sets it in $env:POD_CIDR.
   710function Set-PodCidr {
   711  while($true) {
   712    $pod_cidr = Get_IpAliasRange
   713    if (-not $?) {
   714      Log-Output ${pod_cIDR}
   715      Log-Output "Retrying Get_IpAliasRange..."
   716      Start-Sleep -sec 1
   717      continue
   718    }
   719    break
   720  }
   721
   722  Log-Output "fetched pod CIDR (same as IP alias range): ${pod_cidr}"
   723  Set_MachineEnvironmentVar "POD_CIDR" ${pod_cidr}
   724  Set_CurrentShellEnvironmentVar "POD_CIDR" ${pod_cidr}
   725}
   726
   727# Adds an initial HNS network on the Windows node which forces the creation of
   728# a virtual switch and the "management" interface that will be used to
   729# communicate with the rest of the Kubernetes cluster without NAT.
   730#
   731# Note that adding the initial HNS network may cause connectivity to the GCE
   732# metadata server to be lost due to a Windows bug.
   733# Configure-HostNetworkingService() restores connectivity, look there for
   734# details.
   735#
   736# Download-HelperScripts() must have been called first.
   737function Add_InitialHnsNetwork {
   738  $INITIAL_HNS_NETWORK = 'External'
   739
   740  # This comes from
   741  # https://github.com/Microsoft/SDN/blob/master/Kubernetes/flannel/l2bridge/start.ps1#L74
   742  # (or
   743  # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L206).
   744  #
   745  # daschott noted on Slack: "L2bridge networks require an external vSwitch.
   746  # The first network ("External") with hardcoded values in the script is just
   747  # a placeholder to create an external vSwitch. This is purely for convenience
   748  # to be able to remove/modify the actual HNS network ("cbr0") or rejoin the
   749  # nodes without a network blip. Creating a vSwitch takes time, causes network
   750  # blips, and it makes it more likely to hit the issue where flanneld is
   751  # stuck, so we want to do this as rarely as possible."
   752  $hns_network = Get-HnsNetwork | Where-Object Name -eq $INITIAL_HNS_NETWORK
   753  if ($hns_network) {
   754    if ($REDO_STEPS) {
   755      Log-Output ("Warning: initial '$INITIAL_HNS_NETWORK' HNS network " +
   756                  "already exists, removing it and recreating it")
   757      $hns_network | Remove-HnsNetwork
   758      $hns_network = $null
   759    }
   760    else {
   761      Log-Output ("Skip: initial '$INITIAL_HNS_NETWORK' HNS network " +
   762                  "already exists, not recreating it")
   763      return
   764    }
   765  }
   766  Log-Output ("Creating initial HNS network to force creation of " +
   767              "${MGMT_ADAPTER_NAME} interface")
   768  # Note: RDP connection will hiccup when running this command.
   769  New-HNSNetwork `
   770      -Type "L2Bridge" `
   771      -AddressPrefix "192.168.255.0/30" `
   772      -Gateway "192.168.255.1" `
   773      -Name $INITIAL_HNS_NETWORK `
   774      -Verbose
   775}
   776
   777# Get the network in uint32 for the given cidr
   778function Get_NetworkDecimal_From_CIDR([string] $cidr) {
   779  $network, [int]$subnetlen = $cidr.Split('/')
   780  $decimal_network = ConvertTo_DecimalIP($network)
   781  return $decimal_network
   782}
   783
   784# Get gateway ip string (the first address) based on pod cidr.
   785# For Windows nodes the pod gateway IP address is the first address in the pod
   786# CIDR for the host.
   787function Get_Gateway_From_CIDR([string] $cidr) {
   788  $network=Get_NetworkDecimal_From_CIDR($cidr)
   789  $gateway=ConvertTo_DottedDecimalIP($network+1)
   790  return $gateway
   791}
   792
   793# Get endpoint gateway ip string (the second address) based on pod cidr.
   794# For Windows nodes the pod gateway IP address is the first address in the pod
   795# CIDR for the host, but from inside containers it's the second address.
   796function Get_Endpoint_Gateway_From_CIDR([string] $cidr) {
   797  $network=Get_NetworkDecimal_From_CIDR($cidr)
   798  $gateway=ConvertTo_DottedDecimalIP($network+2)
   799  return $gateway
   800}
   801
   802# Get pod IP range start based (the third address) on pod cidr
   803# We reserve the first two in the cidr range for gateways. Start the cidr
   804# range from the third so that IPAM does not allocate those IPs to pods.
   805function Get_PodIP_Range_Start([string] $cidr) {
   806  $network=Get_NetworkDecimal_From_CIDR($cidr)
   807  $start=ConvertTo_DottedDecimalIP($network+3)
   808  return $start
   809}
   810
   811# Configures HNS on the Windows node to enable Kubernetes networking:
   812#   - Creates the "management" interface associated with an initial HNS network.
   813#   - Creates the HNS network $env:KUBE_NETWORK for pod networking.
   814#   - Creates an HNS endpoint for pod networking.
   815#   - Adds necessary routes on the management interface.
   816#   - Verifies that the GCE metadata server connection remains intact.
   817#
   818# Prerequisites:
   819#   $env:POD_CIDR is set (by Set-PodCidr).
   820#   Download-HelperScripts() has been called.
   821function Configure-HostNetworkingService {
   822  Import-Module -Force ${env:K8S_DIR}\hns.psm1
   823
   824  Add_InitialHnsNetwork
   825
   826  $pod_gateway = Get_Gateway_From_CIDR(${env:POD_CIDR})
   827  $pod_endpoint_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})
   828  Log-Output ("Setting up Windows node HNS networking: " +
   829              "podCidr = ${env:POD_CIDR}, podGateway = ${pod_gateway}, " +
   830              "podEndpointGateway = ${pod_endpoint_gateway}")
   831
   832  $hns_network = Get-HnsNetwork | Where-Object Name -eq ${env:KUBE_NETWORK}
   833  if ($hns_network) {
   834    if ($REDO_STEPS) {
   835      Log-Output ("Warning: ${env:KUBE_NETWORK} HNS network already exists, " +
   836                  "removing it and recreating it")
   837      $hns_network | Remove-HnsNetwork
   838      $hns_network = $null
   839    }
   840    else {
   841      Log-Output "Skip: ${env:KUBE_NETWORK} HNS network already exists"
   842    }
   843  }
   844  $created_hns_network = $false
   845  if (-not $hns_network) {
   846    # Note: RDP connection will hiccup when running this command.
   847    $hns_network = New-HNSNetwork `
   848        -Type "L2Bridge" `
   849        -AddressPrefix ${env:POD_CIDR} `
   850        -Gateway ${pod_gateway} `
   851        -Name ${env:KUBE_NETWORK} `
   852        -Verbose
   853    $created_hns_network = $true
   854  }
   855  # This name of endpoint is referred in pkg/proxy/winkernel/proxier.go as part of
   856  # kube-proxy as well. A health check port for every service that is specified as
   857  # "externalTrafficPolicy: local" will be added on the endpoint.
   858  # PLEASE KEEP THEM CONSISTENT!!!
   859  $endpoint_name = "cbr0"
   860
   861  $vnic_name = "vEthernet (${endpoint_name})"
   862
   863  $hns_endpoint = Get-HnsEndpoint | Where-Object Name -eq $endpoint_name
   864  # Note: we don't expect to ever enter this block currently - while the HNS
   865  # network does seem to persist across reboots, the HNS endpoints do not.
   866  if ($hns_endpoint) {
   867    if ($REDO_STEPS) {
   868      Log-Output ("Warning: HNS endpoint $endpoint_name already exists, " +
   869                  "removing it and recreating it")
   870      $hns_endpoint | Remove-HnsEndpoint
   871      $hns_endpoint = $null
   872    }
   873    else {
   874      Log-Output "Skip: HNS endpoint $endpoint_name already exists"
   875    }
   876  }
   877  if (-not $hns_endpoint) {
   878    $hns_endpoint = New-HnsEndpoint `
   879        -NetworkId ${hns_network}.Id `
   880        -Name ${endpoint_name} `
   881        -IPAddress ${pod_endpoint_gateway} `
   882        -Gateway "0.0.0.0" `
   883        -Verbose
   884    # TODO(pjh): find out: why is this always CompartmentId 1?
   885    Attach-HnsHostEndpoint `
   886        -EndpointID ${hns_endpoint}.Id `
   887        -CompartmentID 1 `
   888        -Verbose
   889    netsh interface ipv4 set interface "${vnic_name}" forwarding=enabled
   890  }
   891
   892  Try {
   893    Get-HNSPolicyList | Remove-HnsPolicyList
   894  } Catch { }
   895
   896  # Add a route from the management NIC to the pod CIDR.
   897  #
   898  # When a packet from a Kubernetes service backend arrives on the destination
   899  # Windows node, the reverse SNAT will be applied and the source address of
   900  # the packet gets replaced from the pod IP to the service VIP. The packet
   901  # will then leave the VM and return back through hairpinning.
   902  #
   903  # When IP alias is enabled, IP forwarding is disabled for anti-spoofing;
   904  # the packet with the service VIP will get blocked and be lost. With this
   905  # route, the packet will be routed to the pod subnetwork, and not leave the
   906  # VM.
   907  $mgmt_net_adapter = Get_MgmtNetAdapter
   908  New-NetRoute `
   909      -ErrorAction Ignore `
   910      -InterfaceAlias ${mgmt_net_adapter}.ifAlias `
   911      -DestinationPrefix ${env:POD_CIDR} `
   912      -NextHop "0.0.0.0" `
   913      -Verbose
   914
   915  if ($created_hns_network) {
   916    # There is an HNS bug where the route to the GCE metadata server will be
   917    # removed when the HNS network is created:
   918    # https://github.com/Microsoft/hcsshim/issues/299#issuecomment-425491610.
   919    # The behavior here is very unpredictable: the route may only be removed
   920    # after some delay, or it may appear to be removed then you'll add it back
   921    # but then it will be removed once again. So, we first wait a long
   922    # unfortunate amount of time to ensure that things have quiesced, then we
   923    # wait until we're sure the route is really gone before re-adding it again.
   924    Log-Output "Waiting 45 seconds for host network state to quiesce"
   925    Start-Sleep 45
   926    WaitFor_GceMetadataServerRouteToBeRemoved
   927    Log-Output "Re-adding the GCE metadata server route"
   928    Add_GceMetadataServerRoute
   929  }
   930  Verify_GceMetadataServerRouteIsPresent
   931
   932  Log-Output "Host network setup complete"
   933}
   934
   935function Configure-GcePdTools {
   936  if (ShouldWrite-File ${env:K8S_DIR}\GetGcePdName.dll) {
   937    MustDownload-File -OutFile ${env:K8S_DIR}\GetGcePdName.dll `
   938      -URLs "https://storage.googleapis.com/gke-release/winnode/config/gce-tools/master/GetGcePdName/GetGcePdName.dll"
   939  }
   940  if (-not (Test-Path $PsHome\profile.ps1)) {
   941    New-Item -path $PsHome\profile.ps1 -type file
   942  }
   943
   944  Add-Content $PsHome\profile.ps1 `
   945  '$modulePath = "K8S_DIR\GetGcePdName.dll"
   946  Unblock-File $modulePath
   947  Import-Module -Name $modulePath'.replace('K8S_DIR', ${env:K8S_DIR})
   948}
   949
   950# Setup cni network for containerd.
   951function Prepare-CniNetworking {
   952    Configure_Containerd_CniNetworking
   953}
   954
   955# Obtain the host dns conf and save it to a file so that kubelet/CNI
   956# can use it to configure dns suffix search list for pods.
   957# The value of DNS server is ignored right now because the pod will
   958# always only use cluster DNS service, but for consistency, we still
   959# parsed them here in the same format as Linux resolv.conf.
   960# This function must be called after Configure-HostNetworkingService.
   961function Configure-HostDnsConf {
   962  $net_adapter = Get_MgmtNetAdapter
   963  $server_ips = (Get-DnsClientServerAddress `
   964          -InterfaceAlias ${net_adapter}.Name).ServerAddresses
   965  $search_list = (Get-DnsClient).ConnectionSpecificSuffixSearchList
   966  $conf = ""
   967  ForEach ($ip in $server_ips)  {
   968    $conf = $conf + "nameserver $ip`r`n"
   969  }
   970  $conf = $conf + "search $search_list"
   971  # Do not put hostdns.conf into the CNI config directory so as to
   972  # avoid the container runtime treating it as CNI config.
   973  $hostdns_conf = "${env:CNI_DIR}\hostdns.conf"
   974  New-Item -Force -ItemType file ${hostdns_conf} | Out-Null
   975  Set-Content ${hostdns_conf} $conf
   976  Log-Output "HOST dns conf:`n$(Get-Content -Raw ${hostdns_conf})"
   977}
   978
   979# Fetches the kubelet config from the instance metadata and puts it at
   980# $env:KUBELET_CONFIG.
   981function Configure-Kubelet {
   982  if (-not (ShouldWrite-File ${env:KUBELET_CONFIG})) {
   983    return
   984  }
   985
   986  # The Kubelet config is built by build-kubelet-config() in
   987  # cluster/gce/util.sh, and stored in the metadata server under the
   988  # 'kubelet-config' key.
   989  $kubelet_config = Get-InstanceMetadataAttribute 'kubelet-config'
   990  Set-Content ${env:KUBELET_CONFIG} $kubelet_config
   991  Log-Output "Kubelet config:`n$(Get-Content -Raw ${env:KUBELET_CONFIG})"
   992}
   993
   994# Sets up the kubelet and kube-proxy arguments and starts them as native
   995# Windows services.
   996#
   997# Required ${kube_env} keys:
   998#   KUBELET_ARGS
   999#   KUBEPROXY_ARGS
  1000#   CLUSTER_IP_RANGE
  1001function Start-WorkerServices {
  1002  # Compute kubelet args
  1003  $kubelet_args_str = ${kube_env}['KUBELET_ARGS']
  1004  $kubelet_args = $kubelet_args_str.Split(" ")
  1005  Log-Output "kubelet_args from metadata: ${kubelet_args}"
  1006
  1007  # To join GCE instances to AD, we need to shorten their names, as NetBIOS name
  1008  # must be <= 15 characters, and GKE generated names are longer than that.
  1009  # To perform the join in an automated way, it's preferable to apply the rename
  1010  # and domain join in the GCESysprep step. However, after sysprep is complete
  1011  # and the machine restarts, kubelet bootstrapping should not use the shortened
  1012  # computer name, and instead use the instance's name by using --hostname-override,
  1013  # otherwise kubelet and kube-proxy will not be able to run properly.
  1014  $instance_name = "$(Get-InstanceMetadata 'name' | Out-String)"
  1015  $default_kubelet_args = @(`
  1016      "--pod-infra-container-image=${env:INFRA_CONTAINER}",
  1017      "--hostname-override=${instance_name}"
  1018  )
  1019
  1020  $kubelet_args = ${default_kubelet_args} + ${kubelet_args}
  1021  Log-Output 'Using bootstrap kubeconfig for authentication'
  1022  $kubelet_args = (${kubelet_args} +
  1023                   "--bootstrap-kubeconfig=${env:BOOTSTRAP_KUBECONFIG}")
  1024  Log-Output "Final kubelet_args: ${kubelet_args}"
  1025
  1026  # Compute kube-proxy args
  1027  $kubeproxy_args_str = ${kube_env}['KUBEPROXY_ARGS']
  1028  $kubeproxy_args = $kubeproxy_args_str.Split(" ")
  1029  Log-Output "kubeproxy_args from metadata: ${kubeproxy_args}"
  1030
  1031  # kubeproxy is started on Linux nodes using
  1032  # kube-manifests/kubernetes/gci-trusty/kube-proxy.manifest, which is
  1033  # generated by start-kube-proxy in configure-helper.sh and contains e.g.:
  1034  #   kube-proxy --master=https://35.239.84.171
  1035  #   --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.64.0.0/14
  1036  #   --oom-score-adj=-998 --v=2
  1037  #   --iptables-sync-period=1m --iptables-min-sync-period=10s
  1038  #   --ipvs-sync-period=1m --ipvs-min-sync-period=10s
  1039  # And also with various volumeMounts and "securityContext: privileged: true".
  1040  $default_kubeproxy_args = @(`
  1041      "--kubeconfig=${env:KUBEPROXY_KUBECONFIG}",
  1042      "--cluster-cidr=$(${kube_env}['CLUSTER_IP_RANGE'])",
  1043      "--hostname-override=${instance_name}"
  1044  )
  1045
  1046  $kubeproxy_args = ${default_kubeproxy_args} + ${kubeproxy_args}
  1047  Log-Output "Final kubeproxy_args: ${kubeproxy_args}"
  1048
  1049  # TODO(pjh): kubelet is emitting these messages:
  1050  # I1023 23:44:11.761915    2468 kubelet.go:274] Adding pod path:
  1051  # C:\etc\kubernetes
  1052  # I1023 23:44:11.775601    2468 file.go:68] Watching path
  1053  # "C:\\etc\\kubernetes"
  1054  # ...
  1055  # E1023 23:44:31.794327    2468 file.go:182] Can't process manifest file
  1056  # "C:\\etc\\kubernetes\\hns.psm1": C:\etc\kubernetes\hns.psm1: couldn't parse
  1057  # as pod(yaml: line 10: did not find expected <document start>), please check
  1058  # config file.
  1059  #
  1060  # Figure out how to change the directory that the kubelet monitors for new
  1061  # pod manifests.
  1062
  1063  # We configure the service to restart on failure, after 10s wait. We reset
  1064  # the restart count to 0 each time, so we re-use our restart/10000 action on
  1065  # each failure. Note it currently restarts even when explicitly stopped, you
  1066  # have to delete the service entry to *really* kill it (e.g. `sc.exe delete
  1067  # kubelet`). See issue #72900.
  1068  if (Get-Process | Where-Object Name -eq "kubelet") {
  1069    Log-Output -Fatal `
  1070        "A kubelet process is already running, don't know what to do"
  1071  }
  1072  Log-Output "Creating kubelet service"
  1073  & sc.exe create kubelet binPath= "${env:NODE_DIR}\kube-log-runner.exe -log-file=${env:LOGS_DIR}\kubelet.log ${env:NODE_DIR}\kubelet.exe ${kubelet_args}" start= demand
  1074  & sc.exe failure kubelet reset= 0 actions= restart/10000
  1075  Log-Output "Starting kubelet service"
  1076  & sc.exe start kubelet
  1077
  1078  Log-Output "Waiting 10 seconds for kubelet to stabilize"
  1079  Start-Sleep 10
  1080  Write-VerboseServiceInfoToConsole -Service 'kubelet'
  1081
  1082  if (Get-Process | Where-Object Name -eq "kube-proxy") {
  1083    Log-Output -Fatal `
  1084        "A kube-proxy process is already running, don't know what to do"
  1085  }
  1086  Log-Output "Creating kube-proxy service"
  1087  & sc.exe create kube-proxy binPath= "${env:NODE_DIR}\kube-log-runner.exe -log-file=${env:LOGS_DIR}\kube-proxy.log ${env:NODE_DIR}\kube-proxy.exe ${kubeproxy_args}" start= demand
  1088  & sc.exe failure kube-proxy reset= 0 actions= restart/10000
  1089  Log-Output "Starting kube-proxy service"
  1090  & sc.exe start kube-proxy
  1091  Write-VerboseServiceInfoToConsole -Service 'kube-proxy' -Delay 1
  1092
  1093  # F1020 23:08:52.000083    9136 server.go:361] unable to load in-cluster
  1094  # configuration, KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT must be
  1095  # defined
  1096  # TODO(pjh): still getting errors like these in kube-proxy log:
  1097  # E1023 04:03:58.143449    4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Endpoints: Get https://35.239.84.171/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  1098  # E1023 04:03:58.150266    4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Service: Get https://35.239.84.171/api/v1/services?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  1099  WaitFor_KubeletAndKubeProxyReady
  1100  Verify_GceMetadataServerRouteIsPresent
  1101  Log-Output "Kubernetes components started successfully"
  1102}
  1103
  1104# Stop and unregister both kubelet & kube-proxy services.
  1105function Unregister-WorkerServices {
  1106  & sc.exe delete kube-proxy
  1107  & sc.exe delete kubelet
  1108}
  1109
  1110# Wait for kubelet and kube-proxy to be ready within 10s.
  1111function WaitFor_KubeletAndKubeProxyReady {
  1112  $waited = 0
  1113  $timeout = 10
  1114  while (((Get-Service kube-proxy).Status -ne 'Running' -or (Get-Service kubelet).Status -ne 'Running') -and $waited -lt $timeout) {
  1115    Start-Sleep 1
  1116    $waited++
  1117  }
  1118
  1119  # Timeout occurred
  1120  if ($waited -ge $timeout) {
  1121    Log-Output "$(Get-Service kube* | Out-String)"
  1122    Throw ("Timeout while waiting ${timeout} seconds for kubelet and kube-proxy services to start")
  1123  }
  1124}
  1125
  1126# Runs 'kubectl get nodes'.
  1127# Runs additional verification commands to ensure node successfully joined cluster
  1128# and that it connects to the API Server.
  1129function Verify-WorkerServices {
  1130  $timeout = 12
  1131  $retries = 0
  1132  $retryDelayInSeconds = 5
  1133  
  1134  Log-Output ("Testing node connection to API server...")
  1135  do {
  1136      $retries++
  1137      $nodes_list = & "${env:NODE_DIR}\kubectl.exe" get nodes -o=custom-columns=:.metadata.name -A | Out-String
  1138      $host_status = & "${env:NODE_DIR}\kubectl.exe" get nodes (hostname) -o=custom-columns=:.status.conditions[4].type | Out-String
  1139      Start-Sleep $retryDelayInSeconds
  1140  } while (((-Not $nodes_list) -or (-Not $nodes_list.contains((hostname))) -or (-Not $host_status.contains("Ready")))-and ($retries -le $timeout))
  1141  
  1142  If (-Not $nodes_list){
  1143      Throw ("Node: '$(hostname)' failed to connect to API server")
  1144  
  1145  }ElseIf (-Not $nodes_list.contains((hostname))) {
  1146      Throw ("Node: '$(hostname)' failed to join the cluster; NODES: '`n $($nodes_list)'")
  1147
  1148  }ELseIf (-Not $host_status.contains("Ready")) {
  1149      Throw ("Node: '$(hostname)' is not in Ready state")
  1150  }
  1151  
  1152  Log-Output ("Node: $(hostname) successfully joined cluster `n NODES: `n $($nodes_list)")
  1153  Verify_GceMetadataServerRouteIsPresent
  1154
  1155}
  1156
  1157# Downloads the Windows crictl package and installs its contents (e.g.
  1158# crictl.exe) in $env:NODE_DIR.
  1159function DownloadAndInstall-Crictl {
  1160  if (-not (ShouldWrite-File ${env:NODE_DIR}\crictl.exe)) {
  1161    return
  1162  }
  1163  $CRI_TOOLS_GCS_BUCKET = 'k8s-artifacts-cri-tools'
  1164  $url = ('https://storage.googleapis.com/' + $CRI_TOOLS_GCS_BUCKET +
  1165          '/release/' + $CRICTL_VERSION + '/crictl-' + $CRICTL_VERSION +
  1166          '-windows-amd64.tar.gz')
  1167  MustDownload-File `
  1168      -URLs $url `
  1169      -OutFile ${env:NODE_DIR}\crictl.tar.gz `
  1170      -Hash $CRICTL_SHA256 `
  1171      -Algorithm SHA256
  1172  tar xzvf ${env:NODE_DIR}\crictl.tar.gz -C ${env:NODE_DIR}
  1173}
  1174
  1175# Sets crictl configuration values.
  1176function Configure-Crictl {
  1177  if (${env:CONTAINER_RUNTIME_ENDPOINT}) {
  1178    & "${env:NODE_DIR}\crictl.exe" config runtime-endpoint `
  1179        ${env:CONTAINER_RUNTIME_ENDPOINT}
  1180  }
  1181}
  1182
  1183# Pulls the infra/pause container image onto the node so that it will be
  1184# immediately available when the kubelet tries to run pods.
  1185# TODO(pjh): downloading the container container image may take a few minutes;
  1186# figure out how to run this in the background while perform the rest of the
  1187# node startup steps!
  1188# Pull-InfraContainer must be called AFTER Verify-WorkerServices.
  1189function Pull-InfraContainer {
  1190  $name, $label = ${env:INFRA_CONTAINER} -split ':',2
  1191  if (-not ("$(& crictl images)" -match "$name.*$label")) {
  1192    & crictl pull ${env:INFRA_CONTAINER}
  1193    if (!$?) {
  1194      throw "Error running 'crictl pull ${env:INFRA_CONTAINER}'"
  1195    }
  1196  }
  1197  $inspect = "$(& crictl inspecti ${env:INFRA_CONTAINER} | Out-String)"
  1198  Log-Output "Infra/pause container:`n$inspect"
  1199}
  1200
  1201# Setup the containerd on the node.
  1202function Setup-ContainerRuntime {
  1203  Install-Pigz
  1204  Install_Containerd
  1205  Configure_Containerd
  1206  Start_Containerd
  1207}
  1208
  1209function Test-ContainersFeatureInstalled {
  1210  return (Get-WindowsFeature Containers).Installed
  1211}
  1212
  1213# After this function returns, the computer must be restarted to complete
  1214# the installation!
  1215function Install-ContainersFeature {
  1216  Log-Output "Installing Windows 'Containers' feature"
  1217  Install-WindowsFeature Containers
  1218}
  1219
  1220# Verifies if Hyper-V should be enabled in the node
  1221function Test-ShouldEnableHyperVFeature {
  1222  return "${env:WINDOWS_ENABLE_HYPERV}" -eq "true"
  1223}
  1224
  1225# Check if Hyper-V feature is enabled
  1226function Test-HyperVFeatureEnabled {
  1227  return ((Get-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V).State -eq 'Enabled')
  1228}
  1229
  1230# After this function returns, the computer must be restarted to complete
  1231# the installation!
  1232function Enable-HyperVFeature {
  1233  Log-Output "Enabling Windows 'HyperV' feature"
  1234  Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V -All -NoRestart
  1235  Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V-Management-PowerShell -All -NoRestart
  1236}
  1237
  1238# Configures the TCP/IP parameters to be in sync with the GCP recommendation.
  1239# Not setting these values correctly can cause network issues for connections
  1240# that live longer than 10 minutes.
  1241# See: https://cloud.google.com/compute/docs/troubleshooting/general-tips#idle-connections
  1242function Set-WindowsTCPParameters {
  1243  Set-ItemProperty -Force -Confirm:$false -Path `
  1244    'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters' `
  1245    -Name 'KeepAliveInterval' -Type Dword -Value 1000
  1246  Set-ItemProperty -Force -Confirm:$false `
  1247    -Path 'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters' `
  1248    -Name 'KeepAliveTime' -Type Dword -Value 60000
  1249  Set-ItemProperty -Force -Confirm:$false `
  1250    -Path 'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters' `
  1251    -Name 'TcpMaxDataRetransmissions' -Type Dword -Value 10
  1252
  1253  Log-Output 'TCP/IP Parameters'
  1254  Get-ItemProperty -Path 'HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters'
  1255}
  1256
  1257# Writes a CNI config file under $env:CNI_CONFIG_DIR for containerd.
  1258#
  1259# Prerequisites:
  1260#   $env:POD_CIDR is set (by Set-PodCidr).
  1261#   The "management" interface exists (Configure-HostNetworkingService).
  1262#   The HNS network for pod networking has been configured
  1263#     (Configure-HostNetworkingService).
  1264#   Containerd is installed (Install_Containerd).
  1265#
  1266# Required ${kube_env} keys:
  1267#   DNS_SERVER_IP
  1268#   DNS_DOMAIN
  1269#   SERVICE_CLUSTER_IP_RANGE
  1270function Configure_Containerd_CniNetworking {
  1271  $l2bridge_conf = "${env:CNI_CONFIG_DIR}\l2bridge.conf"
  1272  if (-not (ShouldWrite-File ${l2bridge_conf})) {
  1273    return
  1274  }
  1275
  1276  $mgmt_ip = (Get_MgmtNetAdapter |
  1277              Get-NetIPAddress -AddressFamily IPv4).IPAddress
  1278
  1279  $pod_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})
  1280
  1281  # Explanation of the CNI config values:
  1282  #   POD_CIDR: the pod CIDR assigned to this node.
  1283  #   POD_GATEWAY: the gateway IP.
  1284  #   MGMT_IP: the IP address assigned to the node's primary network interface
  1285  #     (i.e. the internal IP of the GCE VM).
  1286  #   SERVICE_CIDR: the CIDR used for kubernetes services.
  1287  #   DNS_SERVER_IP: the cluster's DNS server IP address.
  1288  #   DNS_DOMAIN: the cluster's DNS domain, e.g. "cluster.local".
  1289  #
  1290  # OutBoundNAT ExceptionList: No SNAT for CIDRs in the list, the same as default GKE non-masquerade destination ranges listed at https://cloud.google.com/kubernetes-engine/docs/how-to/ip-masquerade-agent#default-non-masq-dests
  1291
  1292  New-Item -Force -ItemType file ${l2bridge_conf} | Out-Null
  1293  Set-Content ${l2bridge_conf} `
  1294'{
  1295  "cniVersion":  "0.2.0",
  1296  "name":  "l2bridge",
  1297  "type":  "sdnbridge",
  1298  "master": "Ethernet",
  1299  "capabilities":  {
  1300    "portMappings":  true,
  1301    "dns": true
  1302  },
  1303  "ipam":  {
  1304    "subnet": "POD_CIDR",
  1305    "routes": [
  1306      {
  1307        "GW": "POD_GATEWAY"
  1308      }
  1309    ]
  1310  },
  1311  "dns":  {
  1312    "Nameservers":  [
  1313      "DNS_SERVER_IP"
  1314    ],
  1315    "Search": [
  1316      "DNS_DOMAIN"
  1317    ]
  1318  },
  1319  "AdditionalArgs": [
  1320    {
  1321      "Name":  "EndpointPolicy",
  1322      "Value":  {
  1323        "Type":  "OutBoundNAT",
  1324        "Settings": {
  1325          "Exceptions":  [
  1326            "169.254.0.0/16",
  1327            "10.0.0.0/8",
  1328            "172.16.0.0/12",
  1329            "192.168.0.0/16",
  1330            "100.64.0.0/10",
  1331            "192.0.0.0/24",
  1332            "192.0.2.0/24",
  1333            "192.88.99.0/24",
  1334            "198.18.0.0/15",
  1335            "198.51.100.0/24",
  1336            "203.0.113.0/24",
  1337            "240.0.0.0/4"
  1338          ]
  1339        }
  1340      }
  1341    },
  1342    {
  1343      "Name":  "EndpointPolicy",
  1344      "Value":  {
  1345        "Type":  "SDNRoute",
  1346        "Settings": {
  1347          "DestinationPrefix":  "SERVICE_CIDR",
  1348          "NeedEncap":  true
  1349        }
  1350      }
  1351    },
  1352    {
  1353      "Name":  "EndpointPolicy",
  1354      "Value":  {
  1355        "Type":  "SDNRoute",
  1356        "Settings": {
  1357          "DestinationPrefix":  "MGMT_IP/32",
  1358          "NeedEncap":  true
  1359        }
  1360      }
  1361    }
  1362  ]
  1363}'.replace('POD_CIDR', ${env:POD_CIDR}).`
  1364  replace('POD_GATEWAY', ${pod_gateway}).`
  1365  replace('DNS_SERVER_IP', ${kube_env}['DNS_SERVER_IP']).`
  1366  replace('DNS_DOMAIN', ${kube_env}['DNS_DOMAIN']).`
  1367  replace('MGMT_IP', ${mgmt_ip}).`
  1368  replace('SERVICE_CIDR', ${kube_env}['SERVICE_CLUSTER_IP_RANGE'])
  1369
  1370  Log-Output "containerd CNI config:`n$(Get-Content -Raw ${l2bridge_conf})"
  1371}
  1372
  1373# Download and install containerd and CNI binaries into $env:NODE_DIR.
  1374function Install_Containerd {
  1375  # Assume that presence of containerd.exe indicates that all containerd
  1376  # binaries were already previously downloaded to this node.
  1377  if (-not (ShouldWrite-File ${env:NODE_DIR}\containerd.exe)) {
  1378    return
  1379  }
  1380
  1381  $tmp_dir = 'C:\containerd_tmp'
  1382  New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  1383
  1384  # TODO(ibrahimab) Change this to a gcs bucket with CI maintained and accessible by community.
  1385  $version = '1.6.2'
  1386  $tar_url = ("https://github.com/containerd/containerd/releases/download/v${version}/" +
  1387              "cri-containerd-cni-${version}-windows-amd64.tar.gz")
  1388  $sha_url = $tar_url + ".sha256sum"
  1389  MustDownload-File -URLs $sha_url -OutFile $tmp_dir\sha256sum
  1390  $sha = $(Get-Content $tmp_dir\sha256sum).Split(" ")[0].ToUpper()
  1391
  1392  MustDownload-File `
  1393      -URLs $tar_url `
  1394      -OutFile $tmp_dir\containerd.tar.gz `
  1395      -Hash $sha `
  1396      -Algorithm SHA256
  1397
  1398  tar xzvf $tmp_dir\containerd.tar.gz -C $tmp_dir
  1399  Move-Item -Force $tmp_dir\cni\bin\*.exe "${env:CNI_DIR}\"
  1400  Move-Item -Force $tmp_dir\*.exe "${env:NODE_DIR}\"
  1401  Remove-Item -Force -Recurse $tmp_dir
  1402
  1403  # Exclusion for Defender.
  1404  Add-MpPreference -ExclusionProcess "${env:NODE_DIR}\containerd.exe"
  1405}
  1406
  1407# Lookup the path of containerd config if exists, else returns a default.
  1408function Get_Containerd_ConfigPath {
  1409  $service = Get-WMIObject -Class Win32_Service -Filter  "Name='containerd'"
  1410  if (!($service -eq $null) -and
  1411      $service.PathName -match ".*\s--config\s*(\S+).*" -and
  1412      $matches.Count -eq 2) {
  1413    return $matches[1]
  1414  } else {
  1415    return 'C:\Program Files\containerd\config.toml'
  1416  }
  1417}
  1418
  1419# Generates the containerd config.toml file.
  1420function Configure_Containerd {
  1421  $config_path = Get_Containerd_ConfigPath
  1422  $config_dir = [System.IO.Path]::GetDirectoryName($config_path)
  1423  New-Item $config_dir -ItemType 'directory' -Force | Out-Null
  1424  Set-Content ${config_path} @"
  1425[plugins.scheduler]
  1426  schedule_delay = '0s'
  1427  startup_delay = '0s'
  1428[plugins.cri]
  1429  sandbox_image = 'INFRA_CONTAINER_IMAGE'
  1430[plugins.cri.containerd]
  1431  snapshotter = 'windows'
  1432  default_runtime_name = 'runhcs-wcow-process'
  1433  disable_snapshot_annotations = true
  1434  discard_unpacked_layers = true
  1435[plugins.cri.cni]
  1436  bin_dir = 'CNI_BIN_DIR'
  1437  conf_dir = 'CNI_CONF_DIR'
  1438"@.replace('INFRA_CONTAINER_IMAGE', ${env:INFRA_CONTAINER}).`
  1439    replace('CNI_BIN_DIR', "${env:CNI_DIR}").`
  1440    replace('CNI_CONF_DIR', "${env:CNI_CONFIG_DIR}")
  1441}
  1442
  1443# Register if needed and start containerd service.
  1444function Start_Containerd {
  1445  # Do the registration only if the containerd service does not exist.
  1446  if ((Get-WMIObject -Class Win32_Service -Filter  "Name='containerd'") -eq $null) {
  1447    Log-Output "Creating containerd service"
  1448    & containerd.exe --register-service --log-file "${env:LOGS_DIR}/containerd.log"
  1449  }
  1450
  1451  Log-Output "Starting containerd service"
  1452  Restart-Service containerd
  1453}
  1454
# Pigz Resources
# Directory that Install-Pigz creates, extracts the Pigz archive into, and adds
# to the machine PATH.
$PIGZ_ROOT = 'C:\pigz'
# Pigz release; it is embedded in the archive file name below.
$PIGZ_VERSION = '2.3.1'
# Download location of the Pigz archive. Despite the "TAR" in the variable
# names, the archive is a .zip (Install-Pigz unpacks it with Expand-Archive).
$PIGZ_TAR_URL = "https://storage.googleapis.com/gke-release/winnode/pigz/prod/gke_windows/pigz/release/5/20201104-134221/pigz-$PIGZ_VERSION.zip"
# Expected hash of the archive; Install-Pigz passes it to MustDownload-File
# together with -Algorithm SHA512.
$PIGZ_TAR_HASH = '5a6f8f5530acc85ea51797f58c1409e5af6b69e55da243ffc608784cf14fec0cd16f74cc61c564d69e1a267750aecfc1e4c53b5219ff5f893b42a7576306f34c'
  1460
  1461# Install Pigz (https://github.com/madler/pigz) into Windows for improved image
  1462# extraction performance.
  1463function Install-Pigz {
  1464  if ("${env:WINDOWS_ENABLE_PIGZ}" -eq "true") {
  1465    if (-not (Test-Path $PIGZ_ROOT)) {
  1466      Log-Output "Installing Pigz $PIGZ_VERSION"
  1467      New-Item -Path $PIGZ_ROOT -ItemType Directory
  1468      MustDownload-File `
  1469        -Url $PIGZ_TAR_URL `
  1470        -OutFile "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip" `
  1471        -Hash $PIGZ_TAR_HASH `
  1472        -Algorithm SHA512
  1473      Expand-Archive -Path "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip" `
  1474        -DestinationPath $PIGZ_ROOT
  1475      Remove-Item -Path "$PIGZ_ROOT\pigz-$PIGZ_VERSION.zip"
  1476      # Containerd search for unpigz.exe on the first container image
  1477      # pull request after the service is started. If unpigz.exe is in the
  1478      # Windows path it'll use it instead of the default unzipper.
  1479      # See: https://github.com/containerd/containerd/issues/1896
  1480      Add-MachineEnvironmentPath -Path $PIGZ_ROOT
  1481      # Add process exclusion for Windows Defender to boost performance.
  1482      Add-MpPreference -ExclusionProcess "$PIGZ_ROOT\unpigz.exe"
  1483      Log-Output "Installed Pigz $PIGZ_VERSION"
  1484    } else {
  1485      Log-Output "Pigz already installed."
  1486    }
  1487  }
  1488}
  1489
# Node Problem Detector Resources
# Name of the Windows service created by Configure-NodeProblemDetector.
$NPD_SERVICE = "node-problem-detector"
# Version, release location and SHA512 hash that
# DownloadAndInstall-NodeProblemDetector uses when kube-env does not override
# them through NODE_PROBLEM_DETECTOR_VERSION / NODE_PROBLEM_DETECTOR_TAR_HASH /
# NODE_PROBLEM_DETECTOR_RELEASE_PATH.
$DEFAULT_NPD_VERSION = '0.8.10-gke0.1'
$DEFAULT_NPD_RELEASE_PATH = 'https://storage.googleapis.com/gke-release/winnode'
$DEFAULT_NPD_HASH = '97ddfe3544da9e02a1cfb55d24f329eb29d606fca7fbbf800415d5de9dbc29a00563f8e0d1919595c8e316fd989d45b09b13c07be528841fc5fd37e21d016a2d'
  1495
  1496# Install Node Problem Detector (NPD).
  1497# NPD analyzes the host for problems that can disrupt workloads.
  1498# https://github.com/kubernetes/node-problem-detector
  1499function DownloadAndInstall-NodeProblemDetector {
  1500  if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -eq "standalone") {
  1501    if (ShouldWrite-File "${env:NODE_DIR}\node-problem-detector.exe") {
  1502      $npd_version = $DEFAULT_NPD_VERSION
  1503      $npd_hash = $DEFAULT_NPD_HASH
  1504      if (-not [string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_VERSION'])) {
  1505        $npd_version = ${kube_env}['NODE_PROBLEM_DETECTOR_VERSION']
  1506        $npd_hash = ${kube_env}['NODE_PROBLEM_DETECTOR_TAR_HASH']
  1507      }
  1508      $npd_release_path = $DEFAULT_NPD_RELEASE_PATH
  1509      if (-not [string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH'])) {
  1510        $npd_release_path = ${kube_env}['NODE_PROBLEM_DETECTOR_RELEASE_PATH']
  1511      }
  1512
  1513      $npd_tar = "node-problem-detector-v${npd_version}-windows_amd64.tar.gz"
  1514
  1515      Log-Output "Downloading ${npd_tar}."
  1516
  1517      $npd_dir = "${env:K8S_DIR}\node-problem-detector"
  1518      New-Item -Path $npd_dir -ItemType Directory -Force -Confirm:$false
  1519
  1520      MustDownload-File `
  1521          -URLs "${npd_release_path}/node-problem-detector/${npd_tar}" `
  1522          -Hash $npd_hash `
  1523          -Algorithm SHA512 `
  1524          -OutFile "${npd_dir}\${npd_tar}"
  1525
  1526      tar xzvf "${npd_dir}\${npd_tar}" -C $npd_dir
  1527      Move-Item "${npd_dir}\bin\*" "${env:NODE_DIR}\" -Force -Confirm:$false
  1528      Remove-Item "${npd_dir}\bin" -Force -Confirm:$false
  1529      Remove-Item "${npd_dir}\${npd_tar}" -Force -Confirm:$false
  1530    }
  1531    else {
  1532        Log-Output "Node Problem Detector already installed."
  1533    }
  1534  }
  1535}
  1536
  1537# Creates the node-problem-detector user kubeconfig file at
  1538# $env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE (if defined).
  1539#
  1540# Create-NodePki() must be called first.
  1541#
  1542# Required ${kube_env} keys:
  1543#   CA_CERT
  1544#   NODE_PROBLEM_DETECTOR_TOKEN
  1545function Create-NodeProblemDetectorKubeConfig {
  1546  if (-not [string]::IsNullOrEmpty(${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE})) {
  1547    Create-Kubeconfig -Name 'node-problem-detector' `
  1548      -Path ${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE} `
  1549      -Token ${kube_env}['NODE_PROBLEM_DETECTOR_TOKEN']
  1550  }
  1551}
  1552
  1553# Configures NPD to run with the bundled monitor configs and report against the Kubernetes api server.
  1554function Configure-NodeProblemDetector {
  1555  $npd_bin = "${env:NODE_DIR}\node-problem-detector.exe"
  1556  if ("${env:ENABLE_NODE_PROBLEM_DETECTOR}" -eq "standalone" -and (Test-Path $npd_bin)) {
  1557    $npd_svc = Get-Service -Name $NPD_SERVICE -ErrorAction SilentlyContinue
  1558    if ($npd_svc -eq $null) {
  1559      $npd_dir = "${env:K8S_DIR}\node-problem-detector"
  1560      $npd_logs_dir = "${env:LOGS_DIR}\node-problem-detector"
  1561
  1562      New-Item -Path $npd_logs_dir -Type Directory -Force -Confirm:$false
  1563
  1564      $flags = ''
  1565      if ([string]::IsNullOrEmpty(${kube_env}['NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS'])) {
  1566        $system_log_monitors = @()
  1567        $system_stats_monitors = @()
  1568        $custom_plugin_monitors = @()
  1569
  1570        # Custom Plugin Monitors
  1571        $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-kubelet.json")
  1572        $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-kubeproxy.json")
  1573        $custom_plugin_monitors += @("${npd_dir}\config\windows-defender-monitor.json")
  1574
  1575        # System Stats Monitors
  1576        $system_stats_monitors += @("${npd_dir}\config\windows-system-stats-monitor.json")
  1577
  1578        # NPD Configuration for CRI monitor
  1579        $system_log_monitors += @("${npd_dir}\config\windows-containerd-monitor-filelog.json")
  1580        $custom_plugin_monitors += @("${npd_dir}\config\windows-health-checker-containerd.json")
  1581
  1582        $flags="--v=2 --port=20256 --log_dir=${npd_logs_dir}"
  1583        if ($system_log_monitors.count -gt 0) {
  1584          $flags+=" --config.system-log-monitor={0}" -f ($system_log_monitors -join ",")
  1585        }
  1586        if ($system_stats_monitors.count -gt 0) {
  1587          $flags+=" --config.system-stats-monitor={0}" -f ($system_stats_monitors -join ",")
  1588        }
  1589        if ($custom_plugin_monitors.count -gt 0) {
  1590          $flags+=" --config.custom-plugin-monitor={0}" -f ($custom_plugin_monitors -join ",")
  1591        }
  1592      }
  1593      else {
  1594        $flags = ${kube_env}['NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS']
  1595      }
  1596      $kubernetes_master_name = ${kube_env}['KUBERNETES_MASTER_NAME']
  1597      $flags = "${flags} --apiserver-override=`"https://${kubernetes_master_name}?inClusterConfig=false&auth=${env:NODEPROBLEMDETECTOR_KUBECONFIG_FILE}`""
  1598
  1599      Log-Output "Creating service: ${NPD_SERVICE}"
  1600      Log-Output "${npd_bin} ${flags}"
  1601      sc.exe create $NPD_SERVICE binpath= "${npd_bin} ${flags}" displayName= "Node Problem Detector"
  1602      sc.exe failure $NPD_SERVICE reset= 30 actions= restart/5000
  1603      sc.exe start $NPD_SERVICE
  1604
  1605      Write-VerboseServiceInfoToConsole -Service $NPD_SERVICE
  1606    }
  1607    else {
  1608      Log-Output "${NPD_SERVICE} already configured."
  1609    }
  1610  }
  1611}
  1612
# TODO(pjh): move the logging agent code below into a separate
# module; it was put here temporarily to avoid disrupting the file layout in
# the K8s release machinery.

# Fluent Bit logging agent: release to download, install directory, Windows
# service name, and the wildcard pattern Restart-LogService matches against
# process command lines when it has to force-kill the agent.
$LOGGINGAGENT_VERSION = '1.8.10'
$LOGGINGAGENT_ROOT = 'C:\fluent-bit'
$LOGGINGAGENT_SERVICE = 'fluent-bit'
$LOGGINGAGENT_CMDLINE = '*fluent-bit.exe*'

# Fluent Bit exporter: same set of values, plus the hash that
# DownloadAndInstall-LoggingAgents verifies with -Algorithm SHA256.
$LOGGINGEXPORTER_VERSION = 'v0.17.0'
$LOGGINGEXPORTER_ROOT = 'C:\flb-exporter'
$LOGGINGEXPORTER_SERVICE = 'flb-exporter'
$LOGGINGEXPORTER_CMDLINE = '*flb-exporter.exe*'
$LOGGINGEXPORTER_HASH = 'c808c9645d84b06b89932bd707d51a9d1d0b451b5a702a5f9b2b4462c8be6502'
  1626
  1627# Restart Logging agent or starts it if it is not currently running
  1628function Restart-LoggingAgent {
  1629  if (IsStackdriverAgentInstalled) {
  1630      Restart-StackdriverAgent
  1631      return
  1632  }
  1633
  1634   Restart-LogService $LOGGINGEXPORTER_SERVICE $LOGGINGEXPORTER_CMDLINE
  1635   Restart-LogService $LOGGINGAGENT_SERVICE $LOGGINGAGENT_CMDLINE
  1636}
  1637
  1638# Restarts the service, or starts it if it is not currently
  1639# running. A standard `Restart-Service` may fail because
  1640# the process is sometimes unstoppable, so this function works around it
  1641# by killing the processes.
  1642function Restart-LogService([string]$service, [string]$cmdline) {
  1643  Stop-Service -NoWait -ErrorAction Ignore $service
  1644
  1645  # Wait (if necessary) for service to stop.
  1646  $timeout = 10
  1647  $stopped = (Get-service $service).Status -eq 'Stopped'
  1648  for ($i = 0; $i -lt $timeout -and !($stopped); $i++) {
  1649      Start-Sleep 1
  1650      $stopped = (Get-service $service).Status -eq 'Stopped'
  1651  }
  1652
  1653  if ((Get-service $service).Status -ne 'Stopped') {
  1654    # Force kill the processes.
  1655    Stop-Process -Force -PassThru -Id (Get-WmiObject win32_process |
  1656      Where CommandLine -Like $cmdline).ProcessId
  1657
  1658    # Wait until process has stopped.
  1659    $waited = 0
  1660    $log_period = 10
  1661    $timeout = 60
  1662    while ((Get-service $service).Status -ne 'Stopped' -and $waited -lt $timeout) {
  1663      Start-Sleep 1
  1664      $waited++
  1665
  1666      if ($waited % $log_period -eq 0) {
  1667        Log-Output "Waiting for ${service} service to stop"
  1668      }
  1669    }
  1670
  1671    # Timeout occurred
  1672    if ($waited -ge $timeout) {
  1673      Throw ("Timeout while waiting for ${service} service to stop")
  1674    }
  1675  }
  1676
  1677  Start-Service $service
  1678}
  1679
  1680# Check whether the logging agent is installed by whether it's registered as service
  1681function IsLoggingAgentInstalled {
  1682  $logging_status = (Get-Service $LOGGINGAGENT_SERVICE -ErrorAction Ignore).Status
  1683  return -not [string]::IsNullOrEmpty($logging_status)
  1684}
  1685
  1686# Installs the logging agent according to https://docs.fluentbit.io/manual/installation/windows#
  1687# Also installs fluent bit stackdriver exporter
  1688function Install-LoggingAgent {
  1689  if (IsStackdriverAgentInstalled) {
  1690    # Remove the existing storage.json file if it exists. This is a workaround
  1691    # for the bug where the logging agent cannot start up if the file is
  1692    # corrupted.
  1693    Remove-Item `
  1694      -Force `
  1695      -ErrorAction Ignore `
  1696      ("$STACKDRIVER_ROOT\LoggingAgent\Main\pos\winevtlog.pos\worker0\" +
  1697       "storage.json")
  1698    Log-Output ("Skip: Stackdriver logging agent is already installed")
  1699    return
  1700  }
  1701
  1702  if (IsLoggingAgentInstalled) {
  1703    # Note: we should reinstall the agent if $REDO_STEPS is true
  1704    # here, but we don't know how to run the installer without it prompting
  1705    # when logging agent is already installed. We dumped the strings in the
  1706    # installer binary and searched for flags to do this but found nothing. Oh
  1707    # well.
  1708    Log-Output ("Skip: Fluentbit logging agent is already installed")
  1709    return
  1710  }
  1711
  1712  DownloadAndInstall-LoggingAgents
  1713  Create-LoggingAgentServices
  1714}
  1715
  1716function DownloadAndInstall-LoggingAgents {
  1717  # Install Logging agent if not present
  1718  if (ShouldWrite-File $LOGGINGAGENT_ROOT\td-agent-bit-${LOGGINGAGENT_VERSION}-win64) {
  1719      $install_dir = 'C:\flb-installers'
  1720      $url = ("https://storage.googleapis.com/gke-release/winnode/fluentbit/td-agent-bit-${LOGGINGAGENT_VERSION}-win64.zip")
  1721
  1722      Log-Output 'Downloading Logging agent'
  1723      New-Item $install_dir -ItemType 'directory' -Force | Out-Null
  1724      MustDownload-File -OutFile $install_dir\td.zip -URLs $url
  1725
  1726      cd $install_dir
  1727      Log-Output 'Extracting Logging agent'
  1728      Expand-Archive td.zip
  1729      mv .\td\td-agent-bit-${LOGGINGAGENT_VERSION}-win64\ $LOGGINGAGENT_ROOT
  1730      cd C:\
  1731      Remove-Item -Force -Recurse $install_dir
  1732  }
  1733
  1734  # Download Logging exporter if needed
  1735  if (ShouldWrite-File $LOGGINGEXPORTER_ROOT\flb-exporter.exe) {
  1736      $url = ("https://storage.googleapis.com/gke-release/winnode/fluentbit-exporter/${LOGGINGEXPORTER_VERSION}/flb-exporter-${LOGGINGEXPORTER_VERSION}.exe")
  1737      Log-Output 'Downloading logging exporter'
  1738      New-Item $LOGGINGEXPORTER_ROOT -ItemType 'directory' -Force | Out-Null
  1739      MustDownload-File `
  1740          -OutFile $LOGGINGEXPORTER_ROOT\flb-exporter.exe `
  1741          -URLs $url `
  1742          -Hash $LOGGINGEXPORTER_HASH `
  1743          -Algorithm SHA256
  1744  }
  1745}
  1746
  1747function Create-LoggingAgentServices {
  1748  cd $LOGGINGAGENT_ROOT
  1749
  1750  Log-Output "Creating service: ${LOGGINGAGENT_SERVICE}"
  1751  sc.exe create $LOGGINGAGENT_SERVICE binpath= "${LOGGINGAGENT_ROOT}\bin\fluent-bit.exe -c \fluent-bit\conf\fluent-bit.conf"
  1752  sc.exe failure $LOGGINGAGENT_SERVICE reset= 30 actions= restart/5000
  1753  Write-VerboseServiceInfoToConsole -Service $LOGGINGAGENT_SERVICE
  1754
  1755  Log-Output "Creating service: ${LOGGINGEXPORTER_SERVICE}"
  1756  sc.exe create  $LOGGINGEXPORTER_SERVICE  binpath= "${LOGGINGEXPORTER_ROOT}\flb-exporter.exe --kubernetes-separator=_ --stackdriver-resource-model=k8s --enable-pod-label-discovery --logtostderr --winsvc  --pod-label-dot-replacement=_"
  1757  sc.exe failure $LOGGINGEXPORTER_SERVICE reset= 30 actions= restart/5000
  1758  Write-VerboseServiceInfoToConsole -Service $LOGGINGEXPORTER_SERVICE
  1759}
  1760
  1761# Writes the logging configuration file for Logging agent. Restart-LoggingAgent
  1762# should then be called to pick up the new configuration.
  1763function Configure-LoggingAgent {
  1764  if (IsStackdriverAgentInstalled) {
  1765      Configure-StackdriverAgent
  1766      return
  1767  }
  1768
  1769  $fluentbit_config_file = "$LOGGINGAGENT_ROOT\conf\fluent-bit.conf"
  1770  $FLUENTBIT_CONFIG | Out-File -FilePath $fluentbit_config_file -Encoding ASCII
  1771  Log-Output "Wrote logging config to $fluentbit_config_file"
  1772
  1773  $fluentbit_parser_file = "$LOGGINGAGENT_ROOT\conf\parsers.conf"
  1774  $PARSERS_CONFIG | Out-File -FilePath $fluentbit_parser_file -Encoding ASCII
  1775
  1776  # Create directory for all the log position files.
  1777  New-Item -Type Directory -Path "/var/run/google-fluentbit/pos-files/" -Force | Out-Null
  1778
  1779  Log-Output "Wrote logging config to $fluentbit_parser_file"
  1780}
  1781
# Fluent Bit main configuration; Configure-LoggingAgent writes it to
# $LOGGINGAGENT_ROOT\conf\fluent-bit.conf.
  1783$FLUENTBIT_CONFIG = @'
  1784[SERVICE]
  1785    Flush         5
  1786    Grace         120
  1787    Log_Level     info
  1788    Log_File      /var/log/fluentbit.log
  1789    Daemon        off
  1790    Parsers_File  parsers.conf
  1791    HTTP_Server   off
  1792    HTTP_Listen   0.0.0.0
  1793    HTTP_PORT     2020
  1794    plugins_file plugins.conf
  1795
  1796    # Storage
  1797    # =======
  1798    # Fluent Bit can use memory and filesystem buffering based mechanisms
  1799    #
  1800    # - https://docs.fluentbit.io/manual/administration/buffering-and-storage
  1801    #
  1802    # storage metrics
  1803    # ---------------
  1804    # publish storage pipeline metrics in '/api/v1/storage'. The metrics are
  1805    # exported only if the 'http_server' option is enabled.
  1806    #
  1807    # storage.metrics on
  1808
  1809    # storage.path
  1810    # ------------
  1811    # absolute file system path to store filesystem data buffers (chunks).
  1812    #
  1813    # storage.path /tmp/storage
  1814
  1815    # storage.sync
  1816    # ------------
  1817    # configure the synchronization mode used to store the data into the
  1818    # filesystem. It can take the values normal or full.
  1819    #
  1820    # storage.sync normal
  1821
  1822    # storage.checksum
  1823    # ----------------
  1824    # enable the data integrity check when writing and reading data from the
  1825    # filesystem. The storage layer uses the CRC32 algorithm.
  1826    #
  1827    # storage.checksum off
  1828
  1829    # storage.backlog.mem_limit
  1830    # -------------------------
  1831    # if storage.path is set, Fluent Bit will look for data chunks that were
  1832    # not delivered and are still in the storage layer, these are called
  1833    # backlog data. This option configure a hint of maximum value of memory
  1834    # to use when processing these records.
  1835    #
  1836    # storage.backlog.mem_limit 5M
  1837
  1838[INPUT]
  1839    Name         winlog
  1840    Interval_Sec 2
  1841    # Channels Setup,Windows PowerShell
  1842    Channels     application,system,security
  1843    Tag          winevt.raw
  1844    DB           /var/run/google-fluentbit/pos-files/winlog.db
  1845
  1846# Json Log Example:
  1847# {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
  1848[INPUT]
  1849    Name             tail
  1850    Alias            kube_containers
  1851    Tag              kube_<namespace_name>_<pod_name>_<container_name>
  1852    Tag_Regex        (?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-
  1853    Mem_Buf_Limit    5MB
  1854    Skip_Long_Lines  On
  1855    Refresh_Interval 5
  1856    Path             C:\var\log\containers\*.log
  1857    DB               /var/run/google-fluentbit/pos-files/flb_kube.db
  1858
  1859[FILTER]
  1860    Name         parser
  1861    Match        kube_*
  1862    Key_Name     log
  1863    Reserve_Data True
  1864    Parser       docker
  1865    Parser       containerd
  1866
  1867# Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
  1868# Example:
  1869# I0716 02:08:55.559351    3356 log_spam.go:42] Command line arguments:
  1870[INPUT]
  1871    Name             tail
  1872    Alias            node-problem-detector
  1873    Tag              node-problem-detector
  1874    Mem_Buf_Limit    5MB
  1875    Skip_Long_Lines  On
  1876    Refresh_Interval 5
  1877    Path             C:\etc\kubernetes\logs\node-problem-detector\*.log.INFO*
  1878    DB               /var/run/google-fluentbit/pos-files/node-problem-detector.db
  1879    Multiline        On
  1880    Parser_Firstline glog
  1881
  1882# Example:
  1883# I0928 03:15:50.440223    4880 main.go:51] Starting CSI-Proxy Server ...
  1884[INPUT]
  1885    Name             tail
  1886    Alias            csi-proxy
  1887    Tag              csi-proxy
  1888    Mem_Buf_Limit    5MB
  1889    Skip_Long_Lines  On
  1890    Refresh_Interval 5
  1891    Path             /etc/kubernetes/logs/csi-proxy.log
  1892    DB               /var/run/google-fluentbit/pos-files/csi-proxy.db
  1893    Multiline        On
  1894    Parser_Firstline glog
  1895
  1896# I1118 21:26:53.975789       6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
  1897[INPUT]
  1898    Name             tail
  1899    Alias            kube-proxy
  1900    Tag              kube-proxy
  1901    Mem_Buf_Limit    5MB
  1902    Skip_Long_Lines  On
  1903    Refresh_Interval 5
  1904    Path             /etc/kubernetes/logs/kube-proxy.log
  1905    DB               /var/run/google-fluentbit/pos-files/kube-proxy.db
  1906    Multiline        On
  1907    Parser_Firstline glog
  1908
  1909# Example:
  1910# time="2019-12-10T21:27:59.836946700Z" level=info msg="loading plugin \"io.containerd.grpc.v1.cri\"..." type=io.containerd.grpc.v1
  1911[INPUT]
  1912    Name             tail
  1913    Alias            container-runtime
  1914    Tag              container-runtime
  1915    Mem_Buf_Limit    5MB
  1916    Skip_Long_Lines  On
  1917    Refresh_Interval 5
  1918    Path             /etc/kubernetes/logs/containerd.log
  1919    DB               /var/run/google-fluentbit/pos-files/container-runtime.db
  1920    # TODO: Add custom parser for containerd logs once format is settled.
  1921
  1922# Example:
  1923# I0204 07:32:30.020537    3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
  1924[INPUT]
  1925    Name             tail
  1926    Alias            kubelet
  1927    Tag              kubelet
  1928    Mem_Buf_Limit    5MB
  1929    Skip_Long_Lines  On
  1930    Refresh_Interval 5
  1931    Path             /etc/kubernetes/logs/kubelet.log
  1932    DB               /var/run/google-fluentbit/pos-files/kubelet.db
  1933    Multiline        On
  1934    Parser_Firstline glog
  1935
  1936[FILTER]
  1937    Name        modify
  1938    Match       *
  1939    Hard_rename log message
  1940
  1941[FILTER]
  1942    Name        modify
  1943    Match       winevt.raw
  1944    Hard_rename Message message
  1945
  1946[FILTER]
  1947    Name         parser
  1948    Match        kube_*
  1949    Key_Name     message
  1950    Reserve_Data True
  1951    Parser       glog
  1952    Parser       json
  1953
  1954[OUTPUT]
  1955    Name        http
  1956    Match       *
  1957    Host        127.0.0.1
  1958    Port        2021
  1959    URI         /logs
  1960    header_tag  FLUENT-TAG
  1961    Format      msgpack
  1962    Retry_Limit 2
  1963'@
  1964
# Fluent Bit parser definitions; Configure-LoggingAgent writes them to
# $LOGGINGAGENT_ROOT\conf\parsers.conf.
  1966$PARSERS_CONFIG = @'
  1967[PARSER]
  1968    Name        docker
  1969    Format      json
  1970    Time_Key    time
  1971    Time_Format %Y-%m-%dT%H:%M:%S.%L%z
  1972
  1973[PARSER]
  1974    Name        containerd
  1975    Format      regex
  1976    Regex       ^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$
  1977    Time_Key    time
  1978    Time_Format %Y-%m-%dT%H:%M:%S.%L%z
  1979
  1980[PARSER]
  1981    Name        json
  1982    Format      json
  1983
  1984[PARSER]
  1985    Name        syslog
  1986    Format      regex
  1987    Regex       ^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$
  1988    Time_Key    time
  1989    Time_Format %b %d %H:%M:%S
  1990
  1991[PARSER]
  1992    Name        glog
  1993    Format      regex
  1994    Regex       ^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source_file>[^ \]]+)\:(?<source_line>\d+)\]\s(?<message>.*)$
  1995    Time_Key    time
  1996    Time_Format %m%d %H:%M:%S.%L
  1997
  1998[PARSER]
  1999    Name        network-log
  2000    Format      json
  2001    Time_Key    timestamp
  2002    Time_Format %Y-%m-%dT%H:%M:%S.%L%z
  2003
  2004[PARSER]
  2005    Name        syslog-rfc5424
  2006    Format      regex
  2007    Regex       ^\<(?<pri>[0-9]{1,5})\>1 (?<time>[^ ]+) (?<host>[^ ]+) (?<ident>[^ ]+) (?<pid>[-0-9]+) (?<msgid>[^ ]+) (?<extradata>(\[(.*?)\]|-)) (?<message>.+)$
  2008    Time_Key    time
  2009    Time_Format %Y-%m-%dT%H:%M:%S.%L%z
  2010    Time_Keep   On
  2011
  2012[PARSER]
  2013    Name        syslog-rfc3164-local
  2014    Format      regex
  2015    Regex       ^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$
  2016    Time_Key    time
  2017    Time_Format %b %d %H:%M:%S
  2018    Time_Keep   On
  2019
  2020[PARSER]
  2021    Name        syslog-rfc3164
  2022    Format      regex
  2023    Regex       /^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
  2024    Time_Key    time
  2025    Time_Format %b %d %H:%M:%S
  2026    Time_Keep   On
  2027
  2028[PARSER]
  2029    Name    kube-custom
  2030    Format  regex
  2031    Regex   (?<tag>[^.]+)?\.?(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$
  2032'@
  2033
  2034
  2035# ----------- Stackdriver logging setup --------------------------
# This section will be deprecated soon.
  2037#
  2038
# Installation directory of the Stackdriver agent; the LoggingAgent config.d
# directory and position files used in this module are addressed relative to
# it.
$STACKDRIVER_ROOT = 'C:\Program Files (x86)\Stackdriver'
  2040
  2041# Restarts the Stackdriver logging agent, or starts it if it is not currently
  2042# running. A standard `Restart-Service StackdriverLogging` may fail because
  2043# StackdriverLogging sometimes is unstoppable, so this function works around it
  2044# by killing the processes.
  2045function Restart-StackdriverAgent {
  2046  Stop-Service -NoWait -ErrorAction Ignore StackdriverLogging
  2047
  2048  # Wait (if necessary) for service to stop.
  2049  $timeout = 10
  2050  $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
  2051  for ($i = 0; $i -lt $timeout -and !($stopped); $i++) {
  2052      Start-Sleep 1
  2053      $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
  2054  }
  2055
  2056  if ((Get-service StackdriverLogging).Status -ne 'Stopped') {
  2057    # Force kill the processes.
  2058    Stop-Process -Force -PassThru -Id (Get-WmiObject win32_process |
  2059      Where CommandLine -Like '*Stackdriver/logging*').ProcessId
  2060
  2061    # Wait until process has stopped.
  2062    $waited = 0
  2063    $log_period = 10
  2064    $timeout = 60
  2065    while ((Get-service StackdriverLogging).Status -ne 'Stopped' -and $waited -lt $timeout) {
  2066      Start-Sleep 1
  2067      $waited++
  2068
  2069      if ($waited % $log_period -eq 0) {
  2070        Log-Output "Waiting for StackdriverLogging service to stop"
  2071      }
  2072    }
  2073
  2074    # Timeout occurred
  2075    if ($waited -ge $timeout) {
  2076      Throw ("Timeout while waiting for StackdriverLogging service to stop")
  2077    }
  2078  }
  2079
  2080  Start-Service StackdriverLogging
  2081}
  2082
  2083# Check whether the logging agent is installed by whether it's registered as service
  2084function IsStackdriverAgentInstalled {
  2085  $stackdriver_status = (Get-Service StackdriverLogging -ErrorAction Ignore).Status
  2086  return -not [string]::IsNullOrEmpty($stackdriver_status)
  2087}
  2088
  2089# Writes the logging configuration file for Stackdriver. Restart-LoggingAgent
  2090# should then be called to pick up the new configuration.
  2091function Configure-StackdriverAgent {
  2092  $fluentd_config_dir = "$STACKDRIVER_ROOT\LoggingAgent\config.d"
  2093  $fluentd_config_file = "$fluentd_config_dir\k8s_containers.conf"
  2094
  2095  # Create a configuration file for kubernetes containers.
  2096  # The config.d directory should have already been created automatically, but
  2097  # try creating again just in case.
  2098  New-Item $fluentd_config_dir -ItemType 'directory' -Force | Out-Null
  2099
  2100  $config = $FLUENTD_CONFIG.replace('NODE_NAME', (hostname))
  2101  $config | Out-File -FilePath $fluentd_config_file -Encoding ASCII
  2102  Log-Output "Wrote fluentd logging config to $fluentd_config_file"
  2103
  2104  # Configure StackdriverLogging to automatically restart on failure after 10
  2105  # seconds. The logging agent may die die to various disruptions but can be
  2106  # resumed.
  2107  sc.exe failure StackdriverLogging reset= 0 actions= restart/1000/restart/10000
  2108  Write-VerboseServiceInfoToConsole -Service 'StackdriverLogging'
  2109}
  2110
  2111# The NODE_NAME placeholder must be replaced with the node's name (hostname).
  2112$FLUENTD_CONFIG = @'
  2113# This configuration file for Fluentd is used to watch changes to kubernetes
  2114# container logs in the directory /var/lib/docker/containers/ and submit the
  2115# log records to Google Cloud Logging using the cloud-logging plugin.
  2116#
  2117# Example
  2118# =======
  2119# A line in the Docker log file might look like this JSON:
  2120#
  2121# {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
  2122#  "stream":"stderr",
  2123#   "time":"2014-09-25T21:15:03.499185026Z"}
  2124#
  2125# The original tag is derived from the log file's location.
  2126# For example a Docker container's logs might be in the directory:
  2127#  /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
  2128# and in the file:
  2129#  997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
  2130# where 997599971ee6... is the Docker ID of the running container.
  2131# The Kubernetes kubelet makes a symbolic link to this file on the host
  2132# machine in the /var/log/containers directory which includes the pod name,
  2133# the namespace name and the Kubernetes container name:
  2134#    synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
  2135#    ->
  2136#    /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
  2137# The /var/log directory on the host is mapped to the /var/log directory in the container
  2138# running this instance of Fluentd and we end up collecting the file:
  2139#   /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
  2140# This results in the tag:
  2141#  var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
  2142# where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
  2143# namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
  2144# the container ID.
  2145# The record reformer is used to extract pod_name, namespace_name and
  2146# container_name from the tag and set them in a local_resource_id in the
  2147# format of:
  2148# 'k8s_container.<NAMESPACE_NAME>.<POD_NAME>.<CONTAINER_NAME>'.
  2149# The reformer also changes the tags to 'stderr' or 'stdout' based on the
  2150# value of 'stream'.
  2151# local_resource_id is later used by google_cloud plugin to determine the
  2152# monitored resource to ingest logs against.
  2153# Json Log Example:
  2154# {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
  2155# CRI Log Example:
  2156# 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
  2157<source>
  2158  @type tail
  2159  path /var/log/containers/*.log
  2160  pos_file /var/log/gcp-containers.log.pos
  2161  # Tags at this point are in the format of:
  2162  # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
  2163  tag reform.*
  2164  read_from_head true
  2165  <parse>
  2166    @type multi_format
  2167    <pattern>
  2168      format json
  2169      time_key time
  2170      time_format %Y-%m-%dT%H:%M:%S.%NZ
  2171      keep_time_key
  2172    </pattern>
  2173    <pattern>
  2174      format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
  2175      time_format %Y-%m-%dT%H:%M:%S.%N%:z
  2176    </pattern>
  2177  </parse>
  2178</source>
  2179# Example:
  2180# I0204 07:32:30.020537    3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
  2181<source>
  2182  @type tail
  2183  format multiline
  2184  multiline_flush_interval 5s
  2185  format_firstline /^\w\d{4}/
  2186  format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
  2187  time_format %m%d %H:%M:%S.%N
  2188  path /etc/kubernetes/logs/kubelet.log
  2189  pos_file /etc/kubernetes/logs/gcp-kubelet.log.pos
  2190  tag kubelet
  2191</source>
  2192# Example:
  2193# I1118 21:26:53.975789       6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
  2194<source>
  2195  @type tail
  2196  format multiline
  2197  multiline_flush_interval 5s
  2198  format_firstline /^\w\d{4}/
  2199  format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
  2200  time_format %m%d %H:%M:%S.%N
  2201  path /etc/kubernetes/logs/kube-proxy.log
  2202  pos_file /etc/kubernetes/logs/gcp-kube-proxy.log.pos
  2203  tag kube-proxy
  2204</source>
  2205# Example:
  2206# I0928 03:15:50.440223    4880 main.go:51] Starting CSI-Proxy Server ...
  2207<source>
  2208  @type tail
  2209  format multiline
  2210  multiline_flush_interval 5s
  2211  format_firstline /^\w\d{4}/
  2212  format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
  2213  time_format %m%d %H:%M:%S.%N
  2214  path /etc/kubernetes/logs/csi-proxy.log
  2215  pos_file /etc/kubernetes/logs/gcp-csi-proxy.log.pos
  2216  tag csi-proxy
  2217</source>
  2218# Example:
  2219# time="2019-12-10T21:27:59.836946700Z" level=info msg="loading plugin \"io.containerd.grpc.v1.cri\"..." type=io.containerd.grpc.v1
  2220<source>
  2221  @type tail
  2222  format multiline
  2223  multiline_flush_interval 5s
  2224  format_firstline /^time=/
  2225  format1 /^time="(?<time>[^ ]*)" level=(?<severity>\w*) (?<message>.*)/
  2226  time_format %Y-%m-%dT%H:%M:%S.%N%z
  2227  path /etc/kubernetes/logs/containerd.log
  2228  pos_file /etc/kubernetes/logs/gcp-containerd.log.pos
  2229  tag container-runtime
  2230</source>
  2231<match reform.**>
  2232  @type record_reformer
  2233  enable_ruby true
  2234  <record>
  2235    # Extract local_resource_id from tag for 'k8s_container' monitored
  2236    # resource. The format is:
  2237    # 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
  2238    "logging.googleapis.com/local_resource_id" ${"k8s_container.#{tag_suffix[4].rpartition('.')[0].split('_')[1]}.#{tag_suffix[4].rpartition('.')[0].split('_')[0]}.#{tag_suffix[4].rpartition('.')[0].split('_')[2].rpartition('-')[0]}"}
  2239    # Rename the field 'log' to a more generic field 'message'. This way the
  2240    # fluent-plugin-google-cloud knows to flatten the field as textPayload
  2241    # instead of jsonPayload after extracting 'time', 'severity' and
  2242    # 'stream' from the record.
  2243    message ${record['log']}
  2244    # If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
  2245    severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
  2246  </record>
  2247  tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
  2248  remove_keys stream,log
  2249</match>
  2250# TODO: detect exceptions and forward them as one log entry using the
  2251# detect_exceptions plugin
  2252# This section is exclusive for k8s_container logs. These logs come with
  2253# 'raw.stderr' or 'raw.stdout' tags.
  2254<match {raw.stderr,raw.stdout}>
  2255  @type google_cloud
  2256  # Try to detect JSON formatted log entries.
  2257  detect_json true
  2258  # Allow log entries from multiple containers to be sent in the same request.
  2259  split_logs_by_tag false
  2260  # Set the buffer type to file to improve the reliability and reduce the memory consumption
  2261  buffer_type file
  2262  buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
  2263  # Set queue_full action to block because we want to pause gracefully
  2264  # in case of the off-the-limits load instead of throwing an exception
  2265  buffer_queue_full_action block
  2266  # Set the chunk limit conservatively to avoid exceeding the recommended
  2267  # chunk size of 5MB per write request.
  2268  buffer_chunk_limit 512k
  2269  # Cap the combined memory usage of this buffer and the one below to
  2270  # 512KiB/chunk * (6 + 2) chunks = 4 MiB
  2271  buffer_queue_limit 6
  2272  # Never wait more than 5 seconds before flushing logs in the non-error case.
  2273  flush_interval 5s
  2274  # Never wait longer than 30 seconds between retries.
  2275  max_retry_wait 30
  2276  # Disable the limit on the number of retries (retry forever).
  2277  disable_retry_limit
  2278  # Use multiple threads for processing.
  2279  num_threads 2
  2280  use_grpc true
  2281  # Skip timestamp adjustment as this is in a controlled environment with
  2282  # known timestamp format. This helps with CPU usage.
  2283  adjust_invalid_timestamps false
  2284</match>
  2285# Attach local_resource_id for 'k8s_node' monitored resource.
  2286<filter **>
  2287  @type record_transformer
  2288  enable_ruby true
  2289  <record>
  2290    "logging.googleapis.com/local_resource_id" ${"k8s_node.NODE_NAME"}
  2291  </record>
  2292</filter>
  2293'@
  2294
  2295# Downloads the out-of-tree kubelet image credential provider binaries.
  2296function DownloadAndInstall-AuthProviderGcpBinary {
  2297  if ("${env:ENABLE_AUTH_PROVIDER_GCP}" -eq "true") {
  2298    $filename = 'auth-provider-gcp.exe'
  2299    if (ShouldWrite-File ${env:AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR}\$filename) {
  2300      Log-Output "Installing auth provider gcp binaries"
  2301      $tmp_dir = 'C:\k8s_tmp'
  2302      New-Item -Force -ItemType 'directory' $tmp_dir | Out-Null
  2303      $url = "${env:AUTH_PROVIDER_GCP_STORAGE_PATH}/${env:AUTH_PROVIDER_GCP_VERSION}/windows_amd64/$filename"
  2304      MustDownload-File -Hash $AUTH_PROVIDER_GCP_HASH_WINDOWS_AMD64 -Algorithm SHA512 -OutFile $tmp_dir\$filename -URLs $url
  2305      Move-Item -Force $tmp_dir\$filename ${env:AUTH_PROVIDER_GCP_WINDOWS_BIN_DIR}
  2306      Remove-Item -Force -Recurse $tmp_dir
  2307    } else {
  2308      Log-Output "Skipping auth provider gcp binaries installation, auth-provider-gcp.exe file already exists."
  2309    }
  2310  }
  2311}
  2312
  2313# Creates config file for the out-of-tree kubelet image credential provider.
  2314function Create-AuthProviderGcpConfig {
  2315  if ("${env:ENABLE_AUTH_PROVIDER_GCP}" -eq "true") {
  2316    if (ShouldWrite-File ${env:AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE}) {
  2317      Log-Output "Creating auth provider gcp config file"
  2318      Set-Content ${env:AUTH_PROVIDER_GCP_WINDOWS_CONF_FILE} @'
  2319kind: CredentialProviderConfig
  2320apiVersion: kubelet.config.k8s.io/v1
  2321providers:
  2322  - name: auth-provider-gcp.exe
  2323    apiVersion: credentialprovider.kubelet.k8s.io/v1
  2324    matchImages:
  2325    - "container.cloud.google.com"
  2326    - "gcr.io"
  2327    - "*.gcr.io"
  2328    - "*.pkg.dev"
  2329    args:
  2330    - get-credentials
  2331    - --v=3
  2332    defaultCacheDuration: 1m
  2333'@
  2334    } else {
  2335      Log-Output "Skipping auth provider gcp config file creation, it already exists"
  2336    }
  2337  }
  2338}
  2339
  2340
  2341# Export all public functions:
  2342Export-ModuleMember -Function *-*

View as plain text