...

Text file src/k8s.io/kubernetes/cluster/gce/upgrade.sh

Documentation: k8s.io/kubernetes/cluster/gce

#!/usr/bin/env bash

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# !!! EXPERIMENTAL !!! Upgrade script for GCE. Expect this to get
# rewritten in Go in relatively short order, but it allows us to start
# testing the concepts.

set -o errexit
set -o nounset
set -o pipefail

if [[ "${KUBERNETES_PROVIDER:-gce}" != "gce" ]]; then
  echo "!!! ${0} only works on GCE" >&2
  exit 1
fi

KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
source "${KUBE_ROOT}/hack/lib/util.sh"
source "${KUBE_ROOT}/cluster/kube-util.sh"

function usage() {
  echo "!!! EXPERIMENTAL !!!"
  echo "!!! This upgrade script is not meant to be run in production !!!"
  echo ""
  echo "${0} [-M | -N | -P] [-o] (-l | <version number or publication>)"
  echo "  Upgrades master and nodes by default"
  echo "  -M:  Upgrade master only"
  echo "  -N:  Upgrade nodes only"
  echo "  -P:  Node upgrade prerequisites only (create a new instance template)"
  echo "  -c:  Upgrade NODE_UPGRADE_PARALLELISM nodes in parallel (default=1) within a single instance group. The MIGs themselves are processed serially."
  echo "  -o:  Use os distro specified in KUBE_NODE_OS_DISTRIBUTION for new nodes. Options include 'debian' or 'gci'"
  echo "  -l:  Use local (dev) binaries. This is only supported for master upgrades."
  echo ""
  echo '  Version number or publication is either a proper version number'
  echo '  (e.g. "v1.0.6", "v1.2.0-alpha.1.881+376438b69c7612") or a version'
  echo '  publication of the form <bucket>/<version> (e.g. "release/stable",'
  echo '  "ci/latest-1").  Some common ones are:'
  echo '    - "release/stable"'
  echo '    - "release/latest"'
  echo '    - "ci/latest"'
  echo '  See the docs on getting builds for more information about version publication.'
  echo ""
  echo "(... Fetching current release versions ...)"
  echo ""

  # NOTE: IF YOU CHANGE THE FOLLOWING LIST, ALSO UPDATE test/e2e/cluster_upgrade.go
  local release_stable
  local release_latest
  local ci_latest

  release_stable=$(curl -sL https://dl.k8s.io/release/stable.txt)
  release_latest=$(curl -sL https://dl.k8s.io/release/latest.txt)
  ci_latest=$(curl -sL https://dl.k8s.io/ci/latest.txt)

  echo "Right now, versions are as follows:"
  echo "  release/stable: ${0} ${release_stable}"
  echo "  release/latest: ${0} ${release_latest}"
  echo "  ci/latest:      ${0} ${ci_latest}"
}

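# Example invocations (illustrative, based on the flags documented in usage()):
#   ./upgrade.sh release/stable      # upgrade master and nodes to the current stable release
#   ./upgrade.sh -M v1.0.6           # upgrade only the master to v1.0.6
#   ./upgrade.sh -N -o ci/latest     # upgrade only the nodes, using the os distro from KUBE_NODE_OS_DISTRIBUTION
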
function print-node-version-info() {
  echo "== $1 Node OS and Kubelet Versions =="
  "${KUBE_ROOT}/cluster/kubectl.sh" get nodes -o=jsonpath='{range .items[*]}name: "{.metadata.name}", osImage: "{.status.nodeInfo.osImage}", kubeletVersion: "{.status.nodeInfo.kubeletVersion}"{"\n"}{end}'
}

function upgrade-master() {
  local num_masters
  num_masters=$(get-master-replicas-count)
  if [[ "${num_masters}" -gt 1 ]]; then
    echo "Upgrade of master is not supported if more than one master replica is present. The current number of master replicas: ${num_masters}"
    exit 1
  fi

  echo "== Upgrading master to '${SERVER_BINARY_TAR_URL}'. Do not interrupt, deleting master instance. =="

  # Tries to figure out KUBE_USER/KUBE_PASSWORD by first looking under
  # kubeconfig:username, and then under kubeconfig:username-basic-auth.
  # TODO: KUBE_USER is used in generating ABAC policy which the
  # apiserver may not have enabled. If it's enabled, we must have a user
  # to generate a valid ABAC policy. If the username changes, should
  # the script fail? Should we generate a default username and password
  # if the section is missing in kubeconfig? Handle this better in 1.5.
  get-kubeconfig-basicauth
  get-kubeconfig-bearertoken

  detect-master
  parse-master-env
  upgrade-master-env

  # Delete the master instance. Note that the master-pd is created
  # with auto-delete=no, so it should not be deleted.
  gcloud compute instances delete \
    --project "${PROJECT}" \
    --quiet \
    --zone "${ZONE}" \
    "${MASTER_NAME}"

  create-master-instance "${MASTER_NAME}-ip"
  wait-for-master
}

function upgrade-master-env() {
  echo "== Upgrading master environment variables. =="
  # Generate the node problem detector token if it isn't present on the original
  # master. The pipeline below produces a 32-character alphanumeric token:
  # 128 random bytes are base64-encoded, the '=', '+' and '/' characters are
  # stripped, and the result is truncated to 32 bytes.
  if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" && "${NODE_PROBLEM_DETECTOR_TOKEN:-}" == "" ]]; then
    NODE_PROBLEM_DETECTOR_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
  fi
}

function wait-for-master() {
  echo "== Waiting for new master to respond to API requests =="

  local curl_auth_arg
  if [[ -n ${KUBE_BEARER_TOKEN:-} ]]; then
    curl_auth_arg=(-H "Authorization: Bearer ${KUBE_BEARER_TOKEN}")
  elif [[ -n ${KUBE_PASSWORD:-} ]]; then
    curl_auth_arg=(--user "${KUBE_USER}:${KUBE_PASSWORD}")
  else
    echo "can't get auth credentials for the current master"
    exit 1
  fi

  until curl --insecure "${curl_auth_arg[@]}" --max-time 5 \
    --fail --output /dev/null --silent "https://${KUBE_MASTER_IP}/healthz"; do
    printf "."
    sleep 2
  done

  echo "== Done =="
}

# Perform common upgrade setup tasks
#
# Assumed vars:
#   KUBE_VERSION
function prepare-upgrade() {
  kube::util::ensure-temp-dir
  detect-project
  detect-subnetworks
  detect-node-names # sets INSTANCE_GROUPS
  write-cluster-location
  write-cluster-name
  tars_from_version
}

# Reads kube-env metadata from the first node in NODE_NAMES.
#
# Assumed vars:
#   NODE_NAMES
#   PROJECT
#   ZONE
function get-node-env() {
  # TODO(zmerlynn): Make this more reliable with retries.
  gcloud compute --project "${PROJECT}" ssh --zone "${ZONE}" "${NODE_NAMES[0]}" --command \
    "curl --fail --silent -H 'Metadata-Flavor: Google' \
      'http://metadata/computeMetadata/v1/instance/attributes/kube-env'" 2>/dev/null
}

# Reads the os distro information from /etc/os-release on a node.
# $1: The name of the node
#
# Assumed vars:
#   PROJECT
#   ZONE
function get-node-os() {
  # Prints the value of the ID field (e.g. "cos" or "ubuntu") by stripping the
  # leading "ID=" from the matching line.
  gcloud compute ssh "$1" \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --command \
    "cat /etc/os-release | grep \"^ID=.*\" | cut -c 4-"
}

# Assumed vars:
#   KUBE_VERSION
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
#   PROJECT
#   ZONE
#
# Vars set:
#   KUBE_PROXY_TOKEN
#   NODE_PROBLEM_DETECTOR_TOKEN
#   CA_CERT_BASE64
#   EXTRA_DOCKER_OPTS
#   KUBELET_CERT_BASE64
#   KUBELET_KEY_BASE64
function upgrade-nodes() {
  prepare-node-upgrade
  do-node-upgrade
}

function setup-base-image() {
  if [[ "${env_os_distro}" == "false" ]]; then
    echo "== Ensuring that the new node base OS image matches the existing node base OS image =="
    NODE_OS_DISTRIBUTION=$(get-node-os "${NODE_NAMES[0]}")

    if [[ "${NODE_OS_DISTRIBUTION}" == "cos" ]]; then
      NODE_OS_DISTRIBUTION="gci"
    fi

    source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
    # Reset the node image based on the current os distro.
    set-linux-node-image
  fi
}

# prepare-node-upgrade creates a new instance template suitable for upgrading
# to KUBE_VERSION and echoes a single line with the name of the new template.
#
# Assumed vars:
#   KUBE_VERSION
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
#   PROJECT
#   ZONE
#
# Vars set:
#   SANITIZED_VERSION
#   INSTANCE_GROUPS
#   KUBE_PROXY_TOKEN
#   NODE_PROBLEM_DETECTOR_TOKEN
#   CA_CERT_BASE64
#   EXTRA_DOCKER_OPTS
#   KUBELET_CERT_BASE64
#   KUBELET_KEY_BASE64
function prepare-node-upgrade() {
  echo "== Preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
  setup-base-image

  # Replace '.' and '+' with '-' so the version can be used in the instance
  # template name, e.g. "v1.2.0-alpha.1.881+376438b69c7612" becomes
  # "v1-2-0-alpha-1-881-376438b69c7612".
  SANITIZED_VERSION="${KUBE_VERSION//[\.\+]/-}"

  # TODO(zmerlynn): Refactor setting scope flags.
  local scope_flags=
  if [ -n "${NODE_SCOPES}" ]; then
    scope_flags="--scopes ${NODE_SCOPES}"
  else
    # shellcheck disable=SC2034 # 'scope_flags' is used by upstream
    scope_flags="--no-scopes"
  fi

  # Get required node env vars from the existing template.
  local node_env
  node_env=$(get-node-env)
  KUBE_PROXY_TOKEN=$(get-env-val "${node_env}" "KUBE_PROXY_TOKEN")
  export KUBE_PROXY_TOKEN
  NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${node_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
  CA_CERT_BASE64=$(get-env-val "${node_env}" "CA_CERT")
  export CA_CERT_BASE64
  EXTRA_DOCKER_OPTS=$(get-env-val "${node_env}" "EXTRA_DOCKER_OPTS")
  export EXTRA_DOCKER_OPTS
  KUBELET_CERT_BASE64=$(get-env-val "${node_env}" "KUBELET_CERT")
  export KUBELET_CERT_BASE64
  KUBELET_KEY_BASE64=$(get-env-val "${node_env}" "KUBELET_KEY")
  export KUBELET_KEY_BASE64

  upgrade-node-env

  # TODO(zmerlynn): How do we ensure kube-env is written in a ${version}-
  #                 compatible way?
  write-linux-node-env

  # TODO(zmerlynn): Get configure-vm script from ${version}. (Must plumb this
  #                 through all create-linux-node-instance-template implementations).
  local template_name
  template_name=$(get-template-name-from-version "${SANITIZED_VERSION}" "${NODE_INSTANCE_PREFIX}")
  create-linux-node-instance-template "${template_name}"
  # The following is echo'd so that callers can get the template name.
  echo "Instance template name: ${template_name}"
  echo "== Finished preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
}

function upgrade-node-env() {
  echo "== Upgrading node environment variables. =="
  # Get the node problem detector token from master if it isn't present on
  # the original node.
  if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" && "${NODE_PROBLEM_DETECTOR_TOKEN:-}" == "" ]]; then
    detect-master
    local master_env
    master_env=$(get-master-env)
    NODE_PROBLEM_DETECTOR_TOKEN=$(get-env-val "${master_env}" "NODE_PROBLEM_DETECTOR_TOKEN")
  fi
}

# Upgrades a single node.
# $1: The name of the node
#
# Note: This is called multiple times from do-node-upgrade() in parallel, so it should be thread-safe.
function do-single-node-upgrade() {
  local -r instance="$1"
  local kubectl_rc
  local boot_id
  boot_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.status.nodeInfo.bootID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
  if [[ "${kubectl_rc}" != 0 ]]; then
    echo "== FAILED to get bootID for ${instance} =="
    echo "${boot_id}"
    return ${kubectl_rc}
  fi

  # Drain the node of its workloads before recreating it.
  echo "== Draining ${instance}. == " >&2
  local drain_rc
  "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-emptydir-data --force --ignore-daemonsets "${instance}" \
    && drain_rc=$? || drain_rc=$?
  if [[ "${drain_rc}" != 0 ]]; then
    echo "== FAILED to drain ${instance} =="
    return ${drain_rc}
  fi

  # Recreate the instance from its (already updated) instance template. Note
  # that ${group} is not a parameter of this function; it is set by the loop
  # in do-node-upgrade() that invokes this function.
  echo "== Recreating instance ${instance}. ==" >&2
  local recreate_rc
  local recreate
  recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
    --project="${PROJECT}" \
    --zone="${ZONE}" \
    --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
  if [[ "${recreate_rc}" != 0 ]]; then
    echo "== FAILED to recreate ${instance} =="
    echo "${recreate}"
    return ${recreate_rc}
  fi

  # Wait for the node status to reflect a new boot ID. This guarantees
  # that the node status in the API is from a different boot. It does
  # not guarantee that the status is from the upgraded node, but it is
  # a best-effort approximation.
  echo "== Waiting for new node to be added to k8s.  ==" >&2
  while true; do
    local new_boot_id
    new_boot_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.status.nodeInfo.bootID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
    if [[ "${kubectl_rc}" != 0 ]]; then
      echo "== FAILED to get node ${instance} =="
      echo "${new_boot_id}"
      echo "  (Will retry.)"
    elif [[ "${boot_id}" != "${new_boot_id}" ]]; then
      echo "Node ${instance} recreated."
      break
    else
      echo -n .
    fi
    sleep 1
  done

  # Wait for the node to have Ready=True.
  echo "== Waiting for ${instance} to become ready. ==" >&2
  while true; do
    local ready
    ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
    if [[ "${ready}" != 'True' ]]; then
      echo "Node ${instance} is still not ready: Ready=${ready}"
    else
      echo "Node ${instance} Ready=${ready}"
      break
    fi
    sleep 1
  done

  # Uncordon the node so it can receive workloads again.
  echo "== Uncordon ${instance}. == " >&2
  local uncordon_rc
  "${KUBE_ROOT}/cluster/kubectl.sh" uncordon "${instance}" \
    && uncordon_rc=$? || uncordon_rc=$?
  if [[ "${uncordon_rc}" != 0 ]]; then
    echo "== FAILED to uncordon ${instance} =="
    return ${uncordon_rc}
  fi
}

# Prereqs:
# - prepare-node-upgrade should have been called successfully
function do-node-upgrade() {
  echo "== Upgrading nodes to ${KUBE_VERSION} with max parallelism of ${node_upgrade_parallelism}. ==" >&2
  # Do the actual upgrade.
  # NOTE(zmerlynn): If you are changing this gcloud command, update
  #                 test/e2e/cluster_upgrade.go to match this EXACTLY.
  local template_name
  template_name=$(get-template-name-from-version "${SANITIZED_VERSION}" "${NODE_INSTANCE_PREFIX}")
  local old_templates=()
  for group in "${INSTANCE_GROUPS[@]}"; do
    while IFS='' read -r line; do old_templates+=("$line"); done < <(gcloud compute instance-groups managed list \
        --project="${PROJECT}" \
        --filter="name ~ '${group}' AND zone:(${ZONE})" \
        --format='value(instanceTemplate)' || true)
    set_instance_template_out=$(gcloud compute instance-groups managed set-instance-template "${group}" \
      --template="${template_name}" \
      --project="${PROJECT}" \
      --zone="${ZONE}" 2>&1) && set_instance_template_rc=$? || set_instance_template_rc=$?
    if [[ "${set_instance_template_rc}" != 0 ]]; then
      echo "== FAILED to set-instance-template for ${group} to ${template_name} =="
      echo "${set_instance_template_out}"
      return "${set_instance_template_rc}"
    fi
    instances=()
    while IFS='' read -r line; do instances+=("$line"); done < <(gcloud compute instance-groups managed list-instances "${group}" \
        --format='value(name)' \
        --project="${PROJECT}" \
        --zone="${ZONE}" 2>&1) && list_instances_rc=$? || list_instances_rc=$?
    if [[ "${list_instances_rc}" != 0 ]]; then
      echo "== FAILED to list instances in group ${group} =="
      echo "${instances[@]}"
      return "${list_instances_rc}"
    fi

    process_count_left=${node_upgrade_parallelism}
    pids=()
    ret_code_sum=0  # Should stay 0 in the loop iff all parallel node upgrades succeed.
    for instance in "${instances[@]}"; do
      do-single-node-upgrade "${instance}" & pids+=("$!")

      # We don't want to run more than ${node_upgrade_parallelism} upgrades at a time,
      # so wait once we hit that many nodes. This isn't ideal, since one might take much
      # longer than the others, but it should help.
      process_count_left=$((process_count_left - 1))
      if [[ ${process_count_left} -eq 0 || "${instance}" == "${instances[-1]}" ]]; then
        # Wait for each of the parallel node upgrades to finish.
        for pid in "${pids[@]}"; do
          wait "$pid"
          ret_code_sum=$(( ret_code_sum + $? ))
        done
        # Return if at least one of the node upgrades failed.
        if [[ ${ret_code_sum} != 0 ]]; then
          echo "== Some of the ${node_upgrade_parallelism} parallel node upgrades failed. =="
          return ${ret_code_sum}
        fi
        process_count_left=${node_upgrade_parallelism}
      fi
    done
  done

  # Remove the old templates.
  echo "== Deleting old templates in ${PROJECT}. ==" >&2
  for tmpl in "${old_templates[@]}"; do
    gcloud compute instance-templates delete \
        --quiet \
        --project="${PROJECT}" \
        "${tmpl}" || true
  done

  echo "== Finished upgrading nodes to ${KUBE_VERSION}. ==" >&2
}


function update-coredns-config() {
  # Location of the CoreDNS addon manifest on the master and a scratch
  # directory for the migration tool.
  local -r coredns_addon_path="/etc/kubernetes/addons/0-dns/coredns"
  local -r tmpdir=/tmp
  local -r download_dir=$(mktemp --tmpdir=${tmpdir} -d coredns-migration.XXXXXXXXXX) || exit 1

  # Clean up the scratch directory when the function returns.
  cleanup() {
    if [ -n "${download_dir:-}" ]; then
      rm -rf "${download_dir}"
    fi
  }
  trap cleanup RETURN

  # Wait (up to 3 minutes) for the CoreDNS deployment to be updated, i.e. for its
  # resourceVersion to change from the value recorded before the upgrade.
  echo "== Waiting for CoreDNS to update =="
  local -r endtime=$(date -ud "3 minute" +%s)
  until [[ $("${KUBE_ROOT}"/cluster/kubectl.sh -n kube-system get deployment coredns -o=jsonpath='{$.metadata.resourceVersion}') -ne ${COREDNS_DEPLOY_RESOURCE_VERSION} ]] || [[ $(date -u +%s) -gt $endtime ]]; do
    sleep 1
  done

  if [[ $("${KUBE_ROOT}"/cluster/kubectl.sh -n kube-system get deployment coredns -o=jsonpath='{$.metadata.resourceVersion}') -ne ${COREDNS_DEPLOY_RESOURCE_VERSION} ]]; then
    echo "== CoreDNS ResourceVersion changed =="
  fi

  # Get the newly installed CoreDNS version from the deployment's image tag.
  echo "== Fetching the latest installed CoreDNS version =="
  NEW_COREDNS_VERSION=$("${KUBE_ROOT}"/cluster/kubectl.sh -n kube-system get deployment coredns -o=jsonpath='{$.spec.template.spec.containers[:1].image}' | sed -r 's/.+:v?(.+)/\1/')

  case "$(uname -m)" in
    x86_64*)
      host_arch=amd64
      corefile_tool_SHA="686792ec91ad52e0761839845c7e09e02234c959b5c459b2cd358d24474c5c66"
      ;;
    i?86_64*)
      host_arch=amd64
      corefile_tool_SHA="686792ec91ad52e0761839845c7e09e02234c959b5c459b2cd358d24474c5c66"
      ;;
    amd64*)
      host_arch=amd64
      corefile_tool_SHA="686792ec91ad52e0761839845c7e09e02234c959b5c459b2cd358d24474c5c66"
      ;;
    aarch64*)
      host_arch=arm64
      corefile_tool_SHA="a968593d68c5564663f9068efa8c34f1baa7bd263be542a71b0b8d8dd44ad124"
      ;;
    arm64*)
      host_arch=arm64
      corefile_tool_SHA="a968593d68c5564663f9068efa8c34f1baa7bd263be542a71b0b8d8dd44ad124"
      ;;
    arm*)
      host_arch=arm
      corefile_tool_SHA="721dbfcabda71a2648fd7d4d9241930313397a07d72828b2054315f85b177794"
      ;;
    s390x*)
      host_arch=s390x
      corefile_tool_SHA="56452a00a703afd4f816d558f78f279af5f29f1940a478baa694da20f4586698"
      ;;
    ppc64le*)
      host_arch=ppc64le
      corefile_tool_SHA="8a5118cb0c998a79ad1d7e4b001af2e23c2cfa83b5489c2823d04ab1c9e33498"
      ;;
    *)
      echo "Unsupported host arch. Must be x86_64, 386, arm, arm64, s390x or ppc64le." >&2
      exit 1
      ;;
  esac

  # Download the CoreDNS migration tool and verify its checksum.
  echo "== Downloading the CoreDNS migration tool =="
  wget -P "${download_dir}" "https://github.com/coredns/corefile-migration/releases/download/v1.0.17/corefile-tool-${host_arch}" >/dev/null 2>&1

  local -r checkSHA=$(sha256sum "${download_dir}/corefile-tool-${host_arch}" | cut -d " " -f 1)
  if [[ "${checkSHA}" != "${corefile_tool_SHA}" ]]; then
    echo "!!! CheckSum for the CoreDNS migration tool did not match !!!" >&2
    exit 1
  fi

  chmod +x "${download_dir}/corefile-tool-${host_arch}"

  # Migrate the CoreDNS ConfigMap depending on whether it is being downgraded or upgraded.
  "${KUBE_ROOT}/cluster/kubectl.sh" -n kube-system get cm coredns -o jsonpath='{.data.Corefile}' > "${download_dir}/Corefile-old"

  if test "$(printf '%s\n' "${CURRENT_COREDNS_VERSION}" "${NEW_COREDNS_VERSION}" | sort -V | head -n 1)" != "${NEW_COREDNS_VERSION}"; then
     echo "== Upgrading the CoreDNS ConfigMap =="
     "${download_dir}/corefile-tool-${host_arch}" migrate --from "${CURRENT_COREDNS_VERSION}" --to "${NEW_COREDNS_VERSION}" --corefile "${download_dir}/Corefile-old" > "${download_dir}/Corefile"
     "${KUBE_ROOT}/cluster/kubectl.sh" -n kube-system create configmap coredns --from-file "${download_dir}/Corefile" -o yaml --dry-run=client | "${KUBE_ROOT}/cluster/kubectl.sh" apply -f -
  else
     # In case of a downgrade, a custom CoreDNS Corefile will be overwritten by a default Corefile. In that case,
     # the user will need to manually modify the resulting (default) Corefile after the downgrade is complete.
     echo "== Applying the latest default CoreDNS configuration =="
     gcloud compute --project "${PROJECT}" scp --zone "${ZONE}" "${MASTER_NAME}:${coredns_addon_path}/coredns.yaml" "${download_dir}/coredns-manifest.yaml" > /dev/null
     "${KUBE_ROOT}/cluster/kubectl.sh" apply -f "${download_dir}/coredns-manifest.yaml"
  fi

  echo "== The CoreDNS Config has been updated =="
}

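# Main: the remainder of the script runs at the top level. It records the
# currently installed CoreDNS version, parses flags, confirms etcd-related
# settings, upgrades the master and/or nodes, updates the CoreDNS config if
# needed, and finally validates the cluster.
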
echo "Fetching the previously installed CoreDNS version"
CURRENT_COREDNS_VERSION=$("${KUBE_ROOT}/cluster/kubectl.sh" -n kube-system get deployment coredns -o=jsonpath='{$.spec.template.spec.containers[:1].image}' | sed -r 's/.+:v?(.+)/\1/')
COREDNS_DEPLOY_RESOURCE_VERSION=$("${KUBE_ROOT}/cluster/kubectl.sh" -n kube-system get deployment coredns -o=jsonpath='{$.metadata.resourceVersion}')

master_upgrade=true
node_upgrade=true
node_prereqs=false
local_binaries=false
env_os_distro=false
node_upgrade_parallelism=1

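# Note: -c takes no argument; it enables parallel node upgrades and reads the
# degree of parallelism from the NODE_UPGRADE_PARALLELISM environment variable
# (default 1). For example (illustrative):
#   NODE_UPGRADE_PARALLELISM=3 ./upgrade.sh -N -c release/stable
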
while getopts ":MNPlcho" opt; do
  case "${opt}" in
    M)
      node_upgrade=false
      ;;
    N)
      master_upgrade=false
      ;;
    P)
      node_prereqs=true
      ;;
    l)
      local_binaries=true
      ;;
    c)
      node_upgrade_parallelism=${NODE_UPGRADE_PARALLELISM:-1}
      ;;
    o)
      env_os_distro=true
      ;;
    h)
      usage
      exit 0
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      exit 1
      ;;
  esac
done
shift $((OPTIND-1))

if [[ $# -gt 1 ]]; then
  echo "Error: Only one parameter (<version number or publication>) may be passed after the set of flags!" >&2
  usage
  exit 1
fi

if [[ $# -lt 1 ]] && [[ "${local_binaries}" == "false" ]]; then
  usage
  exit 1
fi

if [[ "${master_upgrade}" == "false" ]] && [[ "${node_upgrade}" == "false" ]]; then
  echo "Can't specify both -M and -N" >&2
  exit 1
fi

# Prompt if the etcd storage media type isn't set (unless using etcd2) when doing a master upgrade.
if [[ -z "${STORAGE_MEDIA_TYPE:-}" ]] && [[ "${STORAGE_BACKEND:-}" != "etcd2" ]] && [[ "${master_upgrade}" == "true" ]]; then
  echo "The default etcd storage media type in 1.6 has changed from application/json to application/vnd.kubernetes.protobuf."
  echo "Documentation about the change can be found at https://kubernetes.io/docs/admin/etcd_upgrade."
  echo ""
  echo "ETCD2 DOES NOT SUPPORT PROTOBUF: If you wish to have the ability to downgrade to etcd2 later, application/json must be used."
  echo ""
  echo "It's HIGHLY recommended that etcd be backed up before this step!!"
  echo ""
  echo "To enable using json, before running this script set:"
  echo "export STORAGE_MEDIA_TYPE=application/json"
  echo ""
  if [ -t 0 ] && [ -t 1 ]; then
    read -r -p "Would you like to continue with the new default, and lose the ability to downgrade to etcd2? [y/N] " confirm
    if [[ "${confirm}" != "y" ]]; then
      exit 1
    fi
  else
    echo "To enable using protobuf, before running this script set:"
    echo "export STORAGE_MEDIA_TYPE=application/vnd.kubernetes.protobuf"
    echo ""
    echo "STORAGE_MEDIA_TYPE must be specified when run non-interactively." >&2
    exit 1
  fi
fi

# Prompt if the etcd image/version is unspecified when doing a master upgrade.
# In e2e tests, we use TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true to skip this
# prompt, simulating the behavior when the user confirms interactively.
# All other automated use of this script should explicitly specify a version.
if [[ "${master_upgrade}" == "true" ]]; then
  if [[ -z "${ETCD_IMAGE:-}" && -z "${TEST_ETCD_IMAGE:-}" ]] || [[ -z "${ETCD_VERSION:-}" && -z "${TEST_ETCD_VERSION:-}" ]]; then
    echo
    echo "***WARNING***"
    echo "Upgrading Kubernetes with this script might result in an upgrade to a new etcd version."
    echo "Some etcd version upgrades, such as 3.0.x to 3.1.x, DO NOT offer a downgrade path."
    echo "To pin the etcd version to your current one (e.g. v3.0.17), set the following variables"
    echo "before running this script:"
    echo
    echo "# example: pin to etcd v3.0.17"
    echo "export ETCD_IMAGE=3.0.17"
    echo "export ETCD_VERSION=3.0.17"
    echo
    echo "Alternatively, if you choose to allow an etcd upgrade that doesn't support downgrade,"
    echo "you might still be able to downgrade Kubernetes by pinning to the newer etcd version."
    echo "In all cases, it is strongly recommended to have an etcd backup before upgrading."
    echo
    if [ -t 0 ] && [ -t 1 ]; then
      read -r -p "Continue with default etcd version, which might upgrade etcd? [y/N] " confirm
      if [[ "${confirm}" != "y" ]]; then
        exit 1
      fi
    elif [[ "${TEST_ALLOW_IMPLICIT_ETCD_UPGRADE:-}" != "true" ]]; then
      echo "ETCD_IMAGE and ETCD_VERSION must be specified when run non-interactively." >&2
      exit 1
    fi
  fi
fi

print-node-version-info "Pre-Upgrade"

if [[ "${local_binaries}" == "false" ]]; then
  set_binary_version "${1}"
fi

prepare-upgrade

if [[ "${node_prereqs}" == "true" ]]; then
  prepare-node-upgrade
  exit 0
fi

if [[ "${master_upgrade}" == "true" ]]; then
  upgrade-master
fi

if [[ "${node_upgrade}" == "true" ]]; then
  if [[ "${local_binaries}" == "true" ]]; then
    echo "Upgrading nodes to local binaries is not yet supported." >&2
    exit 1
  else
    upgrade-nodes
  fi
fi

if [[ "${CLUSTER_DNS_CORE_DNS:-}" == "true" ]]; then
  update-coredns-config
fi

echo "== Validating cluster post-upgrade =="
"${KUBE_ROOT}/cluster/validate-cluster.sh"

print-node-version-info "Post-Upgrade"
