...

Source file src/k8s.io/kubernetes/test/e2e/cloud/gcp/ha_master.go

Documentation: k8s.io/kubernetes/test/e2e/cloud/gcp

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package gcp
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os/exec"
    23  	"path"
    24  	"regexp"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/onsi/ginkgo/v2"
    30  	v1 "k8s.io/api/core/v1"
    31  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    32  	clientset "k8s.io/client-go/kubernetes"
    33  	"k8s.io/kubernetes/test/e2e/common"
    34  	"k8s.io/kubernetes/test/e2e/feature"
    35  	"k8s.io/kubernetes/test/e2e/framework"
    36  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    37  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    38  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    39  	admissionapi "k8s.io/pod-security-admission/api"
    40  )
    41  
    42  func addMasterReplica(zone string) error {
    43  	framework.Logf(fmt.Sprintf("Adding a new master replica, zone: %s", zone))
    44  	_, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/e2e-internal/e2e-grow-cluster.sh"), zone, "true", "true", "false")
    45  	if err != nil {
    46  		return err
    47  	}
    48  	return nil
    49  }
    50  
    51  func removeMasterReplica(zone string) error {
    52  	framework.Logf(fmt.Sprintf("Removing an existing master replica, zone: %s", zone))
    53  	_, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/e2e-internal/e2e-shrink-cluster.sh"), zone, "true", "false", "false")
    54  	if err != nil {
    55  		return err
    56  	}
    57  	return nil
    58  }
    59  
    60  func addWorkerNodes(zone string) error {
    61  	framework.Logf(fmt.Sprintf("Adding worker nodes, zone: %s", zone))
    62  	_, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/e2e-internal/e2e-grow-cluster.sh"), zone, "true", "false", "true")
    63  	if err != nil {
    64  		return err
    65  	}
    66  	return nil
    67  }
    68  
    69  func removeWorkerNodes(zone string) error {
    70  	framework.Logf(fmt.Sprintf("Removing worker nodes, zone: %s", zone))
    71  	_, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/e2e-internal/e2e-shrink-cluster.sh"), zone, "true", "true", "true")
    72  	if err != nil {
    73  		return err
    74  	}
    75  	return nil
    76  }
    77  
    78  func verifyRCs(ctx context.Context, c clientset.Interface, ns string, names []string) {
    79  	for _, name := range names {
    80  		framework.ExpectNoError(e2epod.VerifyPods(ctx, c, ns, name, true, 1))
    81  	}
    82  }
    83  
    84  func createNewRC(c clientset.Interface, ns string, name string) {
    85  	_, err := common.NewRCByName(c, ns, name, 1, nil, nil)
    86  	framework.ExpectNoError(err)
    87  }
    88  
    89  func findRegionForZone(zone string) string {
    90  	region, err := exec.Command("gcloud", "compute", "zones", "list", zone, "--quiet", "--format=csv[no-heading](region)").Output()
    91  	framework.ExpectNoError(err)
    92  	if string(region) == "" {
    93  		framework.Failf("Region not found; zone: %s", zone)
    94  	}
    95  	return string(region)
    96  }
    97  
    98  func findZonesForRegion(region string) []string {
    99  	output, err := exec.Command("gcloud", "compute", "zones", "list", "--filter=region="+region,
   100  		"--quiet", "--format=csv[no-heading](name)").Output()
   101  	framework.ExpectNoError(err)
   102  	zones := strings.Split(string(output), "\n")
   103  	return zones
   104  }
   105  
   106  // removeZoneFromZones removes zone from zones slide.
   107  // Please note that entries in zones can be repeated. In such situation only one replica is removed.
   108  func removeZoneFromZones(zones []string, zone string) []string {
   109  	idx := -1
   110  	for j, z := range zones {
   111  		if z == zone {
   112  			idx = j
   113  			break
   114  		}
   115  	}
   116  	if idx >= 0 {
   117  		return zones[:idx+copy(zones[idx:], zones[idx+1:])]
   118  	}
   119  	return zones
   120  }
   121  
   122  // generateMasterRegexp returns a regex for matching master node name.
   123  func generateMasterRegexp(prefix string) string {
   124  	return prefix + "(-...)?"
   125  }
   126  
   127  // waitForMasters waits until the cluster has the desired number of ready masters in it.
   128  func waitForMasters(ctx context.Context, masterPrefix string, c clientset.Interface, size int, timeout time.Duration) error {
   129  	for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) {
   130  		nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
   131  		if err != nil {
   132  			framework.Logf("Failed to list nodes: %v", err)
   133  			continue
   134  		}
   135  
   136  		// Filter out nodes that are not master replicas
   137  		e2enode.Filter(nodes, func(node v1.Node) bool {
   138  			res, err := regexp.Match(generateMasterRegexp(masterPrefix), ([]byte)(node.Name))
   139  			if err != nil {
   140  				framework.Logf("Failed to match regexp to node name: %v", err)
   141  				return false
   142  			}
   143  			return res
   144  		})
   145  
   146  		numNodes := len(nodes.Items)
   147  
   148  		// Filter out not-ready nodes.
   149  		e2enode.Filter(nodes, func(node v1.Node) bool {
   150  			return e2enode.IsConditionSetAsExpected(&node, v1.NodeReady, true)
   151  		})
   152  
   153  		numReady := len(nodes.Items)
   154  
   155  		if numNodes == size && numReady == size {
   156  			framework.Logf("Cluster has reached the desired number of masters %d", size)
   157  			return nil
   158  		}
   159  		framework.Logf("Waiting for the number of masters %d, current %d, not ready master nodes %d", size, numNodes, numNodes-numReady)
   160  	}
   161  	return fmt.Errorf("timeout waiting %v for the number of masters to be %d", timeout, size)
   162  }
   163  
   164  var _ = SIGDescribe("HA-master", feature.HAMaster, func() {
   165  	f := framework.NewDefaultFramework("ha-master")
   166  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
   167  	var c clientset.Interface
   168  	var ns string
   169  	var additionalReplicaZones []string
   170  	var additionalNodesZones []string
   171  	var existingRCs []string
   172  
   173  	ginkgo.BeforeEach(func(ctx context.Context) {
   174  		e2eskipper.SkipUnlessProviderIs("gce")
   175  		c = f.ClientSet
   176  		ns = f.Namespace.Name
   177  		framework.ExpectNoError(waitForMasters(ctx, framework.TestContext.CloudConfig.MasterName, c, 1, 10*time.Minute))
   178  		additionalReplicaZones = make([]string, 0)
   179  		existingRCs = make([]string, 0)
   180  	})
   181  
   182  	ginkgo.AfterEach(func(ctx context.Context) {
   183  		// Clean-up additional worker nodes if the test execution was broken.
   184  		for _, zone := range additionalNodesZones {
   185  			removeWorkerNodes(zone)
   186  		}
   187  		framework.ExpectNoError(e2enode.AllNodesReady(ctx, c, 5*time.Minute))
   188  
   189  		// Clean-up additional master replicas if the test execution was broken.
   190  		for _, zone := range additionalReplicaZones {
   191  			removeMasterReplica(zone)
   192  		}
   193  		framework.ExpectNoError(waitForMasters(ctx, framework.TestContext.CloudConfig.MasterName, c, 1, 10*time.Minute))
   194  	})
   195  
   196  	type Action int
   197  	const (
   198  		None Action = iota
   199  		AddReplica
   200  		RemoveReplica
   201  		AddNodes
   202  		RemoveNodes
   203  	)
   204  
   205  	step := func(ctx context.Context, action Action, zone string) {
   206  		switch action {
   207  		case None:
   208  		case AddReplica:
   209  			framework.ExpectNoError(addMasterReplica(zone))
   210  			additionalReplicaZones = append(additionalReplicaZones, zone)
   211  		case RemoveReplica:
   212  			framework.ExpectNoError(removeMasterReplica(zone))
   213  			additionalReplicaZones = removeZoneFromZones(additionalReplicaZones, zone)
   214  		case AddNodes:
   215  			framework.ExpectNoError(addWorkerNodes(zone))
   216  			additionalNodesZones = append(additionalNodesZones, zone)
   217  		case RemoveNodes:
   218  			framework.ExpectNoError(removeWorkerNodes(zone))
   219  			additionalNodesZones = removeZoneFromZones(additionalNodesZones, zone)
   220  		}
   221  		framework.ExpectNoError(waitForMasters(ctx, framework.TestContext.CloudConfig.MasterName, c, len(additionalReplicaZones)+1, 10*time.Minute))
   222  		framework.ExpectNoError(e2enode.AllNodesReady(ctx, c, 5*time.Minute))
   223  
   224  		// Verify that API server works correctly with HA master.
   225  		rcName := "ha-master-" + strconv.Itoa(len(existingRCs))
   226  		createNewRC(c, ns, rcName)
   227  		existingRCs = append(existingRCs, rcName)
   228  		verifyRCs(ctx, c, ns, existingRCs)
   229  	}
   230  
   231  	f.It("survive addition/removal replicas same zone", f.WithSerial(), f.WithDisruptive(), func(ctx context.Context) {
   232  		zone := framework.TestContext.CloudConfig.Zone
   233  		step(ctx, None, "")
   234  		numAdditionalReplicas := 2
   235  		for i := 0; i < numAdditionalReplicas; i++ {
   236  			step(ctx, AddReplica, zone)
   237  		}
   238  		for i := 0; i < numAdditionalReplicas; i++ {
   239  			step(ctx, RemoveReplica, zone)
   240  		}
   241  	})
   242  
   243  	f.It("survive addition/removal replicas different zones", f.WithSerial(), f.WithDisruptive(), func(ctx context.Context) {
   244  		zone := framework.TestContext.CloudConfig.Zone
   245  		region := findRegionForZone(zone)
   246  		zones := findZonesForRegion(region)
   247  		zones = removeZoneFromZones(zones, zone)
   248  
   249  		step(ctx, None, "")
   250  		// If numAdditionalReplicas is larger then the number of remaining zones in the region,
   251  		// we create a few masters in the same zone and zone entry is repeated in additionalReplicaZones.
   252  		numAdditionalReplicas := 2
   253  		for i := 0; i < numAdditionalReplicas; i++ {
   254  			step(ctx, AddReplica, zones[i%len(zones)])
   255  		}
   256  		for i := 0; i < numAdditionalReplicas; i++ {
   257  			step(ctx, RemoveReplica, zones[i%len(zones)])
   258  		}
   259  	})
   260  
   261  	f.It("survive addition/removal replicas multizone workers", f.WithSerial(), f.WithDisruptive(), func(ctx context.Context) {
   262  		zone := framework.TestContext.CloudConfig.Zone
   263  		region := findRegionForZone(zone)
   264  		zones := findZonesForRegion(region)
   265  		zones = removeZoneFromZones(zones, zone)
   266  
   267  		step(ctx, None, "")
   268  		numAdditionalReplicas := 2
   269  
   270  		// Add worker nodes.
   271  		for i := 0; i < numAdditionalReplicas && i < len(zones); i++ {
   272  			step(ctx, AddNodes, zones[i])
   273  		}
   274  
   275  		// Add master repilcas.
   276  		//
   277  		// If numAdditionalReplicas is larger then the number of remaining zones in the region,
   278  		// we create a few masters in the same zone and zone entry is repeated in additionalReplicaZones.
   279  		for i := 0; i < numAdditionalReplicas; i++ {
   280  			step(ctx, AddReplica, zones[i%len(zones)])
   281  		}
   282  
   283  		// Remove master repilcas.
   284  		for i := 0; i < numAdditionalReplicas; i++ {
   285  			step(ctx, RemoveReplica, zones[i%len(zones)])
   286  		}
   287  
   288  		// Remove worker nodes.
   289  		for i := 0; i < numAdditionalReplicas && i < len(zones); i++ {
   290  			step(ctx, RemoveNodes, zones[i])
   291  		}
   292  	})
   293  })
   294  

View as plain text