...

Source file src/k8s.io/kubernetes/test/e2e_node/e2e_node_suite_test.go

Documentation: k8s.io/kubernetes/test/e2e_node

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2016 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  // To run tests in this suite
    21  // NOTE: This test suite requires password-less sudo capabilities to run the kubelet and kube-apiserver.
    22  package e2enode
    23  
    24  import (
    25  	"bytes"
    26  	"context"
    27  	"encoding/json"
    28  	"flag"
    29  	"fmt"
    30  
    31  	"math/rand"
    32  	"os"
    33  	"os/exec"
    34  	"syscall"
    35  	"testing"
    36  	"time"
    37  
    38  	v1 "k8s.io/api/core/v1"
    39  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    40  	utilyaml "k8s.io/apimachinery/pkg/util/yaml"
    41  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    42  	clientset "k8s.io/client-go/kubernetes"
    43  	cliflag "k8s.io/component-base/cli/flag"
    44  	"k8s.io/kubernetes/pkg/util/rlimit"
    45  	commontest "k8s.io/kubernetes/test/e2e/common"
    46  	"k8s.io/kubernetes/test/e2e/framework"
    47  	e2econfig "k8s.io/kubernetes/test/e2e/framework/config"
    48  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    49  	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
    50  	e2etestingmanifests "k8s.io/kubernetes/test/e2e/testing-manifests"
    51  	"k8s.io/kubernetes/test/e2e_node/services"
    52  	e2enodetestingmanifests "k8s.io/kubernetes/test/e2e_node/testing-manifests"
    53  	system "k8s.io/system-validators/validators"
    54  
    55  	// define and freeze constants
    56  	_ "k8s.io/kubernetes/test/e2e/feature"
    57  	_ "k8s.io/kubernetes/test/e2e/nodefeature"
    58  
    59  	// reconfigure framework
    60  	_ "k8s.io/kubernetes/test/e2e/framework/debug/init"
    61  	_ "k8s.io/kubernetes/test/e2e/framework/metrics/init"
    62  	_ "k8s.io/kubernetes/test/e2e/framework/node/init"
    63  	_ "k8s.io/kubernetes/test/utils/format"
    64  
    65  	"github.com/onsi/ginkgo/v2"
    66  	"github.com/onsi/gomega"
    67  	"github.com/spf13/pflag"
    68  	"k8s.io/klog/v2"
    69  )
    70  
var (
	// e2es is the handle to the node services started by the suite setup.
	// It stays nil when the suite does not start services itself, and the
	// suite teardown only stops services through it when it is non-nil.
	e2es *services.E2EServices
	// featureGates is a map of feature names to bools that enable or disable alpha/experimental features.
	// It is populated from the --feature-gates flag.
	featureGates map[string]bool
	// serviceFeatureGates is a map of feature names to bools that enable or
	// disable alpha/experimental features for API service.
	// It is populated from the --service-feature-gates flag.
	serviceFeatureGates map[string]bool

	// The flags below select a special, non-test mode of the test binary;
	// TestE2eNode checks them in declaration order and returns early when one is set.
	// TODO(random-liu): Change the following modes to sub-command.
	runServicesMode    = flag.Bool("run-services-mode", false, "If true, only run services (etcd, apiserver) in current process, and not run test.")
	runKubeletMode     = flag.Bool("run-kubelet-mode", false, "If true, only start kubelet, and not run test.")
	systemValidateMode = flag.Bool("system-validate-mode", false, "If true, only run system validation in current process, and not run test.")
	systemSpecFile     = flag.String("system-spec-file", "", "The name of the system spec file that will be used for node conformance test. If it's unspecified or empty, the default system spec (system.DefaultSysSpec) will be used.")
)
    85  
    86  // registerNodeFlags registers flags specific to the node e2e test suite.
    87  func registerNodeFlags(flags *flag.FlagSet) {
    88  	// Mark the test as node e2e when node flags are api.Registry.
    89  	framework.TestContext.NodeE2E = true
    90  	flags.StringVar(&framework.TestContext.BearerToken, "bearer-token", "", "The bearer token to authenticate with. If not specified, it would be a random token. Currently this token is only used in node e2e tests.")
    91  	flags.StringVar(&framework.TestContext.NodeName, "node-name", "", "Name of the node to run tests on.")
    92  	flags.StringVar(&framework.TestContext.KubeletConfigDropinDir, "config-dir", "", "Path to a directory containing drop-in configurations for the kubelet.")
    93  	// TODO(random-liu): Move kubelet start logic out of the test.
    94  	// TODO(random-liu): Move log fetch logic out of the test.
    95  	// There are different ways to start kubelet (systemd, initd, docker, manually started etc.)
    96  	// and manage logs (journald, upstart etc.).
    97  	// For different situation we need to mount different things into the container, run different commands.
    98  	// It is hard and unnecessary to deal with the complexity inside the test suite.
    99  	flags.BoolVar(&framework.TestContext.NodeConformance, "conformance", false, "If true, the test suite will not start kubelet, and fetch system log (kernel, docker, kubelet log etc.) to the report directory.")
   100  	flags.BoolVar(&framework.TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
   101  	flags.BoolVar(&framework.TestContext.RestartKubelet, "restart-kubelet", false, "If true, restart Kubelet unit when the process is killed.")
   102  	flags.StringVar(&framework.TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
   103  	flags.StringVar(&framework.TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
   104  	flags.Var(cliflag.NewMapStringString(&framework.TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
   105  	flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.")
   106  	flag.StringVar(&framework.TestContext.ClusterDNSDomain, "dns-domain", "", "The DNS Domain of the cluster.")
   107  	flag.Var(cliflag.NewMapStringString(&framework.TestContext.RuntimeConfig), "runtime-config", "The runtime configuration used on node e2e tests.")
   108  	flags.BoolVar(&framework.TestContext.RequireDevices, "require-devices", false, "If true, require device plugins to be installed in the running environment.")
   109  	flags.Var(cliflag.NewMapStringBool(&featureGates), "feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features.")
   110  	flags.Var(cliflag.NewMapStringBool(&serviceFeatureGates), "service-feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features for API service.")
   111  	flags.BoolVar(&framework.TestContext.StandaloneMode, "standalone-mode", false, "If true, starts kubelet in standalone mode.")
   112  }
   113  
// init registers the embedded testing-manifest file systems of the e2e and
// e2e_node packages as additional file sources for the test-files lookup.
func init() {
	// Enable embedded FS file lookup as fallback
	e2etestfiles.AddFileSource(e2etestingmanifests.GetE2ETestingManifestsFS())
	e2etestfiles.AddFileSource(e2enodetestingmanifests.GetE2ENodeTestingManifestsFS())
}
   119  
   120  func TestMain(m *testing.M) {
   121  	// Copy go flags in TestMain, to ensure go test flags are registered (no longer available in init() as of go1.13)
   122  	e2econfig.CopyFlags(e2econfig.Flags, flag.CommandLine)
   123  	framework.RegisterCommonFlags(flag.CommandLine)
   124  	registerNodeFlags(flag.CommandLine)
   125  	pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
   126  	// Mark the run-services-mode flag as hidden to prevent user from using it.
   127  	pflag.CommandLine.MarkHidden("run-services-mode")
   128  	// It's weird that if I directly use pflag in TestContext, it will report error.
   129  	// It seems that someone is using flag.Parse() after init() and TestMain().
   130  	// TODO(random-liu): Find who is using flag.Parse() and cause errors and move the following logic
   131  	// into TestContext.
   132  	// TODO(pohly): remove RegisterNodeFlags from test_context.go enable Viper config support here?
   133  
   134  	rand.Seed(time.Now().UnixNano())
   135  	pflag.Parse()
   136  	if pflag.CommandLine.NArg() > 0 {
   137  		fmt.Fprintf(os.Stderr, "unknown additional command line arguments: %s", pflag.CommandLine.Args())
   138  		os.Exit(1)
   139  	}
   140  	framework.AfterReadingAllFlags(&framework.TestContext)
   141  	if err := e2eskipper.InitFeatureGates(utilfeature.DefaultFeatureGate, featureGates); err != nil {
   142  		fmt.Fprintf(os.Stderr, "ERROR: initialize feature gates: %v", err)
   143  		os.Exit(1)
   144  	}
   145  
   146  	if err := services.SetFeatureGatesForInProcessComponents(serviceFeatureGates); err != nil {
   147  		fmt.Fprintf(os.Stderr, "ERROR: initialize process feature gates for API service: %v", err)
   148  		os.Exit(1)
   149  	}
   150  
   151  	setExtraEnvs()
   152  	os.Exit(m.Run())
   153  }
   154  
// When running the containerized conformance test, we'll mount the
// host root filesystem as readonly to /rootfs.
// TestE2eNode chroots into this path before system validation when the
// conformance flag is set.
const rootfs = "/rootfs"
   158  
   159  func TestE2eNode(t *testing.T) {
   160  	// Make sure we are not limited by sshd when it comes to open files
   161  	if err := rlimit.SetNumFiles(1000000); err != nil {
   162  		klog.Infof("failed to set rlimit on max file handles: %v", err)
   163  	}
   164  
   165  	if *runServicesMode {
   166  		// If run-services-mode is specified, only run services in current process.
   167  		services.RunE2EServices(t)
   168  		return
   169  	}
   170  	if *runKubeletMode {
   171  		// If run-kubelet-mode is specified, only start kubelet.
   172  		services.RunKubelet(featureGates)
   173  		return
   174  	}
   175  	if *systemValidateMode {
   176  		// If system-validate-mode is specified, only run system validation in current process.
   177  		spec := &system.DefaultSysSpec
   178  		if *systemSpecFile != "" {
   179  			var err error
   180  			spec, err = loadSystemSpecFromFile(*systemSpecFile)
   181  			if err != nil {
   182  				klog.Exitf("Failed to load system spec: %v", err)
   183  			}
   184  		}
   185  		if framework.TestContext.NodeConformance {
   186  			// Chroot to /rootfs to make system validation can check system
   187  			// as in the root filesystem.
   188  			// TODO(random-liu): Consider to chroot the whole test process to make writing
   189  			// test easier.
   190  			if err := syscall.Chroot(rootfs); err != nil {
   191  				klog.Exitf("chroot %q failed: %v", rootfs, err)
   192  			}
   193  		}
   194  		if _, err := system.ValidateSpec(*spec, "remote"); len(err) != 0 {
   195  			klog.Exitf("system validation failed: %v", err)
   196  		}
   197  		return
   198  	}
   199  
   200  	// We're not running in a special mode so lets run tests.
   201  	gomega.RegisterFailHandler(ginkgo.Fail)
   202  	// Initialize the KubeletConfigDropinDir again if the test doesn't run in run-kubelet-mode.
   203  	if framework.TestContext.KubeletConfigDropinDir == "" {
   204  		var err error
   205  		framework.TestContext.KubeletConfigDropinDir, err = services.KubeletConfigDirCWDDir()
   206  		if err != nil {
   207  			klog.Errorf("failed to create kubelet config directory: %v", err)
   208  		}
   209  	}
   210  	reportDir := framework.TestContext.ReportDir
   211  	if reportDir != "" {
   212  		// Create the directory if it doesn't already exist
   213  		// NOTE: junit report can be simply created by executing your tests with the new --junit-report flags instead.
   214  		if err := os.MkdirAll(reportDir, 0755); err != nil {
   215  			klog.Errorf("Failed creating report directory: %v", err)
   216  		}
   217  	}
   218  	suiteConfig, reporterConfig := framework.CreateGinkgoConfig()
   219  	ginkgo.RunSpecs(t, "E2eNode Suite", suiteConfig, reporterConfig)
   220  }
   221  
   222  // Setup the kubelet on the node
   223  var _ = ginkgo.SynchronizedBeforeSuite(func(ctx context.Context) []byte {
   224  	// Run system validation test.
   225  	gomega.Expect(validateSystem()).To(gomega.Succeed(), "system validation")
   226  
   227  	// Pre-pull the images tests depend on so we can fail immediately if there is an image pull issue
   228  	// This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling.
   229  	if framework.TestContext.PrepullImages {
   230  		klog.Infof("Pre-pulling images so that they are cached for the tests.")
   231  		updateImageAllowList(ctx)
   232  		err := PrePullAllImages()
   233  		gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
   234  	}
   235  
   236  	// TODO(yifan): Temporary workaround to disable coreos from auto restart
   237  	// by masking the locksmithd.
   238  	// We should mask locksmithd when provisioning the machine.
   239  	maskLocksmithdOnCoreos()
   240  
   241  	if *startServices {
   242  		// If the services are expected to stop after test, they should monitor the test process.
   243  		// If the services are expected to keep running after test, they should not monitor the test process.
   244  		e2es = services.NewE2EServices(*stopServices)
   245  		gomega.Expect(e2es.Start(featureGates)).To(gomega.Succeed(), "should be able to start node services.")
   246  	} else {
   247  		klog.Infof("Running tests without starting services.")
   248  	}
   249  
   250  	if !framework.TestContext.StandaloneMode {
   251  		klog.Infof("Wait for the node to be ready")
   252  		waitForNodeReady(ctx)
   253  	}
   254  
   255  	// Reference common test to make the import valid.
   256  	commontest.CurrentSuite = commontest.NodeE2E
   257  
   258  	// ginkgo would spawn multiple processes to run tests.
   259  	// Since the bearer token is generated randomly at run time,
   260  	// we need to distribute the bearer token to other processes to make them use the same token.
   261  	return []byte(framework.TestContext.BearerToken)
   262  }, func(ctx context.Context, token []byte) {
   263  	framework.TestContext.BearerToken = string(token)
   264  	// update test context with node configuration.
   265  	gomega.Expect(updateTestContext(ctx)).To(gomega.Succeed(), "update test context with node config.")
   266  
   267  	// Store current Kubelet configuration in the package variable
   268  	// This assumes all tests which dynamically change kubelet configuration
   269  	// must: 1) run in serial; 2) restore kubelet configuration after test.
   270  	var err error
   271  	kubeletCfg, err = getCurrentKubeletConfig(ctx)
   272  	framework.ExpectNoError(err)
   273  })
   274  
   275  // Tear down the kubelet on the node
   276  var _ = ginkgo.SynchronizedAfterSuite(func() {}, func() {
   277  	if e2es != nil {
   278  		if *startServices && *stopServices {
   279  			klog.Infof("Stopping node services...")
   280  			e2es.Stop()
   281  		}
   282  	}
   283  
   284  	klog.Infof("Tests Finished")
   285  })
   286  
   287  // validateSystem runs system validation in a separate process and returns error if validation fails.
   288  func validateSystem() error {
   289  	testBin, err := os.Executable()
   290  	if err != nil {
   291  		return fmt.Errorf("can't get current binary: %w", err)
   292  	}
   293  	// Pass all flags into the child process, so that it will see the same flag set.
   294  	output, err := exec.Command(testBin, append([]string{"--system-validate-mode"}, os.Args[1:]...)...).CombinedOutput()
   295  	// The output of system validation should have been formatted, directly print here.
   296  	fmt.Print(string(output))
   297  	if err != nil {
   298  		return fmt.Errorf("system validation failed: %w", err)
   299  	}
   300  	return nil
   301  }
   302  
   303  func maskLocksmithdOnCoreos() {
   304  	data, err := os.ReadFile("/etc/os-release")
   305  	if err != nil {
   306  		// Not all distros contain this file.
   307  		klog.Infof("Could not read /etc/os-release: %v", err)
   308  		return
   309  	}
   310  	if bytes.Contains(data, []byte("ID=coreos")) {
   311  		output, err := exec.Command("systemctl", "mask", "--now", "locksmithd").CombinedOutput()
   312  		framework.ExpectNoError(err, fmt.Sprintf("should be able to mask locksmithd - output: %q", string(output)))
   313  		klog.Infof("Locksmithd is masked successfully")
   314  	}
   315  }
   316  
   317  func waitForNodeReady(ctx context.Context) {
   318  	const (
   319  		// nodeReadyTimeout is the time to wait for node to become ready.
   320  		nodeReadyTimeout = 2 * time.Minute
   321  		// nodeReadyPollInterval is the interval to check node ready.
   322  		nodeReadyPollInterval = 1 * time.Second
   323  	)
   324  	client, err := getAPIServerClient()
   325  	framework.ExpectNoError(err, "should be able to get apiserver client.")
   326  	gomega.Eventually(ctx, func() error {
   327  		node, err := getNode(client)
   328  		if err != nil {
   329  			return fmt.Errorf("failed to get node: %w", err)
   330  		}
   331  		if !isNodeReady(node) {
   332  			return fmt.Errorf("node is not ready: %+v", node)
   333  		}
   334  		return nil
   335  	}, nodeReadyTimeout, nodeReadyPollInterval).Should(gomega.Succeed())
   336  }
   337  
   338  // updateTestContext updates the test context with the node name.
   339  func updateTestContext(ctx context.Context) error {
   340  	setExtraEnvs()
   341  	updateImageAllowList(ctx)
   342  
   343  	client, err := getAPIServerClient()
   344  	if err != nil {
   345  		return fmt.Errorf("failed to get apiserver client: %w", err)
   346  	}
   347  
   348  	if !framework.TestContext.StandaloneMode {
   349  		// Update test context with current node object.
   350  		node, err := getNode(client)
   351  		if err != nil {
   352  			return fmt.Errorf("failed to get node: %w", err)
   353  		}
   354  		framework.TestContext.NodeName = node.Name // Set node name from API server, it is already set to the computer name by default.
   355  	}
   356  
   357  	framework.Logf("Node name: %s", framework.TestContext.NodeName)
   358  
   359  	return nil
   360  }
   361  
   362  // getNode gets node object from the apiserver.
   363  func getNode(c *clientset.Clientset) (*v1.Node, error) {
   364  	nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
   365  	framework.ExpectNoError(err, "should be able to list nodes.")
   366  	if nodes == nil {
   367  		return nil, fmt.Errorf("the node list is nil")
   368  	}
   369  	gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically("<=", 1), "the number of nodes is more than 1.")
   370  	if len(nodes.Items) == 0 {
   371  		return nil, fmt.Errorf("empty node list: %+v", nodes)
   372  	}
   373  	return &nodes.Items[0], nil
   374  }
   375  
   376  // getAPIServerClient gets a apiserver client.
   377  func getAPIServerClient() (*clientset.Clientset, error) {
   378  	config, err := framework.LoadConfig()
   379  	if err != nil {
   380  		return nil, fmt.Errorf("failed to load config: %w", err)
   381  	}
   382  	client, err := clientset.NewForConfig(config)
   383  	if err != nil {
   384  		return nil, fmt.Errorf("failed to create client: %w", err)
   385  	}
   386  	return client, nil
   387  }
   388  
   389  // loadSystemSpecFromFile returns the system spec from the file with the
   390  // filename.
   391  func loadSystemSpecFromFile(filename string) (*system.SysSpec, error) {
   392  	b, err := os.ReadFile(filename)
   393  	if err != nil {
   394  		return nil, err
   395  	}
   396  	data, err := utilyaml.ToJSON(b)
   397  	if err != nil {
   398  		return nil, err
   399  	}
   400  	spec := new(system.SysSpec)
   401  	if err := json.Unmarshal(data, spec); err != nil {
   402  		return nil, err
   403  	}
   404  	return spec, nil
   405  }
   406  
   407  // isNodeReady returns true if a node is ready; false otherwise.
   408  func isNodeReady(node *v1.Node) bool {
   409  	for _, c := range node.Status.Conditions {
   410  		if c.Type == v1.NodeReady {
   411  			return c.Status == v1.ConditionTrue
   412  		}
   413  	}
   414  	return false
   415  }
   416  
   417  func setExtraEnvs() {
   418  	for name, value := range framework.TestContext.ExtraEnvs {
   419  		os.Setenv(name, value)
   420  	}
   421  }
   422  

View as plain text