...
1
16
17 package node
18
19 import (
20 "context"
21 "sync"
22 "time"
23
24 "github.com/onsi/ginkgo/v2"
25
26 v1 "k8s.io/api/core/v1"
27 "k8s.io/apimachinery/pkg/util/wait"
28 clientset "k8s.io/client-go/kubernetes"
29
30 "k8s.io/kubernetes/test/e2e/framework"
31 e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
32 )
33
34
// NodeKiller periodically simulates node failures: it stops docker and the
// kubelet on a subset of nodes over SSH and, after a configured downtime,
// reboots them.
type NodeKiller struct {
	// config supplies Interval, JitterFactor, FailureRatio and
	// SimulatedDowntime, which drive the kill schedule.
	config framework.NodeKillerConfig

	// client is the API client used to list ready, schedulable nodes.
	client clientset.Interface

	// provider is passed to the SSH helper when issuing commands on a node.
	provider string
}
40
41
42 func NewNodeKiller(config framework.NodeKillerConfig, client clientset.Interface, provider string) *NodeKiller {
43 config.NodeKillerStopCtx, config.NodeKillerStop = context.WithCancel(context.Background())
44 return &NodeKiller{config, client, provider}
45 }
46
47
48 func (k *NodeKiller) Run(ctx context.Context) {
49
50 time.Sleep(wait.Jitter(k.config.Interval, k.config.JitterFactor))
51 wait.JitterUntilWithContext(ctx, func(ctx context.Context) {
52 nodes := k.pickNodes(ctx)
53 k.kill(ctx, nodes)
54 }, k.config.Interval, k.config.JitterFactor, true)
55 }
56
57 func (k *NodeKiller) pickNodes(ctx context.Context) []v1.Node {
58 nodes, err := GetReadySchedulableNodes(ctx, k.client)
59 framework.ExpectNoError(err)
60 numNodes := int(k.config.FailureRatio * float64(len(nodes.Items)))
61
62 nodes, err = GetBoundedReadySchedulableNodes(ctx, k.client, numNodes)
63 framework.ExpectNoError(err)
64 return nodes.Items
65 }
66
67 func (k *NodeKiller) kill(ctx context.Context, nodes []v1.Node) {
68 wg := sync.WaitGroup{}
69 wg.Add(len(nodes))
70 for _, node := range nodes {
71 node := node
72 go func() {
73 defer ginkgo.GinkgoRecover()
74 defer wg.Done()
75
76 framework.Logf("Stopping docker and kubelet on %q to simulate failure", node.Name)
77 err := e2essh.IssueSSHCommand(ctx, "sudo systemctl stop docker kubelet", k.provider, &node)
78 if err != nil {
79 framework.Logf("ERROR while stopping node %q: %v", node.Name, err)
80 return
81 }
82
83 time.Sleep(k.config.SimulatedDowntime)
84
85 framework.Logf("Rebooting %q to repair the node", node.Name)
86 err = e2essh.IssueSSHCommand(ctx, "sudo reboot", k.provider, &node)
87 if err != nil {
88 framework.Logf("ERROR while rebooting node %q: %v", node.Name, err)
89 return
90 }
91 }()
92 }
93 wg.Wait()
94 }
95
View as plain text