1
16
17 package services
18
19 import (
20 "flag"
21 "fmt"
22 "net/http"
23 "os"
24 "os/exec"
25 "path"
26 "reflect"
27 "strconv"
28 "strings"
29 "syscall"
30 "time"
31
32 "k8s.io/klog/v2"
33
34 "k8s.io/kubernetes/test/e2e/framework"
35 )
36
// serverStartTimeout is the value of the -server-start-timeout flag: how long
// to wait for each server to become healthy. It defaults to two minutes.
var serverStartTimeout = flag.Duration(
	"server-start-timeout",
	2*time.Minute,
	"Time to wait for each server to become healthy.",
)
38
39
40
// server bundles the commands, health-check endpoints and restart-loop
// bookkeeping used to start, supervise and stop one managed process.
type server struct {
	// name identifies the server in log and error messages.
	name string

	// startCommand launches the server. killCommand, when non-nil, is run by
	// kill() instead of signalling the started process. restartCommand, when
	// non-nil, is run by the restart loop instead of re-running startCommand.
	startCommand, killCommand, restartCommand *exec.Cmd

	// healthCheckUrls are the endpoints probed to decide whether the server
	// is up; start() requires at least one when restartOnExit is set.
	healthCheckUrls []string

	// outFilename is the file name, joined onto the test report directory,
	// that receives the start command's stdout and stderr.
	outFilename string

	// monitorParent asks start() to set Pdeathsig=SIGTERM on the start
	// command. restartOnExit enables the restart loop in start().
	monitorParent, restartOnExit bool

	// stopRestartingCh is written by kill() to tell the restart loop to exit.
	stopRestartingCh chan<- bool
	// ackStopRestartingCh is read by kill() to learn that the restart loop
	// has acknowledged the stop request.
	ackStopRestartingCh <-chan bool

	// systemdUnitName is the unit stopped by stopUnit(); empty means none.
	systemdUnitName string
}
70
71
72
73 func newServer(name string, start, kill, restart *exec.Cmd, urls []string, outputFileName string, monitorParent, restartOnExit bool, systemdUnitName string) *server {
74 return &server{
75 name: name,
76 startCommand: start,
77 killCommand: kill,
78 restartCommand: restart,
79 healthCheckUrls: urls,
80 outFilename: outputFileName,
81 monitorParent: monitorParent,
82 restartOnExit: restartOnExit,
83 systemdUnitName: systemdUnitName,
84 }
85 }
86
87
// commandToString renders c as a single space-separated line: c.Path followed
// by every argument after Args[0]. It returns "" for a nil command.
//
// A command whose Args slice is empty (for example a Cmd built as a struct
// literal with only Path set) is rendered as just its Path instead of
// panicking on the Args[1:] slice expression.
func commandToString(c *exec.Cmd) string {
	if c == nil {
		return ""
	}
	parts := make([]string, 0, len(c.Args)+1)
	parts = append(parts, c.Path)
	// Args[0] is the program name; Path is shown in its place.
	if len(c.Args) > 1 {
		parts = append(parts, c.Args[1:]...)
	}
	return strings.Join(parts, " ")
}
94
95 func (s *server) String() string {
96 return fmt.Sprintf("server %q start-command: `%s`, kill-command: `%s`, restart-command: `%s`, health-check: %v, output-file: %q", s.name,
97 commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename)
98 }
99
100
101
102
103
104 func (s *server) start() error {
105 klog.Infof("Starting server %q with command %q", s.name, commandToString(s.startCommand))
106 errCh := make(chan error)
107
108
109 var stopRestartingCh, ackStopRestartingCh chan bool
110 if s.restartOnExit {
111 if len(s.healthCheckUrls) == 0 {
112 return fmt.Errorf("tried to start %s which has s.restartOnExit == true, but no health check urls provided", s)
113 }
114
115 stopRestartingCh = make(chan bool)
116 ackStopRestartingCh = make(chan bool)
117
118 s.stopRestartingCh = stopRestartingCh
119 s.ackStopRestartingCh = ackStopRestartingCh
120 }
121
122
123 go func() {
124 defer close(errCh)
125
126
127 outPath := path.Join(framework.TestContext.ReportDir, s.outFilename)
128 outfile, err := os.Create(outPath)
129 if err != nil {
130 errCh <- fmt.Errorf("failed to create file %q for `%s` %v", outPath, s, err)
131 return
132 }
133 klog.Infof("Output file for server %q: %v", s.name, outfile.Name())
134 defer outfile.Close()
135 defer outfile.Sync()
136
137
138 s.startCommand.Stdout = outfile
139 s.startCommand.Stderr = outfile
140
141
142 if s.monitorParent {
143
144 attrs := &syscall.SysProcAttr{}
145
146 deathSigField := reflect.ValueOf(attrs).Elem().FieldByName("Pdeathsig")
147 if deathSigField.IsValid() {
148 deathSigField.Set(reflect.ValueOf(syscall.SIGTERM))
149 } else {
150 errCh <- fmt.Errorf("failed to set Pdeathsig field (non-linux build)")
151 return
152 }
153 s.startCommand.SysProcAttr = attrs
154 }
155
156
157 err = s.startCommand.Start()
158 if err != nil {
159 errCh <- fmt.Errorf("failed to run %s: %w", s, err)
160 return
161 }
162 if !s.restartOnExit {
163 klog.Infof("Waiting for server %q start command to complete", s.name)
164
165
166 err = s.startCommand.Wait()
167 if err != nil {
168 errCh <- fmt.Errorf("failed to run start command for server %q: %w", s.name, err)
169 return
170 }
171 } else {
172 usedStartCmd := true
173 for {
174 klog.Infof("Running health check for service %q", s.name)
175
176 err := readinessCheck(s.name, s.healthCheckUrls, nil)
177 if err != nil {
178 if usedStartCmd {
179 klog.Infof("Waiting for server %q start command to complete after initial health check failed", s.name)
180 s.startCommand.Wait()
181 }
182
183 klog.Fatalf("Restart loop readinessCheck failed for %q", s.name)
184 } else {
185 klog.Infof("Initial health check passed for service %q", s.name)
186 }
187
188
189 stillAlive:
190 for {
191 select {
192 case <-stopRestartingCh:
193 ackStopRestartingCh <- true
194 return
195 case <-time.After(time.Second):
196 for _, url := range s.healthCheckUrls {
197 resp, err := http.Head(url)
198 if err != nil || resp.StatusCode != http.StatusOK {
199 break stillAlive
200 }
201 }
202 }
203 }
204
205 if usedStartCmd {
206 s.startCommand.Wait()
207 usedStartCmd = false
208 }
209 if s.restartCommand != nil {
210
211
212 s.restartCommand = &exec.Cmd{
213 Path: s.restartCommand.Path,
214 Args: s.restartCommand.Args,
215 Env: s.restartCommand.Env,
216 Dir: s.restartCommand.Dir,
217 Stdin: s.restartCommand.Stdin,
218 Stdout: s.restartCommand.Stdout,
219 Stderr: s.restartCommand.Stderr,
220 ExtraFiles: s.restartCommand.ExtraFiles,
221 SysProcAttr: s.restartCommand.SysProcAttr,
222 }
223
224
225 klog.Infof("Restarting server %q with restart command", s.name)
226 err = s.restartCommand.Run()
227 if err != nil {
228
229 klog.Fatalf("Restarting server %s with restartCommand failed. Error: %v.", s, err)
230 }
231 } else {
232 s.startCommand = &exec.Cmd{
233 Path: s.startCommand.Path,
234 Args: s.startCommand.Args,
235 Env: s.startCommand.Env,
236 Dir: s.startCommand.Dir,
237 Stdin: s.startCommand.Stdin,
238 Stdout: s.startCommand.Stdout,
239 Stderr: s.startCommand.Stderr,
240 ExtraFiles: s.startCommand.ExtraFiles,
241 SysProcAttr: s.startCommand.SysProcAttr,
242 }
243 klog.Infof("Restarting server %q with start command", s.name)
244 err = s.startCommand.Start()
245 usedStartCmd = true
246 if err != nil {
247
248 klog.Fatalf("Restarting server %s with startCommand failed. Error: %v.", s, err)
249 }
250 }
251 }
252 }
253 }()
254
255 return readinessCheck(s.name, s.healthCheckUrls, errCh)
256 }
257
258
259 func (s *server) kill() error {
260 klog.Infof("Kill server %q", s.name)
261 name := s.name
262 cmd := s.startCommand
263
264
265 if s.restartOnExit {
266 s.stopRestartingCh <- true
267 <-s.ackStopRestartingCh
268 }
269
270 if s.killCommand != nil {
271 return s.killCommand.Run()
272 }
273
274 if cmd == nil {
275 return fmt.Errorf("could not kill %q because both `killCommand` and `startCommand` are nil", name)
276 }
277
278 if cmd.Process == nil {
279 klog.V(2).Infof("%q not running", name)
280 return nil
281 }
282 pid := cmd.Process.Pid
283 if pid <= 1 {
284 return fmt.Errorf("invalid PID %d for %q", pid, name)
285 }
286
287
288 waitChan := make(chan error)
289 go func() {
290 _, err := cmd.Process.Wait()
291 waitChan <- err
292 close(waitChan)
293 }()
294
295 const timeout = 10 * time.Second
296 for _, signal := range []string{"-TERM", "-KILL"} {
297 klog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal)
298 cmd := exec.Command("kill", signal, strconv.Itoa(pid))
299 _, err := cmd.Output()
300 if err != nil {
301 klog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err)
302 continue
303 }
304
305 select {
306 case err := <-waitChan:
307 if err != nil {
308 return fmt.Errorf("error stopping %q: %w", name, err)
309 }
310
311 return nil
312 case <-time.After(timeout):
313
314 }
315 }
316
317 return fmt.Errorf("unable to stop %q", name)
318 }
319
320 func (s *server) stopUnit() error {
321 klog.Infof("Stopping systemd unit for server %q with unit name: %q", s.name, s.systemdUnitName)
322 if s.systemdUnitName != "" {
323 err := exec.Command("sudo", "systemctl", "stop", s.systemdUnitName).Run()
324 if err != nil {
325 return fmt.Errorf("Failed to stop systemd unit name: %q: %w", s.systemdUnitName, err)
326 }
327 }
328 return nil
329 }
330
View as plain text