...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package main
19
import (
	"bytes"
	"flag"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"runtime"
	"syscall"
	"time"
)
31
// Command-line flags. Each variable is a pointer filled in by flag.Parse.
var (
	// flagP is the number of worker goroutines, each running the command in a
	// loop; it defaults to runtime.NumCPU().
	flagP = flag.Int("p", runtime.NumCPU(), "run `N` processes in parallel")
	// flagTimeout is how long a single process may run before it is treated
	// as timed out; it defaults to ten minutes.
	flagTimeout = flag.Duration("timeout", 10*time.Minute, "timeout each process after `duration`")
	// flagKill selects what happens on timeout: kill the process (default) or
	// only print its pid.
	flagKill = flag.Bool("kill", true, "kill timed out processes if true, otherwise just print pid (to attach with gdb)")
	// flagFailure, when non-empty, is a regexp the output must match for a
	// failing run to be counted as a failure.
	flagFailure = flag.String("failure", "", "fail only if output matches `regexp`")
	// flagIgnore, when non-empty, is a regexp that causes a failing run to be
	// ignored if its output matches.
	flagIgnore = flag.String("ignore", "", "ignore failure if output matches `regexp`")
	// flagOutput is the path prefix for failure log files; the default comes
	// from defaultPrefix().
	flagOutput = flag.String("o", defaultPrefix(), "output failure logs to `path` plus a unique suffix")
)
40
// init installs a custom flag.Usage that prints a short description of the
// tool and an example invocation, followed by the flag defaults.
func init() {
	flag.Usage = func() {
		// Write the banner to the same destination flag.PrintDefaults uses, so
		// the whole help text stays together if the flag output is redirected
		// with flag.CommandLine.SetOutput. By default this is os.Stderr.
		fmt.Fprint(flag.CommandLine.Output(), `The stress utility is intended for catching sporadic failures.
It runs a given process in parallel in a loop and collects any failures.
Usage:

$ stress ./fmt.test -test.run=TestSometing -test.cpu=10

`)
		flag.PrintDefaults()
	}
}
53
// defaultPrefix returns the default value of the -o flag: a path inside the
// system temporary directory whose final element is "go-stress-" followed by
// the current local time (YYYYMMDDTHHMMSS) and a trailing dash.
func defaultPrefix() string {
	// Only the digits of the reference time are substituted by Format; the
	// surrounding text is emitted literally.
	const layout = "go-stress-20060102T150405-"
	stamp := time.Now().Format(layout)
	return filepath.Join(os.TempDir(), stamp)
}
58
59 func main() {
60 flag.Parse()
61 if *flagP <= 0 || *flagTimeout <= 0 || len(flag.Args()) == 0 {
62 flag.Usage()
63 os.Exit(1)
64 }
65 var failureRe, ignoreRe *regexp.Regexp
66 if *flagFailure != "" {
67 var err error
68 if failureRe, err = regexp.Compile(*flagFailure); err != nil {
69 fmt.Println("bad failure regexp:", err)
70 os.Exit(1)
71 }
72 }
73 if *flagIgnore != "" {
74 var err error
75 if ignoreRe, err = regexp.Compile(*flagIgnore); err != nil {
76 fmt.Println("bad ignore regexp:", err)
77 os.Exit(1)
78 }
79 }
80 res := make(chan []byte)
81 for i := 0; i < *flagP; i++ {
82 go func() {
83 for {
84 cmd := exec.Command(flag.Args()[0], flag.Args()[1:]...)
85 done := make(chan bool)
86 if *flagTimeout > 0 {
87 go func() {
88 select {
89 case <-done:
90 return
91 case <-time.After(*flagTimeout):
92 }
93 if !*flagKill {
94 fmt.Printf("process %v timed out\n", cmd.Process.Pid)
95 return
96 }
97 cmd.Process.Signal(syscall.SIGABRT)
98 select {
99 case <-done:
100 return
101 case <-time.After(10 * time.Second):
102 }
103 cmd.Process.Kill()
104 }()
105 }
106 out, err := cmd.CombinedOutput()
107 close(done)
108 if err != nil && (failureRe == nil || failureRe.Match(out)) && (ignoreRe == nil || !ignoreRe.Match(out)) {
109 out = append(out, fmt.Sprintf("\n\nERROR: %v\n", err)...)
110 } else {
111 out = []byte{}
112 }
113 res <- out
114 }
115 }()
116 }
117 runs, fails := 0, 0
118 start := time.Now()
119 ticker := time.NewTicker(5 * time.Second).C
120 for {
121 select {
122 case out := <-res:
123 runs++
124 if len(out) == 0 {
125 continue
126 }
127 fails++
128 dir, path := filepath.Split(*flagOutput)
129 f, err := os.CreateTemp(dir, path)
130 if err != nil {
131 fmt.Printf("failed to create temp file: %v\n", err)
132 os.Exit(1)
133 }
134 f.Write(out)
135 f.Close()
136 if len(out) > 2<<10 {
137 out := out[:2<<10]
138 fmt.Printf("\n%s\n%s\n…\n", f.Name(), out)
139 } else {
140 fmt.Printf("\n%s\n%s\n", f.Name(), out)
141 }
142 case <-ticker:
143 elapsed := time.Since(start).Truncate(time.Second)
144 var pct string
145 if fails > 0 {
146 pct = fmt.Sprintf(" (%0.2f%%)", 100.0*float64(fails)/float64(runs))
147 }
148 fmt.Printf("%v: %v runs so far, %v failures%s\n", elapsed, runs, fails, pct)
149 }
150 }
151 }
152
View as plain text