1 package etcdhttp
2
3 import (
4 "context"
5 "fmt"
6 "io"
7 "net/http"
8 "net/http/httptest"
9 "strings"
10 "testing"
11
12 "github.com/prometheus/client_golang/prometheus"
13 "go.uber.org/zap/zaptest"
14
15 pb "go.etcd.io/etcd/api/v3/etcdserverpb"
16 "go.etcd.io/etcd/client/pkg/v3/testutil"
17 "go.etcd.io/etcd/client/pkg/v3/types"
18 "go.etcd.io/etcd/raft/v3"
19 "go.etcd.io/etcd/server/v3/auth"
20 "go.etcd.io/etcd/server/v3/config"
21 betesting "go.etcd.io/etcd/server/v3/mvcc/backend/testing"
22 )
23
24 type fakeHealthServer struct {
25 fakeServer
26 serializableReadError error
27 linearizableReadError error
28 missingLeader bool
29 authStore auth.AuthStore
30 }
31
32 func (s *fakeHealthServer) Range(_ context.Context, req *pb.RangeRequest) (*pb.RangeResponse, error) {
33 if req.Serializable {
34 return nil, s.serializableReadError
35 }
36 return nil, s.linearizableReadError
37 }
38
39 func (s *fakeHealthServer) Config() config.ServerConfig {
40 return config.ServerConfig{}
41 }
42
43 func (s *fakeHealthServer) Leader() types.ID {
44 if !s.missingLeader {
45 return 1
46 }
47 return types.ID(raft.None)
48 }
49
50 func (s *fakeHealthServer) AuthStore() auth.AuthStore { return s.authStore }
51
52 func (s *fakeHealthServer) ClientCertAuthEnabled() bool { return false }
53
54 type healthTestCase struct {
55 name string
56 healthCheckURL string
57 expectStatusCode int
58 inResult []string
59 notInResult []string
60
61 alarms []*pb.AlarmMember
62 apiError error
63 missingLeader bool
64 }
65
66 func TestHealthHandler(t *testing.T) {
67
68
69 tests := []healthTestCase{
70 {
71 name: "Healthy if no alarm",
72 alarms: []*pb.AlarmMember{},
73 healthCheckURL: "/health",
74 expectStatusCode: http.StatusOK,
75 },
76 {
77 name: "Unhealthy if NOSPACE alarm is on",
78 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
79 healthCheckURL: "/health",
80 expectStatusCode: http.StatusServiceUnavailable,
81 },
82 {
83 name: "Healthy if NOSPACE alarm is on and excluded",
84 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
85 healthCheckURL: "/health?exclude=NOSPACE",
86 expectStatusCode: http.StatusOK,
87 },
88 {
89 name: "Healthy if NOSPACE alarm is excluded",
90 alarms: []*pb.AlarmMember{},
91 healthCheckURL: "/health?exclude=NOSPACE",
92 expectStatusCode: http.StatusOK,
93 },
94 {
95 name: "Healthy if multiple NOSPACE alarms are on and excluded",
96 alarms: []*pb.AlarmMember{{MemberID: uint64(1), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(2), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(3), Alarm: pb.AlarmType_NOSPACE}},
97 healthCheckURL: "/health?exclude=NOSPACE",
98 expectStatusCode: http.StatusOK,
99 },
100 {
101 name: "Unhealthy if NOSPACE alarms is excluded and CORRUPT is on",
102 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}},
103 healthCheckURL: "/health?exclude=NOSPACE",
104 expectStatusCode: http.StatusServiceUnavailable,
105 },
106 {
107 name: "Unhealthy if both NOSPACE and CORRUPT are on and excluded",
108 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}},
109 healthCheckURL: "/health?exclude=NOSPACE&exclude=CORRUPT",
110 expectStatusCode: http.StatusOK,
111 },
112 {
113 name: "Unhealthy if api is not available",
114 healthCheckURL: "/health",
115 apiError: fmt.Errorf("Unexpected error"),
116 expectStatusCode: http.StatusServiceUnavailable,
117 },
118 {
119 name: "Unhealthy if no leader",
120 healthCheckURL: "/health",
121 expectStatusCode: http.StatusServiceUnavailable,
122 missingLeader: true,
123 },
124 {
125 name: "Healthy if no leader and serializable=true",
126 healthCheckURL: "/health?serializable=true",
127 expectStatusCode: http.StatusOK,
128 missingLeader: true,
129 },
130 }
131
132 for _, tt := range tests {
133 t.Run(tt.name, func(t *testing.T) {
134 mux := http.NewServeMux()
135 lg := zaptest.NewLogger(t)
136 be, _ := betesting.NewDefaultTmpBackend(t)
137 defer betesting.Close(t, be)
138 HandleHealth(zaptest.NewLogger(t), mux, &fakeHealthServer{
139 fakeServer: fakeServer{alarms: tt.alarms},
140 serializableReadError: tt.apiError,
141 linearizableReadError: tt.apiError,
142 missingLeader: tt.missingLeader,
143 authStore: auth.NewAuthStore(lg, be, nil, 0),
144 })
145 ts := httptest.NewServer(mux)
146 defer ts.Close()
147 checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, nil, nil)
148 })
149 }
150 }
151
152 func TestHttpSubPath(t *testing.T) {
153 be, _ := betesting.NewDefaultTmpBackend(t)
154 defer betesting.Close(t, be)
155 tests := []healthTestCase{
156 {
157 name: "/readyz/data_corruption ok",
158 healthCheckURL: "/readyz/data_corruption",
159 expectStatusCode: http.StatusOK,
160 },
161 {
162 name: "/readyz/serializable_read not ok with error",
163 apiError: fmt.Errorf("Unexpected error"),
164 healthCheckURL: "/readyz/serializable_read",
165 expectStatusCode: http.StatusServiceUnavailable,
166 notInResult: []string{"data_corruption"},
167 },
168 {
169 name: "/readyz/non_exist 404",
170 healthCheckURL: "/readyz/non_exist",
171 expectStatusCode: http.StatusNotFound,
172 },
173 }
174 for _, tt := range tests {
175 t.Run(tt.name, func(t *testing.T) {
176 mux := http.NewServeMux()
177 logger := zaptest.NewLogger(t)
178 s := &fakeHealthServer{
179 serializableReadError: tt.apiError,
180 authStore: auth.NewAuthStore(logger, be, nil, 0),
181 }
182 HandleHealth(logger, mux, s)
183 ts := httptest.NewServer(mux)
184 defer ts.Close()
185 checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, tt.inResult, tt.notInResult)
186 checkMetrics(t, tt.healthCheckURL, "", tt.expectStatusCode)
187 })
188 }
189 }
190
191 func TestDataCorruptionCheck(t *testing.T) {
192 be, _ := betesting.NewDefaultTmpBackend(t)
193 defer betesting.Close(t, be)
194 tests := []healthTestCase{
195 {
196 name: "Live if CORRUPT alarm is on",
197 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_CORRUPT}},
198 healthCheckURL: "/livez",
199 expectStatusCode: http.StatusOK,
200 notInResult: []string{"data_corruption"},
201 },
202 {
203 name: "Not ready if CORRUPT alarm is on",
204 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_CORRUPT}},
205 healthCheckURL: "/readyz",
206 expectStatusCode: http.StatusServiceUnavailable,
207 inResult: []string{"[-]data_corruption failed: alarm activated: CORRUPT"},
208 },
209 {
210 name: "ready if CORRUPT alarm is not on",
211 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
212 healthCheckURL: "/readyz",
213 expectStatusCode: http.StatusOK,
214 },
215 {
216 name: "ready if CORRUPT alarm is excluded",
217 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_CORRUPT}, {MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
218 healthCheckURL: "/readyz?exclude=data_corruption",
219 expectStatusCode: http.StatusOK,
220 },
221 {
222 name: "Not ready if CORRUPT alarm is on",
223 alarms: []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_CORRUPT}},
224 healthCheckURL: "/readyz?exclude=non_exist",
225 expectStatusCode: http.StatusServiceUnavailable,
226 inResult: []string{"[-]data_corruption failed: alarm activated: CORRUPT"},
227 },
228 }
229 for _, tt := range tests {
230 t.Run(tt.name, func(t *testing.T) {
231 mux := http.NewServeMux()
232 logger := zaptest.NewLogger(t)
233 s := &fakeHealthServer{
234 authStore: auth.NewAuthStore(logger, be, nil, 0),
235 }
236 HandleHealth(logger, mux, s)
237 ts := httptest.NewServer(mux)
238 defer ts.Close()
239
240 checkHttpResponse(t, ts, tt.healthCheckURL, http.StatusOK, nil, nil)
241
242 s.alarms = tt.alarms
243 checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, tt.inResult, tt.notInResult)
244 })
245 }
246 }
247
248 func TestSerializableReadCheck(t *testing.T) {
249 be, _ := betesting.NewDefaultTmpBackend(t)
250 defer betesting.Close(t, be)
251 tests := []healthTestCase{
252 {
253 name: "Alive normal",
254 healthCheckURL: "/livez?verbose",
255 expectStatusCode: http.StatusOK,
256 inResult: []string{"[+]serializable_read ok"},
257 },
258 {
259 name: "Not alive if range api is not available",
260 healthCheckURL: "/livez",
261 apiError: fmt.Errorf("Unexpected error"),
262 expectStatusCode: http.StatusServiceUnavailable,
263 inResult: []string{"[-]serializable_read failed: Unexpected error"},
264 },
265 {
266 name: "Not ready if range api is not available",
267 healthCheckURL: "/readyz",
268 apiError: fmt.Errorf("Unexpected error"),
269 expectStatusCode: http.StatusServiceUnavailable,
270 inResult: []string{"[-]serializable_read failed: Unexpected error"},
271 },
272 }
273 for _, tt := range tests {
274 t.Run(tt.name, func(t *testing.T) {
275 mux := http.NewServeMux()
276 logger := zaptest.NewLogger(t)
277 s := &fakeHealthServer{
278 serializableReadError: tt.apiError,
279 authStore: auth.NewAuthStore(logger, be, nil, 0),
280 }
281 HandleHealth(logger, mux, s)
282 ts := httptest.NewServer(mux)
283 defer ts.Close()
284 checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, tt.inResult, tt.notInResult)
285 checkMetrics(t, tt.healthCheckURL, "serializable_read", tt.expectStatusCode)
286 })
287 }
288 }
289
290 func TestLinearizableReadCheck(t *testing.T) {
291 be, _ := betesting.NewDefaultTmpBackend(t)
292 defer betesting.Close(t, be)
293 tests := []healthTestCase{
294 {
295 name: "Alive normal",
296 healthCheckURL: "/livez?verbose",
297 expectStatusCode: http.StatusOK,
298 inResult: []string{"[+]serializable_read ok"},
299 },
300 {
301 name: "Alive if lineariable range api is not available",
302 healthCheckURL: "/livez",
303 apiError: fmt.Errorf("Unexpected error"),
304 expectStatusCode: http.StatusOK,
305 },
306 {
307 name: "Not ready if range api is not available",
308 healthCheckURL: "/readyz",
309 apiError: fmt.Errorf("Unexpected error"),
310 expectStatusCode: http.StatusServiceUnavailable,
311 inResult: []string{"[+]serializable_read ok", "[-]linearizable_read failed: Unexpected error"},
312 },
313 }
314 for _, tt := range tests {
315 t.Run(tt.name, func(t *testing.T) {
316 mux := http.NewServeMux()
317 logger := zaptest.NewLogger(t)
318 s := &fakeHealthServer{
319 linearizableReadError: tt.apiError,
320 authStore: auth.NewAuthStore(logger, be, nil, 0),
321 }
322 HandleHealth(logger, mux, s)
323 ts := httptest.NewServer(mux)
324 defer ts.Close()
325 checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, tt.inResult, tt.notInResult)
326 checkMetrics(t, tt.healthCheckURL, "linearizable_read", tt.expectStatusCode)
327 })
328 }
329 }
330
331 func checkHttpResponse(t *testing.T, ts *httptest.Server, url string, expectStatusCode int, inResult []string, notInResult []string) {
332 res, err := ts.Client().Do(&http.Request{Method: http.MethodGet, URL: testutil.MustNewURL(t, ts.URL+url)})
333
334 if err != nil {
335 t.Fatalf("fail serve http request %s %v", url, err)
336 }
337 if res.StatusCode != expectStatusCode {
338 t.Errorf("want statusCode %d but got %d", expectStatusCode, res.StatusCode)
339 }
340 defer res.Body.Close()
341 b, err := io.ReadAll(res.Body)
342 if err != nil {
343 t.Fatalf("Failed to read response for %s", url)
344 }
345 result := string(b)
346 for _, substr := range inResult {
347 if !strings.Contains(result, substr) {
348 t.Errorf("Could not find substring : %s, in response: %s", substr, result)
349 return
350 }
351 }
352 for _, substr := range notInResult {
353 if strings.Contains(result, substr) {
354 t.Errorf("Do not expect substring : %s, in response: %s", substr, result)
355 return
356 }
357 }
358 }
359
360 func checkMetrics(t *testing.T, url, checkName string, expectStatusCode int) {
361 defer healthCheckGauge.Reset()
362 defer healthCheckCounter.Reset()
363
364 typeName := strings.TrimPrefix(strings.Split(url, "?")[0], "/")
365 if len(checkName) == 0 {
366 checkName = strings.Split(typeName, "/")[1]
367 typeName = strings.Split(typeName, "/")[0]
368 }
369
370 expectedSuccessCount := 1
371 expectedErrorCount := 0
372 if expectStatusCode != http.StatusOK {
373 expectedSuccessCount = 0
374 expectedErrorCount = 1
375 }
376
377 gather, _ := prometheus.DefaultGatherer.Gather()
378 for _, mf := range gather {
379 name := *mf.Name
380 val := 0
381 switch name {
382 case "etcd_server_healthcheck":
383 val = int(mf.GetMetric()[0].GetGauge().GetValue())
384 case "etcd_server_healthcheck_total":
385 val = int(mf.GetMetric()[0].GetCounter().GetValue())
386 default:
387 continue
388 }
389 labelMap := make(map[string]string)
390 for _, label := range mf.GetMetric()[0].Label {
391 labelMap[label.GetName()] = label.GetValue()
392 }
393 if typeName != labelMap["type"] {
394 continue
395 }
396 if labelMap["name"] != checkName {
397 continue
398 }
399 if statusLabel, found := labelMap["status"]; found && statusLabel == HealthStatusError {
400 if val != expectedErrorCount {
401 t.Fatalf("%s got errorCount %d, wanted %d\n", name, val, expectedErrorCount)
402 }
403 } else {
404 if val != expectedSuccessCount {
405 t.Fatalf("%s got expectedSuccessCount %d, wanted %d\n", name, val, expectedSuccessCount)
406 }
407 }
408 }
409 }
410
View as plain text