...

Source file src/go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp/health_test.go

Documentation: go.etcd.io/etcd/server/v3/etcdserver/api/etcdhttp

     1  package etcdhttp
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"net/http"
     8  	"net/http/httptest"
     9  	"strings"
    10  	"testing"
    11  
    12  	"github.com/prometheus/client_golang/prometheus"
    13  	"go.uber.org/zap/zaptest"
    14  
    15  	pb "go.etcd.io/etcd/api/v3/etcdserverpb"
    16  	"go.etcd.io/etcd/client/pkg/v3/testutil"
    17  	"go.etcd.io/etcd/client/pkg/v3/types"
    18  	"go.etcd.io/etcd/raft/v3"
    19  	"go.etcd.io/etcd/server/v3/auth"
    20  	"go.etcd.io/etcd/server/v3/config"
    21  	betesting "go.etcd.io/etcd/server/v3/mvcc/backend/testing"
    22  )
    23  
    24  type fakeHealthServer struct {
    25  	fakeServer
    26  	serializableReadError error
    27  	linearizableReadError error
    28  	missingLeader         bool
    29  	authStore             auth.AuthStore
    30  }
    31  
    32  func (s *fakeHealthServer) Range(_ context.Context, req *pb.RangeRequest) (*pb.RangeResponse, error) {
    33  	if req.Serializable {
    34  		return nil, s.serializableReadError
    35  	}
    36  	return nil, s.linearizableReadError
    37  }
    38  
    39  func (s *fakeHealthServer) Config() config.ServerConfig {
    40  	return config.ServerConfig{}
    41  }
    42  
    43  func (s *fakeHealthServer) Leader() types.ID {
    44  	if !s.missingLeader {
    45  		return 1
    46  	}
    47  	return types.ID(raft.None)
    48  }
    49  
    50  func (s *fakeHealthServer) AuthStore() auth.AuthStore { return s.authStore }
    51  
    52  func (s *fakeHealthServer) ClientCertAuthEnabled() bool { return false }
    53  
    54  type healthTestCase struct {
    55  	name             string
    56  	healthCheckURL   string
    57  	expectStatusCode int
    58  	inResult         []string
    59  	notInResult      []string
    60  
    61  	alarms        []*pb.AlarmMember
    62  	apiError      error
    63  	missingLeader bool
    64  }
    65  
    66  func TestHealthHandler(t *testing.T) {
    67  	// define the input and expected output
    68  	// input: alarms, and healthCheckURL
    69  	tests := []healthTestCase{
    70  		{
    71  			name:             "Healthy if no alarm",
    72  			alarms:           []*pb.AlarmMember{},
    73  			healthCheckURL:   "/health",
    74  			expectStatusCode: http.StatusOK,
    75  		},
    76  		{
    77  			name:             "Unhealthy if NOSPACE alarm is on",
    78  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
    79  			healthCheckURL:   "/health",
    80  			expectStatusCode: http.StatusServiceUnavailable,
    81  		},
    82  		{
    83  			name:             "Healthy if NOSPACE alarm is on and excluded",
    84  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
    85  			healthCheckURL:   "/health?exclude=NOSPACE",
    86  			expectStatusCode: http.StatusOK,
    87  		},
    88  		{
    89  			name:             "Healthy if NOSPACE alarm is excluded",
    90  			alarms:           []*pb.AlarmMember{},
    91  			healthCheckURL:   "/health?exclude=NOSPACE",
    92  			expectStatusCode: http.StatusOK,
    93  		},
    94  		{
    95  			name:             "Healthy if multiple NOSPACE alarms are on and excluded",
    96  			alarms:           []*pb.AlarmMember{{MemberID: uint64(1), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(2), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(3), Alarm: pb.AlarmType_NOSPACE}},
    97  			healthCheckURL:   "/health?exclude=NOSPACE",
    98  			expectStatusCode: http.StatusOK,
    99  		},
   100  		{
   101  			name:             "Unhealthy if NOSPACE alarms is excluded and CORRUPT is on",
   102  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}},
   103  			healthCheckURL:   "/health?exclude=NOSPACE",
   104  			expectStatusCode: http.StatusServiceUnavailable,
   105  		},
   106  		{
   107  			name:             "Unhealthy if both NOSPACE and CORRUPT are on and excluded",
   108  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}, {MemberID: uint64(1), Alarm: pb.AlarmType_CORRUPT}},
   109  			healthCheckURL:   "/health?exclude=NOSPACE&exclude=CORRUPT",
   110  			expectStatusCode: http.StatusOK,
   111  		},
   112  		{
   113  			name:             "Unhealthy if api is not available",
   114  			healthCheckURL:   "/health",
   115  			apiError:         fmt.Errorf("Unexpected error"),
   116  			expectStatusCode: http.StatusServiceUnavailable,
   117  		},
   118  		{
   119  			name:             "Unhealthy if no leader",
   120  			healthCheckURL:   "/health",
   121  			expectStatusCode: http.StatusServiceUnavailable,
   122  			missingLeader:    true,
   123  		},
   124  		{
   125  			name:             "Healthy if no leader and serializable=true",
   126  			healthCheckURL:   "/health?serializable=true",
   127  			expectStatusCode: http.StatusOK,
   128  			missingLeader:    true,
   129  		},
   130  	}
   131  
   132  	for _, tt := range tests {
   133  		t.Run(tt.name, func(t *testing.T) {
   134  			mux := http.NewServeMux()
   135  			lg := zaptest.NewLogger(t)
   136  			be, _ := betesting.NewDefaultTmpBackend(t)
   137  			defer betesting.Close(t, be)
   138  			HandleHealth(zaptest.NewLogger(t), mux, &fakeHealthServer{
   139  				fakeServer:            fakeServer{alarms: tt.alarms},
   140  				serializableReadError: tt.apiError,
   141  				linearizableReadError: tt.apiError,
   142  				missingLeader:         tt.missingLeader,
   143  				authStore:             auth.NewAuthStore(lg, be, nil, 0),
   144  			})
   145  			ts := httptest.NewServer(mux)
   146  			defer ts.Close()
   147  			checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, nil, nil)
   148  		})
   149  	}
   150  }
   151  
   152  func TestHttpSubPath(t *testing.T) {
   153  	be, _ := betesting.NewDefaultTmpBackend(t)
   154  	defer betesting.Close(t, be)
   155  	tests := []healthTestCase{
   156  		{
   157  			name:             "/readyz/data_corruption ok",
   158  			healthCheckURL:   "/readyz/data_corruption",
   159  			expectStatusCode: http.StatusOK,
   160  		},
   161  		{
   162  			name:             "/readyz/serializable_read not ok with error",
   163  			apiError:         fmt.Errorf("Unexpected error"),
   164  			healthCheckURL:   "/readyz/serializable_read",
   165  			expectStatusCode: http.StatusServiceUnavailable,
   166  			notInResult:      []string{"data_corruption"},
   167  		},
   168  		{
   169  			name:             "/readyz/non_exist 404",
   170  			healthCheckURL:   "/readyz/non_exist",
   171  			expectStatusCode: http.StatusNotFound,
   172  		},
   173  	}
   174  	for _, tt := range tests {
   175  		t.Run(tt.name, func(t *testing.T) {
   176  			mux := http.NewServeMux()
   177  			logger := zaptest.NewLogger(t)
   178  			s := &fakeHealthServer{
   179  				serializableReadError: tt.apiError,
   180  				authStore:             auth.NewAuthStore(logger, be, nil, 0),
   181  			}
   182  			HandleHealth(logger, mux, s)
   183  			ts := httptest.NewServer(mux)
   184  			defer ts.Close()
   185  			checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, tt.inResult, tt.notInResult)
   186  			checkMetrics(t, tt.healthCheckURL, "", tt.expectStatusCode)
   187  		})
   188  	}
   189  }
   190  
   191  func TestDataCorruptionCheck(t *testing.T) {
   192  	be, _ := betesting.NewDefaultTmpBackend(t)
   193  	defer betesting.Close(t, be)
   194  	tests := []healthTestCase{
   195  		{
   196  			name:             "Live if CORRUPT alarm is on",
   197  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_CORRUPT}},
   198  			healthCheckURL:   "/livez",
   199  			expectStatusCode: http.StatusOK,
   200  			notInResult:      []string{"data_corruption"},
   201  		},
   202  		{
   203  			name:             "Not ready if CORRUPT alarm is on",
   204  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_CORRUPT}},
   205  			healthCheckURL:   "/readyz",
   206  			expectStatusCode: http.StatusServiceUnavailable,
   207  			inResult:         []string{"[-]data_corruption failed: alarm activated: CORRUPT"},
   208  		},
   209  		{
   210  			name:             "ready if CORRUPT alarm is not on",
   211  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
   212  			healthCheckURL:   "/readyz",
   213  			expectStatusCode: http.StatusOK,
   214  		},
   215  		{
   216  			name:             "ready if CORRUPT alarm is excluded",
   217  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_CORRUPT}, {MemberID: uint64(0), Alarm: pb.AlarmType_NOSPACE}},
   218  			healthCheckURL:   "/readyz?exclude=data_corruption",
   219  			expectStatusCode: http.StatusOK,
   220  		},
   221  		{
   222  			name:             "Not ready if CORRUPT alarm is on",
   223  			alarms:           []*pb.AlarmMember{{MemberID: uint64(0), Alarm: pb.AlarmType_CORRUPT}},
   224  			healthCheckURL:   "/readyz?exclude=non_exist",
   225  			expectStatusCode: http.StatusServiceUnavailable,
   226  			inResult:         []string{"[-]data_corruption failed: alarm activated: CORRUPT"},
   227  		},
   228  	}
   229  	for _, tt := range tests {
   230  		t.Run(tt.name, func(t *testing.T) {
   231  			mux := http.NewServeMux()
   232  			logger := zaptest.NewLogger(t)
   233  			s := &fakeHealthServer{
   234  				authStore: auth.NewAuthStore(logger, be, nil, 0),
   235  			}
   236  			HandleHealth(logger, mux, s)
   237  			ts := httptest.NewServer(mux)
   238  			defer ts.Close()
   239  			// OK before alarms are activated.
   240  			checkHttpResponse(t, ts, tt.healthCheckURL, http.StatusOK, nil, nil)
   241  			// Activate the alarms.
   242  			s.alarms = tt.alarms
   243  			checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, tt.inResult, tt.notInResult)
   244  		})
   245  	}
   246  }
   247  
   248  func TestSerializableReadCheck(t *testing.T) {
   249  	be, _ := betesting.NewDefaultTmpBackend(t)
   250  	defer betesting.Close(t, be)
   251  	tests := []healthTestCase{
   252  		{
   253  			name:             "Alive normal",
   254  			healthCheckURL:   "/livez?verbose",
   255  			expectStatusCode: http.StatusOK,
   256  			inResult:         []string{"[+]serializable_read ok"},
   257  		},
   258  		{
   259  			name:             "Not alive if range api is not available",
   260  			healthCheckURL:   "/livez",
   261  			apiError:         fmt.Errorf("Unexpected error"),
   262  			expectStatusCode: http.StatusServiceUnavailable,
   263  			inResult:         []string{"[-]serializable_read failed: Unexpected error"},
   264  		},
   265  		{
   266  			name:             "Not ready if range api is not available",
   267  			healthCheckURL:   "/readyz",
   268  			apiError:         fmt.Errorf("Unexpected error"),
   269  			expectStatusCode: http.StatusServiceUnavailable,
   270  			inResult:         []string{"[-]serializable_read failed: Unexpected error"},
   271  		},
   272  	}
   273  	for _, tt := range tests {
   274  		t.Run(tt.name, func(t *testing.T) {
   275  			mux := http.NewServeMux()
   276  			logger := zaptest.NewLogger(t)
   277  			s := &fakeHealthServer{
   278  				serializableReadError: tt.apiError,
   279  				authStore:             auth.NewAuthStore(logger, be, nil, 0),
   280  			}
   281  			HandleHealth(logger, mux, s)
   282  			ts := httptest.NewServer(mux)
   283  			defer ts.Close()
   284  			checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, tt.inResult, tt.notInResult)
   285  			checkMetrics(t, tt.healthCheckURL, "serializable_read", tt.expectStatusCode)
   286  		})
   287  	}
   288  }
   289  
   290  func TestLinearizableReadCheck(t *testing.T) {
   291  	be, _ := betesting.NewDefaultTmpBackend(t)
   292  	defer betesting.Close(t, be)
   293  	tests := []healthTestCase{
   294  		{
   295  			name:             "Alive normal",
   296  			healthCheckURL:   "/livez?verbose",
   297  			expectStatusCode: http.StatusOK,
   298  			inResult:         []string{"[+]serializable_read ok"},
   299  		},
   300  		{
   301  			name:             "Alive if lineariable range api is not available",
   302  			healthCheckURL:   "/livez",
   303  			apiError:         fmt.Errorf("Unexpected error"),
   304  			expectStatusCode: http.StatusOK,
   305  		},
   306  		{
   307  			name:             "Not ready if range api is not available",
   308  			healthCheckURL:   "/readyz",
   309  			apiError:         fmt.Errorf("Unexpected error"),
   310  			expectStatusCode: http.StatusServiceUnavailable,
   311  			inResult:         []string{"[+]serializable_read ok", "[-]linearizable_read failed: Unexpected error"},
   312  		},
   313  	}
   314  	for _, tt := range tests {
   315  		t.Run(tt.name, func(t *testing.T) {
   316  			mux := http.NewServeMux()
   317  			logger := zaptest.NewLogger(t)
   318  			s := &fakeHealthServer{
   319  				linearizableReadError: tt.apiError,
   320  				authStore:             auth.NewAuthStore(logger, be, nil, 0),
   321  			}
   322  			HandleHealth(logger, mux, s)
   323  			ts := httptest.NewServer(mux)
   324  			defer ts.Close()
   325  			checkHttpResponse(t, ts, tt.healthCheckURL, tt.expectStatusCode, tt.inResult, tt.notInResult)
   326  			checkMetrics(t, tt.healthCheckURL, "linearizable_read", tt.expectStatusCode)
   327  		})
   328  	}
   329  }
   330  
   331  func checkHttpResponse(t *testing.T, ts *httptest.Server, url string, expectStatusCode int, inResult []string, notInResult []string) {
   332  	res, err := ts.Client().Do(&http.Request{Method: http.MethodGet, URL: testutil.MustNewURL(t, ts.URL+url)})
   333  
   334  	if err != nil {
   335  		t.Fatalf("fail serve http request %s %v", url, err)
   336  	}
   337  	if res.StatusCode != expectStatusCode {
   338  		t.Errorf("want statusCode %d but got %d", expectStatusCode, res.StatusCode)
   339  	}
   340  	defer res.Body.Close()
   341  	b, err := io.ReadAll(res.Body)
   342  	if err != nil {
   343  		t.Fatalf("Failed to read response for %s", url)
   344  	}
   345  	result := string(b)
   346  	for _, substr := range inResult {
   347  		if !strings.Contains(result, substr) {
   348  			t.Errorf("Could not find substring : %s, in response: %s", substr, result)
   349  			return
   350  		}
   351  	}
   352  	for _, substr := range notInResult {
   353  		if strings.Contains(result, substr) {
   354  			t.Errorf("Do not expect substring : %s, in response: %s", substr, result)
   355  			return
   356  		}
   357  	}
   358  }
   359  
   360  func checkMetrics(t *testing.T, url, checkName string, expectStatusCode int) {
   361  	defer healthCheckGauge.Reset()
   362  	defer healthCheckCounter.Reset()
   363  
   364  	typeName := strings.TrimPrefix(strings.Split(url, "?")[0], "/")
   365  	if len(checkName) == 0 {
   366  		checkName = strings.Split(typeName, "/")[1]
   367  		typeName = strings.Split(typeName, "/")[0]
   368  	}
   369  
   370  	expectedSuccessCount := 1
   371  	expectedErrorCount := 0
   372  	if expectStatusCode != http.StatusOK {
   373  		expectedSuccessCount = 0
   374  		expectedErrorCount = 1
   375  	}
   376  
   377  	gather, _ := prometheus.DefaultGatherer.Gather()
   378  	for _, mf := range gather {
   379  		name := *mf.Name
   380  		val := 0
   381  		switch name {
   382  		case "etcd_server_healthcheck":
   383  			val = int(mf.GetMetric()[0].GetGauge().GetValue())
   384  		case "etcd_server_healthcheck_total":
   385  			val = int(mf.GetMetric()[0].GetCounter().GetValue())
   386  		default:
   387  			continue
   388  		}
   389  		labelMap := make(map[string]string)
   390  		for _, label := range mf.GetMetric()[0].Label {
   391  			labelMap[label.GetName()] = label.GetValue()
   392  		}
   393  		if typeName != labelMap["type"] {
   394  			continue
   395  		}
   396  		if labelMap["name"] != checkName {
   397  			continue
   398  		}
   399  		if statusLabel, found := labelMap["status"]; found && statusLabel == HealthStatusError {
   400  			if val != expectedErrorCount {
   401  				t.Fatalf("%s got errorCount %d, wanted %d\n", name, val, expectedErrorCount)
   402  			}
   403  		} else {
   404  			if val != expectedSuccessCount {
   405  				t.Fatalf("%s got expectedSuccessCount %d, wanted %d\n", name, val, expectedSuccessCount)
   406  			}
   407  		}
   408  	}
   409  }
   410  

View as plain text