...

Source file src/google.golang.org/grpc/xds/internal/balancer/outlierdetection/e2e_test/outlierdetection_test.go

Documentation: google.golang.org/grpc/xds/internal/balancer/outlierdetection/e2e_test

     1  /*
     2   *
     3   * Copyright 2022 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  // Package e2e_test contains e2e test cases for the Outlier Detection LB Policy.
    20  package e2e_test
    21  
    22  import (
    23  	"context"
    24  	"errors"
    25  	"fmt"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/google/go-cmp/cmp"
    30  	"google.golang.org/grpc"
    31  	"google.golang.org/grpc/credentials/insecure"
    32  	"google.golang.org/grpc/internal"
    33  	"google.golang.org/grpc/internal/grpctest"
    34  	"google.golang.org/grpc/internal/stubserver"
    35  	"google.golang.org/grpc/peer"
    36  	"google.golang.org/grpc/resolver"
    37  	"google.golang.org/grpc/resolver/manual"
    38  	"google.golang.org/grpc/serviceconfig"
    39  
    40  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    41  	testpb "google.golang.org/grpc/interop/grpc_testing"
    42  
    43  	_ "google.golang.org/grpc/xds/internal/balancer/outlierdetection" // To register helper functions which register/unregister Outlier Detection LB Policy.
    44  )
    45  
    46  var defaultTestTimeout = 5 * time.Second
    47  
// s is the receiver type on which the test cases in this file are declared as
// methods. It embeds grpctest.Tester, and a value of it is handed to
// grpctest.RunSubTests by Test.
type s struct {
	grpctest.Tester
}
    51  
// Test is the top-level test function of this file. It passes an s value to
// grpctest.RunSubTests, which presumably runs each Test* method declared on s
// as a subtest (NOTE(review): confirm against the grpctest package).
func Test(t *testing.T) {
	grpctest.RunSubTests(t, s{})
}
    55  
    56  // Setup spins up three test backends, each listening on a port on localhost.
    57  // Two of the backends are configured to always reply with an empty response and
    58  // no error and one is configured to always return an error.
    59  func setupBackends(t *testing.T) ([]string, func()) {
    60  	t.Helper()
    61  
    62  	backends := make([]*stubserver.StubServer, 3)
    63  	addresses := make([]string, 3)
    64  	// Construct and start 2 working backends.
    65  	for i := 0; i < 2; i++ {
    66  		backend := &stubserver.StubServer{
    67  			EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) {
    68  				return &testpb.Empty{}, nil
    69  			},
    70  		}
    71  		if err := backend.StartServer(); err != nil {
    72  			t.Fatalf("Failed to start backend: %v", err)
    73  		}
    74  		t.Logf("Started good TestService backend at: %q", backend.Address)
    75  		backends[i] = backend
    76  		addresses[i] = backend.Address
    77  	}
    78  
    79  	// Construct and start a failing backend.
    80  	backend := &stubserver.StubServer{
    81  		EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) {
    82  			return nil, errors.New("some error")
    83  		},
    84  	}
    85  	if err := backend.StartServer(); err != nil {
    86  		t.Fatalf("Failed to start backend: %v", err)
    87  	}
    88  	t.Logf("Started bad TestService backend at: %q", backend.Address)
    89  	backends[2] = backend
    90  	addresses[2] = backend.Address
    91  	cancel := func() {
    92  		for _, backend := range backends {
    93  			backend.Stop()
    94  		}
    95  	}
    96  	return addresses, cancel
    97  }
    98  
    99  // checkRoundRobinRPCs verifies that EmptyCall RPCs on the given ClientConn,
   100  // connected to a server exposing the test.grpc_testing.TestService, are
   101  // roundrobined across the given backend addresses.
   102  //
   103  // Returns a non-nil error if context deadline expires before RPCs start to get
   104  // roundrobined across the given backends.
   105  func checkRoundRobinRPCs(ctx context.Context, client testgrpc.TestServiceClient, addrs []resolver.Address) error {
   106  	wantAddrCount := make(map[string]int)
   107  	for _, addr := range addrs {
   108  		wantAddrCount[addr.Addr]++
   109  	}
   110  	gotAddrCount := make(map[string]int)
   111  	for ; ctx.Err() == nil; <-time.After(time.Millisecond) {
   112  		gotAddrCount = make(map[string]int)
   113  		// Perform 3 iterations.
   114  		var iterations [][]string
   115  		for i := 0; i < 3; i++ {
   116  			iteration := make([]string, len(addrs))
   117  			for c := 0; c < len(addrs); c++ {
   118  				var peer peer.Peer
   119  				client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer))
   120  				if peer.Addr != nil {
   121  					iteration[c] = peer.Addr.String()
   122  				}
   123  			}
   124  			iterations = append(iterations, iteration)
   125  		}
   126  		// Ensure the first iteration contains all addresses in addrs.
   127  		for _, addr := range iterations[0] {
   128  			gotAddrCount[addr]++
   129  		}
   130  		if diff := cmp.Diff(gotAddrCount, wantAddrCount); diff != "" {
   131  			continue
   132  		}
   133  		// Ensure all three iterations contain the same addresses.
   134  		if !cmp.Equal(iterations[0], iterations[1]) || !cmp.Equal(iterations[0], iterations[2]) {
   135  			continue
   136  		}
   137  		return nil
   138  	}
   139  	return fmt.Errorf("timeout when waiting for roundrobin distribution of RPCs across addresses: %v; got: %v", addrs, gotAddrCount)
   140  }
   141  
   142  // TestOutlierDetectionAlgorithmsE2E tests the Outlier Detection Success Rate
   143  // and Failure Percentage algorithms in an e2e fashion. The Outlier Detection
   144  // Balancer is configured as the top level LB Policy of the channel with a Round
   145  // Robin child, and connects to three upstreams. Two of the upstreams are healthy and
   146  // one is unhealthy. The two algorithms should at some point eject the failing
   147  // upstream, causing RPC's to not be routed to that upstream, and only be
   148  // Round Robined across the two healthy upstreams. Other than the intervals the
   149  // unhealthy upstream is ejected, RPC's should regularly round robin
   150  // across all three upstreams.
   151  func (s) TestOutlierDetectionAlgorithmsE2E(t *testing.T) {
   152  	tests := []struct {
   153  		name     string
   154  		odscJSON string
   155  	}{
   156  		{
   157  			name: "Success Rate Algorithm",
   158  			odscJSON: `
   159  {
   160    "loadBalancingConfig": [
   161      {
   162        "outlier_detection_experimental": {
   163          "interval": "0.050s",
   164  		"baseEjectionTime": "0.100s",
   165  		"maxEjectionTime": "300s",
   166  		"maxEjectionPercent": 33,
   167  		"successRateEjection": {
   168  			"stdevFactor": 50,
   169  			"enforcementPercentage": 100,
   170  			"minimumHosts": 3,
   171  			"requestVolume": 5
   172  		},
   173          "childPolicy": [{"round_robin": {}}]
   174        }
   175      }
   176    ]
   177  }`,
   178  		},
   179  		{
   180  			name: "Failure Percentage Algorithm",
   181  			odscJSON: `
   182  {
   183    "loadBalancingConfig": [
   184      {
   185        "outlier_detection_experimental": {
   186          "interval": "0.050s",
   187  		"baseEjectionTime": "0.100s",
   188  		"maxEjectionTime": "300s",
   189  		"maxEjectionPercent": 33,
   190  		"failurePercentageEjection": {
   191  			"threshold": 50,
   192  			"enforcementPercentage": 100,
   193  			"minimumHosts": 3,
   194  			"requestVolume": 5
   195  		},
   196          "childPolicy": [{"round_robin": {}}
   197  		]
   198        }
   199      }
   200    ]
   201  }`,
   202  		},
   203  	}
   204  	for _, test := range tests {
   205  		t.Run(test.name, func(t *testing.T) {
   206  			addresses, cancel := setupBackends(t)
   207  			defer cancel()
   208  
   209  			mr := manual.NewBuilderWithScheme("od-e2e")
   210  			defer mr.Close()
   211  
   212  			sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(test.odscJSON)
   213  			// The full list of addresses.
   214  			fullAddresses := []resolver.Address{
   215  				{Addr: addresses[0]},
   216  				{Addr: addresses[1]},
   217  				{Addr: addresses[2]},
   218  			}
   219  			mr.InitialState(resolver.State{
   220  				Addresses:     fullAddresses,
   221  				ServiceConfig: sc,
   222  			})
   223  
   224  			cc, err := grpc.NewClient(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials()))
   225  			if err != nil {
   226  				t.Fatalf("grpc.NewClient() failed: %v", err)
   227  			}
   228  			defer cc.Close()
   229  			ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   230  			defer cancel()
   231  			testServiceClient := testgrpc.NewTestServiceClient(cc)
   232  
   233  			// At first, due to no statistics on each of the backends, the 3
   234  			// upstreams should all be round robined across.
   235  			if err = checkRoundRobinRPCs(ctx, testServiceClient, fullAddresses); err != nil {
   236  				t.Fatalf("error in expected round robin: %v", err)
   237  			}
   238  
   239  			// The addresses which don't return errors.
   240  			okAddresses := []resolver.Address{
   241  				{Addr: addresses[0]},
   242  				{Addr: addresses[1]},
   243  			}
   244  			// After calling the three upstreams, one of them constantly error
   245  			// and should eventually be ejected for a period of time. This
   246  			// period of time should cause the RPC's to be round robined only
   247  			// across the two that are healthy.
   248  			if err = checkRoundRobinRPCs(ctx, testServiceClient, okAddresses); err != nil {
   249  				t.Fatalf("error in expected round robin: %v", err)
   250  			}
   251  
   252  			// The failing upstream isn't ejected indefinitely, and eventually
   253  			// should be unejected in subsequent iterations of the interval
   254  			// algorithm as per the spec for the two specific algorithms.
   255  			if err = checkRoundRobinRPCs(ctx, testServiceClient, fullAddresses); err != nil {
   256  				t.Fatalf("error in expected round robin: %v", err)
   257  			}
   258  		})
   259  	}
   260  }
   261  
   262  // TestNoopConfiguration tests the Outlier Detection Balancer configured with a
   263  // noop configuration. The noop configuration should cause the Outlier Detection
   264  // Balancer to not count RPC's, and thus never eject any upstreams and continue
   265  // to route to every upstream connected to, even if they continuously error.
   266  // Once the Outlier Detection Balancer gets reconfigured with configuration
   267  // requiring counting RPC's, the Outlier Detection Balancer should start
   268  // ejecting any upstreams as specified in the configuration.
   269  func (s) TestNoopConfiguration(t *testing.T) {
   270  	addresses, cancel := setupBackends(t)
   271  	defer cancel()
   272  
   273  	mr := manual.NewBuilderWithScheme("od-e2e")
   274  	defer mr.Close()
   275  
   276  	noopODServiceConfigJSON := `
   277  {
   278    "loadBalancingConfig": [
   279      {
   280        "outlier_detection_experimental": {
   281          "interval": "0.050s",
   282  		"baseEjectionTime": "0.100s",
   283  		"maxEjectionTime": "300s",
   284  		"maxEjectionPercent": 33,
   285          "childPolicy": [{"round_robin": {}}]
   286        }
   287      }
   288    ]
   289  }`
   290  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(noopODServiceConfigJSON)
   291  	// The full list of addresses.
   292  	fullAddresses := []resolver.Address{
   293  		{Addr: addresses[0]},
   294  		{Addr: addresses[1]},
   295  		{Addr: addresses[2]},
   296  	}
   297  	mr.InitialState(resolver.State{
   298  		Addresses:     fullAddresses,
   299  		ServiceConfig: sc,
   300  	})
   301  	cc, err := grpc.NewClient(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials()))
   302  	if err != nil {
   303  		t.Fatalf("grpc.NewClient() failed: %v", err)
   304  	}
   305  	defer cc.Close()
   306  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   307  	defer cancel()
   308  	testServiceClient := testgrpc.NewTestServiceClient(cc)
   309  
   310  	for i := 0; i < 2; i++ {
   311  		// Since the Outlier Detection Balancer starts with a noop
   312  		// configuration, it shouldn't count RPCs or eject any upstreams. Thus,
   313  		// even though an upstream it connects to constantly errors, it should
   314  		// continue to Round Robin across every upstream.
   315  		if err := checkRoundRobinRPCs(ctx, testServiceClient, fullAddresses); err != nil {
   316  			t.Fatalf("error in expected round robin: %v", err)
   317  		}
   318  	}
   319  
   320  	// Reconfigure the Outlier Detection Balancer with a configuration that
   321  	// specifies to count RPC's and eject upstreams. Due to the balancer no
   322  	// longer being a noop, it should eject any unhealthy addresses as specified
   323  	// by the failure percentage portion of the configuration.
   324  	countingODServiceConfigJSON := `
   325  {
   326    "loadBalancingConfig": [
   327      {
   328        "outlier_detection_experimental": {
   329          "interval": "0.050s",
   330  		"baseEjectionTime": "0.100s",
   331  		"maxEjectionTime": "300s",
   332  		"maxEjectionPercent": 33,
   333  		"failurePercentageEjection": {
   334  			"threshold": 50,
   335  			"enforcementPercentage": 100,
   336  			"minimumHosts": 3,
   337  			"requestVolume": 5
   338  		},
   339          "childPolicy": [{"round_robin": {}}]
   340        }
   341      }
   342    ]
   343  }`
   344  	sc = internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(countingODServiceConfigJSON)
   345  
   346  	mr.UpdateState(resolver.State{
   347  		Addresses:     fullAddresses,
   348  		ServiceConfig: sc,
   349  	})
   350  
   351  	// At first on the reconfigured balancer, the balancer has no stats
   352  	// collected about upstreams. Thus, it should at first route across the full
   353  	// upstream list.
   354  	if err = checkRoundRobinRPCs(ctx, testServiceClient, fullAddresses); err != nil {
   355  		t.Fatalf("error in expected round robin: %v", err)
   356  	}
   357  
   358  	// The addresses which don't return errors.
   359  	okAddresses := []resolver.Address{
   360  		{Addr: addresses[0]},
   361  		{Addr: addresses[1]},
   362  	}
   363  	// Now that the reconfigured balancer has data about the failing upstream,
   364  	// it should eject the upstream and only route across the two healthy
   365  	// upstreams.
   366  	if err = checkRoundRobinRPCs(ctx, testServiceClient, okAddresses); err != nil {
   367  		t.Fatalf("error in expected round robin: %v", err)
   368  	}
   369  }
   370  

View as plain text