...

Source file src/github.com/GoogleCloudPlatform/k8s-config-connector/pkg/test/controller/reconcile.go

Documentation: github.com/GoogleCloudPlatform/k8s-config-connector/pkg/test/controller

     1  // Copyright 2022 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package testcontroller
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"log"
    22  	"math/rand"
    23  	"regexp"
    24  	"strings"
    25  	"sync"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/GoogleCloudPlatform/k8s-config-connector/pkg/controller/kccmanager/nocache"
    30  	"github.com/GoogleCloudPlatform/k8s-config-connector/pkg/crd/crdloader"
    31  	"github.com/GoogleCloudPlatform/k8s-config-connector/pkg/k8s"
    32  	"github.com/GoogleCloudPlatform/k8s-config-connector/pkg/test"
    33  	testgcp "github.com/GoogleCloudPlatform/k8s-config-connector/pkg/test/gcp"
    34  	cnrmwebhook "github.com/GoogleCloudPlatform/k8s-config-connector/pkg/webhook"
    35  
    36  	corev1 "k8s.io/api/core/v1"
    37  	apiextensions "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
    38  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    39  	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    40  	"k8s.io/apimachinery/pkg/types"
    41  	"k8s.io/klog/v2"
    42  	"sigs.k8s.io/controller-runtime/pkg/client"
    43  	"sigs.k8s.io/controller-runtime/pkg/envtest"
    44  	"sigs.k8s.io/controller-runtime/pkg/manager"
    45  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    46  	"sigs.k8s.io/controller-runtime/pkg/webhook"
    47  )
    48  
const (
	// transientErrorsMaxRetries caps how many times a reconcile is attempted in
	// total (the first call plus any retries) while transient errors keep occurring.
	transientErrorsMaxRetries = 5
	// transientErrorsRetryInterval is how long to sleep after a transient error
	// before the reconcile is attempted again.
	transientErrorsRetryInterval = 5 * time.Second
)
    55  
    56  // StartTestManager begins a new test manager, and returns a function
    57  // to gracefully shutdown.
    58  func StartTestManagerInstance(env *envtest.Environment, testType test.TestType, whCfgs []cnrmwebhook.WebhookConfig) (manager.Manager, func()) {
    59  	mgr, stopFunc, err := startTestManager(env, testType, whCfgs)
    60  	if err != nil {
    61  		log.Fatal(err)
    62  	}
    63  	return mgr, stopFunc
    64  }
    65  
    66  func startTestManager(env *envtest.Environment, testType test.TestType, whCfgs []cnrmwebhook.WebhookConfig) (manager.Manager, func(), error) {
    67  	mgr, err := manager.New(env.Config, manager.Options{
    68  		Port:    env.WebhookInstallOptions.LocalServingPort,
    69  		Host:    env.WebhookInstallOptions.LocalServingHost,
    70  		CertDir: env.WebhookInstallOptions.LocalServingCertDir,
    71  		// supply a concrete client to disable the default behavior of caching
    72  		NewClient: nocache.NoCacheClientFunc,
    73  		// Disable metrics server for testing
    74  		MetricsBindAddress: "0",
    75  	})
    76  	if err != nil {
    77  		return nil, nil, fmt.Errorf("error creating manager: %v", err)
    78  	}
    79  	if testType == test.IntegrationTestType {
    80  		server := mgr.GetWebhookServer()
    81  		for _, cfg := range whCfgs {
    82  			server.Register(cfg.Path, &webhook.Admission{Handler: cfg.Handler})
    83  		}
    84  	}
    85  	stop := startMgr(mgr, log.Fatalf)
    86  	return mgr, stop, nil
    87  }
    88  
    89  func StartMgr(t *testing.T, mgr manager.Manager) func() {
    90  	return startMgr(mgr, t.Fatalf)
    91  }
    92  
    93  func startMgr(mgr manager.Manager, mgrStartErrHandler func(string, ...interface{})) func() {
    94  	ctx, cancel := context.WithCancel(context.TODO())
    95  	// it is important to wait for the below goroutine to terminate before attempting to exit the application because
    96  	// otherwise there can be panics and unexpected behavior while the manager is shutting down
    97  	wg := sync.WaitGroup{}
    98  	wg.Add(1)
    99  	go func() {
   100  		defer wg.Done()
   101  		if err := mgr.Start(ctx); err != nil {
   102  			mgrStartErrHandler("unable to start manager: %v", err)
   103  		}
   104  	}()
   105  	stop := func() {
   106  		// calling cancel() will cancel the context 'ctx', the mgr will stop all runnables and Start() will return and
   107  		// the above goroutine will exit
   108  		cancel()
   109  		// wait for the goroutine above to exit (it has a deferred wg.Done())
   110  		wg.Wait()
   111  	}
   112  	return stop
   113  }
   114  
   115  // isTransientError reports whether the reconciler error is a random "flake" and we should retry.
   116  func isTransientError(t *testing.T, err error) bool {
   117  	if err == nil {
   118  		return false
   119  	}
   120  
   121  	// Print the chain so we don't have to use string matching for future errors
   122  	var chain []string
   123  	current := err
   124  	for {
   125  		chain = append(chain, fmt.Sprintf("[%T: %+v]", current, current))
   126  		current = errors.Unwrap(current)
   127  		if current == nil {
   128  			break
   129  		}
   130  	}
   131  
   132  	errorMessage := err.Error()
   133  
   134  	// Permission denied errors are considered transient
   135  	// We don't know the exact error currently, use string matching for now...
   136  	//
   137  	// Example error:
   138  	// {"severity":"info","timestamp":"2022-12-06T20:27:32.799Z","logger":"iapidentityawareproxyclient-controller","msg":"creating/updating underlying resource","resource":{"namespace":"jcjjsgqldlbw7hcvseiq","name":"iapidentityawareproxyclient-jcjjsgqldlbw7hcvseiq"}}
   139  	// W1206 20:27:35.461665  113200 logger.go:58] [DCL WARNING] [RequestID:km5nd0fv]  get returned error: googleapi: Error 403: The caller does not have permission
   140  	// {"severity":"error","timestamp":"2022-12-06T20:27:35.461Z","logger":"iapidentityawareproxyclient-controller","msg":"error applying desired state","resource":{"namespace":"jcjjsgqldlbw7hcvseiq","name":"iapidentityawareproxyclient-jcjjsgqldlbw7hcvseiq"},"error":"googleapi: Error 403: The caller does not have permission"}
   141  	// dynamic_controller_integration_test.go:190: reconcile returned unexpected error: Update call failed: error applying desired state: googleapi: Error 403: The caller does not have permission
   142  	if strings.Contains(errorMessage, "The caller does not have permission") {
   143  		t.Logf("permission error found; considered transient; chain is %v", chain)
   144  		return true
   145  	}
   146  
   147  	// Internal errors are considered transient
   148  	// We don't know the exact error currently, use string matching for now...
   149  	//
   150  	// Example error:
   151  	// chain is [[*errors.errorString: Delete call failed: error deleting resource: [{0 Error when reading or editing Project Service projects/clienttls-244gvcgzxgegwhmfvqgq/services/: Error disabling service "networksecurity.googleapis.com" for project "clienttls-244gvcgzxgegwhmfvqgq": Error waiting for api to disable: Error code 13, message: [An internal exception occurred.  Help Token: AZWD64pDMtDdLt4XOiuQgfBiJS-s2K6hSHg4cKv6GBl2Wibfb_wEnkl8HZjT7unqZSibwlNEmXpHwJ3AFbmfidKSWtWc9CtNL15HcR53H0ETgtB8] with failed services [networksecurity.googleapis.com]  []}]]]
   152  	//testreconciler.go:96: reconcile returned unexpected error: Delete call failed: error deleting resource: [{0 Error when reading or editing Project Service projects/clienttls-244gvcgzxgegwhmfvqgq/services/: Error disabling service "networksecurity.googleapis.com" for project "clienttls-244gvcgzxgegwhmfvqgq": Error waiting for api to disable: Error code 13, message: [An internal exception occurred.
   153  	if strings.Contains(errorMessage, "An internal exception occurred") {
   154  		t.Logf("internal error found; considered transient; chain is %v", chain)
   155  		return true
   156  	}
   157  
   158  	// "is not ready" errors are considered transient
   159  	// We don't know the exact error currently, use string matching for now...
   160  	//
   161  	// Example error:
   162  	// reconcile.go:164: error was not considered transient; chain is [[*errors.errorString: Update call failed: error applying desired state: operation received error: error code "3", message: The resource 'projects/cnrm-test-mqtuo70y3lg3w1m7/regions/us-central1/subnetworks/default' is not ready, details: []
   163  	// details: map[]]]
   164  	if strings.Contains(errorMessage, "is not ready") {
   165  		t.Logf("internal error found; considered transient; chain is %v", chain)
   166  		return true
   167  	}
   168  
   169  	// "missing permission on" errors are considered transient
   170  	// We don't know the exact error currently, use string matching for now...
   171  	//
   172  	// Example error:
   173  	// reconcile.go:175: error was not considered transient; chain is [[*errors.errorString: Update call failed: error applying desired state: summary: failed pre-requisites: missing permission on "billingAccounts/0162D7-7B0CB6-ED962E": billing.resourceAssociations.create]]
   174  	if strings.Contains(errorMessage, "missing permission on") {
   175  		t.Logf("internal error found; considered transient; chain is %v", chain)
   176  		return true
   177  	}
   178  
   179  	// "Hook call/poll failed for service" errors are considered transient
   180  	// We don't know the exact error currently, use string matching for now...
   181  	//
   182  	// Example error:
   183  	// testreconciler.go:96: reconcile returned unexpected error: Delete call failed: error deleting resource: [{0 Error when reading or editing Project Service projects/clienttls-aaoksjdrfqbos22kkhaa/services/: Error disabling service "networksecurity.googleapis.com" for project "clienttls-aaoksjdrfqbos22kkhaa": Error waiting for api to disable: Error code 8, message: [Hook call/poll failed for service "networksecurity.googleapis.com".
   184  	// Help Token: AZWD64q7zyHTI4hHRS7MG0gHM4T8nMAXsiKCMAohWDFWVzK5BIZes3oQScpmnmkpTBlr0T9zldAZZuOWsjgv7BdRwGCGoOFdr2KqNqOarqlffbV3] with failed services [networksecurity.googleapis.com]  []}]
   185  	if strings.Contains(errorMessage, "Hook call/poll failed for service") {
   186  		t.Logf("internal error found; considered transient; chain is %v", chain)
   187  		return true
   188  	}
   189  
   190  	t.Logf("error was not considered transient; chain is %v", chain)
   191  	return false
   192  }
   193  
   194  // RunReconcilerAssertResults asserts the expected state of the reconciler run.
   195  func RunReconcilerAssertResults(t *testing.T, reconciler reconcile.Reconciler, objectMeta v1.ObjectMeta,
   196  	expectedResult reconcile.Result, expectedErrorRegex *regexp.Regexp) {
   197  	attempt := 0
   198  tryAgain:
   199  	attempt++
   200  	t.Helper()
   201  	reconcileRequest := reconcile.Request{NamespacedName: k8s.GetNamespacedName(objectMeta.GetObjectMeta())}
   202  	result, err := reconciler.Reconcile(context.Background(), reconcileRequest)
   203  
   204  	// Retry if we see a "transient" error (up to our retry limit)
   205  	if err != nil {
   206  		if isTransientError(t, err) {
   207  			if attempt < transientErrorsMaxRetries {
   208  				t.Logf("detected transient error, will retry: %v", err)
   209  				time.Sleep(transientErrorsRetryInterval)
   210  				goto tryAgain
   211  			} else {
   212  				t.Logf("detected transient error, but maximum number of retries reached: %v", err)
   213  			}
   214  		}
   215  	}
   216  
   217  	if expectedErrorRegex == nil {
   218  		if err != nil {
   219  			t.Fatalf("reconcile returned unexpected error: %v", err)
   220  		}
   221  	} else {
   222  		if err == nil || !expectedErrorRegex.MatchString(err.Error()) {
   223  			t.Fatalf("error '%v' does not match regex '%v'", err, expectedErrorRegex)
   224  		}
   225  	}
   226  	if !(requeueEqualAndRequeueAfterWithinBoundsOfMean(result, expectedResult)) {
   227  		t.Fatalf("reconcile result mismatch: got '%v', want within %v of '%v'", result, k8s.MeanReconcileReenqueuePeriod/2, expectedResult)
   228  	}
   229  }
   230  
   231  func GetCRDForKind(t *testing.T, kubeClient client.Client, kind string) *apiextensions.CustomResourceDefinition {
   232  	t.Helper()
   233  	c, err := crdloader.GetCRDForKind(kind)
   234  	if err != nil {
   235  		t.Fatal(err)
   236  	}
   237  	return c
   238  }
   239  
   240  func SetupNamespaceForDefaultProject(t *testing.T, c client.Client, name string) {
   241  	projectID := testgcp.GetDefaultProjectID(t)
   242  	SetupNamespaceForProject(t, c, name, projectID)
   243  }
   244  
   245  func SetupNamespaceForProject(t *testing.T, c client.Client, name, projectID string) {
   246  	EnsureNamespaceExistsT(t, c, name)
   247  	EnsureNamespaceHasProjectIDAnnotation(t, c, name, projectID)
   248  }
   249  
   250  func EnsureNamespaceExists(c client.Client, name string) error {
   251  	ns := &corev1.Namespace{}
   252  	ns.SetName(name)
   253  	if err := c.Create(context.Background(), ns); err != nil {
   254  		if !apierrors.IsAlreadyExists(err) {
   255  			return fmt.Errorf("error creating namespace %v: %v", name, err)
   256  		}
   257  	}
   258  	return nil
   259  }
   260  
   261  func EnsureNamespaceExistsT(t *testing.T, c client.Client, name string) {
   262  	t.Helper()
   263  	if err := EnsureNamespaceExists(c, name); err != nil {
   264  		t.Fatal(err)
   265  	}
   266  }
   267  
   268  func EnsureNamespaceHasProjectIDAnnotation(t *testing.T, c client.Client, namespaceName, projectId string) {
   269  	t.Helper()
   270  	err := createNamespaceProjectIdAnnotation(context.TODO(), c, namespaceName, projectId)
   271  	if err != nil {
   272  		t.Fatal(err)
   273  	}
   274  }
   275  
   276  func createNamespaceProjectIdAnnotation(ctx context.Context, c client.Client, namespaceName, projectId string) error {
   277  tryAgain:
   278  	attempt := 0
   279  	var ns corev1.Namespace
   280  	if err := c.Get(ctx, types.NamespacedName{Name: namespaceName}, &ns); err != nil {
   281  		return fmt.Errorf("error getting namespace %q: %w", namespaceName, err)
   282  	}
   283  	if val, ok := k8s.GetAnnotation(k8s.ProjectIDAnnotation, &ns); ok {
   284  		if val == projectId {
   285  			klog.Infof("namespace %q already has project id annotation value %q", namespaceName, projectId)
   286  			return nil
   287  		} else {
   288  			return fmt.Errorf("cannot set project id annotatation value to %q: the annotation already contained a value of %q",
   289  				projectId, val)
   290  		}
   291  	}
   292  	k8s.SetAnnotation(k8s.ProjectIDAnnotation, projectId, &ns)
   293  	err := c.Update(ctx, &ns)
   294  	if err != nil {
   295  		if apierrors.IsConflict(err) {
   296  			attempt++
   297  			if attempt < 10 {
   298  				klog.Warningf("detected concurrent modification error updating namespace %q, will retry", namespaceName)
   299  				time.Sleep(time.Millisecond * time.Duration(rand.Intn(1000)))
   300  				goto tryAgain
   301  			}
   302  		}
   303  		return fmt.Errorf("error setting project id on namespace %q: %w", namespaceName, err)
   304  	}
   305  	return nil
   306  }
   307  
   308  func requeueEqualAndRequeueAfterWithinBoundsOfMean(result reconcile.Result, expectedResult reconcile.Result) bool {
   309  	requeueEqual := result.Requeue == expectedResult.Requeue
   310  	lowerBound := expectedResult.RequeueAfter / 2
   311  	upperBound := expectedResult.RequeueAfter / 2 * 3
   312  	return requeueEqual && (result.RequeueAfter >= lowerBound && result.RequeueAfter < upperBound || result.RequeueAfter == 0 && expectedResult.RequeueAfter == 0)
   313  }
   314  

View as plain text