...

Source file src/edge-infra.dev/pkg/f8n/warehouse/k8s/controllers/lumperctl/internal/liveness.go

Documentation: edge-infra.dev/pkg/f8n/warehouse/k8s/controllers/lumperctl/internal

     1  package internal
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"strings"
     7  
     8  	"net/http"
     9  
    10  	"edge-infra.dev/pkg/k8s/runtime/conditions"
    11  
    12  	"github.com/go-logr/logr"
    13  )
    14  
    15  // maxIterations is the total number of statuses to keep in the LivenessChecker
    16  // struct. Ex. on iteration 101, the status at index 0 will be replaced and the values
    17  // will shift "left" once with the 101st status occupying the last value of the
    18  // array.
    19  // errThreshold is the count of errors that needs to be reached in order for the
    20  // Status function to return an error.
    21  const (
    22  	maxIterations = 100
    23  	errThreshold  = 95
    24  )
    25  
    26  // knownUnrecoverableErrs contains substrings that can be used to check for known errors
    27  // that should increment the count of statuses that contribute to the error threshold.
    28  var knownUnrecoverableErrs = []string{
    29  	"no matching credentials were found",
    30  }
    31  
// livenessStatus is a single entry recorded by AddStatus: the error passed for
// one object (nil when none was passed) and that object's name.
type livenessStatus struct {
	// err is the error handed to AddStatus; Status counts entries where it is non-nil.
	err error
	// objName is the result of GetName() on the object handed to AddStatus.
	objName string
}
    36  
// LivenessChecker implements the logic for the lumper liveness probe.
// Each time the lumper controller Reconcile loop executes, the object and error
// are passed to the checker through AddStatus, which stores the object's name
// alongside the error.
//
// Right now, the only errors that receive extra inspection are those whose
// message mentions go-containerregistry; this will be extended in the future
// as needed.
//
// Every non-nil error that is passed contributes towards the errThreshold value
// when the checker status is checked. If the count of stored statuses with
// errors reaches errThreshold, the Status function reports an error and the
// Check function returns it. If the Pod receives an error from the checker,
// Kubernetes will restart the Pod.
type LivenessChecker struct {
	// statuses holds the recorded statuses; New allocates it with maxIterations slots.
	statuses      []livenessStatus
	// logger receives the error messages emitted by AddStatus and Status.
	logger        logr.Logger
	// currIteration counts AddStatus calls and selects the slot to overwrite next.
	currIteration int
}
    54  
    55  // New takes an external logr.Logger and initializes the checker struct with the logger
    56  // and statuses array with a length of maxIterations.
    57  func New(l logr.Logger) *LivenessChecker {
    58  	return &LivenessChecker{
    59  		statuses: make([]livenessStatus, maxIterations),
    60  		logger:   l,
    61  	}
    62  }
    63  
    64  // The AddStatus function is called from lumper controllers and is used to track the
    65  // statuses the controllers find
    66  func (lc *LivenessChecker) AddStatus(obj conditions.Setter, err error) {
    67  	if err != nil && strings.Contains(strings.ToLower(err.Error()), "go-containerregistry") {
    68  		for _, errSubstr := range knownUnrecoverableErrs {
    69  			if strings.Contains(errSubstr, err.Error()) {
    70  				unrecoverableErrMsg := fmt.Sprintf("found unrecoverable error: %s", err)
    71  				lc.logger.Error(err, unrecoverableErrMsg)
    72  			}
    73  		}
    74  	}
    75  
    76  	ls := livenessStatus{
    77  		err:     err,
    78  		objName: obj.GetName(),
    79  	}
    80  
    81  	lc.statuses[lc.currIteration%maxIterations] = ls
    82  
    83  	lc.currIteration++
    84  }
    85  
    86  // The Status method evaluates if the lumper controller is in error and should be restarted
    87  // if the count of statuses with errors exceeds the value of errThreshold.
    88  func (lc *LivenessChecker) Status() error {
    89  	errCount := 0
    90  	var err error
    91  	for _, status := range lc.statuses {
    92  		if status.err != nil {
    93  			errCount++
    94  		}
    95  		if errCount == errThreshold {
    96  			err = status.err
    97  
    98  			lc.logger.Error(err, "unrecoverable error in lumperctl liveness")
    99  			return err
   100  		}
   101  	}
   102  
   103  	return nil
   104  }
   105  
   106  // Check implements the kubernetes Checker interface so the liveness checker can be
   107  // added as a liveness probe.
   108  func (lc *LivenessChecker) Check(req *http.Request) error {
   109  	if req.Method != http.MethodGet {
   110  		return errors.New("please use http GET for liveness status")
   111  	}
   112  
   113  	return lc.Status()
   114  }
   115  

View as plain text