package internal

import (
	"errors"
	"fmt"
	"strings"

	"net/http"

	"edge-infra.dev/pkg/k8s/runtime/conditions"
	"github.com/go-logr/logr"
)

// maxIterations is the total number of statuses kept in the LivenessChecker
// struct. The statuses slice is used as a ring buffer: AddStatus writes to index
// currIteration%maxIterations, so on iteration 101 the status at index 0 is
// overwritten, on iteration 102 the status at index 1, and so on.
//
// errThreshold is the count of errors that needs to be reached in order for the
// Status function to return an error.
const (
	maxIterations = 100
	errThreshold  = 95
)

// knownUnrecoverableErrs contains substrings used to recognise known
// unrecoverable errors in the messages passed to AddStatus.
//
// NOTE: every non-nil error passed to AddStatus counts toward errThreshold,
// whether or not it matches an entry here; this list only drives the extra
// "found unrecoverable error" log line.
var knownUnrecoverableErrs = []string{
	"no matching credentials were found",
}

// livenessStatus records the outcome of a single AddStatus call.
type livenessStatus struct {
	// err is the error reported for the object; nil means no error.
	err error
	// objName is the name of the object the status was reported for.
	objName string
}

// LivenessChecker implements the logic for the lumper liveness probe.
// Each time the lumper controller Reconcile loop executes, the object name and error
// are passed to the checker.
//
// Right now, the only error source that is inspected for known unrecoverable
// errors is go-containerregistry; this will be extended in the future as needed.
//
// If an error is passed, that status will contribute towards the errThreshold value
// when the checker status is checked. If the count of stored statuses with errors
// reaches the errThreshold value, the Status function will report an error and the
// Check function will return an error. If the Pod receives an error from the
// checker, Kubernetes will restart the Pod.
type LivenessChecker struct {
	// statuses holds the most recent maxIterations statuses (ring buffer).
	statuses []livenessStatus
	// logger is used to report unrecoverable errors.
	logger logr.Logger
	// currIteration counts AddStatus calls and selects the next slot to write.
	currIteration int
}

// New takes an external logr.Logger and initializes the checker struct with the logger
// and a statuses slice with a length of maxIterations.
func New(l logr.Logger) *LivenessChecker {
	return &LivenessChecker{
		// Pre-sized so AddStatus can index slots directly; unused slots hold
		// the zero livenessStatus (nil error) and never count as failures.
		statuses: make([]livenessStatus, maxIterations),
		logger:   l,
	}
}

// AddStatus is called from lumper controllers and records the status they
// found for obj.
//
// obj supplies the object name stored with the status. err is the error for
// that object, or nil when there was none. The status is written to slot
// currIteration%maxIterations, overwriting the oldest entry once the buffer
// is full.
//
// When err comes from go-containerregistry and its message contains one of
// the knownUnrecoverableErrs substrings, the error is additionally logged.
func (lc *LivenessChecker) AddStatus(obj conditions.Setter, err error) {
	if err != nil {
		// Compare case-insensitively: both checks run against the lowercased
		// error message.
		msg := strings.ToLower(err.Error())
		if strings.Contains(msg, "go-containerregistry") {
			for _, errSubstr := range knownUnrecoverableErrs {
				// The error message is the haystack and the known substring
				// the needle (the arguments were previously reversed, so a
				// known error was never detected).
				if strings.Contains(msg, strings.ToLower(errSubstr)) {
					unrecoverableErrMsg := fmt.Sprintf("found unrecoverable error: %s", err)
					lc.logger.Error(err, unrecoverableErrMsg)
					// One log line per error is enough.
					break
				}
			}
		}
	}

	lc.statuses[lc.currIteration%maxIterations] = livenessStatus{
		err:     err,
		objName: obj.GetName(),
	}
	lc.currIteration++
}

// Status evaluates whether the lumper controller is in error and should be
// restarted.
//
// It counts the stored statuses that carry an error. As soon as the count
// reaches errThreshold, it logs and returns the error of the status at which
// the threshold was reached (slots are scanned in index order, which is not
// necessarily chronological). It returns nil when the threshold is not
// reached.
func (lc *LivenessChecker) Status() error {
	errCount := 0
	for _, status := range lc.statuses {
		if status.err == nil {
			continue
		}
		errCount++
		if errCount == errThreshold {
			lc.logger.Error(status.err, "unrecoverable error in lumperctl liveness")
			return status.err
		}
	}
	return nil
}

// Check implements the kubernetes Checker interface so the liveness checker can be
// added as a liveness probe.
//
// It rejects any request that is not an HTTP GET and otherwise returns the
// result of Status.
func (lc *LivenessChecker) Check(req *http.Request) error {
	if req.Method != http.MethodGet {
		return errors.New("please use http GET for liveness status")
	}
	return lc.Status()
}