package patchmanager

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"slices"
	"strings"

	"github.com/hashicorp/go-version"
	"github.com/spf13/afero"
	"github.com/spf13/viper"
	"k8s.io/apimachinery/pkg/util/wait"

	"edge-infra.dev/pkg/k8s/runtime/conditions"
	"edge-infra.dev/pkg/sds/lib/dbus/systemd"
	"edge-infra.dev/pkg/sds/patching/common"
	v1 "edge-infra.dev/pkg/sds/patching/k8s/apis/ienpatch/v1"
)

// envFile holds the values unmarshalled from the patching env file: the
// version recorded as current and the version recorded as the upgrade target.
type envFile struct {
	Current string
	Target  string
}

// ValidateVersions reports whether upgrading from currentVer to targetVer is
// permitted. It returns nil when the upgrade may proceed and an error when:
//
//   - either version string cannot be parsed;
//   - targetVer is a non-prerelease version that is not greater than
//     currentVer;
//   - the SOURCE_VERSION environment variable is set (a comma-separated list
//     of versions) and currentVer is not one of its entries;
//   - SOURCE_VERSION is unset and, within the same major version, the target
//     minor version is more than two ahead of the current one.
//
// When SOURCE_VERSION is set and lists currentVer, the minor-version gap
// check is skipped.
func ValidateVersions(currentVer, targetVer string) error {
	target, err := version.NewVersion(targetVer)
	if err != nil {
		return err
	}
	current, err := version.NewVersion(currentVer)
	if err != nil {
		return err
	}

	// Prerelease targets are exempt from the "must be newer" rule.
	if target.Prerelease() == "" && target.LessThanOrEqual(current) {
		return fmt.Errorf("Target version %s is lower than or equal to current version %s", targetVer, currentVer)
	}

	if allowedVer := os.Getenv("SOURCE_VERSION"); allowedVer != "" {
		for _, ver := range strings.Split(allowedVer, ",") {
			// Trim so that a list written as "a, b" still matches.
			if strings.TrimSpace(ver) == currentVer {
				return nil
			}
		}
		return fmt.Errorf("Source version is not in %s. Cannot proceed with upgrade", allowedVer)
	}

	targetSeg, currentSeg := target.Segments(), current.Segments()
	if targetSeg[0] == currentSeg[0] && targetSeg[1]-currentSeg[1] > 2 {
		return fmt.Errorf("Target version %s is not a valid upgrade from current version %s", targetVer, currentVer)
	}
	return nil
}

// checkLinksExist returns an error if the resolved live boot path already
// points into a directory named after targetVer, since patching on top of
// existing links for that version risks corruption. It also returns an error
// if targetVer cannot be parsed or the path's symlinks cannot be resolved.
func checkLinksExist(targetVer string, cfg common.Config) error {
	// TODO: Remove this version check
	// 1.15.0, 1.15.1 and dev versions may not have live boot
	bwcVer, err := version.NewVersion("v1.16.0")
	if err != nil {
		return err
	}
	target, err := version.NewVersion(targetVer)
	if err != nil {
		return err
	}

	linkPath := cfg.LiveBootPath
	if target.LessThan(bwcVer) {
		// Targets older than the cut-off are checked under the "casper"
		// sub-directory instead.
		linkPath = cfg.LiveBootPath + "/casper"
	}

	resolved, err := filepath.EvalSymlinks(linkPath)
	if err != nil {
		return err
	}
	if strings.Contains(resolved, "/"+targetVer+"/") {
		return fmt.Errorf("New version symlinks already exists, patching risks corruption: %s", targetVer)
	}
	return nil
}

// readConfig opens filePath on fs, parses it as an "env"-format config and
// unmarshals it into an envFile. On any error a zero envFile is returned
// together with the error.
func readConfig(fs afero.Fs, filePath string) (envFile, error) {
	file, err := fs.Open(filePath)
	if err != nil {
		return envFile{}, err
	}
	defer file.Close()

	// Use a private viper instance: reading into the package-level instance
	// would change its config type and replace whatever configuration the
	// rest of the process has loaded there.
	parser := viper.New()
	parser.SetConfigType("env")
	if err := parser.ReadConfig(file); err != nil {
		return envFile{}, err
	}

	var env envFile
	if err := parser.Unmarshal(&env); err != nil {
		return envFile{}, err
	}
	return env, nil
}

// validateEnvFile checks the env file contents against the node's versions.
//
// If env.Current equals currentVer, or env.Target differs from targetVer, the
// file is treated as not belonging to a completed upgrade to targetVer: the
// function calls remount on cfg.MountPath and then removes cfg.EnvFilePath,
// returning any error from either step. Otherwise it returns an error when
// the node's currentVer is not the version the env file names as its target.
//
// NOTE(review): remount is defined outside this view; presumably it makes the
// mount writable so the file can be removed - confirm.
func validateEnvFile(fs afero.Fs, env envFile, currentVer, targetVer string, cfg common.Config) error {
	if currentVer == env.Current || targetVer != env.Target {
		if err := remount(cfg.MountPath); err != nil {
			return err
		}
		return fs.Remove(cfg.EnvFilePath)
	}
	if currentVer != env.Target {
		return fmt.Errorf("Env file exists but IEN version does not match target version")
	}
	return nil
}

// getServiceStatus opens a systemd connection, returns the active state
// reported for service, and closes the connection before returning.
func getServiceStatus(ctx context.Context, service string) (string, error) {
	conn, err := systemd.NewConnection(ctx)
	if err != nil {
		return "", err
	}
	defer conn.Close()

	return conn.ActiveState(ctx, service)
}

// checkUpgradeStatus works out where the node is in the patching flow and
// returns the matching status:
//
//   - v1.Reboot when the reboot marker (Cfg.RebootPath) exists, or when its
//     existence could not be checked (with an error in the latter case);
//   - when the env file (Cfg.EnvFilePath) exists, it waits for the post
//     reboot upgrade service and returns v1.Success if the node is at the
//     target version, v1.Failed otherwise;
//   - when the node is not at the target version, v1.Pending if the upgrade
//     may start, v1.Failed if links for the target already exist, v1.Retry
//     if the version pair is not a valid upgrade;
//   - otherwise the status derived from the post reboot upgrade service
//     state: "active" -> v1.Retry, "failed" -> v1.Failed,
//     "inactive" -> v1.Success, anything else -> v1.Failed.
func (p *PatchManager) checkUpgradeStatus() (v1.PatchStatus, error) {
	rebootScheduled, err := afero.Exists(p.Fs, p.Cfg.RebootPath)
	if err != nil {
		return v1.Reboot, fmt.Errorf("An error occurred while checking for a scheduled reboot: %w", err)
	}
	if rebootScheduled {
		p.Log.Info("Detected scheduled reboot. Not patching until complete")
		return v1.Reboot, nil
	}

	envFileExists, err := afero.Exists(p.Fs, p.Cfg.EnvFilePath)
	if err != nil {
		return v1.Failed, fmt.Errorf("An error occurred with an ongoing upgrade: %w", err)
	}
	if envFileExists {
		if err := p.waitForScriptsUpgrade(); err != nil {
			return v1.Failed, fmt.Errorf("An error occurred with an ongoing upgrade: %w", err)
		}
		if p.CurrentVer == p.TargetVer {
			return v1.Success, nil
		}
		return v1.Failed, fmt.Errorf("Post upgrade service is inactive but node is not at expected version")
	}

	if p.CurrentVer != p.TargetVer {
		if err := checkLinksExist(p.TargetVer, p.Cfg); err != nil {
			return v1.Failed, err
		}
		// Upgrade required, continue with upgrade flow
		if err := ValidateVersions(p.CurrentVer, p.TargetVer); err != nil {
			return v1.Retry, fmt.Errorf("Invalid versions: %w", err)
		}
		return v1.Pending, nil
	}

	// Node is already at the target version and no env file exists; the
	// service state decides the outcome.
	state, err := getServiceStatus(p.Ctx, "zynstra-post-reboot-upgrade.service")
	if err != nil {
		return v1.Retry, fmt.Errorf("Failed to get post reboot upgrade status: %w", err)
	}

	switch state {
	case "active":
		p.Log.Info("Post upgrade service is still running yet patching.env does not exist. Race condition?")
		return v1.Retry, nil
	case "failed":
		// Likely remount failed
		return v1.Failed, fmt.Errorf("Post upgrade service has failed - Check journal logs for zynstra-post-reboot-upgrade.service")
	case "inactive":
		// Upgrade complete
		return v1.Success, nil
	default:
		return v1.Failed, fmt.Errorf("Post upgrade service is an unknown state: %s", state)
	}
}

// isNodeTargetted reports whether this node should act on the patch request:
// true when the request lists no node targets, or when the list contains
// this node's host name.
func (p *PatchManager) isNodeTargetted() bool {
	targets := p.Ienpatch.Spec.NodeTargets
	return len(targets) == 0 || slices.Contains(targets, p.HostName)
}

// waitForScriptsUpgrade validates the env file and then polls the
// zynstra-post-reboot-upgrade.service state every upgradeRetrySleep until it
// reaches a terminal state or p.Ctx ends.
//
// Polling continues while the service is "active" and while the service
// state cannot be read. It returns nil when the service becomes "inactive",
// and an error when the service is "failed" or in an unrecognised state. If
// polling stops (for example because the context ended) while the most
// recent status read was failing, the returned error includes that status
// error as well as the reason polling stopped.
func (p *PatchManager) waitForScriptsUpgrade() error {
	env, err := readConfig(p.Fs, p.Cfg.EnvFilePath)
	if err != nil {
		return err
	}
	if err = validateEnvFile(p.Fs, env, p.CurrentVer, p.TargetVer, p.Cfg); err != nil {
		return err
	}
	p.Log.Info("Env file contents", "current", env.Current, "target", env.Target)

	// lastStatusErr is assigned (never re-declared) inside the closure so the
	// check after the poll sees the most recent status-read failure.
	var lastStatusErr error
	err = wait.PollUntilContextCancel(p.Ctx, upgradeRetrySleep, true, func(ctx context.Context) (bool, error) {
		state, statusErr := getServiceStatus(ctx, "zynstra-post-reboot-upgrade.service")
		if statusErr != nil {
			// Returning a non-nil error here would end the poll, so record
			// the failure and try again on the next tick.
			lastStatusErr = statusErr
			p.Log.Info("Waiting for post-reboot script upgrade, failed to get service status. Retrying", "error", statusErr.Error())
			return false, nil
		}
		lastStatusErr = nil

		switch state {
		case "active":
			return false, nil
		case "failed":
			return true, fmt.Errorf("Post upgrade service has failed - Check journal logs for zynstra-post-reboot-upgrade.service")
		case "inactive":
			return true, nil
		default:
			return true, fmt.Errorf("Post upgrade service is an unknown state: %s", state)
		}
	})
	if err != nil && lastStatusErr != nil {
		return fmt.Errorf("Waiting for post-reboot script upgrade, failed to get service status: %w (polling stopped: %w)", lastStatusErr, err)
	}
	return err
}

// DownloadComplete reports whether the ".complete" marker file exists in the
// artefacts directory for the target version. An error is returned only when
// the existence check itself fails.
func (p *PatchManager) DownloadComplete() (bool, error) {
	marker := filepath.Join(GetArtefactsPath(p.TargetVer, p.Cfg), ".complete")
	exists, err := afero.Exists(p.Fs, marker)
	if err != nil {
		return false, err
	}
	return exists, nil
}

// PrintRequest logs the target version and the spec fields of the patch
// request being handled.
func (p *PatchManager) PrintRequest() {
	spec := p.Ienpatch.Spec
	p.Log.Info("Received patch request",
		"version", p.TargetVer,
		"node", spec.NodeTargets,
		"downloadOnly", spec.DownloadOnly,
		"autoreboot", spec.AutoReboot,
		"disablePatchOrdering", spec.DisablePatchOrdering)
}

// PreChecks logs the request, skips nodes that are not targeted (returning
// v1.Pending), and otherwise maps the result of checkUpgradeStatus onto the
// status and error handed back to the caller:
//
//   - v1.Success and v1.Pending are returned with a nil error;
//   - v1.Reboot is returned with a nil error; an error hit while checking
//     for the scheduled reboot is logged rather than returned;
//   - v1.Failed and v1.Retry are always returned with a non-nil error;
//   - any other status is reported as v1.Retry with an error.
func (p *PatchManager) PreChecks() (v1.PatchStatus, error) {
	p.PrintRequest()

	if !p.isNodeTargetted() {
		p.Log.Info(fmt.Sprintf("Nodename %s not in target nodes: %s", p.HostName, p.Ienpatch.Spec.NodeTargets))
		return v1.Pending, nil
	}

	upgradeStatus, err := p.checkUpgradeStatus()
	switch upgradeStatus {
	case v1.Success:
		p.logSuccess()
		return upgradeStatus, nil
	case v1.Failed:
		return upgradeStatus, fmt.Errorf("Pre-upgrade checks failed: %w", err)
	case v1.Retry:
		// checkUpgradeStatus can ask for a retry without an error; wrapping
		// a nil error with %w would render as "%!w(<nil>)".
		if err == nil {
			return upgradeStatus, fmt.Errorf("Retrying pre-upgrade checks")
		}
		return upgradeStatus, fmt.Errorf("Retrying pre-upgrade checks %w", err)
	case v1.Reboot:
		if err != nil {
			// Keep reporting Reboot with a nil error, but do not drop the
			// cause silently.
			p.Log.Info("Unable to check for a scheduled reboot", "error", err.Error())
		}
		return upgradeStatus, nil
	case v1.Pending:
		p.Log.Info("Patching has started", "Current Version", p.CurrentVer, "Target Version", p.TargetVer)
		return upgradeStatus, nil
	default:
		// %v is used because the underlying kind of v1.PatchStatus is not
		// visible here.
		if err == nil {
			return v1.Retry, fmt.Errorf("returned v1.PatchStatus is not matching any case in switch statement: %v", upgradeStatus)
		}
		return v1.Retry, fmt.Errorf("returned v1.PatchStatus is not matching any case in switch statement: %v %w", upgradeStatus, err)
	}
}

// logSuccess logs the outcome of a successful upgrade check. It looks up the
// condition for this host on the patch resource and logs whether the success
// was already recorded there or has only just been observed. If no condition
// for the host exists, it logs that and returns.
func (p *PatchManager) logSuccess() {
	condition := conditions.Get(p.Ienpatch, p.HostName)
	if condition == nil {
		p.Log.Info("No conditions with type matching " + p.HostName)
		return
	}

	successMessage := fmt.Sprintf("Successful upgrade for %s to %s", p.HostName, p.TargetVer)
	if condition.Message == successMessage {
		p.Log.Info("Patching has already completed for " + p.TargetVer)
		return
	}
	p.Log.Info("Post upgrade service complete. Upgrade complete for " + p.TargetVer)
}