...

Source file src/edge-infra.dev/pkg/sds/patching/patchmanager/prechecks.go

Documentation: edge-infra.dev/pkg/sds/patching/patchmanager

     1  package patchmanager
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"slices"
     9  	"strings"
    10  
    11  	"github.com/hashicorp/go-version"
    12  	"github.com/spf13/viper"
    13  	"k8s.io/apimachinery/pkg/util/wait"
    14  
    15  	"github.com/spf13/afero"
    16  
    17  	"edge-infra.dev/pkg/k8s/runtime/conditions"
    18  	"edge-infra.dev/pkg/sds/lib/dbus/systemd"
    19  	v1 "edge-infra.dev/pkg/sds/patching/k8s/apis/ienpatch/v1"
    20  
    21  	"edge-infra.dev/pkg/sds/patching/common"
    22  )
    23  
    24  type envFile struct {
    25  	Current string
    26  	Target  string
    27  }
    28  
    29  func ValidateVersions(currentVer, targetVer string) error {
    30  	target, err := version.NewVersion(targetVer)
    31  	if err != nil {
    32  		return err
    33  	}
    34  	current, err := version.NewVersion(currentVer)
    35  	if err != nil {
    36  		return err
    37  	}
    38  
    39  	if target.Prerelease() == "" && target.LessThanOrEqual(current) {
    40  		return fmt.Errorf("Target version %s is lower than or equal to current version %s", targetVer, currentVer)
    41  	}
    42  
    43  	allowedVer := os.Getenv("SOURCE_VERSION")
    44  	if allowedVer != "" {
    45  		for _, ver := range strings.Split(allowedVer, ",") {
    46  			if ver == currentVer {
    47  				return nil
    48  			}
    49  		}
    50  		return fmt.Errorf("Source version is not in %s. Cannot proceed with upgrade", allowedVer)
    51  	}
    52  
    53  	if target.Segments()[0] == current.Segments()[0] && target.Segments()[1]-current.Segments()[1] > 2 {
    54  		return fmt.Errorf("Target version %s is not a valid upgrade from current version %s", targetVer, currentVer)
    55  	}
    56  
    57  	return nil
    58  }
    59  
    60  func checkLinksExist(targetVer string, cfg common.Config) error {
    61  	// TODO: Remove this version check
    62  	// 1.15.0, 1.15.1 and dev versions may not have live boot
    63  	bwcVer, _ := version.NewVersion("v1.16.0")
    64  	p := cfg.LiveBootPath
    65  	if target, err := version.NewVersion(targetVer); err != nil {
    66  		return err
    67  	} else if target.LessThan(bwcVer) {
    68  		p = cfg.LiveBootPath + "/casper"
    69  	}
    70  
    71  	path, err := filepath.EvalSymlinks(p)
    72  	if err != nil {
    73  		return err
    74  	}
    75  
    76  	if strings.Contains(path, "/"+targetVer+"/") {
    77  		return fmt.Errorf("New version symlinks already exists, patching risks corruption: %s", targetVer)
    78  	}
    79  	return nil
    80  }
    81  
    82  func readConfig(fs afero.Fs, filePath string) (envFile, error) {
    83  	var env envFile
    84  	file, err := fs.Open(filePath)
    85  	if err != nil {
    86  		return env, err
    87  	}
    88  
    89  	defer file.Close()
    90  
    91  	viper.SetConfigType("env")
    92  	if err = viper.ReadConfig(file); err != nil {
    93  		return env, err
    94  	}
    95  
    96  	err = viper.Unmarshal(&env)
    97  	return env, err
    98  }
    99  
   100  func validateEnvFile(fs afero.OsFs, env envFile, currentVer, targetVer string, cfg common.Config) error {
   101  	if currentVer == env.Current || targetVer != env.Target {
   102  		if err := remount(cfg.MountPath); err != nil {
   103  			return err
   104  		}
   105  		return fs.Remove(cfg.EnvFilePath)
   106  	}
   107  
   108  	if currentVer != env.Target {
   109  		return fmt.Errorf("Env file exists but IEN version does not match target version")
   110  	}
   111  
   112  	return nil
   113  }
   114  
   115  func getServiceStatus(ctx context.Context, service string) (string, error) {
   116  	conn, err := systemd.NewConnection(ctx)
   117  	if err != nil {
   118  		return "", err
   119  	}
   120  	defer conn.Close()
   121  
   122  	return conn.ActiveState(ctx, service)
   123  }
   124  
   125  func (p *PatchManager) checkUpgradeStatus() (v1.PatchStatus, error) {
   126  	exists, err := afero.Exists(p.Fs, p.Cfg.RebootPath)
   127  	if err != nil {
   128  		return v1.Reboot, fmt.Errorf("An error occurred while checking for a scheduled reboot: %w", err)
   129  	} else if exists {
   130  		p.Log.Info("Detected scheduled reboot. Not patching until complete")
   131  		return v1.Reboot, nil
   132  	}
   133  
   134  	exists, err = afero.Exists(p.Fs, p.Cfg.EnvFilePath)
   135  	if err != nil {
   136  		return v1.Failed, fmt.Errorf("An error occurred with an ongoing upgrade: %w", err)
   137  	} else if exists {
   138  		if err = p.waitForScriptsUpgrade(); err != nil {
   139  			return v1.Failed, fmt.Errorf("An error occurred with an ongoing upgrade: %w", err)
   140  		}
   141  		if p.CurrentVer == p.TargetVer {
   142  			return v1.Success, nil
   143  		}
   144  		return v1.Failed, fmt.Errorf("Post upgrade service is inactive but node is not at expected version")
   145  	}
   146  
   147  	if p.CurrentVer != p.TargetVer {
   148  		if err := checkLinksExist(p.TargetVer, p.Cfg); err != nil {
   149  			return v1.Failed, err
   150  		}
   151  		// Upgrade required, continue with upgrade flow
   152  		if err := ValidateVersions(p.CurrentVer, p.TargetVer); err != nil {
   153  			return v1.Retry, fmt.Errorf("Invalid versions: %w", err)
   154  		}
   155  		return v1.Pending, nil
   156  	}
   157  
   158  	state, err := getServiceStatus(p.Ctx, "zynstra-post-reboot-upgrade.service")
   159  	if err != nil {
   160  		return v1.Retry, fmt.Errorf("Failed to get post reboot upgrade status: %w", err)
   161  	}
   162  	if state == "active" {
   163  		p.Log.Info("Post upgrade service is still running yet patching.env does not exist. Race condition?")
   164  		return v1.Retry, nil
   165  	} else if state == "failed" {
   166  		// Likely remount failed
   167  		return v1.Failed, fmt.Errorf("Post upgrade service has failed - Check journal logs for zynstra-post-reboot-upgrade.service")
   168  	} else if state == "inactive" {
   169  		// Upgrade complete
   170  		return v1.Success, nil
   171  	}
   172  	return v1.Failed, fmt.Errorf("Post upgrade service is an unknown state: %s", state)
   173  }
   174  
   175  func (p *PatchManager) isNodeTargetted() bool {
   176  	if len(p.Ienpatch.Spec.NodeTargets) > 0 && !slices.Contains(p.Ienpatch.Spec.NodeTargets, p.HostName) {
   177  		return false
   178  	}
   179  	return true
   180  }
   181  
   182  // Wait for ienpatches CR for cp to change to ready
   183  func (p *PatchManager) waitForScriptsUpgrade() error {
   184  	env, err := readConfig(p.Fs, p.Cfg.EnvFilePath)
   185  	if err != nil {
   186  		return err
   187  	}
   188  
   189  	if err = validateEnvFile(p.Fs, env, p.CurrentVer, p.TargetVer, p.Cfg); err != nil {
   190  		return err
   191  	}
   192  	p.Log.Info("Env file contents", "current", env.Current, "target", env.Target)
   193  
   194  	var lastError error
   195  
   196  	err = wait.PollUntilContextCancel(p.Ctx, upgradeRetrySleep, true, func(ctx context.Context) (done bool, err error) {
   197  		state, lastError := getServiceStatus(ctx, "zynstra-post-reboot-upgrade.service")
   198  		if lastError != nil {
   199  			return false, fmt.Errorf("Waiting for post-reboot script upgrade, failed to get service status. Retrying: %w", lastError)
   200  		} else if state == "active" {
   201  			return false, nil
   202  		} else if state == "failed" {
   203  			return true, fmt.Errorf("Post upgrade service has failed - Check journal logs for zynstra-post-reboot-upgrade.service")
   204  		} else if state == "inactive" {
   205  			return true, nil
   206  		}
   207  		return true, fmt.Errorf("Post upgrade service is an unknown state: %s", state)
   208  	})
   209  	if lastError != nil {
   210  		return lastError
   211  	}
   212  	return err
   213  }
   214  
   215  func (p *PatchManager) DownloadComplete() (bool, error) {
   216  	artefactsPath := GetArtefactsPath(p.TargetVer, p.Cfg)
   217  
   218  	if exists, err := afero.Exists(p.Fs, filepath.Join(artefactsPath, ".complete")); err != nil || !exists {
   219  		return false, err
   220  	}
   221  	return true, nil
   222  }
   223  
   224  func (p *PatchManager) PrintRequest() {
   225  	p.Log.Info("Received patch request", "version", p.TargetVer, "node",
   226  		p.Ienpatch.Spec.NodeTargets, "downloadOnly", p.Ienpatch.Spec.DownloadOnly, "autoreboot", p.Ienpatch.Spec.AutoReboot, "disablePatchOrdering", p.Ienpatch.Spec.DisablePatchOrdering)
   227  }
   228  
   229  func (p *PatchManager) PreChecks() (v1.PatchStatus, error) {
   230  	p.PrintRequest()
   231  
   232  	if !p.isNodeTargetted() {
   233  		p.Log.Info(fmt.Sprintf("Nodename %s not in target nodes: %s", p.HostName, p.Ienpatch.Spec.NodeTargets))
   234  		return v1.Pending, nil
   235  	}
   236  
   237  	upgradeStatus, err := p.checkUpgradeStatus()
   238  	switch upgradeStatus {
   239  	case v1.Success:
   240  		p.logSuccess()
   241  		return upgradeStatus, nil
   242  	case v1.Failed:
   243  		return upgradeStatus, fmt.Errorf("Pre-upgrade checks failed: %w", err)
   244  	case v1.Retry:
   245  		return upgradeStatus, fmt.Errorf("Retrying pre-upgrade checks %w", err)
   246  	case v1.Reboot:
   247  		return upgradeStatus, nil
   248  	case v1.Pending:
   249  		p.Log.Info("Patching has started", "Current Version", p.CurrentVer, "Target Version", p.TargetVer)
   250  		return upgradeStatus, nil
   251  	default:
   252  		return v1.Retry, fmt.Errorf("returned v1.PatchStatus is not matching any case in switch statement: %d %w", upgradeStatus, err)
   253  	}
   254  }
   255  
   256  func (p *PatchManager) logSuccess() {
   257  	condition := conditions.Get(p.Ienpatch, p.HostName)
   258  	if condition == nil {
   259  		p.Log.Info("No conditions with type matching " + p.HostName)
   260  		return
   261  	}
   262  	successMessage := fmt.Sprintf("Successful upgrade for %s to %s", p.HostName, p.TargetVer)
   263  	if condition.Message == successMessage {
   264  		p.Log.Info("Patching has already completed for " + p.TargetVer)
   265  	} else {
   266  		p.Log.Info("Post upgrade service complete. Upgrade complete for " + p.TargetVer)
   267  	}
   268  }
   269  

View as plain text