...

Source file src/edge-infra.dev/cmd/edge/monitoring/alertman/cli/sync.go

Documentation: edge-infra.dev/cmd/edge/monitoring/alertman/cli

     1  package cli
     2  
     3  import (
     4  	"context"
     5  	"regexp"
     6  	"slices"
     7  
     8  	"flag"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/peterbourgon/ff/v3/ffcli"
    13  
    14  	alertmgr "edge-infra.dev/pkg/lib/gcp/monitoring/alertmanager"
    15  )
    16  
// syncArgsT holds the values parsed from the sync subcommand's flags
// (see newSyncFlagSet for the flag names and defaults).
type syncArgsT struct {
	templatePath        string // --path: alert policy template file or folder
	push                bool   // --push: reconcile the project and push templates
	continues           bool   // --continue: keep going after a template/request issue
	removeNotifChannels bool   // --removeChannels: remove notification channels
	notifChannelName    string // --channelName: notification channel resource name
}
    24  
    25  var syncArgs syncArgsT
    26  var syncFlagSet = newSyncFlagSet(&syncArgs)
    27  
    28  func newSyncFlagSet(syncArgs *syncArgsT) *flag.FlagSet {
    29  	syncf := newFlagSet("sync")
    30  	syncf.StringVar(&syncArgs.templatePath, "path", "config/observability/alerts", "Path to alert JSON template(s). (required)\nNOTE: If `--push` is specified, template folder path must be specified. Individual files are only supported for verification and update.")
    31  	syncf.BoolVar(&syncArgs.push, "push", false, "Reconcile the existing project alertPolicies and push alertPolicies templates from source. (optional)")
    32  	syncf.BoolVar(&syncArgs.continues, "continue", false, "Continues processing alert policy sync process even if an issue with a template or request is encountered. (optional)")
    33  	syncf.BoolVar(&syncArgs.removeNotifChannels, "removeChannels", false, "Remove Notification Channels. (optional)")
    34  	syncf.StringVar(&syncArgs.notifChannelName, "channelName", "projects/PROJECT-ID/notificationChannels/NOTIFICATION-CHANNEL-ID", "link to notification channel (optional)")
    35  	return syncf
    36  }
    37  
// syncCmd is the ffcli definition of the "sync" subcommand. Its flags come
// from syncFlagSet (wrapped by withGlobalFlags) and execution is delegated to
// runSync.
var syncCmd = &ffcli.Command{
	Name:       "sync",
	ShortUsage: "sync [flags]",
	ShortHelp:  "Sync Alert Policies",
	// TrimSpace drops the newlines that surround the raw string literal.
	LongHelp: strings.TrimSpace(`
Performs automated management of alert policies, verifies and updates alert policies template required labels, and creates or updates project alert policies from templates in the specified [templates] path.
`),
	FlagSet: withGlobalFlags(syncFlagSet),
	Exec:    runSync,
}
    48  
    49  func runSync(_ context.Context, args []string) error {
    50  	var err error
    51  
    52  	if len(args) > 0 {
    53  		Fatalf("too many non-flag arguments: %q", args)
    54  	}
    55  	if !checkSyncFlags() {
    56  		Println()
    57  		return flag.ErrHelp
    58  	}
    59  
    60  	alertmgr.Continues = syncArgs.continues
    61  
    62  	templates, err := verifyTemplate()
    63  	if err != nil {
    64  		logger.Error(err, "Failed to verify Template", "function", "verifyTemplate")
    65  		return err
    66  	}
    67  
    68  	// skip the templates here that include the exclude user label or include user label
    69  	templates = excludeTemplates(templates)
    70  	templates = includeTemplates(templates)
    71  
    72  	// sync AlertPolicies from templates to the specified project.
    73  	if syncArgs.push {
    74  		if err = mitigateDuplicateAlertPolicies(templates); err != nil {
    75  			logger.Error(err, "Failed to mitigate Duplicate AlertPolicies", "function", "mitigateDuplicateAlertPolicies")
    76  		}
    77  		// mitigate project AlertPolicies.
    78  		if err = mitigateAlertPoliciesLabels(); err != nil {
    79  			logger.Error(err, "Failed to mitigate AlertPolicies Labels", "function", "mitigateAlertPoliciesLabels")
    80  		}
    81  		if err = cleanupAndDeleteAlertPolicies(templates); err != nil {
    82  			logger.Error(err, "Failed to cleanup AlertPolicies", "function", "cleanupAndDeleteAlertPolicies")
    83  		}
    84  		pushTemplates(templates)
    85  	}
    86  	return nil
    87  }
    88  
    89  func checkSyncFlags() bool {
    90  	if syncArgs.removeNotifChannels && len(syncArgs.notifChannelName) > 0 {
    91  		quickDeleteNotifChannels(projectID, syncArgs.notifChannelName)
    92  	}
    93  
    94  	if syncArgs.push && len(projectID) == 0 {
    95  		logger.Error(nil, "Error: no value specified for [project] - a valid project-id is required")
    96  		return false
    97  	}
    98  
    99  	path, err := filepath.Abs(syncArgs.templatePath)
   100  	if err != nil || !checkPath(path, syncArgs.push) {
   101  		logger.Error(err, "Failed to read template from path")
   102  		return false
   103  	}
   104  	return true
   105  }
   106  
// quickDeleteNotifChannels removes a notification channel in two steps:
// first from the alert policies of projectID, then the channel itself.
// Nothing returned by the two alertmgr calls is inspected here.
func quickDeleteNotifChannels(projectID string, notifChannelName string) {
	// Presumably the channel must be detached from policies before it can be
	// deleted - TODO confirm against the alertmanager package.
	alertmgr.DeleteNotifChannelFromPolicies(projectID, notifChannelName)
	alertmgr.DeleteNotifChannel(notifChannelName)
}
   111  
   112  func verifyTemplate() ([]*alertmgr.AlertPolicy, error) {
   113  	logger.Info("Alert Policy Template Verification Started.")
   114  	sourceTemplate, err := alertmgr.ReadAlertPolicyFromPath(syncArgs.templatePath)
   115  	if err != nil {
   116  		return nil, err
   117  	}
   118  
   119  	var templates []*alertmgr.AlertPolicy
   120  	for i := 0; i < len(sourceTemplate); i++ {
   121  		cTemp := sourceTemplate[i]
   122  		fName := filepath.Base(cTemp.TemplatePath)
   123  
   124  		switch {
   125  		case alertmgr.InList(cTemp.DisplayName, templates):
   126  			logger.Error(nil, "alert policy name already exists in another template - check for duplicates", "display-name", cTemp.DisplayName, "template-name", fName)
   127  			return nil, Errorf("%s alert policy name already exists in another template %s", cTemp.DisplayName, fName)
   128  		case !alertmgr.ValidLabels(cTemp) && !alertmgr.DuplicateLabels(cTemp):
   129  			logger.Error(nil, "alert policy template contains invalid labels or duplicate labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName)
   130  			return nil, Errorf("alert policy template contains invalid labels or duplicate labels %s", fName)
   131  		case !alertmgr.HasRequiredLabels(cTemp):
   132  			logger.Error(nil, "alert policy template missing required labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName)
   133  			return nil, Errorf("alert policy template missing required labels: policy-owner or severity %s", fName)
   134  		case alertmgr.HasReservedLabels(cTemp):
   135  			logger.Error(nil, "alert policy template contains reserved labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName)
   136  			return nil, Errorf("alert policy template contains reserved labels: managed-by-observability, version, deletion-date, or deactivation-date %s", fName)
   137  		case !alertmgr.Documentation(cTemp):
   138  			logger.Error(nil, "alert policy template missing information in documentation field", "display-name", cTemp.DisplayName, "template-name", fName)
   139  			return nil, Errorf("alert policy template must contain information in the documentation field %s", cTemp.DisplayName)
   140  		case !alertmgr.Notification(cTemp):
   141  			logger.Error(nil, "alert policy template missing notification-channel field", "display-name", cTemp.DisplayName, "template-name", fName)
   142  			return nil, Errorf("alert policy template must contain a notification-channel field %s", cTemp.DisplayName)
   143  		case alertmgr.IsChannelLinks(cTemp.NotificationChannels):
   144  			logger.Error(nil, "alert policy template notification-channel field cannot be a url", "display-name", cTemp.DisplayName, "template-name", fName)
   145  			return nil, Errorf("notification-channel field cannot be a url %s", cTemp.DisplayName)
   146  		case !alertmgr.CheckConditionFilter(cTemp):
   147  			logger.Error(nil, "alert policy filter condition cannot contain a project id or cluster id", "display-name", cTemp.DisplayName, "template-name", fName)
   148  			return nil, Errorf("filter condition contains a project id or cluster id %s", cTemp.DisplayName)
   149  		case !alertmgr.IncludeExclude(cTemp):
   150  			logger.Error(nil, "alert policy cannot contain an exclude-env user label AND an include-env user label. choose only one.", "display-name", cTemp.DisplayName, "template-name", fName)
   151  			return nil, Errorf("user label section contains two competing labels %s", cTemp.DisplayName)
   152  		case len(projectID) > 0:
   153  			templates = append(templates, cTemp)
   154  		}
   155  	}
   156  	logger.Info("Alert Policy Template Verification Completed.")
   157  	return templates, nil
   158  }
   159  
   160  // Reconcile AlertPolicies labels by flagging unmanaged policies for deactivation and deletion.
   161  func mitigateAlertPoliciesLabels() error {
   162  	logger.Info("Alert Policy Reconcile Labels Started.")
   163  	filter := "(NOT user_labels='managed') AND (NOT user_labels='deactivation-date' OR NOT user_labels='deletion-date')"
   164  	policies, err := alertmgr.GetAlertPolicies(projectID, "", filter)
   165  	if err != nil && policies == nil && !syncArgs.continues {
   166  		logger.Error(err, "Failed to get alert policies matching filter")
   167  		return err
   168  	}
   169  	for index, policy := range policies {
   170  		policies[index].AlertPolicy, err = alertmgr.MitigateAlertPoliciesLabels(policy.AlertPolicy)
   171  		if err != nil && !syncArgs.continues {
   172  			logger.Error(err, "Failed to mitigate project alert policies labels")
   173  			return err
   174  		}
   175  	}
   176  	logger.Info("Alert Policy Reconcile Labels Completed")
   177  	return nil
   178  }
   179  
   180  // Deactivate or Delete unmanaged AlertPolicies based on the deactivation-date or deletion-date.
   181  func cleanupAndDeleteAlertPolicies(templates []*alertmgr.AlertPolicy) error {
   182  	// validate removed from template policies are removed from the project
   183  	policies, err := alertmgr.GetAlertPolicies(projectID, "", "")
   184  	if err != nil && !syncArgs.continues {
   185  		return err
   186  	}
   187  	for _, projectPolicy := range policies {
   188  		if strings.Contains(projectPolicy.DisplayName, "Managed Removed") {
   189  			if err = alertmgr.DeleteAlertPolicies([]string{projectPolicy.Name}); err != nil {
   190  				logger.Error(err, "The AlertPolicy DELETE FAILED", "display-name", projectPolicy.DisplayName)
   191  				return err
   192  			}
   193  		}
   194  	}
   195  
   196  	for _, projectPolicy := range policies {
   197  		for _, p := range templates {
   198  			if !projectPolicy.Enabled.Value && p.Enabled.Value && projectPolicy.DisplayName == p.DisplayName {
   199  				if err = alertmgr.ActivateAlertPolicy(projectID, true, false, projectPolicy.DisplayName, ""); err != nil {
   200  					logger.Error(err, "The AlertPolicy ACTIVATION FAILED", "display-name", projectPolicy.DisplayName)
   201  					return err
   202  				}
   203  			}
   204  		}
   205  	}
   206  
   207  	// delete based on deactivation-date or deletion-date
   208  	logger.Info("Alert Policy Reconcile Cleanup Started")
   209  	filter := "(NOT user_labels='managed') AND (user_labels='deactivation-date' OR user_labels='deletion-date')"
   210  	policies, err = alertmgr.GetAlertPolicies(projectID, "", filter)
   211  	if err != nil && policies == nil && !syncArgs.continues {
   212  		logger.Error(err, "Failed to get alert policies matching filter")
   213  		return err
   214  	}
   215  	for index, policy := range policies {
   216  		policies[index].AlertPolicy, err = alertmgr.CleanupAlertPolicies(policy.AlertPolicy)
   217  		if err != nil && !syncArgs.continues {
   218  			logger.Error(err, "Failed during cleanup")
   219  			return err
   220  		}
   221  	}
   222  	logger.Info("Alert Policy Reconcile Cleanup Completed")
   223  	return nil
   224  }
   225  
   226  func mitigateDuplicateAlertPolicies(templates []*alertmgr.AlertPolicy) error {
   227  	logger.Info("Alert Policy Reconcile Duplicates Started")
   228  	managedFilter := "user_labels='managed'"
   229  	managedPolicies, err := alertmgr.GetAlertPolicies(projectID, "", managedFilter)
   230  	if err != nil && managedPolicies == nil && !syncArgs.continues {
   231  		logger.Error(err, "Failed to get alert policies matching filter")
   232  		return err
   233  	}
   234  	for index, managedPolicy := range managedPolicies {
   235  		managedPolicies[index].AlertPolicy, err = alertmgr.MitigateDuplicateManaged(projectID, managedPolicy.AlertPolicy, templates)
   236  		if err != nil && !syncArgs.continues {
   237  			logger.Error(err, "Failed during mamaged mitigation")
   238  			return err
   239  		}
   240  	}
   241  
   242  	unmanagedFilter := "NOT user_labels='managed'"
   243  	unmanagedPolicies, err := alertmgr.GetAlertPolicies(projectID, "", unmanagedFilter)
   244  	if err != nil && unmanagedPolicies == nil && !syncArgs.continues {
   245  		logger.Error(err, "Failed to get alert policies matching filter")
   246  		return err
   247  	}
   248  
   249  	for index, unmanagedPolicy := range unmanagedPolicies {
   250  		logger.Info("Unmanaged alert policies", "policy-name", unmanagedPolicy.DisplayName, "policy-userlabel", unmanagedPolicy.UserLabels)
   251  
   252  		unmanagedPolicies[index].AlertPolicy, err = alertmgr.MitigateDuplicateUnmanaged(projectID, unmanagedPolicy.AlertPolicy)
   253  		if err != nil && !syncArgs.continues {
   254  			logger.Error(err, "Failed during unmamaged mitigation")
   255  			return err
   256  		}
   257  	}
   258  	logger.Info("Alert Policy Reconcile Duplicates Completed.")
   259  	return nil
   260  }
   261  
   262  func excludeTemplates(templates []*alertmgr.AlertPolicy) []*alertmgr.AlertPolicy {
   263  	var temp []*alertmgr.AlertPolicy
   264  
   265  	for _, template := range templates {
   266  		if len(template.UserLabels["exclude-env"]) > 0 {
   267  			splitLabel := regexp.MustCompile("_").Split(template.UserLabels["exclude-env"], -1)
   268  
   269  			for _, s := range splitLabel {
   270  				re := regexp.MustCompile("(?i)" + s)
   271  				if !re.MatchString(projectID) && !slices.Contains(temp, template) {
   272  					temp = append(temp, template)
   273  				}
   274  			}
   275  		} else {
   276  			temp = append(temp, template)
   277  		}
   278  	}
   279  	return temp
   280  }
   281  
   282  func includeTemplates(templates []*alertmgr.AlertPolicy) []*alertmgr.AlertPolicy {
   283  	var temp []*alertmgr.AlertPolicy
   284  
   285  	for _, template := range templates {
   286  		if len(template.UserLabels["include-env"]) > 0 {
   287  			splitLabel := regexp.MustCompile("_").Split(template.UserLabels["include-env"], -1)
   288  
   289  			for _, s := range splitLabel {
   290  				re := regexp.MustCompile("(?i)" + s)
   291  				if re.MatchString(projectID) && !slices.Contains(temp, template) {
   292  					temp = append(temp, template)
   293  				}
   294  			}
   295  		} else {
   296  			temp = append(temp, template)
   297  		}
   298  	}
   299  	return temp
   300  }
   301  
   302  // Push Templates to project. Updates existing or otherwise creates the alert policy.
   303  func pushTemplates(t []*alertmgr.AlertPolicy) {
   304  	var flag = true
   305  	var err error
   306  
   307  	for i := 0; i < len(t); i++ {
   308  		cTemp := t[i]
   309  		alertsInProj, _ := alertmgr.GetAlertPolicies(projectID, cTemp.DisplayName, "")
   310  
   311  		if alertmgr.IsDisplayInProject(cTemp, projectID) && alertmgr.IsAlertPolicyDifferent(cTemp.AlertPolicy, alertsInProj) {
   312  			if err = alertmgr.UpdateAlertPolicyFromTemplate(projectID, flag, cTemp.AlertPolicy, alertsInProj); err != nil {
   313  				logger.Error(err, "Alert policy could not be updated in this project", "alert-policy", cTemp.DisplayName, "project", projectID)
   314  			}
   315  		} else {
   316  			if err = alertmgr.CreateAlertPolicies(cTemp.AlertPolicy, projectID); err != nil {
   317  				logger.Error(err, "Alert policy could not be created in this project", "alert-policy", cTemp.DisplayName, "project", projectID)
   318  			}
   319  		}
   320  	}
   321  
   322  	logger.Info("AlertPolicy SYNC PUSH COMPLETE", "project-id", projectID)
   323  }
   324  

View as plain text