package cli import ( "context" "regexp" "slices" "flag" "path/filepath" "strings" "github.com/peterbourgon/ff/v3/ffcli" alertmgr "edge-infra.dev/pkg/lib/gcp/monitoring/alertmanager" ) type syncArgsT struct { templatePath string push bool continues bool removeNotifChannels bool notifChannelName string } var syncArgs syncArgsT var syncFlagSet = newSyncFlagSet(&syncArgs) func newSyncFlagSet(syncArgs *syncArgsT) *flag.FlagSet { syncf := newFlagSet("sync") syncf.StringVar(&syncArgs.templatePath, "path", "config/observability/alerts", "Path to alert JSON template(s). (required)\nNOTE: If `--push` is specified, template folder path must be specified. Individual files are only supported for verification and update.") syncf.BoolVar(&syncArgs.push, "push", false, "Reconcile the existing project alertPolicies and push alertPolicies templates from source. (optional)") syncf.BoolVar(&syncArgs.continues, "continue", false, "Continues processing alert policy sync process even if an issue with a template or request is encountered. (optional)") syncf.BoolVar(&syncArgs.removeNotifChannels, "removeChannels", false, "Remove Notification Channels. (optional)") syncf.StringVar(&syncArgs.notifChannelName, "channelName", "projects/PROJECT-ID/notificationChannels/NOTIFICATION-CHANNEL-ID", "link to notification channel (optional)") return syncf } var syncCmd = &ffcli.Command{ Name: "sync", ShortUsage: "sync [flags]", ShortHelp: "Sync Alert Policies", LongHelp: strings.TrimSpace(` Performs automated management of alert policies, verifies and updates alert policies template required labels, and creates or updates project alert policies from templates in the specified [templates] path. `), FlagSet: withGlobalFlags(syncFlagSet), Exec: runSync, } func runSync(_ context.Context, args []string) error { var err error if len(args) > 0 { Fatalf("too many non-flag arguments: %q", args) } if !checkSyncFlags() { Println() return flag.ErrHelp } alertmgr.Continues = syncArgs.continues templates, err := verifyTemplate() if err != nil { logger.Error(err, "Failed to verify Template", "function", "verifyTemplate") return err } // skip the templates here that include the exclude user label or include user label templates = excludeTemplates(templates) templates = includeTemplates(templates) // sync AlertPolicies from templates to the specified project. if syncArgs.push { if err = mitigateDuplicateAlertPolicies(templates); err != nil { logger.Error(err, "Failed to mitigate Duplicate AlertPolicies", "function", "mitigateDuplicateAlertPolicies") } // mitigate project AlertPolicies. if err = mitigateAlertPoliciesLabels(); err != nil { logger.Error(err, "Failed to mitigate AlertPolicies Labels", "function", "mitigateAlertPoliciesLabels") } if err = cleanupAndDeleteAlertPolicies(templates); err != nil { logger.Error(err, "Failed to cleanup AlertPolicies", "function", "cleanupAndDeleteAlertPolicies") } pushTemplates(templates) } return nil } func checkSyncFlags() bool { if syncArgs.removeNotifChannels && len(syncArgs.notifChannelName) > 0 { quickDeleteNotifChannels(projectID, syncArgs.notifChannelName) } if syncArgs.push && len(projectID) == 0 { logger.Error(nil, "Error: no value specified for [project] - a valid project-id is required") return false } path, err := filepath.Abs(syncArgs.templatePath) if err != nil || !checkPath(path, syncArgs.push) { logger.Error(err, "Failed to read template from path") return false } return true } func quickDeleteNotifChannels(projectID string, notifChannelName string) { alertmgr.DeleteNotifChannelFromPolicies(projectID, notifChannelName) alertmgr.DeleteNotifChannel(notifChannelName) } func verifyTemplate() ([]*alertmgr.AlertPolicy, error) { logger.Info("Alert Policy Template Verification Started.") sourceTemplate, err := alertmgr.ReadAlertPolicyFromPath(syncArgs.templatePath) if err != nil { return nil, err } var templates []*alertmgr.AlertPolicy for i := 0; i < len(sourceTemplate); i++ { cTemp := sourceTemplate[i] fName := filepath.Base(cTemp.TemplatePath) switch { case alertmgr.InList(cTemp.DisplayName, templates): logger.Error(nil, "alert policy name already exists in another template - check for duplicates", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("%s alert policy name already exists in another template %s", cTemp.DisplayName, fName) case !alertmgr.ValidLabels(cTemp) && !alertmgr.DuplicateLabels(cTemp): logger.Error(nil, "alert policy template contains invalid labels or duplicate labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("alert policy template contains invalid labels or duplicate labels %s", fName) case !alertmgr.HasRequiredLabels(cTemp): logger.Error(nil, "alert policy template missing required labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("alert policy template missing required labels: policy-owner or severity %s", fName) case alertmgr.HasReservedLabels(cTemp): logger.Error(nil, "alert policy template contains reserved labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("alert policy template contains reserved labels: managed-by-observability, version, deletion-date, or deactivation-date %s", fName) case !alertmgr.Documentation(cTemp): logger.Error(nil, "alert policy template missing information in documentation field", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("alert policy template must contain information in the documentation field %s", cTemp.DisplayName) case !alertmgr.Notification(cTemp): logger.Error(nil, "alert policy template missing notification-channel field", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("alert policy template must contain a notification-channel field %s", cTemp.DisplayName) case alertmgr.IsChannelLinks(cTemp.NotificationChannels): logger.Error(nil, "alert policy template notification-channel field cannot be a url", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("notification-channel field cannot be a url %s", cTemp.DisplayName) case !alertmgr.CheckConditionFilter(cTemp): logger.Error(nil, "alert policy filter condition cannot contain a project id or cluster id", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("filter condition contains a project id or cluster id %s", cTemp.DisplayName) case !alertmgr.IncludeExclude(cTemp): logger.Error(nil, "alert policy cannot contain an exclude-env user label AND an include-env user label. choose only one.", "display-name", cTemp.DisplayName, "template-name", fName) return nil, Errorf("user label section contains two competing labels %s", cTemp.DisplayName) case len(projectID) > 0: templates = append(templates, cTemp) } } logger.Info("Alert Policy Template Verification Completed.") return templates, nil } // Reconcile AlertPolicies labels by flagging unmanaged policies for deactivation and deletion. func mitigateAlertPoliciesLabels() error { logger.Info("Alert Policy Reconcile Labels Started.") filter := "(NOT user_labels='managed') AND (NOT user_labels='deactivation-date' OR NOT user_labels='deletion-date')" policies, err := alertmgr.GetAlertPolicies(projectID, "", filter) if err != nil && policies == nil && !syncArgs.continues { logger.Error(err, "Failed to get alert policies matching filter") return err } for index, policy := range policies { policies[index].AlertPolicy, err = alertmgr.MitigateAlertPoliciesLabels(policy.AlertPolicy) if err != nil && !syncArgs.continues { logger.Error(err, "Failed to mitigate project alert policies labels") return err } } logger.Info("Alert Policy Reconcile Labels Completed") return nil } // Deactivate or Delete unmanaged AlertPolicies based on the deactivation-date or deletion-date. func cleanupAndDeleteAlertPolicies(templates []*alertmgr.AlertPolicy) error { // validate removed from template policies are removed from the project policies, err := alertmgr.GetAlertPolicies(projectID, "", "") if err != nil && !syncArgs.continues { return err } for _, projectPolicy := range policies { if strings.Contains(projectPolicy.DisplayName, "Managed Removed") { if err = alertmgr.DeleteAlertPolicies([]string{projectPolicy.Name}); err != nil { logger.Error(err, "The AlertPolicy DELETE FAILED", "display-name", projectPolicy.DisplayName) return err } } } for _, projectPolicy := range policies { for _, p := range templates { if !projectPolicy.Enabled.Value && p.Enabled.Value && projectPolicy.DisplayName == p.DisplayName { if err = alertmgr.ActivateAlertPolicy(projectID, true, false, projectPolicy.DisplayName, ""); err != nil { logger.Error(err, "The AlertPolicy ACTIVATION FAILED", "display-name", projectPolicy.DisplayName) return err } } } } // delete based on deactivation-date or deletion-date logger.Info("Alert Policy Reconcile Cleanup Started") filter := "(NOT user_labels='managed') AND (user_labels='deactivation-date' OR user_labels='deletion-date')" policies, err = alertmgr.GetAlertPolicies(projectID, "", filter) if err != nil && policies == nil && !syncArgs.continues { logger.Error(err, "Failed to get alert policies matching filter") return err } for index, policy := range policies { policies[index].AlertPolicy, err = alertmgr.CleanupAlertPolicies(policy.AlertPolicy) if err != nil && !syncArgs.continues { logger.Error(err, "Failed during cleanup") return err } } logger.Info("Alert Policy Reconcile Cleanup Completed") return nil } func mitigateDuplicateAlertPolicies(templates []*alertmgr.AlertPolicy) error { logger.Info("Alert Policy Reconcile Duplicates Started") managedFilter := "user_labels='managed'" managedPolicies, err := alertmgr.GetAlertPolicies(projectID, "", managedFilter) if err != nil && managedPolicies == nil && !syncArgs.continues { logger.Error(err, "Failed to get alert policies matching filter") return err } for index, managedPolicy := range managedPolicies { managedPolicies[index].AlertPolicy, err = alertmgr.MitigateDuplicateManaged(projectID, managedPolicy.AlertPolicy, templates) if err != nil && !syncArgs.continues { logger.Error(err, "Failed during mamaged mitigation") return err } } unmanagedFilter := "NOT user_labels='managed'" unmanagedPolicies, err := alertmgr.GetAlertPolicies(projectID, "", unmanagedFilter) if err != nil && unmanagedPolicies == nil && !syncArgs.continues { logger.Error(err, "Failed to get alert policies matching filter") return err } for index, unmanagedPolicy := range unmanagedPolicies { logger.Info("Unmanaged alert policies", "policy-name", unmanagedPolicy.DisplayName, "policy-userlabel", unmanagedPolicy.UserLabels) unmanagedPolicies[index].AlertPolicy, err = alertmgr.MitigateDuplicateUnmanaged(projectID, unmanagedPolicy.AlertPolicy) if err != nil && !syncArgs.continues { logger.Error(err, "Failed during unmamaged mitigation") return err } } logger.Info("Alert Policy Reconcile Duplicates Completed.") return nil } func excludeTemplates(templates []*alertmgr.AlertPolicy) []*alertmgr.AlertPolicy { var temp []*alertmgr.AlertPolicy for _, template := range templates { if len(template.UserLabels["exclude-env"]) > 0 { splitLabel := regexp.MustCompile("_").Split(template.UserLabels["exclude-env"], -1) for _, s := range splitLabel { re := regexp.MustCompile("(?i)" + s) if !re.MatchString(projectID) && !slices.Contains(temp, template) { temp = append(temp, template) } } } else { temp = append(temp, template) } } return temp } func includeTemplates(templates []*alertmgr.AlertPolicy) []*alertmgr.AlertPolicy { var temp []*alertmgr.AlertPolicy for _, template := range templates { if len(template.UserLabels["include-env"]) > 0 { splitLabel := regexp.MustCompile("_").Split(template.UserLabels["include-env"], -1) for _, s := range splitLabel { re := regexp.MustCompile("(?i)" + s) if re.MatchString(projectID) && !slices.Contains(temp, template) { temp = append(temp, template) } } } else { temp = append(temp, template) } } return temp } // Push Templates to project. Updates existing or otherwise creates the alert policy. func pushTemplates(t []*alertmgr.AlertPolicy) { var flag = true var err error for i := 0; i < len(t); i++ { cTemp := t[i] alertsInProj, _ := alertmgr.GetAlertPolicies(projectID, cTemp.DisplayName, "") if alertmgr.IsDisplayInProject(cTemp, projectID) && alertmgr.IsAlertPolicyDifferent(cTemp.AlertPolicy, alertsInProj) { if err = alertmgr.UpdateAlertPolicyFromTemplate(projectID, flag, cTemp.AlertPolicy, alertsInProj); err != nil { logger.Error(err, "Alert policy could not be updated in this project", "alert-policy", cTemp.DisplayName, "project", projectID) } } else { if err = alertmgr.CreateAlertPolicies(cTemp.AlertPolicy, projectID); err != nil { logger.Error(err, "Alert policy could not be created in this project", "alert-policy", cTemp.DisplayName, "project", projectID) } } } logger.Info("AlertPolicy SYNC PUSH COMPLETE", "project-id", projectID) }