1 package cli
2
3 import (
4 "context"
5 "regexp"
6 "slices"
7
8 "flag"
9 "path/filepath"
10 "strings"
11
12 "github.com/peterbourgon/ff/v3/ffcli"
13
14 alertmgr "edge-infra.dev/pkg/lib/gcp/monitoring/alertmanager"
15 )
16
// syncArgsT holds the values parsed from the "sync" subcommand's flags.
type syncArgsT struct {
	// templatePath is bound to the --path flag.
	templatePath string
	// push, continues and removeNotifChannels are bound to the --push,
	// --continue and --removeChannels flags respectively.
	push, continues, removeNotifChannels bool
	// notifChannelName is bound to the --channelName flag.
	notifChannelName string
}
24
25 var syncArgs syncArgsT
26 var syncFlagSet = newSyncFlagSet(&syncArgs)
27
28 func newSyncFlagSet(syncArgs *syncArgsT) *flag.FlagSet {
29 syncf := newFlagSet("sync")
30 syncf.StringVar(&syncArgs.templatePath, "path", "config/observability/alerts", "Path to alert JSON template(s). (required)\nNOTE: If `--push` is specified, template folder path must be specified. Individual files are only supported for verification and update.")
31 syncf.BoolVar(&syncArgs.push, "push", false, "Reconcile the existing project alertPolicies and push alertPolicies templates from source. (optional)")
32 syncf.BoolVar(&syncArgs.continues, "continue", false, "Continues processing alert policy sync process even if an issue with a template or request is encountered. (optional)")
33 syncf.BoolVar(&syncArgs.removeNotifChannels, "removeChannels", false, "Remove Notification Channels. (optional)")
34 syncf.StringVar(&syncArgs.notifChannelName, "channelName", "projects/PROJECT-ID/notificationChannels/NOTIFICATION-CHANNEL-ID", "link to notification channel (optional)")
35 return syncf
36 }
37
38 var syncCmd = &ffcli.Command{
39 Name: "sync",
40 ShortUsage: "sync [flags]",
41 ShortHelp: "Sync Alert Policies",
42 LongHelp: strings.TrimSpace(`
43 Performs automated management of alert policies, verifies and updates alert policies template required labels, and creates or updates project alert policies from templates in the specified [templates] path.
44 `),
45 FlagSet: withGlobalFlags(syncFlagSet),
46 Exec: runSync,
47 }
48
49 func runSync(_ context.Context, args []string) error {
50 var err error
51
52 if len(args) > 0 {
53 Fatalf("too many non-flag arguments: %q", args)
54 }
55 if !checkSyncFlags() {
56 Println()
57 return flag.ErrHelp
58 }
59
60 alertmgr.Continues = syncArgs.continues
61
62 templates, err := verifyTemplate()
63 if err != nil {
64 logger.Error(err, "Failed to verify Template", "function", "verifyTemplate")
65 return err
66 }
67
68
69 templates = excludeTemplates(templates)
70 templates = includeTemplates(templates)
71
72
73 if syncArgs.push {
74 if err = mitigateDuplicateAlertPolicies(templates); err != nil {
75 logger.Error(err, "Failed to mitigate Duplicate AlertPolicies", "function", "mitigateDuplicateAlertPolicies")
76 }
77
78 if err = mitigateAlertPoliciesLabels(); err != nil {
79 logger.Error(err, "Failed to mitigate AlertPolicies Labels", "function", "mitigateAlertPoliciesLabels")
80 }
81 if err = cleanupAndDeleteAlertPolicies(templates); err != nil {
82 logger.Error(err, "Failed to cleanup AlertPolicies", "function", "cleanupAndDeleteAlertPolicies")
83 }
84 pushTemplates(templates)
85 }
86 return nil
87 }
88
89 func checkSyncFlags() bool {
90 if syncArgs.removeNotifChannels && len(syncArgs.notifChannelName) > 0 {
91 quickDeleteNotifChannels(projectID, syncArgs.notifChannelName)
92 }
93
94 if syncArgs.push && len(projectID) == 0 {
95 logger.Error(nil, "Error: no value specified for [project] - a valid project-id is required")
96 return false
97 }
98
99 path, err := filepath.Abs(syncArgs.templatePath)
100 if err != nil || !checkPath(path, syncArgs.push) {
101 logger.Error(err, "Failed to read template from path")
102 return false
103 }
104 return true
105 }
106
107 func quickDeleteNotifChannels(projectID string, notifChannelName string) {
108 alertmgr.DeleteNotifChannelFromPolicies(projectID, notifChannelName)
109 alertmgr.DeleteNotifChannel(notifChannelName)
110 }
111
112 func verifyTemplate() ([]*alertmgr.AlertPolicy, error) {
113 logger.Info("Alert Policy Template Verification Started.")
114 sourceTemplate, err := alertmgr.ReadAlertPolicyFromPath(syncArgs.templatePath)
115 if err != nil {
116 return nil, err
117 }
118
119 var templates []*alertmgr.AlertPolicy
120 for i := 0; i < len(sourceTemplate); i++ {
121 cTemp := sourceTemplate[i]
122 fName := filepath.Base(cTemp.TemplatePath)
123
124 switch {
125 case alertmgr.InList(cTemp.DisplayName, templates):
126 logger.Error(nil, "alert policy name already exists in another template - check for duplicates", "display-name", cTemp.DisplayName, "template-name", fName)
127 return nil, Errorf("%s alert policy name already exists in another template %s", cTemp.DisplayName, fName)
128 case !alertmgr.ValidLabels(cTemp) && !alertmgr.DuplicateLabels(cTemp):
129 logger.Error(nil, "alert policy template contains invalid labels or duplicate labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName)
130 return nil, Errorf("alert policy template contains invalid labels or duplicate labels %s", fName)
131 case !alertmgr.HasRequiredLabels(cTemp):
132 logger.Error(nil, "alert policy template missing required labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName)
133 return nil, Errorf("alert policy template missing required labels: policy-owner or severity %s", fName)
134 case alertmgr.HasReservedLabels(cTemp):
135 logger.Error(nil, "alert policy template contains reserved labels - review the Alert Policy Template spec", "display-name", cTemp.DisplayName, "template-name", fName)
136 return nil, Errorf("alert policy template contains reserved labels: managed-by-observability, version, deletion-date, or deactivation-date %s", fName)
137 case !alertmgr.Documentation(cTemp):
138 logger.Error(nil, "alert policy template missing information in documentation field", "display-name", cTemp.DisplayName, "template-name", fName)
139 return nil, Errorf("alert policy template must contain information in the documentation field %s", cTemp.DisplayName)
140 case !alertmgr.Notification(cTemp):
141 logger.Error(nil, "alert policy template missing notification-channel field", "display-name", cTemp.DisplayName, "template-name", fName)
142 return nil, Errorf("alert policy template must contain a notification-channel field %s", cTemp.DisplayName)
143 case alertmgr.IsChannelLinks(cTemp.NotificationChannels):
144 logger.Error(nil, "alert policy template notification-channel field cannot be a url", "display-name", cTemp.DisplayName, "template-name", fName)
145 return nil, Errorf("notification-channel field cannot be a url %s", cTemp.DisplayName)
146 case !alertmgr.CheckConditionFilter(cTemp):
147 logger.Error(nil, "alert policy filter condition cannot contain a project id or cluster id", "display-name", cTemp.DisplayName, "template-name", fName)
148 return nil, Errorf("filter condition contains a project id or cluster id %s", cTemp.DisplayName)
149 case !alertmgr.IncludeExclude(cTemp):
150 logger.Error(nil, "alert policy cannot contain an exclude-env user label AND an include-env user label. choose only one.", "display-name", cTemp.DisplayName, "template-name", fName)
151 return nil, Errorf("user label section contains two competing labels %s", cTemp.DisplayName)
152 case len(projectID) > 0:
153 templates = append(templates, cTemp)
154 }
155 }
156 logger.Info("Alert Policy Template Verification Completed.")
157 return templates, nil
158 }
159
160
161 func mitigateAlertPoliciesLabels() error {
162 logger.Info("Alert Policy Reconcile Labels Started.")
163 filter := "(NOT user_labels='managed') AND (NOT user_labels='deactivation-date' OR NOT user_labels='deletion-date')"
164 policies, err := alertmgr.GetAlertPolicies(projectID, "", filter)
165 if err != nil && policies == nil && !syncArgs.continues {
166 logger.Error(err, "Failed to get alert policies matching filter")
167 return err
168 }
169 for index, policy := range policies {
170 policies[index].AlertPolicy, err = alertmgr.MitigateAlertPoliciesLabels(policy.AlertPolicy)
171 if err != nil && !syncArgs.continues {
172 logger.Error(err, "Failed to mitigate project alert policies labels")
173 return err
174 }
175 }
176 logger.Info("Alert Policy Reconcile Labels Completed")
177 return nil
178 }
179
180
181 func cleanupAndDeleteAlertPolicies(templates []*alertmgr.AlertPolicy) error {
182
183 policies, err := alertmgr.GetAlertPolicies(projectID, "", "")
184 if err != nil && !syncArgs.continues {
185 return err
186 }
187 for _, projectPolicy := range policies {
188 if strings.Contains(projectPolicy.DisplayName, "Managed Removed") {
189 if err = alertmgr.DeleteAlertPolicies([]string{projectPolicy.Name}); err != nil {
190 logger.Error(err, "The AlertPolicy DELETE FAILED", "display-name", projectPolicy.DisplayName)
191 return err
192 }
193 }
194 }
195
196 for _, projectPolicy := range policies {
197 for _, p := range templates {
198 if !projectPolicy.Enabled.Value && p.Enabled.Value && projectPolicy.DisplayName == p.DisplayName {
199 if err = alertmgr.ActivateAlertPolicy(projectID, true, false, projectPolicy.DisplayName, ""); err != nil {
200 logger.Error(err, "The AlertPolicy ACTIVATION FAILED", "display-name", projectPolicy.DisplayName)
201 return err
202 }
203 }
204 }
205 }
206
207
208 logger.Info("Alert Policy Reconcile Cleanup Started")
209 filter := "(NOT user_labels='managed') AND (user_labels='deactivation-date' OR user_labels='deletion-date')"
210 policies, err = alertmgr.GetAlertPolicies(projectID, "", filter)
211 if err != nil && policies == nil && !syncArgs.continues {
212 logger.Error(err, "Failed to get alert policies matching filter")
213 return err
214 }
215 for index, policy := range policies {
216 policies[index].AlertPolicy, err = alertmgr.CleanupAlertPolicies(policy.AlertPolicy)
217 if err != nil && !syncArgs.continues {
218 logger.Error(err, "Failed during cleanup")
219 return err
220 }
221 }
222 logger.Info("Alert Policy Reconcile Cleanup Completed")
223 return nil
224 }
225
226 func mitigateDuplicateAlertPolicies(templates []*alertmgr.AlertPolicy) error {
227 logger.Info("Alert Policy Reconcile Duplicates Started")
228 managedFilter := "user_labels='managed'"
229 managedPolicies, err := alertmgr.GetAlertPolicies(projectID, "", managedFilter)
230 if err != nil && managedPolicies == nil && !syncArgs.continues {
231 logger.Error(err, "Failed to get alert policies matching filter")
232 return err
233 }
234 for index, managedPolicy := range managedPolicies {
235 managedPolicies[index].AlertPolicy, err = alertmgr.MitigateDuplicateManaged(projectID, managedPolicy.AlertPolicy, templates)
236 if err != nil && !syncArgs.continues {
237 logger.Error(err, "Failed during mamaged mitigation")
238 return err
239 }
240 }
241
242 unmanagedFilter := "NOT user_labels='managed'"
243 unmanagedPolicies, err := alertmgr.GetAlertPolicies(projectID, "", unmanagedFilter)
244 if err != nil && unmanagedPolicies == nil && !syncArgs.continues {
245 logger.Error(err, "Failed to get alert policies matching filter")
246 return err
247 }
248
249 for index, unmanagedPolicy := range unmanagedPolicies {
250 logger.Info("Unmanaged alert policies", "policy-name", unmanagedPolicy.DisplayName, "policy-userlabel", unmanagedPolicy.UserLabels)
251
252 unmanagedPolicies[index].AlertPolicy, err = alertmgr.MitigateDuplicateUnmanaged(projectID, unmanagedPolicy.AlertPolicy)
253 if err != nil && !syncArgs.continues {
254 logger.Error(err, "Failed during unmamaged mitigation")
255 return err
256 }
257 }
258 logger.Info("Alert Policy Reconcile Duplicates Completed.")
259 return nil
260 }
261
262 func excludeTemplates(templates []*alertmgr.AlertPolicy) []*alertmgr.AlertPolicy {
263 var temp []*alertmgr.AlertPolicy
264
265 for _, template := range templates {
266 if len(template.UserLabels["exclude-env"]) > 0 {
267 splitLabel := regexp.MustCompile("_").Split(template.UserLabels["exclude-env"], -1)
268
269 for _, s := range splitLabel {
270 re := regexp.MustCompile("(?i)" + s)
271 if !re.MatchString(projectID) && !slices.Contains(temp, template) {
272 temp = append(temp, template)
273 }
274 }
275 } else {
276 temp = append(temp, template)
277 }
278 }
279 return temp
280 }
281
282 func includeTemplates(templates []*alertmgr.AlertPolicy) []*alertmgr.AlertPolicy {
283 var temp []*alertmgr.AlertPolicy
284
285 for _, template := range templates {
286 if len(template.UserLabels["include-env"]) > 0 {
287 splitLabel := regexp.MustCompile("_").Split(template.UserLabels["include-env"], -1)
288
289 for _, s := range splitLabel {
290 re := regexp.MustCompile("(?i)" + s)
291 if re.MatchString(projectID) && !slices.Contains(temp, template) {
292 temp = append(temp, template)
293 }
294 }
295 } else {
296 temp = append(temp, template)
297 }
298 }
299 return temp
300 }
301
302
303 func pushTemplates(t []*alertmgr.AlertPolicy) {
304 var flag = true
305 var err error
306
307 for i := 0; i < len(t); i++ {
308 cTemp := t[i]
309 alertsInProj, _ := alertmgr.GetAlertPolicies(projectID, cTemp.DisplayName, "")
310
311 if alertmgr.IsDisplayInProject(cTemp, projectID) && alertmgr.IsAlertPolicyDifferent(cTemp.AlertPolicy, alertsInProj) {
312 if err = alertmgr.UpdateAlertPolicyFromTemplate(projectID, flag, cTemp.AlertPolicy, alertsInProj); err != nil {
313 logger.Error(err, "Alert policy could not be updated in this project", "alert-policy", cTemp.DisplayName, "project", projectID)
314 }
315 } else {
316 if err = alertmgr.CreateAlertPolicies(cTemp.AlertPolicy, projectID); err != nil {
317 logger.Error(err, "Alert policy could not be created in this project", "alert-policy", cTemp.DisplayName, "project", projectID)
318 }
319 }
320 }
321
322 logger.Info("AlertPolicy SYNC PUSH COMPLETE", "project-id", projectID)
323 }
324
View as plain text