
Source file src/github.com/prometheus/alertmanager/notify/notify_test.go

Documentation: github.com/prometheus/alertmanager/notify

// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package notify

import (
	"context"
	"errors"
	"fmt"
	"io"
	"reflect"
	"testing"
	"time"

	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/stretchr/testify/require"
	"gopkg.in/yaml.v2"

	"github.com/prometheus/alertmanager/nflog"
	"github.com/prometheus/alertmanager/nflog/nflogpb"
	"github.com/prometheus/alertmanager/silence"
	"github.com/prometheus/alertmanager/silence/silencepb"
	"github.com/prometheus/alertmanager/timeinterval"
	"github.com/prometheus/alertmanager/types"
)

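// sendResolved is a test helper that satisfies the SendResolved() bool
// method, letting tests toggle an integration's send-resolved behaviour.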
type sendResolved bool

func (s sendResolved) SendResolved() bool {
	return bool(s)
}

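// notifierFunc adapts an ordinary function to the Notifier interface so
// tests can define notification behaviour inline, e.g.:
//
//	n := notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
//		return false, nil // delivered successfully, no retry needed
//	})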
type notifierFunc func(ctx context.Context, alerts ...*types.Alert) (bool, error)

func (f notifierFunc) Notify(ctx context.Context, alerts ...*types.Alert) (bool, error) {
	return f(ctx, alerts...)
}

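// failStage is a Stage whose Exec always fails with a static error, used
// to verify error propagation.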
type failStage struct{}

func (s failStage) Exec(ctx context.Context, l log.Logger, as ...*types.Alert) (context.Context, []*types.Alert, error) {
	return ctx, nil, fmt.Errorf("some error")
}

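// testNflog is an in-memory fake of the notification log. Query returns
// the canned qres/qerr values, and Log delegates to logFunc so tests can
// assert on the logged arguments.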
type testNflog struct {
	qres []*nflogpb.Entry
	qerr error

	logFunc func(r *nflogpb.Receiver, gkey string, firingAlerts, resolvedAlerts []uint64, expiry time.Duration) error
}

func (l *testNflog) Query(p ...nflog.QueryParam) ([]*nflogpb.Entry, error) {
	return l.qres, l.qerr
}

func (l *testNflog) Log(r *nflogpb.Receiver, gkey string, firingAlerts, resolvedAlerts []uint64, expiry time.Duration) error {
	return l.logFunc(r, gkey, firingAlerts, resolvedAlerts, expiry)
}

func (l *testNflog) GC() (int, error) {
	return 0, nil
}

func (l *testNflog) Snapshot(w io.Writer) (int, error) {
	return 0, nil
}

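// alertHashSet builds a set from the given alert hashes, e.g.
// alertHashSet(1, 2) == map[uint64]struct{}{1: {}, 2: {}}.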
func alertHashSet(hashes ...uint64) map[uint64]struct{} {
	res := map[uint64]struct{}{}

	for _, h := range hashes {
		res[h] = struct{}{}
	}

	return res
}

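// TestDedupStageNeedsUpdate table-tests DedupStage.needsUpdate: an update
// is needed when no matching nflog entry exists for firing alerts, when
// the alert sets have changed, or when repeat_interval has elapsed.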
func TestDedupStageNeedsUpdate(t *testing.T) {
	now := utcNow()

	cases := []struct {
		entry          *nflogpb.Entry
		firingAlerts   map[uint64]struct{}
		resolvedAlerts map[uint64]struct{}
		repeat         time.Duration
		resolve        bool

		res bool
	}{
		{
			// No matching nflog entry should update.
			entry:        nil,
			firingAlerts: alertHashSet(2, 3, 4),
			res:          true,
		}, {
			// No matching nflog entry shouldn't update if no alert fires.
			entry:          nil,
			resolvedAlerts: alertHashSet(2, 3, 4),
			res:            false,
		}, {
			// Different sets of firing alerts should update.
			entry:        &nflogpb.Entry{FiringAlerts: []uint64{1, 2, 3}},
			firingAlerts: alertHashSet(2, 3, 4),
			res:          true,
		}, {
			// Zero timestamp in the nflog entry should always update.
			entry: &nflogpb.Entry{
				FiringAlerts: []uint64{1, 2, 3},
				Timestamp:    time.Time{},
			},
			firingAlerts: alertHashSet(1, 2, 3),
			res:          true,
		}, {
			// Identical sets of alerts shouldn't update before repeat_interval.
			entry: &nflogpb.Entry{
				FiringAlerts: []uint64{1, 2, 3},
				Timestamp:    now.Add(-9 * time.Minute),
			},
			repeat:       10 * time.Minute,
			firingAlerts: alertHashSet(1, 2, 3),
			res:          false,
		}, {
			// Identical sets of alerts should update after repeat_interval.
			entry: &nflogpb.Entry{
				FiringAlerts: []uint64{1, 2, 3},
				Timestamp:    now.Add(-11 * time.Minute),
			},
			repeat:       10 * time.Minute,
			firingAlerts: alertHashSet(1, 2, 3),
			res:          true,
		}, {
			// Different sets of resolved alerts without firing alerts shouldn't update after repeat_interval.
			entry: &nflogpb.Entry{
				ResolvedAlerts: []uint64{1, 2, 3},
				Timestamp:      now.Add(-11 * time.Minute),
			},
			repeat:         10 * time.Minute,
			resolvedAlerts: alertHashSet(3, 4, 5),
			resolve:        true,
			res:            false,
		}, {
			// Different sets of resolved alerts shouldn't update when resolve is false.
			entry: &nflogpb.Entry{
				FiringAlerts:   []uint64{1, 2},
				ResolvedAlerts: []uint64{3},
				Timestamp:      now.Add(-9 * time.Minute),
			},
			repeat:         10 * time.Minute,
			firingAlerts:   alertHashSet(1),
			resolvedAlerts: alertHashSet(2, 3),
			resolve:        false,
			res:            false,
		}, {
			// Different sets of resolved alerts should update when resolve is true.
			entry: &nflogpb.Entry{
				FiringAlerts:   []uint64{1, 2},
				ResolvedAlerts: []uint64{3},
				Timestamp:      now.Add(-9 * time.Minute),
			},
			repeat:         10 * time.Minute,
			firingAlerts:   alertHashSet(1),
			resolvedAlerts: alertHashSet(2, 3),
			resolve:        true,
			res:            true,
		}, {
			// Empty set of firing alerts should update when resolve is false.
			entry: &nflogpb.Entry{
				FiringAlerts:   []uint64{1, 2},
				ResolvedAlerts: []uint64{3},
				Timestamp:      now.Add(-9 * time.Minute),
			},
			repeat:         10 * time.Minute,
			firingAlerts:   alertHashSet(),
			resolvedAlerts: alertHashSet(1, 2, 3),
			resolve:        false,
			res:            true,
		}, {
			// Empty set of firing alerts should update when resolve is true.
			entry: &nflogpb.Entry{
				FiringAlerts:   []uint64{1, 2},
				ResolvedAlerts: []uint64{3},
				Timestamp:      now.Add(-9 * time.Minute),
			},
			repeat:         10 * time.Minute,
			firingAlerts:   alertHashSet(),
			resolvedAlerts: alertHashSet(1, 2, 3),
			resolve:        true,
			res:            true,
		},
	}
	for i, c := range cases {
		t.Log("case", i)

		s := &DedupStage{
			now: func() time.Time { return now },
			rs:  sendResolved(c.resolve),
		}
		res := s.needsUpdate(c.entry, c.firingAlerts, c.resolvedAlerts, c.repeat)
		require.Equal(t, c.res, res)
	}
}

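// TestDedupStage exercises DedupStage.Exec end to end: it requires a group
// key and repeat interval in the context, propagates nflog query errors
// (except ErrNotFound), and suppresses or forwards alerts based on
// needsUpdate.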
func TestDedupStage(t *testing.T) {
	i := 0
	now := utcNow()
	s := &DedupStage{
		hash: func(a *types.Alert) uint64 {
			res := uint64(i)
			i++
			return res
		},
		now: func() time.Time {
			return now
		},
		rs: sendResolved(false),
	}

	ctx := context.Background()

	_, _, err := s.Exec(ctx, log.NewNopLogger())
	require.EqualError(t, err, "group key missing")

	ctx = WithGroupKey(ctx, "1")

	_, _, err = s.Exec(ctx, log.NewNopLogger())
	require.EqualError(t, err, "repeat interval missing")

	ctx = WithRepeatInterval(ctx, time.Hour)

	alerts := []*types.Alert{{}, {}, {}}

	// Must catch notification log query errors.
	s.nflog = &testNflog{
		qerr: errors.New("bad things"),
	}
	ctx, _, err = s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.EqualError(t, err, "bad things")

	// ... but skip ErrNotFound.
	s.nflog = &testNflog{
		qerr: nflog.ErrNotFound,
	}
	ctx, res, err := s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.NoError(t, err, "unexpected error on not found log entry")
	require.Equal(t, alerts, res, "input alerts differ from result alerts")

	s.nflog = &testNflog{
		qerr: nil,
		qres: []*nflogpb.Entry{
			{FiringAlerts: []uint64{0, 1, 2}},
			{FiringAlerts: []uint64{1, 2, 3}},
		},
	}
	ctx, _, err = s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.Contains(t, err.Error(), "result size")

	// Must return no error and no alerts when no update is needed.
	i = 0
	s.nflog = &testNflog{
		qerr: nflog.ErrNotFound,
		qres: []*nflogpb.Entry{
			{
				FiringAlerts: []uint64{0, 1, 2},
				Timestamp:    now,
			},
		},
	}
	ctx, res, err = s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.NoError(t, err)
	require.Nil(t, res, "unexpected alerts returned")

	// Must return no error and all input alerts on changes.
	i = 0
	s.nflog = &testNflog{
		qerr: nil,
		qres: []*nflogpb.Entry{
			{
				FiringAlerts: []uint64{1, 2, 3, 4},
				Timestamp:    now,
			},
		},
	}
	_, res, err = s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.NoError(t, err)
	require.Equal(t, alerts, res, "unexpected alerts returned")
}

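// TestMultiStage verifies that MultiStage runs its sub-stages in order,
// passing each stage the previous stage's output and threading the
// context through.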
func TestMultiStage(t *testing.T) {
	var (
		alerts1 = []*types.Alert{{}}
		alerts2 = []*types.Alert{{}, {}}
		alerts3 = []*types.Alert{{}, {}, {}}
	)

	stage := MultiStage{
		StageFunc(func(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
			if !reflect.DeepEqual(alerts, alerts1) {
				t.Fatal("Input not equal to input of MultiStage")
			}
			//nolint:staticcheck // Ignore SA1029
			ctx = context.WithValue(ctx, "key", "value")
			return ctx, alerts2, nil
		}),
		StageFunc(func(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
			if !reflect.DeepEqual(alerts, alerts2) {
				t.Fatal("Input not equal to output of previous stage")
			}
			v, ok := ctx.Value("key").(string)
			if !ok || v != "value" {
				t.Fatalf("Expected value %q for key %q but got %q", "value", "key", v)
			}
			return ctx, alerts3, nil
		}),
	}

	_, alerts, err := stage.Exec(context.Background(), log.NewNopLogger(), alerts1...)
	if err != nil {
		t.Fatalf("Exec failed: %s", err)
	}

	if !reflect.DeepEqual(alerts, alerts3) {
		t.Fatal("Output of MultiStage is not equal to the output of the last stage")
	}
}

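// TestMultiStageFailure verifies that a sub-stage error is propagated by
// MultiStage.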
func TestMultiStageFailure(t *testing.T) {
	var (
		ctx   = context.Background()
		s1    = failStage{}
		stage = MultiStage{s1}
	)

	_, _, err := stage.Exec(ctx, log.NewNopLogger(), nil)
	if err.Error() != "some error" {
		t.Fatal("Errors were not propagated correctly by MultiStage")
	}
}

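// TestRoutingStage verifies that RoutingStage dispatches to the stage
// registered under the receiver name carried in the context.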
func TestRoutingStage(t *testing.T) {
	var (
		alerts1 = []*types.Alert{{}}
		alerts2 = []*types.Alert{{}, {}}
	)

	stage := RoutingStage{
		"name": StageFunc(func(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
			if !reflect.DeepEqual(alerts, alerts1) {
				t.Fatal("Input not equal to input of RoutingStage")
			}
			return ctx, alerts2, nil
		}),
		"not": failStage{},
	}

	ctx := WithReceiverName(context.Background(), "name")

	_, alerts, err := stage.Exec(ctx, log.NewNopLogger(), alerts1...)
	if err != nil {
		t.Fatalf("Exec failed: %s", err)
	}

	if !reflect.DeepEqual(alerts, alerts2) {
		t.Fatal("Output of RoutingStage is not equal to the output of the inner stage")
	}
}

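// TestRetryStageWithError verifies that RetryStage retries after a
// recoverable notifier error and fails immediately on an unrecoverable one.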
func TestRetryStageWithError(t *testing.T) {
	fail, retry := true, true
	sent := []*types.Alert{}
	i := Integration{
		notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
			if fail {
				fail = false
				return retry, errors.New("fail to deliver notification")
			}
			sent = append(sent, alerts...)
			return false, nil
		}),
		rs: sendResolved(false),
	}
	r := RetryStage{
		integration: i,
		metrics:     NewMetrics(prometheus.NewRegistry()),
	}

	alerts := []*types.Alert{
		{
			Alert: model.Alert{
				EndsAt: time.Now().Add(time.Hour),
			},
		},
	}

	ctx := context.Background()
	ctx = WithFiringAlerts(ctx, []uint64{0})

	// Notify with a recoverable error should retry and succeed.
	resctx, res, err := r.Exec(ctx, log.NewNopLogger(), alerts...)
	require.Nil(t, err)
	require.Equal(t, alerts, res)
	require.Equal(t, alerts, sent)
	require.NotNil(t, resctx)

	// Notify with an unrecoverable error should fail.
	sent = sent[:0]
	fail = true
	retry = false
	resctx, _, err = r.Exec(ctx, log.NewNopLogger(), alerts...)
	require.NotNil(t, err)
	require.NotNil(t, resctx)
}

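// TestRetryStageNoResolved verifies that, with send_resolved disabled,
// RetryStage requires firing alerts in the context, sends only alerts that
// are still firing, and sends nothing once all alerts are resolved.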
func TestRetryStageNoResolved(t *testing.T) {
	sent := []*types.Alert{}
	i := Integration{
		notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
			sent = append(sent, alerts...)
			return false, nil
		}),
		rs: sendResolved(false),
	}
	r := RetryStage{
		integration: i,
		metrics:     NewMetrics(prometheus.NewRegistry()),
	}

	alerts := []*types.Alert{
		{
			Alert: model.Alert{
				EndsAt: time.Now().Add(-time.Hour),
			},
		},
		{
			Alert: model.Alert{
				EndsAt: time.Now().Add(time.Hour),
			},
		},
	}

	ctx := context.Background()

	resctx, res, err := r.Exec(ctx, log.NewNopLogger(), alerts...)
	require.EqualError(t, err, "firing alerts missing")
	require.Nil(t, res)
	require.NotNil(t, resctx)

	ctx = WithFiringAlerts(ctx, []uint64{0})

	resctx, res, err = r.Exec(ctx, log.NewNopLogger(), alerts...)
	require.Nil(t, err)
	require.Equal(t, alerts, res)
	require.Equal(t, []*types.Alert{alerts[1]}, sent)
	require.NotNil(t, resctx)

	// All alerts are resolved.
	sent = sent[:0]
	ctx = WithFiringAlerts(ctx, []uint64{})
	alerts[1].Alert.EndsAt = time.Now().Add(-time.Hour)

	resctx, res, err = r.Exec(ctx, log.NewNopLogger(), alerts...)
	require.Nil(t, err)
	require.Equal(t, alerts, res)
	require.Equal(t, []*types.Alert{}, sent)
	require.NotNil(t, resctx)
}

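// TestRetryStageSendResolved verifies that, with send_resolved enabled,
// RetryStage sends resolved alerts alongside firing ones, including when
// every alert is resolved.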
func TestRetryStageSendResolved(t *testing.T) {
	sent := []*types.Alert{}
	i := Integration{
		notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
			sent = append(sent, alerts...)
			return false, nil
		}),
		rs: sendResolved(true),
	}
	r := RetryStage{
		integration: i,
		metrics:     NewMetrics(prometheus.NewRegistry()),
	}

	alerts := []*types.Alert{
		{
			Alert: model.Alert{
				EndsAt: time.Now().Add(-time.Hour),
			},
		},
		{
			Alert: model.Alert{
				EndsAt: time.Now().Add(time.Hour),
			},
		},
	}

	ctx := context.Background()
	ctx = WithFiringAlerts(ctx, []uint64{0})

	resctx, res, err := r.Exec(ctx, log.NewNopLogger(), alerts...)
	require.Nil(t, err)
	require.Equal(t, alerts, res)
	require.Equal(t, alerts, sent)
	require.NotNil(t, resctx)

	// All alerts are resolved.
	sent = sent[:0]
	ctx = WithFiringAlerts(ctx, []uint64{})
	alerts[1].Alert.EndsAt = time.Now().Add(-time.Hour)

	resctx, res, err = r.Exec(ctx, log.NewNopLogger(), alerts...)
	require.Nil(t, err)
	require.Equal(t, alerts, res)
	require.Equal(t, alerts, sent)
	require.NotNil(t, resctx)
}

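// TestSetNotifiesStage verifies that SetNotifiesStage requires the group
// key and the firing and resolved alert sets in the context, and logs them
// with an expiry of twice the repeat interval.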
func TestSetNotifiesStage(t *testing.T) {
	tnflog := &testNflog{}
	s := &SetNotifiesStage{
		recv:  &nflogpb.Receiver{GroupName: "test"},
		nflog: tnflog,
	}
	alerts := []*types.Alert{{}, {}, {}}
	ctx := context.Background()

	resctx, res, err := s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.EqualError(t, err, "group key missing")
	require.Nil(t, res)
	require.NotNil(t, resctx)

	ctx = WithGroupKey(ctx, "1")

	resctx, res, err = s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.EqualError(t, err, "firing alerts missing")
	require.Nil(t, res)
	require.NotNil(t, resctx)

	ctx = WithFiringAlerts(ctx, []uint64{0, 1, 2})

	resctx, res, err = s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.EqualError(t, err, "resolved alerts missing")
	require.Nil(t, res)
	require.NotNil(t, resctx)

	ctx = WithResolvedAlerts(ctx, []uint64{})
	ctx = WithRepeatInterval(ctx, time.Hour)

	tnflog.logFunc = func(r *nflogpb.Receiver, gkey string, firingAlerts, resolvedAlerts []uint64, expiry time.Duration) error {
		require.Equal(t, s.recv, r)
		require.Equal(t, "1", gkey)
		require.Equal(t, []uint64{0, 1, 2}, firingAlerts)
		require.Equal(t, []uint64{}, resolvedAlerts)
		require.Equal(t, 2*time.Hour, expiry)
		return nil
	}
	resctx, res, err = s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.Nil(t, err)
	require.Equal(t, alerts, res)
	require.NotNil(t, resctx)

	ctx = WithFiringAlerts(ctx, []uint64{})
	ctx = WithResolvedAlerts(ctx, []uint64{0, 1, 2})

	tnflog.logFunc = func(r *nflogpb.Receiver, gkey string, firingAlerts, resolvedAlerts []uint64, expiry time.Duration) error {
		require.Equal(t, s.recv, r)
		require.Equal(t, "1", gkey)
		require.Equal(t, []uint64{}, firingAlerts)
		require.Equal(t, []uint64{0, 1, 2}, resolvedAlerts)
		require.Equal(t, 2*time.Hour, expiry)
		return nil
	}
	resctx, res, err = s.Exec(ctx, log.NewNopLogger(), alerts...)
	require.Nil(t, err)
	require.Equal(t, alerts, res)
	require.NotNil(t, resctx)
}

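// TestMuteStage verifies that MuteStage drops alerts matched by the given
// Muter and passes all others through unchanged.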
func TestMuteStage(t *testing.T) {
	// Mute all label sets that have a "mute" key.
	muter := types.MuteFunc(func(lset model.LabelSet) bool {
		_, ok := lset["mute"]
		return ok
	})

	stage := NewMuteStage(muter)

	in := []model.LabelSet{
		{},
		{"test": "set"},
		{"mute": "me"},
		{"foo": "bar", "test": "set"},
		{"foo": "bar", "mute": "me"},
		{},
		{"not": "muted"},
	}
	out := []model.LabelSet{
		{},
		{"test": "set"},
		{"foo": "bar", "test": "set"},
		{},
		{"not": "muted"},
	}

	var inAlerts []*types.Alert
	for _, lset := range in {
		inAlerts = append(inAlerts, &types.Alert{
			Alert: model.Alert{Labels: lset},
		})
	}

	_, alerts, err := stage.Exec(context.Background(), log.NewNopLogger(), inAlerts...)
	if err != nil {
		t.Fatalf("Exec failed: %s", err)
	}

	var got []model.LabelSet
	for _, a := range alerts {
		got = append(got, a.Labels)
	}

	if !reflect.DeepEqual(got, out) {
		t.Fatalf("Muting failed, expected: %v\ngot %v", out, got)
	}
}

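// TestMuteStageWithSilences drives MuteStage with a real Silencer:
// matching alerts are dropped while the silence is active, stale silence
// markers are re-evaluated, and all alerts pass through once the silence
// has expired.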
func TestMuteStageWithSilences(t *testing.T) {
	silences, err := silence.New(silence.Options{Retention: time.Hour})
	if err != nil {
		t.Fatal(err)
	}
	silID, err := silences.Set(&silencepb.Silence{
		EndsAt:   utcNow().Add(time.Hour),
		Matchers: []*silencepb.Matcher{{Name: "mute", Pattern: "me"}},
	})
	if err != nil {
		t.Fatal(err)
	}

	marker := types.NewMarker(prometheus.NewRegistry())
	silencer := silence.NewSilencer(silences, marker, log.NewNopLogger())
	stage := NewMuteStage(silencer)

	in := []model.LabelSet{
		{},
		{"test": "set"},
		{"mute": "me"},
		{"foo": "bar", "test": "set"},
		{"foo": "bar", "mute": "me"},
		{},
		{"not": "muted"},
	}
	out := []model.LabelSet{
		{},
		{"test": "set"},
		{"foo": "bar", "test": "set"},
		{},
		{"not": "muted"},
	}

	var inAlerts []*types.Alert
	for _, lset := range in {
		inAlerts = append(inAlerts, &types.Alert{
			Alert: model.Alert{Labels: lset},
		})
	}

	// Set the second alert as previously silenced with an old version
	// number. This is expected to get unsilenced by the stage.
	marker.SetActiveOrSilenced(inAlerts[1].Fingerprint(), 0, []string{"123"}, nil)

	_, alerts, err := stage.Exec(context.Background(), log.NewNopLogger(), inAlerts...)
	if err != nil {
		t.Fatalf("Exec failed: %s", err)
	}

	var got []model.LabelSet
	for _, a := range alerts {
		got = append(got, a.Labels)
	}

	if !reflect.DeepEqual(got, out) {
		t.Fatalf("Muting failed, expected: %v\ngot %v", out, got)
	}

	// Do it again to exercise the version tracking of silences.
	_, alerts, err = stage.Exec(context.Background(), log.NewNopLogger(), inAlerts...)
	if err != nil {
		t.Fatalf("Exec failed: %s", err)
	}

	got = got[:0]
	for _, a := range alerts {
		got = append(got, a.Labels)
	}

	if !reflect.DeepEqual(got, out) {
		t.Fatalf("Muting failed, expected: %v\ngot %v", out, got)
	}

	// Expire the silence and verify that no alerts are silenced now.
	if err := silences.Expire(silID); err != nil {
		t.Fatal(err)
	}

	_, alerts, err = stage.Exec(context.Background(), log.NewNopLogger(), inAlerts...)
	if err != nil {
		t.Fatalf("Exec failed: %s", err)
	}
	got = got[:0]
	for _, a := range alerts {
		got = append(got, a.Labels)
	}

	if !reflect.DeepEqual(got, in) {
		t.Fatalf("Unmuting failed, expected: %v\ngot %v", in, got)
	}
}

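// TestTimeMuteStage verifies that TimeMuteStage suppresses alerts whose
// firing time falls within the route's mute time intervals, with time
// zones normalized before comparison.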
func TestTimeMuteStage(t *testing.T) {
	// Route mutes alerts outside business hours in November, using the +1100 timezone.
	muteIn := `
---
- weekdays: ['monday:friday']
  location: 'Australia/Sydney'
  months: ['November']
  times:
   - start_time: '00:00'
     end_time: '09:00'
   - start_time: '17:00'
     end_time: '24:00'
- weekdays: ['saturday', 'sunday']
  months: ['November']
  location: 'Australia/Sydney'`

	cases := []struct {
		fireTime   string
		labels     model.LabelSet
		shouldMute bool
	}{
		{
			// Friday during business hours
			fireTime:   "19 Nov 21 13:00 +1100",
			labels:     model.LabelSet{"foo": "bar"},
			shouldMute: false,
		},
		{
			// Tuesday before 5pm
			fireTime:   "16 Nov 21 16:59 +1100",
			labels:     model.LabelSet{"dont": "mute"},
			shouldMute: false,
		},
		{
			// Saturday
			fireTime:   "20 Nov 21 10:00 +1100",
			labels:     model.LabelSet{"mute": "me"},
			shouldMute: true,
		},
		{
			// Wednesday before 9am
			fireTime:   "17 Nov 21 05:00 +1100",
			labels:     model.LabelSet{"mute": "me"},
			shouldMute: true,
		},
		{
			// Ensure comparisons with other time zones work as expected.
			fireTime:   "14 Nov 21 20:00 +0900",
			labels:     model.LabelSet{"mute": "kst"},
			shouldMute: true,
		},
		{
			fireTime:   "14 Nov 21 21:30 +0000",
			labels:     model.LabelSet{"mute": "utc"},
			shouldMute: true,
		},
		{
			fireTime:   "15 Nov 22 14:30 +0900",
			labels:     model.LabelSet{"kst": "dont_mute"},
			shouldMute: false,
		},
		{
			fireTime:   "15 Nov 21 02:00 -0500",
			labels:     model.LabelSet{"mute": "0500"},
			shouldMute: true,
		},
	}
	var intervals []timeinterval.TimeInterval
	err := yaml.Unmarshal([]byte(muteIn), &intervals)
	if err != nil {
		t.Fatalf("Couldn't unmarshal time interval %s", err)
	}
	m := map[string][]timeinterval.TimeInterval{"test": intervals}
	stage := NewTimeMuteStage(m)

	outAlerts := []*types.Alert{}
	nonMuteCount := 0
	for _, tc := range cases {
		now, err := time.Parse(time.RFC822Z, tc.fireTime)
		if err != nil {
			t.Fatalf("Couldn't parse fire time %s %s", tc.fireTime, err)
		}
		// Count alerts with shouldMute == false and compare to ensure none are muted incorrectly
		if !tc.shouldMute {
			nonMuteCount++
		}
		a := model.Alert{Labels: tc.labels}
		alerts := []*types.Alert{{Alert: a}}
		ctx := context.Background()
		ctx = WithNow(ctx, now)
		ctx = WithActiveTimeIntervals(ctx, []string{})
		ctx = WithMuteTimeIntervals(ctx, []string{"test"})

		_, out, err := stage.Exec(ctx, log.NewNopLogger(), alerts...)
		if err != nil {
			t.Fatalf("Unexpected error in time mute stage %s", err)
		}
		outAlerts = append(outAlerts, out...)
	}
	for _, alert := range outAlerts {
		if _, ok := alert.Alert.Labels["mute"]; ok {
			t.Fatalf("Expected alert to be muted %+v", alert.Alert)
		}
	}
	if len(outAlerts) != nonMuteCount {
		t.Fatalf("Expected %d alerts after time mute stage but got %d", nonMuteCount, len(outAlerts))
	}
}

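// TestTimeActiveStage verifies the inverse behaviour: with an active time
// interval configured, alerts firing outside that interval are suppressed.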
func TestTimeActiveStage(t *testing.T) {
	// The active time interval covers only weekends and out-of-hours
	// periods, so the route mutes alerts that fire inside business hours.
	muteIn := `
---
- weekdays: ['monday:friday']
  times:
   - start_time: '00:00'
     end_time: '09:00'
   - start_time: '17:00'
     end_time: '24:00'
- weekdays: ['saturday', 'sunday']`

	cases := []struct {
		fireTime   string
		labels     model.LabelSet
		shouldMute bool
	}{
		{
			// Friday during business hours
			fireTime:   "01 Jan 21 09:00 +0000",
			labels:     model.LabelSet{"mute": "me"},
			shouldMute: true,
		},
		{
			// Tuesday before 5pm
			fireTime:   "01 Dec 20 16:59 +0000",
			labels:     model.LabelSet{"mute": "me"},
			shouldMute: true,
		},
		{
			// Saturday
			fireTime:   "17 Oct 20 10:00 +0000",
			labels:     model.LabelSet{"foo": "bar"},
			shouldMute: false,
		},
		{
			// Wednesday before 9am
			fireTime:   "14 Oct 20 05:00 +0000",
			labels:     model.LabelSet{"dont": "mute"},
			shouldMute: false,
		},
	}
	var intervals []timeinterval.TimeInterval
	err := yaml.Unmarshal([]byte(muteIn), &intervals)
	if err != nil {
		t.Fatalf("Couldn't unmarshal time interval %s", err)
	}
	m := map[string][]timeinterval.TimeInterval{"test": intervals}
	stage := NewTimeActiveStage(m)

	outAlerts := []*types.Alert{}
	nonMuteCount := 0
	for _, tc := range cases {
		now, err := time.Parse(time.RFC822Z, tc.fireTime)
		if err != nil {
			t.Fatalf("Couldn't parse fire time %s %s", tc.fireTime, err)
		}
		// Count alerts with shouldMute == false and compare to ensure none are muted incorrectly
		if !tc.shouldMute {
			nonMuteCount++
		}
		a := model.Alert{Labels: tc.labels}
		alerts := []*types.Alert{{Alert: a}}
		ctx := context.Background()
		ctx = WithNow(ctx, now)
		ctx = WithActiveTimeIntervals(ctx, []string{"test"})
		ctx = WithMuteTimeIntervals(ctx, []string{})

		_, out, err := stage.Exec(ctx, log.NewNopLogger(), alerts...)
		if err != nil {
			t.Fatalf("Unexpected error in time active stage %s", err)
		}
		outAlerts = append(outAlerts, out...)
	}
	for _, alert := range outAlerts {
		if _, ok := alert.Alert.Labels["mute"]; ok {
			t.Fatalf("Expected alert to be muted %+v", alert.Alert)
		}
	}
	if len(outAlerts) != nonMuteCount {
		t.Fatalf("Expected %d alerts after time active stage but got %d", nonMuteCount, len(outAlerts))
	}
}

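// BenchmarkHashAlert measures the cost of hashing a single alert's label set.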
func BenchmarkHashAlert(b *testing.B) {
	alert := &types.Alert{
		Alert: model.Alert{
			Labels: model.LabelSet{"foo": "the_first_value", "bar": "the_second_value", "another": "value"},
		},
	}
	for i := 0; i < b.N; i++ {
		hashAlert(alert)
	}
}
