...

Source file src/github.com/prometheus/alertmanager/test/with_api_v2/acceptance/send_test.go

Documentation: github.com/prometheus/alertmanager/test/with_api_v2/acceptance

     1  // Copyright 2018 Prometheus Team
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package test
    15  
    16  import (
    17  	"fmt"
    18  	"testing"
    19  	"time"
    20  
    21  	. "github.com/prometheus/alertmanager/test/with_api_v2"
    22  )
    23  
    24  // This file contains acceptance tests around the basic sending logic
    25  // for notifications, which includes batching and ensuring that each
    26  // notification is eventually sent at least once and ideally exactly
    27  // once.
    28  
    29  func testMergeAlerts(t *testing.T, endsAt bool) {
    30  	t.Parallel()
    31  
    32  	timerange := func(ts float64) []float64 {
    33  		if !endsAt {
    34  			return []float64{ts}
    35  		}
    36  		return []float64{ts, ts + 3.0}
    37  	}
    38  
    39  	conf := `
    40  route:
    41    receiver: "default"
    42    group_by: [alertname]
    43    group_wait:      1s
    44    group_interval:  1s
    45    repeat_interval: 1ms
    46  
    47  receivers:
    48  - name: "default"
    49    webhook_configs:
    50    - url: 'http://%s'
    51      send_resolved: true
    52  `
    53  
    54  	at := NewAcceptanceTest(t, &AcceptanceOpts{
    55  		Tolerance: 150 * time.Millisecond,
    56  	})
    57  
    58  	co := at.Collector("webhook")
    59  	wh := NewWebhook(t, co)
    60  
    61  	am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
    62  
    63  	// Refresh an alert several times. The starting time must remain at the earliest
    64  	// point in time.
    65  	am.Push(At(1), Alert("alertname", "test").Active(timerange(1.1)...))
    66  	// Another Prometheus server might be sending later but with an earlier start time.
    67  	am.Push(At(1.2), Alert("alertname", "test").Active(1))
    68  
    69  	co.Want(Between(2, 2.5), Alert("alertname", "test").Active(1))
    70  
    71  	am.Push(At(2.1), Alert("alertname", "test").Annotate("ann", "v1").Active(timerange(2)...))
    72  
    73  	co.Want(Between(3, 3.5), Alert("alertname", "test").Annotate("ann", "v1").Active(1))
    74  
    75  	// Annotations are always overwritten by the alert that arrived most recently.
    76  	am.Push(At(3.6), Alert("alertname", "test").Annotate("ann", "v2").Active(timerange(1.5)...))
    77  
    78  	co.Want(Between(4, 4.5), Alert("alertname", "test").Annotate("ann", "v2").Active(1))
    79  
    80  	// If an alert is marked resolved twice, the latest point in time must be
    81  	// set as the eventual resolve time.
    82  	am.Push(At(4.6), Alert("alertname", "test").Annotate("ann", "v2").Active(3, 4.5))
    83  	am.Push(At(4.8), Alert("alertname", "test").Annotate("ann", "v3").Active(2.9, 4.8))
    84  	am.Push(At(4.8), Alert("alertname", "test").Annotate("ann", "v3").Active(2.9, 4.1))
    85  
    86  	co.Want(Between(5, 5.5), Alert("alertname", "test").Annotate("ann", "v3").Active(1, 4.8))
    87  
    88  	// Reactivate an alert after a previous occurrence has been resolved.
    89  	// No overlap, no merge must occur.
    90  	am.Push(At(5.3), Alert("alertname", "test").Active(timerange(5)...))
    91  
    92  	co.Want(Between(6, 6.5), Alert("alertname", "test").Active(5))
    93  
    94  	at.Run()
    95  
    96  	t.Log(co.Check())
    97  }
    98  
    99  func TestMergeAlerts(t *testing.T) {
   100  	testMergeAlerts(t, false)
   101  }
   102  
   103  // This test is similar to TestMergeAlerts except that the firing alerts have
   104  // the EndsAt field set to StartsAt + 3s. This is what Prometheus starting from
   105  // version 2.4.0 sends to AlertManager.
   106  func TestMergeAlertsWithEndsAt(t *testing.T) {
   107  	testMergeAlerts(t, true)
   108  }
   109  
   110  func TestRepeat(t *testing.T) {
   111  	t.Parallel()
   112  
   113  	conf := `
   114  route:
   115    receiver: "default"
   116    group_by: [alertname]
   117    group_wait:      1s
   118    group_interval:  1s
   119    repeat_interval: 1ms
   120  
   121  receivers:
   122  - name: "default"
   123    webhook_configs:
   124    - url: 'http://%s'
   125  `
   126  
   127  	// Create a new acceptance test that instantiates new Alertmanagers
   128  	// with the given configuration and verifies times with the given
   129  	// tolerance.
   130  	at := NewAcceptanceTest(t, &AcceptanceOpts{
   131  		Tolerance: 150 * time.Millisecond,
   132  	})
   133  
   134  	// Create a collector to which alerts can be written and verified
   135  	// against a set of expected alert notifications.
   136  	co := at.Collector("webhook")
   137  	// Run something that satisfies the webhook interface to which the
   138  	// Alertmanager pushes as defined by its configuration.
   139  	wh := NewWebhook(t, co)
   140  
   141  	// Create a new Alertmanager process listening to a random port
   142  	am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
   143  
   144  	// Declare pushes to be made to the Alertmanager at the given time.
   145  	// Times are provided in fractions of seconds.
   146  	am.Push(At(1), Alert("alertname", "test").Active(1))
   147  
   148  	// XXX(fabxc): disabled as long as alerts are not persisted.
   149  	// at.Do(At(1.2), func() {
   150  	//	am.Terminate()
   151  	//	am.Start()
   152  	// })
   153  	am.Push(At(3.5), Alert("alertname", "test").Active(1, 3))
   154  
   155  	// Declare which alerts are expected to arrive at the collector within
   156  	// the defined time intervals.
   157  	co.Want(Between(2, 2.5), Alert("alertname", "test").Active(1))
   158  	co.Want(Between(3, 3.5), Alert("alertname", "test").Active(1))
   159  	co.Want(Between(4, 4.5), Alert("alertname", "test").Active(1, 3))
   160  
   161  	// Start the flow as defined above and run the checks afterwards.
   162  	at.Run()
   163  
   164  	t.Log(co.Check())
   165  }
   166  
   167  func TestRetry(t *testing.T) {
   168  	t.Parallel()
   169  
   170  	// We create a notification config that fans out into two different
   171  	// webhooks.
   172  	// The succeeding one must still only receive the first successful
   173  	// notifications. Sending to the succeeding one must eventually succeed.
   174  	conf := `
   175  route:
   176    receiver: "default"
   177    group_by: [alertname]
   178    group_wait:      1s
   179    group_interval:  2s
   180    repeat_interval: 3s
   181  
   182  receivers:
   183  - name: "default"
   184    webhook_configs:
   185    - url: 'http://%s'
   186    - url: 'http://%s'
   187  `
   188  
   189  	at := NewAcceptanceTest(t, &AcceptanceOpts{
   190  		Tolerance: 150 * time.Millisecond,
   191  	})
   192  
   193  	co1 := at.Collector("webhook")
   194  	wh1 := NewWebhook(t, co1)
   195  
   196  	co2 := at.Collector("webhook_failing")
   197  	wh2 := NewWebhook(t, co2)
   198  
   199  	wh2.Func = func(ts float64) bool {
   200  		// Fail the first interval period but eventually succeed in the third
   201  		// interval after a few failed attempts.
   202  		return ts < 4.5
   203  	}
   204  
   205  	am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh1.Address(), wh2.Address()), 1)
   206  
   207  	am.Push(At(1), Alert("alertname", "test1"))
   208  
   209  	co1.Want(Between(2, 2.5), Alert("alertname", "test1").Active(1))
   210  	co1.Want(Between(6, 6.5), Alert("alertname", "test1").Active(1))
   211  
   212  	co2.Want(Between(6, 6.5), Alert("alertname", "test1").Active(1))
   213  
   214  	at.Run()
   215  
   216  	for _, c := range []*Collector{co1, co2} {
   217  		t.Log(c.Check())
   218  	}
   219  }
   220  
   221  func TestBatching(t *testing.T) {
   222  	t.Parallel()
   223  
   224  	conf := `
   225  route:
   226    receiver: "default"
   227    group_by: []
   228    group_wait:      1s
   229    group_interval:  1s
   230    # use a value slightly below the 5s interval to avoid timing issues
   231    repeat_interval: 4900ms
   232  
   233  receivers:
   234  - name: "default"
   235    webhook_configs:
   236    - url: 'http://%s'
   237  `
   238  
   239  	at := NewAcceptanceTest(t, &AcceptanceOpts{
   240  		Tolerance: 150 * time.Millisecond,
   241  	})
   242  
   243  	co := at.Collector("webhook")
   244  	wh := NewWebhook(t, co)
   245  
   246  	am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
   247  
   248  	am.Push(At(1.1), Alert("alertname", "test1").Active(1))
   249  	am.Push(At(1.7), Alert("alertname", "test5").Active(1))
   250  
   251  	co.Want(Between(2.0, 2.5),
   252  		Alert("alertname", "test1").Active(1),
   253  		Alert("alertname", "test5").Active(1),
   254  	)
   255  
   256  	am.Push(At(3.3),
   257  		Alert("alertname", "test2").Active(1.5),
   258  		Alert("alertname", "test3").Active(1.5),
   259  		Alert("alertname", "test4").Active(1.6),
   260  	)
   261  
   262  	co.Want(Between(4.1, 4.5),
   263  		Alert("alertname", "test1").Active(1),
   264  		Alert("alertname", "test5").Active(1),
   265  		Alert("alertname", "test2").Active(1.5),
   266  		Alert("alertname", "test3").Active(1.5),
   267  		Alert("alertname", "test4").Active(1.6),
   268  	)
   269  
   270  	// While no changes happen expect no additional notifications
   271  	// until the 5s repeat interval has ended.
   272  
   273  	co.Want(Between(9.1, 9.5),
   274  		Alert("alertname", "test1").Active(1),
   275  		Alert("alertname", "test5").Active(1),
   276  		Alert("alertname", "test2").Active(1.5),
   277  		Alert("alertname", "test3").Active(1.5),
   278  		Alert("alertname", "test4").Active(1.6),
   279  	)
   280  
   281  	at.Run()
   282  
   283  	t.Log(co.Check())
   284  }
   285  
   286  func TestResolved(t *testing.T) {
   287  	t.Parallel()
   288  
   289  	for i := 0; i < 2; i++ {
   290  		conf := `
   291  global:
   292    resolve_timeout: 10s
   293  
   294  route:
   295    receiver: "default"
   296    group_by: [alertname]
   297    group_wait: 1s
   298    group_interval: 5s
   299  
   300  receivers:
   301  - name: "default"
   302    webhook_configs:
   303    - url: 'http://%s'
   304  `
   305  
   306  		at := NewAcceptanceTest(t, &AcceptanceOpts{
   307  			Tolerance: 150 * time.Millisecond,
   308  		})
   309  
   310  		co := at.Collector("webhook")
   311  		wh := NewWebhook(t, co)
   312  
   313  		am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
   314  
   315  		am.Push(At(1),
   316  			Alert("alertname", "test", "lbl", "v1"),
   317  			Alert("alertname", "test", "lbl", "v2"),
   318  			Alert("alertname", "test", "lbl", "v3"),
   319  		)
   320  
   321  		co.Want(Between(2, 2.5),
   322  			Alert("alertname", "test", "lbl", "v1").Active(1),
   323  			Alert("alertname", "test", "lbl", "v2").Active(1),
   324  			Alert("alertname", "test", "lbl", "v3").Active(1),
   325  		)
   326  		co.Want(Between(12, 13),
   327  			Alert("alertname", "test", "lbl", "v1").Active(1, 11),
   328  			Alert("alertname", "test", "lbl", "v2").Active(1, 11),
   329  			Alert("alertname", "test", "lbl", "v3").Active(1, 11),
   330  		)
   331  
   332  		at.Run()
   333  
   334  		t.Log(co.Check())
   335  	}
   336  }
   337  
   338  func TestResolvedFilter(t *testing.T) {
   339  	t.Parallel()
   340  
   341  	// This integration test ensures that even though resolved alerts may not be
   342  	// notified about, they must be set as notified. Resolved alerts, even when
   343  	// filtered, have to end up in the SetNotifiesStage, otherwise when an alert
   344  	// fires again it is ambiguous whether it was resolved in between or not.
   345  
   346  	conf := `
   347  global:
   348    resolve_timeout: 10s
   349  
   350  route:
   351    receiver: "default"
   352    group_by: [alertname]
   353    group_wait: 1s
   354    group_interval: 5s
   355  
   356  receivers:
   357  - name: "default"
   358    webhook_configs:
   359    - url: 'http://%s'
   360      send_resolved: true
   361    - url: 'http://%s'
   362      send_resolved: false
   363  `
   364  
   365  	at := NewAcceptanceTest(t, &AcceptanceOpts{
   366  		Tolerance: 150 * time.Millisecond,
   367  	})
   368  
   369  	co1 := at.Collector("webhook1")
   370  	wh1 := NewWebhook(t, co1)
   371  
   372  	co2 := at.Collector("webhook2")
   373  	wh2 := NewWebhook(t, co2)
   374  
   375  	amc := at.AlertmanagerCluster(fmt.Sprintf(conf, wh1.Address(), wh2.Address()), 1)
   376  
   377  	amc.Push(At(1),
   378  		Alert("alertname", "test", "lbl", "v1"),
   379  		Alert("alertname", "test", "lbl", "v2"),
   380  	)
   381  	amc.Push(At(3),
   382  		Alert("alertname", "test", "lbl", "v1").Active(1, 4),
   383  		Alert("alertname", "test", "lbl", "v3"),
   384  	)
   385  	amc.Push(At(8),
   386  		Alert("alertname", "test", "lbl", "v3").Active(3),
   387  	)
   388  
   389  	co1.Want(Between(2, 2.5),
   390  		Alert("alertname", "test", "lbl", "v1").Active(1),
   391  		Alert("alertname", "test", "lbl", "v2").Active(1),
   392  	)
   393  	co1.Want(Between(7, 7.5),
   394  		Alert("alertname", "test", "lbl", "v1").Active(1, 4),
   395  		Alert("alertname", "test", "lbl", "v2").Active(1),
   396  		Alert("alertname", "test", "lbl", "v3").Active(3),
   397  	)
   398  	// Notification should be sent because the v2 alert is resolved due to the time-out.
   399  	co1.Want(Between(12, 12.5),
   400  		Alert("alertname", "test", "lbl", "v2").Active(1, 11),
   401  		Alert("alertname", "test", "lbl", "v3").Active(3),
   402  	)
   403  
   404  	co2.Want(Between(2, 2.5),
   405  		Alert("alertname", "test", "lbl", "v1").Active(1),
   406  		Alert("alertname", "test", "lbl", "v2").Active(1),
   407  	)
   408  	co2.Want(Between(7, 7.5),
   409  		Alert("alertname", "test", "lbl", "v2").Active(1),
   410  		Alert("alertname", "test", "lbl", "v3").Active(3),
   411  	)
   412  	// No notification should be sent after group_interval because no new alert has been fired.
   413  	co2.Want(Between(12, 12.5))
   414  
   415  	at.Run()
   416  
   417  	for _, c := range []*Collector{co1, co2} {
   418  		t.Log(c.Check())
   419  	}
   420  }
   421  
   422  func TestReload(t *testing.T) {
   423  	t.Parallel()
   424  
   425  	// This integration test ensures that the first alert isn't notified twice
   426  	// and repeat_interval applies after the AlertManager process has been
   427  	// reloaded.
   428  	conf := `
   429  route:
   430    receiver: "default"
   431    group_by: []
   432    group_wait:      1s
   433    group_interval:  6s
   434    repeat_interval: 10m
   435  
   436  receivers:
   437  - name: "default"
   438    webhook_configs:
   439    - url: 'http://%s'
   440  `
   441  
   442  	at := NewAcceptanceTest(t, &AcceptanceOpts{
   443  		Tolerance: 150 * time.Millisecond,
   444  	})
   445  
   446  	co := at.Collector("webhook")
   447  	wh := NewWebhook(t, co)
   448  
   449  	amc := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
   450  
   451  	amc.Push(At(1), Alert("alertname", "test1"))
   452  	at.Do(At(3), amc.Reload)
   453  	amc.Push(At(4), Alert("alertname", "test2"))
   454  
   455  	co.Want(Between(2, 2.5), Alert("alertname", "test1").Active(1))
   456  	// Timers are reset on reload regardless, so we count the 6 second group
   457  	// interval from 3 onwards.
   458  	co.Want(Between(9, 9.5),
   459  		Alert("alertname", "test1").Active(1),
   460  		Alert("alertname", "test2").Active(4),
   461  	)
   462  
   463  	at.Run()
   464  
   465  	t.Log(co.Check())
   466  }
   467  

View as plain text