...

Source file src/github.com/prometheus/alertmanager/types/types.go

Documentation: github.com/prometheus/alertmanager/types

     1  // Copyright 2015 Prometheus Team
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package types
    15  
    16  import (
    17  	"strings"
    18  	"sync"
    19  	"time"
    20  
    21  	"github.com/prometheus/client_golang/prometheus"
    22  	"github.com/prometheus/common/model"
    23  
    24  	"github.com/prometheus/alertmanager/pkg/labels"
    25  )
    26  
// AlertState is the processing state of an alert as tracked by a Marker. It
// is used as part of AlertStatus and serialized as a plain string.
type AlertState string
    29  
// Possible values for AlertState.
const (
	// AlertStateUnprocessed is reported for alerts the marker holds no
	// status for.
	AlertStateUnprocessed AlertState = "unprocessed"
	// AlertStateActive marks an alert that is neither silenced nor inhibited.
	AlertStateActive      AlertState = "active"
	// AlertStateSuppressed marks an alert that is silenced and/or inhibited.
	AlertStateSuppressed  AlertState = "suppressed"
)
    36  
// AlertStatus stores the state of an alert and, as applicable, the IDs of
// silences silencing the alert and of other alerts inhibiting the alert. Note
// that currently, SilencedBy is supposed to be the complete set of the relevant
// silences while InhibitedBy may contain only a subset of the inhibiting alerts
// – in practice exactly one ID. (These somewhat confusing semantics might change
// in the future.)
type AlertStatus struct {
	// State is the current AlertState of the alert.
	State       AlertState `json:"state"`
	// SilencedBy holds the IDs of the active silences matching the alert.
	SilencedBy  []string   `json:"silencedBy"`
	// InhibitedBy holds IDs of alerts inhibiting the alert.
	InhibitedBy []string   `json:"inhibitedBy"`

	// For internal tracking, not exposed in the API.
	// pendingSilences holds the IDs of pending silences and silencesVersion
	// the version of the silences state, both as last passed to
	// SetActiveOrSilenced.
	pendingSilences []string
	silencesVersion int
}
    52  
// Marker helps to mark alerts as silenced and/or inhibited.
// All methods are goroutine-safe.
type Marker interface {
	// SetActiveOrSilenced replaces the previous SilencedBy by the provided IDs of
	// active and pending silences, including the version number of the
	// silences state. The set of provided IDs is supposed to represent the
	// complete set of relevant silences. If no active silence IDs are provided and
	// InhibitedBy is already empty, it sets the provided alert to AlertStateActive.
	// Otherwise, it sets the provided alert to AlertStateSuppressed.
	SetActiveOrSilenced(alert model.Fingerprint, version int, activeSilenceIDs, pendingSilenceIDs []string)
	// SetInhibited replaces the previous InhibitedBy by the provided IDs of
	// alerts. In contrast to SetActiveOrSilenced, the set of provided IDs is not
	// expected to represent the complete set of inhibiting alerts. (In
	// practice, this method is only called with one or zero IDs. However,
	// this expectation might change in the future.) If no IDs are provided
	// and SilencedBy is already empty, it sets the provided alert to
	// AlertStateActive. Otherwise, it sets the provided alert to
	// AlertStateSuppressed.
	SetInhibited(alert model.Fingerprint, alertIDs ...string)

	// Count alerts of the given state(s). With no state provided, count all
	// alerts.
	Count(...AlertState) int

	// Status of the given alert.
	Status(model.Fingerprint) AlertStatus
	// Delete the given alert.
	Delete(model.Fingerprint)

	// Various methods to inquire if the given alert is in a certain
	// AlertState. Silenced also returns all the active and pending
	// silences, while Inhibited may return only a subset of inhibiting
	// alerts. Silenced also returns the version of the silences state the
	// result is based on.
	Unprocessed(model.Fingerprint) bool
	Active(model.Fingerprint) bool
	Silenced(model.Fingerprint) (activeIDs, pendingIDs []string, version int, silenced bool)
	Inhibited(model.Fingerprint) ([]string, bool)
}
    92  
    93  // NewMarker returns an instance of a Marker implementation.
    94  func NewMarker(r prometheus.Registerer) Marker {
    95  	m := &memMarker{
    96  		m: map[model.Fingerprint]*AlertStatus{},
    97  	}
    98  
    99  	m.registerMetrics(r)
   100  
   101  	return m
   102  }
   103  
// memMarker is the in-memory Marker implementation returned by NewMarker.
type memMarker struct {
	// m maps an alert fingerprint to its current status.
	m map[model.Fingerprint]*AlertStatus

	// mtx guards m and the statuses it points to.
	mtx sync.RWMutex
}
   109  
   110  func (m *memMarker) registerMetrics(r prometheus.Registerer) {
   111  	newMarkedAlertMetricByState := func(st AlertState) prometheus.GaugeFunc {
   112  		return prometheus.NewGaugeFunc(
   113  			prometheus.GaugeOpts{
   114  				Name:        "alertmanager_marked_alerts",
   115  				Help:        "How many alerts by state are currently marked in the Alertmanager regardless of their expiry.",
   116  				ConstLabels: prometheus.Labels{"state": string(st)},
   117  			},
   118  			func() float64 {
   119  				return float64(m.Count(st))
   120  			},
   121  		)
   122  	}
   123  
   124  	alertsActive := newMarkedAlertMetricByState(AlertStateActive)
   125  	alertsSuppressed := newMarkedAlertMetricByState(AlertStateSuppressed)
   126  	alertStateUnprocessed := newMarkedAlertMetricByState(AlertStateUnprocessed)
   127  
   128  	r.MustRegister(alertsActive)
   129  	r.MustRegister(alertsSuppressed)
   130  	r.MustRegister(alertStateUnprocessed)
   131  }
   132  
   133  // Count implements Marker.
   134  func (m *memMarker) Count(states ...AlertState) int {
   135  	m.mtx.RLock()
   136  	defer m.mtx.RUnlock()
   137  
   138  	if len(states) == 0 {
   139  		return len(m.m)
   140  	}
   141  
   142  	var count int
   143  	for _, status := range m.m {
   144  		for _, state := range states {
   145  			if status.State == state {
   146  				count++
   147  			}
   148  		}
   149  	}
   150  	return count
   151  }
   152  
   153  // SetActiveOrSilenced implements Marker.
   154  func (m *memMarker) SetActiveOrSilenced(alert model.Fingerprint, version int, activeIDs, pendingIDs []string) {
   155  	m.mtx.Lock()
   156  	defer m.mtx.Unlock()
   157  
   158  	s, found := m.m[alert]
   159  	if !found {
   160  		s = &AlertStatus{}
   161  		m.m[alert] = s
   162  	}
   163  	s.SilencedBy = activeIDs
   164  	s.pendingSilences = pendingIDs
   165  	s.silencesVersion = version
   166  
   167  	// If there are any silence or alert IDs associated with the
   168  	// fingerprint, it is suppressed. Otherwise, set it to
   169  	// AlertStateActive.
   170  	if len(activeIDs) == 0 && len(s.InhibitedBy) == 0 {
   171  		s.State = AlertStateActive
   172  		return
   173  	}
   174  
   175  	s.State = AlertStateSuppressed
   176  }
   177  
   178  // SetInhibited implements Marker.
   179  func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
   180  	m.mtx.Lock()
   181  	defer m.mtx.Unlock()
   182  
   183  	s, found := m.m[alert]
   184  	if !found {
   185  		s = &AlertStatus{}
   186  		m.m[alert] = s
   187  	}
   188  	s.InhibitedBy = ids
   189  
   190  	// If there are any silence or alert IDs associated with the
   191  	// fingerprint, it is suppressed. Otherwise, set it to
   192  	// AlertStateActive.
   193  	if len(ids) == 0 && len(s.SilencedBy) == 0 {
   194  		s.State = AlertStateActive
   195  		return
   196  	}
   197  
   198  	s.State = AlertStateSuppressed
   199  }
   200  
   201  // Status implements Marker.
   202  func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
   203  	m.mtx.RLock()
   204  	defer m.mtx.RUnlock()
   205  
   206  	if s, found := m.m[alert]; found {
   207  		return *s
   208  	}
   209  	return AlertStatus{
   210  		State:       AlertStateUnprocessed,
   211  		SilencedBy:  []string{},
   212  		InhibitedBy: []string{},
   213  	}
   214  }
   215  
   216  // Delete implements Marker.
   217  func (m *memMarker) Delete(alert model.Fingerprint) {
   218  	m.mtx.Lock()
   219  	defer m.mtx.Unlock()
   220  
   221  	delete(m.m, alert)
   222  }
   223  
   224  // Unprocessed implements Marker.
   225  func (m *memMarker) Unprocessed(alert model.Fingerprint) bool {
   226  	return m.Status(alert).State == AlertStateUnprocessed
   227  }
   228  
   229  // Active implements Marker.
   230  func (m *memMarker) Active(alert model.Fingerprint) bool {
   231  	return m.Status(alert).State == AlertStateActive
   232  }
   233  
   234  // Inhibited implements Marker.
   235  func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
   236  	s := m.Status(alert)
   237  	return s.InhibitedBy,
   238  		s.State == AlertStateSuppressed && len(s.InhibitedBy) > 0
   239  }
   240  
   241  // Silenced returns whether the alert for the given Fingerprint is in the
   242  // Silenced state, any associated silence IDs, and the silences state version
   243  // the result is based on.
   244  func (m *memMarker) Silenced(alert model.Fingerprint) (activeIDs, pendingIDs []string, version int, silenced bool) {
   245  	s := m.Status(alert)
   246  	return s.SilencedBy, s.pendingSilences, s.silencesVersion,
   247  		s.State == AlertStateSuppressed && len(s.SilencedBy) > 0
   248  }
   249  
   250  // MultiError contains multiple errors and implements the error interface. Its
   251  // zero value is ready to use. All its methods are goroutine safe.
   252  type MultiError struct {
   253  	mtx    sync.Mutex
   254  	errors []error
   255  }
   256  
   257  // Add adds an error to the MultiError.
   258  func (e *MultiError) Add(err error) {
   259  	e.mtx.Lock()
   260  	defer e.mtx.Unlock()
   261  
   262  	e.errors = append(e.errors, err)
   263  }
   264  
   265  // Len returns the number of errors added to the MultiError.
   266  func (e *MultiError) Len() int {
   267  	e.mtx.Lock()
   268  	defer e.mtx.Unlock()
   269  
   270  	return len(e.errors)
   271  }
   272  
   273  // Errors returns the errors added to the MuliError. The returned slice is a
   274  // copy of the internal slice of errors.
   275  func (e *MultiError) Errors() []error {
   276  	e.mtx.Lock()
   277  	defer e.mtx.Unlock()
   278  
   279  	return append(make([]error, 0, len(e.errors)), e.errors...)
   280  }
   281  
   282  func (e *MultiError) Error() string {
   283  	e.mtx.Lock()
   284  	defer e.mtx.Unlock()
   285  
   286  	es := make([]string, 0, len(e.errors))
   287  	for _, err := range e.errors {
   288  		es = append(es, err.Error())
   289  	}
   290  	return strings.Join(es, "; ")
   291  }
   292  
// Alert wraps a model.Alert with additional information relevant
// to the internals of the Alertmanager.
// The type is never exposed to external communication and the
// embedded alert has to be sanitized beforehand.
type Alert struct {
	model.Alert

	// The authoritative timestamp. Merge treats the alert with the later
	// UpdatedAt as the younger one and bases its result on it.
	UpdatedAt time.Time
	// Timeout, when set on the older alert, stops Merge from taking that
	// alert's later EndsAt over an unresolved younger alert's EndsAt.
	// NOTE(review): presumably marks EndsAt as derived from a timeout rather
	// than set explicitly; confirm against the code that sets it.
	Timeout   bool
}
   304  
// AlertSlice is a sortable slice of Alerts. Its Less, Swap, and Len methods
// provide the ordering: by the "job" label, then the "instance" label, then
// the full label sets.
type AlertSlice []*Alert
   307  
   308  func (as AlertSlice) Less(i, j int) bool {
   309  	// Look at labels.job, then labels.instance.
   310  	for _, overrideKey := range [...]model.LabelName{"job", "instance"} {
   311  		iVal, iOk := as[i].Labels[overrideKey]
   312  		jVal, jOk := as[j].Labels[overrideKey]
   313  		if !iOk && !jOk {
   314  			continue
   315  		}
   316  		if !iOk {
   317  			return false
   318  		}
   319  		if !jOk {
   320  			return true
   321  		}
   322  		if iVal != jVal {
   323  			return iVal < jVal
   324  		}
   325  	}
   326  	return as[i].Labels.Before(as[j].Labels)
   327  }
   328  func (as AlertSlice) Swap(i, j int) { as[i], as[j] = as[j], as[i] }
   329  func (as AlertSlice) Len() int      { return len(as) }
   330  
   331  // Alerts turns a sequence of internal alerts into a list of
   332  // exposable model.Alert structures.
   333  func Alerts(alerts ...*Alert) model.Alerts {
   334  	res := make(model.Alerts, 0, len(alerts))
   335  	for _, a := range alerts {
   336  		v := a.Alert
   337  		// If the end timestamp is not reached yet, do not expose it.
   338  		if !a.Resolved() {
   339  			v.EndsAt = time.Time{}
   340  		}
   341  		res = append(res, &v)
   342  	}
   343  	return res
   344  }
   345  
   346  // Merge merges the timespan of two alerts based and overwrites annotations
   347  // based on the authoritative timestamp.  A new alert is returned, the labels
   348  // are assumed to be equal.
   349  func (a *Alert) Merge(o *Alert) *Alert {
   350  	// Let o always be the younger alert.
   351  	if o.UpdatedAt.Before(a.UpdatedAt) {
   352  		return o.Merge(a)
   353  	}
   354  
   355  	res := *o
   356  
   357  	// Always pick the earliest starting time.
   358  	if a.StartsAt.Before(o.StartsAt) {
   359  		res.StartsAt = a.StartsAt
   360  	}
   361  
   362  	if o.Resolved() {
   363  		// The latest explicit resolved timestamp wins if both alerts are effectively resolved.
   364  		if a.Resolved() && a.EndsAt.After(o.EndsAt) {
   365  			res.EndsAt = a.EndsAt
   366  		}
   367  	} else {
   368  		// A non-timeout timestamp always rules if it is the latest.
   369  		if a.EndsAt.After(o.EndsAt) && !a.Timeout {
   370  			res.EndsAt = a.EndsAt
   371  		}
   372  	}
   373  
   374  	return &res
   375  }
   376  
// A Muter determines whether a given label set is muted. Implementers that
// maintain an underlying Marker are expected to update it during a call of
// Mutes.
type Muter interface {
	// Mutes reports whether the given label set is muted.
	Mutes(model.LabelSet) bool
}
   383  
   384  // A MuteFunc is a function that implements the Muter interface.
   385  type MuteFunc func(model.LabelSet) bool
   386  
   387  // Mutes implements the Muter interface.
   388  func (f MuteFunc) Mutes(lset model.LabelSet) bool { return f(lset) }
   389  
// A Silence determines whether a given label set is muted.
type Silence struct {
	// A unique identifier across all connected instances.
	ID string `json:"id"`
	// A set of matchers determining if a label set is affected
	// by the silence.
	Matchers labels.Matchers `json:"matchers"`

	// Time range of the silence.
	//
	// * StartsAt must not be before creation time
	// * EndsAt must be after StartsAt
	// * Deleting a silence means to set EndsAt to now
	// * Time range must not be modified in different ways
	//
	// TODO(fabxc): this may potentially be extended by
	// creation and update timestamps.
	StartsAt time.Time `json:"startsAt"`
	EndsAt   time.Time `json:"endsAt"`

	// The last time the silence was updated.
	UpdatedAt time.Time `json:"updatedAt"`

	// Information about who created the silence for which reason.
	// Comment is omitted from the JSON output when empty.
	CreatedBy string `json:"createdBy"`
	Comment   string `json:"comment,omitempty"`

	// The state of the silence (see SilenceStatus).
	Status SilenceStatus `json:"status"`
}
   419  
   420  // Expired return if the silence is expired
   421  // meaning that both StartsAt and EndsAt are equal
   422  func (s *Silence) Expired() bool {
   423  	return s.StartsAt.Equal(s.EndsAt)
   424  }
   425  
// SilenceStatus stores the state of a silence.
type SilenceStatus struct {
	// State is the silence's current SilenceState.
	State SilenceState `json:"state"`
}
   430  
// SilenceState is used as part of SilenceStatus. It describes where the
// current time lies relative to a silence's time range (see
// CalcSilenceState).
type SilenceState string
   433  
// Possible values for SilenceState.
const (
	// SilenceStateExpired: the end time has been reached.
	SilenceStateExpired SilenceState = "expired"
	// SilenceStateActive: the start time has been reached, the end time not yet.
	SilenceStateActive  SilenceState = "active"
	// SilenceStatePending: the start time has not been reached yet.
	SilenceStatePending SilenceState = "pending"
)
   440  
   441  // CalcSilenceState returns the SilenceState that a silence with the given start
   442  // and end time would have right now.
   443  func CalcSilenceState(start, end time.Time) SilenceState {
   444  	current := time.Now()
   445  	if current.Before(start) {
   446  		return SilenceStatePending
   447  	}
   448  	if current.Before(end) {
   449  		return SilenceStateActive
   450  	}
   451  	return SilenceStateExpired
   452  }
   453  

View as plain text