...

Source file src/github.com/prometheus/alertmanager/cluster/channel.go

Documentation: github.com/prometheus/alertmanager/cluster

     1  // Copyright 2018 Prometheus Team
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package cluster
    15  
    16  import (
    17  	"sync"
    18  	"time"
    19  
    20  	"github.com/go-kit/log"
    21  	"github.com/go-kit/log/level"
    22  	"github.com/gogo/protobuf/proto"
    23  	"github.com/hashicorp/memberlist"
    24  	"github.com/prometheus/client_golang/prometheus"
    25  
    26  	"github.com/prometheus/alertmanager/cluster/clusterpb"
    27  )
    28  
    29  // Channel allows clients to send messages for a specific state type that will be
    30  // broadcasted in a best-effort manner.
    31  type Channel struct {
    32  	key          string
    33  	send         func([]byte)
    34  	peers        func() []*memberlist.Node
    35  	sendOversize func(*memberlist.Node, []byte) error
    36  
    37  	msgc   chan []byte
    38  	logger log.Logger
    39  
    40  	oversizeGossipMessageFailureTotal prometheus.Counter
    41  	oversizeGossipMessageDroppedTotal prometheus.Counter
    42  	oversizeGossipMessageSentTotal    prometheus.Counter
    43  	oversizeGossipDuration            prometheus.Histogram
    44  }
    45  
    46  // NewChannel creates a new Channel struct, which handles sending normal and
    47  // oversize messages to peers.
    48  func NewChannel(
    49  	key string,
    50  	send func([]byte),
    51  	peers func() []*memberlist.Node,
    52  	sendOversize func(*memberlist.Node, []byte) error,
    53  	logger log.Logger,
    54  	stopc chan struct{},
    55  	reg prometheus.Registerer,
    56  ) *Channel {
    57  	oversizeGossipMessageFailureTotal := prometheus.NewCounter(prometheus.CounterOpts{
    58  		Name:        "alertmanager_oversized_gossip_message_failure_total",
    59  		Help:        "Number of oversized gossip message sends that failed.",
    60  		ConstLabels: prometheus.Labels{"key": key},
    61  	})
    62  	oversizeGossipMessageSentTotal := prometheus.NewCounter(prometheus.CounterOpts{
    63  		Name:        "alertmanager_oversized_gossip_message_sent_total",
    64  		Help:        "Number of oversized gossip message sent.",
    65  		ConstLabels: prometheus.Labels{"key": key},
    66  	})
    67  	oversizeGossipMessageDroppedTotal := prometheus.NewCounter(prometheus.CounterOpts{
    68  		Name:        "alertmanager_oversized_gossip_message_dropped_total",
    69  		Help:        "Number of oversized gossip messages that were dropped due to a full message queue.",
    70  		ConstLabels: prometheus.Labels{"key": key},
    71  	})
    72  	oversizeGossipDuration := prometheus.NewHistogram(prometheus.HistogramOpts{
    73  		Name:        "alertmanager_oversize_gossip_message_duration_seconds",
    74  		Help:        "Duration of oversized gossip message requests.",
    75  		ConstLabels: prometheus.Labels{"key": key},
    76  	})
    77  
    78  	reg.MustRegister(oversizeGossipDuration, oversizeGossipMessageFailureTotal, oversizeGossipMessageDroppedTotal, oversizeGossipMessageSentTotal)
    79  
    80  	c := &Channel{
    81  		key:                               key,
    82  		send:                              send,
    83  		peers:                             peers,
    84  		logger:                            logger,
    85  		msgc:                              make(chan []byte, 200),
    86  		sendOversize:                      sendOversize,
    87  		oversizeGossipMessageFailureTotal: oversizeGossipMessageFailureTotal,
    88  		oversizeGossipMessageDroppedTotal: oversizeGossipMessageDroppedTotal,
    89  		oversizeGossipMessageSentTotal:    oversizeGossipMessageSentTotal,
    90  		oversizeGossipDuration:            oversizeGossipDuration,
    91  	}
    92  
    93  	go c.handleOverSizedMessages(stopc)
    94  
    95  	return c
    96  }
    97  
    98  // handleOverSizedMessages prevents memberlist from opening too many parallel
    99  // TCP connections to its peers.
   100  func (c *Channel) handleOverSizedMessages(stopc chan struct{}) {
   101  	var wg sync.WaitGroup
   102  	for {
   103  		select {
   104  		case b := <-c.msgc:
   105  			for _, n := range c.peers() {
   106  				wg.Add(1)
   107  				go func(n *memberlist.Node) {
   108  					defer wg.Done()
   109  					c.oversizeGossipMessageSentTotal.Inc()
   110  					start := time.Now()
   111  					if err := c.sendOversize(n, b); err != nil {
   112  						level.Debug(c.logger).Log("msg", "failed to send reliable", "key", c.key, "node", n, "err", err)
   113  						c.oversizeGossipMessageFailureTotal.Inc()
   114  						return
   115  					}
   116  					c.oversizeGossipDuration.Observe(time.Since(start).Seconds())
   117  				}(n)
   118  			}
   119  
   120  			wg.Wait()
   121  		case <-stopc:
   122  			return
   123  		}
   124  	}
   125  }
   126  
   127  // Broadcast enqueues a message for broadcasting.
   128  func (c *Channel) Broadcast(b []byte) {
   129  	b, err := proto.Marshal(&clusterpb.Part{Key: c.key, Data: b})
   130  	if err != nil {
   131  		return
   132  	}
   133  
   134  	if OversizedMessage(b) {
   135  		select {
   136  		case c.msgc <- b:
   137  		default:
   138  			level.Debug(c.logger).Log("msg", "oversized gossip channel full")
   139  			c.oversizeGossipMessageDroppedTotal.Inc()
   140  		}
   141  	} else {
   142  		c.send(b)
   143  	}
   144  }
   145  
   146  // OversizedMessage indicates whether or not the byte payload should be sent
   147  // via TCP.
   148  func OversizedMessage(b []byte) bool {
   149  	return len(b) > MaxGossipPacketSize/2
   150  }
   151  

View as plain text