...

Source file src/go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp/probing_status.go

Documentation: go.etcd.io/etcd/server/v3/etcdserver/api/rafthttp

     1  // Copyright 2015 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package rafthttp
    16  
    17  import (
    18  	"time"
    19  
    20  	"github.com/prometheus/client_golang/prometheus"
    21  	"github.com/xiang90/probing"
    22  	"go.uber.org/zap"
    23  )
    24  
    25  const (
    26  	// RoundTripperNameRaftMessage is the name of the round-tripper that sends
    27  	// all Raft messages other than "snap.Message".
    28  	RoundTripperNameRaftMessage = "ROUND_TRIPPER_RAFT_MESSAGE"
    29  	// RoundTripperNameSnapshot is the name of the round-tripper that sends merged snapshot messages.
    30  	RoundTripperNameSnapshot = "ROUND_TRIPPER_SNAPSHOT"
    31  )
    32  
    33  var (
    34  	// proberInterval must be shorter than the read timeout (ConnReadTimeout);
    35  	// otherwise the connection will time out.
    36  	proberInterval           = ConnReadTimeout - time.Second
    37  	statusMonitoringInterval = 30 * time.Second // wait used by monitorProbingStatus after a healthy check
    38  	statusErrorInterval      = 5 * time.Second // wait used by monitorProbingStatus initially and after an unhealthy check
    39  )
    40  
    41  func addPeerToProber(lg *zap.Logger, p probing.Prober, id string, us []string, roundTripperName string, rttSecProm *prometheus.HistogramVec) {
    42  	hus := make([]string, len(us))
    43  	for i := range us {
    44  		hus[i] = us[i] + ProbingPrefix
    45  	}
    46  
    47  	p.AddHTTP(id, proberInterval, hus)
    48  
    49  	s, err := p.Status(id)
    50  	if err != nil {
    51  		if lg != nil {
    52  			lg.Warn("failed to add peer into prober", zap.String("remote-peer-id", id), zap.Error(err))
    53  		}
    54  		return
    55  	}
    56  
    57  	go monitorProbingStatus(lg, s, id, roundTripperName, rttSecProm)
    58  }
    59  
    60  func monitorProbingStatus(lg *zap.Logger, s probing.Status, id string, roundTripperName string, rttSecProm *prometheus.HistogramVec) {
    61  	// set the first interval short to log error early.
    62  	interval := statusErrorInterval
    63  	for {
    64  		select {
    65  		case <-time.After(interval):
    66  			if !s.Health() {
    67  				if lg != nil {
    68  					lg.Warn(
    69  						"prober detected unhealthy status",
    70  						zap.String("round-tripper-name", roundTripperName),
    71  						zap.String("remote-peer-id", id),
    72  						zap.Duration("rtt", s.SRTT()),
    73  						zap.Error(s.Err()),
    74  					)
    75  				}
    76  				interval = statusErrorInterval
    77  			} else {
    78  				interval = statusMonitoringInterval
    79  			}
    80  			if s.ClockDiff() > time.Second {
    81  				if lg != nil {
    82  					lg.Warn(
    83  						"prober found high clock drift",
    84  						zap.String("round-tripper-name", roundTripperName),
    85  						zap.String("remote-peer-id", id),
    86  						zap.Duration("clock-drift", s.ClockDiff()),
    87  						zap.Duration("rtt", s.SRTT()),
    88  						zap.Error(s.Err()),
    89  					)
    90  				}
    91  			}
    92  			rttSecProm.WithLabelValues(id).Observe(s.SRTT().Seconds())
    93  
    94  		case <-s.StopNotify():
    95  			return
    96  		}
    97  	}
    98  }
    99  

View as plain text