...

Source file src/edge-infra.dev/pkg/sds/etcd/manager/cluster/cluster.go

Documentation: edge-infra.dev/pkg/sds/etcd/manager/cluster

     1  // Package cluster provides functionality for monitoring etcd cluster health
     2  // and recovering the cluster if quorum is lost.
     3  package cluster
     4  
     5  import (
     6  	"context"
     7  	"time"
     8  
     9  	"go.etcd.io/etcd/api/v3/etcdserverpb"
    10  
    11  	"edge-infra.dev/pkg/sds/lib/etcd/client/retry"
    12  )
    13  
// Cluster represents an etcd cluster. It couples the endpoint that is queried
// for status with the embedded health Status and the maxUnhealthy threshold
// that IsResetRequired compares against.
type Cluster struct {
	endpoint     string        // endpoint handed to the client's SafeStatus call in UpdateStatus
	maxUnhealthy time.Duration // time since lastHealthy beyond which IsResetRequired reports true
	// Status is embedded, so its timestamps and its IsHealthy/ResetTimer
	// methods are promoted onto Cluster.
	Status
}
    20  
// Status represents the health status of a resource as a pair of timestamps.
// IsHealthy compares the two to decide whether the resource is healthy.
type Status struct {
	lastUnhealthy time.Time // set to the current time by a failed UpdateStatus check and by ResetTimer
	lastHealthy   time.Time // set to the current time by a successful UpdateStatus check and by ResetTimer
}
    26  
    27  func New(endpoint string, maxUnhealthy time.Duration, status Status) Cluster {
    28  	return Cluster{
    29  		endpoint,
    30  		maxUnhealthy,
    31  		status,
    32  	}
    33  }
    34  
    35  // InitializeStatus initializes the status times to ensure there are no inaccurate
    36  // time comparrisons
    37  func (c *Cluster) InitializeStatus() {
    38  	c.ResetTimer()
    39  }
    40  
    41  // UpdateStatus checks the status of the etcd cluster and updates the last healthy/unhealthy time
    42  // based on the result
    43  func (c *Cluster) UpdateStatus(ctx context.Context, client retry.Retrier) {
    44  	resp, err := client.SafeStatus(ctx, c.endpoint)
    45  	if err != nil || len(resp.Errors) != 0 {
    46  		c.lastUnhealthy = time.Now()
    47  		return
    48  	}
    49  
    50  	c.lastHealthy = time.Now()
    51  }
    52  
    53  // IsResetRequired checks if the stored state for the etcd cluster indicates that a reset is required
    54  func (c *Cluster) IsResetRequired() bool {
    55  	if c.IsHealthy() {
    56  		return false
    57  	}
    58  	return time.Since(c.lastHealthy) > c.maxUnhealthy
    59  }
    60  
    61  // IsHealthy checks if the stored state for the etcd cluster indicate that it is healthy
    62  func (s *Status) IsHealthy() bool {
    63  	return s.lastHealthy.After(s.lastUnhealthy)
    64  }
    65  
    66  // ResetTimer will reset the status timers so that both lastHealthy and lastUnhealthy
    67  // are time.Now()
    68  func (s *Status) ResetTimer() {
    69  	now := time.Now()
    70  	s.lastHealthy = now
    71  	s.lastUnhealthy = now
    72  }
    73  
    74  // GetAlarms will retrieve all etcd cluster alarms of types: CORRUPT, NOSPACE
    75  func GetAlarms(ctx context.Context, client retry.Retrier) []*etcdserverpb.AlarmMember {
    76  	resp, err := client.SafeAlarmList(ctx)
    77  	if err != nil {
    78  		return []*etcdserverpb.AlarmMember{}
    79  	}
    80  
    81  	return resp.Alarms
    82  }
    83  

View as plain text