...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package server
18
19 import (
20 "context"
21 "net/http"
22 "sync"
23 "time"
24
25 "github.com/sassoftware/relic/token/worker"
26 )
27
// healthStatus is the number of further consecutive failed health checks
// that are tolerated; Healthy reports failure once it drops to zero.
// Guarded by healthMu.
var healthStatus int

// healthLastPing records when the most recent health check finished.
// Guarded by healthMu.
var healthLastPing time.Time

// healthMu serializes access to healthStatus and healthLastPing.
var healthMu sync.Mutex
33
34 func (s *Server) healthCheckInterval() time.Duration {
35 return time.Second * time.Duration(s.Config.Server.TokenCheckInterval)
36 }
37
38 func (s *Server) startHealthCheck() error {
39 healthStatus = s.Config.Server.TokenCheckFailures
40 go s.healthCheckLoop()
41 return nil
42 }
43
44 func (s *Server) healthCheckLoop() {
45 interval := s.healthCheckInterval()
46 t := time.NewTimer(0)
47 defer t.Stop()
48 for {
49 select {
50 case <-t.C:
51 s.healthCheck()
52 t.Reset(interval)
53 case <-s.Closed:
54 break
55 }
56 }
57 }
58
59 func (s *Server) healthCheck() bool {
60 healthMu.Lock()
61 last := healthStatus
62 healthMu.Unlock()
63 ok := true
64 for _, token := range s.tokens {
65 if !s.pingOne(token) {
66 ok = false
67 }
68 }
69 next := last
70 if ok {
71 if last == 0 {
72 s.Logf("recovered to normal state, status is now OK")
73 } else if last < s.Config.Server.TokenCheckFailures {
74 s.Logf("recovered to normal state")
75 }
76 next = s.Config.Server.TokenCheckFailures
77 } else if last > 0 {
78 next--
79 if next == 0 {
80 s.Logf("exceeded maximum health check failures, flagging as ERROR")
81 }
82 }
83 healthMu.Lock()
84 defer healthMu.Unlock()
85 healthStatus = next
86 healthLastPing = time.Now()
87 return ok
88 }
89
90 func (s *Server) pingOne(tok *worker.WorkerToken) bool {
91 ctx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(s.Config.Server.TokenCheckTimeout))
92 defer cancel()
93 if err := tok.PingContext(ctx); err != nil {
94 if ctx.Err() != nil {
95 s.Logf("error: health check of token %s timed out", tok.Config().Name())
96 } else {
97 s.Logf("error: health check of token %s failed: %s", tok.Config().Name(), err)
98 }
99 return false
100 }
101 return true
102 }
103
104 func (s *Server) Healthy(request *http.Request) bool {
105 if s.Config.Server.Disabled {
106 return false
107 }
108 healthMu.Lock()
109 defer healthMu.Unlock()
110 if time.Since(healthLastPing) > 3*s.healthCheckInterval() {
111 if request != nil {
112 s.Logr(request, "error: health check AWOL for %d seconds", time.Since(healthLastPing)/time.Second)
113 }
114 return false
115 }
116 return healthStatus > 0
117 }
118
119 func (s *Server) serveHealth(request *http.Request) (res Response, err error) {
120 if request.Method != "GET" {
121 return ErrorResponse(http.StatusMethodNotAllowed), nil
122 }
123 if s.Healthy(request) {
124 return StringResponse(http.StatusOK, "OK"), nil
125 }
126 return ErrorResponse(http.StatusServiceUnavailable), nil
127 }
128
View as plain text