1 // Copyright 2020 Datawire. All rights reserved. 2 // 3 // package acp contains stuff dealing with the Ambassador Control Plane as a whole. 4 // 5 // This is the EnvoyWatcher, which is a class that can keep an eye on a running 6 // Envoy - and just Envoy, all other Ambassador elements are ignored - and tell you 7 // whether it's alive and ready, or not. 8 // 9 // At the moment, "alive" and "ready" mean the same thing for an EnvoyWatcher. Both 10 // IsAlive() and IsReady() methods exist, though, for a future in which we monitor 11 // them separately. 12 // 13 // TESTING HOOKS: 14 // Since we try to check Envoy readiness to see how Envoy is doing, you can use 15 // EnvoyWatcher.SetReadyCheck to change the function that EnvoyWatcher uses to 16 // check readiness. The default is EnvoyWatcher.defaultFetcher, which tries to pull 17 // readiness from http://localhost:8001/ready. 18 // 19 // This hook is NOT meant for you to change the fetcher on the fly in a running 20 // EnvoyWatcher. Set it at instantiation, then leave it alone. See envoy_test.go 21 // for more. 22 23 package acp 24 25 import ( 26 "context" 27 "fmt" 28 "io/ioutil" 29 "net/http" 30 "sync" 31 "time" 32 33 "github.com/datawire/dlib/dlog" 34 ) 35 36 // EnvoyWatcher encapsulates state and methods for keeping an eye on a running 37 // Envoy, and deciding if it's healthy. 38 type EnvoyWatcher struct { 39 // This mutex is mostly rank paranoia, since we've really only the one 40 // data element at this point... 41 mutex sync.Mutex 42 43 // How shall we determine Envoy's readiness? 44 readyCheck envoyFetcher 45 46 // Did the last ready check succeed? 47 LastSucceeded bool 48 } 49 50 // NewEnvoyWatcher creates a new EnvoyWatcher, given a fetcher. 51 func NewEnvoyWatcher() *EnvoyWatcher { 52 w := &EnvoyWatcher{} 53 w.SetReadyCheck(w.defaultFetcher) 54 55 return w 56 } 57 58 // This the default Fetcher for the EnvoyWatcher -- it actually connects to Envoy 59 // and checks for ready. 60 func (w *EnvoyWatcher) defaultFetcher(ctx context.Context) (*EnvoyFetcherResponse, error) { 61 // Set up a context with a deliberate 2-second timeout. Envoy shouldn't ever take more 62 // than 100ms to answer the ready check, and if we don't pick a short timeout here, 63 // this call can hang for way longer than we would like it to. 64 tctx, tcancel := context.WithTimeout(ctx, 2*time.Second) 65 defer tcancel() 66 67 // Build a request... 68 req, err := http.NewRequestWithContext(tctx, http.MethodGet, "http://localhost:8001/ready", nil) 69 70 if err != nil { 71 // ...which should never fail. WTFO? 72 return nil, fmt.Errorf("error creating request: %v", err) 73 } 74 75 // We were able to create the request, so now fire it off. 76 resp, err := http.DefaultClient.Do(req) 77 78 if err != nil { 79 // Unlike the last error case, this one isn't a weird situation at 80 // all -- e.g. if Envoy isn't running yet, we'll land here. 81 return nil, fmt.Errorf("error fetching /ready: %v", err) 82 } 83 84 // Don't forget to close the body once done. 85 defer resp.Body.Close() 86 87 // We're going to return the status code and the response body, so we 88 // need to grab those. 89 statusCode := resp.StatusCode 90 text, err := ioutil.ReadAll(resp.Body) 91 92 if err != nil { 93 // This is a bit strange -- if we can't read the body, it implies 94 // that something has gone wrong with the connection, so we'll 95 // call that an error in calling ready. 96 return nil, fmt.Errorf("error reading body: %v", err) 97 } 98 99 return &EnvoyFetcherResponse{StatusCode: statusCode, Text: text}, nil 100 } 101 102 // SetReadyCheck will change the function we use to get check if Envoy is ready. This is 103 // here for testing; the assumption is that you'll call it at instantiation if you need 104 // to, then leave it alone. 105 func (w *EnvoyWatcher) SetReadyCheck(readyCheck envoyFetcher) { 106 w.readyCheck = readyCheck 107 } 108 109 // FetchEnvoyReady will check whether Envoy's ready endpoint is fetchable. 110 func (w *EnvoyWatcher) FetchEnvoyReady(ctx context.Context) { 111 succeeded := false 112 113 // Actually check if ready... 114 readyResponse, err := w.readyCheck(ctx) 115 116 // ...and see if we were able to. 117 if err == nil { 118 // Well, nothing blatantly failed, so check the status. (For the 119 // moment, we don't care about the text.) 120 if readyResponse.StatusCode == 200 { 121 succeeded = true 122 } 123 } else { 124 dlog.Debugf(ctx, "could not fetch Envoy status: %v", err) 125 } 126 127 w.mutex.Lock() 128 defer w.mutex.Unlock() 129 w.LastSucceeded = succeeded 130 } 131 132 // IsAlive returns true IFF Envoy should be considered alive. 133 func (w *EnvoyWatcher) IsAlive() bool { 134 w.mutex.Lock() 135 defer w.mutex.Unlock() 136 137 // Currently we just return LastSucceeded: we will not consider Envoy alive 138 // unless we were able to talk to it. 139 return w.LastSucceeded 140 } 141 142 // IsReady returns true IFF Envoy should be considered ready. Currently Envoy is 143 // considered ready whenever it's alive; this method is here for future-proofing. 144 func (w *EnvoyWatcher) IsReady() bool { 145 return w.IsAlive() 146 } 147