1 // Copyright 2020 Datawire. All rights reserved. 2 // 3 // package acp contains stuff dealing with the Ambassador Control Plane as a whole. 4 // 5 // This is the EnvoyWatcher, which is a class that can keep an eye on a running 6 // Envoy - and just Envoy, all other Ambassador elements are ignored - and tell you 7 // whether it's alive and ready, or not. 8 // 9 // At the moment, "alive" and "ready" mean the same thing for an EnvoyWatcher. Both 10 // IsAlive() and IsReady() methods exist, though, for a future in which we monitor 11 // them separately. 12 // 13 // TESTING HOOKS: 14 // Since we try to check Envoy readiness to see how Envoy is doing, you can use 15 // EnvoyWatcher.SetReadyCheck to change the function that EnvoyWatcher uses to 16 // check readiness. The default is EnvoyWatcher.defaultFetcher, which tries to pull 17 // readiness from http://localhost:8001/ready. 18 // 19 // This hook is NOT meant for you to change the fetcher on the fly in a running 20 // EnvoyWatcher. Set it at instantiation, then leave it alone. See envoy_test.go 21 // for more. 22 23 package acp 24 25 import ( 26 "context" 27 "fmt" 28 "io/ioutil" 29 "net/http" 30 "os" 31 "strconv" 32 "sync" 33 "time" 34 35 "github.com/datawire/dlib/dlog" 36 ) 37 38 // EnvoyWatcher encapsulates state and methods for keeping an eye on a running 39 // Envoy, and deciding if it's healthy. 40 type EnvoyWatcher struct { 41 // This mutex is mostly rank paranoia, since we've really only the one 42 // data element at this point... 43 mutex sync.Mutex 44 45 // How shall we determine Envoy's readiness? 46 readyCheck envoyFetcher 47 48 // For default fetcher, the port for /ready endpoint listener 49 defaultReadyURL string 50 51 // Did the last ready check succeed? 52 LastSucceeded bool 53 } 54 55 // NewEnvoyWatcher creates a new EnvoyWatcher, given a fetcher. 56 func NewEnvoyWatcher() *EnvoyWatcher { 57 w := &EnvoyWatcher{ 58 defaultReadyURL: getDefaultReadyURL(), 59 } 60 w.SetReadyCheck(w.defaultFetcher) 61 62 return w 63 } 64 65 // This the default Fetcher for the EnvoyWatcher -- it actually connects to Envoy 66 // and checks for ready. 67 func (w *EnvoyWatcher) defaultFetcher(ctx context.Context) (*EnvoyFetcherResponse, error) { 68 // Set up a context with a deliberate 2-second timeout. Envoy shouldn't ever take more 69 // than 100ms to answer the ready check, and if we don't pick a short timeout here, 70 // this call can hang for way longer than we would like it to. 71 tctx, tcancel := context.WithTimeout(ctx, 2*time.Second) 72 defer tcancel() 73 74 // Build a request... 75 req, err := http.NewRequestWithContext(tctx, http.MethodGet, w.defaultReadyURL, nil) 76 77 if err != nil { 78 // ...which should never fail. WTFO? 79 return nil, fmt.Errorf("error creating request: %v", err) 80 } 81 82 // We were able to create the request, so now fire it off. 83 resp, err := http.DefaultClient.Do(req) 84 85 if err != nil { 86 // Unlike the last error case, this one isn't a weird situation at 87 // all -- e.g. if Envoy isn't running yet, we'll land here. 88 return nil, fmt.Errorf("error fetching /ready: %v", err) 89 } 90 91 // Don't forget to close the body once done. 92 defer resp.Body.Close() 93 94 // We're going to return the status code and the response body, so we 95 // need to grab those. 96 statusCode := resp.StatusCode 97 text, err := ioutil.ReadAll(resp.Body) 98 99 if err != nil { 100 // This is a bit strange -- if we can't read the body, it implies 101 // that something has gone wrong with the connection, so we'll 102 // call that an error in calling ready. 103 return nil, fmt.Errorf("error reading body: %v", err) 104 } 105 106 return &EnvoyFetcherResponse{StatusCode: statusCode, Text: text}, nil 107 } 108 109 // SetReadyCheck will change the function we use to get check if Envoy is ready. This is 110 // here for testing; the assumption is that you'll call it at instantiation if you need 111 // to, then leave it alone. 112 func (w *EnvoyWatcher) SetReadyCheck(readyCheck envoyFetcher) { 113 w.readyCheck = readyCheck 114 } 115 116 // FetchEnvoyReady will check whether Envoy's ready endpoint is fetchable. 117 func (w *EnvoyWatcher) FetchEnvoyReady(ctx context.Context) { 118 succeeded := false 119 120 // Actually check if ready... 121 readyResponse, err := w.readyCheck(ctx) 122 123 // ...and see if we were able to. 124 if err == nil { 125 // Well, nothing blatantly failed, so check the status. (For the 126 // moment, we don't care about the text.) 127 if readyResponse.StatusCode == 200 { 128 succeeded = true 129 } 130 } else { 131 dlog.Debugf(ctx, "could not fetch Envoy status: %v", err) 132 } 133 134 w.mutex.Lock() 135 defer w.mutex.Unlock() 136 w.LastSucceeded = succeeded 137 } 138 139 // IsAlive returns true IFF Envoy should be considered alive. 140 func (w *EnvoyWatcher) IsAlive() bool { 141 w.mutex.Lock() 142 defer w.mutex.Unlock() 143 144 // Currently we just return LastSucceeded: we will not consider Envoy alive 145 // unless we were able to talk to it. 146 return w.LastSucceeded 147 } 148 149 // IsReady returns true IFF Envoy should be considered ready. Currently Envoy is 150 // considered ready whenever it's alive; this method is here for future-proofing. 151 func (w *EnvoyWatcher) IsReady() bool { 152 return w.IsAlive() 153 } 154 155 func getDefaultReadyURL() string { 156 var readyPort uint64 157 var err error 158 strReadyPort := os.Getenv("AMBASSADOR_READY_PORT") 159 if strReadyPort != "" { 160 readyPort, err = strconv.ParseUint(strReadyPort, 10, 15) 161 if err != nil { 162 dlog.Infof(context.Background(), "Unable to parse AMBASSADOR_READY_PORT or port is out of bounds: %s", err) 163 } 164 } 165 if readyPort < 1 { 166 readyPort = 8006 167 } 168 return fmt.Sprintf("http://localhost:%d/ready", readyPort) 169 } 170