1 // The debug package is intended to aid in live debugging of misbehaving Ambassadors in 2 // production. Its role is somewhat akin to a debugger and/or a profiler in that it gives you 3 // visiblity into important internal state and performance charactaristics of Ambassador, however 4 // unlike a debugger and/or profiler it is intended to function well in a production context. 5 // 6 // The debug package is also very complementary to a logging library. To make an analogy to GPS 7 // Navigation, a logging library is kind of like directions that your nav system produces. The 8 // information that logging provides is often similar to "turn left", "go straight", etc. This is 9 // super useful if you are trying to understand how your code got to the wrong destination, but 10 // often if you are trying to live debug a misbehaving Ambassador in production, you don't 11 // necessarily care as much about how it got to where it is. You care about *quickly* understanding 12 // where it is and how to get it to someplace better. The debug library is intended to help with 13 // that by giving you an understanding of exactly where Ambassador is without requiring the tedius 14 // exercise of pooring over the logs while also staring at and mentally executing the code in order 15 // to reconstruct the relevant state. Instead of doing that we just make the code capable of telling 16 // us exactly where it has ended up. 17 // 18 // So how does this work? Well there is a new endpoint at `localhost:8877/debug` and you can run 19 // `curl localhost:8877/debug` to see some useful information. 20 // 21 // There are currently two kinds of debug information that it exposes: 22 // 23 // 1. Timers 24 // 25 // We've learned that the timing of various actions that ambassador takes is actually very important 26 // to its production behavior. Things like: 27 // 28 // - how long it takes us to load secrets 29 // - how long it takes us to validate envoy configuration 30 // - how long it takes us to compute a new envoy configuration 31 // - how long it takes us to respond to a probe request 32 // 33 // Anywhere in the code can now easily add new timing information to this debug endpoint by doing 34 // the following: 35 // 36 // dbg := debug.FromContext(ctx) 37 // timer := dbg.Timer("myAction") 38 // 39 // timer.Time(func() { 40 // // ... do some work 41 // }) 42 // 43 // // or 44 // 45 // stop := timer.Start() 46 // // ... do some work 47 // stop() 48 // 49 // // or 50 // 51 // func() { 52 // defer timer.Start()() // this is really the same as the stop case but using defer to guarantee stop gets called 53 // // ... do some work 54 // }() 55 // 56 // There are also special purpose convenience middleware for adding timing information to http 57 // handlers: 58 // 59 // handler = timer.TimedHandler(handler) 60 // // or 61 // handlerFunc = timer.TimedHandlerFunc(handlerFunc) 62 // 63 // A timer tracks a count, minimum, maximum, and average timing info for all actions: 64 // 65 // { 66 // "timers": { 67 // "check_alive": "0, 0s/0s/0s", 68 // "check_ready": "0, 0s/0s/0s", 69 // "consulUpdate": "0, 0s/0s/0s", 70 // "katesUpdate": "615, 29.385µs/297.114µs/95.220222ms", 71 // "notifyWebhook:diagd": "2, 1.206967947s/1.3298432s/1.452718454s", 72 // "notifyWebhooks": "2, 1.207007216s/1.329901037s/1.452794859s", 73 // "parseAnnotations": "2, 21.944µs/22.541µs/23.138µs", 74 // "reconcileConsul": "2, 50.104µs/55.499µs/60.894µs", 75 // "reconcileSecrets": "2, 18.704µs/20.786µs/22.868µs" 76 // }, 77 // ... 78 // } 79 // 80 // 2. Atomic Values 81 // 82 // Another tool in the toolkit for externalizing relevant state is atomic values. Anywhere in the 83 // code can now expose important values in the following way: 84 // 85 // dbg := debug.FromContext(ctx) 86 // value := dbg.Value("myValue") 87 // 88 // // ... 89 // 90 // value.Store("blah") 91 // 92 // // or 93 // 94 // v := StructThatCanBeJsonMarshalled{...} 95 // value.Store(v) 96 // 97 // The debug endpoint will now show the current value for "myValue". The only requirement for the 98 // stored value is that it can be marshalled as json. 99 // 100 // We currently use this to expose how much memory ambassador is using as well as the state of the 101 // envoy reconfiguration, but there are lots of other relevant state we can/should expose in the 102 // future as we understand what is important to ambassador's behavior: 103 // 104 // { 105 // ... 106 // "values": { 107 // "envoyReconfigs": { 108 // "times": [ 109 // "2020-11-06T13:13:24.218707995-05:00", 110 // "2020-11-06T13:13:27.185754494-05:00", 111 // "2020-11-06T13:13:28.612279777-05:00" 112 // ], 113 // "staleCount": 2, 114 // "staleMax": 0, 115 // "synced": true 116 // }, 117 // "memory": "39.68Gi of Unlimited (0%)" 118 // } 119 // } 120 // 121 // The full output of the debug endpoint now currently looks like this: 122 // 123 // $ curl localhost:8877/debug 124 // { 125 // "timers": { 126 // # these two timers track how long it takes to respond to liveness and readiness probes 127 // "check_alive": "7, 45.411495ms/61.85999ms/81.358927ms", 128 // "check_ready": "7, 49.951304ms/61.976205ms/86.279038ms", 129 // 130 // # these two timers track how long we spend updating our in-memory snapshot when our kubernetes 131 // # watches tell us something has changed 132 // "consulUpdate": "0, 0s/0s/0s", 133 // "katesUpdate": "3382, 28.662µs/102.784µs/95.220222ms", 134 // 135 // # These timers tell us how long we spend notifying the sidecars if changed input. This 136 // # includes how long the sidecars take to process that input. 137 // "notifyWebhook:diagd": "2, 1.206967947s/1.3298432s/1.452718454s", 138 // "notifyWebhooks": "2, 1.207007216s/1.329901037s/1.452794859s", 139 // 140 // # This timer tells us how long we spend parsing annotations. 141 // "parseAnnotations": "2, 21.944µs/22.541µs/23.138µs", 142 // 143 // # This timer tells us how long we spend reconciling changes to consul inputs. 144 // "reconcileConsul": "2, 50.104µs/55.499µs/60.894µs", 145 // 146 // # This timer tells us how long we spend reconciling secrets related changes to ambassador 147 // # inputs. 148 // "reconcileSecrets": "2, 18.704µs/20.786µs/22.868µs" 149 // }, 150 // "values": { 151 // "envoyReconfigs": { 152 // "times": [ 153 // "2020-11-06T13:13:24.218707995-05:00", 154 // "2020-11-06T13:13:27.185754494-05:00", 155 // "2020-11-06T13:13:28.612279777-05:00" 156 // ], 157 // "staleCount": 2, 158 // "staleMax": 0, 159 // "synced": true 160 // }, 161 // "memory": "39.73Gi of Unlimited (0%)" 162 // } 163 // } 164 // 165 // TODO: 166 // 167 // - Extend the API to permit a description of a given Timer or Value in the code. This will let us 168 // implement `curl localhost:8877/debug/help` or similar instead of manually keeping the above 169 // docs up to date. 170 package debug 171