1from typing import TYPE_CHECKING, Any, Dict, List, Optional
2
3from ..config import Config
4from .irfilter import IRFilter
5
6if TYPE_CHECKING:
7 from .ir import IR # pragma: no cover
8 from .ir.irresource import IRResource # pragma: no cover
9
10import re
11
# github.com/datawire/apro/issues/2661
# Use a whitelist to validate that any command operators in an error response body are
# supported by Envoy.
# TODO: remove this after support for escaping "%" lands in envoy
ALLOWED_ENVOY_FMT_TOKENS: List[str] = [
    "START_TIME",
    "REQUEST_HEADERS_BYTES",
    "BYTES_RECEIVED",
    "PROTOCOL",
    "RESPONSE_CODE",
    "RESPONSE_CODE_DETAILS",
    "CONNECTION_TERMINATION_DETAILS",
    "RESPONSE_HEADERS_BYTES",
    "RESPONSE_TRAILERS_BYTES",
    "BYTES_SENT",
    "UPSTREAM_WIRE_BYTES_SENT",
    "UPSTREAM_WIRE_BYTES_RECEIVED",
    "UPSTREAM_HEADER_BYTES_SENT",
    "UPSTREAM_HEADER_BYTES_RECEIVED",
    "DOWNSTREAM_WIRE_BYTES_SENT",
    "DOWNSTREAM_WIRE_BYTES_RECEIVED",
    "DOWNSTREAM_HEADER_BYTES_SENT",
    "DOWNSTREAM_HEADER_BYTES_RECEIVED",
    "DURATION",
    "REQUEST_DURATION",
    "REQUEST_TX_DURATION",
    "RESPONSE_DURATION",
    "RESPONSE_TX_DURATION",
    "RESPONSE_FLAGS",
    "ROUTE_NAME",
    "UPSTREAM_HOST",
    "UPSTREAM_CLUSTER",
    "UPSTREAM_LOCAL_ADDRESS",
    "UPSTREAM_TRANSPORT_FAILURE_REASON",
    "DOWNSTREAM_REMOTE_ADDRESS",
    "DOWNSTREAM_REMOTE_ADDRESS_WITHOUT_PORT",
    "DOWNSTREAM_DIRECT_REMOTE_ADDRESS",
    "DOWNSTREAM_DIRECT_REMOTE_ADDRESS_WITHOUT_PORT",
    "DOWNSTREAM_LOCAL_ADDRESS",
    "DOWNSTREAM_LOCAL_ADDRESS_WITHOUT_PORT",
    "CONNECTION_ID",
    "GRPC_STATUS",
    "DOWNSTREAM_LOCAL_PORT",
    "REQ",
    "RESP",
    "TRAILER",
    "DYNAMIC_METADATA",
    "CLUSTER_METADATA",
    "FILTER_STATE",
    "REQUESTED_SERVER_NAME",
    "DOWNSTREAM_LOCAL_URI_SAN",
    "DOWNSTREAM_PEER_URI_SAN",
    "DOWNSTREAM_LOCAL_SUBJECT",
    "DOWNSTREAM_PEER_SUBJECT",
    "DOWNSTREAM_PEER_ISSUER",
    "DOWNSTREAM_TLS_SESSION_ID",
    "DOWNSTREAM_TLS_CIPHER",
    "DOWNSTREAM_TLS_VERSION",
    "DOWNSTREAM_PEER_FINGERPRINT_256",
    "DOWNSTREAM_PEER_FINGERPRINT_1",
    "DOWNSTREAM_PEER_SERIAL",
    "DOWNSTREAM_PEER_CERT",
    "DOWNSTREAM_PEER_CERT_V_START",
    "DOWNSTREAM_PEER_CERT_V_END",
    "HOSTNAME",
    "LOCAL_REPLY_BODY",
    "FILTER_CHAIN_NAME",
]

# Pattern for a "%NAME%"-style command operator:
#
#   %NAME%, %NAME:LEN%, %NAME(A:B)%, %NAME(A?B):LEN%
#
# Group 1 captures NAME. As written, a parenthesised argument only matches when it
# contains at least one ":" or "?" separated segment and uses only [A-Za-z0-9_.].
#
# This is a raw string so the regex escapes (\%, \(, \?, \)) are not parsed as
# (invalid) Python string escapes; the resulting pattern text is unchanged.
ENVOY_FMT_TOKEN_REGEX = (
    r"\%([A-Za-z0-9_]+?)(\([A-Za-z0-9_.]+?((:|\?)[A-Za-z0-9_.]+?)+\))?(:[A-Za-z0-9_]+?)?\%"
)
82
83# IRErrorResponse implements custom error response bodies using Envoy's HTTP response_map filter.
84#
85# Error responses are configured as an array of rules on the Ambassador module. Rules can be
86# bypassed on a Mapping using `bypass_error_response_overrides`. In a future implementation,
87# rules will be supported at both the Module level and at the Mapping level, allowing a flexible
88# configuration where certain behaviors apply globally and Mappings can override them.
89#
90# The Ambassador module config isn't subject to strict typing at higher layers, so this IR has
91# to pay special attention to the types and format of the incoming config.
class IRErrorResponse(IRFilter):
    # Shape of one rule in the input config, as enforced by the validation below:
    #
    #   on_status_code: required; must convert to an integer in [400, 600)
    #   body:           required object holding exactly one of
    #                     text_format        - coerced to a string
    #                     json_format        - object with bool/int/float/str values
    #                     text_format_source - object with a string `filename`
    #                   plus an optional string `content_type`
    #
    # Rules that fail validation are reported with post_error() and skipped, so one
    # bad rule does not discard the others.

    # The list of mappers that will make up the final error response config
    _mappers: Optional[List[Dict[str, Any]]]

    # The IR config, used as input, typically from an `error_response_overrides` field
    # on a Resource (eg: the Ambassador module or a Mapping)
    _ir_config: List[Dict[str, Any]]

    # The object that references this IRErrorResponse.
    # Used by diagnostics to report the exact source of configuration errors.
    _referenced_by_obj: Optional["IRResource"]

    def __init__(
        self,
        ir: "IR",
        aconf: Config,
        error_response_config: List[Dict[str, Any]],
        referenced_by_obj: Optional["IRResource"] = None,
        rkey: str = "ir.error_response",
        kind: str = "IRErrorResponse",
        name: str = "error_response",
        type: Optional[str] = "decoder",
        **kwargs,
    ) -> None:
        # NOTE(review): `type` is accepted for interface compatibility but is not
        # forwarded to the base initializer -- confirm that this is intentional.
        #
        # Our own state is assigned before calling the base initializer; presumably
        # the base class may run setup() during construction -- verify before
        # reordering.
        self._ir_config = error_response_config
        self._referenced_by_obj = referenced_by_obj
        self._mappers = None
        super().__init__(ir=ir, aconf=aconf, rkey=rkey, kind=kind, name=name, **kwargs)

    # Return the final config, or None if there isn't any, either because
    # there was no input config, or none of the input config was valid.
    #
    # Callers should always check for None to mean that this IRErrorResponse
    # has no config to generate, and so the underlying envoy.http.filter.response_map
    # (or per-route config) does not need to be configured.
    def config(self) -> Optional[Dict[str, Any]]:
        if not self._mappers:
            return None
        return {"mappers": self._mappers}

    # Runs setup and always returns True to indicate success. This is safe because
    # _setup is tolerant of missing or invalid config. At the end of setup, the caller
    # should retain this object and use `config()` to get the final, good config, if any.
    def setup(self, ir: "IR", aconf: Config) -> bool:
        self._setup(ir, aconf)
        return True

    # Validate the input config and, if any rule is usable, populate self._mappers.
    def _setup(self, ir: "IR", aconf: Config) -> None:
        # Do nothing (and post no errors) if there's no config. This covers None and
        # an empty list, as well as any other falsy value.
        if not self._ir_config:
            return

        # The error_response_overrides config must be an array
        if not isinstance(self._ir_config, list):
            self.post_error(
                f"IRErrorResponse: error_response_overrides: field must be an array, got {type(self._ir_config)}"
            )
            return

        # We have some configuration to deal with: try to load it, posting any errors
        # found along the way. _generate_mappers skips invalid rules and preserves the
        # others, which prevents one bad rule from eliminating the rest. In practice
        # this isn't as useful as it sounds because module config is only loaded once
        # on startup, but ideally we'll move away from that limitation.
        self._mappers = self._generate_mappers()
        if self._mappers is not None:
            ir.logger.debug("IRErrorResponse: loaded mappers %r", self._mappers)
            if self._referenced_by_obj is not None:
                self.referenced_by(self._referenced_by_obj)

    # Convert every rule in self._ir_config into a response_map mapper.
    #
    # Returns the list of mappers built from the valid rules, or None (after posting
    # an error) when no rule could be parsed.
    def _generate_mappers(self) -> Optional[List[Dict[str, Any]]]:
        # Compiled once here and shared by every rule.
        # TODO: remove this code when envoy supports escaping "%"
        token_finder = re.compile(ENVOY_FMT_TOKEN_REGEX)

        all_mappers: List[Dict[str, Any]] = []
        for error_response in self._ir_config:
            mapper = self._build_mapper(error_response, token_finder)
            if mapper is not None:
                all_mappers.append(mapper)

        # If nothing could be parsed successfully, post an error.
        if not all_mappers:
            self.post_error("IRErrorResponse: no valid error response mappers could be parsed")
            return None

        return all_mappers

    # Build the mapper for a single rule.
    #
    # Returns the mapper dict, or None when the rule is invalid; in that case the
    # reason has already been reported with post_error().
    def _build_mapper(
        self, error_response: Any, token_finder: "re.Pattern[str]"
    ) -> Optional[Dict[str, Any]]:
        # The module config isn't strictly typed at higher layers, so a list entry is
        # not guaranteed to be an object.
        if not isinstance(error_response, dict):
            self.post_error(
                f'IRErrorResponse: each error response override must be an object, found "{error_response}"'
            )
            return None

        status_code_str = self._parse_status_code(error_response)
        if status_code_str is None:
            return None

        # Try to parse `body` (a required field) as an object.
        ir_body = error_response.get("body", None)
        if ir_body is None:
            self.post_error("IRErrorResponse: body: field must exist")
            return None
        if not isinstance(ir_body, dict):
            self.post_error(f"IRErrorResponse: body: field must be an object, found {ir_body}")
            return None

        # Content type is optional. It can be used to override the content type of the
        # error response body.
        ir_content_type = ir_body.get("content_type", None)

        ir_text_format_source = ir_body.get("text_format_source", None)
        ir_text_format = ir_body.get("text_format", None)
        ir_json_format = ir_body.get("json_format", None)

        # Only one of text_format, json_format, or text_format_source may be set.
        # Post an error if we found more than one of these fields set.
        formats_set = sum(
            1 for f in (ir_text_format_source, ir_text_format, ir_json_format) if f is not None
        )
        if formats_set > 1:
            self.post_error(
                'IRErrorResponse: only one of "text_format", "json_format", '
                f'or "text_format_source" may be set, found {formats_set} of these fields set.'
            )
            return None

        body_format_override: Dict[str, Any] = {}

        # The text used for the error response body, kept so it can be checked for
        # bad tokens. TODO: remove once envoy supports escaping "%"
        format_body = ""

        if ir_text_format_source is not None:
            file_text = self._read_text_format_source(ir_text_format_source)
            if file_text is None:
                return None
            body_format_override["text_format_source"] = ir_text_format_source
            format_body = file_text
        elif ir_text_format is not None:
            # text_format is coerced to a string rather than rejected when it is not one.
            try:
                format_body = str(ir_text_format)
            except ValueError as e:
                self.post_error(f"IRErrorResponse: text_format: {e}")
                # Skip the rule: without this, a mapper with no format would be emitted.
                return None
            body_format_override["text_format"] = format_body
        elif ir_json_format is not None:
            sanitized = self._sanitize_json_format(ir_json_format)
            if sanitized is None:
                return None
            body_format_override["json_format"] = sanitized
            # Scan exactly the keys and values that will be handed to Envoy.
            format_body = ", ".join(f"{k}: {v}" for k, v in sanitized.items())
        else:
            self.post_error(
                f'IRErrorResponse: could not find a valid format field in body "{ir_body}"'
            )
            return None

        if ir_content_type is not None:
            # Content type is optional, but it must be a string if set.
            if not isinstance(ir_content_type, str):
                self.post_error("IRErrorResponse: content_type: field must be a string")
                return None
            body_format_override["content_type"] = ir_content_type

        if self._has_unsupported_tokens(format_body, token_finder):
            return None

        # We currently only support filtering using an equality match on status codes.
        # The underlying response_map filter in Envoy supports a larger set of filters,
        # however, and adding support for them should be relatively straight-forward.
        #
        # The mapper config has a `filter` (the rule) and a `body_format_override`
        # (the action).
        return {
            "filter": {
                "status_code_filter": {
                    "comparison": {
                        "op": "EQ",
                        "value": {
                            "default_value": status_code_str,
                            # Envoy _requires_ that the status code comparison value
                            # has an associated "runtime_key". This is used as a key
                            # in the runtime config system for changing config values
                            # without restarting Envoy.
                            # We definitely do not want this value to ever change
                            # inside of Envoy at runtime, so the best we can do is name
                            # this key something arbitrary and hopefully unused.
                            "runtime_key": "_donotsetthiskey",
                        },
                    }
                }
            },
            "body_format_override": body_format_override,
        }

    # Parse `on_status_code` (a required field) as an integer in the interval
    # [400, 600) and return it as a string, or post an error and return None.
    #
    # We don't support matching on 3XX (or 1xx/2xx for that matter) codes yet. If
    # there's appetite for that in the future, it should be as easy as relaxing the
    # rules enforced here. The underlying response_map filter in Envoy supports it
    # natively.
    def _parse_status_code(self, error_response: Dict[str, Any]) -> Optional[str]:
        ir_on_status_code = error_response.get("on_status_code", None)
        if ir_on_status_code is None:
            self.post_error("IRErrorResponse: on_status_code: field must exist")
            return None

        try:
            code = int(ir_on_status_code)
        except (TypeError, ValueError, OverflowError) as e:
            # int() raises TypeError for values such as lists or objects and
            # OverflowError for infinity, not only ValueError for bad strings.
            self.post_error(f"IRErrorResponse: on_status_code: {e}")
            return None

        if code < 400 or code >= 600:
            self.post_error(
                "IRErrorResponse: on_status_code: field must be an integer >= 400 and < 600"
            )
            return None

        return str(code)

    # Verify that text_format_source is an object with a string filename and read
    # that file so its contents can be checked for bad tokens.
    #
    # Returns the file contents, or None after posting an error.
    def _read_text_format_source(self, ir_text_format_source: Any) -> Optional[str]:
        if not isinstance(ir_text_format_source, dict) or not isinstance(
            ir_text_format_source.get("filename", None), str
        ):
            self.post_error(
                f'IRErrorResponse: text_format_source field must be an object with a single filename field, found "{ir_text_format_source}"'
            )
            return None

        try:
            # The context manager closes the file even if read() fails.
            with open(ir_text_format_source["filename"], mode="r") as fmt_file:
                return fmt_file.read()
        except OSError:
            self.post_error(
                "IRErrorResponse: text_format_source field references a file that does not exist"
            )
            return None

    # Verify that json_format is an object and convert its keys and values to strings.
    #
    # Envoy requires string values for json_format. The mapping CRD validates that
    # json_format maps strings to strings, but our module config doesn't have the
    # same validation, so we do it here.
    #
    # Returns the sanitized dict (possibly empty), or None after posting an error.
    def _sanitize_json_format(self, ir_json_format: Any) -> Optional[Dict[str, str]]:
        if not isinstance(ir_json_format, dict):
            self.post_error(
                f'IRErrorResponse: json_format field must be an object, found "{ir_json_format}"'
            )
            return None

        error: str = ""
        sanitized: Dict[str, str] = {}
        try:
            for raw_key, value in ir_json_format.items():
                key = str(raw_key)
                # bool must be tested first: it is a subclass of int.
                if isinstance(value, bool):
                    sanitized[key] = str(value).lower()
                elif isinstance(value, (int, float, str)):
                    sanitized[key] = str(value)
                else:
                    error = f'IRErrorResponse: json_format only supports string values, and type "{type(value)}" for key "{key}" cannot be implicitly converted to string'
                    break
        except ValueError as e:
            # This really shouldn't be possible, because the string casts we do above
            # are "safely" done on types where casting is always valid (eg: bool, int).
            error = f"IRErrorResponse: unexpected ValueError while sanitizing ir_json_format {ir_json_format}: {e}"

        if error:
            self.post_error(error)
            return None

        return sanitized

    # Search the body for command tokens and post one error per token that is not in
    # ALLOWED_ENVOY_FMT_TOKENS. Returns True if any such token was found.
    # TODO: remove this code when envoy supports escaping "%"
    def _has_unsupported_tokens(self, format_body: str, token_finder: "re.Pattern[str]") -> bool:
        bad_token = False
        for match in token_finder.findall(format_body):
            # match[0] is the first group in the regex match, which contains the
            # command operator name.
            if match[0] not in ALLOWED_ENVOY_FMT_TOKENS:
                self.post_error(f"IRErrorResponse: Invalid Envoy command token: {match[0]}")
                bad_token = True
        return bad_token
View as plain text