...

Text file src/github.com/emissary-ingress/emissary/v3/python/ambassador/ir/irerrorresponse.py

Documentation: github.com/emissary-ingress/emissary/v3/python/ambassador/ir

     1from typing import TYPE_CHECKING, Any, Dict, List, Optional
     2
     3from ..config import Config
     4from .irfilter import IRFilter
     5
     6if TYPE_CHECKING:
     7    from .ir import IR  # pragma: no cover
     8    from .ir.irresource import IRResource  # pragma: no cover
     9
    10import re
    11
    12# github.com/datawire/apro/issues/2661
    13# Use a whitelist to validate that any command operators in error response body are supported by envoy
    14# TODO: remove this after support for escaping "%" lands in envoy
    15ALLOWED_ENVOY_FMT_TOKENS = [
    16    "START_TIME",
    17    "REQUEST_HEADERS_BYTES",
    18    "BYTES_RECEIVED",
    19    "PROTOCOL",
    20    "RESPONSE_CODE",
    21    "RESPONSE_CODE_DETAILS",
    22    "CONNECTION_TERMINATION_DETAILS",
    23    "RESPONSE_HEADERS_BYTES",
    24    "RESPONSE_TRAILERS_BYTES",
    25    "BYTES_SENT",
    26    "UPSTREAM_WIRE_BYTES_SENT",
    27    "UPSTREAM_WIRE_BYTES_RECEIVED",
    28    "UPSTREAM_HEADER_BYTES_SENT",
    29    "UPSTREAM_HEADER_BYTES_RECEIVED",
    30    "DOWNSTREAM_WIRE_BYTES_SENT",
    31    "DOWNSTREAM_WIRE_BYTES_RECEIVED",
    32    "DOWNSTREAM_HEADER_BYTES_SENT",
    33    "DOWNSTREAM_HEADER_BYTES_RECEIVED",
    34    "DURATION",
    35    "REQUEST_DURATION",
    36    "REQUEST_TX_DURATION",
    37    "RESPONSE_DURATION",
    38    "RESPONSE_TX_DURATION",
    39    "RESPONSE_FLAGS",
    40    "ROUTE_NAME",
    41    "UPSTREAM_HOST",
    42    "UPSTREAM_CLUSTER",
    43    "UPSTREAM_LOCAL_ADDRESS",
    44    "UPSTREAM_TRANSPORT_FAILURE_REASON",
    45    "DOWNSTREAM_REMOTE_ADDRESS",
    46    "DOWNSTREAM_REMOTE_ADDRESS_WITHOUT_PORT",
    47    "DOWNSTREAM_DIRECT_REMOTE_ADDRESS",
    48    "DOWNSTREAM_DIRECT_REMOTE_ADDRESS_WITHOUT_PORT",
    49    "DOWNSTREAM_LOCAL_ADDRESS",
    50    "DOWNSTREAM_LOCAL_ADDRESS_WITHOUT_PORT",
    51    "CONNECTION_ID",
    52    "GRPC_STATUS",
    53    "DOWNSTREAM_LOCAL_PORT",
    54    "REQ",
    55    "RESP",
    56    "TRAILER",
    57    "DYNAMIC_METADATA",
    58    "CLUSTER_METADATA",
    59    "FILTER_STATE",
    60    "REQUESTED_SERVER_NAME",
    61    "DOWNSTREAM_LOCAL_URI_SAN",
    62    "DOWNSTREAM_PEER_URI_SAN",
    63    "DOWNSTREAM_LOCAL_SUBJECT",
    64    "DOWNSTREAM_PEER_SUBJECT",
    65    "DOWNSTREAM_PEER_ISSUER",
    66    "DOWNSTREAM_TLS_SESSION_ID",
    67    "DOWNSTREAM_TLS_CIPHER",
    68    "DOWNSTREAM_TLS_VERSION",
    69    "DOWNSTREAM_PEER_FINGERPRINT_256",
    70    "DOWNSTREAM_PEER_FINGERPRINT_1",
    71    "DOWNSTREAM_PEER_SERIAL",
    72    "DOWNSTREAM_PEER_CERT",
    73    "DOWNSTREAM_PEER_CERT_V_START",
    74    "DOWNSTREAM_PEER_CERT_V_END",
    75    "HOSTNAME",
    76    "LOCAL_REPLY_BODY",
    77    "FILTER_CHAIN_NAME",
    78]
    79ENVOY_FMT_TOKEN_REGEX = (
    80    "\%([A-Za-z0-9_]+?)(\([A-Za-z0-9_.]+?((:|\?)[A-Za-z0-9_.]+?)+\))?(:[A-Za-z0-9_]+?)?\%"
    81)
    82
    83# IRErrorResponse implements custom error response bodies using Envoy's HTTP response_map filter.
    84#
    85# Error responses are configured as an array of rules on the Ambassador module. Rules can be
    86# bypassed on a Mapping using `bypass_error_response_overrides`. In a future implementation,
    87# rules will be supported at both the Module level and at the Mapping level, allowing a flexible
    88# configuration where certain behaviors apply globally and Mappings can override them.
    89#
    90# The Ambassador module config isn't subject to strict typing at higher layers, so this IR has
    91# to pay special attention to the types and format of the incoming config.
    92class IRErrorResponse(IRFilter):
    93
    94    # The list of mappers that will make up the final error response config
    95    _mappers: Optional[List[Dict[str, Any]]]
    96
    97    # The IR config, used as input, typically from an `error_response_overrides` field
    98    # on a Resource (eg: the Ambassador module or a Mapping)
    99    _ir_config: List[Dict[str, Any]]
   100
   101    # The object that references this IRErrorResource.
   102    # Use by diagnostics to report the exact source of configuration errors.
   103    _referenced_by_obj: Optional["IRResource"]
   104
   105    def __init__(
   106        self,
   107        ir: "IR",
   108        aconf: Config,
   109        error_response_config: List[Dict[str, Any]],
   110        referenced_by_obj: Optional["IRResource"] = None,
   111        rkey: str = "ir.error_response",
   112        kind: str = "IRErrorResponse",
   113        name: str = "error_response",
   114        type: Optional[str] = "decoder",
   115        **kwargs,
   116    ) -> None:
   117        self._ir_config = error_response_config
   118        self._referenced_by_obj = referenced_by_obj
   119        self._mappers = None
   120        super().__init__(ir=ir, aconf=aconf, rkey=rkey, kind=kind, name=name, **kwargs)
   121
   122    # Return the final config, or None if there isn't any, either because
   123    # there was no input config, or none of the input config was valid.
   124    #
   125    # Callers shoulh always check for None to mean that this IRErrorResponse
   126    # has no config to generate, and so the underlying envoy.http.filter.response_map
   127    # (or per-route config) does not need to be configured.
   128    def config(self) -> Optional[Dict[str, Any]]:
   129        if not self._mappers:
   130            return None
   131        return {"mappers": self._mappers}
   132
   133    # Runs setup and always returns true to indicate success. This is safe because
   134    # _setup is tolerant of missing or invalid config. At the end of setup, the caller
   135    # should retain this object and use `config()` get the final, good config, if any.
   136    def setup(self, ir: "IR", aconf: Config) -> bool:
   137        self._setup(ir, aconf)
   138        return True
   139
   140    def _setup(self, ir: "IR", aconf: Config) -> None:
   141        # Do nothing (and post no errors) if there's no config.
   142        if not self._ir_config:
   143            return
   144
   145        # The error_response_overrides config must be an array
   146        if not isinstance(self._ir_config, list):
   147            self.post_error(
   148                f"IRErrorResponse: error_response_overrides: field must be an array, got {type(self._ir_config)}"
   149            )
   150            return
   151
   152        # Do nothing (and post no errors) if there's config, but it's empty.
   153        if len(self._ir_config) == 0:
   154            return
   155
   156        # If we have some configuration to deal with, try to load it, and post any errors
   157        # that we find along the way. Internally, _load_config will skip any error response rules
   158        # that are invalid, preserving other rules. This prevents one bad rule from eliminating
   159        # the others. In practice this isn't as useful as it sounds because module config is only
   160        # loaded once on startup, but ideally we'll move away from that limitation.
   161        self._mappers = self._generate_mappers()
   162        if self._mappers is not None:
   163            ir.logger.debug("IRErrorResponse: loaded mappers %s" % repr(self._mappers))
   164            if self._referenced_by_obj is not None:
   165                self.referenced_by(self._referenced_by_obj)
   166
   167    def _generate_mappers(self) -> Optional[List[Dict[str, Any]]]:
   168        all_mappers: List[Dict[str, Any]] = []
   169        for error_response in self._ir_config:
   170            # Try to parse `on_status_code` (a required field) as an integer
   171            # in the interval [400, 600). We don't support matching on 3XX
   172            # (or 1xx/2xx for that matter) codes yet. If there's appetite for
   173            # that in the future, it should be as easy as relaxing the rules
   174            # enforced here. The underlying response_map filter in Envoy supports
   175            # it natively.
   176            try:
   177                ir_on_status_code = error_response.get("on_status_code", None)
   178                if ir_on_status_code is None:
   179                    raise ValueError("field must exist")
   180
   181                code = int(ir_on_status_code)
   182                if code < 400 or code >= 600:
   183                    raise ValueError("field must be an integer >= 400 and < 600")
   184
   185                status_code_str: str = str(code)
   186            except ValueError as e:
   187                self.post_error(f"IRErrorResponse: on_status_code: %s" % e)
   188                continue
   189
   190            # Try to parse `body` (a required field) as an object.
   191            ir_body = error_response.get("body", None)
   192            if ir_body is None:
   193                self.post_error(f"IRErrorResponse: body: field must exist")
   194                continue
   195            if not isinstance(ir_body, dict):
   196                self.post_error(
   197                    f"IRErrorResponse: body: field must be an object, found %s" % ir_body
   198                )
   199                continue
   200
   201            # We currently only support filtering using an equality match on status codes.
   202            # The underlying response_map filter in Envoy supports a larger set of filters,
   203            # however, and adding support for them should be relatively straight-forward.
   204            mapper: Dict[str, Any] = {
   205                "filter": {
   206                    "status_code_filter": {
   207                        "comparison": {
   208                            "op": "EQ",
   209                            "value": {
   210                                "default_value": status_code_str,
   211                                # Envoy _requires_ that the status code comparison value
   212                                # has an associated "runtime_key". This is used as a key
   213                                # in the runtime config system for changing config values
   214                                # without restarting Envoy.
   215                                # We definitely do not want this value to ever change
   216                                # inside of Envoy at runtime, so the best we can do is name
   217                                # this key something arbitrary and hopefully unused.
   218                                "runtime_key": "_donotsetthiskey",
   219                            },
   220                        }
   221                    }
   222                }
   223            }
   224
   225            # Content type is optional. It can be used to override the content type of the
   226            # error response body.
   227            ir_content_type = ir_body.get("content_type", None)
   228
   229            ir_text_format_source = ir_body.get("text_format_source", None)
   230            ir_text_format = ir_body.get("text_format", None)
   231            ir_json_format = ir_body.get("json_format", None)
   232
   233            # get the text used for error response body so we can check it for bad tokens
   234            # TODO: remove once envoy supports escaping "%"
   235            format_body = ""
   236
   237            # Only one of text_format, json_format, or text_format_source may be set.
   238            # Post an error if we found more than one these fields set.
   239            formats_set: int = 0
   240            for f in [ir_text_format_source, ir_text_format, ir_json_format]:
   241                if f is not None:
   242                    formats_set += 1
   243            if formats_set > 1:
   244                self.post_error(
   245                    'IRErrorResponse: only one of "text_format", "json_format", '
   246                    + 'or "text_format_source" may be set, found %d of these fields set.'
   247                    % formats_set
   248                )
   249                continue
   250
   251            body_format_override: Dict[str, Any] = {}
   252
   253            if ir_text_format_source is not None:
   254                # Verify that the text_format_source field is an object with a string filename.
   255                if not isinstance(ir_text_format_source, dict) or not isinstance(
   256                    ir_text_format_source.get("filename", None), str
   257                ):
   258                    self.post_error(
   259                        f'IRErrorResponse: text_format_source field must be an object with a single filename field, found "{ir_text_format_source}"'
   260                    )
   261                    continue
   262
   263                body_format_override["text_format_source"] = ir_text_format_source
   264                try:
   265                    fmt_file = open(ir_text_format_source["filename"], mode="r")
   266                    format_body = fmt_file.read()
   267                    fmt_file.close()
   268                except OSError:
   269                    self.post_error(
   270                        "IRErrorResponse: text_format_source field references a file that does not exist"
   271                    )
   272                    continue
   273
   274            elif ir_text_format is not None:
   275                # Verify that the text_format field is a string
   276                try:
   277                    body_format_override["text_format"] = str(ir_text_format)
   278                    format_body = str(ir_text_format)
   279                except ValueError as e:
   280                    self.post_error(f"IRErrorResponse: text_format: %s" % e)
   281            elif ir_json_format is not None:
   282                # Verify that the json_format field is an object
   283                if not isinstance(ir_json_format, dict):
   284                    self.post_error(
   285                        f'IRErrorResponse: json_format field must be an object, found "{ir_json_format}"'
   286                    )
   287                    continue
   288
   289                # Envoy requires string values for json_format. Validate that every field in the
   290                # json_format can be trivially converted to a string, error otherwise.
   291                #
   292                # The mapping CRD validates that json_format maps strings to strings, but our
   293                # module config doesn't have the same validation, so we do it here.
   294                error: str = ""
   295                sanitized: Dict[str, str] = {}
   296                try:
   297                    for k, v in ir_json_format.items():
   298                        k = str(k)
   299                        if isinstance(v, bool):
   300                            sanitized[k] = str(v).lower()
   301                            format_body += f"{k}: {str(v).upper()}, "
   302                        elif isinstance(v, (int, float, str)):
   303                            sanitized[k] = str(v)
   304                            format_body += f"{k}: {str(v)}, "
   305                        else:
   306                            error = f'IRErrorResponse: json_format only supports string values, and type "{type(v)}" for key "{k}" cannot be implicitly converted to string'
   307                            break
   308                except ValueError as e:
   309                    # This really shouldn't be possible, because the string casts we do above
   310                    # are "safely" done on types where casting is always valid (eg: bool, int).
   311                    error = f"IRErrorResponse: unexpected ValueError while sanitizing ir_json_format {ir_json_format}: {e}"
   312
   313                if error:
   314                    self.post_error(error)
   315                    continue
   316
   317                body_format_override["json_format"] = sanitized
   318            else:
   319                self.post_error(
   320                    f'IRErrorResponse: could not find a valid format field in body "{ir_body}"'
   321                )
   322                continue
   323
   324            if ir_content_type is not None:
   325                # Content type is optional, but it must be a string if set.
   326                if not isinstance(ir_content_type, str):
   327                    self.post_error(f"IRErrorResponse: content_type: field must be a string")
   328                    continue
   329
   330                body_format_override["content_type"] = ir_content_type
   331
   332            # search the body for command tokens
   333            # TODO: remove this code when envoy supports escaping "%"
   334            token_finder = re.compile(ENVOY_FMT_TOKEN_REGEX)
   335            matches = token_finder.findall(format_body)
   336
   337            bad_token = False
   338            for i in matches:
   339                # i[0] is first group in regex match which will contain the command operator name
   340                if not i[0] in ALLOWED_ENVOY_FMT_TOKENS:
   341                    self.post_error(f"IRErrorResponse: Invalid Envoy command token: {i[0]}")
   342                    bad_token = True
   343
   344            if bad_token:
   345                continue
   346
   347            # The mapper config now has a `filter` (the rule) and a `body_format_override` (the action)
   348            mapper["body_format_override"] = body_format_override
   349            all_mappers.append(mapper)
   350
   351        # If nothing could be parsed successfully, post an error.
   352        if len(all_mappers) == 0:
   353            self.post_error(f"IRErrorResponse: no valid error response mappers could be parsed")
   354            return None
   355
   356        return all_mappers

View as plain text