...

Source file src/cloud.google.com/go/httpreplay/internal/proxy/converter.go

Documentation: cloud.google.com/go/httpreplay/internal/proxy

     1  // Copyright 2019 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package proxy
    16  
    17  import (
    18  	"bytes"
    19  	"io"
    20  	"mime"
    21  	"mime/multipart"
    22  	"net/http"
    23  	"net/url"
    24  	"regexp"
    25  	"strings"
    26  )
    27  
// A Converter converts HTTP requests and responses to the Request and Response types
// of this package, while removing or redacting information.
//
// Every field is a list of compiled patterns. A header or query parameter
// name is affected by a list when it matches at least one pattern in it.
type Converter struct {
	// These all apply to both headers and trailers.
	ClearHeaders          []tRegexp // replace matching headers with "CLEARED"
	RemoveRequestHeaders  []tRegexp // remove matching headers in requests
	RemoveResponseHeaders []tRegexp // remove matching headers in responses
	ClearParams           []tRegexp // replace matching query params with "CLEARED"
	RemoveParams          []tRegexp // remove matching query params
}
    38  
// tRegexp is a regexp that can be marshaled to and from text.
// It embeds *regexp.Regexp, so all Regexp methods are available on it directly.
type tRegexp struct {
	*regexp.Regexp
}
    43  
    44  func (r tRegexp) MarshalText() ([]byte, error) {
    45  	return []byte(r.String()), nil
    46  }
    47  
    48  func (r *tRegexp) UnmarshalText(b []byte) error {
    49  	var err error
    50  	r.Regexp, err = regexp.Compile(string(b))
    51  	return err
    52  }
    53  
    54  func (c *Converter) registerRemoveRequestHeaders(pat string) {
    55  	c.RemoveRequestHeaders = append(c.RemoveRequestHeaders, pattern(pat))
    56  }
    57  
    58  func (c *Converter) registerClearHeaders(pat string) {
    59  	c.ClearHeaders = append(c.ClearHeaders, pattern(pat))
    60  }
    61  
    62  func (c *Converter) registerRemoveParams(pat string) {
    63  	c.RemoveParams = append(c.RemoveParams, pattern(pat))
    64  }
    65  
    66  func (c *Converter) registerClearParams(pat string) {
    67  	c.ClearParams = append(c.ClearParams, pattern(pat))
    68  }
    69  
// Default header patterns installed by defaultConverter. Each entry is
// compiled with pattern, so "*" matches any run of characters and everything
// else is literal.
var (
	// defaultRemoveRequestHeaders lists headers removed from requests only.
	defaultRemoveRequestHeaders = []string{
		"Authorization", // not only is it secret, but it is probably missing on replay
		"Proxy-Authorization",
		"Connection",
		"Content-Type", // because it may contain a random multipart boundary
		"Date",
		"Host",
		"Transfer-Encoding",
		"Via",
		"X-Forwarded-*",
		// Google-specific
		"X-Cloud-Trace-Context",        // OpenCensus traces have a random ID
		"X-Goog-Api-Client",            // can differ for, e.g., different Go versions
		"X-Goog-Gcs-Idempotency-Token", // Used by Cloud Storage
	}

	// defaultRemoveBothHeaders lists headers removed from both requests and
	// responses.
	defaultRemoveBothHeaders = []string{
		// Google-specific
		// GFEs scrub X-Google- and X-GFE- headers from requests and responses.
		// Drop them from recordings made by users inside Google.
		// http://g3doc/gfe/g3doc/gfe3/design/http_filters/google_header_filter
		// (internal Google documentation).
		"X-Google-*",
		"X-Gfe-*",
	}

	// defaultClearHeaders lists headers whose values are replaced with
	// "CLEARED" rather than being removed.
	defaultClearHeaders = []string{
		// Google-specific
		// Used by Cloud Storage for customer-supplied encryption.
		"X-Goog-*Encryption-Key",
	}
)
   103  
   104  func defaultConverter() *Converter {
   105  	c := &Converter{}
   106  	for _, h := range defaultClearHeaders {
   107  		c.registerClearHeaders(h)
   108  	}
   109  	for _, h := range defaultRemoveRequestHeaders {
   110  		c.registerRemoveRequestHeaders(h)
   111  	}
   112  	for _, h := range defaultRemoveBothHeaders {
   113  		c.registerRemoveRequestHeaders(h)
   114  		c.RemoveResponseHeaders = append(c.RemoveResponseHeaders, pattern(h))
   115  	}
   116  	return c
   117  }
   118  
   119  // Convert a pattern into a regexp.
   120  // A pattern is like a literal regexp anchored on both ends, with only one
   121  // non-literal character: "*", which matches zero or more characters.
   122  func pattern(p string) tRegexp {
   123  	q := regexp.QuoteMeta(p)
   124  	q = "^" + strings.Replace(q, `\*`, `.*`, -1) + "$"
   125  	// q must be a legal regexp.
   126  	return tRegexp{regexp.MustCompile(q)}
   127  }
   128  
   129  func (c *Converter) convertRequest(req *http.Request) (*Request, error) {
   130  	body, err := snapshotBody(&req.Body)
   131  	if err != nil {
   132  		return nil, err
   133  	}
   134  	// If the body is empty, set it to nil to make sure the proxy sends a
   135  	// Content-Length header.
   136  	if len(body) == 0 {
   137  		req.Body = nil
   138  	}
   139  	mediaType, parts, err := parseRequestBody(req.Header.Get("Content-Type"), body)
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  	url2 := *req.URL
   144  	url2.RawQuery = scrubQuery(url2.RawQuery, c.ClearParams, c.RemoveParams)
   145  	return &Request{
   146  		Method:    req.Method,
   147  		URL:       url2.String(),
   148  		Header:    scrubHeaders(req.Header, c.ClearHeaders, c.RemoveRequestHeaders),
   149  		MediaType: mediaType,
   150  		BodyParts: parts,
   151  		Trailer:   scrubHeaders(req.Trailer, c.ClearHeaders, c.RemoveRequestHeaders),
   152  	}, nil
   153  }
   154  
   155  // parseRequestBody parses the Content-Type header, reads the body, and splits it into
   156  // parts if necessary. It returns the media type and the body parts.
   157  func parseRequestBody(contentType string, body []byte) (string, [][]byte, error) {
   158  	if contentType == "" {
   159  		// No content-type header. Treat the body as a single part.
   160  		return "", [][]byte{body}, nil
   161  	}
   162  	mediaType, params, err := mime.ParseMediaType(contentType)
   163  	if err != nil {
   164  		return "", nil, err
   165  	}
   166  	var parts [][]byte
   167  	if strings.HasPrefix(mediaType, "multipart/") {
   168  		mr := multipart.NewReader(bytes.NewReader(body), params["boundary"])
   169  		for {
   170  			p, err := mr.NextPart()
   171  			if err == io.EOF {
   172  				break
   173  			}
   174  			if err != nil {
   175  				return "", nil, err
   176  			}
   177  			part, err := io.ReadAll(p)
   178  			if err != nil {
   179  				return "", nil, err
   180  			}
   181  			// TODO(jba): care about part headers?
   182  			parts = append(parts, part)
   183  		}
   184  	} else {
   185  		parts = [][]byte{body}
   186  	}
   187  	return mediaType, parts, nil
   188  }
   189  
   190  func (c *Converter) convertResponse(res *http.Response) (*Response, error) {
   191  	data, err := snapshotBody(&res.Body)
   192  	if err != nil {
   193  		return nil, err
   194  	}
   195  	return &Response{
   196  		StatusCode: res.StatusCode,
   197  		Proto:      res.Proto,
   198  		ProtoMajor: res.ProtoMajor,
   199  		ProtoMinor: res.ProtoMinor,
   200  		Header:     scrubHeaders(res.Header, c.ClearHeaders, c.RemoveResponseHeaders),
   201  		Body:       data,
   202  		Trailer:    scrubHeaders(res.Trailer, c.ClearHeaders, c.RemoveResponseHeaders),
   203  	}, nil
   204  }
   205  
   206  func snapshotBody(body *io.ReadCloser) ([]byte, error) {
   207  	data, err := io.ReadAll(*body)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	(*body).Close()
   212  	*body = io.NopCloser(bytes.NewReader(data))
   213  	return data, nil
   214  }
   215  
   216  // Copy headers, clearing some and removing others.
   217  func scrubHeaders(hs http.Header, clear, remove []tRegexp) http.Header {
   218  	rh := http.Header{}
   219  	for k, v := range hs {
   220  		switch {
   221  		case match(k, clear):
   222  			rh.Set(k, "CLEARED")
   223  		case match(k, remove):
   224  			// skip
   225  		default:
   226  			rh[k] = v
   227  		}
   228  	}
   229  	return rh
   230  }
   231  
   232  // Copy the query string, clearing some query params and removing others.
   233  // Preserve the order of the string.
   234  func scrubQuery(query string, clear, remove []tRegexp) string {
   235  	// We can't use url.ParseQuery because it doesn't preserve order.
   236  	var buf bytes.Buffer
   237  	for {
   238  		if i := strings.IndexAny(query, "&;"); i >= 0 {
   239  			scrubParam(&buf, query[:i], query[i], clear, remove)
   240  			query = query[i+1:]
   241  		} else {
   242  			scrubParam(&buf, query, 0, clear, remove)
   243  			break
   244  		}
   245  	}
   246  	s := buf.String()
   247  	if strings.HasSuffix(s, "&") {
   248  		return s[:len(s)-1]
   249  	}
   250  	return s
   251  }
   252  
   253  func scrubParam(buf *bytes.Buffer, param string, sep byte, clear, remove []tRegexp) {
   254  	if param == "" {
   255  		return
   256  	}
   257  	key := param
   258  	value := ""
   259  	if i := strings.Index(param, "="); i >= 0 {
   260  		key, value = key[:i], key[i+1:]
   261  	}
   262  	ukey, err := url.QueryUnescape(key)
   263  	// If the key is bad, just pass it and the value through.
   264  	if err != nil {
   265  		buf.WriteString(param)
   266  		if sep != 0 {
   267  			buf.WriteByte(sep)
   268  		}
   269  		return
   270  	}
   271  	if match(ukey, remove) {
   272  		return
   273  	}
   274  	if match(ukey, clear) && value != "" {
   275  		value = "CLEARED"
   276  	}
   277  	buf.WriteString(key)
   278  	buf.WriteByte('=')
   279  	buf.WriteString(value)
   280  	if sep != 0 {
   281  		buf.WriteByte(sep)
   282  	}
   283  }
   284  
   285  func match(s string, res []tRegexp) bool {
   286  	for _, re := range res {
   287  		if re.MatchString(s) {
   288  			return true
   289  		}
   290  	}
   291  	return false
   292  }
   293  

View as plain text