compress.go

Documentation: github.com/klauspost/compress/gzhttp

     1  package gzhttp
     2  
     3  import (
     4  	"bufio"
     5  	"crypto/rand"
     6  	"crypto/sha256"
     7  	"encoding/binary"
     8  	"errors"
     9  	"fmt"
    10  	"hash/crc32"
    11  	"io"
    12  	"math"
    13  	"math/bits"
    14  	"mime"
    15  	"net"
    16  	"net/http"
    17  	"strconv"
    18  	"strings"
    19  	"sync"
    20  
    21  	"github.com/klauspost/compress/gzhttp/writer"
    22  	"github.com/klauspost/compress/gzhttp/writer/gzkp"
    23  	"github.com/klauspost/compress/gzip"
    24  )
    25  
const (
	// HeaderNoCompression can be used to disable compression.
	// Any header value will disable compression.
	// The Header is always removed from output.
	HeaderNoCompression = "No-Gzip-Compression"

	// Names of the HTTP headers that this file reads or modifies.
	vary            = "Vary"
	acceptEncoding  = "Accept-Encoding"
	contentEncoding = "Content-Encoding"
	contentRange    = "Content-Range"
	acceptRanges    = "Accept-Ranges"
	contentType     = "Content-Type"
	contentLength   = "Content-Length"
	eTag            = "ETag"
)
    41  
// codings maps a content-coding name to its qvalue, as filled in by parseEncodings.
type codings map[string]float64
    43  
const (
	// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
	// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
	// The examples seem to indicate that it is.
	DefaultQValue = 1.0

	// DefaultMinSize is the default minimum response size, in bytes, before gzip compression is enabled.
	// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
	// If you take a file that is 1300 bytes and compress it to 800 bytes, it's still transmitted in that
	// same 1500 byte packet regardless, so you've gained nothing.
	// That being the case, you should restrict the gzip compression to files with a size (plus header)
	// greater than a single packet; 1024 bytes (1KB) is therefore the default.
	DefaultMinSize = 1024
)
    57  
// GzipResponseWriter provides an http.ResponseWriter interface, which gzips
// bytes before writing them to the underlying response. This doesn't close the
// writers, so don't forget to do that.
// It can be configured to skip responses smaller than minSize.
type GzipResponseWriter struct {
	http.ResponseWriter
	level     int                      // Compression level handed to gwFactory.New.
	gwFactory writer.GzipWriterFactory // Creates the gzip writer when compression starts (see init).
	gw        writer.GzipWriter        // Active gzip writer; nil until compression has started.

	code int // Saves the WriteHeader value.

	minSize          int    // Specifies the minimum response size to gzip. If the response length is at least this value, it is compressed.
	buf              []byte // Holds the first part of the write before reaching the minSize or the end of the write.
	ignore           bool   // If true, then we immediately passthru writes to the underlying ResponseWriter.
	keepAcceptRanges bool   // Keep "Accept-Ranges" header.
	setContentType   bool   // Add content type, if missing and detected.
	suffixETag       string // Suffix to add to ETag header if response is compressed.
	dropETag         bool   // Drop ETag header if response is compressed (supersedes suffixETag).
	sha256Jitter     bool   // Use sha256 for jitter.
	randomJitter     string // Padding text; a prefix of it is set as the gzip header comment to add jitter.
	jitterBuffer     int    // Maximum buffer to accumulate before doing jitter.

	contentTypeFilter func(ct string) bool // Reports whether a response with content type ct should be compressed.
}
    83  
// GzipResponseWriterWithCloseNotify is a GzipResponseWriter that additionally
// exposes a CloseNotify method.
type GzipResponseWriterWithCloseNotify struct {
	*GzipResponseWriter
}
    87  
    88  func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
    89  	return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
    90  }
    91  
// Write accepts response data and either compresses it, passes it through, or
// buffers it until the compress/plain decision can be made.
//
// Once a gzip writer exists, b is written to it. Once the response has been
// marked as ignored, b goes straight to the underlying ResponseWriter. Before
// either decision, data is collected in w.buf and the decision is derived from
// the response headers and the buffered bytes.
//
// On success len(b) is returned, even when the data was only buffered.
func (w *GzipResponseWriter) Write(b []byte) (int, error) {
	// GZIP responseWriter is initialized. Use the GZIP responseWriter.
	if w.gw != nil {
		return w.gw.Write(b)
	}

	// If we have already decided not to use GZIP, immediately passthrough.
	if w.ignore {
		return w.ResponseWriter.Write(b)
	}

	// Save the write into a buffer for later use in GZIP responseWriter
	// (if content is long enough) or at close with regular responseWriter.
	// The buffer is capped at the largest of 512, minSize and jitterBuffer.
	wantBuf := 512
	if w.minSize > wantBuf {
		wantBuf = w.minSize
	}
	if w.jitterBuffer > 0 && w.jitterBuffer > wantBuf {
		wantBuf = w.jitterBuffer
	}
	// Only take as much of b as fits under the cap; the rest is kept in
	// remain and written once the decision has been made.
	toAdd := len(b)
	if len(w.buf)+toAdd > wantBuf {
		toAdd = wantBuf - len(w.buf)
	}
	w.buf = append(w.buf, b[:toAdd]...)
	remain := b[toAdd:]
	hdr := w.Header()

	// Only continue if they didn't already choose an encoding or a known unhandled content length or type.
	if len(hdr[HeaderNoCompression]) == 0 && hdr.Get(contentEncoding) == "" && hdr.Get(contentRange) == "" {
		// Check more expensive parts now.
		// The atoi error is discarded; cl is used as returned.
		cl, _ := atoi(hdr.Get(contentLength))
		ct := hdr.Get(contentType)
		// Note that && binds tighter than ||: proceed when no Content-Length is
		// known, or when the declared length reaches minSize and the content
		// type is either unset or accepted by the filter.
		if cl == 0 || cl >= w.minSize && (ct == "" || w.contentTypeFilter(ct)) {
			// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
			// The same applies while jitter buffering is enabled and its buffer has not filled up yet.
			if len(w.buf) < w.minSize && cl == 0 || (w.jitterBuffer > 0 && len(w.buf) < w.jitterBuffer) {
				return len(b), nil
			}

			// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
			if cl >= w.minSize || len(w.buf) >= w.minSize {
				// If a Content-Type wasn't specified, infer it from the current buffer.
				if ct == "" {
					ct = http.DetectContentType(w.buf)
				}

				// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
				// Set the header only if the key does not exist
				if _, ok := hdr[contentType]; w.setContentType && !ok {
					hdr.Set(contentType, ct)
				}

				// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
				if w.contentTypeFilter(ct) {
					// startGzip writes w.buf; remain is passed so it can be
					// included in the jitter hash, and is written afterwards.
					if err := w.startGzip(remain); err != nil {
						return 0, err
					}
					if len(remain) > 0 {
						if _, err := w.gw.Write(remain); err != nil {
							return 0, err
						}
					}
					return len(b), nil
				}
			}
		}
	}
	// If we got here, we should not GZIP this response.
	// startPlain writes out w.buf; the unbuffered remainder follows.
	if err := w.startPlain(); err != nil {
		return 0, err
	}
	if len(remain) > 0 {
		if _, err := w.ResponseWriter.Write(remain); err != nil {
			return 0, err
		}
	}
	return len(b), nil
}
   171  
// Unwrap returns the underlying http.ResponseWriter.
func (w *GzipResponseWriter) Unwrap() http.ResponseWriter {
	return w.ResponseWriter
}
   175  
// castagnoliTable is the CRC-32 (Castagnoli polynomial) table used by startGzip
// to derive content-based jitter when SHA-256 jitter is not enabled.
var castagnoliTable = crc32.MakeTable(crc32.Castagnoli)
   177  
// startGzip initializes a GZIP writer and writes the buffer.
//
// It adjusts the response headers for a compressed body (Content-Encoding,
// Content-Length, Accept-Ranges, ETag), sends the saved status code, and, if
// w.buf holds data, creates the gzip writer and writes w.buf to it.
//
// remain is the part of the current Write call that did not fit in w.buf. It is
// only read here to extend the jitter hash; it is not written by this function.
//
// An error is returned if random bytes cannot be read for jitter, or if writing
// the buffer to the gzip writer fails or is short.
func (w *GzipResponseWriter) startGzip(remain []byte) error {
	// Set the GZIP header.
	w.Header().Set(contentEncoding, "gzip")

	// if the Content-Length is already set, then calls to Write on gzip
	// will fail to set the Content-Length header since its already set
	// See: https://github.com/golang/go/issues/14975.
	w.Header().Del(contentLength)

	// Delete Accept-Ranges.
	if !w.keepAcceptRanges {
		w.Header().Del(acceptRanges)
	}

	// Suffix ETag.
	if w.suffixETag != "" && !w.dropETag && w.Header().Get(eTag) != "" {
		orig := w.Header().Get(eTag)
		// Insert the suffix before the last double quote, or append it
		// if the value contains no quote at all.
		insertPoint := strings.LastIndex(orig, `"`)
		if insertPoint == -1 {
			insertPoint = len(orig)
		}
		w.Header().Set(eTag, orig[:insertPoint]+w.suffixETag+orig[insertPoint:])
	}

	// Delete ETag.
	if w.dropETag {
		w.Header().Del(eTag)
	}

	// Write the header to gzip response.
	if w.code != 0 {
		w.ResponseWriter.WriteHeader(w.code)
		// Ensure that no other WriteHeader's happen
		w.code = 0
	}

	// Initialize and flush the buffer into the gzip response if there are any bytes.
	// If there aren't any, we shouldn't initialize it yet because on Close it will
	// write the gzip header even if nothing was ever written.
	if len(w.buf) > 0 {
		// Initialize the GZIP response.
		w.init()

		// Set random jitter based on CRC or SHA-256 of current buffer.
		// Before first write.
		if len(w.randomJitter) > 0 {
			var jitRNG uint32
			if w.jitterBuffer > 0 {
				// Content-based jitter: derive the value from a hash of the
				// first bytes of the payload.
				if w.sha256Jitter {
					h := sha256.New()
					h.Write(w.buf)
					// Use only up to "w.jitterBuffer", otherwise the output depends on write sizes.
					if len(remain) > 0 && len(w.buf) < w.jitterBuffer {
						remain := remain
						if len(remain)+len(w.buf) > w.jitterBuffer {
							remain = remain[:w.jitterBuffer-len(w.buf)]
						}
						h.Write(remain)
					}
					var tmp [sha256.Size]byte
					jitRNG = binary.LittleEndian.Uint32(h.Sum(tmp[:0]))
				} else {
					h := crc32.Update(0, castagnoliTable, w.buf)
					// Use only up to "w.jitterBuffer", otherwise the output depends on write sizes.
					if len(remain) > 0 && len(w.buf) < w.jitterBuffer {
						remain := remain
						if len(remain)+len(w.buf) > w.jitterBuffer {
							remain = remain[:w.jitterBuffer-len(w.buf)]
						}
						h = crc32.Update(h, castagnoliTable, remain)
					}
					jitRNG = bits.RotateLeft32(h, 19) ^ 0xab0755de
				}
			} else {
				// Get from rand.Reader
				var tmp [4]byte
				_, err := rand.Read(tmp[:])
				if err != nil {
					return fmt.Errorf("gzhttp: %w", err)
				}
				jitRNG = binary.LittleEndian.Uint32(tmp[:])
			}
			// Pick a prefix of randomJitter with a length between 1 and
			// len(randomJitter)-1 and attach it as the gzip header comment.
			jit := w.randomJitter[:1+jitRNG%uint32(len(w.randomJitter)-1)]
			// The unchecked assertion panics if the writer does not implement
			// writer.GzipWriterExt.
			w.gw.(writer.GzipWriterExt).SetHeader(writer.Header{Comment: jit})
		}
		n, err := w.gw.Write(w.buf)

		// This should never happen (per io.Writer docs), but if the write didn't
		// accept the entire buffer but returned no specific error, we have no clue
		// what's going on, so abort just to be safe.
		if err == nil && n < len(w.buf) {
			err = io.ErrShortWrite
		}
		// Keep the backing array for reuse, but drop the contents.
		w.buf = w.buf[:0]
		return err
	}
	return nil
}
   277  
   278  // startPlain writes to sent bytes and buffer the underlying ResponseWriter without gzip.
   279  func (w *GzipResponseWriter) startPlain() error {
   280  	w.Header().Del(HeaderNoCompression)
   281  	if w.code != 0 {
   282  		w.ResponseWriter.WriteHeader(w.code)
   283  		// Ensure that no other WriteHeader's happen
   284  		w.code = 0
   285  	}
   286  
   287  	w.ignore = true
   288  	// If Write was never called then don't call Write on the underlying ResponseWriter.
   289  	if len(w.buf) == 0 {
   290  		return nil
   291  	}
   292  	n, err := w.ResponseWriter.Write(w.buf)
   293  	// This should never happen (per io.Writer docs), but if the write didn't
   294  	// accept the entire buffer but returned no specific error, we have no clue
   295  	// what's going on, so abort just to be safe.
   296  	if err == nil && n < len(w.buf) {
   297  		err = io.ErrShortWrite
   298  	}
   299  
   300  	w.buf = w.buf[:0]
   301  	return err
   302  }
   303  
   304  // WriteHeader just saves the response code until close or GZIP effective writes.
   305  // In the specific case of 1xx status codes, WriteHeader is directly calling the wrapped ResponseWriter.
   306  func (w *GzipResponseWriter) WriteHeader(code int) {
   307  	// Handle informational headers
   308  	// This is gated to not forward 1xx responses on builds prior to go1.20.
   309  	if shouldWrite1xxResponses() && code >= 100 && code <= 199 {
   310  		w.ResponseWriter.WriteHeader(code)
   311  		return
   312  	}
   313  
   314  	if w.code == 0 {
   315  		w.code = code
   316  	}
   317  }
   318  
// init obtains a new gzip writer from the configured factory, targeting the
// underlying ResponseWriter at the configured level, and stores it in w.gw.
// It does not touch any response headers.
func (w *GzipResponseWriter) init() {
	// Bytes written during ServeHTTP are redirected to this gzip writer
	// before being written to the underlying response.
	w.gw = w.gwFactory.New(w.ResponseWriter, w.level)
}
   326  
// Close finishes the response.
//
// If the response was already passed through uncompressed, Close does nothing.
// If no gzip writer has been started yet, the compress/plain decision is made
// now from the buffered bytes and the response headers: the buffer is written
// plain when it is empty or shorter than minSize, when compression was disabled
// via HeaderNoCompression, when Content-Encoding or Content-Range is set, or
// when the content type is rejected by the filter. Otherwise gzip is started.
//
// Finally the gzip writer is closed and released from w. Whether the writer is
// returned to a pool depends on its Close implementation, which is not visible here.
func (w *GzipResponseWriter) Close() error {
	if w.ignore {
		return nil
	}
	if w.gw == nil {
		var (
			ct = w.Header().Get(contentType)
			ce = w.Header().Get(contentEncoding)
			cr = w.Header().Get(contentRange)
		)
		if ct == "" {
			ct = http.DetectContentType(w.buf)

			// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
			// Set the header only if the key does not exist
			if _, ok := w.Header()[contentType]; w.setContentType && !ok {
				w.Header().Set(contentType, ct)
			}
		}

		if len(w.buf) == 0 || len(w.buf) < w.minSize || len(w.Header()[HeaderNoCompression]) != 0 || ce != "" || cr != "" || !w.contentTypeFilter(ct) {
			// GZIP not triggered, write out regular response.
			return w.startPlain()
		}
		// w.buf is non-empty here, so startGzip creates w.gw.
		err := w.startGzip(nil)
		if err != nil {
			return err
		}
	}

	err := w.gw.Close()
	w.gw = nil
	return err
}
   362  
// Flush flushes the underlying gzip writer, if one is active, and then the
// underlying http.ResponseWriter if it is an http.Flusher. This makes
// GzipResponseWriter an http.Flusher.
//
// If nothing has been written yet, nothing will be flushed.
// If data is buffered but no compress/plain decision has been made, Flush
// forces that decision now: when no Content-Length header is set, the response
// is treated as if it reached minSize, so it is compressed unless the headers
// or the content type filter rule that out.
// Errors from starting the gzip or plain writer are not reported, as Flush has
// no error return.
func (w *GzipResponseWriter) Flush() {
	if w.gw == nil && !w.ignore {
		if len(w.buf) == 0 {
			// Nothing written yet.
			return
		}
		var (
			cl, _ = atoi(w.Header().Get(contentLength))
			ct    = w.Header().Get(contentType)
			ce    = w.Header().Get(contentEncoding)
			cr    = w.Header().Get(contentRange)
		)

		if ct == "" {
			ct = http.DetectContentType(w.buf)

			// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
			// Set the header only if the key does not exist
			if _, ok := w.Header()[contentType]; w.setContentType && !ok {
				w.Header().Set(contentType, ct)
			}
		}
		if cl == 0 {
			// Assume minSize.
			cl = w.minSize
		}

		// See if we should compress...
		if len(w.Header()[HeaderNoCompression]) == 0 && ce == "" && cr == "" && cl >= w.minSize && w.contentTypeFilter(ct) {
			w.startGzip(nil)
		} else {
			w.startPlain()
		}
	}

	if w.gw != nil {
		w.gw.Flush()
	}

	if fw, ok := w.ResponseWriter.(http.Flusher); ok {
		fw.Flush()
	}
}
   412  
   413  // Hijack implements http.Hijacker. If the underlying ResponseWriter is a
   414  // Hijacker, its Hijack method is returned. Otherwise an error is returned.
   415  func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
   416  	if hj, ok := w.ResponseWriter.(http.Hijacker); ok {
   417  		return hj.Hijack()
   418  	}
   419  	return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
   420  }
   421  
// Compile-time check that *GzipResponseWriter implements http.Hijacker.
var _ http.Hijacker = &GzipResponseWriter{}
   424  
// onceDefault guards the one-time creation of defaultWrapper by GzipHandler.
var onceDefault sync.Once

// defaultWrapper is the wrapper built with default settings; it is set on the
// first call to GzipHandler.
var defaultWrapper func(http.Handler) http.HandlerFunc
   427  
   428  // GzipHandler allows to easily wrap an http handler with default settings.
   429  func GzipHandler(h http.Handler) http.HandlerFunc {
   430  	onceDefault.Do(func() {
   431  		var err error
   432  		defaultWrapper, err = NewWrapper()
   433  		if err != nil {
   434  			panic(err)
   435  		}
   436  	})
   437  
   438  	return defaultWrapper(h)
   439  }
   440  
// grwPool recycles GzipResponseWriter values between requests; NewWrapper
// reuses the buffer of a pooled writer.
var grwPool = sync.Pool{New: func() interface{} { return &GzipResponseWriter{} }}
   442  
// NewWrapper returns a reusable wrapper with the supplied options.
//
// The options are applied on top of the defaults (default compression level,
// DefaultMinSize, the gzkp writer implementation, DefaultContentTypeFilter and
// automatic Content-Type setting) and the resulting configuration is validated;
// a validation failure is returned as the error.
//
// The returned function wraps a handler so that responses are gzip-compressed
// when the request accepts gzip. "Vary: Accept-Encoding" is added to every
// response, and HeaderNoCompression is removed from the response headers after
// the handler returns.
func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) {
	c := &config{
		level:   gzip.DefaultCompression,
		minSize: DefaultMinSize,
		writer: writer.GzipWriterFactory{
			Levels: gzkp.Levels,
			New:    gzkp.NewWriter,
		},
		contentTypes:   DefaultContentTypeFilter,
		setContentType: true,
	}

	for _, o := range opts {
		o(c)
	}

	if err := c.validate(); err != nil {
		return nil, err
	}

	return func(h http.Handler) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			w.Header().Add(vary, acceptEncoding)
			if acceptsGzip(r) {
				gw := grwPool.Get().(*GzipResponseWriter)
				// Reset every field for this request, carrying over only the
				// buffer of the pooled writer so its storage can be reused.
				*gw = GzipResponseWriter{
					ResponseWriter:    w,
					gwFactory:         c.writer,
					level:             c.level,
					minSize:           c.minSize,
					contentTypeFilter: c.contentTypes,
					keepAcceptRanges:  c.keepAcceptRanges,
					dropETag:          c.dropETag,
					suffixETag:        c.suffixETag,
					buf:               gw.buf,
					setContentType:    c.setContentType,
					randomJitter:      c.randomJitter,
					jitterBuffer:      c.jitterBuffer,
					sha256Jitter:      c.sha256Jitter,
				}
				if len(gw.buf) > 0 {
					gw.buf = gw.buf[:0]
				}
				defer func() {
					// The Close error is discarded; then drop the reference to
					// the request's ResponseWriter before pooling the writer.
					gw.Close()
					gw.ResponseWriter = nil
					grwPool.Put(gw)
				}()

				// Only expose CloseNotify when the wrapped writer supports it.
				if _, ok := w.(http.CloseNotifier); ok {
					gwcn := GzipResponseWriterWithCloseNotify{gw}
					h.ServeHTTP(gwcn, r)
				} else {
					h.ServeHTTP(gw, r)
				}
				w.Header().Del(HeaderNoCompression)
			} else {
				h.ServeHTTP(newNoGzipResponseWriter(w), r)
				w.Header().Del(HeaderNoCompression)
			}
		}
	}, nil
}
   507  
// parsedContentType is the parsed representation of one of the inputs to
// ContentTypes or ExceptContentTypes: the media type and its parameters as
// returned by mime.ParseMediaType.
// See https://golang.org/pkg/mime/#ParseMediaType
type parsedContentType struct {
	mediaType string
	params    map[string]string
}
   514  
   515  // equals returns whether this content type matches another content type.
   516  func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
   517  	if pct.mediaType != mediaType {
   518  		return false
   519  	}
   520  	// if pct has no params, don't care about other's params
   521  	if len(pct.params) == 0 {
   522  		return true
   523  	}
   524  
   525  	// if pct has any params, they must be identical to other's.
   526  	if len(pct.params) != len(params) {
   527  		return false
   528  	}
   529  	for k, v := range pct.params {
   530  		if w, ok := params[k]; !ok || v != w {
   531  			return false
   532  		}
   533  	}
   534  	return true
   535  }
   536  
// config holds the settings collected from the functional options passed to
// NewWrapper.
type config struct {
	minSize          int                      // Minimum response size to compress (see MinSize).
	level            int                      // Compression level (see CompressionLevel).
	writer           writer.GzipWriterFactory // Gzip writer implementation (see Implementation).
	contentTypes     func(ct string) bool     // Reports whether a content type should be compressed.
	keepAcceptRanges bool                     // Keep the Accept-Ranges header (see KeepAcceptRanges).
	setContentType   bool                     // Set a detected Content-Type if missing (see SetContentType).
	suffixETag       string                   // Suffix added to the ETag of compressed responses (see SuffixETag).
	dropETag         bool                     // Drop the ETag of compressed responses (see DropETag).
	jitterBuffer     int                      // Bytes to buffer before applying jitter (see RandomJitter).
	randomJitter     string                   // Padding text used for jitter; empty disables jitter.
	sha256Jitter     bool                     // Use SHA-256 instead of CRC-32 for content-based jitter.
}
   551  
   552  func (c *config) validate() error {
   553  	min, max := c.writer.Levels()
   554  	if c.level < min || c.level > max {
   555  		return fmt.Errorf("invalid compression level requested: %d, valid range %d -> %d", c.level, min, max)
   556  	}
   557  
   558  	if c.minSize < 0 {
   559  		return fmt.Errorf("minimum size must be more than zero")
   560  	}
   561  	if len(c.randomJitter) >= math.MaxUint16 {
   562  		return fmt.Errorf("random jitter size exceeded")
   563  	}
   564  	if len(c.randomJitter) > 0 {
   565  		gzw, ok := c.writer.New(io.Discard, c.level).(writer.GzipWriterExt)
   566  		if !ok {
   567  			return errors.New("the custom compressor does not allow setting headers for random jitter")
   568  		}
   569  		gzw.Close()
   570  	}
   571  	return nil
   572  }
   573  
// option is a functional option that modifies the config used by NewWrapper.
type option func(c *config)
   575  
// MinSize sets the minimum response size, in bytes, required before a response
// is compressed. Negative values are rejected when the wrapper is created.
func MinSize(size int) option {
	return func(c *config) {
		c.minSize = size
	}
}
   581  
// CompressionLevel sets the compression level.
// The level must be within the range supported by the configured writer
// implementation; this is checked when the wrapper is created.
func CompressionLevel(level int) option {
	return func(c *config) {
		c.level = level
	}
}
   588  
// SetContentType controls whether a detected content type is set on the
// response when the handler did not set a Content-Type header itself.
// Default: true.
func SetContentType(b bool) option {
	return func(c *config) {
		c.setContentType = b
	}
}
   597  
// Implementation changes the implementation of GzipWriter.
//
// The default implementation is backed by github.com/klauspost/compress.
// To support RandomJitter, the GzipWriterExt must also be
// supported by the returned writers.
func Implementation(writer writer.GzipWriterFactory) option {
	return func(c *config) {
		c.writer = writer
	}
}
   608  
   609  // ContentTypes specifies a list of content types to compare
   610  // the Content-Type header to before compressing. If none
   611  // match, the response will be returned as-is.
   612  //
   613  // Content types are compared in a case-insensitive, whitespace-ignored
   614  // manner.
   615  //
   616  // A MIME type without any other directive will match a content type
   617  // that has the same MIME type, regardless of that content type's other
   618  // directives. I.e., "text/html" will match both "text/html" and
   619  // "text/html; charset=utf-8".
   620  //
   621  // A MIME type with any other directive will only match a content type
   622  // that has the same MIME type and other directives. I.e.,
   623  // "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
   624  //
   625  // By default common compressed audio, video and archive formats, see DefaultContentTypeFilter.
   626  //
   627  // Setting this will override default and any previous Content Type settings.
   628  func ContentTypes(types []string) option {
   629  	return func(c *config) {
   630  		var contentTypes []parsedContentType
   631  		for _, v := range types {
   632  			mediaType, params, err := mime.ParseMediaType(v)
   633  			if err == nil {
   634  				contentTypes = append(contentTypes, parsedContentType{mediaType, params})
   635  			}
   636  		}
   637  		c.contentTypes = func(ct string) bool {
   638  			return handleContentType(contentTypes, ct)
   639  		}
   640  	}
   641  }
   642  
   643  // ExceptContentTypes specifies a list of content types to compare
   644  // the Content-Type header to before compressing. If none
   645  // match, the response will be compressed.
   646  //
   647  // Content types are compared in a case-insensitive, whitespace-ignored
   648  // manner.
   649  //
   650  // A MIME type without any other directive will match a content type
   651  // that has the same MIME type, regardless of that content type's other
   652  // directives. I.e., "text/html" will match both "text/html" and
   653  // "text/html; charset=utf-8".
   654  //
   655  // A MIME type with any other directive will only match a content type
   656  // that has the same MIME type and other directives. I.e.,
   657  // "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
   658  //
   659  // By default common compressed audio, video and archive formats, see DefaultContentTypeFilter.
   660  //
   661  // Setting this will override default and any previous Content Type settings.
   662  func ExceptContentTypes(types []string) option {
   663  	return func(c *config) {
   664  		var contentTypes []parsedContentType
   665  		for _, v := range types {
   666  			mediaType, params, err := mime.ParseMediaType(v)
   667  			if err == nil {
   668  				contentTypes = append(contentTypes, parsedContentType{mediaType, params})
   669  			}
   670  		}
   671  		c.contentTypes = func(ct string) bool {
   672  			return !handleContentType(contentTypes, ct)
   673  		}
   674  	}
   675  }
   676  
// KeepAcceptRanges will keep the Accept-Ranges header on gzipped responses.
// Without this option the header is removed when a response is compressed.
// Keeping it will likely break ranged requests since they cannot be
// transparently handled by the filter.
func KeepAcceptRanges() option {
	return func(c *config) {
		c.keepAcceptRanges = true
	}
}
   685  
// ContentTypeFilter allows adding a custom content type filter.
//
// The supplied function must return true/false to indicate if content
// should be compressed.
//
// When called, no parsing of the content type 'ct' has been done.
// It may have been set or auto-detected.
//
// Setting this will override default and any previous Content Type settings.
func ContentTypeFilter(compress func(ct string) bool) option {
	return func(c *config) {
		c.contentTypes = compress
	}
}
   700  
// SuffixETag adds the specified suffix to the ETag header (if it exists) of
// responses which are compressed.
//
// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
// the ETag of a compressed response must differ from its uncompressed version.
//
// A suffix such as "-gzip" is sometimes used as a workaround for generating a
// unique new ETag (see https://bz.apache.org/bugzilla/show_bug.cgi?id=39727).
func SuffixETag(suffix string) option {
	return func(c *config) {
		c.suffixETag = suffix
	}
}
   714  
// DropETag removes the ETag of responses which are compressed. If DropETag is
// specified in conjunction with SuffixETag, this option will take precedence
// and the ETag will be dropped.
//
// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
// the ETag of a compressed response must differ from its uncompressed version.
//
// This workaround eliminates ETag conflicts between the compressed and
// uncompressed versions by removing the ETag from the compressed version.
func DropETag() option {
	return func(c *config) {
		c.dropETag = true
	}
}
   729  
   730  // RandomJitter adds 1->n random bytes to output based on checksum of payload.
   731  // Specify the amount of input to buffer before applying jitter.
   732  // This should cover the sensitive part of your response.
   733  // This can be used to obfuscate the exact compressed size.
   734  // Specifying 0 will use a buffer size of 64KB.
   735  // 'paranoid' will use a slower hashing function, that MAY provide more safety.
   736  // See README.md for more information.
   737  // If a negative buffer is given, the amount of jitter will not be content dependent.
   738  // This provides *less* security than applying content based jitter.
   739  func RandomJitter(n, buffer int, paranoid bool) option {
   740  	return func(c *config) {
   741  		if n > 0 {
   742  			c.sha256Jitter = paranoid
   743  			c.randomJitter = strings.Repeat("Padding-", 1+(n/8))[:n+1]
   744  			c.jitterBuffer = buffer
   745  			if c.jitterBuffer == 0 {
   746  				c.jitterBuffer = 64 << 10
   747  			}
   748  		} else {
   749  			c.randomJitter = ""
   750  			c.jitterBuffer = 0
   751  		}
   752  	}
   753  }
   754  
   755  // acceptsGzip returns true if the given HTTP request indicates that it will
   756  // accept a gzipped response.
   757  func acceptsGzip(r *http.Request) bool {
   758  	// Note that we don't request this for HEAD requests,
   759  	// due to a bug in nginx:
   760  	//   https://trac.nginx.org/nginx/ticket/358
   761  	//   https://golang.org/issue/5522
   762  	return r.Method != http.MethodHead && parseEncodingGzip(r.Header.Get(acceptEncoding)) > 0
   763  }
   764  
   765  // returns true if we've been configured to compress the specific content type.
   766  func handleContentType(contentTypes []parsedContentType, ct string) bool {
   767  	// If contentTypes is empty we handle all content types.
   768  	if len(contentTypes) == 0 {
   769  		return true
   770  	}
   771  
   772  	mediaType, params, err := mime.ParseMediaType(ct)
   773  	if err != nil {
   774  		return false
   775  	}
   776  
   777  	for _, c := range contentTypes {
   778  		if c.equals(mediaType, params) {
   779  			return true
   780  		}
   781  	}
   782  
   783  	return false
   784  }
   785  
   786  // parseEncodingGzip returns the qvalue of gzip compression.
   787  func parseEncodingGzip(s string) float64 {
   788  	s = strings.TrimSpace(s)
   789  
   790  	for len(s) > 0 {
   791  		stop := strings.IndexByte(s, ',')
   792  		if stop < 0 {
   793  			stop = len(s)
   794  		}
   795  		coding, qvalue, _ := parseCoding(s[:stop])
   796  
   797  		if coding == "gzip" {
   798  			return qvalue
   799  		}
   800  		if stop == len(s) {
   801  			break
   802  		}
   803  		s = s[stop+1:]
   804  	}
   805  	return 0
   806  }
   807  
   808  func parseEncodings(s string) (codings, error) {
   809  	split := strings.Split(s, ",")
   810  	c := make(codings, len(split))
   811  	var e []string
   812  
   813  	for _, ss := range split {
   814  		coding, qvalue, err := parseCoding(ss)
   815  
   816  		if err != nil {
   817  			e = append(e, err.Error())
   818  		} else {
   819  			c[coding] = qvalue
   820  		}
   821  	}
   822  
   823  	// TODO (adammck): Use a proper multi-error struct, so the individual errors
   824  	//                 can be extracted if anyone cares.
   825  	if len(e) > 0 {
   826  		return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
   827  	}
   828  
   829  	return c, nil
   830  }
   831  
   832  var errEmptyEncoding = errors.New("empty content-coding")
   833  
   834  // parseCoding parses a single coding (content-coding with an optional qvalue),
   835  // as might appear in an Accept-Encoding header. It attempts to forgive minor
   836  // formatting errors.
   837  func parseCoding(s string) (coding string, qvalue float64, err error) {
   838  	// Avoid splitting if we can...
   839  	if len(s) == 0 {
   840  		return "", 0, errEmptyEncoding
   841  	}
   842  	if !strings.ContainsRune(s, ';') {
   843  		coding = strings.ToLower(strings.TrimSpace(s))
   844  		if coding == "" {
   845  			err = errEmptyEncoding
   846  		}
   847  		return coding, DefaultQValue, err
   848  	}
   849  	for n, part := range strings.Split(s, ";") {
   850  		part = strings.TrimSpace(part)
   851  		qvalue = DefaultQValue
   852  
   853  		if n == 0 {
   854  			coding = strings.ToLower(part)
   855  		} else if strings.HasPrefix(part, "q=") {
   856  			qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
   857  
   858  			if qvalue < 0.0 {
   859  				qvalue = 0.0
   860  			} else if qvalue > 1.0 {
   861  				qvalue = 1.0
   862  			}
   863  		}
   864  	}
   865  
   866  	if coding == "" {
   867  		err = errEmptyEncoding
   868  	}
   869  
   870  	return
   871  }
   872  
   873  // Don't compress any audio/video types.
   874  var excludePrefixDefault = []string{"video/", "audio/", "image/jp"}
   875  
   876  // Skip a bunch of compressed types that contains this string.
   877  // Curated by supposedly still active formats on https://en.wikipedia.org/wiki/List_of_archive_formats
   878  var excludeContainsDefault = []string{"compress", "zip", "snappy", "lzma", "xz", "zstd", "brotli", "stuffit"}
   879  
   880  // DefaultContentTypeFilter excludes common compressed audio, video and archive formats.
   881  func DefaultContentTypeFilter(ct string) bool {
   882  	ct = strings.TrimSpace(strings.ToLower(ct))
   883  	if ct == "" {
   884  		return true
   885  	}
   886  	for _, s := range excludeContainsDefault {
   887  		if strings.Contains(ct, s) {
   888  			return false
   889  		}
   890  	}
   891  
   892  	for _, prefix := range excludePrefixDefault {
   893  		if strings.HasPrefix(ct, prefix) {
   894  			return false
   895  		}
   896  	}
   897  	return true
   898  }
   899  
   900  // CompressAllContentTypeFilter will compress all mime types.
   901  func CompressAllContentTypeFilter(ct string) bool {
   902  	return true
   903  }
   904  
   905  const intSize = 32 << (^uint(0) >> 63)
   906  
   907  // atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
   908  func atoi(s string) (int, bool) {
   909  	if len(s) == 0 {
   910  		return 0, false
   911  	}
   912  	sLen := len(s)
   913  	if intSize == 32 && (0 < sLen && sLen < 10) ||
   914  		intSize == 64 && (0 < sLen && sLen < 19) {
   915  		// Fast path for small integers that fit int type.
   916  		s0 := s
   917  		if s[0] == '-' || s[0] == '+' {
   918  			s = s[1:]
   919  			if len(s) < 1 {
   920  				return 0, false
   921  			}
   922  		}
   923  
   924  		n := 0
   925  		for _, ch := range []byte(s) {
   926  			ch -= '0'
   927  			if ch > 9 {
   928  				return 0, false
   929  			}
   930  			n = n*10 + int(ch)
   931  		}
   932  		if s0[0] == '-' {
   933  			n = -n
   934  		}
   935  		return n, true
   936  	}
   937  
   938  	// Slow path for invalid, big, or underscored integers.
   939  	i64, err := strconv.ParseInt(s, 10, 0)
   940  	return int(i64), err == nil
   941  }
   942  
   943  type unwrapper interface {
   944  	Unwrap() http.ResponseWriter
   945  }
   946  
   947  // newNoGzipResponseWriter will return a response writer that
   948  // cleans up compression artifacts.
   949  // Depending on whether http.Hijacker is supported the returned will as well.
   950  func newNoGzipResponseWriter(w http.ResponseWriter) http.ResponseWriter {
   951  	n := &NoGzipResponseWriter{ResponseWriter: w}
   952  	if hj, ok := w.(http.Hijacker); ok {
   953  		x := struct {
   954  			http.ResponseWriter
   955  			http.Hijacker
   956  			http.Flusher
   957  			unwrapper
   958  		}{
   959  			ResponseWriter: n,
   960  			Hijacker:       hj,
   961  			Flusher:        n,
   962  			unwrapper:      n,
   963  		}
   964  		return x
   965  	}
   966  
   967  	return n
   968  }
   969  
   970  // NoGzipResponseWriter filters out HeaderNoCompression.
   971  type NoGzipResponseWriter struct {
   972  	http.ResponseWriter
   973  	hdrCleaned bool
   974  }
   975  
   976  func (n *NoGzipResponseWriter) CloseNotify() <-chan bool {
   977  	if cn, ok := n.ResponseWriter.(http.CloseNotifier); ok {
   978  		return cn.CloseNotify()
   979  	}
   980  	return nil
   981  }
   982  
   983  func (n *NoGzipResponseWriter) Flush() {
   984  	if !n.hdrCleaned {
   985  		n.ResponseWriter.Header().Del(HeaderNoCompression)
   986  		n.hdrCleaned = true
   987  	}
   988  	if f, ok := n.ResponseWriter.(http.Flusher); ok {
   989  		f.Flush()
   990  	}
   991  }
   992  
   993  func (n *NoGzipResponseWriter) Header() http.Header {
   994  	return n.ResponseWriter.Header()
   995  }
   996  
   997  func (n *NoGzipResponseWriter) Write(bytes []byte) (int, error) {
   998  	if !n.hdrCleaned {
   999  		n.ResponseWriter.Header().Del(HeaderNoCompression)
  1000  		n.hdrCleaned = true
  1001  	}
  1002  	return n.ResponseWriter.Write(bytes)
  1003  }
  1004  
  1005  func (n *NoGzipResponseWriter) WriteHeader(statusCode int) {
  1006  	if !n.hdrCleaned {
  1007  		n.ResponseWriter.Header().Del(HeaderNoCompression)
  1008  		n.hdrCleaned = true
  1009  	}
  1010  	n.ResponseWriter.WriteHeader(statusCode)
  1011  }
  1012  
  1013  func (n *NoGzipResponseWriter) Unwrap() http.ResponseWriter {
  1014  	return n.ResponseWriter
  1015  }
  1016
View as plain text