...

Source file src/github.com/klauspost/compress/flate/stateless.go

Documentation: github.com/klauspost/compress/flate

     1  package flate
     2  
     3  import (
     4  	"io"
     5  	"math"
     6  	"sync"
     7  )
     8  
     9  const (
    10  	maxStatelessBlock = math.MaxInt16
    11  	// The dictionary counts against maxStatelessBlock, so limit its size.
    12  	maxStatelessDict = 8 << 10
    13  
    14  	slTableBits  = 13
    15  	slTableSize  = 1 << slTableBits
    16  	slTableShift = 32 - slTableBits
    17  )
    18  
    19  type statelessWriter struct {
    20  	dst    io.Writer
    21  	closed bool
    22  }
    23  
    24  func (s *statelessWriter) Close() error {
    25  	if s.closed {
    26  		return nil
    27  	}
    28  	s.closed = true
    29  	// Emit EOF block
    30  	return StatelessDeflate(s.dst, nil, true, nil)
    31  }
    32  
    33  func (s *statelessWriter) Write(p []byte) (n int, err error) {
    34  	err = StatelessDeflate(s.dst, p, false, nil)
    35  	if err != nil {
    36  		return 0, err
    37  	}
    38  	return len(p), nil
    39  }
    40  
    41  func (s *statelessWriter) Reset(w io.Writer) {
    42  	s.dst = w
    43  	s.closed = false
    44  }
    45  
    46  // NewStatelessWriter will do compression but without maintaining any state
    47  // between Write calls.
    48  // Since no memory is kept between Write calls,
    49  // compression ratio and speed will be suboptimal,
    50  // and the size of each Write call will affect the output size.
    51  func NewStatelessWriter(dst io.Writer) io.WriteCloser {
    52  	return &statelessWriter{dst: dst}
    53  }
    54  
    55  // bitWriterPool contains bit writers that can be reused.
    56  var bitWriterPool = sync.Pool{
    57  	New: func() interface{} {
    58  		return newHuffmanBitWriter(nil)
    59  	},
    60  }
    61  
    62  // StatelessDeflate allows compressing directly to a Writer without retaining state.
    63  // When it returns, everything will have been flushed.
    64  // Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
    65  // Longer dictionaries will be truncated and will still produce valid output.
    66  // Sending nil dictionary is perfectly fine.
    67  func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
    68  	var dst tokens
    69  	bw := bitWriterPool.Get().(*huffmanBitWriter)
    70  	bw.reset(out)
    71  	defer func() {
    72  		// don't keep a reference to our output
    73  		bw.reset(nil)
    74  		bitWriterPool.Put(bw)
    75  	}()
    76  	if eof && len(in) == 0 {
    77  		// Just write an EOF block.
    78  		// Could be faster...
    79  		bw.writeStoredHeader(0, true)
    80  		bw.flush()
    81  		return bw.err
    82  	}
    83  
    84  	// Truncate dict
    85  	if len(dict) > maxStatelessDict {
    86  		dict = dict[len(dict)-maxStatelessDict:]
    87  	}
    88  
    89  	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
    90  	var inDict []byte
    91  
    92  	for len(in) > 0 {
    93  		todo := in
    94  		if len(inDict) > 0 {
    95  			if len(todo) > maxStatelessBlock-maxStatelessDict {
    96  				todo = todo[:maxStatelessBlock-maxStatelessDict]
    97  			}
    98  		} else if len(todo) > maxStatelessBlock-len(dict) {
    99  			todo = todo[:maxStatelessBlock-len(dict)]
   100  		}
   101  		inOrg := in
   102  		in = in[len(todo):]
   103  		uncompressed := todo
   104  		if len(dict) > 0 {
   105  			// combine dict and source
   106  			bufLen := len(todo) + len(dict)
   107  			combined := make([]byte, bufLen)
   108  			copy(combined, dict)
   109  			copy(combined[len(dict):], todo)
   110  			todo = combined
   111  		}
   112  		// Compress
   113  		if len(inDict) == 0 {
   114  			statelessEnc(&dst, todo, int16(len(dict)))
   115  		} else {
   116  			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
   117  		}
   118  		isEof := eof && len(in) == 0
   119  
   120  		if dst.n == 0 {
   121  			bw.writeStoredHeader(len(uncompressed), isEof)
   122  			if bw.err != nil {
   123  				return bw.err
   124  			}
   125  			bw.writeBytes(uncompressed)
   126  		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
   127  			// If we removed less than 1/16th, huffman compress the block.
   128  			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
   129  		} else {
   130  			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
   131  		}
   132  		if len(in) > 0 {
   133  			// Retain a dict if we have more
   134  			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
   135  			dict = nil
   136  			dst.Reset()
   137  		}
   138  		if bw.err != nil {
   139  			return bw.err
   140  		}
   141  	}
   142  	if !eof {
   143  		// Align to a byte boundary; only a stored block can do that.
   144  		bw.writeStoredHeader(0, false)
   145  	}
   146  	bw.flush()
   147  	return bw.err
   148  }
   149  
   150  func hashSL(u uint32) uint32 {
   151  	return (u * 0x1e35a7bd) >> slTableShift
   152  }
   153  
   154  func load3216(b []byte, i int16) uint32 {
   155  	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
   156  	b = b[i:]
   157  	b = b[:4]
   158  	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
   159  }
   160  
   161  func load6416(b []byte, i int16) uint64 {
   162  	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
   163  	b = b[i:]
   164  	b = b[:8]
   165  	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
   166  		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
   167  }
   168  
   169  func statelessEnc(dst *tokens, src []byte, startAt int16) {
   170  	const (
   171  		inputMargin            = 12 - 1
   172  		minNonLiteralBlockSize = 1 + 1 + inputMargin
   173  	)
   174  
   175  	type tableEntry struct {
   176  		offset int16
   177  	}
   178  
   179  	var table [slTableSize]tableEntry
   180  
   181  	// This check isn't in the Snappy implementation, but there, the caller
   182  	// instead of the callee handles this case.
   183  	if len(src)-int(startAt) < minNonLiteralBlockSize {
   184  		// We do not fill the token table.
   185  		// This will be picked up by caller.
   186  		dst.n = 0
   187  		return
   188  	}
   189  	// Index until startAt
   190  	if startAt > 0 {
   191  		cv := load3232(src, 0)
   192  		for i := int16(0); i < startAt; i++ {
   193  			table[hashSL(cv)] = tableEntry{offset: i}
   194  			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
   195  		}
   196  	}
   197  
   198  	s := startAt + 1
   199  	nextEmit := startAt
   200  	// sLimit is when to stop looking for offset/length copies. The inputMargin
   201  	// lets us use a fast path for emitLiteral in the main loop, while we are
   202  	// looking for copies.
   203  	sLimit := int16(len(src) - inputMargin)
   204  
   205  	// nextEmit is where in src the next emitLiteral should start from.
   206  	cv := load3216(src, s)
   207  
   208  	for {
   209  		const skipLog = 5
   210  		const doEvery = 2
   211  
   212  		nextS := s
   213  		var candidate tableEntry
   214  		for {
   215  			nextHash := hashSL(cv)
   216  			candidate = table[nextHash]
   217  			nextS = s + doEvery + (s-nextEmit)>>skipLog
   218  			if nextS > sLimit || nextS <= 0 {
   219  				goto emitRemainder
   220  			}
   221  
   222  			now := load6416(src, nextS)
   223  			table[nextHash] = tableEntry{offset: s}
   224  			nextHash = hashSL(uint32(now))
   225  
   226  			if cv == load3216(src, candidate.offset) {
   227  				table[nextHash] = tableEntry{offset: nextS}
   228  				break
   229  			}
   230  
   231  			// Do one right away...
   232  			cv = uint32(now)
   233  			s = nextS
   234  			nextS++
   235  			candidate = table[nextHash]
   236  			now >>= 8
   237  			table[nextHash] = tableEntry{offset: s}
   238  
   239  			if cv == load3216(src, candidate.offset) {
   240  				table[nextHash] = tableEntry{offset: nextS}
   241  				break
   242  			}
   243  			cv = uint32(now)
   244  			s = nextS
   245  		}
   246  
   247  		// A 4-byte match has been found. We'll later see if more than 4 bytes
   248  		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
   249  		// them as literal bytes.
   250  		for {
   251  			// Invariant: we have a 4-byte match at s, and no need to emit any
   252  			// literal bytes prior to s.
   253  
   254  			// Extend the 4-byte match as long as possible.
   255  			t := candidate.offset
   256  			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
   257  
   258  			// Extend backwards
   259  			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
   260  				s--
   261  				t--
   262  				l++
   263  			}
   264  			if nextEmit < s {
   265  				if false {
   266  					emitLiteral(dst, src[nextEmit:s])
   267  				} else {
   268  					for _, v := range src[nextEmit:s] {
   269  						dst.tokens[dst.n] = token(v)
   270  						dst.litHist[v]++
   271  						dst.n++
   272  					}
   273  				}
   274  			}
   275  
   276  			// Save the match found
   277  			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
   278  			s += l
   279  			nextEmit = s
   280  			if nextS >= s {
   281  				s = nextS + 1
   282  			}
   283  			if s >= sLimit {
   284  				goto emitRemainder
   285  			}
   286  
   287  			// We could immediately start working at s now, but to improve
   288  			// compression we first update the hash table at s-2 and at s. If
   289  			// another emitCopy is not our next move, also calculate nextHash
   290  			// at s+1. At least on GOARCH=amd64, these three hash calculations
   291  			// are faster as one load64 call (with some shifts) instead of
   292  			// three load32 calls.
   293  			x := load6416(src, s-2)
   294  			o := s - 2
   295  			prevHash := hashSL(uint32(x))
   296  			table[prevHash] = tableEntry{offset: o}
   297  			x >>= 16
   298  			currHash := hashSL(uint32(x))
   299  			candidate = table[currHash]
   300  			table[currHash] = tableEntry{offset: o + 2}
   301  
   302  			if uint32(x) != load3216(src, candidate.offset) {
   303  				cv = uint32(x >> 8)
   304  				s++
   305  				break
   306  			}
   307  		}
   308  	}
   309  
   310  emitRemainder:
   311  	if int(nextEmit) < len(src) {
   312  		// If nothing was added, don't encode literals.
   313  		if dst.n == 0 {
   314  			return
   315  		}
   316  		emitLiteral(dst, src[nextEmit:])
   317  	}
   318  }
   319  
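The statelessWriter type and NewStatelessWriter above are the public entry point for stateless compression. Below is a minimal round-trip sketch (not part of the source file), assuming the github.com/klauspost/compress module is available; the reader side uses the package's NewReader, which mirrors the standard library compress/flate API.

	package main

	import (
		"bytes"
		"fmt"
		"io"

		"github.com/klauspost/compress/flate"
	)

	func main() {
		var buf bytes.Buffer

		// No state is kept between Write calls, so each call is compressed
		// as one or more independent deflate blocks.
		w := flate.NewStatelessWriter(&buf)
		if _, err := w.Write([]byte("hello, stateless deflate")); err != nil {
			panic(err)
		}
		if err := w.Close(); err != nil { // Close emits the final EOF block.
			panic(err)
		}

		// The result is a regular DEFLATE stream and decodes normally.
		r := flate.NewReader(&buf)
		defer r.Close()
		out, err := io.ReadAll(r)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%q\n", out)
	}

Because no state is retained between Write calls, splitting the same data across many small Writes will generally produce larger output than a single large Write.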

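StatelessDeflate also accepts up to 8 KiB of preset dictionary, presumed to precede the block. A sketch of that usage (again an illustration, not part of the file), assuming the decoder is primed with the same dictionary via NewReaderDict, as in the standard library's compress/flate:

	package main

	import (
		"bytes"
		"fmt"
		"io"

		"github.com/klauspost/compress/flate"
	)

	func main() {
		dict := []byte("a shared dictionary with common phrases")
		payload := []byte("common phrases compress better with a shared dictionary")

		var buf bytes.Buffer
		// eof=true writes a complete stream; everything is flushed on return.
		if err := flate.StatelessDeflate(&buf, payload, true, dict); err != nil {
			panic(err)
		}

		// The decoder must be primed with the same dictionary.
		r := flate.NewReaderDict(&buf, dict)
		defer r.Close()
		out, err := io.ReadAll(r)
		if err != nil {
			panic(err)
		}
		fmt.Println(bytes.Equal(out, payload)) // true
	}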
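
Internally, statelessEnc finds matches with a small hash table: hashSL is a multiplicative hash that packs four bytes little-endian (as load3216 does), multiplies by an odd constant, and keeps the top slTableBits bits as the bucket index. A standalone restatement of that computation, using the same constants as the listing:

	package main

	import "fmt"

	const (
		slTableBits  = 13
		slTableSize  = 1 << slTableBits // 8192 buckets
		slTableShift = 32 - slTableBits
	)

	// hashSL mirrors the function in the listing above.
	func hashSL(u uint32) uint32 {
		return (u * 0x1e35a7bd) >> slTableShift
	}

	func main() {
		// Pack four consecutive bytes little-endian, as load3216 does.
		b := []byte("abcd")
		u := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
		fmt.Printf("bucket %d of %d\n", hashSL(u), slTableSize)
	}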