...

Source file src/github.com/klauspost/compress/zstd/encoder_options.go

Documentation: github.com/klauspost/compress/zstd

     1  package zstd
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"math"
     7  	"math/bits"
     8  	"runtime"
     9  	"strings"
    10  )
    11  
// EOption is an option for creating an encoder.
// Each option is a function that updates the accumulated encoderOptions and
// returns a non-nil error when the supplied value is rejected.
type EOption func(*encoderOptions) error
    14  
// encoderOptions retains the accumulated state of multiple options.
type encoderOptions struct {
	// concurrent is the maximum number of encoders to run concurrently
	// (see WithEncoderConcurrency).
	concurrent      int
	// level is the selected compression level; encoder() picks the
	// implementation from it.
	level           EncoderLevel
	// single holds the explicit "single segment" choice made through
	// WithSingleSegment. It stays nil until that option is applied.
	single          *bool
	// pad is the multiple the output size is padded to. WithEncoderPadding
	// stores 0 when padding to a multiple of 1 is requested.
	pad             int
	// blockSize is the block size in bytes. It is never left larger than
	// windowSize by WithWindowSize.
	// NOTE(review): presumably the maximum size of one input block; confirm against the encoder.
	blockSize       int
	// windowSize is the maximum allowed back-reference distance in bytes
	// (see WithWindowSize).
	windowSize      int
	// crc enables adding a CRC value to the output (see WithEncoderCRC).
	crc             bool
	// fullZero makes 0 length input encode as a full frame (see WithZeroFrames).
	fullZero        bool
	// noEntropy always skips entropy compression of literals
	// (see WithNoEntropyCompression).
	noEntropy       bool
	// allLitEntropy applies entropy compression even if no matches are found
	// (see WithAllLitEntropyCompression).
	allLitEntropy   bool
	// customWindow is set once WithWindowSize has been applied, so that
	// WithEncoderLevel no longer overrides windowSize.
	customWindow    bool
	// customALEntropy is set once WithAllLitEntropyCompression has been
	// applied, so that WithEncoderLevel no longer overrides allLitEntropy.
	customALEntropy bool
	// customBlockSize is set when WithWindowSize had to shrink blockSize, so
	// that WithEncoderLevel no longer overrides blockSize.
	customBlockSize bool
	// lowMem is copied into the encoder's base configuration by encoder()
	// (see WithLowerEncoderMem).
	lowMem          bool
	// dict is the registered dictionary, or nil when none has been set.
	dict            *dict
}
    33  
    34  func (o *encoderOptions) setDefault() {
    35  	*o = encoderOptions{
    36  		concurrent:    runtime.GOMAXPROCS(0),
    37  		crc:           true,
    38  		single:        nil,
    39  		blockSize:     maxCompressedBlockSize,
    40  		windowSize:    8 << 20,
    41  		level:         SpeedDefault,
    42  		allLitEntropy: false,
    43  		lowMem:        false,
    44  	}
    45  }
    46  
    47  // encoder returns an encoder with the selected options.
    48  func (o encoderOptions) encoder() encoder {
    49  	switch o.level {
    50  	case SpeedFastest:
    51  		if o.dict != nil {
    52  			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
    53  		}
    54  		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
    55  
    56  	case SpeedDefault:
    57  		if o.dict != nil {
    58  			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
    59  		}
    60  		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
    61  	case SpeedBetterCompression:
    62  		if o.dict != nil {
    63  			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
    64  		}
    65  		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
    66  	case SpeedBestCompression:
    67  		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
    68  	}
    69  	panic("unknown compression level")
    70  }
    71  
    72  // WithEncoderCRC will add CRC value to output.
    73  // Output will be 4 bytes larger.
    74  func WithEncoderCRC(b bool) EOption {
    75  	return func(o *encoderOptions) error { o.crc = b; return nil }
    76  }
    77  
    78  // WithEncoderConcurrency will set the concurrency,
    79  // meaning the maximum number of encoders to run concurrently.
    80  // The value supplied must be at least 1.
    81  // For streams, setting a value of 1 will disable async compression.
    82  // By default this will be set to GOMAXPROCS.
    83  func WithEncoderConcurrency(n int) EOption {
    84  	return func(o *encoderOptions) error {
    85  		if n <= 0 {
    86  			return fmt.Errorf("concurrency must be at least 1")
    87  		}
    88  		o.concurrent = n
    89  		return nil
    90  	}
    91  }
    92  
    93  // WithWindowSize will set the maximum allowed back-reference distance.
    94  // The value must be a power of two between MinWindowSize and MaxWindowSize.
    95  // A larger value will enable better compression but allocate more memory and,
    96  // for above-default values, take considerably longer.
    97  // The default value is determined by the compression level and max 8MB.
    98  func WithWindowSize(n int) EOption {
    99  	return func(o *encoderOptions) error {
   100  		switch {
   101  		case n < MinWindowSize:
   102  			return fmt.Errorf("window size must be at least %d", MinWindowSize)
   103  		case n > MaxWindowSize:
   104  			return fmt.Errorf("window size must be at most %d", MaxWindowSize)
   105  		case (n & (n - 1)) != 0:
   106  			return errors.New("window size must be a power of 2")
   107  		}
   108  
   109  		o.windowSize = n
   110  		o.customWindow = true
   111  		if o.blockSize > o.windowSize {
   112  			o.blockSize = o.windowSize
   113  			o.customBlockSize = true
   114  		}
   115  		return nil
   116  	}
   117  }
   118  
   119  // WithEncoderPadding will add padding to all output so the size will be a multiple of n.
   120  // This can be used to obfuscate the exact output size or make blocks of a certain size.
   121  // The contents will be a skippable frame, so it will be invisible by the decoder.
   122  // n must be > 0 and <= 1GB, 1<<30 bytes.
   123  // The padded area will be filled with data from crypto/rand.Reader.
   124  // If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
   125  func WithEncoderPadding(n int) EOption {
   126  	return func(o *encoderOptions) error {
   127  		if n <= 0 {
   128  			return fmt.Errorf("padding must be at least 1")
   129  		}
   130  		// No need to waste our time.
   131  		if n == 1 {
   132  			n = 0
   133  		}
   134  		if n > 1<<30 {
   135  			return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
   136  		}
   137  		o.pad = n
   138  		return nil
   139  	}
   140  }
   141  
// EncoderLevel predefines encoder compression levels.
// Only use the constants made available, since the actual mapping
// of these values is very likely to change and your compression could change
// unpredictably when upgrading the library.
type EncoderLevel int

const (
	// speedNotSet is the zero value. It is not a valid level and is
	// rejected by WithEncoderLevel.
	speedNotSet EncoderLevel = iota

	// SpeedFastest will choose the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression will yield better compression than the default.
	// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// By using this, notice that CPU usage may go up in the future.
	SpeedBetterCompression

	// SpeedBestCompression will choose the best available compression option.
	// This will offer the best compression no matter the CPU cost.
	SpeedBestCompression

	// speedLast should be kept after the last actual compression option.
	// This is not for external usage, but is used to keep track of the valid options.
	speedLast
)
   172  
   173  // EncoderLevelFromString will convert a string representation of an encoding level back
   174  // to a compression level. The compare is not case sensitive.
   175  // If the string wasn't recognized, (false, SpeedDefault) will be returned.
   176  func EncoderLevelFromString(s string) (bool, EncoderLevel) {
   177  	for l := speedNotSet + 1; l < speedLast; l++ {
   178  		if strings.EqualFold(s, l.String()) {
   179  			return true, l
   180  		}
   181  	}
   182  	return false, SpeedDefault
   183  }
   184  
   185  // EncoderLevelFromZstd will return an encoder level that closest matches the compression
   186  // ratio of a specific zstd compression level.
   187  // Many input values will provide the same compression level.
   188  func EncoderLevelFromZstd(level int) EncoderLevel {
   189  	switch {
   190  	case level < 3:
   191  		return SpeedFastest
   192  	case level >= 3 && level < 6:
   193  		return SpeedDefault
   194  	case level >= 6 && level < 10:
   195  		return SpeedBetterCompression
   196  	default:
   197  		return SpeedBestCompression
   198  	}
   199  }
   200  
   201  // String provides a string representation of the compression level.
   202  func (e EncoderLevel) String() string {
   203  	switch e {
   204  	case SpeedFastest:
   205  		return "fastest"
   206  	case SpeedDefault:
   207  		return "default"
   208  	case SpeedBetterCompression:
   209  		return "better"
   210  	case SpeedBestCompression:
   211  		return "best"
   212  	default:
   213  		return "invalid"
   214  	}
   215  }
   216  
   217  // WithEncoderLevel specifies a predefined compression level.
   218  func WithEncoderLevel(l EncoderLevel) EOption {
   219  	return func(o *encoderOptions) error {
   220  		switch {
   221  		case l <= speedNotSet || l >= speedLast:
   222  			return fmt.Errorf("unknown encoder level")
   223  		}
   224  		o.level = l
   225  		if !o.customWindow {
   226  			switch o.level {
   227  			case SpeedFastest:
   228  				o.windowSize = 4 << 20
   229  				if !o.customBlockSize {
   230  					o.blockSize = 1 << 16
   231  				}
   232  			case SpeedDefault:
   233  				o.windowSize = 8 << 20
   234  			case SpeedBetterCompression:
   235  				o.windowSize = 8 << 20
   236  			case SpeedBestCompression:
   237  				o.windowSize = 8 << 20
   238  			}
   239  		}
   240  		if !o.customALEntropy {
   241  			o.allLitEntropy = l > SpeedDefault
   242  		}
   243  
   244  		return nil
   245  	}
   246  }
   247  
   248  // WithZeroFrames will encode 0 length input as full frames.
   249  // This can be needed for compatibility with zstandard usage,
   250  // but is not needed for this package.
   251  func WithZeroFrames(b bool) EOption {
   252  	return func(o *encoderOptions) error {
   253  		o.fullZero = b
   254  		return nil
   255  	}
   256  }
   257  
   258  // WithAllLitEntropyCompression will apply entropy compression if no matches are found.
   259  // Disabling this will skip incompressible data faster, but in cases with no matches but
   260  // skewed character distribution compression is lost.
   261  // Default value depends on the compression level selected.
   262  func WithAllLitEntropyCompression(b bool) EOption {
   263  	return func(o *encoderOptions) error {
   264  		o.customALEntropy = true
   265  		o.allLitEntropy = b
   266  		return nil
   267  	}
   268  }
   269  
   270  // WithNoEntropyCompression will always skip entropy compression of literals.
   271  // This can be useful if content has matches, but unlikely to benefit from entropy
   272  // compression. Usually the slight speed improvement is not worth enabling this.
   273  func WithNoEntropyCompression(b bool) EOption {
   274  	return func(o *encoderOptions) error {
   275  		o.noEntropy = b
   276  		return nil
   277  	}
   278  }
   279  
   280  // WithSingleSegment will set the "single segment" flag when EncodeAll is used.
   281  // If this flag is set, data must be regenerated within a single continuous memory segment.
   282  // In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
   283  // As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
   284  // In order to preserve the decoder from unreasonable memory requirements,
   285  // a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
   286  // For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
   287  // This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
   288  // If this is not specified, block encodes will automatically choose this based on the input size and the window size.
   289  // This setting has no effect on streamed encodes.
   290  func WithSingleSegment(b bool) EOption {
   291  	return func(o *encoderOptions) error {
   292  		o.single = &b
   293  		return nil
   294  	}
   295  }
   296  
   297  // WithLowerEncoderMem will trade in some memory cases trade less memory usage for
   298  // slower encoding speed.
   299  // This will not change the window size which is the primary function for reducing
   300  // memory usage. See WithWindowSize.
   301  func WithLowerEncoderMem(b bool) EOption {
   302  	return func(o *encoderOptions) error {
   303  		o.lowMem = b
   304  		return nil
   305  	}
   306  }
   307  
   308  // WithEncoderDict allows to register a dictionary that will be used for the encode.
   309  //
   310  // The slice dict must be in the [dictionary format] produced by
   311  // "zstd --train" from the Zstandard reference implementation.
   312  //
   313  // The encoder *may* choose to use no dictionary instead for certain payloads.
   314  //
   315  // [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
   316  func WithEncoderDict(dict []byte) EOption {
   317  	return func(o *encoderOptions) error {
   318  		d, err := loadDict(dict)
   319  		if err != nil {
   320  			return err
   321  		}
   322  		o.dict = d
   323  		return nil
   324  	}
   325  }
   326  
   327  // WithEncoderDictRaw registers a dictionary that may be used by the encoder.
   328  //
   329  // The slice content may contain arbitrary data. It will be used as an initial
   330  // history.
   331  func WithEncoderDictRaw(id uint32, content []byte) EOption {
   332  	return func(o *encoderOptions) error {
   333  		if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
   334  			return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
   335  		}
   336  		o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
   337  		return nil
   338  	}
   339  }
   340  

View as plain text