decode.go

Documentation: golang.org/x/image/vp8

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package vp8 implements a decoder for the VP8 lossy image format.
     6  //
     7  // The VP8 specification is RFC 6386.
     8  package vp8 // import "golang.org/x/image/vp8"
     9  
    10  // This file implements the top-level decoding algorithm.
    11  
    12  import (
    13  	"errors"
    14  	"image"
    15  	"io"
    16  )
    17  
    18  // limitReader wraps an io.Reader to read at most n bytes from it.
    19  type limitReader struct {
    20  	r io.Reader
    21  	n int
    22  }
    23  
    24  // ReadFull reads exactly len(p) bytes into p.
    25  func (r *limitReader) ReadFull(p []byte) error {
    26  	if len(p) > r.n {
    27  		return io.ErrUnexpectedEOF
    28  	}
    29  	n, err := io.ReadFull(r.r, p)
    30  	r.n -= n
    31  	return err
    32  }
    33  
    34  // FrameHeader is a frame header, as specified in section 9.1.
    35  type FrameHeader struct {
    36  	KeyFrame          bool
    37  	VersionNumber     uint8
    38  	ShowFrame         bool
    39  	FirstPartitionLen uint32
    40  	Width             int
    41  	Height            int
    42  	XScale            uint8
    43  	YScale            uint8
    44  }
    45  
    46  const (
    47  	nSegment     = 4
    48  	nSegmentProb = 3
    49  )
    50  
    51  // segmentHeader holds segment-related header information.
    52  type segmentHeader struct {
    53  	useSegment     bool
    54  	updateMap      bool
    55  	relativeDelta  bool
    56  	quantizer      [nSegment]int8
    57  	filterStrength [nSegment]int8
    58  	prob           [nSegmentProb]uint8
    59  }
    60  
    61  const (
    62  	nRefLFDelta  = 4
    63  	nModeLFDelta = 4
    64  )
    65  
    66  // filterHeader holds filter-related header information.
    67  type filterHeader struct {
    68  	simple          bool
    69  	level           int8
    70  	sharpness       uint8
    71  	useLFDelta      bool
    72  	refLFDelta      [nRefLFDelta]int8
    73  	modeLFDelta     [nModeLFDelta]int8
    74  	perSegmentLevel [nSegment]int8
    75  }
    76  
    77  // mb is the per-macroblock decode state. A decoder maintains mbw+1 of these
    78  // as it is decoding macroblocks left-to-right and top-to-bottom: mbw for the
    79  // macroblocks in the row above, and one for the macroblock to the left.
    80  type mb struct {
    81  	// pred is the predictor mode for the 4 bottom or right 4x4 luma regions.
    82  	pred [4]uint8
    83  	// nzMask is a mask of 8 bits: 4 for the bottom or right 4x4 luma regions,
    84  	// and 2 + 2 for the bottom or right 4x4 chroma regions. A 1 bit indicates
    85  	// that region has non-zero coefficients.
    86  	nzMask uint8
    87  	// nzY16 is a 0/1 value that is 1 if the macroblock used Y16 prediction and
    88  	// had non-zero coefficients.
    89  	nzY16 uint8
    90  }
    91  
    92  // Decoder decodes VP8 bitstreams into frames. Decoding one frame consists of
    93  // calling Init, DecodeFrameHeader and then DecodeFrame in that order.
    94  // A Decoder can be re-used to decode multiple frames.
    95  type Decoder struct {
    96  	// r is the input bitsream.
    97  	r limitReader
    98  	// scratch is a scratch buffer.
    99  	scratch [8]byte
   100  	// img is the YCbCr image to decode into.
   101  	img *image.YCbCr
   102  	// mbw and mbh are the number of 16x16 macroblocks wide and high the image is.
   103  	mbw, mbh int
   104  	// frameHeader is the frame header. When decoding multiple frames,
   105  	// frames that aren't key frames will inherit the Width, Height,
   106  	// XScale and YScale of the most recent key frame.
   107  	frameHeader FrameHeader
   108  	// Other headers.
   109  	segmentHeader segmentHeader
   110  	filterHeader  filterHeader
   111  	// The image data is divided into a number of independent partitions.
   112  	// There is 1 "first partition" and between 1 and 8 "other partitions"
   113  	// for coefficient data.
   114  	fp  partition
   115  	op  [8]partition
   116  	nOP int
   117  	// Quantization factors.
   118  	quant [nSegment]quant
   119  	// DCT/WHT coefficient decoding probabilities.
   120  	tokenProb   [nPlane][nBand][nContext][nProb]uint8
   121  	useSkipProb bool
   122  	skipProb    uint8
   123  	// Loop filter parameters.
   124  	filterParams      [nSegment][2]filterParam
   125  	perMBFilterParams []filterParam
   126  
   127  	// The eight fields below relate to the current macroblock being decoded.
   128  	//
   129  	// Segment-based adjustments.
   130  	segment int
   131  	// Per-macroblock state for the macroblock immediately left of and those
   132  	// macroblocks immediately above the current macroblock.
   133  	leftMB mb
   134  	upMB   []mb
   135  	// Bitmasks for which 4x4 regions of coeff contain non-zero coefficients.
   136  	nzDCMask, nzACMask uint32
   137  	// Predictor modes.
   138  	usePredY16 bool // The libwebp C code calls this !is_i4x4_.
   139  	predY16    uint8
   140  	predC8     uint8
   141  	predY4     [4][4]uint8
   142  
   143  	// The two fields below form a workspace for reconstructing a macroblock.
   144  	// Their specific sizes are documented in reconstruct.go.
   145  	coeff [1*16*16 + 2*8*8 + 1*4*4]int16
   146  	ybr   [1 + 16 + 1 + 8][32]uint8
   147  }
   148  
   149  // NewDecoder returns a new Decoder.
   150  func NewDecoder() *Decoder {
   151  	return &Decoder{}
   152  }
   153  
   154  // Init initializes the decoder to read at most n bytes from r.
   155  func (d *Decoder) Init(r io.Reader, n int) {
   156  	d.r = limitReader{r, n}
   157  }
   158  
   159  // DecodeFrameHeader decodes the frame header.
   160  func (d *Decoder) DecodeFrameHeader() (fh FrameHeader, err error) {
   161  	// All frame headers are at least 3 bytes long.
   162  	b := d.scratch[:3]
   163  	if err = d.r.ReadFull(b); err != nil {
   164  		return
   165  	}
   166  	d.frameHeader.KeyFrame = (b[0] & 1) == 0
   167  	d.frameHeader.VersionNumber = (b[0] >> 1) & 7
   168  	d.frameHeader.ShowFrame = (b[0]>>4)&1 == 1
   169  	d.frameHeader.FirstPartitionLen = uint32(b[0])>>5 | uint32(b[1])<<3 | uint32(b[2])<<11
   170  	if !d.frameHeader.KeyFrame {
   171  		return d.frameHeader, nil
   172  	}
   173  	// Frame headers for key frames are an additional 7 bytes long.
   174  	b = d.scratch[:7]
   175  	if err = d.r.ReadFull(b); err != nil {
   176  		return
   177  	}
   178  	// Check the magic sync code.
   179  	if b[0] != 0x9d || b[1] != 0x01 || b[2] != 0x2a {
   180  		err = errors.New("vp8: invalid format")
   181  		return
   182  	}
   183  	d.frameHeader.Width = int(b[4]&0x3f)<<8 | int(b[3])
   184  	d.frameHeader.Height = int(b[6]&0x3f)<<8 | int(b[5])
   185  	d.frameHeader.XScale = b[4] >> 6
   186  	d.frameHeader.YScale = b[6] >> 6
   187  	d.mbw = (d.frameHeader.Width + 0x0f) >> 4
   188  	d.mbh = (d.frameHeader.Height + 0x0f) >> 4
   189  	d.segmentHeader = segmentHeader{
   190  		prob: [3]uint8{0xff, 0xff, 0xff},
   191  	}
   192  	d.tokenProb = defaultTokenProb
   193  	d.segment = 0
   194  	return d.frameHeader, nil
   195  }
   196  
   197  // ensureImg ensures that d.img is large enough to hold the decoded frame.
   198  func (d *Decoder) ensureImg() {
   199  	if d.img != nil {
   200  		p0, p1 := d.img.Rect.Min, d.img.Rect.Max
   201  		if p0.X == 0 && p0.Y == 0 && p1.X >= 16*d.mbw && p1.Y >= 16*d.mbh {
   202  			return
   203  		}
   204  	}
   205  	m := image.NewYCbCr(image.Rect(0, 0, 16*d.mbw, 16*d.mbh), image.YCbCrSubsampleRatio420)
   206  	d.img = m.SubImage(image.Rect(0, 0, d.frameHeader.Width, d.frameHeader.Height)).(*image.YCbCr)
   207  	d.perMBFilterParams = make([]filterParam, d.mbw*d.mbh)
   208  	d.upMB = make([]mb, d.mbw)
   209  }
   210  
   211  // parseSegmentHeader parses the segment header, as specified in section 9.3.
   212  func (d *Decoder) parseSegmentHeader() {
   213  	d.segmentHeader.useSegment = d.fp.readBit(uniformProb)
   214  	if !d.segmentHeader.useSegment {
   215  		d.segmentHeader.updateMap = false
   216  		return
   217  	}
   218  	d.segmentHeader.updateMap = d.fp.readBit(uniformProb)
   219  	if d.fp.readBit(uniformProb) {
   220  		d.segmentHeader.relativeDelta = !d.fp.readBit(uniformProb)
   221  		for i := range d.segmentHeader.quantizer {
   222  			d.segmentHeader.quantizer[i] = int8(d.fp.readOptionalInt(uniformProb, 7))
   223  		}
   224  		for i := range d.segmentHeader.filterStrength {
   225  			d.segmentHeader.filterStrength[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
   226  		}
   227  	}
   228  	if !d.segmentHeader.updateMap {
   229  		return
   230  	}
   231  	for i := range d.segmentHeader.prob {
   232  		if d.fp.readBit(uniformProb) {
   233  			d.segmentHeader.prob[i] = uint8(d.fp.readUint(uniformProb, 8))
   234  		} else {
   235  			d.segmentHeader.prob[i] = 0xff
   236  		}
   237  	}
   238  }
   239  
   240  // parseFilterHeader parses the filter header, as specified in section 9.4.
   241  func (d *Decoder) parseFilterHeader() {
   242  	d.filterHeader.simple = d.fp.readBit(uniformProb)
   243  	d.filterHeader.level = int8(d.fp.readUint(uniformProb, 6))
   244  	d.filterHeader.sharpness = uint8(d.fp.readUint(uniformProb, 3))
   245  	d.filterHeader.useLFDelta = d.fp.readBit(uniformProb)
   246  	if d.filterHeader.useLFDelta && d.fp.readBit(uniformProb) {
   247  		for i := range d.filterHeader.refLFDelta {
   248  			d.filterHeader.refLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
   249  		}
   250  		for i := range d.filterHeader.modeLFDelta {
   251  			d.filterHeader.modeLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
   252  		}
   253  	}
   254  	if d.filterHeader.level == 0 {
   255  		return
   256  	}
   257  	if d.segmentHeader.useSegment {
   258  		for i := range d.filterHeader.perSegmentLevel {
   259  			strength := d.segmentHeader.filterStrength[i]
   260  			if d.segmentHeader.relativeDelta {
   261  				strength += d.filterHeader.level
   262  			}
   263  			d.filterHeader.perSegmentLevel[i] = strength
   264  		}
   265  	} else {
   266  		d.filterHeader.perSegmentLevel[0] = d.filterHeader.level
   267  	}
   268  	d.computeFilterParams()
   269  }
   270  
   271  // parseOtherPartitions parses the other partitions, as specified in section 9.5.
   272  func (d *Decoder) parseOtherPartitions() error {
   273  	const maxNOP = 1 << 3
   274  	var partLens [maxNOP]int
   275  	d.nOP = 1 << d.fp.readUint(uniformProb, 2)
   276  
   277  	// The final partition length is implied by the remaining chunk data
   278  	// (d.r.n) and the other d.nOP-1 partition lengths. Those d.nOP-1 partition
   279  	// lengths are stored as 24-bit uints, i.e. up to 16 MiB per partition.
   280  	n := 3 * (d.nOP - 1)
   281  	partLens[d.nOP-1] = d.r.n - n
   282  	if partLens[d.nOP-1] < 0 {
   283  		return io.ErrUnexpectedEOF
   284  	}
   285  	if n > 0 {
   286  		buf := make([]byte, n)
   287  		if err := d.r.ReadFull(buf); err != nil {
   288  			return err
   289  		}
   290  		for i := 0; i < d.nOP-1; i++ {
   291  			pl := int(buf[3*i+0]) | int(buf[3*i+1])<<8 | int(buf[3*i+2])<<16
   292  			if pl > partLens[d.nOP-1] {
   293  				return io.ErrUnexpectedEOF
   294  			}
   295  			partLens[i] = pl
   296  			partLens[d.nOP-1] -= pl
   297  		}
   298  	}
   299  
   300  	// We check if the final partition length can also fit into a 24-bit uint.
   301  	// Strictly speaking, this isn't part of the spec, but it guards against a
   302  	// malicious WEBP image that is too large to ReadFull the encoded DCT
   303  	// coefficients into memory, whether that's because the actual WEBP file is
   304  	// too large, or whether its RIFF metadata lists too large a chunk.
   305  	if 1<<24 <= partLens[d.nOP-1] {
   306  		return errors.New("vp8: too much data to decode")
   307  	}
   308  
   309  	buf := make([]byte, d.r.n)
   310  	if err := d.r.ReadFull(buf); err != nil {
   311  		return err
   312  	}
   313  	for i, pl := range partLens {
   314  		if i == d.nOP {
   315  			break
   316  		}
   317  		d.op[i].init(buf[:pl])
   318  		buf = buf[pl:]
   319  	}
   320  	return nil
   321  }
   322  
   323  // parseOtherHeaders parses header information other than the frame header.
   324  func (d *Decoder) parseOtherHeaders() error {
   325  	// Initialize and parse the first partition.
   326  	firstPartition := make([]byte, d.frameHeader.FirstPartitionLen)
   327  	if err := d.r.ReadFull(firstPartition); err != nil {
   328  		return err
   329  	}
   330  	d.fp.init(firstPartition)
   331  	if d.frameHeader.KeyFrame {
   332  		// Read and ignore the color space and pixel clamp values. They are
   333  		// specified in section 9.2, but are unimplemented.
   334  		d.fp.readBit(uniformProb)
   335  		d.fp.readBit(uniformProb)
   336  	}
   337  	d.parseSegmentHeader()
   338  	d.parseFilterHeader()
   339  	if err := d.parseOtherPartitions(); err != nil {
   340  		return err
   341  	}
   342  	d.parseQuant()
   343  	if !d.frameHeader.KeyFrame {
   344  		// Golden and AltRef frames are specified in section 9.7.
   345  		// TODO(nigeltao): implement. Note that they are only used for video, not still images.
   346  		return errors.New("vp8: Golden / AltRef frames are not implemented")
   347  	}
   348  	// Read and ignore the refreshLastFrameBuffer bit, specified in section 9.8.
   349  	// It applies only to video, and not still images.
   350  	d.fp.readBit(uniformProb)
   351  	d.parseTokenProb()
   352  	d.useSkipProb = d.fp.readBit(uniformProb)
   353  	if d.useSkipProb {
   354  		d.skipProb = uint8(d.fp.readUint(uniformProb, 8))
   355  	}
   356  	if d.fp.unexpectedEOF {
   357  		return io.ErrUnexpectedEOF
   358  	}
   359  	return nil
   360  }
   361  
   362  // DecodeFrame decodes the frame and returns it as an YCbCr image.
   363  // The image's contents are valid up until the next call to Decoder.Init.
   364  func (d *Decoder) DecodeFrame() (*image.YCbCr, error) {
   365  	d.ensureImg()
   366  	if err := d.parseOtherHeaders(); err != nil {
   367  		return nil, err
   368  	}
   369  	// Reconstruct the rows.
   370  	for mbx := 0; mbx < d.mbw; mbx++ {
   371  		d.upMB[mbx] = mb{}
   372  	}
   373  	for mby := 0; mby < d.mbh; mby++ {
   374  		d.leftMB = mb{}
   375  		for mbx := 0; mbx < d.mbw; mbx++ {
   376  			skip := d.reconstruct(mbx, mby)
   377  			fs := d.filterParams[d.segment][btou(!d.usePredY16)]
   378  			fs.inner = fs.inner || !skip
   379  			d.perMBFilterParams[d.mbw*mby+mbx] = fs
   380  		}
   381  	}
   382  	if d.fp.unexpectedEOF {
   383  		return nil, io.ErrUnexpectedEOF
   384  	}
   385  	for i := 0; i < d.nOP; i++ {
   386  		if d.op[i].unexpectedEOF {
   387  			return nil, io.ErrUnexpectedEOF
   388  		}
   389  	}
   390  	// Apply the loop filter.
   391  	//
   392  	// Even if we are using per-segment levels, section 15 says that "loop
   393  	// filtering must be skipped entirely if loop_filter_level at either the
   394  	// frame header level or macroblock override level is 0".
   395  	if d.filterHeader.level != 0 {
   396  		if d.filterHeader.simple {
   397  			d.simpleFilter()
   398  		} else {
   399  			d.normalFilter()
   400  		}
   401  	}
   402  	return d.img, nil
   403  }
   404
View as plain text