reader.go

Documentation: github.com/xi2/xz

     1  /*
     2   * Package xz Go Reader API
     3   *
     4   * Author: Michael Cross <https://github.com/xi2>
     5   *
     6   * This file has been put into the public domain.
     7   * You can do whatever you want with this file.
     8   */
     9  
    10  package xz
    11  
    12  import (
    13  	"errors"
    14  	"io"
    15  )
    16  
// Package specific errors.
//
// Read translates each non-success decoder status into one of these
// values:
//
//   - ErrUnsupportedCheck: the decoder reported xzUnsupportedCheck.
//   - ErrMemlimit: the decoder reported xzMemlimitError; per NewReader,
//     the stream needs a dictionary larger than the configured dictMax.
//   - ErrFormat: the decoder reported xzFormatError.
//   - ErrOptions: the decoder reported xzOptionsError.
//   - ErrData: the decoder reported xzDataError, or stream padding was
//     not a multiple of four bytes long.
//   - ErrBuf: the decoder reported xzBufError.
//
// Once Read has returned one of these errors it keeps returning it on
// every subsequent call until Reset is called with a non-nil reader.
var (
	ErrUnsupportedCheck = errors.New("xz: integrity check type not supported")
	ErrMemlimit         = errors.New("xz: LZMA2 dictionary size exceeds max")
	ErrFormat           = errors.New("xz: file format not recognized")
	ErrOptions          = errors.New("xz: compression options not supported")
	ErrData             = errors.New("xz: data is corrupt")
	ErrBuf              = errors.New("xz: data is truncated or corrupt")
)
    26  
// DefaultDictMax is the default maximum dictionary size in bytes used
// by the decoder. This value is sufficient to decompress files
// created with XZ Utils "xz -9". NewReader substitutes it when it is
// passed a dictMax of zero.
const DefaultDictMax = 1 << 26 // 64 MiB

// inBufSize is the input buffer size used by the decoder. It is the
// length of the Reader.in array, and therefore the most compressed
// data requested from the wrapped io.Reader in a single Read call.
const inBufSize = 1 << 13 // 8 KiB
    34  
// A Reader is an io.Reader that can be used to retrieve uncompressed
// data from an XZ file.
//
// In general, an XZ file can be a concatenation of other XZ
// files. Reads from the Reader return the concatenation of the
// uncompressed data of each.
//
// A Reader must be obtained from NewReader. The zero value is not
// usable because Read dereferences buf and dec, which only NewReader
// allocates.
//
// A pointer to the embedded Header is handed to the decoder by
// NewReader. Read treats CheckType != checkUnset as the sign that a
// stream header has been parsed. NOTE(review): presumably the decoder
// fills in Header while parsing each stream header; confirm against
// the decoder source.
//
// The padding field doubles as a mode flag: -1 means a real stream is
// being decoded, and a value >= 0 means stream padding is being
// consumed and holds the number of padding bytes seen so far.
type Reader struct {
	Header
	r           io.Reader       // the wrapped io.Reader
	multistream bool            // true if reader is in multistream mode
	rEOF        bool            // true after io.EOF received on r
	dEOF        bool            // true after decoder has completed
	padding     int             // bytes of stream padding read (or -1)
	in          [inBufSize]byte // backing array for buf.in
	buf         *xzBuf          // decoder input/output buffers
	dec         *xzDec          // decoder state
	err         error           // the result of the last decoder call
}
    53  
    54  // NewReader creates a new Reader reading from r. The decompressor
    55  // will use an LZMA2 dictionary size up to dictMax bytes in
    56  // size. Passing a value of zero sets dictMax to DefaultDictMax.  If
    57  // an individual XZ stream requires a dictionary size greater than
    58  // dictMax in order to decompress, Read will return ErrMemlimit.
    59  //
    60  // If NewReader is passed a value of nil for r then a Reader is
    61  // created such that all read attempts will return io.EOF. This is
    62  // useful if you just want to allocate memory for a Reader which will
    63  // later be initialized with Reset.
    64  //
    65  // Due to internal buffering, the Reader may read more data than
    66  // necessary from r.
    67  func NewReader(r io.Reader, dictMax uint32) (*Reader, error) {
    68  	if dictMax == 0 {
    69  		dictMax = DefaultDictMax
    70  	}
    71  	z := &Reader{
    72  		r:           r,
    73  		multistream: true,
    74  		padding:     -1,
    75  		buf:         &xzBuf{},
    76  	}
    77  	if r == nil {
    78  		z.rEOF, z.dEOF = true, true
    79  	}
    80  	z.dec = xzDecInit(dictMax, &z.Header)
    81  	var err error
    82  	if r != nil {
    83  		_, err = z.Read(nil) // read stream header
    84  	}
    85  	return z, err
    86  }
    87  
    88  // decode is a wrapper around xzDecRun that additionally handles
    89  // stream padding. It treats the padding as a kind of stream that
    90  // decodes to nothing.
    91  //
    92  // When decoding padding, z.padding >= 0
    93  // When decoding a real stream, z.padding == -1
    94  func (z *Reader) decode() (ret xzRet) {
    95  	if z.padding >= 0 {
    96  		// read all padding in input buffer
    97  		for z.buf.inPos < len(z.buf.in) &&
    98  			z.buf.in[z.buf.inPos] == 0 {
    99  			z.buf.inPos++
   100  			z.padding++
   101  		}
   102  		switch {
   103  		case z.buf.inPos == len(z.buf.in) && z.rEOF:
   104  			// case: out of padding. no more input data available
   105  			if z.padding%4 != 0 {
   106  				ret = xzDataError
   107  			} else {
   108  				ret = xzStreamEnd
   109  			}
   110  		case z.buf.inPos == len(z.buf.in):
   111  			// case: read more padding next loop iteration
   112  			ret = xzOK
   113  		default:
   114  			// case: out of padding. more input data available
   115  			if z.padding%4 != 0 {
   116  				ret = xzDataError
   117  			} else {
   118  				xzDecReset(z.dec)
   119  				ret = xzStreamEnd
   120  			}
   121  		}
   122  	} else {
   123  		ret = xzDecRun(z.dec, z.buf)
   124  	}
   125  	return
   126  }
   127  
   128  func (z *Reader) Read(p []byte) (n int, err error) {
   129  	// restore err
   130  	err = z.err
   131  	// set decoder output buffer to p
   132  	z.buf.out = p
   133  	z.buf.outPos = 0
   134  	for {
   135  		// update n
   136  		n = z.buf.outPos
   137  		// if last call to decoder ended with an error, return that error
   138  		if err != nil {
   139  			break
   140  		}
   141  		// if decoder has finished, return with err == io.EOF
   142  		if z.dEOF {
   143  			err = io.EOF
   144  			break
   145  		}
   146  		// if p full, return with err == nil, unless we have not yet
   147  		// read the stream header with Read(nil)
   148  		if n == len(p) && z.CheckType != checkUnset {
   149  			break
   150  		}
   151  		// if needed, read more data from z.r
   152  		if z.buf.inPos == len(z.buf.in) && !z.rEOF {
   153  			rn, e := z.r.Read(z.in[:])
   154  			if e != nil && e != io.EOF {
   155  				// read error
   156  				err = e
   157  				break
   158  			}
   159  			if e == io.EOF {
   160  				z.rEOF = true
   161  			}
   162  			// set new input buffer in z.buf
   163  			z.buf.in = z.in[:rn]
   164  			z.buf.inPos = 0
   165  		}
   166  		// decode more data
   167  		ret := z.decode()
   168  		switch ret {
   169  		case xzOK:
   170  			// no action needed
   171  		case xzStreamEnd:
   172  			if z.padding >= 0 {
   173  				z.padding = -1
   174  				if !z.multistream || z.rEOF {
   175  					z.dEOF = true
   176  				}
   177  			} else {
   178  				z.padding = 0
   179  			}
   180  		case xzUnsupportedCheck:
   181  			err = ErrUnsupportedCheck
   182  		case xzMemlimitError:
   183  			err = ErrMemlimit
   184  		case xzFormatError:
   185  			err = ErrFormat
   186  		case xzOptionsError:
   187  			err = ErrOptions
   188  		case xzDataError:
   189  			err = ErrData
   190  		case xzBufError:
   191  			err = ErrBuf
   192  		}
   193  		// save err
   194  		z.err = err
   195  	}
   196  	return
   197  }
   198  
// Multistream controls whether the reader is operating in multistream
// mode.
//
// If enabled (the default), the Reader expects the input to be a
// sequence of XZ streams, possibly interspersed with stream padding,
// which it reads one after another. The effect is that the
// concatenation of a sequence of XZ streams or XZ files is
// treated as equivalent to the compressed result of the concatenation
// of the sequence. This is standard behaviour for XZ readers.
//
// Calling Multistream(false) disables this behaviour; disabling the
// behaviour can be useful when reading file formats that distinguish
// individual XZ streams. In this mode, when the Reader reaches the
// end of the stream, Read returns io.EOF. To start the next stream,
// call z.Reset(nil) followed by z.Multistream(false). If there is no
// next stream, z.Reset(nil) will return io.EOF.
//
// Every call to Reset switches multistream mode back on, which is why
// Multistream(false) has to be repeated after each Reset.
func (z *Reader) Multistream(ok bool) {
	z.multistream = ok
}
   218  
   219  // Reset, for non-nil values of io.Reader r, discards the Reader z's
   220  // state and makes it equivalent to the result of its original state
   221  // from NewReader, but reading from r instead. This permits reusing a
   222  // Reader rather than allocating a new one.
   223  //
   224  // If you wish to leave r unchanged use z.Reset(nil). This keeps r
   225  // unchanged and ensures internal buffering is preserved. If the
   226  // Reader was at the end of a stream it is then ready to read any
   227  // follow on streams. If there are no follow on streams z.Reset(nil)
   228  // returns io.EOF. If the Reader was not at the end of a stream then
   229  // z.Reset(nil) does nothing.
   230  func (z *Reader) Reset(r io.Reader) error {
   231  	switch {
   232  	case r == nil:
   233  		z.multistream = true
   234  		if !z.dEOF {
   235  			return nil
   236  		}
   237  		if z.rEOF {
   238  			return io.EOF
   239  		}
   240  		z.dEOF = false
   241  		_, err := z.Read(nil) // read stream header
   242  		return err
   243  	default:
   244  		z.r = r
   245  		z.multistream = true
   246  		z.rEOF = false
   247  		z.dEOF = false
   248  		z.padding = -1
   249  		z.buf.in = nil
   250  		z.buf.inPos = 0
   251  		xzDecReset(z.dec)
   252  		z.err = nil
   253  		_, err := z.Read(nil) // read stream header
   254  		return err
   255  	}
   256  }
   257
View as plain text