1 /* 2 * Package xz Go Reader API 3 * 4 * Author: Michael Cross <https://github.com/xi2> 5 * 6 * This file has been put into the public domain. 7 * You can do whatever you want with this file. 8 */ 9 10 package xz 11 12 import ( 13 "errors" 14 "io" 15 ) 16 17 // Package specific errors. 18 var ( 19 ErrUnsupportedCheck = errors.New("xz: integrity check type not supported") 20 ErrMemlimit = errors.New("xz: LZMA2 dictionary size exceeds max") 21 ErrFormat = errors.New("xz: file format not recognized") 22 ErrOptions = errors.New("xz: compression options not supported") 23 ErrData = errors.New("xz: data is corrupt") 24 ErrBuf = errors.New("xz: data is truncated or corrupt") 25 ) 26 27 // DefaultDictMax is the default maximum dictionary size in bytes used 28 // by the decoder. This value is sufficient to decompress files 29 // created with XZ Utils "xz -9". 30 const DefaultDictMax = 1 << 26 // 64 MiB 31 32 // inBufSize is the input buffer size used by the decoder. 33 const inBufSize = 1 << 13 // 8 KiB 34 35 // A Reader is an io.Reader that can be used to retrieve uncompressed 36 // data from an XZ file. 37 // 38 // In general, an XZ file can be a concatenation of other XZ 39 // files. Reads from the Reader return the concatenation of the 40 // uncompressed data of each. 41 type Reader struct { 42 Header 43 r io.Reader // the wrapped io.Reader 44 multistream bool // true if reader is in multistream mode 45 rEOF bool // true after io.EOF received on r 46 dEOF bool // true after decoder has completed 47 padding int // bytes of stream padding read (or -1) 48 in [inBufSize]byte // backing array for buf.in 49 buf *xzBuf // decoder input/output buffers 50 dec *xzDec // decoder state 51 err error // the result of the last decoder call 52 } 53 54 // NewReader creates a new Reader reading from r. The decompressor 55 // will use an LZMA2 dictionary size up to dictMax bytes in 56 // size. Passing a value of zero sets dictMax to DefaultDictMax. If 57 // an individual XZ stream requires a dictionary size greater than 58 // dictMax in order to decompress, Read will return ErrMemlimit. 59 // 60 // If NewReader is passed a value of nil for r then a Reader is 61 // created such that all read attempts will return io.EOF. This is 62 // useful if you just want to allocate memory for a Reader which will 63 // later be initialized with Reset. 64 // 65 // Due to internal buffering, the Reader may read more data than 66 // necessary from r. 67 func NewReader(r io.Reader, dictMax uint32) (*Reader, error) { 68 if dictMax == 0 { 69 dictMax = DefaultDictMax 70 } 71 z := &Reader{ 72 r: r, 73 multistream: true, 74 padding: -1, 75 buf: &xzBuf{}, 76 } 77 if r == nil { 78 z.rEOF, z.dEOF = true, true 79 } 80 z.dec = xzDecInit(dictMax, &z.Header) 81 var err error 82 if r != nil { 83 _, err = z.Read(nil) // read stream header 84 } 85 return z, err 86 } 87 88 // decode is a wrapper around xzDecRun that additionally handles 89 // stream padding. It treats the padding as a kind of stream that 90 // decodes to nothing. 91 // 92 // When decoding padding, z.padding >= 0 93 // When decoding a real stream, z.padding == -1 94 func (z *Reader) decode() (ret xzRet) { 95 if z.padding >= 0 { 96 // read all padding in input buffer 97 for z.buf.inPos < len(z.buf.in) && 98 z.buf.in[z.buf.inPos] == 0 { 99 z.buf.inPos++ 100 z.padding++ 101 } 102 switch { 103 case z.buf.inPos == len(z.buf.in) && z.rEOF: 104 // case: out of padding. no more input data available 105 if z.padding%4 != 0 { 106 ret = xzDataError 107 } else { 108 ret = xzStreamEnd 109 } 110 case z.buf.inPos == len(z.buf.in): 111 // case: read more padding next loop iteration 112 ret = xzOK 113 default: 114 // case: out of padding. more input data available 115 if z.padding%4 != 0 { 116 ret = xzDataError 117 } else { 118 xzDecReset(z.dec) 119 ret = xzStreamEnd 120 } 121 } 122 } else { 123 ret = xzDecRun(z.dec, z.buf) 124 } 125 return 126 } 127 128 func (z *Reader) Read(p []byte) (n int, err error) { 129 // restore err 130 err = z.err 131 // set decoder output buffer to p 132 z.buf.out = p 133 z.buf.outPos = 0 134 for { 135 // update n 136 n = z.buf.outPos 137 // if last call to decoder ended with an error, return that error 138 if err != nil { 139 break 140 } 141 // if decoder has finished, return with err == io.EOF 142 if z.dEOF { 143 err = io.EOF 144 break 145 } 146 // if p full, return with err == nil, unless we have not yet 147 // read the stream header with Read(nil) 148 if n == len(p) && z.CheckType != checkUnset { 149 break 150 } 151 // if needed, read more data from z.r 152 if z.buf.inPos == len(z.buf.in) && !z.rEOF { 153 rn, e := z.r.Read(z.in[:]) 154 if e != nil && e != io.EOF { 155 // read error 156 err = e 157 break 158 } 159 if e == io.EOF { 160 z.rEOF = true 161 } 162 // set new input buffer in z.buf 163 z.buf.in = z.in[:rn] 164 z.buf.inPos = 0 165 } 166 // decode more data 167 ret := z.decode() 168 switch ret { 169 case xzOK: 170 // no action needed 171 case xzStreamEnd: 172 if z.padding >= 0 { 173 z.padding = -1 174 if !z.multistream || z.rEOF { 175 z.dEOF = true 176 } 177 } else { 178 z.padding = 0 179 } 180 case xzUnsupportedCheck: 181 err = ErrUnsupportedCheck 182 case xzMemlimitError: 183 err = ErrMemlimit 184 case xzFormatError: 185 err = ErrFormat 186 case xzOptionsError: 187 err = ErrOptions 188 case xzDataError: 189 err = ErrData 190 case xzBufError: 191 err = ErrBuf 192 } 193 // save err 194 z.err = err 195 } 196 return 197 } 198 199 // Multistream controls whether the reader is operating in multistream 200 // mode. 201 // 202 // If enabled (the default), the Reader expects the input to be a 203 // sequence of XZ streams, possibly interspersed with stream padding, 204 // which it reads one after another. The effect is that the 205 // concatenation of a sequence of XZ streams or XZ files is 206 // treated as equivalent to the compressed result of the concatenation 207 // of the sequence. This is standard behaviour for XZ readers. 208 // 209 // Calling Multistream(false) disables this behaviour; disabling the 210 // behaviour can be useful when reading file formats that distinguish 211 // individual XZ streams. In this mode, when the Reader reaches the 212 // end of the stream, Read returns io.EOF. To start the next stream, 213 // call z.Reset(nil) followed by z.Multistream(false). If there is no 214 // next stream, z.Reset(nil) will return io.EOF. 215 func (z *Reader) Multistream(ok bool) { 216 z.multistream = ok 217 } 218 219 // Reset, for non-nil values of io.Reader r, discards the Reader z's 220 // state and makes it equivalent to the result of its original state 221 // from NewReader, but reading from r instead. This permits reusing a 222 // Reader rather than allocating a new one. 223 // 224 // If you wish to leave r unchanged use z.Reset(nil). This keeps r 225 // unchanged and ensures internal buffering is preserved. If the 226 // Reader was at the end of a stream it is then ready to read any 227 // follow on streams. If there are no follow on streams z.Reset(nil) 228 // returns io.EOF. If the Reader was not at the end of a stream then 229 // z.Reset(nil) does nothing. 230 func (z *Reader) Reset(r io.Reader) error { 231 switch { 232 case r == nil: 233 z.multistream = true 234 if !z.dEOF { 235 return nil 236 } 237 if z.rEOF { 238 return io.EOF 239 } 240 z.dEOF = false 241 _, err := z.Read(nil) // read stream header 242 return err 243 default: 244 z.r = r 245 z.multistream = true 246 z.rEOF = false 247 z.dEOF = false 248 z.padding = -1 249 z.buf.in = nil 250 z.buf.inPos = 0 251 xzDecReset(z.dec) 252 z.err = nil 253 _, err := z.Read(nil) // read stream header 254 return err 255 } 256 } 257