...

Source file src/github.com/xi2/xz/dec_bcj.go

Documentation: github.com/xi2/xz

     1  /*
     2   * Branch/Call/Jump (BCJ) filter decoders
     3   *
     4   * Authors: Lasse Collin <lasse.collin@tukaani.org>
     5   *          Igor Pavlov <http://7-zip.org/>
     6   *
     7   * Translation to Go: Michael Cross <https://github.com/xi2>
     8   *
     9   * This file has been put into the public domain.
    10   * You can do whatever you want with this file.
    11   */
    12  
    13  package xz
    14  
    15  /* from linux/lib/xz/xz_dec_bcj.c *************************************/
    16  
    17  type xzDecBCJ struct {
    18  	/* Type of the BCJ filter being used */
    19  	typ xzFilterID
    20  	/*
    21  	 * Return value of the next filter in the chain. We need to preserve
    22  	 * this information across calls, because we must not call the next
    23  	 * filter anymore once it has returned xzStreamEnd
    24  	 */
    25  	ret xzRet
    26  	/*
    27  	 * Absolute position relative to the beginning of the uncompressed
    28  	 * data (in a single .xz Block).
    29  	 */
    30  	pos int
    31  	/* x86 filter state */
    32  	x86PrevMask uint32
    33  	/* Temporary space to hold the variables from xzBuf */
    34  	out    []byte
    35  	outPos int
    36  	temp   struct {
    37  		/* Amount of already filtered data in the beginning of buf */
    38  		filtered int
    39  		/*
    40  		 * Buffer to hold a mix of filtered and unfiltered data. This
    41  		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
    42  		 *
    43  		 * Type         Alignment   Look-ahead
    44  		 * x86              1           4
    45  		 * PowerPC          4           0
    46  		 * IA-64           16           0
    47  		 * ARM              4           0
    48  		 * ARM-Thumb        2           2
    49  		 * SPARC            4           0
    50  		 */
    51  		buf      []byte // slice buf will be backed by bufArray
    52  		bufArray [16]byte
    53  	}
    54  }
    55  
    56  /*
    57   * This is used to test the most significant byte of a memory address
    58   * in an x86 instruction.
    59   */
    60  func bcjX86TestMSByte(b byte) bool {
    61  	return b == 0x00 || b == 0xff
    62  }
    63  
    64  func bcjX86Filter(s *xzDecBCJ, buf []byte) int {
    65  	var maskToAllowedStatus = []bool{
    66  		true, true, true, false, true, false, false, false,
    67  	}
    68  	var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3}
    69  	var i int
    70  	var prevPos int = -1
    71  	var prevMask uint32 = s.x86PrevMask
    72  	var src uint32
    73  	var dest uint32
    74  	var j uint32
    75  	var b byte
    76  	if len(buf) <= 4 {
    77  		return 0
    78  	}
    79  	for i = 0; i < len(buf)-4; i++ {
    80  		if buf[i]&0xfe != 0xe8 {
    81  			continue
    82  		}
    83  		prevPos = i - prevPos
    84  		if prevPos > 3 {
    85  			prevMask = 0
    86  		} else {
    87  			prevMask = (prevMask << (uint(prevPos) - 1)) & 7
    88  			if prevMask != 0 {
    89  				b = buf[i+4-int(maskToBitNum[prevMask])]
    90  				if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) {
    91  					prevPos = i
    92  					prevMask = prevMask<<1 | 1
    93  					continue
    94  				}
    95  			}
    96  		}
    97  		prevPos = i
    98  		if bcjX86TestMSByte(buf[i+4]) {
    99  			src = getLE32(buf[i+1:])
   100  			for {
   101  				dest = src - uint32(s.pos+i+5)
   102  				if prevMask == 0 {
   103  					break
   104  				}
   105  				j = uint32(maskToBitNum[prevMask]) * 8
   106  				b = byte(dest >> (24 - j))
   107  				if !bcjX86TestMSByte(b) {
   108  					break
   109  				}
   110  				src = dest ^ (1<<(32-j) - 1)
   111  			}
   112  			dest &= 0x01FFFFFF
   113  			dest |= 0 - dest&0x01000000
   114  			putLE32(dest, buf[i+1:])
   115  			i += 4
   116  		} else {
   117  			prevMask = prevMask<<1 | 1
   118  		}
   119  	}
   120  	prevPos = i - prevPos
   121  	if prevPos > 3 {
   122  		s.x86PrevMask = 0
   123  	} else {
   124  		s.x86PrevMask = prevMask << (uint(prevPos) - 1)
   125  	}
   126  	return i
   127  }
   128  
   129  func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int {
   130  	var i int
   131  	var instr uint32
   132  	for i = 0; i+4 <= len(buf); i += 4 {
   133  		instr = getBE32(buf[i:])
   134  		if instr&0xFC000003 == 0x48000001 {
   135  			instr &= 0x03FFFFFC
   136  			instr -= uint32(s.pos + i)
   137  			instr &= 0x03FFFFFC
   138  			instr |= 0x48000001
   139  			putBE32(instr, buf[i:])
   140  		}
   141  	}
   142  	return i
   143  }
   144  
   145  var bcjIA64BranchTable = [...]byte{
   146  	0, 0, 0, 0, 0, 0, 0, 0,
   147  	0, 0, 0, 0, 0, 0, 0, 0,
   148  	4, 4, 6, 6, 0, 0, 7, 7,
   149  	4, 4, 0, 0, 4, 4, 0, 0,
   150  }
   151  
   152  func bcjIA64Filter(s *xzDecBCJ, buf []byte) int {
   153  	var branchTable = bcjIA64BranchTable[:]
   154  	/*
   155  	 * The local variables take a little bit stack space, but it's less
   156  	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
   157  	 * stack usage here without doing that for the LZMA2 decoder too.
   158  	 */
   159  	/* Loop counters */
   160  	var i int
   161  	var j int
   162  	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
   163  	var slot uint32
   164  	/* Bitwise offset of the instruction indicated by slot */
   165  	var bitPos uint32
   166  	/* bit_pos split into byte and bit parts */
   167  	var bytePos uint32
   168  	var bitRes uint32
   169  	/* Address part of an instruction */
   170  	var addr uint32
   171  	/* Mask used to detect which instructions to convert */
   172  	var mask uint32
   173  	/* 41-bit instruction stored somewhere in the lowest 48 bits */
   174  	var instr uint64
   175  	/* Instruction normalized with bit_res for easier manipulation */
   176  	var norm uint64
   177  	for i = 0; i+16 <= len(buf); i += 16 {
   178  		mask = uint32(branchTable[buf[i]&0x1f])
   179  		for slot, bitPos = 0, 5; slot < 3; slot, bitPos = slot+1, bitPos+41 {
   180  			if (mask>>slot)&1 == 0 {
   181  				continue
   182  			}
   183  			bytePos = bitPos >> 3
   184  			bitRes = bitPos & 7
   185  			instr = 0
   186  			for j = 0; j < 6; j++ {
   187  				instr |= uint64(buf[i+j+int(bytePos)]) << (8 * uint(j))
   188  			}
   189  			norm = instr >> bitRes
   190  			if (norm>>37)&0x0f == 0x05 && (norm>>9)&0x07 == 0 {
   191  				addr = uint32((norm >> 13) & 0x0fffff)
   192  				addr |= (uint32(norm>>36) & 1) << 20
   193  				addr <<= 4
   194  				addr -= uint32(s.pos + i)
   195  				addr >>= 4
   196  				norm &= ^(uint64(0x8fffff) << 13)
   197  				norm |= uint64(addr&0x0fffff) << 13
   198  				norm |= uint64(addr&0x100000) << (36 - 20)
   199  				instr &= 1<<bitRes - 1
   200  				instr |= norm << bitRes
   201  				for j = 0; j < 6; j++ {
   202  					buf[i+j+int(bytePos)] = byte(instr >> (8 * uint(j)))
   203  				}
   204  			}
   205  		}
   206  	}
   207  	return i
   208  }
   209  
   210  func bcjARMFilter(s *xzDecBCJ, buf []byte) int {
   211  	var i int
   212  	var addr uint32
   213  	for i = 0; i+4 <= len(buf); i += 4 {
   214  		if buf[i+3] == 0xeb {
   215  			addr = uint32(buf[i]) | uint32(buf[i+1])<<8 |
   216  				uint32(buf[i+2])<<16
   217  			addr <<= 2
   218  			addr -= uint32(s.pos + i + 8)
   219  			addr >>= 2
   220  			buf[i] = byte(addr)
   221  			buf[i+1] = byte(addr >> 8)
   222  			buf[i+2] = byte(addr >> 16)
   223  		}
   224  	}
   225  	return i
   226  }
   227  
   228  func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int {
   229  	var i int
   230  	var addr uint32
   231  	for i = 0; i+4 <= len(buf); i += 2 {
   232  		if buf[i+1]&0xf8 == 0xf0 && buf[i+3]&0xf8 == 0xf8 {
   233  			addr = uint32(buf[i+1]&0x07)<<19 |
   234  				uint32(buf[i])<<11 |
   235  				uint32(buf[i+3]&0x07)<<8 |
   236  				uint32(buf[i+2])
   237  			addr <<= 1
   238  			addr -= uint32(s.pos + i + 4)
   239  			addr >>= 1
   240  			buf[i+1] = byte(0xf0 | (addr>>19)&0x07)
   241  			buf[i] = byte(addr >> 11)
   242  			buf[i+3] = byte(0xf8 | (addr>>8)&0x07)
   243  			buf[i+2] = byte(addr)
   244  			i += 2
   245  		}
   246  	}
   247  	return i
   248  }
   249  
   250  func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int {
   251  	var i int
   252  	var instr uint32
   253  	for i = 0; i+4 <= len(buf); i += 4 {
   254  		instr = getBE32(buf[i:])
   255  		if instr>>22 == 0x100 || instr>>22 == 0x1ff {
   256  			instr <<= 2
   257  			instr -= uint32(s.pos + i)
   258  			instr >>= 2
   259  			instr = (0x40000000 - instr&0x400000) |
   260  				0x40000000 | (instr & 0x3FFFFF)
   261  			putBE32(instr, buf[i:])
   262  		}
   263  	}
   264  	return i
   265  }
   266  
   267  /*
   268   * Apply the selected BCJ filter. Update *pos and s.pos to match the amount
   269   * of data that got filtered.
   270   */
   271  func bcjApply(s *xzDecBCJ, buf []byte, pos *int) {
   272  	var filtered int
   273  	buf = buf[*pos:]
   274  	switch s.typ {
   275  	case idBCJX86:
   276  		filtered = bcjX86Filter(s, buf)
   277  	case idBCJPowerPC:
   278  		filtered = bcjPowerPCFilter(s, buf)
   279  	case idBCJIA64:
   280  		filtered = bcjIA64Filter(s, buf)
   281  	case idBCJARM:
   282  		filtered = bcjARMFilter(s, buf)
   283  	case idBCJARMThumb:
   284  		filtered = bcjARMThumbFilter(s, buf)
   285  	case idBCJSPARC:
   286  		filtered = bcjSPARCFilter(s, buf)
   287  	default:
   288  		/* Never reached */
   289  	}
   290  	*pos += filtered
   291  	s.pos += filtered
   292  }
   293  
   294  /*
   295   * Flush pending filtered data from temp to the output buffer.
   296   * Move the remaining mixture of possibly filtered and unfiltered
   297   * data to the beginning of temp.
   298   */
   299  func bcjFlush(s *xzDecBCJ, b *xzBuf) {
   300  	var copySize int
   301  	copySize = len(b.out) - b.outPos
   302  	if copySize > s.temp.filtered {
   303  		copySize = s.temp.filtered
   304  	}
   305  	copy(b.out[b.outPos:], s.temp.buf[:copySize])
   306  	b.outPos += copySize
   307  	s.temp.filtered -= copySize
   308  	copy(s.temp.buf, s.temp.buf[copySize:])
   309  	s.temp.buf = s.temp.buf[:len(s.temp.buf)-copySize]
   310  }
   311  
   312  /*
   313   * Decode raw stream which has a BCJ filter as the first filter.
   314   *
   315   * The BCJ filter functions are primitive in sense that they process the
   316   * data in chunks of 1-16 bytes. To hide this issue, this function does
   317   * some buffering.
   318   */
   319  func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet {
   320  	var outStart int
   321  	/*
   322  	 * Flush pending already filtered data to the output buffer. Return
   323  	 * immediately if we couldn't flush everything, or if the next
   324  	 * filter in the chain had already returned xzStreamEnd.
   325  	 */
   326  	if s.temp.filtered > 0 {
   327  		bcjFlush(s, b)
   328  		if s.temp.filtered > 0 {
   329  			return xzOK
   330  		}
   331  		if s.ret == xzStreamEnd {
   332  			return xzStreamEnd
   333  		}
   334  	}
   335  	/*
   336  	 * If we have more output space than what is currently pending in
   337  	 * temp, copy the unfiltered data from temp to the output buffer
   338  	 * and try to fill the output buffer by decoding more data from the
   339  	 * next filter in the chain. Apply the BCJ filter on the new data
   340  	 * in the output buffer. If everything cannot be filtered, copy it
   341  	 * to temp and rewind the output buffer position accordingly.
   342  	 *
   343  	 * This needs to be always run when len(temp.buf) == 0 to handle a special
   344  	 * case where the output buffer is full and the next filter has no
   345  	 * more output coming but hasn't returned xzStreamEnd yet.
   346  	 */
   347  	if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 {
   348  		outStart = b.outPos
   349  		copy(b.out[b.outPos:], s.temp.buf)
   350  		b.outPos += len(s.temp.buf)
   351  		s.ret = chain(b)
   352  		if s.ret != xzStreamEnd && s.ret != xzOK {
   353  			return s.ret
   354  		}
   355  		bcjApply(s, b.out[:b.outPos], &outStart)
   356  		/*
   357  		 * As an exception, if the next filter returned xzStreamEnd,
   358  		 * we can do that too, since the last few bytes that remain
   359  		 * unfiltered are meant to remain unfiltered.
   360  		 */
   361  		if s.ret == xzStreamEnd {
   362  			return xzStreamEnd
   363  		}
   364  		s.temp.buf = s.temp.bufArray[:b.outPos-outStart]
   365  		b.outPos -= len(s.temp.buf)
   366  		copy(s.temp.buf, b.out[b.outPos:])
   367  		/*
   368  		 * If there wasn't enough input to the next filter to fill
   369  		 * the output buffer with unfiltered data, there's no point
   370  		 * to try decoding more data to temp.
   371  		 */
   372  		if b.outPos+len(s.temp.buf) < len(b.out) {
   373  			return xzOK
   374  		}
   375  	}
   376  	/*
   377  	 * We have unfiltered data in temp. If the output buffer isn't full
   378  	 * yet, try to fill the temp buffer by decoding more data from the
   379  	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
   380  	 * fill the actual output buffer by copying filtered data from temp.
   381  	 * A mix of filtered and unfiltered data may be left in temp; it will
   382  	 * be taken care on the next call to this function.
   383  	 */
   384  	if b.outPos < len(b.out) {
   385  		/* Make b.out temporarily point to s.temp. */
   386  		s.out = b.out
   387  		s.outPos = b.outPos
   388  		b.out = s.temp.bufArray[:]
   389  		b.outPos = len(s.temp.buf)
   390  		s.ret = chain(b)
   391  		s.temp.buf = s.temp.bufArray[:b.outPos]
   392  		b.out = s.out
   393  		b.outPos = s.outPos
   394  		if s.ret != xzOK && s.ret != xzStreamEnd {
   395  			return s.ret
   396  		}
   397  		bcjApply(s, s.temp.buf, &s.temp.filtered)
   398  		/*
   399  		 * If the next filter returned xzStreamEnd, we mark that
   400  		 * everything is filtered, since the last unfiltered bytes
   401  		 * of the stream are meant to be left as is.
   402  		 */
   403  		if s.ret == xzStreamEnd {
   404  			s.temp.filtered = len(s.temp.buf)
   405  		}
   406  		bcjFlush(s, b)
   407  		if s.temp.filtered > 0 {
   408  			return xzOK
   409  		}
   410  	}
   411  	return s.ret
   412  }
   413  
   414  /*
   415   * Allocate memory for BCJ decoders. xzDecBCJReset must be used before
   416   * calling xzDecBCJRun.
   417   */
   418  func xzDecBCJCreate() *xzDecBCJ {
   419  	return new(xzDecBCJ)
   420  }
   421  
   422  /*
   423   * Decode the Filter ID of a BCJ filter and check the start offset is
   424   * valid. Returns xzOK if the given Filter ID and offset is
   425   * supported. Otherwise xzOptionsError is returned.
   426   */
   427  func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet {
   428  	switch id {
   429  	case idBCJX86:
   430  	case idBCJPowerPC:
   431  	case idBCJIA64:
   432  	case idBCJARM:
   433  	case idBCJARMThumb:
   434  	case idBCJSPARC:
   435  	default:
   436  		/* Unsupported Filter ID */
   437  		return xzOptionsError
   438  	}
   439  	// check offset is a multiple of alignment
   440  	switch id {
   441  	case idBCJPowerPC, idBCJARM, idBCJSPARC:
   442  		if offset%4 != 0 {
   443  			return xzOptionsError
   444  		}
   445  	case idBCJIA64:
   446  		if offset%16 != 0 {
   447  			return xzOptionsError
   448  		}
   449  	case idBCJARMThumb:
   450  		if offset%2 != 0 {
   451  			return xzOptionsError
   452  		}
   453  	}
   454  	s.typ = id
   455  	s.ret = xzOK
   456  	s.pos = offset
   457  	s.x86PrevMask = 0
   458  	s.temp.filtered = 0
   459  	s.temp.buf = nil
   460  	return xzOK
   461  }
   462  

View as plain text