1
2
3
4
5 package flate
6
7 import (
8 "bytes"
9 "encoding/binary"
10 "fmt"
11 "io"
12 "math"
13 )
14
15 const (
16
17
18
19
20 lengthShift = 22
21 offsetMask = 1<<lengthShift - 1
22 typeMask = 3 << 30
23 literalType = 0 << 30
24 matchType = 1 << 30
25 matchOffsetOnlyMask = 0xffff
26 )
27
28
29
30 var lengthCodes = [256]uint8{
31 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
32 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
33 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
34 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
35 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
36 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
37 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
38 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
39 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
40 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
41 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
42 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
43 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
44 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
45 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
46 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
47 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
48 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
49 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
50 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
51 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
52 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
53 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
54 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
55 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
56 27, 27, 27, 27, 27, 28,
57 }
58
59
60 var lengthCodes1 = [256]uint8{
61 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
62 10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
63 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
64 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
65 18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
66 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
67 20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
68 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
69 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
70 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
71 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
72 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
73 24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
74 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
75 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
76 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
77 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
78 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
79 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
80 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
81 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
82 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
83 27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
84 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
85 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
86 28, 28, 28, 28, 28, 29,
87 }
88
89 var offsetCodes = [256]uint32{
90 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
91 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
92 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
93 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
94 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
95 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
96 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
97 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
98 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
99 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
100 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
101 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
102 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
103 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
104 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
105 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
106 }
107
108
109 var offsetCodes14 = [256]uint32{
110 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
111 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
112 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
113 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
114 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
115 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
116 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
117 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
118 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
119 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
120 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
121 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
122 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
123 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
124 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
125 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
126 }
127
128 type token uint32
129
130 type tokens struct {
131 extraHist [32]uint16
132 offHist [32]uint16
133 litHist [256]uint16
134 nFilled int
135 n uint16
136 tokens [maxStoreBlockSize + 1]token
137 }
138
139 func (t *tokens) Reset() {
140 if t.n == 0 {
141 return
142 }
143 t.n = 0
144 t.nFilled = 0
145 for i := range t.litHist[:] {
146 t.litHist[i] = 0
147 }
148 for i := range t.extraHist[:] {
149 t.extraHist[i] = 0
150 }
151 for i := range t.offHist[:] {
152 t.offHist[i] = 0
153 }
154 }
155
156 func (t *tokens) Fill() {
157 if t.n == 0 {
158 return
159 }
160 for i, v := range t.litHist[:] {
161 if v == 0 {
162 t.litHist[i] = 1
163 t.nFilled++
164 }
165 }
166 for i, v := range t.extraHist[:literalCount-256] {
167 if v == 0 {
168 t.nFilled++
169 t.extraHist[i] = 1
170 }
171 }
172 for i, v := range t.offHist[:offsetCodeCount] {
173 if v == 0 {
174 t.offHist[i] = 1
175 }
176 }
177 }
178
179 func indexTokens(in []token) tokens {
180 var t tokens
181 t.indexTokens(in)
182 return t
183 }
184
185 func (t *tokens) indexTokens(in []token) {
186 t.Reset()
187 for _, tok := range in {
188 if tok < matchType {
189 t.AddLiteral(tok.literal())
190 continue
191 }
192 t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
193 }
194 }
195
196
197 func emitLiteral(dst *tokens, lit []byte) {
198 for _, v := range lit {
199 dst.tokens[dst.n] = token(v)
200 dst.litHist[v]++
201 dst.n++
202 }
203 }
204
205 func (t *tokens) AddLiteral(lit byte) {
206 t.tokens[t.n] = token(lit)
207 t.litHist[lit]++
208 t.n++
209 }
210
211
212 func mFastLog2(val float32) float32 {
213 ux := int32(math.Float32bits(val))
214 log2 := (float32)(((ux >> 23) & 255) - 128)
215 ux &= -0x7f800001
216 ux += 127 << 23
217 uval := math.Float32frombits(uint32(ux))
218 log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
219 return log2
220 }
221
222
223
224
225 func (t *tokens) EstimatedBits() int {
226 shannon := float32(0)
227 bits := int(0)
228 nMatches := 0
229 total := int(t.n) + t.nFilled
230 if total > 0 {
231 invTotal := 1.0 / float32(total)
232 for _, v := range t.litHist[:] {
233 if v > 0 {
234 n := float32(v)
235 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
236 }
237 }
238
239 shannon += 15
240 for i, v := range t.extraHist[1 : literalCount-256] {
241 if v > 0 {
242 n := float32(v)
243 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
244 bits += int(lengthExtraBits[i&31]) * int(v)
245 nMatches += int(v)
246 }
247 }
248 }
249 if nMatches > 0 {
250 invTotal := 1.0 / float32(nMatches)
251 for i, v := range t.offHist[:offsetCodeCount] {
252 if v > 0 {
253 n := float32(v)
254 shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
255 bits += int(offsetExtraBits[i&31]) * int(v)
256 }
257 }
258 }
259 return int(shannon) + bits
260 }
261
262
263
264 func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
265 if debugDeflate {
266 if xlength >= maxMatchLength+baseMatchLength {
267 panic(fmt.Errorf("invalid length: %v", xlength))
268 }
269 if xoffset >= maxMatchOffset+baseMatchOffset {
270 panic(fmt.Errorf("invalid offset: %v", xoffset))
271 }
272 }
273 oCode := offsetCode(xoffset)
274 xoffset |= oCode << 16
275
276 t.extraHist[lengthCodes1[uint8(xlength)]]++
277 t.offHist[oCode&31]++
278 t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
279 t.n++
280 }
281
282
283
284 func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
285 if debugDeflate {
286 if xoffset >= maxMatchOffset+baseMatchOffset {
287 panic(fmt.Errorf("invalid offset: %v", xoffset))
288 }
289 }
290 oc := offsetCode(xoffset)
291 xoffset |= oc << 16
292 for xlength > 0 {
293 xl := xlength
294 if xl > 258 {
295
296 if xl > 258+baseMatchLength {
297 xl = 258
298 } else {
299 xl = 258 - baseMatchLength
300 }
301 }
302 xlength -= xl
303 xl -= baseMatchLength
304 t.extraHist[lengthCodes1[uint8(xl)]]++
305 t.offHist[oc&31]++
306 t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
307 t.n++
308 }
309 }
310
311 func (t *tokens) AddEOB() {
312 t.tokens[t.n] = token(endBlockMarker)
313 t.extraHist[0]++
314 t.n++
315 }
316
317 func (t *tokens) Slice() []token {
318 return t.tokens[:t.n]
319 }
320
321
322 func (t *tokens) VarInt() []byte {
323 var b = make([]byte, binary.MaxVarintLen32*int(t.n))
324 var off int
325 for _, v := range t.tokens[:t.n] {
326 off += binary.PutUvarint(b[off:], uint64(v))
327 }
328 return b[:off]
329 }
330
331
332
333 func (t *tokens) FromVarInt(b []byte) error {
334 var buf = bytes.NewReader(b)
335 var toks []token
336 for {
337 r, err := binary.ReadUvarint(buf)
338 if err == io.EOF {
339 break
340 }
341 if err != nil {
342 return err
343 }
344 toks = append(toks, token(r))
345 }
346 t.indexTokens(toks)
347 return nil
348 }
349
350
351 func (t token) typ() uint32 { return uint32(t) & typeMask }
352
353
354 func (t token) literal() uint8 { return uint8(t) }
355
356
357 func (t token) offset() uint32 { return uint32(t) & offsetMask }
358
359 func (t token) length() uint8 { return uint8(t >> lengthShift) }
360
361
362 func lengthCode(len uint8) uint8 { return lengthCodes[len] }
363
364
365 func offsetCode(off uint32) uint32 {
366 if false {
367 if off < uint32(len(offsetCodes)) {
368 return offsetCodes[off&255]
369 } else if off>>7 < uint32(len(offsetCodes)) {
370 return offsetCodes[(off>>7)&255] + 14
371 } else {
372 return offsetCodes[(off>>14)&255] + 28
373 }
374 }
375 if off < uint32(len(offsetCodes)) {
376 return offsetCodes[uint8(off)]
377 }
378 return offsetCodes14[uint8(off>>7)]
379 }
380
View as plain text