1
2
3
4
5 package flate
6
7 import (
8 "math"
9 "math/bits"
10 )
11
12 const (
13 maxBitsLimit = 16
14
15 literalCount = 286
16 )
17
18
19 type hcode uint32
20
21 func (h hcode) len() uint8 {
22 return uint8(h)
23 }
24
25 func (h hcode) code64() uint64 {
26 return uint64(h >> 8)
27 }
28
29 func (h hcode) zero() bool {
30 return h == 0
31 }
32
33 type huffmanEncoder struct {
34 codes []hcode
35 bitCount [17]int32
36
37
38
39
40 freqcache [literalCount + 1]literalNode
41 }
42
43 type literalNode struct {
44 literal uint16
45 freq uint16
46 }
47
48
49 type levelInfo struct {
50
51 level int32
52
53
54 lastFreq int32
55
56
57 nextCharFreq int32
58
59
60
61 nextPairFreq int32
62
63
64
65 needed int32
66 }
67
68
69 func (h *hcode) set(code uint16, length uint8) {
70 *h = hcode(length) | (hcode(code) << 8)
71 }
72
73 func newhcode(code uint16, length uint8) hcode {
74 return hcode(length) | (hcode(code) << 8)
75 }
76
77 func reverseBits(number uint16, bitLength byte) uint16 {
78 return bits.Reverse16(number << ((16 - bitLength) & 15))
79 }
80
81 func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
82
83 func newHuffmanEncoder(size int) *huffmanEncoder {
84
85 c := uint(bits.Len32(uint32(size - 1)))
86 return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
87 }
88
89
90 func generateFixedLiteralEncoding() *huffmanEncoder {
91 h := newHuffmanEncoder(literalCount)
92 codes := h.codes
93 var ch uint16
94 for ch = 0; ch < literalCount; ch++ {
95 var bits uint16
96 var size uint8
97 switch {
98 case ch < 144:
99
100 bits = ch + 48
101 size = 8
102 case ch < 256:
103
104 bits = ch + 400 - 144
105 size = 9
106 case ch < 280:
107
108 bits = ch - 256
109 size = 7
110 default:
111
112 bits = ch + 192 - 280
113 size = 8
114 }
115 codes[ch] = newhcode(reverseBits(bits, size), size)
116 }
117 return h
118 }
119
120 func generateFixedOffsetEncoding() *huffmanEncoder {
121 h := newHuffmanEncoder(30)
122 codes := h.codes
123 for ch := range codes {
124 codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
125 }
126 return h
127 }
128
129 var fixedLiteralEncoding = generateFixedLiteralEncoding()
130 var fixedOffsetEncoding = generateFixedOffsetEncoding()
131
132 func (h *huffmanEncoder) bitLength(freq []uint16) int {
133 var total int
134 for i, f := range freq {
135 if f != 0 {
136 total += int(f) * int(h.codes[i].len())
137 }
138 }
139 return total
140 }
141
142 func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
143 var total int
144 for _, f := range b {
145 total += int(h.codes[f].len())
146 }
147 return total
148 }
149
150
151 func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
152 var total int
153 for i, f := range freq {
154 if f != 0 {
155 code := h.codes[i]
156 if code.zero() {
157 return math.MaxInt32
158 }
159 total += int(f) * int(code.len())
160 }
161 }
162 return total
163 }
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183 func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
184 if maxBits >= maxBitsLimit {
185 panic("flate: maxBits too large")
186 }
187 n := int32(len(list))
188 list = list[0 : n+1]
189 list[n] = maxNode()
190
191
192
193 if maxBits > n-1 {
194 maxBits = n - 1
195 }
196
197
198
199
200
201 var levels [maxBitsLimit]levelInfo
202
203
204
205
206 var leafCounts [maxBitsLimit][maxBitsLimit]int32
207
208
209 l2f := int32(list[2].freq)
210 l1f := int32(list[1].freq)
211 l0f := int32(list[0].freq) + int32(list[1].freq)
212
213 for level := int32(1); level <= maxBits; level++ {
214
215
216 levels[level] = levelInfo{
217 level: level,
218 lastFreq: l1f,
219 nextCharFreq: l2f,
220 nextPairFreq: l0f,
221 }
222 leafCounts[level][level] = 2
223 if level == 1 {
224 levels[level].nextPairFreq = math.MaxInt32
225 }
226 }
227
228
229 levels[maxBits].needed = 2*n - 4
230
231 level := uint32(maxBits)
232 for level < 16 {
233 l := &levels[level]
234 if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
235
236
237
238
239 l.needed = 0
240 levels[level+1].nextPairFreq = math.MaxInt32
241 level++
242 continue
243 }
244
245 prevFreq := l.lastFreq
246 if l.nextCharFreq < l.nextPairFreq {
247
248 n := leafCounts[level][level] + 1
249 l.lastFreq = l.nextCharFreq
250
251 leafCounts[level][level] = n
252 e := list[n]
253 if e.literal < math.MaxUint16 {
254 l.nextCharFreq = int32(e.freq)
255 } else {
256 l.nextCharFreq = math.MaxInt32
257 }
258 } else {
259
260
261
262 l.lastFreq = l.nextPairFreq
263
264 if true {
265 save := leafCounts[level][level]
266 leafCounts[level] = leafCounts[level-1]
267 leafCounts[level][level] = save
268 } else {
269 copy(leafCounts[level][:level], leafCounts[level-1][:level])
270 }
271 levels[l.level-1].needed = 2
272 }
273
274 if l.needed--; l.needed == 0 {
275
276
277
278
279 if l.level == maxBits {
280
281 break
282 }
283 levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
284 level++
285 } else {
286
287 for levels[level-1].needed > 0 {
288 level--
289 }
290 }
291 }
292
293
294
295 if leafCounts[maxBits][maxBits] != n {
296 panic("leafCounts[maxBits][maxBits] != n")
297 }
298
299 bitCount := h.bitCount[:maxBits+1]
300 bits := 1
301 counts := &leafCounts[maxBits]
302 for level := maxBits; level > 0; level-- {
303
304
305 bitCount[bits] = counts[level] - counts[level-1]
306 bits++
307 }
308 return bitCount
309 }
310
311
312
313 func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
314 code := uint16(0)
315 for n, bits := range bitCount {
316 code <<= 1
317 if n == 0 || bits == 0 {
318 continue
319 }
320
321
322
323
324 chunk := list[len(list)-int(bits):]
325
326 sortByLiteral(chunk)
327 for _, node := range chunk {
328 h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
329 code++
330 }
331 list = list[0 : len(list)-int(bits)]
332 }
333 }
334
335
336
337
338
339 func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
340 list := h.freqcache[:len(freq)+1]
341 codes := h.codes[:len(freq)]
342
343 count := 0
344
345 for i, f := range freq {
346 if f != 0 {
347 list[count] = literalNode{uint16(i), f}
348 count++
349 } else {
350 codes[i] = 0
351 }
352 }
353 list[count] = literalNode{}
354
355 list = list[:count]
356 if count <= 2 {
357
358
359 for i, node := range list {
360
361 h.codes[node.literal].set(uint16(i), 1)
362 }
363 return
364 }
365 sortByFreq(list)
366
367
368 bitCount := h.bitCounts(list, maxBits)
369
370 h.assignEncodingAndSize(bitCount, list)
371 }
372
373
374 func atLeastOne(v float32) float32 {
375 if v < 1 {
376 return 1
377 }
378 if v > 15 {
379 return 15
380 }
381 return v
382 }
383
384 func histogram(b []byte, h []uint16) {
385 if true && len(b) >= 8<<10 {
386
387 histogramSplit(b, h)
388 } else {
389 h = h[:256]
390 for _, t := range b {
391 h[t]++
392 }
393 }
394 }
395
396 func histogramSplit(b []byte, h []uint16) {
397
398
399 h = h[:256]
400 for len(b)&3 != 0 {
401 h[b[0]]++
402 b = b[1:]
403 }
404 n := len(b) / 4
405 x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:]
406 y, z, w = y[:len(x)], z[:len(x)], w[:len(x)]
407 for i, t := range x {
408 v0 := &h[t]
409 v1 := &h[y[i]]
410 v3 := &h[w[i]]
411 v2 := &h[z[i]]
412 *v0++
413 *v1++
414 *v2++
415 *v3++
416 }
417 }
418
View as plain text