...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package compress_test
18
19 import (
20 "bytes"
21 "io"
22 "math/rand"
23 "testing"
24
25 "github.com/apache/arrow/go/v15/parquet/compress"
26 "github.com/stretchr/testify/assert"
27 )
28
// Sizes, in bytes, of the buffers generated for the tests in this file.
const (
	// RandomDataSize is the amount of pseudo-random data to generate (3 MiB).
	RandomDataSize = 3 << 20
	// CompressibleDataSize is the amount of repetitive data to generate (8 MiB).
	CompressibleDataSize = 8 << 20
)
33
// makeRandomData returns size bytes drawn from a pseudo-random generator
// seeded with the fixed value 1234, so repeated calls with the same size
// produce identical output.
func makeRandomData(size int) []byte {
	src := rand.NewSource(1234)
	buf := make([]byte, size)
	// (*rand.Rand).Read always fills the whole slice and returns a nil error,
	// so its results are not inspected.
	rand.New(src).Read(buf)
	return buf
}
40
// makeCompressibleData returns size bytes consisting of a fixed sentence
// repeated back to back, with the final repetition cut off at size. The
// result is highly repetitive input for the codecs under test.
func makeCompressibleData(size int) []byte {
	const sentence = "Apache Arrow is a cross-language development platform for in-memory data"

	out := make([]byte, size)
	// Write the sentence once, then keep appending a copy of everything
	// written so far. copy clamps the last step to the space remaining.
	filled := copy(out, sentence)
	for filled < len(out) {
		filled += copy(out[filled:], out[:filled])
	}
	return out
}
51
52 func TestErrorForUnimplemented(t *testing.T) {
53 _, err := compress.GetCodec(compress.Codecs.Lzo)
54 assert.Error(t, err)
55
56 _, err = compress.GetCodec(compress.Codecs.Lz4)
57 assert.Error(t, err)
58 }
59
60 func TestCompressDataOneShot(t *testing.T) {
61 tests := []struct {
62 c compress.Compression
63 }{
64 {compress.Codecs.Uncompressed},
65 {compress.Codecs.Snappy},
66 {compress.Codecs.Gzip},
67 {compress.Codecs.Brotli},
68 {compress.Codecs.Zstd},
69
70
71 }
72
73 for _, tt := range tests {
74 t.Run(tt.c.String(), func(t *testing.T) {
75 codec, err := compress.GetCodec(tt.c)
76 assert.NoError(t, err)
77 data := makeCompressibleData(CompressibleDataSize)
78
79 buf := make([]byte, codec.CompressBound(int64(len(data))))
80 compressed := codec.Encode(buf, data)
81 assert.Same(t, &buf[0], &compressed[0])
82
83 out := make([]byte, len(data))
84 uncompressed := codec.Decode(out, compressed)
85 assert.Same(t, &out[0], &uncompressed[0])
86
87 assert.Exactly(t, data, uncompressed)
88 })
89 }
90 }
91
92 func TestCompressReaderWriter(t *testing.T) {
93 tests := []struct {
94 c compress.Compression
95 }{
96 {compress.Codecs.Uncompressed},
97 {compress.Codecs.Snappy},
98 {compress.Codecs.Gzip},
99 {compress.Codecs.Brotli},
100 {compress.Codecs.Zstd},
101
102
103 }
104
105 for _, tt := range tests {
106 t.Run(tt.c.String(), func(t *testing.T) {
107 var buf bytes.Buffer
108 codec, err := compress.GetCodec(tt.c)
109 assert.NoError(t, err)
110 data := makeRandomData(RandomDataSize)
111
112 wr := codec.NewWriter(&buf)
113
114 const chunkSize = 1111
115 input := data
116 for len(input) > 0 {
117 var (
118 n int
119 err error
120 )
121 if len(input) > chunkSize {
122 n, err = wr.Write(input[:chunkSize])
123 } else {
124 n, err = wr.Write(input)
125 }
126
127 assert.NoError(t, err)
128 input = input[n:]
129 }
130 wr.Close()
131
132 rdr := codec.NewReader(&buf)
133 out, err := io.ReadAll(rdr)
134 assert.NoError(t, err)
135 assert.Exactly(t, data, out)
136 })
137 }
138 }
139
View as plain text