1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package metadata
18
19 import (
20 "encoding/binary"
21 "testing"
22
23 "github.com/apache/arrow/go/v15/parquet"
24 "github.com/apache/arrow/go/v15/parquet/schema"
25 "github.com/stretchr/testify/assert"
26 "github.com/stretchr/testify/require"
27 )
28
29 func TestSignedByteArrayCompare(t *testing.T) {
30 s := ByteArrayStatistics{
31 statistics: statistics{
32 order: schema.SortSIGNED,
33 },
34 }
35
36
37
38
39
40
41 tests := []struct {
42 b []byte
43 order int
44 }{
45 {[]byte{0x80, 0x80, 0, 0}, 0},
46 {[]byte{ 0x80, 0, 0}, 1},
47 {[]byte{0xFF, 0x80, 0, 0}, 1},
48 {[]byte{ 0xFF, 0x01, 0}, 2},
49 {[]byte{ 0x80, 0}, 3},
50 {[]byte{ 0xFF, 0x80, 0}, 3},
51 {[]byte{0xFF, 0xFF, 0x80, 0}, 3},
52 {[]byte{ 0x80}, 4},
53 {[]byte{ 0xFF}, 5},
54 {[]byte{ 0x01, 0x01}, 6},
55 {[]byte{ 0, 0x01, 0x01}, 6},
56 {[]byte{0, 0, 0x01, 0x01}, 6},
57 {[]byte{ 0x01, 0x01, 0}, 7},
58 {[]byte{0x01, 0x01, 0, 0}, 8},
59 }
60
61 for i, tt := range tests {
62
63 assert.Truef(t, s.less(parquet.ByteArray{}, parquet.ByteArray(tt.b)), "case: %d", i)
64 assert.Falsef(t, s.less(parquet.ByteArray(tt.b), parquet.ByteArray{}), "case: %d", i)
65
66 assert.Falsef(t, s.less(parquet.ByteArray(tt.b), parquet.ByteArray(tt.b)), "case: %d", i)
67
68 for j, case2 := range tests {
69 var fn func(assert.TestingT, bool, string, ...interface{}) bool
70 if tt.order < case2.order {
71 fn = assert.Truef
72 } else {
73 fn = assert.Falsef
74 }
75 fn(t, s.less(parquet.ByteArray(tt.b), parquet.ByteArray(case2.b)),
76 "%d (order: %d) %d (order: %d)", i, tt.order, j, case2.order)
77 }
78 }
79 }
80
81 func TestUnsignedByteArrayCompare(t *testing.T) {
82 s := ByteArrayStatistics{
83 statistics: statistics{
84 order: schema.SortUNSIGNED,
85 },
86 }
87
88 s1ba := parquet.ByteArray("arrange")
89 s2ba := parquet.ByteArray("arrangement")
90 assert.True(t, s.less(s1ba, s2ba))
91
92
93 s1ba = parquet.ByteArray("braten")
94 s2ba = parquet.ByteArray("bügeln")
95 assert.True(t, s.less(s1ba, s2ba))
96
97 s1ba = parquet.ByteArray("ünk123456")
98 s2ba = parquet.ByteArray("ănk123456")
99 assert.True(t, s.less(s1ba, s2ba))
100 }
101
102 func TestSignedCompareFLBA(t *testing.T) {
103 s := FixedLenByteArrayStatistics{
104 statistics: statistics{order: schema.SortSIGNED},
105 }
106
107 values := []parquet.FixedLenByteArray{
108 []byte{0x80, 0, 0, 0},
109 []byte{0xFF, 0xFF, 0x01, 0},
110 []byte{0xFF, 0xFF, 0x80, 0},
111 []byte{0xFF, 0xFF, 0xFF, 0x80},
112 []byte{0xFF, 0xFF, 0xFF, 0xFF},
113 []byte{0, 0, 0x01, 0x01},
114 []byte{0, 0x01, 0x01, 0},
115 []byte{0x01, 0x01, 0, 0},
116 }
117
118 for i, v := range values {
119 assert.Falsef(t, s.less(v, v), "%d", i)
120 for j, v2 := range values[i+1:] {
121 assert.Truef(t, s.less(v, v2), "%d %d", i, j)
122 assert.Falsef(t, s.less(v2, v), "%d %d", j, i)
123 }
124 }
125 }
126
127 func TestUnsignedCompareFLBA(t *testing.T) {
128 s := FixedLenByteArrayStatistics{
129 statistics: statistics{order: schema.SortUNSIGNED},
130 }
131
132 s1flba := parquet.FixedLenByteArray("Anti123456")
133 s2flba := parquet.FixedLenByteArray("Bunkd123456")
134 assert.True(t, s.less(s1flba, s2flba))
135
136 s1flba = parquet.FixedLenByteArray("Bunk123456")
137 s2flba = parquet.FixedLenByteArray("Bünk123456")
138 assert.True(t, s.less(s1flba, s2flba))
139 }
140
141 func TestSignedCompareInt96(t *testing.T) {
142 s := Int96Statistics{
143 statistics: statistics{order: schema.SortSIGNED},
144 }
145
146 val := -14
147
148 var (
149 a = parquet.NewInt96([3]uint32{1, 41, 14})
150 b = parquet.NewInt96([3]uint32{1, 41, 42})
151 aa = parquet.NewInt96([3]uint32{1, 41, 14})
152 bb = parquet.NewInt96([3]uint32{1, 41, 14})
153 aaa = parquet.NewInt96([3]uint32{1, 41, uint32(val)})
154 bbb = parquet.NewInt96([3]uint32{1, 41, 42})
155 )
156
157 assert.True(t, s.less(a, b))
158 assert.True(t, !s.less(aa, bb) && !s.less(bb, aa))
159 assert.True(t, s.less(aaa, bbb))
160 }
161
162 func TestUnsignedCompareInt96(t *testing.T) {
163 s := Int96Statistics{
164 statistics: statistics{order: schema.SortUNSIGNED},
165 }
166
167 valb := -41
168 valbb := -14
169
170 var (
171 a = parquet.NewInt96([3]uint32{1, 41, 14})
172 b = parquet.NewInt96([3]uint32{1, uint32(valb), 42})
173 aa = parquet.NewInt96([3]uint32{1, 41, 14})
174 bb = parquet.NewInt96([3]uint32{1, 41, uint32(valbb)})
175 aaa parquet.Int96
176 bbb parquet.Int96
177 )
178
179 assert.True(t, s.less(a, b))
180 assert.True(t, s.less(aa, bb))
181
182 binary.LittleEndian.PutUint32(aaa[8:], 2451545)
183 binary.LittleEndian.PutUint32(bbb[8:], 2451546)
184
185 aaa.SetNanoSeconds(45296000000000)
186
187 bbb.SetNanoSeconds(45290000000000)
188 assert.True(t, s.less(aaa, bbb))
189
190 binary.LittleEndian.PutUint32(aaa[8:], 2451545)
191 binary.LittleEndian.PutUint32(bbb[8:], 2451545)
192
193 aaa.SetNanoSeconds(41696000000000)
194
195 bbb.SetNanoSeconds(45290000000000)
196 assert.True(t, s.less(aaa, bbb))
197
198 binary.LittleEndian.PutUint32(aaa[8:], 2451545)
199 binary.LittleEndian.PutUint32(bbb[8:], 2451545)
200
201 aaa.SetNanoSeconds(45295000000000)
202
203 bbb.SetNanoSeconds(45296000000000)
204 assert.True(t, s.less(aaa, bbb))
205 }
206
207 func TestCompareSignedInt64(t *testing.T) {
208 var (
209 a int64 = 1
210 b int64 = 4
211 aa int64 = 1
212 bb int64 = 1
213 aaa int64 = -1
214 bbb int64 = 1
215 )
216
217 n := schema.NewInt64Node("signedint64", parquet.Repetitions.Required, -1)
218 descr := schema.NewColumn(n, 0, 0)
219 s := NewStatistics(descr, nil).(*Int64Statistics)
220
221 assert.True(t, s.less(a, b))
222 assert.True(t, !s.less(aa, bb) && !s.less(bb, aa))
223 assert.True(t, s.less(aaa, bbb))
224 }
225
226 func TestCompareUnsignedInt64(t *testing.T) {
227 var (
228 a int64 = 1
229 b int64 = 4
230 aa int64 = 1
231 bb int64 = 1
232 aaa int64 = 1
233 bbb int64 = -1
234 )
235
236 n, err := schema.NewPrimitiveNodeConverted("unsigned int64", parquet.Repetitions.Required, parquet.Types.Int64, schema.ConvertedTypes.Uint64, 0, 0, 0, 0)
237 require.NoError(t, err)
238 descr := schema.NewColumn(n, 0, 0)
239
240 assert.Equal(t, schema.SortUNSIGNED, descr.SortOrder())
241 s := NewStatistics(descr, nil).(*Int64Statistics)
242
243 assert.True(t, s.less(a, b))
244 assert.True(t, !s.less(aa, bb) && !s.less(bb, aa))
245 assert.True(t, s.less(aaa, bbb))
246 }
247
248 func TestCompareUnsignedInt32(t *testing.T) {
249 var (
250 a int32 = 1
251 b int32 = 4
252 aa int32 = 1
253 bb int32 = 1
254 aaa int32 = 1
255 bbb int32 = -1
256 )
257
258 n, err := schema.NewPrimitiveNodeConverted("unsigned int32", parquet.Repetitions.Required, parquet.Types.Int32, schema.ConvertedTypes.Uint32, 0, 0, 0, 0)
259 require.NoError(t, err)
260 descr := schema.NewColumn(n, 0, 0)
261
262 assert.Equal(t, schema.SortUNSIGNED, descr.SortOrder())
263 s := NewStatistics(descr, nil).(*Int32Statistics)
264
265 assert.True(t, s.less(a, b))
266 assert.True(t, !s.less(aa, bb) && !s.less(bb, aa))
267 assert.True(t, s.less(aaa, bbb))
268 }
269
View as plain text