1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package schema_test
18
19 import (
20 "log"
21 "os"
22 "reflect"
23 "testing"
24
25 "github.com/apache/arrow/go/v15/arrow/float16"
26 "github.com/apache/arrow/go/v15/parquet"
27 "github.com/apache/arrow/go/v15/parquet/schema"
28 "github.com/stretchr/testify/assert"
29 )
30
31 func ExampleNewSchemaFromStruct_primitives() {
32 type Schema struct {
33 Bool bool
34 Int8 int8
35 Uint16 uint16
36 Int32 int32
37 Int64 int64
38 Int96 parquet.Int96
39 Float float32
40 Double float64
41 ByteArray string
42 FixedLenByteArray [10]byte
43 }
44
45 sc, err := schema.NewSchemaFromStruct(Schema{})
46 if err != nil {
47 log.Fatal(err)
48 }
49
50 schema.PrintSchema(sc.Root(), os.Stdout, 2)
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 }
66
67 func ExampleNewSchemaFromStruct_convertedtypes() {
68 type ConvertedSchema struct {
69 Utf8 string `parquet:"name=utf8, converted=UTF8"`
70 Uint32 uint32 `parquet:"converted=INT_32"`
71 Date int32 `parquet:"name=date, converted=date"`
72 TimeMilli int32 `parquet:"name=timemilli, converted=TIME_MILLIS"`
73 TimeMicro int64 `parquet:"name=timemicro, converted=time_micros"`
74 TimeStampMilli int64 `parquet:"converted=timestamp_millis"`
75 TimeStampMicro int64 `parquet:"converted=timestamp_micros"`
76 Interval parquet.Int96 `parquet:"converted=INTERVAL"`
77 Decimal1 int32 `parquet:"converted=decimal, scale=2, precision=9"`
78 Decimal2 int64 `parquet:"converted=decimal, scale=2, precision=18"`
79 Decimal3 [12]byte `parquet:"converted=decimal, scale=2, precision=10"`
80 Decimal4 string `parquet:"converted=decimal, scale=2, precision=20"`
81 }
82
83 sc, err := schema.NewSchemaFromStruct(&ConvertedSchema{})
84 if err != nil {
85 log.Fatal(err)
86 }
87
88 schema.PrintSchema(sc.Root(), os.Stdout, 2)
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 }
106
107 func ExampleNewSchemaFromStruct_repetition() {
108 type RepetitionSchema struct {
109 List []int64 `parquet:"fieldid=1"`
110 Repeated []int64 `parquet:"repetition=repeated, fieldid=2"`
111 Optional *int64 `parquet:"fieldid=3"`
112 Required *int64 `parquet:"repetition=REQUIRED, fieldid=4"`
113 Opt int64 `parquet:"repetition=OPTIONAL, fieldid=5"`
114 }
115
116 sc, err := schema.NewSchemaFromStruct(RepetitionSchema{})
117 if err != nil {
118 log.Fatal(err)
119 }
120
121 schema.PrintSchema(sc.Root(), os.Stdout, 2)
122
123
124
125
126
127
128
129
130
131
132
133
134
135 }
136
137 func ExampleNewSchemaFromStruct_logicaltypes() {
138 type LogicalTypes struct {
139 String []byte `parquet:"logical=String"`
140 Enum string `parquet:"logical=enum"`
141 Date int32 `parquet:"logical=date"`
142 Decimal1 int32 `parquet:"logical=decimal, precision=9, scale=2"`
143 Decimal2 int32 `parquet:"logical=decimal, logical.precision=9, scale=2"`
144 Decimal3 int32 `parquet:"logical=decimal, precision=5, logical.precision=9, scale=1, logical.scale=3"`
145 TimeMilliUTC int32 `parquet:"logical=TIME, logical.unit=millis"`
146 TimeMilli int32 `parquet:"logical=Time, logical.unit=millis, logical.isadjustedutc=false"`
147 TimeMicros int64 `parquet:"logical=time, logical.unit=micros, logical.isadjustedutc=false"`
148 TimeMicrosUTC int64 `parquet:"logical=time, logical.unit=micros, logical.isadjustedutc=true"`
149 TimeNanos int64 `parquet:"logical=time, logical.unit=nanos"`
150 TimestampMilli int64 `parquet:"logical=timestamp, logical.unit=millis"`
151 TimestampMicrosNotUTC int64 `parquet:"logical=timestamp, logical.unit=micros, logical.isadjustedutc=false"`
152 TimestampNanos int64 `parquet:"logical=timestamp, logical.unit=nanos"`
153 JSON string `parquet:"logical=json"`
154 BSON []byte `parquet:"logical=BSON"`
155 UUID [16]byte `parquet:"logical=uuid"`
156 Float16 [2]byte `parquet:"logical=float16"`
157 Float16Optional *[2]byte `parquet:"logical=float16"`
158 Float16Num float16.Num
159 }
160
161 sc, err := schema.NewSchemaFromStruct(LogicalTypes{})
162 if err != nil {
163 log.Fatal(err)
164 }
165
166 schema.PrintSchema(sc.Root(), os.Stdout, 2)
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191 }
192
193 func ExampleNewSchemaFromStruct_physicaltype() {
194 type ChangeTypes struct {
195 Int32 int64 `parquet:"type=int32"`
196 FixedLen string `parquet:"type=fixed_len_byte_array, length=10"`
197 SliceAsFixed []byte `parquet:"type=fixed_len_byte_array, length=12"`
198 Int int `parquet:"type=int32"`
199 }
200
201 sc, err := schema.NewSchemaFromStruct(ChangeTypes{})
202 if err != nil {
203 log.Fatal(err)
204 }
205
206 schema.PrintSchema(sc.Root(), os.Stdout, 2)
207
208
209
210
211
212
213
214
215 }
216
217 func ExampleNewSchemaFromStruct_nestedtypes() {
218 type Other struct {
219 OptionalMap *map[string]*string `parquet:"valuerepetition=required, keylogical=String, valueconverted=BSON"`
220 }
221
222 type MyMap map[int32]string
223
224 type Nested struct {
225 SimpleMap map[int32]string
226 FixedLenMap map[string][]byte `parquet:"keytype=fixed_len_byte_array, keyfieldid=10, valuefieldid=11, keylength=10"`
227 DecimalMap map[int32]string `parquet:"logical=map, keyconverted=DECIMAL, keyscale=3, keyprecision=7, valuetype=fixed_len_byte_array, valuelength=4, valuelogical=decimal, valuelogical.precision=9, valuescale=2"`
228 OtherList []*Other
229 OtherRepeated []Other `parquet:"repetition=repeated"`
230 DateArray [5]int32 `parquet:"valuelogical=date, logical=list"`
231 DateMap MyMap `parquet:"keylogical=TIME, keylogical.unit=MILLIS, keylogical.isadjustedutc=false, valuelogical=enum"`
232 }
233
234 sc, err := schema.NewSchemaFromStruct(Nested{})
235 if err != nil {
236 log.Fatal(err)
237 }
238
239 schema.PrintSchema(sc.Root(), os.Stdout, 2)
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293 }
294
295 func TestStructFromSchema(t *testing.T) {
296 root, err := schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{
297 schema.NewBooleanNode("bool", parquet.Repetitions.Required, -1),
298 schema.NewInt32Node("int32", parquet.Repetitions.Optional, -1),
299 schema.NewInt64Node("int64", parquet.Repetitions.Repeated, -1),
300 schema.NewInt96Node("int96", parquet.Repetitions.Required, -1),
301 schema.NewFloat32Node("float", parquet.Repetitions.Required, -1),
302 schema.NewByteArrayNode("bytearray", parquet.Repetitions.Required, -1),
303 schema.NewFixedLenByteArrayNode("fixedLen", parquet.Repetitions.Required, 10, -1),
304 }, -1)
305 assert.NoError(t, err)
306
307 sc := schema.NewSchema(root)
308
309 typ, err := schema.NewStructFromSchema(sc)
310 assert.NoError(t, err)
311
312 assert.Equal(t, reflect.Struct, typ.Kind())
313 assert.Equal(t, "struct { bool bool; int32 *int32; int64 []int64; int96 parquet.Int96; float float32; bytearray parquet.ByteArray; fixedLen parquet.FixedLenByteArray }",
314 typ.String())
315 }
316
317 func TestStructFromSchemaWithNesting(t *testing.T) {
318 type Other struct {
319 List *[]*float32
320 Excluded int32 `parquet:"-"`
321 }
322
323 type Nested struct {
324 Nest []int32
325 OptionalNest []*int64
326 Mapped map[string]float32
327 Other []Other
328 Other2 Other
329 }
330
331 sc, err := schema.NewSchemaFromStruct(Nested{})
332 assert.NoError(t, err)
333
334 typ, err := schema.NewStructFromSchema(sc)
335 assert.NoError(t, err)
336 assert.Equal(t, "struct { Nest []int32; OptionalNest []*int64; Mapped map[string]float32; Other []struct { List *[]*float32 }; Other2 struct { List *[]*float32 } }",
337 typ.String())
338 }
339
340 func TestStructFromSchemaBackwardsCompatList(t *testing.T) {
341 tests := []struct {
342 name string
343 n schema.Node
344 expected string
345 }{
346 {"proper list", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Required,
347 schema.FieldList{
348 schema.MustGroup(schema.NewGroupNode("list", parquet.Repetitions.Repeated, schema.FieldList{schema.NewBooleanNode("element", parquet.Repetitions.Optional, -1)}, -1)),
349 }, schema.NewListLogicalType(), -1)), "struct { my_list []*bool }"},
350 {"backward nullable list nonnull ints", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
351 schema.NewInt32Node("element", parquet.Repetitions.Repeated, -1),
352 }, schema.NewListLogicalType(), -1)), "struct { my_list *[]int32 }"},
353 {"backward nullable list tuple string int", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
354 schema.MustGroup(schema.NewGroupNode("element", parquet.Repetitions.Repeated, schema.FieldList{
355 schema.MustPrimitive(schema.NewPrimitiveNodeLogical("str", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
356 schema.NewInt32Node("num", parquet.Repetitions.Required, -1),
357 }, -1)),
358 }, schema.NewListLogicalType(), -1)), "struct { my_list *[]struct { str string; num int32 } }"},
359 {"list tuple string", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Required, schema.FieldList{
360 schema.MustGroup(schema.NewGroupNode("array", parquet.Repetitions.Repeated, schema.FieldList{
361 schema.NewByteArrayNode("str", parquet.Repetitions.Required, -1),
362 }, -1)),
363 }, schema.NewListLogicalType(), -1)), "struct { my_list []struct { str parquet.ByteArray } }"},
364 {"list tuple string my_list_tuple", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
365 schema.MustGroup(schema.NewGroupNode("my_list_tuple", parquet.Repetitions.Repeated, schema.FieldList{
366 schema.MustPrimitive(schema.NewPrimitiveNodeLogical("str", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
367 }, -1)),
368 }, schema.NewListLogicalType(), -1)), "struct { my_list *[]struct { str string } }"},
369 }
370
371 for _, tt := range tests {
372 t.Run(tt.name, func(t *testing.T) {
373 typ, err := schema.NewStructFromSchema(schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{tt.n}, -1))))
374 assert.NoError(t, err)
375 assert.Equal(t, tt.expected, typ.String())
376 })
377 }
378 }
379
380 func TestStructFromSchemaMaps(t *testing.T) {
381 tests := []struct {
382 name string
383 n schema.Node
384 expected string
385 }{
386 {"map string int", schema.MustGroup(schema.NewGroupNodeLogical("my_map", parquet.Repetitions.Required, schema.FieldList{
387 schema.MustGroup(schema.NewGroupNode("key_value", parquet.Repetitions.Repeated, schema.FieldList{
388 schema.MustPrimitive(schema.NewPrimitiveNodeLogical("key", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
389 schema.NewInt32Node("value", parquet.Repetitions.Optional, -1),
390 }, -1)),
391 }, schema.MapLogicalType{}, -1)), "struct { my_map map[string]*int32 }"},
392 {"nullable map string, int, required values", schema.MustGroup(schema.NewGroupNodeLogical("my_map", parquet.Repetitions.Optional, schema.FieldList{
393 schema.MustGroup(schema.NewGroupNode("map", parquet.Repetitions.Repeated, schema.FieldList{
394 schema.NewByteArrayNode("str", parquet.Repetitions.Required, -1),
395 schema.NewInt32Node("num", parquet.Repetitions.Required, -1),
396 }, -1)),
397 }, schema.MapLogicalType{}, -1)), "struct { my_map *map[string]int32 }"},
398 {"map_key_value with missing value", schema.MustGroup(schema.NewGroupNodeConverted("my_map", parquet.Repetitions.Optional, schema.FieldList{
399 schema.MustGroup(schema.NewGroupNode("map", parquet.Repetitions.Repeated, schema.FieldList{
400 schema.NewByteArrayNode("key", parquet.Repetitions.Required, -1),
401 }, -1)),
402 }, schema.ConvertedTypes.MapKeyValue, -1)), "struct { my_map *map[string]bool }"},
403 }
404 for _, tt := range tests {
405 t.Run(tt.name, func(t *testing.T) {
406 typ, err := schema.NewStructFromSchema(schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{tt.n}, -1))))
407 assert.NoError(t, err)
408 assert.Equal(t, tt.expected, typ.String())
409 })
410 }
411 }
412
View as plain text