...

Source file src/github.com/apache/arrow/go/v15/parquet/schema/schema_element_test.go

Documentation: github.com/apache/arrow/go/v15/parquet/schema

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package schema
    18  
    19  import (
    20  	"testing"
    21  
    22  	"github.com/apache/arrow/go/v15/parquet"
    23  	format "github.com/apache/arrow/go/v15/parquet/internal/gen-go/parquet"
    24  	"github.com/stretchr/testify/assert"
    25  	"github.com/stretchr/testify/suite"
    26  )
    27  
// schemaElementConstruction bundles a schema node, the thrift element it
// serializes to, and the expectations that inspect verifies against that
// element.
//
//   - node is the schema node built for the test case.
//   - element is the result of calling toThrift on node.
//   - name is the node name copied from the test arguments.
//   - expectConverted says whether the element must have a converted type
//     set; when true, converted is the value it must equal.
//   - expectLogical says whether the element must have a logical type set;
//     when true, checkLogical is called with the element and must return true.
type schemaElementConstruction struct {
	node            Node
	element         *format.SchemaElement
	name            string
	expectConverted bool
	converted       ConvertedType
	expectLogical   bool
	checkLogical    func(*format.SchemaElement) bool
}
    37  
// decimalSchemaElementConstruction extends schemaElementConstruction with the
// precision and scale read from the DecimalLogicalType the node was built
// with; inspectDecimal compares them against the serialized element.
type decimalSchemaElementConstruction struct {
	schemaElementConstruction
	precision int
	scale     int
}
    43  
// temporalSchemaElementConstruction extends schemaElementConstruction for
// time and timestamp cases. adjusted and unit are read from the
// TemporalLogicalType the node was built with; getUnit extracts the thrift
// TimeUnit from the serialized element so inspectTemporal can compare it
// against unit.
type temporalSchemaElementConstruction struct {
	schemaElementConstruction
	adjusted bool
	unit     TimeUnitType
	getUnit  func(*format.SchemaElement) *format.TimeUnit
}
    50  
// intSchemaElementConstruction extends schemaElementConstruction with the bit
// width and signedness read from the IntLogicalType the node was built with;
// inspectInt compares them against the serialized element.
type intSchemaElementConstruction struct {
	schemaElementConstruction
	width  int8
	signed bool
}
    56  
// legacySchemaElementConstructArgs describes a test case whose node is built
// from a converted type only (see legacyReconstruct). name, physical,
// converted and len are handed to NewPrimitiveNodeConverted; the expect*,
// converted and checkLogical fields are carried over as the expectations for
// inspect.
//
// Field order matters: TestSimple initializes this struct positionally.
type legacySchemaElementConstructArgs struct {
	name            string
	physical        parquet.Type
	len             int
	expectConverted bool
	converted       ConvertedType
	expectLogical   bool
	checkLogical    func(*format.SchemaElement) bool
}
    66  
// schemaElementConstructArgs describes a test case whose node is built from a
// logical type (see reconstruct). name, logical, physical and len are handed
// to NewPrimitiveNodeLogical; the remaining fields are carried over as the
// expectations for inspect.
//
// Field order matters: TestSimple initializes this struct positionally.
type schemaElementConstructArgs struct {
	name            string
	logical         LogicalType
	physical        parquet.Type
	len             int
	expectConverted bool
	converted       ConvertedType
	expectLogical   bool
	checkLogical    func(*format.SchemaElement) bool
}
// SchemaElementConstructionSuite is the testify suite holding the schema
// element construction tests; it is executed by TestSchemaElementConstruction.
type SchemaElementConstructionSuite struct {
	suite.Suite
}
    80  
    81  func (s *SchemaElementConstructionSuite) reconstruct(c schemaElementConstructArgs) *schemaElementConstruction {
    82  	ret := &schemaElementConstruction{
    83  		node:            MustPrimitive(NewPrimitiveNodeLogical(c.name, parquet.Repetitions.Required, c.logical, c.physical, c.len, -1)),
    84  		name:            c.name,
    85  		expectConverted: c.expectConverted,
    86  		converted:       c.converted,
    87  		expectLogical:   c.expectLogical,
    88  		checkLogical:    c.checkLogical,
    89  	}
    90  	ret.element = ret.node.toThrift()
    91  	return ret
    92  }
    93  
    94  func (s *SchemaElementConstructionSuite) legacyReconstruct(c legacySchemaElementConstructArgs) *schemaElementConstruction {
    95  	ret := &schemaElementConstruction{
    96  		node:            MustPrimitive(NewPrimitiveNodeConverted(c.name, parquet.Repetitions.Required, c.physical, c.converted, c.len, 0, 0, -1)),
    97  		name:            c.name,
    98  		expectConverted: c.expectConverted,
    99  		converted:       c.converted,
   100  		expectLogical:   c.expectLogical,
   101  		checkLogical:    c.checkLogical,
   102  	}
   103  	ret.element = ret.node.toThrift()
   104  	return ret
   105  }
   106  
   107  func (s *SchemaElementConstructionSuite) inspect(c *schemaElementConstruction) {
   108  	if c.expectConverted {
   109  		s.True(c.element.IsSetConvertedType())
   110  		s.Equal(c.converted, ConvertedType(*c.element.ConvertedType))
   111  	} else {
   112  		s.False(c.element.IsSetConvertedType())
   113  	}
   114  	if c.expectLogical {
   115  		s.True(c.element.IsSetLogicalType())
   116  		s.True(c.checkLogical(c.element))
   117  	} else {
   118  		s.False(c.element.IsSetLogicalType())
   119  	}
   120  }
   121  
   122  func (s *SchemaElementConstructionSuite) TestSimple() {
   123  	checkNone := func(*format.SchemaElement) bool { return true }
   124  
   125  	tests := []struct {
   126  		name   string
   127  		args   *schemaElementConstructArgs
   128  		legacy *legacySchemaElementConstructArgs
   129  	}{
   130  		{"string", &schemaElementConstructArgs{
   131  			"string", StringLogicalType{}, parquet.Types.ByteArray, -1, true, ConvertedTypes.UTF8, true,
   132  			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetSTRING() },
   133  		}, nil},
   134  		{"enum", &schemaElementConstructArgs{
   135  			"enum", EnumLogicalType{}, parquet.Types.ByteArray, -1, true, ConvertedTypes.Enum, true,
   136  			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetENUM() },
   137  		}, nil},
   138  		{"date", &schemaElementConstructArgs{
   139  			"date", DateLogicalType{}, parquet.Types.Int32, -1, true, ConvertedTypes.Date, true,
   140  			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetDATE() },
   141  		}, nil},
   142  		{"interval", &schemaElementConstructArgs{
   143  			"interval", IntervalLogicalType{}, parquet.Types.FixedLenByteArray, 12, true, ConvertedTypes.Interval, false,
   144  			checkNone,
   145  		}, nil},
   146  		{"null", &schemaElementConstructArgs{
   147  			"null", NullLogicalType{}, parquet.Types.Double, -1, false, ConvertedTypes.NA, true,
   148  			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetUNKNOWN() },
   149  		}, nil},
   150  		{"json", &schemaElementConstructArgs{
   151  			"json", JSONLogicalType{}, parquet.Types.ByteArray, -1, true, ConvertedTypes.JSON, true,
   152  			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetJSON() },
   153  		}, nil},
   154  		{"bson", &schemaElementConstructArgs{
   155  			"bson", BSONLogicalType{}, parquet.Types.ByteArray, -1, true, ConvertedTypes.BSON, true,
   156  			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetBSON() },
   157  		}, nil},
   158  		{"uuid", &schemaElementConstructArgs{
   159  			"uuid", UUIDLogicalType{}, parquet.Types.FixedLenByteArray, 16, false, ConvertedTypes.NA, true,
   160  			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetUUID() },
   161  		}, nil},
   162  		{"float16", &schemaElementConstructArgs{
   163  			"float16", Float16LogicalType{}, parquet.Types.FixedLenByteArray, 2, false, ConvertedTypes.NA, true,
   164  			func(e *format.SchemaElement) bool { return e.LogicalType.IsSetFLOAT16() },
   165  		}, nil},
   166  		{"none", &schemaElementConstructArgs{
   167  			"none", NoLogicalType{}, parquet.Types.Int64, -1, false, ConvertedTypes.NA, false,
   168  			checkNone,
   169  		}, nil},
   170  		{"unknown", &schemaElementConstructArgs{
   171  			"unknown", UnknownLogicalType{}, parquet.Types.Int64, -1, true, ConvertedTypes.NA, false,
   172  			checkNone,
   173  		}, nil},
   174  		{"timestamp_ms", nil, &legacySchemaElementConstructArgs{
   175  			"timestamp_ms", parquet.Types.Int64, -1, true, ConvertedTypes.TimestampMillis, false, checkNone}},
   176  		{"timestamp_us", nil, &legacySchemaElementConstructArgs{
   177  			"timestamp_us", parquet.Types.Int64, -1, true, ConvertedTypes.TimestampMicros, false, checkNone}},
   178  	}
   179  	for _, tt := range tests {
   180  		s.Run(tt.name, func() {
   181  			var sc *schemaElementConstruction
   182  			if tt.args != nil {
   183  				sc = s.reconstruct(*tt.args)
   184  			} else {
   185  				sc = s.legacyReconstruct(*tt.legacy)
   186  			}
   187  			s.Equal(tt.name, sc.element.Name)
   188  			s.inspect(sc)
   189  		})
   190  	}
   191  }
   192  
   193  func (s *SchemaElementConstructionSuite) reconstructDecimal(c schemaElementConstructArgs) *decimalSchemaElementConstruction {
   194  	ret := s.reconstruct(c)
   195  	dec := c.logical.(*DecimalLogicalType)
   196  	return &decimalSchemaElementConstruction{*ret, int(dec.Precision()), int(dec.Scale())}
   197  }
   198  
   199  func (s *SchemaElementConstructionSuite) inspectDecimal(d *decimalSchemaElementConstruction) {
   200  	s.inspect(&d.schemaElementConstruction)
   201  	s.EqualValues(d.precision, d.element.GetPrecision())
   202  	s.EqualValues(d.scale, d.element.GetScale())
   203  	s.EqualValues(d.precision, d.element.LogicalType.DECIMAL.Precision)
   204  	s.EqualValues(d.scale, d.element.LogicalType.DECIMAL.Scale)
   205  }
   206  
   207  func (s *SchemaElementConstructionSuite) TestDecimal() {
   208  	checkDecimal := func(p *format.SchemaElement) bool { return p.LogicalType.IsSetDECIMAL() }
   209  
   210  	tests := []schemaElementConstructArgs{
   211  		{
   212  			name: "decimal16_6", logical: NewDecimalLogicalType(16 /* precision */, 6 /* scale */),
   213  			physical: parquet.Types.Int64, len: -1, expectConverted: true, converted: ConvertedTypes.Decimal,
   214  			expectLogical: true, checkLogical: checkDecimal,
   215  		},
   216  		{
   217  			name: "decimal1_0", logical: NewDecimalLogicalType(1 /* precision */, 0 /* scale */),
   218  			physical: parquet.Types.Int32, len: -1, expectConverted: true, converted: ConvertedTypes.Decimal,
   219  			expectLogical: true, checkLogical: checkDecimal,
   220  		},
   221  		{
   222  			name: "decimal10", logical: NewDecimalLogicalType(10 /* precision */, 0 /* scale */),
   223  			physical: parquet.Types.Int64, len: -1, expectConverted: true, converted: ConvertedTypes.Decimal,
   224  			expectLogical: true, checkLogical: checkDecimal,
   225  		},
   226  		{
   227  			name: "decimal11_11", logical: NewDecimalLogicalType(11 /* precision */, 11 /* scale */),
   228  			physical: parquet.Types.Int64, len: -1, expectConverted: true, converted: ConvertedTypes.Decimal,
   229  			expectLogical: true, checkLogical: checkDecimal,
   230  		},
   231  	}
   232  	for _, tt := range tests {
   233  		s.Run(tt.name, func() {
   234  			d := s.reconstructDecimal(tt)
   235  			s.Equal(tt.name, d.element.Name)
   236  			s.inspectDecimal(d)
   237  		})
   238  	}
   239  }
   240  
   241  func (s *SchemaElementConstructionSuite) reconstructTemporal(c schemaElementConstructArgs, getUnit func(*format.SchemaElement) *format.TimeUnit) *temporalSchemaElementConstruction {
   242  	base := s.reconstruct(c)
   243  	t := c.logical.(TemporalLogicalType)
   244  	return &temporalSchemaElementConstruction{
   245  		*base,
   246  		t.IsAdjustedToUTC(),
   247  		t.TimeUnit(),
   248  		getUnit,
   249  	}
   250  }
   251  
   252  func (s *SchemaElementConstructionSuite) inspectTemporal(t *temporalSchemaElementConstruction) {
   253  	s.inspect(&t.schemaElementConstruction)
   254  	switch t.unit {
   255  	case TimeUnitMillis:
   256  		s.True(t.getUnit(t.element).IsSetMILLIS())
   257  	case TimeUnitMicros:
   258  		s.True(t.getUnit(t.element).IsSetMICROS())
   259  	case TimeUnitNanos:
   260  		s.True(t.getUnit(t.element).IsSetNANOS())
   261  	case TimeUnitUnknown:
   262  		fallthrough
   263  	default:
   264  		s.Fail("invalid time unit in test case")
   265  	}
   266  }
   267  
   268  func (s *SchemaElementConstructionSuite) TestTemporal() {
   269  	checkTime := func(p *format.SchemaElement) bool {
   270  		return p.LogicalType.IsSetTIME()
   271  	}
   272  	checkTimestamp := func(p *format.SchemaElement) bool {
   273  		return p.LogicalType.IsSetTIMESTAMP()
   274  	}
   275  
   276  	getTimeUnit := func(p *format.SchemaElement) *format.TimeUnit {
   277  		return p.LogicalType.TIME.Unit
   278  	}
   279  	getTimestampUnit := func(p *format.SchemaElement) *format.TimeUnit {
   280  		return p.LogicalType.TIMESTAMP.Unit
   281  	}
   282  
   283  	timeTests := []schemaElementConstructArgs{
   284  		{
   285  			name: "time_T_ms", logical: NewTimeLogicalType(true, TimeUnitMillis), physical: parquet.Types.Int32, len: -1,
   286  			expectConverted: true, converted: ConvertedTypes.TimeMillis, expectLogical: true, checkLogical: checkTime,
   287  		},
   288  		{
   289  			name: "time_F_ms", logical: NewTimeLogicalType(false, TimeUnitMillis), physical: parquet.Types.Int32, len: -1,
   290  			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTime,
   291  		},
   292  		{
   293  			name: "time_T_us", logical: NewTimeLogicalType(true, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
   294  			expectConverted: true, converted: ConvertedTypes.TimeMicros, expectLogical: true, checkLogical: checkTime,
   295  		},
   296  		{
   297  			name: "time_F_us", logical: NewTimeLogicalType(false, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
   298  			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTime,
   299  		},
   300  		{
   301  			name: "time_T_ns", logical: NewTimeLogicalType(true, TimeUnitNanos), physical: parquet.Types.Int64, len: -1,
   302  			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTime,
   303  		},
   304  		{
   305  			name: "time_F_ns", logical: NewTimeLogicalType(false, TimeUnitNanos), physical: parquet.Types.Int64, len: -1,
   306  			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTime,
   307  		},
   308  	}
   309  	timeStampTests := []schemaElementConstructArgs{
   310  		{
   311  			name: "timestamp_T_ms", logical: NewTimestampLogicalType(true, TimeUnitMillis), physical: parquet.Types.Int64, len: -1,
   312  			expectConverted: true, converted: ConvertedTypes.TimestampMillis, expectLogical: true, checkLogical: checkTimestamp,
   313  		},
   314  		{
   315  			name: "timestamp_F_ms", logical: NewTimestampLogicalType(false, TimeUnitMillis), physical: parquet.Types.Int64, len: -1,
   316  			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTimestamp,
   317  		},
   318  		{
   319  			name: "timestamp_F_ms_force", logical: NewTimestampLogicalTypeForce(false, TimeUnitMillis), physical: parquet.Types.Int64, len: -1,
   320  			expectConverted: true, converted: ConvertedTypes.TimestampMillis, expectLogical: true, checkLogical: checkTimestamp,
   321  		},
   322  		{
   323  			name: "timestamp_T_us", logical: NewTimestampLogicalType(true, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
   324  			expectConverted: true, converted: ConvertedTypes.TimestampMicros, expectLogical: true, checkLogical: checkTimestamp,
   325  		},
   326  		{
   327  			name: "timestamp_F_us", logical: NewTimestampLogicalType(false, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
   328  			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTimestamp,
   329  		},
   330  		{
   331  			name: "timestamp_F_us_force", logical: NewTimestampLogicalTypeForce(false, TimeUnitMicros), physical: parquet.Types.Int64, len: -1,
   332  			expectConverted: true, converted: ConvertedTypes.TimestampMicros, expectLogical: true, checkLogical: checkTimestamp,
   333  		},
   334  		{
   335  			name: "timestamp_T_ns", logical: NewTimestampLogicalType(true, TimeUnitNanos), physical: parquet.Types.Int64, len: -1,
   336  			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTimestamp,
   337  		},
   338  		{
   339  			name: "timestamp_F_ns", logical: NewTimestampLogicalType(false, TimeUnitNanos), physical: parquet.Types.Int64, len: -1,
   340  			expectConverted: false, converted: ConvertedTypes.NA, expectLogical: true, checkLogical: checkTimestamp,
   341  		},
   342  	}
   343  
   344  	for _, tt := range timeTests {
   345  		s.Run(tt.name, func() {
   346  			t := s.reconstructTemporal(tt, getTimeUnit)
   347  			s.Equal(t.adjusted, t.element.LogicalType.TIME.IsAdjustedToUTC)
   348  			s.inspectTemporal(t)
   349  		})
   350  	}
   351  	for _, tt := range timeStampTests {
   352  		s.Run(tt.name, func() {
   353  			t := s.reconstructTemporal(tt, getTimestampUnit)
   354  			s.Equal(t.adjusted, t.element.LogicalType.TIMESTAMP.IsAdjustedToUTC)
   355  			s.inspectTemporal(t)
   356  		})
   357  	}
   358  }
   359  
   360  func (s *SchemaElementConstructionSuite) reconstructInteger(c schemaElementConstructArgs) *intSchemaElementConstruction {
   361  	base := s.reconstruct(c)
   362  	l := c.logical.(*IntLogicalType)
   363  	return &intSchemaElementConstruction{
   364  		*base,
   365  		l.BitWidth(),
   366  		l.IsSigned(),
   367  	}
   368  }
   369  
   370  func (s *SchemaElementConstructionSuite) inspectInt(i *intSchemaElementConstruction) {
   371  	s.inspect(&i.schemaElementConstruction)
   372  	s.Equal(i.width, i.element.LogicalType.INTEGER.BitWidth)
   373  	s.Equal(i.signed, i.element.LogicalType.INTEGER.IsSigned)
   374  }
   375  
   376  func (s *SchemaElementConstructionSuite) TestIntegerCases() {
   377  	checkInt := func(p *format.SchemaElement) bool { return p.LogicalType.IsSetINTEGER() }
   378  
   379  	tests := []schemaElementConstructArgs{
   380  		{
   381  			name: "uint8", logical: NewIntLogicalType(8, false), physical: parquet.Types.Int32, len: -1,
   382  			expectConverted: true, converted: ConvertedTypes.Uint8, expectLogical: true, checkLogical: checkInt,
   383  		},
   384  		{
   385  			name: "uint16", logical: NewIntLogicalType(16, false), physical: parquet.Types.Int32, len: -1,
   386  			expectConverted: true, converted: ConvertedTypes.Uint16, expectLogical: true, checkLogical: checkInt,
   387  		},
   388  		{
   389  			name: "uint32", logical: NewIntLogicalType(32, false), physical: parquet.Types.Int32, len: -1,
   390  			expectConverted: true, converted: ConvertedTypes.Uint32, expectLogical: true, checkLogical: checkInt,
   391  		},
   392  		{
   393  			name: "uint64", logical: NewIntLogicalType(64, false), physical: parquet.Types.Int64, len: -1,
   394  			expectConverted: true, converted: ConvertedTypes.Uint64, expectLogical: true, checkLogical: checkInt,
   395  		},
   396  		{
   397  			name: "int8", logical: NewIntLogicalType(8, true), physical: parquet.Types.Int32, len: -1,
   398  			expectConverted: true, converted: ConvertedTypes.Int8, expectLogical: true, checkLogical: checkInt,
   399  		},
   400  		{
   401  			name: "int16", logical: NewIntLogicalType(16, true), physical: parquet.Types.Int32, len: -1,
   402  			expectConverted: true, converted: ConvertedTypes.Int16, expectLogical: true, checkLogical: checkInt,
   403  		},
   404  		{
   405  			name: "int32", logical: NewIntLogicalType(32, true), physical: parquet.Types.Int32, len: -1,
   406  			expectConverted: true, converted: ConvertedTypes.Int32, expectLogical: true, checkLogical: checkInt,
   407  		},
   408  		{
   409  			name: "int64", logical: NewIntLogicalType(64, true), physical: parquet.Types.Int64, len: -1,
   410  			expectConverted: true, converted: ConvertedTypes.Int64, expectLogical: true, checkLogical: checkInt,
   411  		},
   412  	}
   413  	for _, tt := range tests {
   414  		s.Run(tt.name, func() {
   415  			t := s.reconstructInteger(tt)
   416  			s.inspectInt(t)
   417  		})
   418  	}
   419  }
   420  
   421  func TestSchemaElementNestedSerialization(t *testing.T) {
   422  	// confirm that the intermediate thrift objects created during node serialization
   423  	// contain correct ConvertedType and ConvertedType information
   424  
   425  	strNode := MustPrimitive(NewPrimitiveNodeLogical("string" /*name */, parquet.Repetitions.Required, StringLogicalType{}, parquet.Types.ByteArray, -1 /* type len */, -1 /* fieldID */))
   426  	dateNode := MustPrimitive(NewPrimitiveNodeLogical("date" /*name */, parquet.Repetitions.Required, DateLogicalType{}, parquet.Types.Int32, -1 /* type len */, -1 /* fieldID */))
   427  	jsonNode := MustPrimitive(NewPrimitiveNodeLogical("json" /*name */, parquet.Repetitions.Required, JSONLogicalType{}, parquet.Types.ByteArray, -1 /* type len */, -1 /* fieldID */))
   428  	uuidNode := MustPrimitive(NewPrimitiveNodeLogical("uuid" /*name */, parquet.Repetitions.Required, UUIDLogicalType{}, parquet.Types.FixedLenByteArray, 16 /* type len */, - /* fieldID */ 1))
   429  	timestampNode := MustPrimitive(NewPrimitiveNodeLogical("timestamp" /*name */, parquet.Repetitions.Required, NewTimestampLogicalType(false /* adjustedToUTC */, TimeUnitNanos), parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
   430  	intNode := MustPrimitive(NewPrimitiveNodeLogical("int" /*name */, parquet.Repetitions.Required, NewIntLogicalType(64 /* bitWidth */, false /* signed */), parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
   431  	decimalNode := MustPrimitive(NewPrimitiveNodeLogical("decimal" /*name */, parquet.Repetitions.Required, NewDecimalLogicalType(16 /* precision */, 6 /* scale */), parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
   432  	float16Node := MustPrimitive(NewPrimitiveNodeLogical("float16" /*name */, parquet.Repetitions.Required, Float16LogicalType{}, parquet.Types.FixedLenByteArray, 2 /* type len */, - /* fieldID */ 1))
   433  	listNode := MustGroup(NewGroupNodeLogical("list" /*name */, parquet.Repetitions.Repeated, []Node{strNode, dateNode, jsonNode, uuidNode, timestampNode, intNode, decimalNode, float16Node}, NewListLogicalType(), -1 /* fieldID */))
   434  
   435  	listElems := ToThrift(listNode)
   436  	assert.Equal(t, "list", listElems[0].Name)
   437  	assert.True(t, listElems[0].IsSetConvertedType())
   438  	assert.True(t, listElems[0].IsSetLogicalType())
   439  	assert.Equal(t, format.ConvertedType(ConvertedTypes.List), listElems[0].GetConvertedType())
   440  	assert.True(t, listElems[0].LogicalType.IsSetLIST())
   441  	assert.True(t, listElems[1].LogicalType.IsSetSTRING())
   442  	assert.True(t, listElems[2].LogicalType.IsSetDATE())
   443  	assert.True(t, listElems[3].LogicalType.IsSetJSON())
   444  	assert.True(t, listElems[4].LogicalType.IsSetUUID())
   445  	assert.True(t, listElems[5].LogicalType.IsSetTIMESTAMP())
   446  	assert.True(t, listElems[6].LogicalType.IsSetINTEGER())
   447  	assert.True(t, listElems[7].LogicalType.IsSetDECIMAL())
   448  	assert.True(t, listElems[8].LogicalType.IsSetFLOAT16())
   449  
   450  	mapNode := MustGroup(NewGroupNodeLogical("map" /* name */, parquet.Repetitions.Required, []Node{}, MapLogicalType{}, -1 /* fieldID */))
   451  	mapElems := ToThrift(mapNode)
   452  	assert.Equal(t, "map", mapElems[0].Name)
   453  	assert.True(t, mapElems[0].IsSetConvertedType())
   454  	assert.True(t, mapElems[0].IsSetLogicalType())
   455  	assert.Equal(t, format.ConvertedType(ConvertedTypes.Map), mapElems[0].GetConvertedType())
   456  	assert.True(t, mapElems[0].LogicalType.IsSetMAP())
   457  }
   458  
   459  func TestLogicalTypeSerializationRoundTrip(t *testing.T) {
   460  	tests := []struct {
   461  		name     string
   462  		logical  LogicalType
   463  		physical parquet.Type
   464  		len      int
   465  	}{
   466  		{"string", StringLogicalType{}, parquet.Types.ByteArray, -1},
   467  		{"enum", EnumLogicalType{}, parquet.Types.ByteArray, -1},
   468  		{"decimal", NewDecimalLogicalType(16, 6), parquet.Types.Int64, -1},
   469  		{"date", DateLogicalType{}, parquet.Types.Int32, -1},
   470  		{"time_T_ms", NewTimeLogicalType(true, TimeUnitMillis), parquet.Types.Int32, -1},
   471  		{"time_T_us", NewTimeLogicalType(true, TimeUnitMicros), parquet.Types.Int64, -1},
   472  		{"time_T_ns", NewTimeLogicalType(true, TimeUnitNanos), parquet.Types.Int64, -1},
   473  		{"time_F_ms", NewTimeLogicalType(false, TimeUnitMillis), parquet.Types.Int32, -1},
   474  		{"time_F_us", NewTimeLogicalType(false, TimeUnitMicros), parquet.Types.Int64, -1},
   475  		{"time_F_ns", NewTimeLogicalType(false, TimeUnitNanos), parquet.Types.Int64, -1},
   476  		{"timestamp_T_ms", NewTimestampLogicalType(true, TimeUnitMillis), parquet.Types.Int64, -1},
   477  		{"timestamp_T_us", NewTimestampLogicalType(true, TimeUnitMicros), parquet.Types.Int64, -1},
   478  		{"timestamp_T_ns", NewTimestampLogicalType(true, TimeUnitNanos), parquet.Types.Int64, -1},
   479  		{"timestamp_F_ms", NewTimestampLogicalType(false, TimeUnitMillis), parquet.Types.Int64, -1},
   480  		{"timestamp_F_us", NewTimestampLogicalType(false, TimeUnitMicros), parquet.Types.Int64, -1},
   481  		{"timestamp_F_ns", NewTimestampLogicalType(false, TimeUnitNanos), parquet.Types.Int64, -1},
   482  		{"interval", IntervalLogicalType{}, parquet.Types.FixedLenByteArray, 12},
   483  		{"uint8", NewIntLogicalType(8, false), parquet.Types.Int32, -1},
   484  		{"uint16", NewIntLogicalType(16, false), parquet.Types.Int32, -1},
   485  		{"uint32", NewIntLogicalType(32, false), parquet.Types.Int32, -1},
   486  		{"uint64", NewIntLogicalType(64, false), parquet.Types.Int64, -1},
   487  		{"int8", NewIntLogicalType(8, true), parquet.Types.Int32, -1},
   488  		{"int16", NewIntLogicalType(16, true), parquet.Types.Int32, -1},
   489  		{"int32", NewIntLogicalType(32, true), parquet.Types.Int32, -1},
   490  		{"int64", NewIntLogicalType(64, true), parquet.Types.Int64, -1},
   491  		{"null", NullLogicalType{}, parquet.Types.Boolean, -1},
   492  		{"json", JSONLogicalType{}, parquet.Types.ByteArray, -1},
   493  		{"bson", BSONLogicalType{}, parquet.Types.ByteArray, -1},
   494  		{"uuid", UUIDLogicalType{}, parquet.Types.FixedLenByteArray, 16},
   495  		{"float16", Float16LogicalType{}, parquet.Types.FixedLenByteArray, 2},
   496  		{"none", NoLogicalType{}, parquet.Types.Boolean, -1},
   497  	}
   498  
   499  	for _, tt := range tests {
   500  		t.Run(tt.name, func(t *testing.T) {
   501  			n := MustPrimitive(NewPrimitiveNodeLogical("something" /* name */, parquet.Repetitions.Required, tt.logical, tt.physical, tt.len, -1 /* fieldID */))
   502  			elem := n.toThrift()
   503  			recover := MustPrimitive(PrimitiveNodeFromThrift(elem))
   504  			assert.True(t, n.Equals(recover))
   505  		})
   506  	}
   507  
   508  	n := MustGroup(NewGroupNodeLogical("map" /* name */, parquet.Repetitions.Required, []Node{}, MapLogicalType{}, -1 /* fieldID */))
   509  	elem := n.toThrift()
   510  	recover := MustGroup(GroupNodeFromThrift(elem, []Node{}))
   511  	assert.True(t, recover.Equals(n))
   512  
   513  	n = MustGroup(NewGroupNodeLogical("list" /* name */, parquet.Repetitions.Required, []Node{}, ListLogicalType{}, -1 /* fieldID */))
   514  	elem = n.toThrift()
   515  	recover = MustGroup(GroupNodeFromThrift(elem, []Node{}))
   516  	assert.True(t, recover.Equals(n))
   517  }
   518  
   519  func TestSchemaElementConstruction(t *testing.T) {
   520  	suite.Run(t, new(SchemaElementConstructionSuite))
   521  }
   522  

View as plain text