// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package schema

import (
	"fmt"
	"strings"

	"github.com/apache/arrow/go/v15/parquet"
	format "github.com/apache/arrow/go/v15/parquet/internal/gen-go/parquet"
)

// Column encapsulates the information necessary to interpret primitive
// column data in the context of a particular schema. We have to examine
// the node structure of a column's path to the root in the schema tree
// to be able to reassemble the nested structure from the repetition and
// definition levels.
type Column struct {
	// pnode is the primitive schema node this column describes; the column's
	// name, path and type information are all read from it.
	pnode *PrimitiveNode
	// the maximum definition level in this column
	// if this is > 0 then either this column or a parent column must be optional.
	maxDefLvl int16
	// the maximum repetition level in this column
	// if this is > 0, then either this column or a parent column must be repeated.
	// when the repetition level in the column data equals this value, it indicates
	// additional elements in the innermost list.
	maxRepLvl int16
}

// NewColumn returns a new column object for the given node with the provided
// maximum definition and repetition levels.
func NewColumn(n *PrimitiveNode, maxDefinitionLvl, maxRepetitionLvl int16) *Column {
	return &Column{
		pnode:     n,
		maxDefLvl: maxDefinitionLvl,
		maxRepLvl: maxRepetitionLvl,
	}
}

// Name returns the name of the primitive node backing this column.
func (c *Column) Name() string {
	return c.pnode.Name()
}

// ColumnPath returns the full path to this column from the root of the schema.
func (c *Column) ColumnPath() parquet.ColumnPath {
	return c.pnode.columnPath()
}

// Path is equivalent to ColumnPath().String(), returning the dot-string
// version of the path.
func (c *Column) Path() string {
	return c.pnode.Path()
}

// TypeLength is -1 if the column is not a FixedLenByteArray, otherwise it is
// the length of the elements in the column.
func (c *Column) TypeLength() int {
	return c.pnode.TypeLength()
}

// MaxDefinitionLevel returns the maximum definition level this column was
// created with.
func (c *Column) MaxDefinitionLevel() int16 {
	return c.maxDefLvl
}

// MaxRepetitionLevel returns the maximum repetition level this column was
// created with.
func (c *Column) MaxRepetitionLevel() int16 {
	return c.maxRepLvl
}

// PhysicalType returns the physical type of the primitive node backing this
// column.
func (c *Column) PhysicalType() parquet.Type {
	return c.pnode.PhysicalType()
}

// ConvertedType returns the converted type recorded on the primitive node
// backing this column.
func (c *Column) ConvertedType() ConvertedType {
	return c.pnode.convertedType
}

// LogicalType returns the logical type recorded on the primitive node backing
// this column.
func (c *Column) LogicalType() LogicalType {
	return c.pnode.logicalType
}

// ColumnOrder returns the column order recorded on the primitive node backing
// this column.
func (c *Column) ColumnOrder() parquet.ColumnOrder {
	return c.pnode.ColumnOrder
}

// String renders a multi-line, human-readable description of the column: its
// name, path, physical/converted/logical types and maximum levels. The element
// length is appended for FixedLenByteArray columns, and the precision and
// scale for columns whose converted type is Decimal.
func (c *Column) String() string {
	physical := c.PhysicalType()
	converted := c.ConvertedType()

	var sb strings.Builder

	// Fields that are present for every column.
	fmt.Fprintf(&sb,
		"column descriptor = {\n"+
			" name: %s,\n"+
			" path: %s,\n"+
			" physical_type: %s,\n"+
			" converted_type: %s,\n"+
			" logical_type: %s,\n"+
			" max_definition_level: %d,\n"+
			" max_repetition_level: %d,\n",
		c.Name(), c.Path(), physical, converted, c.LogicalType(),
		c.maxDefLvl, c.maxRepLvl)

	// Fields that only apply to particular types.
	if physical == parquet.Types.FixedLenByteArray {
		fmt.Fprintf(&sb, " length: %d,\n", c.TypeLength())
	}
	if converted == ConvertedTypes.Decimal {
		meta := c.pnode.decimalMetaData
		fmt.Fprintf(&sb, " precision: %d,\n scale: %d,\n", meta.Precision, meta.Scale)
	}

	sb.WriteString("}")
	return sb.String()
}
Equals will return true if the rhs Column has the same Max Repetition and Definition levels // along with having the same node definition. func (c *Column) Equals(rhs *Column) bool { return c.pnode.Equals(rhs.pnode) && c.MaxRepetitionLevel() == rhs.MaxRepetitionLevel() && c.MaxDefinitionLevel() == rhs.MaxDefinitionLevel() } // SchemaNode returns the underlying Node in the schema tree for this column. func (c *Column) SchemaNode() Node { return c.pnode } // SortOrder returns the sort order of this column's statistics based on the // Logical and Converted types. func (c *Column) SortOrder() SortOrder { if c.LogicalType() != nil { return GetLogicalSortOrder(c.LogicalType(), format.Type(c.pnode.PhysicalType())) } return GetSortOrder(c.ConvertedType(), format.Type(c.pnode.PhysicalType())) }