var ( // ConvertedTypes is a struct containing the constants for the types // to make it easy to reference them while making it clear what they are ConvertedTypes = struct { None ConvertedType UTF8 ConvertedType Map ConvertedType MapKeyValue ConvertedType List ConvertedType Enum ConvertedType Decimal ConvertedType Date ConvertedType TimeMillis ConvertedType TimeMicros ConvertedType TimestampMillis ConvertedType TimestampMicros ConvertedType Uint8 ConvertedType Uint16 ConvertedType Uint32 ConvertedType Uint64 ConvertedType Int8 ConvertedType Int16 ConvertedType Int32 ConvertedType Int64 ConvertedType JSON ConvertedType BSON ConvertedType Interval ConvertedType NA ConvertedType }{ None: -1, UTF8: ConvertedType(format.ConvertedType_UTF8), Map: ConvertedType(format.ConvertedType_MAP), MapKeyValue: ConvertedType(format.ConvertedType_MAP_KEY_VALUE), List: ConvertedType(format.ConvertedType_LIST), Enum: ConvertedType(format.ConvertedType_ENUM), Decimal: ConvertedType(format.ConvertedType_DECIMAL), Date: ConvertedType(format.ConvertedType_DATE), TimeMillis: ConvertedType(format.ConvertedType_TIME_MILLIS), TimeMicros: ConvertedType(format.ConvertedType_TIME_MICROS), TimestampMillis: ConvertedType(format.ConvertedType_TIMESTAMP_MILLIS), TimestampMicros: ConvertedType(format.ConvertedType_TIMESTAMP_MICROS), Uint8: ConvertedType(format.ConvertedType_UINT_8), Uint16: ConvertedType(format.ConvertedType_UINT_16), Uint32: ConvertedType(format.ConvertedType_UINT_32), Uint64: ConvertedType(format.ConvertedType_UINT_64), Int8: ConvertedType(format.ConvertedType_INT_8), Int16: ConvertedType(format.ConvertedType_INT_16), Int32: ConvertedType(format.ConvertedType_INT_32), Int64: ConvertedType(format.ConvertedType_INT_64), JSON: ConvertedType(format.ConvertedType_JSON), BSON: ConvertedType(format.ConvertedType_BSON), Interval: ConvertedType(format.ConvertedType_INTERVAL), NA: 24, } )
func ColumnPathFromNode(n Node) parquet.ColumnPath
ColumnPathFromNode walks the parents of the given node to construct its column path
func NewStructFromSchema(sc *Schema) (t reflect.Type, err error)
NewStructFromSchema generates a struct type as a reflect.Type from the schema by using the appropriate physical types and making things either pointers or slices based on whether they are repeated/optional/required. It does not use the logical or converted types to change the physical storage so that it is more efficient to use the resulting type for reading without having to do conversions.
It will use maps for map types and slices for list types, but otherwise ignores the converted and logical types of the nodes. Group nodes that are not List or Map will be nested structs.
func PrintSchema(n Node, w io.Writer, indentWidth int)
PrintSchema writes a string representation of the tree to w using the indent width provided.
▹ Example
func ToThrift(schema *GroupNode) []*format.SchemaElement
ToThrift converts a GroupNode to a slice of SchemaElements which is used for thrift serialization.
BSONLogicalType represents a binary JSON string in the byte array
type BSONLogicalType struct {
// contains filtered or unexported fields
}
func (BSONLogicalType) Equals(rhs LogicalType) bool
func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool
func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (BSONLogicalType) IsNested() bool
func (BSONLogicalType) IsNone() bool
func (BSONLogicalType) IsSerialized() bool
func (BSONLogicalType) IsValid() bool
func (BSONLogicalType) MarshalJSON() ([]byte, error)
func (BSONLogicalType) SortOrder() SortOrder
func (BSONLogicalType) String() string
func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
Column encapsulates the information necessary to interpret primitive column data in the context of a particular schema. We have to examine the node structure of a column's path to the root in the schema tree to be able to reassemble the nested structure from the repetition and definition levels.
type Column struct {
// contains filtered or unexported fields
}
func NewColumn(n *PrimitiveNode, maxDefinitionLvl, maxRepetitionLvl int16) *Column
NewColumn returns a new column object for the given node with the provided maximum definition and repetition levels.
func (c *Column) ColumnOrder() parquet.ColumnOrder
func (c *Column) ColumnPath() parquet.ColumnPath
ColumnPath returns the full path to this column from the root of the schema
func (c *Column) ConvertedType() ConvertedType
func (c *Column) Equals(rhs *Column) bool
Equals will return true if the rhs Column has the same Max Repetition and Definition levels along with having the same node definition.
func (c *Column) LogicalType() LogicalType
func (c *Column) MaxDefinitionLevel() int16
func (c *Column) MaxRepetitionLevel() int16
func (c *Column) Name() string
Name is the column's name
func (c *Column) Path() string
Path is equivalent to ColumnPath().String() returning the dot-string version of the path
func (c *Column) PhysicalType() parquet.Type
func (c *Column) SchemaNode() Node
SchemaNode returns the underlying Node in the schema tree for this column.
func (c *Column) SortOrder() SortOrder
SortOrder returns the sort order of this column's statistics based on the Logical and Converted types.
func (c *Column) String() string
func (c *Column) TypeLength() int
TypeLength is -1 if not a FixedLenByteArray, otherwise it is the length of elements in the column
ConvertedType corresponds to the ConvertedType in parquet.thrift, with added values of None and NA for handling when these values are not set in the metadata
type ConvertedType format.ConvertedType
func (p ConvertedType) String() string
func (p ConvertedType) ToLogicalType(convertedDecimal DecimalMetadata) LogicalType
ToLogicalType returns the correct LogicalType for the given ConvertedType, using the decimal metadata provided to define the precision/scale if necessary
DateLogicalType is an int32 representing the number of days since the Unix Epoch 1 January 1970
type DateLogicalType struct {
// contains filtered or unexported fields
}
func (DateLogicalType) Equals(rhs LogicalType) bool
func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool
func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool
func (DateLogicalType) IsNested() bool
func (DateLogicalType) IsNone() bool
func (DateLogicalType) IsSerialized() bool
func (DateLogicalType) IsValid() bool
func (DateLogicalType) MarshalJSON() ([]byte, error)
func (DateLogicalType) SortOrder() SortOrder
func (DateLogicalType) String() string
func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
DecimalLogicalType is used to represent a decimal value of a given precision and scale
type DecimalLogicalType struct {
// contains filtered or unexported fields
}
func (t DecimalLogicalType) Equals(rhs LogicalType) bool
func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool
func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (DecimalLogicalType) IsNested() bool
func (DecimalLogicalType) IsNone() bool
func (DecimalLogicalType) IsSerialized() bool
func (DecimalLogicalType) IsValid() bool
func (t DecimalLogicalType) MarshalJSON() ([]byte, error)
func (t DecimalLogicalType) Precision() int32
func (t DecimalLogicalType) Scale() int32
func (DecimalLogicalType) SortOrder() SortOrder
func (t DecimalLogicalType) String() string
func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
DecimalMetadata is a struct for managing scale and precision information between converted and logical types.
type DecimalMetadata struct { IsSet bool Scale int32 Precision int32 }
EnumLogicalType is for representing an enum, which should be a byte array type
type EnumLogicalType struct {
// contains filtered or unexported fields
}
func (EnumLogicalType) Equals(rhs LogicalType) bool
func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool
func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool
func (EnumLogicalType) IsNested() bool
func (EnumLogicalType) IsNone() bool
func (EnumLogicalType) IsSerialized() bool
func (EnumLogicalType) IsValid() bool
func (EnumLogicalType) MarshalJSON() ([]byte, error)
func (EnumLogicalType) SortOrder() SortOrder
func (EnumLogicalType) String() string
func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
FieldList is an alias for a slice of Nodes
type FieldList []Node
func (f FieldList) Len() int
Len is equivalent to len(fieldlist)
Float16LogicalType can only be used with a FixedLength byte array column that is exactly 2 bytes long
type Float16LogicalType struct {
// contains filtered or unexported fields
}
func (Float16LogicalType) Equals(rhs LogicalType) bool
func (Float16LogicalType) IsApplicable(t parquet.Type, tlen int32) bool
func (Float16LogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (Float16LogicalType) IsNested() bool
func (Float16LogicalType) IsNone() bool
func (Float16LogicalType) IsSerialized() bool
func (Float16LogicalType) IsValid() bool
func (Float16LogicalType) MarshalJSON() ([]byte, error)
func (Float16LogicalType) SortOrder() SortOrder
func (Float16LogicalType) String() string
func (Float16LogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
GroupNode is for managing nested nodes like List, Map, etc.
type GroupNode struct {
// contains filtered or unexported fields
}
func GroupNodeFromThrift(elem *format.SchemaElement, fields FieldList) (*GroupNode, error)
func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error)
ListOf is a convenience helper function to create a properly structured list structure according to the Parquet Spec.
<list-repetition> group <name> (LIST) { repeated group list { <element-repetition> <element-type> element; } }
<list-repetition> can only be optional or required. <element-repetition> can only be optional or required.
func ListOfWithName(listName string, element Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error)
ListOfWithName is a convenience helper function to create a properly structured list structure according to the Parquet Spec.
<list-repetition> group <name> (LIST) { repeated group list { <element-repetition> <element-type> element; } }
<list-repetition> can only be optional or required. <element-repetition> can only be optional or required.
func MapOf(name string, key Node, value Node, mapRep parquet.Repetition, fieldID int32) (*GroupNode, error)
MapOf is a convenience helper function to create a properly structured parquet map node setup according to the Parquet Spec.
<map-repetition> group <name> (MAP) { repeated group key_value { required <key-type> key; <value-repetition> <value-type> value; } }
key node will be renamed to "key", value node if not nil will be renamed to "value"
<map-repetition> must be only optional or required. panics if repeated is passed.
the key node *must* be required repetition. panics if optional or repeated
value node can be nil (omitted) or have a repetition of required or optional *only*.
func MustGroup(n Node, err error) *GroupNode
MustGroup is like Must, except it casts the node to a *GroupNode, which will panic if it is a primitive node.
func NewGroupNode(name string, repetition parquet.Repetition, fields FieldList, fieldID int32) (*GroupNode, error)
NewGroupNode constructs a new group node with the provided fields, but with converted type None and No Logical Type
func NewGroupNodeConverted(name string, repetition parquet.Repetition, fields FieldList, converted ConvertedType, id int32) (n *GroupNode, err error)
NewGroupNodeConverted constructs a group node with the provided fields and converted type, determining the logical type from that converted type.
func NewGroupNodeLogical(name string, repetition parquet.Repetition, fields FieldList, logical LogicalType, id int32) (n *GroupNode, err error)
NewGroupNodeLogical constructs a group node with the provided fields and logical type, determining the converted type from the provided logical type.
func (n *GroupNode) ConvertedType() ConvertedType
func (g *GroupNode) Equals(rhs Node) bool
Equals will compare this node to the provided node and only return true if this node and all of its children are the same as the passed in node and its children.
func (g *GroupNode) Field(i int) Node
Field returns the node in the field list which is of the provided (0-based) index
func (n *GroupNode) FieldID() int32
func (g *GroupNode) FieldIndexByField(n Node) int
FieldIndexByField looks up the index of the child n within this node. Returns -1 if n isn't a child of this group
func (g *GroupNode) FieldIndexByName(name string) int
FieldIndexByName provides the index for the field of the given name. Returns -1 if not found.
If there is more than one field with this name, it returns the index of the first one.
func (g *GroupNode) HasRepeatedFields() bool
HasRepeatedFields returns true if any of the children of this node have Repeated as its repetition type.
This is recursive and will check the children of any group nodes that are children.
func (n *GroupNode) LogicalType() LogicalType
func (n *GroupNode) Name() string
func (g *GroupNode) NumFields() int
NumFields returns the number of direct child fields for this group node
func (n *GroupNode) Parent() Node
func (n *GroupNode) Path() string
func (n *GroupNode) RepetitionType() parquet.Repetition
func (n *GroupNode) SetParent(p Node)
func (n *GroupNode) Type() NodeType
func (g *GroupNode) Visit(v Visitor)
Visit is for implementing a Visitor pattern handler to walk a schema's tree. One example is the Schema Printer which walks the tree to print out the schema in order.
IntLogicalType represents an integer type of a specific bit width and is either signed or unsigned.
type IntLogicalType struct {
// contains filtered or unexported fields
}
func (t IntLogicalType) BitWidth() int8
func (t IntLogicalType) Equals(rhs LogicalType) bool
func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool
func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (IntLogicalType) IsNested() bool
func (IntLogicalType) IsNone() bool
func (IntLogicalType) IsSerialized() bool
func (t IntLogicalType) IsSigned() bool
func (IntLogicalType) IsValid() bool
func (t IntLogicalType) MarshalJSON() ([]byte, error)
func (t IntLogicalType) SortOrder() SortOrder
func (t IntLogicalType) String() string
func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
IntervalLogicalType is not yet in the thrift spec, but represents an interval time and needs to be a fixed length byte array of 12 bytes
type IntervalLogicalType struct {
// contains filtered or unexported fields
}
func (IntervalLogicalType) Equals(rhs LogicalType) bool
func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool
func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (IntervalLogicalType) IsNested() bool
func (IntervalLogicalType) IsNone() bool
func (IntervalLogicalType) IsSerialized() bool
func (IntervalLogicalType) IsValid() bool
func (IntervalLogicalType) MarshalJSON() ([]byte, error)
func (IntervalLogicalType) SortOrder() SortOrder
func (IntervalLogicalType) String() string
func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
JSONLogicalType represents a byte array column which is to be interpreted as a JSON string.
type JSONLogicalType struct {
// contains filtered or unexported fields
}
func (JSONLogicalType) Equals(rhs LogicalType) bool
func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool
func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (JSONLogicalType) IsNested() bool
func (JSONLogicalType) IsNone() bool
func (JSONLogicalType) IsSerialized() bool
func (JSONLogicalType) IsValid() bool
func (JSONLogicalType) MarshalJSON() ([]byte, error)
func (JSONLogicalType) SortOrder() SortOrder
func (JSONLogicalType) String() string
func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
ListLogicalType is used for columns which are themselves nested lists
type ListLogicalType struct {
// contains filtered or unexported fields
}
func (ListLogicalType) Equals(rhs LogicalType) bool
func (ListLogicalType) IsApplicable(parquet.Type, int32) bool
func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool
func (ListLogicalType) IsNested() bool
func (ListLogicalType) IsNone() bool
func (ListLogicalType) IsSerialized() bool
func (ListLogicalType) IsValid() bool
func (ListLogicalType) MarshalJSON() ([]byte, error)
func (ListLogicalType) SortOrder() SortOrder
func (ListLogicalType) String() string
func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
LogicalType is the descriptor that defines the usage of a physical primitive type in the schema, such as an Interval, Date, etc.
type LogicalType interface { // Returns true if a nested type like List or Map IsNested() bool // Returns true if this type can be serialized, ie: not Unknown/NoType/Interval IsSerialized() bool // Returns true if not NoLogicalType IsValid() bool // Returns true if it is NoType IsNone() bool // returns a string representation of the Logical Type String() string // Return the equivalent ConvertedType for legacy Parquet systems ToConvertedType() (ConvertedType, DecimalMetadata) // Returns true if the specified ConvertedType is compatible with this // logical type IsCompatible(ConvertedType, DecimalMetadata) bool // Returns true if this logical type can be used with the provided physical type IsApplicable(t parquet.Type, tlen int32) bool // Returns true if the logical types are the same Equals(LogicalType) bool // Returns the default stat sort order for this logical type SortOrder() SortOrder // contains filtered or unexported methods }
func NewDecimalLogicalType(precision int32, scale int32) LogicalType
NewDecimalLogicalType returns a Decimal logical type with the given precision and scale.
Panics if precision < 1 or scale is not in the range (0, precision)
func NewIntLogicalType(bitWidth int8, signed bool) LogicalType
NewIntLogicalType creates an integer logical type of the desired bitwidth and whether it is signed or not.
Bit width must be exactly 8, 16, 32 or 64 for an integer logical type
func NewListLogicalType() LogicalType
func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType
NewTimeLogicalType returns a time type of the given unit.
func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType
NewTimestampLogicalType returns a logical timestamp type with "forceConverted" set to false
func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType
NewTimestampLogicalTypeForce returns a timestamp logical type with "forceConverted" set to true
func NewTimestampLogicalTypeWithOpts(opts ...TimestampOpt) LogicalType
NewTimestampLogicalTypeWithOpts creates a new TimestampLogicalType with the provided options.
TimestampType Unit defaults to milliseconds (TimeUnitMillis)
MapLogicalType represents a mapped type
type MapLogicalType struct {
// contains filtered or unexported fields
}
func (MapLogicalType) Equals(rhs LogicalType) bool
func (MapLogicalType) IsApplicable(parquet.Type, int32) bool
func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool
func (MapLogicalType) IsNested() bool
func (MapLogicalType) IsNone() bool
func (MapLogicalType) IsSerialized() bool
func (MapLogicalType) IsValid() bool
func (MapLogicalType) MarshalJSON() ([]byte, error)
func (MapLogicalType) SortOrder() SortOrder
func (MapLogicalType) String() string
func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
type NoLogicalType struct {
// contains filtered or unexported fields
}
func (NoLogicalType) Equals(rhs LogicalType) bool
func (NoLogicalType) IsApplicable(parquet.Type, int32) bool
func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (NoLogicalType) IsNested() bool
func (NoLogicalType) IsNone() bool
func (NoLogicalType) IsSerialized() bool
func (NoLogicalType) IsValid() bool
func (NoLogicalType) MarshalJSON() ([]byte, error)
func (NoLogicalType) SortOrder() SortOrder
func (NoLogicalType) String() string
func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
Node is the interface for both Group and Primitive Nodes. A logical schema type has a name, repetition level, and optionally a logical type (converted type is the deprecated version of the logical type concept, which is maintained for forward compatibility)
type Node interface { Name() string Type() NodeType RepetitionType() parquet.Repetition ConvertedType() ConvertedType LogicalType() LogicalType FieldID() int32 Parent() Node SetParent(Node) Path() string Equals(Node) bool Visit(v Visitor) // contains filtered or unexported methods }
func FromParquet(elems []*format.SchemaElement) (Node, error)
FromParquet converts a slice of thrift Schema Elements to the correct node type
func Must(n Node, err error) Node
Must is a convenience function for the NewNode functions that return a Node and an error, panicking if err != nil or returning the node
NodeType describes whether the Node is a Primitive or Group node
type NodeType int
the available constants for NodeType
const ( Primitive NodeType = iota Group )
type NullLogicalType struct {
// contains filtered or unexported fields
}
func (NullLogicalType) Equals(rhs LogicalType) bool
func (NullLogicalType) IsApplicable(parquet.Type, int32) bool
func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (NullLogicalType) IsNested() bool
func (NullLogicalType) IsNone() bool
func (NullLogicalType) IsSerialized() bool
func (NullLogicalType) IsValid() bool
func (NullLogicalType) MarshalJSON() ([]byte, error)
func (NullLogicalType) SortOrder() SortOrder
func (NullLogicalType) String() string
func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
A PrimitiveNode is a type that is one of the primitive Parquet storage types. In addition to the other type metadata (name, repetition level, logical type), also has the physical storage type and their type-specific metadata (byte width, decimal parameters)
type PrimitiveNode struct { ColumnOrder parquet.ColumnOrder // contains filtered or unexported fields }
func MustPrimitive(n Node, err error) *PrimitiveNode
MustPrimitive is like Must except it casts the node to *PrimitiveNode which will panic if it is a group node.
func NewBooleanNode(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode
NewBooleanNode is a convenience factory for constructing a Boolean Primitive Node
func NewByteArrayNode(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode
NewByteArrayNode is a convenience factory for constructing a Byte Array Primitive Node
func NewFixedLenByteArrayNode(name string, rep parquet.Repetition, length int32, fieldID int32) *PrimitiveNode
NewFixedLenByteArrayNode is a convenience factory for constructing a Fixed Length Byte Array Primitive Node of the given length
func NewFloat32Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode
NewFloat32Node is a convenience factory for constructing a Float Primitive Node
func NewFloat64Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode
NewFloat64Node is a convenience factory for constructing a Double Primitive Node
func NewInt32Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode
NewInt32Node is a convenience factory for constructing an Int32 Primitive Node
func NewInt64Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode
NewInt64Node is a convenience factory for constructing an Int64 Primitive Node
func NewInt96Node(name string, rep parquet.Repetition, fieldID int32) *PrimitiveNode
NewInt96Node is a convenience factory for constructing an Int96 Primitive Node
func NewPrimitiveNode(name string, repetition parquet.Repetition, typ parquet.Type, fieldID, typeLength int32) (*PrimitiveNode, error)
NewPrimitiveNode constructs a primitive node with the ConvertedType of None and no logical type.
Use NewPrimitiveNodeLogical and NewPrimitiveNodeConverted to specify the logical or converted type.
func NewPrimitiveNodeConverted(name string, repetition parquet.Repetition, typ parquet.Type, converted ConvertedType, typeLen, precision, scale int, id int32) (*PrimitiveNode, error)
NewPrimitiveNodeConverted constructs a primitive node from the given physical type and converted type, determining the logical type from the converted type.
func NewPrimitiveNodeLogical(name string, repetition parquet.Repetition, logicalType LogicalType, physicalType parquet.Type, typeLen int, id int32) (*PrimitiveNode, error)
NewPrimitiveNodeLogical constructs a Primitive node using the provided logical type for a given physical type and typelength.
func PrimitiveNodeFromThrift(elem *format.SchemaElement) (*PrimitiveNode, error)
func (n *PrimitiveNode) ConvertedType() ConvertedType
func (p *PrimitiveNode) DecimalMetadata() DecimalMetadata
DecimalMetadata returns the current metadata for the node. If not a decimal typed column, the return should have IsSet == false.
func (p *PrimitiveNode) Equals(rhs Node) bool
Equals returns true if both nodes are primitive nodes with the same physical and converted/logical types.
func (n *PrimitiveNode) FieldID() int32
func (n *PrimitiveNode) LogicalType() LogicalType
func (n *PrimitiveNode) Name() string
func (n *PrimitiveNode) Parent() Node
func (n *PrimitiveNode) Path() string
func (p *PrimitiveNode) PhysicalType() parquet.Type
PhysicalType returns the proper Physical parquet.Type primitive that is used to store the values in this column.
func (n *PrimitiveNode) RepetitionType() parquet.Repetition
func (n *PrimitiveNode) SetParent(p Node)
func (p *PrimitiveNode) SetTypeLength(length int)
SetTypeLength will change the type length of the node, has no effect if the physical type is not FixedLength Byte Array
func (n *PrimitiveNode) Type() NodeType
func (p *PrimitiveNode) TypeLength() int
TypeLength will be -1 if not a FixedLenByteArray column, otherwise will be the length of the FixedLen Byte Array
func (p *PrimitiveNode) Visit(v Visitor)
Visit is for implementing a Visitor pattern handler to walk a schema's tree. One example is the Schema Printer which walks the tree to print out the schema in order.
Schema is the container for the converted Parquet schema with computed information from the schema analysis needed for file reading
* Column index to Node
* Max repetition / definition levels for each primitive node
The Column objects (column descriptors) produced by this type can be used to assist in the reconstruction of fully materialized data structures from the repetition-definition level encoding of nested data
type Schema struct {
// contains filtered or unexported fields
}
func NewSchema(root *GroupNode) *Schema
NewSchema constructs a new Schema object from a root group node.
Any fields with a field-id of -1 will be given an appropriate field number based on their order.
func NewSchemaFromStruct(obj interface{}) (sc *Schema, err error)
NewSchemaFromStruct generates a schema from an object type via reflection of the type and reading struct tags for "parquet".
Everything defaults to Required repetition, unless otherwise specified. Pointer types become Optional repetition. Arrays and Slices become logical List types unless using the tag `repetition=repeated`.
A length specified byte field (like [5]byte) becomes a fixed_len_byte_array of that length unless otherwise specified by tags.
string and []byte both become ByteArray unless otherwise specified.
Integer types will default to having a logical type of the appropriate bit width and signedness rather than having no logical type, ie: an int8 will become an int32 node with logical type Int(bitWidth=8, signed=true).
Structs will become group nodes with the fields of the struct as the fields of the group, recursively creating the nodes.
maps will become appropriate Map structures in the schema of the defined key and values.
name: by default the node will have the same name as the field, this tag lets you specify a name
type: Specify the physical type instead of using the field type
length: specify the type length of the node, only relevant for fixed_len_byte_array
scale: specify the scale for a decimal field
precision: specify the precision for a decimal field
fieldid: specify the field ID for that node, defaults to -1 which means it is not set in the parquet file.
repetition: specify the repetition as something other than what is determined by the type
converted: specify the Converted Type of the field
logical: specify the logical type of the field, if using decimal then the scale and precision will be determined by the precision and scale fields, or by the logical.precision / logical.scale fields, with the "logical."-prefixed versions taking precedence. For Time or Timestamp logical types, use logical.unit=<millis|micros|nanos> and logical.isadjustedutc=<true|false> to set those. Unit is required; isadjustedutc defaults to true. For Integer logical type, use logical.bitwidth and logical.signed to specify those values, with bitwidth being required, and signed defaulting to true.
All tags other than name can use a prefix of "key<tagname>=<value>" to refer to the type of the key for a map and "value<tagname>=<value>" to refer to the value type of a map or the element of a list (such as the type of a slice)
▹ Example (Convertedtypes)
▹ Example (Logicaltypes)
▹ Example (Nestedtypes)
▹ Example (Physicaltype)
▹ Example (Primitives)
▹ Example (Repetition)
func (s *Schema) Column(i int) *Column
Column returns the (0-indexed) column of the provided index.
func (s *Schema) ColumnIndexByName(nodePath string) int
ColumnIndexByName looks up the column by its full dot-separated node path. If there are multiple columns that match, it returns the first one.
Returns -1 if not found.
func (s *Schema) ColumnIndexByNode(n Node) int
ColumnIndexByNode returns the index of the column represented by this node.
Returns -1 if not found.
func (s *Schema) ColumnRoot(i int) Node
ColumnRoot returns the root node of a given column if it is under a nested group node, providing that root group node.
func (s *Schema) Equals(rhs *Schema) bool
Equals returns true as long as the leaf columns are equal, doesn't take into account the groups and only checks whether the schemas are compatible at the physical storage level.
func (s *Schema) HasRepeatedFields() bool
HasRepeatedFields returns true if any node in the schema has a repeated field type.
func (s *Schema) NumColumns() int
NumColumns returns the number of leaf nodes that are the actual primitive columns in this schema.
func (s *Schema) Root() *GroupNode
Root returns the group node that is the root of this schema
func (s *Schema) String() string
func (s *Schema) UpdateColumnOrders(orders []parquet.ColumnOrder) error
UpdateColumnOrders must get a slice that is the same length as the number of leaf columns and is used to update the schema metadata Column Orders. len(orders) must equal s.NumColumns()
SortOrder mirrors the parquet.thrift sort order type
type SortOrder int8
Constants for the Stat sort order definitions
const ( SortSIGNED SortOrder = iota SortUNSIGNED SortUNKNOWN )
func DefaultSortOrder(primitive format.Type) SortOrder
DefaultSortOrder returns the default stat sort order for the given physical type
func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder
GetLogicalSortOrder returns the default sort order for this logical type or falls back to the default sort order for the physical type if not valid
func GetSortOrder(convert ConvertedType, primitive format.Type) SortOrder
GetSortOrder defaults to the sort order based on the physical type if convert is ConvertedTypes.None, otherwise determines the sort order by the converted type.
StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray
type StringLogicalType struct {
// contains filtered or unexported fields
}
func (StringLogicalType) Equals(rhs LogicalType) bool
func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool
func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool
func (StringLogicalType) IsNested() bool
func (StringLogicalType) IsNone() bool
func (StringLogicalType) IsSerialized() bool
func (StringLogicalType) IsValid() bool
func (StringLogicalType) MarshalJSON() ([]byte, error)
func (StringLogicalType) SortOrder() SortOrder
func (StringLogicalType) String() string
func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
TemporalLogicalType is a smaller interface for Time based logical types like Time / Timestamp
type TemporalLogicalType interface { LogicalType IsAdjustedToUTC() bool TimeUnit() TimeUnitType }
TimeLogicalType is a time type without a date and must be an int32 for milliseconds, or an int64 for microseconds or nanoseconds.
type TimeLogicalType struct {
// contains filtered or unexported fields
}
func (t TimeLogicalType) Equals(rhs LogicalType) bool
func (t TimeLogicalType) IsAdjustedToUTC() bool
func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool
func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (TimeLogicalType) IsNested() bool
func (TimeLogicalType) IsNone() bool
func (TimeLogicalType) IsSerialized() bool
func (TimeLogicalType) IsValid() bool
func (t TimeLogicalType) MarshalJSON() ([]byte, error)
func (TimeLogicalType) SortOrder() SortOrder
func (t TimeLogicalType) String() string
func (t TimeLogicalType) TimeUnit() TimeUnitType
func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
TimeUnitType is an enum for denoting whether a time based logical type is using milliseconds, microseconds or nanoseconds.
type TimeUnitType int
Constants for the TimeUnitType
const ( TimeUnitMillis TimeUnitType = iota TimeUnitMicros TimeUnitNanos TimeUnitUnknown )
TimestampLogicalType represents an int64 number that can be decoded into a year, month, day, hour, minute, second, and subsecond
type TimestampLogicalType struct {
// contains filtered or unexported fields
}
func (t TimestampLogicalType) Equals(rhs LogicalType) bool
func (t TimestampLogicalType) IsAdjustedToUTC() bool
func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool
func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (t TimestampLogicalType) IsFromConvertedType() bool
func (TimestampLogicalType) IsNested() bool
func (TimestampLogicalType) IsNone() bool
func (t TimestampLogicalType) IsSerialized() bool
func (TimestampLogicalType) IsValid() bool
func (t TimestampLogicalType) MarshalJSON() ([]byte, error)
func (TimestampLogicalType) SortOrder() SortOrder
func (t TimestampLogicalType) String() string
func (t TimestampLogicalType) TimeUnit() TimeUnitType
func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
TimestampOpt is an option used when creating a new timestamp logical type.
type TimestampOpt func(*TimestampLogicalType)
func WithTSForceConverted() TimestampOpt
WithTSForceConverted enables force converted mode.
func WithTSFromConverted() TimestampOpt
WithTSFromConverted enables the timestamp logical type to be constructed from a converted type.
func WithTSIsAdjustedToUTC() TimestampOpt
WithTSIsAdjustedToUTC sets the IsAdjustedToUTC field of the timestamp type.
func WithTSTimeUnitType(unit TimeUnitType) TimestampOpt
WithTSTimeUnitType sets the time unit for the timestamp type
UUIDLogicalType can only be used with a FixedLenByteArray column that is exactly 16 bytes long.
type UUIDLogicalType struct {
// contains filtered or unexported fields
}
func (UUIDLogicalType) Equals(rhs LogicalType) bool
func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool
func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (UUIDLogicalType) IsNested() bool
func (UUIDLogicalType) IsNone() bool
func (UUIDLogicalType) IsSerialized() bool
func (UUIDLogicalType) IsValid() bool
func (UUIDLogicalType) MarshalJSON() ([]byte, error)
func (UUIDLogicalType) SortOrder() SortOrder
func (UUIDLogicalType) String() string
func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
UnknownLogicalType is a type that is essentially a placeholder for when we don't know the type.
type UnknownLogicalType struct {
// contains filtered or unexported fields
}
func (UnknownLogicalType) Equals(rhs LogicalType) bool
func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool
func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool
func (UnknownLogicalType) IsNested() bool
func (UnknownLogicalType) IsNone() bool
func (UnknownLogicalType) IsSerialized() bool
func (UnknownLogicalType) IsValid() bool
func (UnknownLogicalType) MarshalJSON() ([]byte, error)
func (UnknownLogicalType) SortOrder() SortOrder
func (UnknownLogicalType) String() string
func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata)
Visitor is an interface for creating functionality to walk the schema tree.
A visitor can be passed to the Visit function of a Node in order to walk the tree. VisitPre is called the first time a node is encountered. If it is a group node, the return is checked and if it is false, the children will be skipped.
VisitPost is called after visiting any children
type Visitor interface { VisitPre(Node) bool VisitPost(Node) }