...

Package metadata

import "github.com/apache/arrow/go/v15/parquet/metadata"
Overview
Index

Overview ▾

Index ▾

Variables
func GetStatValue(typ parquet.Type, val []byte) interface{}
type AppVersion
    func NewAppVersion(createdby string) *AppVersion
    func NewAppVersionExplicit(app string, major, minor, patch int) *AppVersion
    func (v AppVersion) Equal(other *AppVersion) bool
    func (v AppVersion) HasCorrectStatistics(coltype parquet.Type, logicalType schema.LogicalType, stats EncodedStatistics, sort schema.SortOrder) bool
    func (v AppVersion) LessThan(other *AppVersion) bool
type BooleanStatistics
    func NewBooleanStatistics(descr *schema.Column, mem memory.Allocator) *BooleanStatistics
    func NewBooleanStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *BooleanStatistics
    func (s *BooleanStatistics) Descr() *schema.Column
    func (s *BooleanStatistics) DistinctCount() int64
    func (s *BooleanStatistics) Encode() (enc EncodedStatistics, err error)
    func (s *BooleanStatistics) EncodeMax() []byte
    func (s *BooleanStatistics) EncodeMin() []byte
    func (s *BooleanStatistics) Equals(other TypedStatistics) bool
    func (s *BooleanStatistics) HasDistinctCount() bool
    func (s *BooleanStatistics) HasMinMax() bool
    func (s *BooleanStatistics) HasNullCount() bool
    func (s *BooleanStatistics) IncDistinct(n int64)
    func (s *BooleanStatistics) IncNulls(n int64)
    func (s *BooleanStatistics) IncNumValues(n int64)
    func (s *BooleanStatistics) Max() bool
    func (s *BooleanStatistics) Merge(other TypedStatistics)
    func (s *BooleanStatistics) Min() bool
    func (s *BooleanStatistics) MinMaxEqual(rhs *BooleanStatistics) bool
    func (s *BooleanStatistics) NullCount() int64
    func (s *BooleanStatistics) NumValues() int64
    func (s *BooleanStatistics) Reset()
    func (s *BooleanStatistics) SetMinMax(argMin, argMax bool)
    func (s *BooleanStatistics) Type() parquet.Type
    func (s *BooleanStatistics) Update(values []bool, numNull int64)
    func (s *BooleanStatistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *BooleanStatistics) UpdateSpaced(values []bool, validBits []byte, validBitsOffset, numNull int64)
type ByteArrayStatistics
    func NewByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *ByteArrayStatistics
    func NewByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *ByteArrayStatistics
    func (s *ByteArrayStatistics) Descr() *schema.Column
    func (s *ByteArrayStatistics) DistinctCount() int64
    func (s *ByteArrayStatistics) Encode() (enc EncodedStatistics, err error)
    func (s *ByteArrayStatistics) EncodeMax() []byte
    func (s *ByteArrayStatistics) EncodeMin() []byte
    func (s *ByteArrayStatistics) Equals(other TypedStatistics) bool
    func (s *ByteArrayStatistics) HasDistinctCount() bool
    func (s *ByteArrayStatistics) HasMinMax() bool
    func (s *ByteArrayStatistics) HasNullCount() bool
    func (s *ByteArrayStatistics) IncDistinct(n int64)
    func (s *ByteArrayStatistics) IncNulls(n int64)
    func (s *ByteArrayStatistics) IncNumValues(n int64)
    func (s *ByteArrayStatistics) Max() parquet.ByteArray
    func (s *ByteArrayStatistics) Merge(other TypedStatistics)
    func (s *ByteArrayStatistics) Min() parquet.ByteArray
    func (s *ByteArrayStatistics) MinMaxEqual(rhs *ByteArrayStatistics) bool
    func (s *ByteArrayStatistics) NullCount() int64
    func (s *ByteArrayStatistics) NumValues() int64
    func (s *ByteArrayStatistics) Reset()
    func (s *ByteArrayStatistics) SetMinMax(argMin, argMax parquet.ByteArray)
    func (s *ByteArrayStatistics) Type() parquet.Type
    func (s *ByteArrayStatistics) Update(values []parquet.ByteArray, numNull int64)
    func (s *ByteArrayStatistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *ByteArrayStatistics) UpdateSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset, numNull int64)
type ChunkMetaInfo
type ColumnChunkMetaData
    func NewColumnChunkMetaData(column *format.ColumnChunk, descr *schema.Column, writerVersion *AppVersion, rowGroupOrdinal, columnOrdinal int16, fileDecryptor encryption.FileDecryptor) (*ColumnChunkMetaData, error)
    func (c *ColumnChunkMetaData) BloomFilterOffset() int64
    func (c *ColumnChunkMetaData) Compression() compress.Compression
    func (c *ColumnChunkMetaData) CryptoMetadata() *format.ColumnCryptoMetaData
    func (c *ColumnChunkMetaData) DataPageOffset() int64
    func (c *ColumnChunkMetaData) DictionaryPageOffset() int64
    func (c *ColumnChunkMetaData) EncodingStats() []PageEncodingStats
    func (c *ColumnChunkMetaData) Encodings() []parquet.Encoding
    func (c *ColumnChunkMetaData) Equals(other *ColumnChunkMetaData) bool
    func (c *ColumnChunkMetaData) FileOffset() int64
    func (c *ColumnChunkMetaData) FilePath() string
    func (c *ColumnChunkMetaData) HasDictionaryPage() bool
    func (c *ColumnChunkMetaData) HasIndexPage() bool
    func (c *ColumnChunkMetaData) IndexPageOffset() int64
    func (c *ColumnChunkMetaData) NumValues() int64
    func (c *ColumnChunkMetaData) PathInSchema() parquet.ColumnPath
    func (c *ColumnChunkMetaData) Statistics() (TypedStatistics, error)
    func (c *ColumnChunkMetaData) StatsSet() (bool, error)
    func (c *ColumnChunkMetaData) TotalCompressedSize() int64
    func (c *ColumnChunkMetaData) TotalUncompressedSize() int64
    func (c *ColumnChunkMetaData) Type() parquet.Type
type ColumnChunkMetaDataBuilder
    func NewColumnChunkMetaDataBuilder(props *parquet.WriterProperties, column *schema.Column) *ColumnChunkMetaDataBuilder
    func NewColumnChunkMetaDataBuilderWithContents(props *parquet.WriterProperties, column *schema.Column, chunk *format.ColumnChunk) *ColumnChunkMetaDataBuilder
    func (c *ColumnChunkMetaDataBuilder) Contents() *format.ColumnChunk
    func (c *ColumnChunkMetaDataBuilder) Descr() *schema.Column
    func (c *ColumnChunkMetaDataBuilder) Finish(info ChunkMetaInfo, hasDict, dictFallback bool, encStats EncodingStats, metaEncryptor encryption.Encryptor) error
    func (c *ColumnChunkMetaDataBuilder) SetFilePath(val string)
    func (c *ColumnChunkMetaDataBuilder) SetStats(val EncodedStatistics)
    func (c *ColumnChunkMetaDataBuilder) TotalCompressedSize() int64
    func (c *ColumnChunkMetaDataBuilder) WriteTo(w io.Writer) (int64, error)
type EncodedStatistics
    func (e *EncodedStatistics) ApplyStatSizeLimits(length int)
    func (e *EncodedStatistics) IsSet() bool
    func (e *EncodedStatistics) SetDistinctCount(val int64) *EncodedStatistics
    func (e *EncodedStatistics) SetMax(val []byte) *EncodedStatistics
    func (e *EncodedStatistics) SetMin(val []byte) *EncodedStatistics
    func (e *EncodedStatistics) SetNullCount(val int64) *EncodedStatistics
    func (e *EncodedStatistics) ToThrift() (stats *format.Statistics)
type EncodingStats
type FileCryptoMetadata
    func NewFileCryptoMetaData(metadata []byte) (ret FileCryptoMetadata, err error)
    func (fc FileCryptoMetadata) EncryptionAlgorithm() parquet.Algorithm
    func (fc FileCryptoMetadata) KeyMetadata() []byte
    func (fc FileCryptoMetadata) Len() int
    func (fc FileCryptoMetadata) WriteTo(w io.Writer) (int64, error)
type FileMetaData
    func NewFileMetaData(data []byte, fileDecryptor encryption.FileDecryptor) (*FileMetaData, error)
    func (f *FileMetaData) AppendRowGroups(other *FileMetaData) error
    func (f *FileMetaData) EncryptionAlgorithm() parquet.Algorithm
    func (f *FileMetaData) Equals(other *FileMetaData) bool
    func (f *FileMetaData) KeyValueMetadata() KeyValueMetadata
    func (f *FileMetaData) NumSchemaElements() int
    func (f *FileMetaData) RowGroup(i int) *RowGroupMetaData
    func (f *FileMetaData) Serialize(ctx context.Context) ([]byte, error)
    func (f *FileMetaData) SerializeString(ctx context.Context) (string, error)
    func (f *FileMetaData) SetFilePath(path string)
    func (f *FileMetaData) Size() int
    func (f *FileMetaData) Subset(rowGroups []int) (*FileMetaData, error)
    func (f *FileMetaData) VerifySignature(signature []byte) bool
    func (f *FileMetaData) Version() parquet.Version
    func (f *FileMetaData) WriteTo(w io.Writer, encryptor encryption.Encryptor) (int64, error)
    func (f *FileMetaData) WriterVersion() *AppVersion
type FileMetaDataBuilder
    func NewFileMetadataBuilder(schema *schema.Schema, props *parquet.WriterProperties, kvmeta KeyValueMetadata) *FileMetaDataBuilder
    func (f *FileMetaDataBuilder) AppendKeyValueMetadata(key string, value string) error
    func (f *FileMetaDataBuilder) AppendRowGroup() *RowGroupMetaDataBuilder
    func (f *FileMetaDataBuilder) Finish() (*FileMetaData, error)
    func (f *FileMetaDataBuilder) GetFileCryptoMetaData() *FileCryptoMetadata
type FixedLenByteArrayStatistics
    func NewFixedLenByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *FixedLenByteArrayStatistics
    func NewFixedLenByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *FixedLenByteArrayStatistics
    func (s *FixedLenByteArrayStatistics) Descr() *schema.Column
    func (s *FixedLenByteArrayStatistics) DistinctCount() int64
    func (s *FixedLenByteArrayStatistics) Encode() (enc EncodedStatistics, err error)
    func (s *FixedLenByteArrayStatistics) EncodeMax() []byte
    func (s *FixedLenByteArrayStatistics) EncodeMin() []byte
    func (s *FixedLenByteArrayStatistics) Equals(other TypedStatistics) bool
    func (s *FixedLenByteArrayStatistics) HasDistinctCount() bool
    func (s *FixedLenByteArrayStatistics) HasMinMax() bool
    func (s *FixedLenByteArrayStatistics) HasNullCount() bool
    func (s *FixedLenByteArrayStatistics) IncDistinct(n int64)
    func (s *FixedLenByteArrayStatistics) IncNulls(n int64)
    func (s *FixedLenByteArrayStatistics) IncNumValues(n int64)
    func (s *FixedLenByteArrayStatistics) Max() parquet.FixedLenByteArray
    func (s *FixedLenByteArrayStatistics) Merge(other TypedStatistics)
    func (s *FixedLenByteArrayStatistics) Min() parquet.FixedLenByteArray
    func (s *FixedLenByteArrayStatistics) MinMaxEqual(rhs *FixedLenByteArrayStatistics) bool
    func (s *FixedLenByteArrayStatistics) NullCount() int64
    func (s *FixedLenByteArrayStatistics) NumValues() int64
    func (s *FixedLenByteArrayStatistics) Reset()
    func (s *FixedLenByteArrayStatistics) SetMinMax(argMin, argMax parquet.FixedLenByteArray)
    func (s *FixedLenByteArrayStatistics) Type() parquet.Type
    func (s *FixedLenByteArrayStatistics) Update(values []parquet.FixedLenByteArray, numNull int64)
    func (s *FixedLenByteArrayStatistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *FixedLenByteArrayStatistics) UpdateSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset, numNull int64)
type Float16Statistics
    func NewFloat16Statistics(descr *schema.Column, mem memory.Allocator) *Float16Statistics
    func NewFloat16StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float16Statistics
    func (s *Float16Statistics) Descr() *schema.Column
    func (s *Float16Statistics) DistinctCount() int64
    func (s *Float16Statistics) Encode() (enc EncodedStatistics, err error)
    func (s *Float16Statistics) EncodeMax() []byte
    func (s *Float16Statistics) EncodeMin() []byte
    func (s *Float16Statistics) Equals(other TypedStatistics) bool
    func (s *Float16Statistics) HasDistinctCount() bool
    func (s *Float16Statistics) HasMinMax() bool
    func (s *Float16Statistics) HasNullCount() bool
    func (s *Float16Statistics) IncDistinct(n int64)
    func (s *Float16Statistics) IncNulls(n int64)
    func (s *Float16Statistics) IncNumValues(n int64)
    func (s *Float16Statistics) Max() parquet.FixedLenByteArray
    func (s *Float16Statistics) Merge(other TypedStatistics)
    func (s *Float16Statistics) Min() parquet.FixedLenByteArray
    func (s *Float16Statistics) MinMaxEqual(rhs *Float16Statistics) bool
    func (s *Float16Statistics) NullCount() int64
    func (s *Float16Statistics) NumValues() int64
    func (s *Float16Statistics) Reset()
    func (s *Float16Statistics) SetMinMax(argMin, argMax parquet.FixedLenByteArray)
    func (s *Float16Statistics) Type() parquet.Type
    func (s *Float16Statistics) Update(values []parquet.FixedLenByteArray, numNull int64)
    func (s *Float16Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *Float16Statistics) UpdateSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset, numNull int64)
type Float32Statistics
    func NewFloat32Statistics(descr *schema.Column, mem memory.Allocator) *Float32Statistics
    func NewFloat32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float32Statistics
    func (s *Float32Statistics) Descr() *schema.Column
    func (s *Float32Statistics) DistinctCount() int64
    func (s *Float32Statistics) Encode() (enc EncodedStatistics, err error)
    func (s *Float32Statistics) EncodeMax() []byte
    func (s *Float32Statistics) EncodeMin() []byte
    func (s *Float32Statistics) Equals(other TypedStatistics) bool
    func (s *Float32Statistics) HasDistinctCount() bool
    func (s *Float32Statistics) HasMinMax() bool
    func (s *Float32Statistics) HasNullCount() bool
    func (s *Float32Statistics) IncDistinct(n int64)
    func (s *Float32Statistics) IncNulls(n int64)
    func (s *Float32Statistics) IncNumValues(n int64)
    func (s *Float32Statistics) Max() float32
    func (s *Float32Statistics) Merge(other TypedStatistics)
    func (s *Float32Statistics) Min() float32
    func (s *Float32Statistics) MinMaxEqual(rhs *Float32Statistics) bool
    func (s *Float32Statistics) NullCount() int64
    func (s *Float32Statistics) NumValues() int64
    func (s *Float32Statistics) Reset()
    func (s *Float32Statistics) SetMinMax(argMin, argMax float32)
    func (s *Float32Statistics) Type() parquet.Type
    func (s *Float32Statistics) Update(values []float32, numNull int64)
    func (s *Float32Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *Float32Statistics) UpdateSpaced(values []float32, validBits []byte, validBitsOffset, numNull int64)
type Float64Statistics
    func NewFloat64Statistics(descr *schema.Column, mem memory.Allocator) *Float64Statistics
    func NewFloat64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float64Statistics
    func (s *Float64Statistics) Descr() *schema.Column
    func (s *Float64Statistics) DistinctCount() int64
    func (s *Float64Statistics) Encode() (enc EncodedStatistics, err error)
    func (s *Float64Statistics) EncodeMax() []byte
    func (s *Float64Statistics) EncodeMin() []byte
    func (s *Float64Statistics) Equals(other TypedStatistics) bool
    func (s *Float64Statistics) HasDistinctCount() bool
    func (s *Float64Statistics) HasMinMax() bool
    func (s *Float64Statistics) HasNullCount() bool
    func (s *Float64Statistics) IncDistinct(n int64)
    func (s *Float64Statistics) IncNulls(n int64)
    func (s *Float64Statistics) IncNumValues(n int64)
    func (s *Float64Statistics) Max() float64
    func (s *Float64Statistics) Merge(other TypedStatistics)
    func (s *Float64Statistics) Min() float64
    func (s *Float64Statistics) MinMaxEqual(rhs *Float64Statistics) bool
    func (s *Float64Statistics) NullCount() int64
    func (s *Float64Statistics) NumValues() int64
    func (s *Float64Statistics) Reset()
    func (s *Float64Statistics) SetMinMax(argMin, argMax float64)
    func (s *Float64Statistics) Type() parquet.Type
    func (s *Float64Statistics) Update(values []float64, numNull int64)
    func (s *Float64Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *Float64Statistics) UpdateSpaced(values []float64, validBits []byte, validBitsOffset, numNull int64)
type Int32Statistics
    func NewInt32Statistics(descr *schema.Column, mem memory.Allocator) *Int32Statistics
    func NewInt32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int32Statistics
    func (s *Int32Statistics) Descr() *schema.Column
    func (s *Int32Statistics) DistinctCount() int64
    func (s *Int32Statistics) Encode() (enc EncodedStatistics, err error)
    func (s *Int32Statistics) EncodeMax() []byte
    func (s *Int32Statistics) EncodeMin() []byte
    func (s *Int32Statistics) Equals(other TypedStatistics) bool
    func (s *Int32Statistics) HasDistinctCount() bool
    func (s *Int32Statistics) HasMinMax() bool
    func (s *Int32Statistics) HasNullCount() bool
    func (s *Int32Statistics) IncDistinct(n int64)
    func (s *Int32Statistics) IncNulls(n int64)
    func (s *Int32Statistics) IncNumValues(n int64)
    func (s *Int32Statistics) Max() int32
    func (s *Int32Statistics) Merge(other TypedStatistics)
    func (s *Int32Statistics) Min() int32
    func (s *Int32Statistics) MinMaxEqual(rhs *Int32Statistics) bool
    func (s *Int32Statistics) NullCount() int64
    func (s *Int32Statistics) NumValues() int64
    func (s *Int32Statistics) Reset()
    func (s *Int32Statistics) SetMinMax(argMin, argMax int32)
    func (s *Int32Statistics) Type() parquet.Type
    func (s *Int32Statistics) Update(values []int32, numNull int64)
    func (s *Int32Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *Int32Statistics) UpdateSpaced(values []int32, validBits []byte, validBitsOffset, numNull int64)
type Int64Statistics
    func NewInt64Statistics(descr *schema.Column, mem memory.Allocator) *Int64Statistics
    func NewInt64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int64Statistics
    func (s *Int64Statistics) Descr() *schema.Column
    func (s *Int64Statistics) DistinctCount() int64
    func (s *Int64Statistics) Encode() (enc EncodedStatistics, err error)
    func (s *Int64Statistics) EncodeMax() []byte
    func (s *Int64Statistics) EncodeMin() []byte
    func (s *Int64Statistics) Equals(other TypedStatistics) bool
    func (s *Int64Statistics) HasDistinctCount() bool
    func (s *Int64Statistics) HasMinMax() bool
    func (s *Int64Statistics) HasNullCount() bool
    func (s *Int64Statistics) IncDistinct(n int64)
    func (s *Int64Statistics) IncNulls(n int64)
    func (s *Int64Statistics) IncNumValues(n int64)
    func (s *Int64Statistics) Max() int64
    func (s *Int64Statistics) Merge(other TypedStatistics)
    func (s *Int64Statistics) Min() int64
    func (s *Int64Statistics) MinMaxEqual(rhs *Int64Statistics) bool
    func (s *Int64Statistics) NullCount() int64
    func (s *Int64Statistics) NumValues() int64
    func (s *Int64Statistics) Reset()
    func (s *Int64Statistics) SetMinMax(argMin, argMax int64)
    func (s *Int64Statistics) Type() parquet.Type
    func (s *Int64Statistics) Update(values []int64, numNull int64)
    func (s *Int64Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *Int64Statistics) UpdateSpaced(values []int64, validBits []byte, validBitsOffset, numNull int64)
type Int96Statistics
    func NewInt96Statistics(descr *schema.Column, mem memory.Allocator) *Int96Statistics
    func NewInt96StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int96Statistics
    func (s *Int96Statistics) Descr() *schema.Column
    func (s *Int96Statistics) DistinctCount() int64
    func (s *Int96Statistics) Encode() (enc EncodedStatistics, err error)
    func (s *Int96Statistics) EncodeMax() []byte
    func (s *Int96Statistics) EncodeMin() []byte
    func (s *Int96Statistics) Equals(other TypedStatistics) bool
    func (s *Int96Statistics) HasDistinctCount() bool
    func (s *Int96Statistics) HasMinMax() bool
    func (s *Int96Statistics) HasNullCount() bool
    func (s *Int96Statistics) IncDistinct(n int64)
    func (s *Int96Statistics) IncNulls(n int64)
    func (s *Int96Statistics) IncNumValues(n int64)
    func (s *Int96Statistics) Max() parquet.Int96
    func (s *Int96Statistics) Merge(other TypedStatistics)
    func (s *Int96Statistics) Min() parquet.Int96
    func (s *Int96Statistics) MinMaxEqual(rhs *Int96Statistics) bool
    func (s *Int96Statistics) NullCount() int64
    func (s *Int96Statistics) NumValues() int64
    func (s *Int96Statistics) Reset()
    func (s *Int96Statistics) SetMinMax(argMin, argMax parquet.Int96)
    func (s *Int96Statistics) Type() parquet.Type
    func (s *Int96Statistics) Update(values []parquet.Int96, numNull int64)
    func (s *Int96Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error
    func (s *Int96Statistics) UpdateSpaced(values []parquet.Int96, validBits []byte, validBitsOffset, numNull int64)
type KeyValueMetadata
    func NewKeyValueMetadata() KeyValueMetadata
    func (k *KeyValueMetadata) Append(key, value string) error
    func (k KeyValueMetadata) Equals(other KeyValueMetadata) bool
    func (k KeyValueMetadata) FindValue(key string) *string
    func (k KeyValueMetadata) Keys() (ret []string)
    func (k KeyValueMetadata) Len() int
    func (k KeyValueMetadata) Values() (ret []string)
type PageEncodingStats
type RowGroupMetaData
    func NewRowGroupMetaData(rg *format.RowGroup, sc *schema.Schema, version *AppVersion, decryptor encryption.FileDecryptor) *RowGroupMetaData
    func (r *RowGroupMetaData) ColumnChunk(i int) (*ColumnChunkMetaData, error)
    func (r *RowGroupMetaData) Equals(other *RowGroupMetaData) bool
    func (r *RowGroupMetaData) FileOffset() int64
    func (r *RowGroupMetaData) NumColumns() int
    func (r *RowGroupMetaData) NumRows() int64
    func (r *RowGroupMetaData) Ordinal() int16
    func (r *RowGroupMetaData) TotalByteSize() int64
    func (r *RowGroupMetaData) TotalCompressedSize() int64
type RowGroupMetaDataBuilder
    func NewRowGroupMetaDataBuilder(props *parquet.WriterProperties, schema *schema.Schema, rg *format.RowGroup) *RowGroupMetaDataBuilder
    func (r *RowGroupMetaDataBuilder) CurrentColumn() int
    func (r *RowGroupMetaDataBuilder) Finish(totalBytesWritten int64, ordinal int16) error
    func (r *RowGroupMetaDataBuilder) NextColumnChunk() *ColumnChunkMetaDataBuilder
    func (r *RowGroupMetaDataBuilder) NumColumns() int
    func (r *RowGroupMetaDataBuilder) NumRows() int64
    func (r *RowGroupMetaDataBuilder) SetNumRows(nrows int)
type StatProvider
type TypedStatistics
    func NewStatistics(descr *schema.Column, mem memory.Allocator) TypedStatistics
    func NewStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) TypedStatistics

Package files

app_version.go column_chunk.go file.go row_group.go statistics.go statistics_types.gen.go

Variables

DefaultCompressionType is the compression codec used unless a different compression is specified in the properties.

var DefaultCompressionType = compress.Codecs.Uncompressed
var (

    // Parquet816FixedVersion is the version used for fixing PARQUET-816
    // that changed the padding calculations for dictionary headers on row groups.
    Parquet816FixedVersion = NewAppVersionExplicit("parquet-mr", 1, 2, 9)
)

func GetStatValue

func GetStatValue(typ parquet.Type, val []byte) interface{}

type AppVersion

AppVersion represents a specific application version either read from or written to a parquet file.

type AppVersion struct {
    App     string
    Build   string
    Version struct {
        Major      int
        Minor      int
        Patch      int
        Unknown    string
        PreRelease string
        BuildInfo  string
    }
}

func NewAppVersion

func NewAppVersion(createdby string) *AppVersion

NewAppVersion parses a "created by" string such as "parquet-go 1.0.0".

It also supports handling pre-releases and build info, such as:

parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)

func NewAppVersionExplicit

func NewAppVersionExplicit(app string, major, minor, patch int) *AppVersion

NewAppVersionExplicit is a convenience function to construct a specific application version from the given app string and major, minor, and patch version numbers.

func (AppVersion) Equal

func (v AppVersion) Equal(other *AppVersion) bool

Equal only compares the Application and major/minor/patch versions.

Pre-release and build info are not considered.

func (AppVersion) HasCorrectStatistics

func (v AppVersion) HasCorrectStatistics(coltype parquet.Type, logicalType schema.LogicalType, stats EncodedStatistics, sort schema.SortOrder) bool

HasCorrectStatistics checks whether the statistics are valid to use, based on the primitive type and this application version, since earlier versions had issues with computing stats properly.

Reference: parquet-cpp/src/parquet/metadata.cc

PARQUET-686 has more discussion on statistics

func (AppVersion) LessThan

func (v AppVersion) LessThan(other *AppVersion) bool

LessThan compares the app versions and returns true if this version is "less than" the passed version.

If the apps don't match, this always returns false. Otherwise it compares the major versions first, then the minor versions, and finally the patch versions.

Pre-release and build info are not considered.

type BooleanStatistics

BooleanStatistics is the typed interface for managing stats for a column of Boolean type.

type BooleanStatistics struct {
    // contains filtered or unexported fields
}

func NewBooleanStatistics

func NewBooleanStatistics(descr *schema.Column, mem memory.Allocator) *BooleanStatistics

NewBooleanStatistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Types.Boolean.

func NewBooleanStatisticsFromEncoded

func NewBooleanStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *BooleanStatistics

NewBooleanStatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*BooleanStatistics) Descr

func (s *BooleanStatistics) Descr() *schema.Column

func (*BooleanStatistics) DistinctCount

func (s *BooleanStatistics) DistinctCount() int64

func (*BooleanStatistics) Encode

func (s *BooleanStatistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*BooleanStatistics) EncodeMax

func (s *BooleanStatistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*BooleanStatistics) EncodeMin

func (s *BooleanStatistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*BooleanStatistics) Equals

func (s *BooleanStatistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*BooleanStatistics) HasDistinctCount

func (s *BooleanStatistics) HasDistinctCount() bool

func (*BooleanStatistics) HasMinMax

func (s *BooleanStatistics) HasMinMax() bool

func (*BooleanStatistics) HasNullCount

func (s *BooleanStatistics) HasNullCount() bool

func (*BooleanStatistics) IncDistinct

func (s *BooleanStatistics) IncDistinct(n int64)

func (*BooleanStatistics) IncNulls

func (s *BooleanStatistics) IncNulls(n int64)

func (*BooleanStatistics) IncNumValues

func (s *BooleanStatistics) IncNumValues(n int64)

func (*BooleanStatistics) Max

func (s *BooleanStatistics) Max() bool

func (*BooleanStatistics) Merge

func (s *BooleanStatistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*BooleanStatistics) Min

func (s *BooleanStatistics) Min() bool

func (*BooleanStatistics) MinMaxEqual

func (s *BooleanStatistics) MinMaxEqual(rhs *BooleanStatistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*BooleanStatistics) NullCount

func (s *BooleanStatistics) NullCount() int64

func (*BooleanStatistics) NumValues

func (s *BooleanStatistics) NumValues() int64

func (*BooleanStatistics) Reset

func (s *BooleanStatistics) Reset()

func (*BooleanStatistics) SetMinMax

func (s *BooleanStatistics) SetMinMax(argMin, argMax bool)

SetMinMax updates the min and max values only if they are not currently set, or if argMin is less than the current min or argMax is greater than the current max, respectively.

func (*BooleanStatistics) Type

func (s *BooleanStatistics) Type() parquet.Type

func (*BooleanStatistics) Update

func (s *BooleanStatistics) Update(values []bool, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*BooleanStatistics) UpdateFromArrow

func (s *BooleanStatistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*BooleanStatistics) UpdateSpaced

func (s *BooleanStatistics) UpdateSpaced(values []bool, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type ByteArrayStatistics

ByteArrayStatistics is the typed interface for managing stats for a column of ByteArray type.

type ByteArrayStatistics struct {
    // contains filtered or unexported fields
}

func NewByteArrayStatistics

func NewByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *ByteArrayStatistics

NewByteArrayStatistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Types.ByteArray.

func NewByteArrayStatisticsFromEncoded

func NewByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *ByteArrayStatistics

NewByteArrayStatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*ByteArrayStatistics) Descr

func (s *ByteArrayStatistics) Descr() *schema.Column

func (*ByteArrayStatistics) DistinctCount

func (s *ByteArrayStatistics) DistinctCount() int64

func (*ByteArrayStatistics) Encode

func (s *ByteArrayStatistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*ByteArrayStatistics) EncodeMax

func (s *ByteArrayStatistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*ByteArrayStatistics) EncodeMin

func (s *ByteArrayStatistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*ByteArrayStatistics) Equals

func (s *ByteArrayStatistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*ByteArrayStatistics) HasDistinctCount

func (s *ByteArrayStatistics) HasDistinctCount() bool

func (*ByteArrayStatistics) HasMinMax

func (s *ByteArrayStatistics) HasMinMax() bool

func (*ByteArrayStatistics) HasNullCount

func (s *ByteArrayStatistics) HasNullCount() bool

func (*ByteArrayStatistics) IncDistinct

func (s *ByteArrayStatistics) IncDistinct(n int64)

func (*ByteArrayStatistics) IncNulls

func (s *ByteArrayStatistics) IncNulls(n int64)

func (*ByteArrayStatistics) IncNumValues

func (s *ByteArrayStatistics) IncNumValues(n int64)

func (*ByteArrayStatistics) Max

func (s *ByteArrayStatistics) Max() parquet.ByteArray

func (*ByteArrayStatistics) Merge

func (s *ByteArrayStatistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*ByteArrayStatistics) Min

func (s *ByteArrayStatistics) Min() parquet.ByteArray

func (*ByteArrayStatistics) MinMaxEqual

func (s *ByteArrayStatistics) MinMaxEqual(rhs *ByteArrayStatistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*ByteArrayStatistics) NullCount

func (s *ByteArrayStatistics) NullCount() int64

func (*ByteArrayStatistics) NumValues

func (s *ByteArrayStatistics) NumValues() int64

func (*ByteArrayStatistics) Reset

func (s *ByteArrayStatistics) Reset()

func (*ByteArrayStatistics) SetMinMax

func (s *ByteArrayStatistics) SetMinMax(argMin, argMax parquet.ByteArray)

SetMinMax updates the min and max values only if they are not currently set or if argMin is less than the current min / argMax is greater than the current max

func (*ByteArrayStatistics) Type

func (s *ByteArrayStatistics) Type() parquet.Type

func (*ByteArrayStatistics) Update

func (s *ByteArrayStatistics) Update(values []parquet.ByteArray, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*ByteArrayStatistics) UpdateFromArrow

func (s *ByteArrayStatistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*ByteArrayStatistics) UpdateSpaced

func (s *ByteArrayStatistics) UpdateSpaced(values []parquet.ByteArray, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type ChunkMetaInfo

ChunkMetaInfo is a helper struct for passing the offset and size information for finishing the building of column chunk metadata

type ChunkMetaInfo struct {
    NumValues        int64
    DictPageOffset   int64
    IndexPageOffset  int64
    DataPageOffset   int64
    CompressedSize   int64
    UncompressedSize int64
}

type ColumnChunkMetaData

ColumnChunkMetaData is a proxy around format.ColumnChunkMetaData containing all of the information and metadata for a given column chunk and its associated Column

type ColumnChunkMetaData struct {
    // contains filtered or unexported fields
}

func NewColumnChunkMetaData

func NewColumnChunkMetaData(column *format.ColumnChunk, descr *schema.Column, writerVersion *AppVersion, rowGroupOrdinal, columnOrdinal int16, fileDecryptor encryption.FileDecryptor) (*ColumnChunkMetaData, error)

NewColumnChunkMetaData creates an instance of the metadata from a column chunk and descriptor

This is primarily used internally or between the subpackages. ColumnChunkMetaDataBuilder should be used by consumers instead of using this directly.

func (*ColumnChunkMetaData) BloomFilterOffset

func (c *ColumnChunkMetaData) BloomFilterOffset() int64

BloomFilterOffset is the byte offset from the beginning of the file to the bloom filter data.

func (*ColumnChunkMetaData) Compression

func (c *ColumnChunkMetaData) Compression() compress.Compression

Compression provides the type of compression used for this particular chunk.

func (*ColumnChunkMetaData) CryptoMetadata

func (c *ColumnChunkMetaData) CryptoMetadata() *format.ColumnCryptoMetaData

CryptoMetadata returns the cryptographic metadata for how this column was encrypted and how to decrypt it.

func (*ColumnChunkMetaData) DataPageOffset

func (c *ColumnChunkMetaData) DataPageOffset() int64

DataPageOffset returns the location in the file where the data pages begin for this column

func (*ColumnChunkMetaData) DictionaryPageOffset

func (c *ColumnChunkMetaData) DictionaryPageOffset() int64

DictionaryPageOffset returns the location in the file where the dictionary page starts

func (*ColumnChunkMetaData) EncodingStats

func (c *ColumnChunkMetaData) EncodingStats() []PageEncodingStats

EncodingStats connects the order of encodings based on the list of pages and types

func (*ColumnChunkMetaData) Encodings

func (c *ColumnChunkMetaData) Encodings() []parquet.Encoding

Encodings returns the list of different encodings used in this chunk

func (*ColumnChunkMetaData) Equals

func (c *ColumnChunkMetaData) Equals(other *ColumnChunkMetaData) bool

func (*ColumnChunkMetaData) FileOffset

func (c *ColumnChunkMetaData) FileOffset() int64

FileOffset is the location in the file where the column data begins

func (*ColumnChunkMetaData) FilePath

func (c *ColumnChunkMetaData) FilePath() string

FilePath gives the name of the parquet file if provided in the metadata

func (*ColumnChunkMetaData) HasDictionaryPage

func (c *ColumnChunkMetaData) HasDictionaryPage() bool

HasDictionaryPage returns true if there is a dictionary page offset set in this metadata.

func (*ColumnChunkMetaData) HasIndexPage

func (c *ColumnChunkMetaData) HasIndexPage() bool

HasIndexPage returns true if the offset for the index page is set in the metadata

func (*ColumnChunkMetaData) IndexPageOffset

func (c *ColumnChunkMetaData) IndexPageOffset() int64

IndexPageOffset is the location in the file where the index page starts.

func (*ColumnChunkMetaData) NumValues

func (c *ColumnChunkMetaData) NumValues() int64

NumValues is the number of values stored in just this chunk including nulls.

func (*ColumnChunkMetaData) PathInSchema

func (c *ColumnChunkMetaData) PathInSchema() parquet.ColumnPath

PathInSchema is the full path to this column from the root of the schema including any nested columns

func (*ColumnChunkMetaData) Statistics

func (c *ColumnChunkMetaData) Statistics() (TypedStatistics, error)

Statistics can return nil if there are no stats in this metadata

func (*ColumnChunkMetaData) StatsSet

func (c *ColumnChunkMetaData) StatsSet() (bool, error)

StatsSet returns true only if there are statistics set in the metadata and the column descriptor has a sort order that is not SortUnknown

It also checks the writer version to ensure that it was not written by a version of parquet which is known to have incorrect stat computations.

func (*ColumnChunkMetaData) TotalCompressedSize

func (c *ColumnChunkMetaData) TotalCompressedSize() int64

TotalCompressedSize will be equal to TotalUncompressedSize if the data is not compressed. Otherwise this will be the size of the actual data in the file.

func (*ColumnChunkMetaData) TotalUncompressedSize

func (c *ColumnChunkMetaData) TotalUncompressedSize() int64

TotalUncompressedSize is the total size of the raw data after uncompressing the chunk

func (*ColumnChunkMetaData) Type

func (c *ColumnChunkMetaData) Type() parquet.Type

Type is the physical storage type used in the parquet file for this column chunk.

type ColumnChunkMetaDataBuilder

ColumnChunkMetaDataBuilder is used during writing to construct metadata for a given column chunk while writing, providing a proxy around constructing the actual thrift object.

type ColumnChunkMetaDataBuilder struct {
    // contains filtered or unexported fields
}

func NewColumnChunkMetaDataBuilder

func NewColumnChunkMetaDataBuilder(props *parquet.WriterProperties, column *schema.Column) *ColumnChunkMetaDataBuilder

func NewColumnChunkMetaDataBuilderWithContents

func NewColumnChunkMetaDataBuilderWithContents(props *parquet.WriterProperties, column *schema.Column, chunk *format.ColumnChunk) *ColumnChunkMetaDataBuilder

NewColumnChunkMetaDataBuilderWithContents will construct a builder and start it with the provided column chunk information rather than with an empty column chunk.

func (*ColumnChunkMetaDataBuilder) Contents

func (c *ColumnChunkMetaDataBuilder) Contents() *format.ColumnChunk

Contents returns the underlying thrift ColumnChunk object so that it can be used for constructing or duplicating column metadata

func (*ColumnChunkMetaDataBuilder) Descr

func (c *ColumnChunkMetaDataBuilder) Descr() *schema.Column

Descr returns the associated column descriptor for this column chunk

func (*ColumnChunkMetaDataBuilder) Finish

func (c *ColumnChunkMetaDataBuilder) Finish(info ChunkMetaInfo, hasDict, dictFallback bool, encStats EncodingStats, metaEncryptor encryption.Encryptor) error

Finish finalizes the metadata with the given offsets, flushes any compression that needs to be done, and performs any encryption if an encryptor is provided.

func (*ColumnChunkMetaDataBuilder) SetFilePath

func (c *ColumnChunkMetaDataBuilder) SetFilePath(val string)

func (*ColumnChunkMetaDataBuilder) SetStats

func (c *ColumnChunkMetaDataBuilder) SetStats(val EncodedStatistics)

func (*ColumnChunkMetaDataBuilder) TotalCompressedSize

func (c *ColumnChunkMetaDataBuilder) TotalCompressedSize() int64

func (*ColumnChunkMetaDataBuilder) WriteTo

func (c *ColumnChunkMetaDataBuilder) WriteTo(w io.Writer) (int64, error)

WriteTo will always return 0 as the int64 since the thrift writer library does not return the number of bytes written; we only use the signature of (int64, error) in order to match the standard WriteTo interfaces.

type EncodedStatistics

EncodedStatistics are raw statistics with encoded values that will be written to the parquet file, or were read from the parquet file.

type EncodedStatistics struct {
    HasMax           bool
    Max              []byte
    HasMin           bool
    Min              []byte
    Signed           bool
    HasNullCount     bool
    NullCount        int64
    HasDistinctCount bool
    DistinctCount    int64
}

func (*EncodedStatistics) ApplyStatSizeLimits

func (e *EncodedStatistics) ApplyStatSizeLimits(length int)

ApplyStatSizeLimits sets the maximum size of the min/max values.

Following parquet-mr, we don't write stats larger than the max size rather than truncating them. The rationale is that some engines may use the minimum value in the page as the true minimum for aggregations, and there is no way to mark that a value has been truncated and is a lower bound and not in the page.

func (*EncodedStatistics) IsSet

func (e *EncodedStatistics) IsSet() bool

IsSet returns true iff one of the Has* values is true.

func (*EncodedStatistics) SetDistinctCount

func (e *EncodedStatistics) SetDistinctCount(val int64) *EncodedStatistics

SetDistinctCount sets the DistinctCount to val and sets HasDistinctCount to true

func (*EncodedStatistics) SetMax

func (e *EncodedStatistics) SetMax(val []byte) *EncodedStatistics

SetMax sets the encoded Max value to val and sets HasMax to true

func (*EncodedStatistics) SetMin

func (e *EncodedStatistics) SetMin(val []byte) *EncodedStatistics

SetMin sets the encoded Min value to val, and sets HasMin to true

func (*EncodedStatistics) SetNullCount

func (e *EncodedStatistics) SetNullCount(val int64) *EncodedStatistics

SetNullCount sets the NullCount to val and sets HasNullCount to true

func (*EncodedStatistics) ToThrift

func (e *EncodedStatistics) ToThrift() (stats *format.Statistics)

type EncodingStats

EncodingStats is a helper struct for passing the encoding stat information for finishing up metadata for a column chunk.

type EncodingStats struct {
    DictEncodingStats map[parquet.Encoding]int32
    DataEncodingStats map[parquet.Encoding]int32
}

type FileCryptoMetadata

FileCryptoMetadata is a proxy for the thrift fileCryptoMetadata object

type FileCryptoMetadata struct {
    // contains filtered or unexported fields
}

func NewFileCryptoMetaData

func NewFileCryptoMetaData(metadata []byte) (ret FileCryptoMetadata, err error)

NewFileCryptoMetaData takes in the raw serialized bytes to deserialize, storing the number of bytes that were actually deserialized.

func (FileCryptoMetadata) EncryptionAlgorithm

func (fc FileCryptoMetadata) EncryptionAlgorithm() parquet.Algorithm

EncryptionAlgorithm constructs the object from the thrift instance of the encryption algorithm

func (FileCryptoMetadata) KeyMetadata

func (fc FileCryptoMetadata) KeyMetadata() []byte

func (FileCryptoMetadata) Len

func (fc FileCryptoMetadata) Len() int

Len is the number of bytes that were deserialized to create this object

func (FileCryptoMetadata) WriteTo

func (fc FileCryptoMetadata) WriteTo(w io.Writer) (int64, error)

WriteTo writes out the serialized crypto metadata to w

type FileMetaData

FileMetaData is a proxy around the underlying thrift FileMetaData object to make it easier to use and interact with.

type FileMetaData struct {
    *format.FileMetaData
    Schema        *schema.Schema
    FileDecryptor encryption.FileDecryptor
    // contains filtered or unexported fields
}

func NewFileMetaData

func NewFileMetaData(data []byte, fileDecryptor encryption.FileDecryptor) (*FileMetaData, error)

NewFileMetaData takes in the raw bytes of the serialized metadata to deserialize and will attempt to decrypt the footer if a decryptor is provided.

func (*FileMetaData) AppendRowGroups

func (f *FileMetaData) AppendRowGroups(other *FileMetaData) error

AppendRowGroups will add all of the rowgroup metadata from other to the current file metadata

func (*FileMetaData) EncryptionAlgorithm

func (f *FileMetaData) EncryptionAlgorithm() parquet.Algorithm

EncryptionAlgorithm constructs the algorithm object from the thrift information or returns an empty instance if it was not set.

func (*FileMetaData) Equals

func (f *FileMetaData) Equals(other *FileMetaData) bool

func (*FileMetaData) KeyValueMetadata

func (f *FileMetaData) KeyValueMetadata() KeyValueMetadata

func (*FileMetaData) NumSchemaElements

func (f *FileMetaData) NumSchemaElements() int

NumSchemaElements is the length of the flattened schema list in the thrift

func (*FileMetaData) RowGroup

func (f *FileMetaData) RowGroup(i int) *RowGroupMetaData

RowGroup provides the metadata for the (0-based) index of the row group

func (*FileMetaData) Serialize

func (f *FileMetaData) Serialize(ctx context.Context) ([]byte, error)

func (*FileMetaData) SerializeString

func (f *FileMetaData) SerializeString(ctx context.Context) (string, error)

func (*FileMetaData) SetFilePath

func (f *FileMetaData) SetFilePath(path string)

SetFilePath will set the file path into all of the columns in each row group.

func (*FileMetaData) Size

func (f *FileMetaData) Size() int

Size is the length of the raw serialized metadata bytes in the footer

func (*FileMetaData) Subset

func (f *FileMetaData) Subset(rowGroups []int) (*FileMetaData, error)

Subset will construct a new FileMetaData object containing only the requested row groups by index

func (*FileMetaData) VerifySignature

func (f *FileMetaData) VerifySignature(signature []byte) bool

VerifySignature constructs a cryptographic signature using the FileDecryptor of the footer and then verifies its integrity.

Panics if f.FileDecryptor is nil

func (*FileMetaData) Version

func (f *FileMetaData) Version() parquet.Version

Version returns the "version" of the file

WARNING: The value returned by this method is unreliable as 1) the parquet file metadata stores the version as a single integer and 2) some producers are known to always write a hardcoded value. Therefore you cannot use this value to know which features are used in the file.

func (*FileMetaData) WriteTo

func (f *FileMetaData) WriteTo(w io.Writer, encryptor encryption.Encryptor) (int64, error)

WriteTo will serialize and write out this file metadata, encrypting it if appropriate.

If it is an encrypted file with a plaintext footer, then we will write the signature with the unencrypted footer.

func (*FileMetaData) WriterVersion

func (f *FileMetaData) WriterVersion() *AppVersion

WriterVersion returns the constructed application version from the created by string

type FileMetaDataBuilder

FileMetaDataBuilder is a proxy for more easily constructing file metadata particularly used when writing a file out.

type FileMetaDataBuilder struct {
    // contains filtered or unexported fields
}

func NewFileMetadataBuilder

func NewFileMetadataBuilder(schema *schema.Schema, props *parquet.WriterProperties, kvmeta KeyValueMetadata) *FileMetaDataBuilder

NewFileMetadataBuilder will use the default writer properties if nil is passed for the writer properties and nil is allowable for the key value metadata.

func (*FileMetaDataBuilder) AppendKeyValueMetadata

func (f *FileMetaDataBuilder) AppendKeyValueMetadata(key string, value string) error

AppendKeyValueMetadata appends a key/value pair to the existing key/value metadata

func (*FileMetaDataBuilder) AppendRowGroup

func (f *FileMetaDataBuilder) AppendRowGroup() *RowGroupMetaDataBuilder

AppendRowGroup adds a rowgroup to the list and returns a builder for that row group

func (*FileMetaDataBuilder) Finish

func (f *FileMetaDataBuilder) Finish() (*FileMetaData, error)

Finish will finalize the metadata of the number of rows, row groups, version etc. This will clear out this FileMetaDataBuilder so it can be re-used.

func (*FileMetaDataBuilder) GetFileCryptoMetaData

func (f *FileMetaDataBuilder) GetFileCryptoMetaData() *FileCryptoMetadata

GetFileCryptoMetaData returns the cryptographic information for encrypting/decrypting the file.

type FixedLenByteArrayStatistics

FixedLenByteArrayStatistics is the typed interface for managing stats for a column of FixedLenByteArray type.

type FixedLenByteArrayStatistics struct {
    // contains filtered or unexported fields
}

func NewFixedLenByteArrayStatistics

func NewFixedLenByteArrayStatistics(descr *schema.Column, mem memory.Allocator) *FixedLenByteArrayStatistics

NewFixedLenByteArrayStatistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Type.FixedLenByteArray

func NewFixedLenByteArrayStatisticsFromEncoded

func NewFixedLenByteArrayStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *FixedLenByteArrayStatistics

NewFixedLenByteArrayStatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*FixedLenByteArrayStatistics) Descr

func (s *FixedLenByteArrayStatistics) Descr() *schema.Column

func (*FixedLenByteArrayStatistics) DistinctCount

func (s *FixedLenByteArrayStatistics) DistinctCount() int64

func (*FixedLenByteArrayStatistics) Encode

func (s *FixedLenByteArrayStatistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*FixedLenByteArrayStatistics) EncodeMax

func (s *FixedLenByteArrayStatistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding

ByteArray stats do not include the length in the encoding

func (*FixedLenByteArrayStatistics) EncodeMin

func (s *FixedLenByteArrayStatistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*FixedLenByteArrayStatistics) Equals

func (s *FixedLenByteArrayStatistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*FixedLenByteArrayStatistics) HasDistinctCount

func (s *FixedLenByteArrayStatistics) HasDistinctCount() bool

func (*FixedLenByteArrayStatistics) HasMinMax

func (s *FixedLenByteArrayStatistics) HasMinMax() bool

func (*FixedLenByteArrayStatistics) HasNullCount

func (s *FixedLenByteArrayStatistics) HasNullCount() bool

func (*FixedLenByteArrayStatistics) IncDistinct

func (s *FixedLenByteArrayStatistics) IncDistinct(n int64)

func (*FixedLenByteArrayStatistics) IncNulls

func (s *FixedLenByteArrayStatistics) IncNulls(n int64)

func (*FixedLenByteArrayStatistics) IncNumValues

func (s *FixedLenByteArrayStatistics) IncNumValues(n int64)

func (*FixedLenByteArrayStatistics) Max

func (s *FixedLenByteArrayStatistics) Max() parquet.FixedLenByteArray

func (*FixedLenByteArrayStatistics) Merge

func (s *FixedLenByteArrayStatistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*FixedLenByteArrayStatistics) Min

func (s *FixedLenByteArrayStatistics) Min() parquet.FixedLenByteArray

func (*FixedLenByteArrayStatistics) MinMaxEqual

func (s *FixedLenByteArrayStatistics) MinMaxEqual(rhs *FixedLenByteArrayStatistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*FixedLenByteArrayStatistics) NullCount

func (s *FixedLenByteArrayStatistics) NullCount() int64

func (*FixedLenByteArrayStatistics) NumValues

func (s *FixedLenByteArrayStatistics) NumValues() int64

func (*FixedLenByteArrayStatistics) Reset

func (s *FixedLenByteArrayStatistics) Reset()

func (*FixedLenByteArrayStatistics) SetMinMax

func (s *FixedLenByteArrayStatistics) SetMinMax(argMin, argMax parquet.FixedLenByteArray)

SetMinMax updates the min and max values only if they are not currently set or if argMin is less than the current min / argMax is greater than the current max

func (*FixedLenByteArrayStatistics) Type

func (s *FixedLenByteArrayStatistics) Type() parquet.Type

func (*FixedLenByteArrayStatistics) Update

func (s *FixedLenByteArrayStatistics) Update(values []parquet.FixedLenByteArray, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*FixedLenByteArrayStatistics) UpdateFromArrow

func (s *FixedLenByteArrayStatistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*FixedLenByteArrayStatistics) UpdateSpaced

func (s *FixedLenByteArrayStatistics) UpdateSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type Float16Statistics

Float16Statistics is the typed interface for managing stats for a column of Float16 type.

type Float16Statistics struct {
    // contains filtered or unexported fields
}

func NewFloat16Statistics

func NewFloat16Statistics(descr *schema.Column, mem memory.Allocator) *Float16Statistics

NewFloat16Statistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Type.FixedLenByteArray. Panics if the logical type of descr is not schema.Float16LogicalType.

func NewFloat16StatisticsFromEncoded

func NewFloat16StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float16Statistics

NewFloat16StatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*Float16Statistics) Descr

func (s *Float16Statistics) Descr() *schema.Column

func (*Float16Statistics) DistinctCount

func (s *Float16Statistics) DistinctCount() int64

func (*Float16Statistics) Encode

func (s *Float16Statistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*Float16Statistics) EncodeMax

func (s *Float16Statistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding

ByteArray stats do not include the length in the encoding

func (*Float16Statistics) EncodeMin

func (s *Float16Statistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*Float16Statistics) Equals

func (s *Float16Statistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*Float16Statistics) HasDistinctCount

func (s *Float16Statistics) HasDistinctCount() bool

func (*Float16Statistics) HasMinMax

func (s *Float16Statistics) HasMinMax() bool

func (*Float16Statistics) HasNullCount

func (s *Float16Statistics) HasNullCount() bool

func (*Float16Statistics) IncDistinct

func (s *Float16Statistics) IncDistinct(n int64)

func (*Float16Statistics) IncNulls

func (s *Float16Statistics) IncNulls(n int64)

func (*Float16Statistics) IncNumValues

func (s *Float16Statistics) IncNumValues(n int64)

func (*Float16Statistics) Max

func (s *Float16Statistics) Max() parquet.FixedLenByteArray

func (*Float16Statistics) Merge

func (s *Float16Statistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*Float16Statistics) Min

func (s *Float16Statistics) Min() parquet.FixedLenByteArray

func (*Float16Statistics) MinMaxEqual

func (s *Float16Statistics) MinMaxEqual(rhs *Float16Statistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*Float16Statistics) NullCount

func (s *Float16Statistics) NullCount() int64

func (*Float16Statistics) NumValues

func (s *Float16Statistics) NumValues() int64

func (*Float16Statistics) Reset

func (s *Float16Statistics) Reset()

func (*Float16Statistics) SetMinMax

func (s *Float16Statistics) SetMinMax(argMin, argMax parquet.FixedLenByteArray)

SetMinMax updates the min and max values only if they are not currently set or if argMin is less than the current min / argMax is greater than the current max

func (*Float16Statistics) Type

func (s *Float16Statistics) Type() parquet.Type

func (*Float16Statistics) Update

func (s *Float16Statistics) Update(values []parquet.FixedLenByteArray, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*Float16Statistics) UpdateFromArrow

func (s *Float16Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*Float16Statistics) UpdateSpaced

func (s *Float16Statistics) UpdateSpaced(values []parquet.FixedLenByteArray, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type Float32Statistics

Float32Statistics is the typed interface for managing stats for a column of Float32 type.

type Float32Statistics struct {
    // contains filtered or unexported fields
}

func NewFloat32Statistics

func NewFloat32Statistics(descr *schema.Column, mem memory.Allocator) *Float32Statistics

NewFloat32Statistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Type.Float

func NewFloat32StatisticsFromEncoded

func NewFloat32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float32Statistics

NewFloat32StatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*Float32Statistics) Descr

func (s *Float32Statistics) Descr() *schema.Column

func (*Float32Statistics) DistinctCount

func (s *Float32Statistics) DistinctCount() int64

func (*Float32Statistics) Encode

func (s *Float32Statistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*Float32Statistics) EncodeMax

func (s *Float32Statistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding

ByteArray stats do not include the length in the encoding

func (*Float32Statistics) EncodeMin

func (s *Float32Statistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*Float32Statistics) Equals

func (s *Float32Statistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*Float32Statistics) HasDistinctCount

func (s *Float32Statistics) HasDistinctCount() bool

func (*Float32Statistics) HasMinMax

func (s *Float32Statistics) HasMinMax() bool

func (*Float32Statistics) HasNullCount

func (s *Float32Statistics) HasNullCount() bool

func (*Float32Statistics) IncDistinct

func (s *Float32Statistics) IncDistinct(n int64)

func (*Float32Statistics) IncNulls

func (s *Float32Statistics) IncNulls(n int64)

func (*Float32Statistics) IncNumValues

func (s *Float32Statistics) IncNumValues(n int64)

func (*Float32Statistics) Max

func (s *Float32Statistics) Max() float32

func (*Float32Statistics) Merge

func (s *Float32Statistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*Float32Statistics) Min

func (s *Float32Statistics) Min() float32

func (*Float32Statistics) MinMaxEqual

func (s *Float32Statistics) MinMaxEqual(rhs *Float32Statistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*Float32Statistics) NullCount

func (s *Float32Statistics) NullCount() int64

func (*Float32Statistics) NumValues

func (s *Float32Statistics) NumValues() int64

func (*Float32Statistics) Reset

func (s *Float32Statistics) Reset()

func (*Float32Statistics) SetMinMax

func (s *Float32Statistics) SetMinMax(argMin, argMax float32)

SetMinMax updates the min and max values only if they are not currently set or if argMin is less than the current min / argMax is greater than the current max

func (*Float32Statistics) Type

func (s *Float32Statistics) Type() parquet.Type

func (*Float32Statistics) Update

func (s *Float32Statistics) Update(values []float32, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*Float32Statistics) UpdateFromArrow

func (s *Float32Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*Float32Statistics) UpdateSpaced

func (s *Float32Statistics) UpdateSpaced(values []float32, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type Float64Statistics

Float64Statistics is the typed interface for managing stats for a column of Float64 type.

type Float64Statistics struct {
    // contains filtered or unexported fields
}

func NewFloat64Statistics

func NewFloat64Statistics(descr *schema.Column, mem memory.Allocator) *Float64Statistics

NewFloat64Statistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Type.Double

func NewFloat64StatisticsFromEncoded

func NewFloat64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Float64Statistics

NewFloat64StatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*Float64Statistics) Descr

func (s *Float64Statistics) Descr() *schema.Column

func (*Float64Statistics) DistinctCount

func (s *Float64Statistics) DistinctCount() int64

func (*Float64Statistics) Encode

func (s *Float64Statistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*Float64Statistics) EncodeMax

func (s *Float64Statistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding

ByteArray stats do not include the length in the encoding

func (*Float64Statistics) EncodeMin

func (s *Float64Statistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*Float64Statistics) Equals

func (s *Float64Statistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*Float64Statistics) HasDistinctCount

func (s *Float64Statistics) HasDistinctCount() bool

func (*Float64Statistics) HasMinMax

func (s *Float64Statistics) HasMinMax() bool

func (*Float64Statistics) HasNullCount

func (s *Float64Statistics) HasNullCount() bool

func (*Float64Statistics) IncDistinct

func (s *Float64Statistics) IncDistinct(n int64)

func (*Float64Statistics) IncNulls

func (s *Float64Statistics) IncNulls(n int64)

func (*Float64Statistics) IncNumValues

func (s *Float64Statistics) IncNumValues(n int64)

func (*Float64Statistics) Max

func (s *Float64Statistics) Max() float64

func (*Float64Statistics) Merge

func (s *Float64Statistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*Float64Statistics) Min

func (s *Float64Statistics) Min() float64

func (*Float64Statistics) MinMaxEqual

func (s *Float64Statistics) MinMaxEqual(rhs *Float64Statistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*Float64Statistics) NullCount

func (s *Float64Statistics) NullCount() int64

func (*Float64Statistics) NumValues

func (s *Float64Statistics) NumValues() int64

func (*Float64Statistics) Reset

func (s *Float64Statistics) Reset()

func (*Float64Statistics) SetMinMax

func (s *Float64Statistics) SetMinMax(argMin, argMax float64)

SetMinMax updates the min and max values only if they are not currently set or if argMin is less than the current min / argMax is greater than the current max

func (*Float64Statistics) Type

func (s *Float64Statistics) Type() parquet.Type

func (*Float64Statistics) Update

func (s *Float64Statistics) Update(values []float64, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*Float64Statistics) UpdateFromArrow

func (s *Float64Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*Float64Statistics) UpdateSpaced

func (s *Float64Statistics) UpdateSpaced(values []float64, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type Int32Statistics

Int32Statistics is the typed interface for managing stats for a column of Int32 type.

type Int32Statistics struct {
    // contains filtered or unexported fields
}

func NewInt32Statistics

func NewInt32Statistics(descr *schema.Column, mem memory.Allocator) *Int32Statistics

NewInt32Statistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Types.Int32.

func NewInt32StatisticsFromEncoded

func NewInt32StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int32Statistics

NewInt32StatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*Int32Statistics) Descr

func (s *Int32Statistics) Descr() *schema.Column

func (*Int32Statistics) DistinctCount

func (s *Int32Statistics) DistinctCount() int64

func (*Int32Statistics) Encode

func (s *Int32Statistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*Int32Statistics) EncodeMax

func (s *Int32Statistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding

ByteArray stats do not include the length in the encoding

func (*Int32Statistics) EncodeMin

func (s *Int32Statistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*Int32Statistics) Equals

func (s *Int32Statistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*Int32Statistics) HasDistinctCount

func (s *Int32Statistics) HasDistinctCount() bool

func (*Int32Statistics) HasMinMax

func (s *Int32Statistics) HasMinMax() bool

func (*Int32Statistics) HasNullCount

func (s *Int32Statistics) HasNullCount() bool

func (*Int32Statistics) IncDistinct

func (s *Int32Statistics) IncDistinct(n int64)

func (*Int32Statistics) IncNulls

func (s *Int32Statistics) IncNulls(n int64)

func (*Int32Statistics) IncNumValues

func (s *Int32Statistics) IncNumValues(n int64)

func (*Int32Statistics) Max

func (s *Int32Statistics) Max() int32

func (*Int32Statistics) Merge

func (s *Int32Statistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*Int32Statistics) Min

func (s *Int32Statistics) Min() int32

func (*Int32Statistics) MinMaxEqual

func (s *Int32Statistics) MinMaxEqual(rhs *Int32Statistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*Int32Statistics) NullCount

func (s *Int32Statistics) NullCount() int64

func (*Int32Statistics) NumValues

func (s *Int32Statistics) NumValues() int64

func (*Int32Statistics) Reset

func (s *Int32Statistics) Reset()

func (*Int32Statistics) SetMinMax

func (s *Int32Statistics) SetMinMax(argMin, argMax int32)

SetMinMax updates the min and max values only if they are not currently set or if argMin is less than the current min / argMax is greater than the current max

func (*Int32Statistics) Type

func (s *Int32Statistics) Type() parquet.Type

func (*Int32Statistics) Update

func (s *Int32Statistics) Update(values []int32, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*Int32Statistics) UpdateFromArrow

func (s *Int32Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*Int32Statistics) UpdateSpaced

func (s *Int32Statistics) UpdateSpaced(values []int32, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type Int64Statistics

Int64Statistics is the typed interface for managing stats for a column of Int64 type.

type Int64Statistics struct {
    // contains filtered or unexported fields
}

func NewInt64Statistics

func NewInt64Statistics(descr *schema.Column, mem memory.Allocator) *Int64Statistics

NewInt64Statistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Types.Int64.

func NewInt64StatisticsFromEncoded

func NewInt64StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int64Statistics

NewInt64StatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*Int64Statistics) Descr

func (s *Int64Statistics) Descr() *schema.Column

func (*Int64Statistics) DistinctCount

func (s *Int64Statistics) DistinctCount() int64

func (*Int64Statistics) Encode

func (s *Int64Statistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*Int64Statistics) EncodeMax

func (s *Int64Statistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding

ByteArray stats do not include the length in the encoding

func (*Int64Statistics) EncodeMin

func (s *Int64Statistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*Int64Statistics) Equals

func (s *Int64Statistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*Int64Statistics) HasDistinctCount

func (s *Int64Statistics) HasDistinctCount() bool

func (*Int64Statistics) HasMinMax

func (s *Int64Statistics) HasMinMax() bool

func (*Int64Statistics) HasNullCount

func (s *Int64Statistics) HasNullCount() bool

func (*Int64Statistics) IncDistinct

func (s *Int64Statistics) IncDistinct(n int64)

func (*Int64Statistics) IncNulls

func (s *Int64Statistics) IncNulls(n int64)

func (*Int64Statistics) IncNumValues

func (s *Int64Statistics) IncNumValues(n int64)

func (*Int64Statistics) Max

func (s *Int64Statistics) Max() int64

func (*Int64Statistics) Merge

func (s *Int64Statistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*Int64Statistics) Min

func (s *Int64Statistics) Min() int64

func (*Int64Statistics) MinMaxEqual

func (s *Int64Statistics) MinMaxEqual(rhs *Int64Statistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*Int64Statistics) NullCount

func (s *Int64Statistics) NullCount() int64

func (*Int64Statistics) NumValues

func (s *Int64Statistics) NumValues() int64

func (*Int64Statistics) Reset

func (s *Int64Statistics) Reset()

func (*Int64Statistics) SetMinMax

func (s *Int64Statistics) SetMinMax(argMin, argMax int64)

SetMinMax updates the min and max values only if they are not currently set or if argMin is less than the current min / argMax is greater than the current max

func (*Int64Statistics) Type

func (s *Int64Statistics) Type() parquet.Type

func (*Int64Statistics) Update

func (s *Int64Statistics) Update(values []int64, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*Int64Statistics) UpdateFromArrow

func (s *Int64Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*Int64Statistics) UpdateSpaced

func (s *Int64Statistics) UpdateSpaced(values []int64, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type Int96Statistics

Int96Statistics is the typed interface for managing stats for a column of Int96 type.

type Int96Statistics struct {
    // contains filtered or unexported fields
}

func NewInt96Statistics

func NewInt96Statistics(descr *schema.Column, mem memory.Allocator) *Int96Statistics

NewInt96Statistics constructs an appropriate stat object type using the given column descriptor and allocator.

Panics if the physical type of descr is not parquet.Types.Int96.

func NewInt96StatisticsFromEncoded

func NewInt96StatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) *Int96Statistics

NewInt96StatisticsFromEncoded will construct a properly typed statistics object initializing it with the provided information.

func (*Int96Statistics) Descr

func (s *Int96Statistics) Descr() *schema.Column

func (*Int96Statistics) DistinctCount

func (s *Int96Statistics) DistinctCount() int64

func (*Int96Statistics) Encode

func (s *Int96Statistics) Encode() (enc EncodedStatistics, err error)

Encode returns a populated EncodedStatistics object

func (*Int96Statistics) EncodeMax

func (s *Int96Statistics) EncodeMax() []byte

EncodeMax returns the current encoded max value with plain encoding

ByteArray stats do not include the length in the encoding

func (*Int96Statistics) EncodeMin

func (s *Int96Statistics) EncodeMin() []byte

EncodeMin returns the encoded min value with plain encoding.

ByteArray stats do not include the length in the encoding.

func (*Int96Statistics) Equals

func (s *Int96Statistics) Equals(other TypedStatistics) bool

Equals returns true only if both objects are the same type, have the same min and max values, null count, distinct count and number of values.

func (*Int96Statistics) HasDistinctCount

func (s *Int96Statistics) HasDistinctCount() bool

func (*Int96Statistics) HasMinMax

func (s *Int96Statistics) HasMinMax() bool

func (*Int96Statistics) HasNullCount

func (s *Int96Statistics) HasNullCount() bool

func (*Int96Statistics) IncDistinct

func (s *Int96Statistics) IncDistinct(n int64)

func (*Int96Statistics) IncNulls

func (s *Int96Statistics) IncNulls(n int64)

func (*Int96Statistics) IncNumValues

func (s *Int96Statistics) IncNumValues(n int64)

func (*Int96Statistics) Max

func (s *Int96Statistics) Max() parquet.Int96

func (*Int96Statistics) Merge

func (s *Int96Statistics) Merge(other TypedStatistics)

Merge merges the stats from other into this stat object, updating the null count, distinct count, number of values and the min/max if appropriate.

func (*Int96Statistics) Min

func (s *Int96Statistics) Min() parquet.Int96

func (*Int96Statistics) MinMaxEqual

func (s *Int96Statistics) MinMaxEqual(rhs *Int96Statistics) bool

MinMaxEqual returns true if both stat objects have the same Min and Max values

func (*Int96Statistics) NullCount

func (s *Int96Statistics) NullCount() int64

func (*Int96Statistics) NumValues

func (s *Int96Statistics) NumValues() int64

func (*Int96Statistics) Reset

func (s *Int96Statistics) Reset()

func (*Int96Statistics) SetMinMax

func (s *Int96Statistics) SetMinMax(argMin, argMax parquet.Int96)

SetMinMax updates the min and max values only if they are not currently set or if argMin is less than the current min / argMax is greater than the current max

func (*Int96Statistics) Type

func (s *Int96Statistics) Type() parquet.Type

func (*Int96Statistics) Update

func (s *Int96Statistics) Update(values []parquet.Int96, numNull int64)

Update is used to add more values to the current stat object, finding the min and max values etc.

func (*Int96Statistics) UpdateFromArrow

func (s *Int96Statistics) UpdateFromArrow(values arrow.Array, updateCounts bool) error

func (*Int96Statistics) UpdateSpaced

func (s *Int96Statistics) UpdateSpaced(values []parquet.Int96, validBits []byte, validBitsOffset, numNull int64)

UpdateSpaced is just like Update, but for spaced values using validBits to determine and skip null values.

type KeyValueMetadata

KeyValueMetadata is a named type for a slice of thrift KeyValue pairs.

It is presumed that all of the metadata keys and values are valid UTF-8.

type KeyValueMetadata []*format.KeyValue

func NewKeyValueMetadata

func NewKeyValueMetadata() KeyValueMetadata

NewKeyValueMetadata is equivalent to make(KeyValueMetadata, 0)

func (*KeyValueMetadata) Append

func (k *KeyValueMetadata) Append(key, value string) error

Append adds the given key and value to the metadata. If either contains any invalid UTF-8 runes, the pair is not added and an error is returned.

func (KeyValueMetadata) Equals

func (k KeyValueMetadata) Equals(other KeyValueMetadata) bool

Equals compares all of the metadata keys and values to check they are equal

func (KeyValueMetadata) FindValue

func (k KeyValueMetadata) FindValue(key string) *string

func (KeyValueMetadata) Keys

func (k KeyValueMetadata) Keys() (ret []string)

func (KeyValueMetadata) Len

func (k KeyValueMetadata) Len() int

func (KeyValueMetadata) Values

func (k KeyValueMetadata) Values() (ret []string)

type PageEncodingStats

PageEncodingStats is used for counting the number of pages of specific types with the given internal encoding.

type PageEncodingStats struct {
    Encoding parquet.Encoding
    PageType format.PageType
}

type RowGroupMetaData

RowGroupMetaData is a proxy around the thrift RowGroup meta data object

type RowGroupMetaData struct {
    Schema *schema.Schema
    // contains filtered or unexported fields
}

func NewRowGroupMetaData

func NewRowGroupMetaData(rg *format.RowGroup, sc *schema.Schema, version *AppVersion, decryptor encryption.FileDecryptor) *RowGroupMetaData

NewRowGroupMetaData constructs an object from the underlying thrift objects and schema, decrypting the metadata if a decryptor is provided and decryption is necessary. This is primarily used internally; consumers should use the RowGroupMetaDataBuilder rather than calling this directly.

func (*RowGroupMetaData) ColumnChunk

func (r *RowGroupMetaData) ColumnChunk(i int) (*ColumnChunkMetaData, error)

ColumnChunk returns the metadata for the requested (0-based) chunk index

func (*RowGroupMetaData) Equals

func (r *RowGroupMetaData) Equals(other *RowGroupMetaData) bool

func (*RowGroupMetaData) FileOffset

func (r *RowGroupMetaData) FileOffset() int64

FileOffset is the location in the file where the data for this rowgroup begins

func (*RowGroupMetaData) NumColumns

func (r *RowGroupMetaData) NumColumns() int

NumColumns returns the number of column metadata objects in this row group

func (*RowGroupMetaData) NumRows

func (r *RowGroupMetaData) NumRows() int64

NumRows is just the number of rows in this row group. All columns have the same number of rows for a row group regardless of repetition and definition levels.

func (*RowGroupMetaData) Ordinal

func (r *RowGroupMetaData) Ordinal() int16

Ordinal is the row group number in order for the given file.

func (*RowGroupMetaData) TotalByteSize

func (r *RowGroupMetaData) TotalByteSize() int64

TotalByteSize is the total size of this rowgroup on disk

func (*RowGroupMetaData) TotalCompressedSize

func (r *RowGroupMetaData) TotalCompressedSize() int64

type RowGroupMetaDataBuilder

RowGroupMetaDataBuilder is a convenience object for constructing row group metadata information. Primarily used in conjunction with writing new files.

type RowGroupMetaDataBuilder struct {
    // contains filtered or unexported fields
}

func NewRowGroupMetaDataBuilder

func NewRowGroupMetaDataBuilder(props *parquet.WriterProperties, schema *schema.Schema, rg *format.RowGroup) *RowGroupMetaDataBuilder

NewRowGroupMetaDataBuilder returns a builder using the given properties and underlying thrift object.

This is primarily used internally. Consumers should use the FileMetaDataBuilder and call AppendRowGroup on it to get instances of RowGroupMetaDataBuilder.

func (*RowGroupMetaDataBuilder) CurrentColumn

func (r *RowGroupMetaDataBuilder) CurrentColumn() int

CurrentColumn returns the current column chunk (0-based) index that is being built.

Returns -1 until the first time NextColumnChunk is called.

func (*RowGroupMetaDataBuilder) Finish

func (r *RowGroupMetaDataBuilder) Finish(totalBytesWritten int64, ordinal int16) error

Finish should be called when the row group is complete; it updates the metadata with the final file offset and the total compressed sizes. totalBytesWritten is written as the TotalByteSize for the row group, and ordinal should be the index of the row group being written (e.g. the first row group is 0, the second is 1, and so on).

func (*RowGroupMetaDataBuilder) NextColumnChunk

func (r *RowGroupMetaDataBuilder) NextColumnChunk() *ColumnChunkMetaDataBuilder

NextColumnChunk appends a new column chunk, updates the column index, and returns a builder for that column chunk's metadata

func (*RowGroupMetaDataBuilder) NumColumns

func (r *RowGroupMetaDataBuilder) NumColumns() int

NumColumns returns the current number of columns in this metadata

func (*RowGroupMetaDataBuilder) NumRows

func (r *RowGroupMetaDataBuilder) NumRows() int64

func (*RowGroupMetaDataBuilder) SetNumRows

func (r *RowGroupMetaDataBuilder) SetNumRows(nrows int)

type StatProvider

type StatProvider interface {
    GetMin() []byte
    GetMax() []byte
    GetNullCount() int64
    GetDistinctCount() int64
    IsSetMax() bool
    IsSetMin() bool
    IsSetNullCount() bool
    IsSetDistinctCount() bool
}

type TypedStatistics

TypedStatistics is the base interface for dealing with stats as they are being populated

type TypedStatistics interface {
    // Type is the underlying physical type for this stat block
    Type() parquet.Type
    // Returns true if there is a min and max value set for this stat object
    HasMinMax() bool
    // Returns true if a nullcount has been set
    HasNullCount() bool
    // Returns true only if a distinct count has been set. The current
    // implementation of the writer does not automatically populate
    // the distinct count.
    HasDistinctCount() bool
    NullCount() int64
    DistinctCount() int64
    NumValues() int64
    // return the column descriptor that this stat object was initialized with
    Descr() *schema.Column

    // Encode the current min value and return the bytes. ByteArray does not
    // include the len in the encoded bytes, otherwise this is identical to
    // plain encoding
    EncodeMin() []byte
    // Encode the current max value and return the bytes. ByteArray does not
    // include the len in the encoded bytes, otherwise this is identical to
    // plain encoding
    EncodeMax() []byte
    // Populate an EncodedStatistics object from the current stats
    Encode() (EncodedStatistics, error)
    // Resets all values to 0 so that this stat object can be reused for
    // multiple columns: call Encode to get the finished values, and then
    // call Reset.
    Reset()
    // Merge the min/max/nullcounts and distinct count from the passed stat object
    // into this one.
    Merge(TypedStatistics)

    // UpdateFromArrow updates the statistics from an Arrow Array,
    // only updating the null and num value counts if updateCounts
    // is true.
    UpdateFromArrow(values arrow.Array, updateCounts bool) error
    // IncNulls increments the number of nulls in the statistics
    // and marks HasNullCount as true
    IncNulls(int64)
    // IncDistinct increments the number of distinct values in
    // the statistics and marks HasDistinctCount as true
    IncDistinct(int64)
    // IncNumValues increments the total number of values in
    // the statistics
    IncNumValues(int64)
}

func NewStatistics

func NewStatistics(descr *schema.Column, mem memory.Allocator) TypedStatistics

NewStatistics uses the type in the column descriptor to construct the appropriate typed stats object. If mem is nil, then memory.DefaultAllocator will be used.

func NewStatisticsFromEncoded

func NewStatisticsFromEncoded(descr *schema.Column, mem memory.Allocator, nvalues int64, encoded StatProvider) TypedStatistics

NewStatisticsFromEncoded uses the provided information to initialize a typed stat object by checking the type of the provided column descriptor.

If mem is nil, then memory.DefaultAllocator is used.