...

Source file src/github.com/apache/arrow/go/v15/parquet/pqarrow/file_writer_test.go

Documentation: github.com/apache/arrow/go/v15/parquet/pqarrow

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package pqarrow_test
    18  
    19  import (
    20  	"bytes"
    21  	"strings"
    22  	"testing"
    23  
    24  	"github.com/apache/arrow/go/v15/arrow"
    25  	"github.com/apache/arrow/go/v15/arrow/array"
    26  	"github.com/apache/arrow/go/v15/arrow/memory"
    27  	"github.com/apache/arrow/go/v15/parquet"
    28  	"github.com/apache/arrow/go/v15/parquet/pqarrow"
    29  	"github.com/stretchr/testify/assert"
    30  	"github.com/stretchr/testify/require"
    31  )
    32  
    33  func TestFileWriterRowGroupNumRows(t *testing.T) {
    34  	schema := arrow.NewSchema([]arrow.Field{
    35  		{Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64},
    36  		{Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64},
    37  	}, nil)
    38  
    39  	data := `[
    40  		{"one": 1, "two": 2},
    41  		{"one": 1, "two": null},
    42  		{"one": null, "two": 2},
    43  		{"one": null, "two": null}
    44  	]`
    45  	record, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, strings.NewReader(data))
    46  	require.NoError(t, err)
    47  
    48  	output := &bytes.Buffer{}
    49  	writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(100))
    50  	writer, err := pqarrow.NewFileWriter(schema, output, writerProps, pqarrow.DefaultWriterProps())
    51  	require.NoError(t, err)
    52  
    53  	require.NoError(t, writer.Write(record))
    54  	numRows, err := writer.RowGroupNumRows()
    55  	require.NoError(t, err)
    56  	assert.Equal(t, 4, numRows)
    57  	require.NoError(t, writer.Close())
    58  }
    59  
    60  func TestFileWriterNumRows(t *testing.T) {
    61  	schema := arrow.NewSchema([]arrow.Field{
    62  		{Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64},
    63  		{Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64},
    64  	}, nil)
    65  
    66  	data := `[
    67  		{"one": 1, "two": 2},
    68  		{"one": 1, "two": null},
    69  		{"one": null, "two": 2},
    70  		{"one": null, "two": null}
    71  	]`
    72  	record, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, strings.NewReader(data))
    73  	require.NoError(t, err)
    74  
    75  	maxRowGroupLength := 2
    76  
    77  	output := &bytes.Buffer{}
    78  	writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(int64(maxRowGroupLength)))
    79  	writer, err := pqarrow.NewFileWriter(schema, output, writerProps, pqarrow.DefaultWriterProps())
    80  	require.NoError(t, err)
    81  
    82  	require.NoError(t, writer.Write(record))
    83  	rowGroupNumRows, err := writer.RowGroupNumRows()
    84  	require.NoError(t, err)
    85  	assert.Equal(t, maxRowGroupLength, rowGroupNumRows)
    86  
    87  	require.NoError(t, writer.Close())
    88  	assert.Equal(t, 4, writer.NumRows())
    89  }
    90  

View as plain text