...

Source file src/github.com/apache/arrow/go/v15/parquet/internal/testutils/random_arrow.go

Documentation: github.com/apache/arrow/go/v15/parquet/internal/testutils

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package testutils
    18  
    19  import (
    20  	"github.com/apache/arrow/go/v15/arrow"
    21  	"github.com/apache/arrow/go/v15/arrow/array"
    22  	"github.com/apache/arrow/go/v15/arrow/float16"
    23  	"github.com/apache/arrow/go/v15/arrow/memory"
    24  	"golang.org/x/exp/rand"
    25  )
    26  
    27  // RandomNonNull generates a random arrow array of the requested type with length size with no nulls.
    28  // Accepts float32, float64, all integer primitives, Date32, date64, string, binary, fixed_size_binary, bool and decimal.
    29  //
    30  // Always uses 0 as the seed with the following min/max restrictions:
    31  // int16, uint16, int8, and uint8 will be min 0, max 64
    32  // Date32 and Date64 will be between 0 and 24 * 86400000 in increments of 86400000
    33  // String will all have the value "test-string"
    34  // binary will have each value between length 2 and 12 but random bytes that are not limited to ascii
    35  // fixed size binary will all be of length 10, random bytes are not limited to ascii
    36  // bool will be approximately half false and half true randomly.
    37  func RandomNonNull(mem memory.Allocator, dt arrow.DataType, size int) arrow.Array {
    38  	switch dt.ID() {
    39  	case arrow.FLOAT32:
    40  		bldr := array.NewFloat32Builder(mem)
    41  		defer bldr.Release()
    42  		values := make([]float32, size)
    43  		FillRandomFloat32(0, values)
    44  		bldr.AppendValues(values, nil)
    45  		return bldr.NewArray()
    46  	case arrow.FLOAT64:
    47  		bldr := array.NewFloat64Builder(mem)
    48  		defer bldr.Release()
    49  		values := make([]float64, size)
    50  		FillRandomFloat64(0, values)
    51  		bldr.AppendValues(values, nil)
    52  		return bldr.NewArray()
    53  	case arrow.FLOAT16:
    54  		bldr := array.NewFloat16Builder(mem)
    55  		defer bldr.Release()
    56  		values := make([]float16.Num, size)
    57  		FillRandomFloat16(0, values)
    58  		bldr.AppendValues(values, nil)
    59  		return bldr.NewArray()
    60  	case arrow.INT64:
    61  		bldr := array.NewInt64Builder(mem)
    62  		defer bldr.Release()
    63  		values := make([]int64, size)
    64  		FillRandomInt64(0, values)
    65  		bldr.AppendValues(values, nil)
    66  		return bldr.NewArray()
    67  	case arrow.UINT64:
    68  		bldr := array.NewUint64Builder(mem)
    69  		defer bldr.Release()
    70  		values := make([]uint64, size)
    71  		FillRandomUint64(0, values)
    72  		bldr.AppendValues(values, nil)
    73  		return bldr.NewArray()
    74  	case arrow.INT32:
    75  		bldr := array.NewInt32Builder(mem)
    76  		defer bldr.Release()
    77  		values := make([]int32, size)
    78  		FillRandomInt32(0, values)
    79  		bldr.AppendValues(values, nil)
    80  		return bldr.NewArray()
    81  	case arrow.UINT32:
    82  		bldr := array.NewUint32Builder(mem)
    83  		defer bldr.Release()
    84  		values := make([]uint32, size)
    85  		FillRandomUint32(0, values)
    86  		bldr.AppendValues(values, nil)
    87  		return bldr.NewArray()
    88  	case arrow.INT16:
    89  		bldr := array.NewInt16Builder(mem)
    90  		defer bldr.Release()
    91  		values := make([]int16, size)
    92  		FillRandomInt16(0, 0, 64, values)
    93  		bldr.AppendValues(values, nil)
    94  		return bldr.NewArray()
    95  	case arrow.UINT16:
    96  		bldr := array.NewUint16Builder(mem)
    97  		defer bldr.Release()
    98  		values := make([]uint16, size)
    99  		FillRandomUint16(0, 0, 64, values)
   100  		bldr.AppendValues(values, nil)
   101  		return bldr.NewArray()
   102  	case arrow.INT8:
   103  		bldr := array.NewInt8Builder(mem)
   104  		defer bldr.Release()
   105  		values := make([]int8, size)
   106  		FillRandomInt8(0, 0, 64, values)
   107  		bldr.AppendValues(values, nil)
   108  		return bldr.NewArray()
   109  	case arrow.UINT8:
   110  		bldr := array.NewUint8Builder(mem)
   111  		defer bldr.Release()
   112  		values := make([]uint8, size)
   113  		FillRandomUint8(0, 0, 64, values)
   114  		bldr.AppendValues(values, nil)
   115  		return bldr.NewArray()
   116  	case arrow.DATE32:
   117  		bldr := array.NewDate32Builder(mem)
   118  		defer bldr.Release()
   119  		values := make([]int32, size)
   120  		FillRandomInt32Max(0, 24, values)
   121  
   122  		dates := make([]arrow.Date32, size)
   123  		for idx, val := range values {
   124  			dates[idx] = arrow.Date32(val) * 86400000
   125  		}
   126  		bldr.AppendValues(dates, nil)
   127  		return bldr.NewArray()
   128  	case arrow.DATE64:
   129  		bldr := array.NewDate64Builder(mem)
   130  		defer bldr.Release()
   131  		values := make([]int64, size)
   132  		FillRandomInt64Max(0, 24, values)
   133  
   134  		dates := make([]arrow.Date64, size)
   135  		for idx, val := range values {
   136  			dates[idx] = arrow.Date64(val) * 86400000
   137  		}
   138  		bldr.AppendValues(dates, nil)
   139  		return bldr.NewArray()
   140  	case arrow.STRING:
   141  		bldr := array.NewStringBuilder(mem)
   142  		defer bldr.Release()
   143  		for i := 0; i < size; i++ {
   144  			bldr.Append("test-string")
   145  		}
   146  		return bldr.NewArray()
   147  	case arrow.LARGE_STRING:
   148  		bldr := array.NewLargeStringBuilder(mem)
   149  		defer bldr.Release()
   150  		for i := 0; i < size; i++ {
   151  			bldr.Append("test-large-string")
   152  		}
   153  		return bldr.NewArray()
   154  	case arrow.BINARY, arrow.LARGE_BINARY:
   155  		bldr := array.NewBinaryBuilder(mem, dt.(arrow.BinaryDataType))
   156  		defer bldr.Release()
   157  
   158  		buf := make([]byte, 12)
   159  		r := rand.New(rand.NewSource(0))
   160  		for i := 0; i < size; i++ {
   161  			length := r.Intn(12-2+1) + 2
   162  			r.Read(buf[:length])
   163  			bldr.Append(buf[:length])
   164  		}
   165  		return bldr.NewArray()
   166  	case arrow.FIXED_SIZE_BINARY:
   167  		bldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 10})
   168  		defer bldr.Release()
   169  
   170  		buf := make([]byte, 10)
   171  		r := rand.New(rand.NewSource(0))
   172  		for i := 0; i < size; i++ {
   173  			r.Read(buf)
   174  			bldr.Append(buf)
   175  		}
   176  		return bldr.NewArray()
   177  	case arrow.DECIMAL:
   178  		dectype := dt.(*arrow.Decimal128Type)
   179  		bldr := array.NewDecimal128Builder(mem, dectype)
   180  		defer bldr.Release()
   181  
   182  		data := RandomDecimals(int64(size), 0, dectype.Precision)
   183  		bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), nil)
   184  		return bldr.NewArray()
   185  	case arrow.BOOL:
   186  		bldr := array.NewBooleanBuilder(mem)
   187  		defer bldr.Release()
   188  
   189  		values := make([]bool, size)
   190  		FillRandomBooleans(0.5, 0, values)
   191  		bldr.AppendValues(values, nil)
   192  		return bldr.NewArray()
   193  	}
   194  	return nil
   195  }
   196  
   197  // RandomNullable generates a random arrow array of length size with approximately numNulls,
   198  // at most there can be size/2 nulls. Other than there being nulls, the values follow the same rules
   199  // as described in the docs for RandomNonNull.
   200  func RandomNullable(dt arrow.DataType, size int, numNulls int) arrow.Array {
   201  	switch dt.ID() {
   202  	case arrow.FLOAT32:
   203  		bldr := array.NewFloat32Builder(memory.DefaultAllocator)
   204  		defer bldr.Release()
   205  		values := make([]float32, size)
   206  		FillRandomFloat32(0, values)
   207  
   208  		valid := make([]bool, size)
   209  		for idx := range valid {
   210  			valid[idx] = true
   211  		}
   212  		for i := 0; i < numNulls; i++ {
   213  			valid[i*2] = false
   214  		}
   215  		bldr.AppendValues(values, valid)
   216  		return bldr.NewArray()
   217  	case arrow.FLOAT64:
   218  		bldr := array.NewFloat64Builder(memory.DefaultAllocator)
   219  		defer bldr.Release()
   220  		values := make([]float64, size)
   221  		FillRandomFloat64(0, values)
   222  
   223  		valid := make([]bool, size)
   224  		for idx := range valid {
   225  			valid[idx] = true
   226  		}
   227  		for i := 0; i < numNulls; i++ {
   228  			valid[i*2] = false
   229  		}
   230  		bldr.AppendValues(values, valid)
   231  		return bldr.NewArray()
   232  	case arrow.FLOAT16:
   233  		bldr := array.NewFloat16Builder(memory.DefaultAllocator)
   234  		defer bldr.Release()
   235  		values := make([]float16.Num, size)
   236  		FillRandomFloat16(0, values)
   237  
   238  		valid := make([]bool, size)
   239  		for idx := range valid {
   240  			valid[idx] = true
   241  		}
   242  		for i := 0; i < numNulls; i++ {
   243  			valid[i*2] = false
   244  		}
   245  		bldr.AppendValues(values, valid)
   246  		return bldr.NewArray()
   247  	case arrow.INT8:
   248  		bldr := array.NewInt8Builder(memory.DefaultAllocator)
   249  		defer bldr.Release()
   250  		values := make([]int8, size)
   251  		FillRandomInt8(0, 0, 64, values)
   252  		valid := make([]bool, size)
   253  		for idx := range valid {
   254  			valid[idx] = true
   255  		}
   256  		for i := 0; i < numNulls; i++ {
   257  			valid[i*2] = false
   258  		}
   259  
   260  		bldr.AppendValues(values, valid)
   261  		return bldr.NewArray()
   262  	case arrow.UINT8:
   263  		bldr := array.NewUint8Builder(memory.DefaultAllocator)
   264  		defer bldr.Release()
   265  		values := make([]uint8, size)
   266  		FillRandomUint8(0, 0, 64, values)
   267  		valid := make([]bool, size)
   268  		for idx := range valid {
   269  			valid[idx] = true
   270  		}
   271  		for i := 0; i < numNulls; i++ {
   272  			valid[i*2] = false
   273  		}
   274  
   275  		bldr.AppendValues(values, valid)
   276  		return bldr.NewArray()
   277  	case arrow.INT16:
   278  		bldr := array.NewInt16Builder(memory.DefaultAllocator)
   279  		defer bldr.Release()
   280  		values := make([]int16, size)
   281  		FillRandomInt16(0, 0, 64, values)
   282  		valid := make([]bool, size)
   283  		for idx := range valid {
   284  			valid[idx] = true
   285  		}
   286  		for i := 0; i < numNulls; i++ {
   287  			valid[i*2] = false
   288  		}
   289  
   290  		bldr.AppendValues(values, valid)
   291  		return bldr.NewArray()
   292  	case arrow.UINT16:
   293  		bldr := array.NewUint16Builder(memory.DefaultAllocator)
   294  		defer bldr.Release()
   295  		values := make([]uint16, size)
   296  		FillRandomUint16(0, 0, 64, values)
   297  		valid := make([]bool, size)
   298  		for idx := range valid {
   299  			valid[idx] = true
   300  		}
   301  		for i := 0; i < numNulls; i++ {
   302  			valid[i*2] = false
   303  		}
   304  
   305  		bldr.AppendValues(values, valid)
   306  		return bldr.NewArray()
   307  	case arrow.INT32:
   308  		bldr := array.NewInt32Builder(memory.DefaultAllocator)
   309  		defer bldr.Release()
   310  		values := make([]int32, size)
   311  		FillRandomInt32Max(0, 64, values)
   312  		valid := make([]bool, size)
   313  		for idx := range valid {
   314  			valid[idx] = true
   315  		}
   316  		for i := 0; i < numNulls; i++ {
   317  			valid[i*2] = false
   318  		}
   319  
   320  		bldr.AppendValues(values, valid)
   321  		return bldr.NewArray()
   322  	case arrow.UINT32:
   323  		bldr := array.NewUint32Builder(memory.DefaultAllocator)
   324  		defer bldr.Release()
   325  		values := make([]uint32, size)
   326  		FillRandomUint32Max(0, 64, values)
   327  		valid := make([]bool, size)
   328  		for idx := range valid {
   329  			valid[idx] = true
   330  		}
   331  		for i := 0; i < numNulls; i++ {
   332  			valid[i*2] = false
   333  		}
   334  
   335  		bldr.AppendValues(values, valid)
   336  		return bldr.NewArray()
   337  
   338  	case arrow.INT64:
   339  		bldr := array.NewInt64Builder(memory.DefaultAllocator)
   340  		defer bldr.Release()
   341  		values := make([]int64, size)
   342  		FillRandomInt64Max(0, 64, values)
   343  		valid := make([]bool, size)
   344  		for idx := range valid {
   345  			valid[idx] = true
   346  		}
   347  		for i := 0; i < numNulls; i++ {
   348  			valid[i*2] = false
   349  		}
   350  
   351  		bldr.AppendValues(values, valid)
   352  		return bldr.NewArray()
   353  	case arrow.UINT64:
   354  		bldr := array.NewUint64Builder(memory.DefaultAllocator)
   355  		defer bldr.Release()
   356  		values := make([]uint64, size)
   357  		FillRandomUint64Max(0, 64, values)
   358  		valid := make([]bool, size)
   359  		for idx := range valid {
   360  			valid[idx] = true
   361  		}
   362  		for i := 0; i < numNulls; i++ {
   363  			valid[i*2] = false
   364  		}
   365  
   366  		bldr.AppendValues(values, valid)
   367  		return bldr.NewArray()
   368  	case arrow.DATE32:
   369  		bldr := array.NewDate32Builder(memory.DefaultAllocator)
   370  		defer bldr.Release()
   371  		values := make([]int32, size)
   372  		FillRandomInt32Max(0, 24, values)
   373  
   374  		dates := make([]arrow.Date32, size)
   375  		for idx, val := range values {
   376  			dates[idx] = arrow.Date32(val) * 86400000
   377  		}
   378  		valid := make([]bool, size)
   379  		for idx := range valid {
   380  			valid[idx] = true
   381  		}
   382  		for i := 0; i < numNulls; i++ {
   383  			valid[i*2] = false
   384  		}
   385  		bldr.AppendValues(dates, valid)
   386  		return bldr.NewArray()
   387  	case arrow.DATE64:
   388  		bldr := array.NewDate64Builder(memory.DefaultAllocator)
   389  		defer bldr.Release()
   390  		values := make([]int64, size)
   391  		FillRandomInt64Max(0, 24, values)
   392  
   393  		dates := make([]arrow.Date64, size)
   394  		for idx, val := range values {
   395  			dates[idx] = arrow.Date64(val) * 86400000
   396  		}
   397  		valid := make([]bool, size)
   398  		for idx := range valid {
   399  			valid[idx] = true
   400  		}
   401  		for i := 0; i < numNulls; i++ {
   402  			valid[i*2] = false
   403  		}
   404  		bldr.AppendValues(dates, valid)
   405  		return bldr.NewArray()
   406  	case arrow.BINARY:
   407  		bldr := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary)
   408  		defer bldr.Release()
   409  
   410  		valid := make([]bool, size)
   411  		for idx := range valid {
   412  			valid[idx] = true
   413  		}
   414  		for i := 0; i < numNulls; i++ {
   415  			valid[i*2] = false
   416  		}
   417  
   418  		buf := make([]byte, 12)
   419  		r := rand.New(rand.NewSource(0))
   420  		for i := 0; i < size; i++ {
   421  			if !valid[i] {
   422  				bldr.AppendNull()
   423  				continue
   424  			}
   425  
   426  			length := r.Intn(12-2+1) + 2
   427  			r.Read(buf[:length])
   428  			bldr.Append(buf[:length])
   429  		}
   430  		return bldr.NewArray()
   431  	case arrow.STRING:
   432  		bldr := array.NewStringBuilder(memory.DefaultAllocator)
   433  		defer bldr.Release()
   434  
   435  		valid := make([]bool, size)
   436  		for idx := range valid {
   437  			valid[idx] = true
   438  		}
   439  		for i := 0; i < numNulls; i++ {
   440  			valid[i*2] = false
   441  		}
   442  
   443  		buf := make([]byte, 12)
   444  		r := rand.New(rand.NewSource(0))
   445  		for i := 0; i < size; i++ {
   446  			if !valid[i] {
   447  				bldr.AppendNull()
   448  				continue
   449  			}
   450  
   451  			length := r.Intn(12-2+1) + 2
   452  			r.Read(buf[:length])
   453  			// trivially force data to be valid UTF8 by making it all ASCII
   454  			for idx := range buf[:length] {
   455  				buf[idx] &= 0x7f
   456  			}
   457  			bldr.Append(string(buf[:length]))
   458  		}
   459  		return bldr.NewArray()
   460  	case arrow.FIXED_SIZE_BINARY:
   461  		bldr := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 10})
   462  		defer bldr.Release()
   463  
   464  		valid := make([]bool, size)
   465  		for idx := range valid {
   466  			valid[idx] = true
   467  		}
   468  		for i := 0; i < numNulls; i++ {
   469  			valid[i*2] = false
   470  		}
   471  
   472  		buf := make([]byte, 10)
   473  		r := rand.New(rand.NewSource(0))
   474  		for i := 0; i < size; i++ {
   475  			if !valid[i] {
   476  				bldr.AppendNull()
   477  				continue
   478  			}
   479  
   480  			r.Read(buf)
   481  			bldr.Append(buf)
   482  		}
   483  		return bldr.NewArray()
   484  	case arrow.DECIMAL:
   485  		dectype := dt.(*arrow.Decimal128Type)
   486  		bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dectype)
   487  		defer bldr.Release()
   488  
   489  		valid := make([]bool, size)
   490  		for idx := range valid {
   491  			valid[idx] = true
   492  		}
   493  		for i := 0; i < numNulls; i++ {
   494  			valid[i*2] = false
   495  		}
   496  
   497  		data := RandomDecimals(int64(size), 0, dectype.Precision)
   498  		bldr.AppendValues(arrow.Decimal128Traits.CastFromBytes(data), valid)
   499  		return bldr.NewArray()
   500  	case arrow.BOOL:
   501  		bldr := array.NewBooleanBuilder(memory.DefaultAllocator)
   502  		defer bldr.Release()
   503  
   504  		valid := make([]bool, size)
   505  		for idx := range valid {
   506  			valid[idx] = true
   507  		}
   508  		for i := 0; i < numNulls; i++ {
   509  			valid[i*2] = false
   510  		}
   511  
   512  		values := make([]bool, size)
   513  		FillRandomBooleans(0.5, 0, values)
   514  		bldr.AppendValues(values, valid)
   515  		return bldr.NewArray()
   516  	}
   517  	return nil
   518  }
   519  

View as plain text