...

Source file src/cloud.google.com/go/bigquery/file.go

Documentation: cloud.google.com/go/bigquery

     1  // Copyright 2016 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bigquery
    16  
    17  import (
    18  	"io"
    19  
    20  	bq "google.golang.org/api/bigquery/v2"
    21  )
    22  
    23  // A ReaderSource is a source for a load operation that gets
    24  // data from an io.Reader.
    25  //
    26  // When a ReaderSource is part of a LoadConfig obtained via Job.Config,
    27  // its internal io.Reader will be nil, so it cannot be used for a
    28  // subsequent load operation.
    29  type ReaderSource struct {
    30  	r io.Reader
    31  	FileConfig
    32  }
    33  
    34  // NewReaderSource creates a ReaderSource from an io.Reader. You may
    35  // optionally configure properties on the ReaderSource that describe the
    36  // data being read, before passing it to Table.LoaderFrom.
    37  func NewReaderSource(r io.Reader) *ReaderSource {
    38  	return &ReaderSource{r: r}
    39  }
    40  
    41  func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader {
    42  	r.FileConfig.populateLoadConfig(lc)
    43  	return r.r
    44  }
    45  
    46  // FileConfig contains configuration options that pertain to files, typically
    47  // text files that require interpretation to be used as a BigQuery table. A
    48  // file may live in Google Cloud Storage (see GCSReference), or it may be
    49  // loaded into a table via the Table.LoaderFromReader.
    50  type FileConfig struct {
    51  	// SourceFormat is the format of the data to be read.
    52  	// Allowed values are: Avro, CSV, DatastoreBackup, JSON, ORC, and Parquet.  The default is CSV.
    53  	SourceFormat DataFormat
    54  
    55  	// Indicates if we should automatically infer the options and
    56  	// schema for CSV and JSON sources.
    57  	AutoDetect bool
    58  
    59  	// MaxBadRecords is the maximum number of bad records that will be ignored
    60  	// when reading data.
    61  	MaxBadRecords int64
    62  
    63  	// IgnoreUnknownValues causes values not matching the schema to be
    64  	// tolerated. Unknown values are ignored. For CSV this ignores extra values
    65  	// at the end of a line. For JSON this ignores named values that do not
    66  	// match any column name. If this field is not set, records containing
    67  	// unknown values are treated as bad records. The MaxBadRecords field can
    68  	// be used to customize how bad records are handled.
    69  	IgnoreUnknownValues bool
    70  
    71  	// Schema describes the data. It is required when reading CSV or JSON data,
    72  	// unless the data is being loaded into a table that already exists.
    73  	Schema Schema
    74  
    75  	// Additional options for CSV files.
    76  	CSVOptions
    77  
    78  	// Additional options for Parquet files.
    79  	ParquetOptions *ParquetOptions
    80  
    81  	// Additional options for Avro files.
    82  	AvroOptions *AvroOptions
    83  }
    84  
    85  func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
    86  	conf.SkipLeadingRows = fc.SkipLeadingRows
    87  	conf.SourceFormat = string(fc.SourceFormat)
    88  	conf.Autodetect = fc.AutoDetect
    89  	conf.AllowJaggedRows = fc.AllowJaggedRows
    90  	conf.AllowQuotedNewlines = fc.AllowQuotedNewlines
    91  	conf.Encoding = string(fc.Encoding)
    92  	conf.FieldDelimiter = fc.FieldDelimiter
    93  	conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
    94  	conf.MaxBadRecords = fc.MaxBadRecords
    95  	conf.NullMarker = fc.NullMarker
    96  	conf.PreserveAsciiControlCharacters = fc.PreserveASCIIControlCharacters
    97  	if fc.Schema != nil {
    98  		conf.Schema = fc.Schema.toBQ()
    99  	}
   100  	if fc.ParquetOptions != nil {
   101  		conf.ParquetOptions = &bq.ParquetOptions{
   102  			EnumAsString:        fc.ParquetOptions.EnumAsString,
   103  			EnableListInference: fc.ParquetOptions.EnableListInference,
   104  		}
   105  	}
   106  	if fc.AvroOptions != nil {
   107  		conf.UseAvroLogicalTypes = fc.AvroOptions.UseAvroLogicalTypes
   108  	}
   109  	conf.Quote = fc.quote()
   110  }
   111  
   112  func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
   113  	fc.SourceFormat = DataFormat(conf.SourceFormat)
   114  	fc.AutoDetect = conf.Autodetect
   115  	fc.MaxBadRecords = conf.MaxBadRecords
   116  	fc.IgnoreUnknownValues = conf.IgnoreUnknownValues
   117  	fc.Schema = bqToSchema(conf.Schema)
   118  	fc.SkipLeadingRows = conf.SkipLeadingRows
   119  	fc.AllowJaggedRows = conf.AllowJaggedRows
   120  	fc.AllowQuotedNewlines = conf.AllowQuotedNewlines
   121  	fc.Encoding = Encoding(conf.Encoding)
   122  	fc.FieldDelimiter = conf.FieldDelimiter
   123  	fc.CSVOptions.NullMarker = conf.NullMarker
   124  	fc.CSVOptions.PreserveASCIIControlCharacters = conf.PreserveAsciiControlCharacters
   125  	fc.CSVOptions.setQuote(conf.Quote)
   126  }
   127  
   128  func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) {
   129  	format := fc.SourceFormat
   130  	if format == "" {
   131  		// Format must be explicitly set for external data sources.
   132  		format = CSV
   133  	}
   134  	conf.Autodetect = fc.AutoDetect
   135  	conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
   136  	conf.MaxBadRecords = fc.MaxBadRecords
   137  	conf.SourceFormat = string(format)
   138  	if fc.Schema != nil {
   139  		conf.Schema = fc.Schema.toBQ()
   140  	}
   141  	if format == CSV {
   142  		fc.CSVOptions.populateExternalDataConfig(conf)
   143  	}
   144  	if fc.AvroOptions != nil {
   145  		conf.AvroOptions = &bq.AvroOptions{
   146  			UseAvroLogicalTypes: fc.AvroOptions.UseAvroLogicalTypes,
   147  		}
   148  	}
   149  	if fc.ParquetOptions != nil {
   150  		conf.ParquetOptions = &bq.ParquetOptions{
   151  			EnumAsString:        fc.ParquetOptions.EnumAsString,
   152  			EnableListInference: fc.ParquetOptions.EnableListInference,
   153  		}
   154  	}
   155  }
   156  
   157  // Encoding specifies the character encoding of data to be loaded into BigQuery.
   158  // See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
   159  // for more details about how this is used.
   160  type Encoding string
   161  
   162  const (
   163  	// UTF_8 specifies the UTF-8 encoding type.
   164  	UTF_8 Encoding = "UTF-8"
   165  	// ISO_8859_1 specifies the ISO-8859-1 encoding type.
   166  	ISO_8859_1 Encoding = "ISO-8859-1"
   167  )
   168  

View as plain text