...

Source file src/cloud.google.com/go/bigquery/schema.go

Documentation: cloud.google.com/go/bigquery

     1  // Copyright 2015 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bigquery
    16  
    17  import (
    18  	"encoding/json"
    19  	"errors"
    20  	"fmt"
    21  	"reflect"
    22  	"sync"
    23  
    24  	bq "google.golang.org/api/bigquery/v2"
    25  )
    26  
// Schema describes the fields in a table or query result.
// It is an ordered list of field definitions; nested records carry their
// own Schema via FieldSchema.Schema.
type Schema []*FieldSchema
    29  
    30  // Relax returns a version of the schema where no fields are marked
    31  // as Required.
    32  func (s Schema) Relax() Schema {
    33  	var out Schema
    34  	for _, v := range s {
    35  		relaxed := &FieldSchema{
    36  			Name:        v.Name,
    37  			Description: v.Description,
    38  			Repeated:    v.Repeated,
    39  			Required:    false,
    40  			Type:        v.Type,
    41  			Schema:      v.Schema.Relax(),
    42  		}
    43  		out = append(out, relaxed)
    44  	}
    45  	return out
    46  }
    47  
    48  // ToJSONFields exposes the schema as a JSON array of
    49  // TableFieldSchema objects: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema
    50  //
    51  // Generally this isn't needed for direct usage of this library, but is
    52  // provided for use cases where you're interacting with other tools
    53  // that consume the underlying API representation directly such as the
    54  // BQ CLI tool.
    55  func (s Schema) ToJSONFields() ([]byte, error) {
    56  	var rawSchema []*bq.TableFieldSchema
    57  	for _, f := range s {
    58  		rawSchema = append(rawSchema, f.toBQ())
    59  	}
    60  	// Use json.MarshalIndent to make the output more human-readable.
    61  	return json.MarshalIndent(rawSchema, "", " ")
    62  }
    63  
// FieldSchema describes a single field (column, or member of a nested
// record) in a Schema.
type FieldSchema struct {
	// The field name.
	// Must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
	// and must start with a letter or underscore.
	// The maximum length is 128 characters.
	Name string

	// A description of the field. The maximum length is 16,384 characters.
	Description string

	// Whether the field may contain multiple values.
	Repeated bool
	// Whether the field is required.  Ignored if Repeated is true.
	Required bool

	// The field data type.  If Type is Record, then this field contains a nested schema,
	// which is described by Schema.
	Type FieldType

	// Annotations for enforcing column-level security constraints.
	PolicyTags *PolicyTagList

	// Describes the nested schema if Type is set to Record.
	Schema Schema

	// Maximum length of the field for STRING or BYTES type.
	//
	// It is invalid to set value for types other than STRING or BYTES.
	//
	// For STRING type, this represents the maximum UTF-8 length of strings
	// allowed in the field. For BYTES type, this represents the maximum
	// number of bytes in the field.
	MaxLength int64

	// Precision can be used to constrain the maximum number of
	// total digits allowed for NUMERIC or BIGNUMERIC types.
	//
	// It is invalid to set values for Precision for types other than
	// NUMERIC or BIGNUMERIC.
	//
	// For NUMERIC type, acceptable values for Precision must
	// be: 1 ≤ (Precision - Scale) ≤ 29. Values for Scale
	// must be: 0 ≤ Scale ≤ 9.
	//
	// For BIGNUMERIC type, acceptable values for Precision must
	// be: 1 ≤ (Precision - Scale) ≤ 38. Values for Scale
	// must be: 0 ≤ Scale ≤ 38.
	Precision int64

	// Scale can be used to constrain the maximum number of digits
	// in the fractional part of a NUMERIC or BIGNUMERIC type.
	//
	// If the Scale value is set, the Precision value must be set as well.
	//
	// It is invalid to set values for Scale for types other than
	// NUMERIC or BIGNUMERIC.
	//
	// See the Precision field for additional guidance about valid values.
	Scale int64

	// DefaultValueExpression is used to specify the default value of a field
	// using a SQL expression.  It can only be set for top level fields (columns).
	//
	// You can use struct or array expression to specify default value for the
	// entire struct or array. The valid SQL expressions are:
	//
	// - Literals for all data types, including STRUCT and ARRAY.
	// - The following functions:
	//   - CURRENT_TIMESTAMP
	//   - CURRENT_TIME
	//   - CURRENT_DATE
	//   - CURRENT_DATETIME
	//   - GENERATE_UUID
	//   - RAND
	//   - SESSION_USER
	//   - ST_GEOGPOINT
	//   - Struct or array composed with the above allowed functions, for example:
	//       [CURRENT_DATE(), DATE '2020-01-01']
	DefaultValueExpression string

	// Collation can be set only when the type of field is STRING.
	// The following values are supported:
	//   - 'und:ci': undetermined locale, case insensitive.
	//   - '': empty string. Default to case-sensitive behavior.
	// More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts
	Collation string

	// Information about the range.
	// If the type is RANGE, this field is required.
	RangeElementType *RangeElementType
}
   156  
   157  func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
   158  	tfs := &bq.TableFieldSchema{
   159  		Description:            fs.Description,
   160  		Name:                   fs.Name,
   161  		Type:                   string(fs.Type),
   162  		PolicyTags:             fs.PolicyTags.toBQ(),
   163  		MaxLength:              fs.MaxLength,
   164  		Precision:              fs.Precision,
   165  		Scale:                  fs.Scale,
   166  		DefaultValueExpression: fs.DefaultValueExpression,
   167  		Collation:              string(fs.Collation),
   168  		RangeElementType:       fs.RangeElementType.toBQ(),
   169  	}
   170  
   171  	if fs.Repeated {
   172  		tfs.Mode = "REPEATED"
   173  	} else if fs.Required {
   174  		tfs.Mode = "REQUIRED"
   175  	} // else leave as default, which is interpreted as NULLABLE.
   176  
   177  	for _, f := range fs.Schema {
   178  		tfs.Fields = append(tfs.Fields, f.toBQ())
   179  	}
   180  
   181  	return tfs
   182  }
   183  
// RangeElementType describes information about the range type.
// It is referenced from FieldSchema.RangeElementType.
type RangeElementType struct {
	// The subtype of the RANGE, if the type of this field is RANGE.
	// Possible values for the field element type of a RANGE include:
	// DATE, DATETIME, or TIMESTAMP.
	Type FieldType
}
   191  
   192  func (rt *RangeElementType) toBQ() *bq.TableFieldSchemaRangeElementType {
   193  	if rt == nil {
   194  		return nil
   195  	}
   196  	return &bq.TableFieldSchemaRangeElementType{
   197  		Type: string(rt.Type),
   198  	}
   199  }
   200  
   201  func bqToRangeElementType(rt *bq.TableFieldSchemaRangeElementType) *RangeElementType {
   202  	if rt == nil {
   203  		return nil
   204  	}
   205  	return &RangeElementType{
   206  		Type: FieldType(rt.Type),
   207  	}
   208  }
   209  
// PolicyTagList represents the annotations on a schema column for enforcing column-level security.
// For more information, see https://cloud.google.com/bigquery/docs/column-level-security-intro
type PolicyTagList struct {
	// Names holds the policy tag names attached to the column; it is passed
	// through unchanged to and from the API representation.
	Names []string
}
   215  
   216  func (ptl *PolicyTagList) toBQ() *bq.TableFieldSchemaPolicyTags {
   217  	if ptl == nil {
   218  		return nil
   219  	}
   220  	return &bq.TableFieldSchemaPolicyTags{
   221  		Names: ptl.Names,
   222  	}
   223  }
   224  
   225  func bqToPolicyTagList(pt *bq.TableFieldSchemaPolicyTags) *PolicyTagList {
   226  	if pt == nil {
   227  		return nil
   228  	}
   229  	return &PolicyTagList{
   230  		Names: pt.Names,
   231  	}
   232  }
   233  
   234  func (s Schema) toBQ() *bq.TableSchema {
   235  	var fields []*bq.TableFieldSchema
   236  	for _, f := range s {
   237  		fields = append(fields, f.toBQ())
   238  	}
   239  	return &bq.TableSchema{Fields: fields}
   240  }
   241  
   242  func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema {
   243  	fs := &FieldSchema{
   244  		Description:            tfs.Description,
   245  		Name:                   tfs.Name,
   246  		Repeated:               tfs.Mode == "REPEATED",
   247  		Required:               tfs.Mode == "REQUIRED",
   248  		Type:                   FieldType(tfs.Type),
   249  		PolicyTags:             bqToPolicyTagList(tfs.PolicyTags),
   250  		MaxLength:              tfs.MaxLength,
   251  		Precision:              tfs.Precision,
   252  		Scale:                  tfs.Scale,
   253  		DefaultValueExpression: tfs.DefaultValueExpression,
   254  		Collation:              tfs.Collation,
   255  		RangeElementType:       bqToRangeElementType(tfs.RangeElementType),
   256  	}
   257  
   258  	for _, f := range tfs.Fields {
   259  		fs.Schema = append(fs.Schema, bqToFieldSchema(f))
   260  	}
   261  	return fs
   262  }
   263  
   264  func bqToSchema(ts *bq.TableSchema) Schema {
   265  	if ts == nil {
   266  		return nil
   267  	}
   268  	var s Schema
   269  	for _, f := range ts.Fields {
   270  		s = append(s, bqToFieldSchema(f))
   271  	}
   272  	return s
   273  }
   274  
// FieldType is the type of field.
// Its string value is the type name exchanged with the API (for example
// "STRING" or "RECORD").
type FieldType string
   277  
const (
	// StringFieldType is a string field type.
	StringFieldType FieldType = "STRING"
	// BytesFieldType is a bytes field type.
	BytesFieldType FieldType = "BYTES"
	// IntegerFieldType is an integer field type.
	IntegerFieldType FieldType = "INTEGER"
	// FloatFieldType is a float field type.
	FloatFieldType FieldType = "FLOAT"
	// BooleanFieldType is a boolean field type.
	BooleanFieldType FieldType = "BOOLEAN"
	// TimestampFieldType is a timestamp field type.
	TimestampFieldType FieldType = "TIMESTAMP"
	// RecordFieldType is a record field type. It is typically used to create columns with repeated or nested data.
	RecordFieldType FieldType = "RECORD"
	// DateFieldType is a date field type.
	DateFieldType FieldType = "DATE"
	// TimeFieldType is a time field type.
	TimeFieldType FieldType = "TIME"
	// DateTimeFieldType is a datetime field type.
	DateTimeFieldType FieldType = "DATETIME"
	// NumericFieldType is a numeric field type. Numeric types include integer types, floating point types and the
	// NUMERIC data type.
	NumericFieldType FieldType = "NUMERIC"
	// GeographyFieldType is a geography field type.  Geography types represent a set of points
	// on the Earth's surface, represented in Well Known Text (WKT) format.
	GeographyFieldType FieldType = "GEOGRAPHY"
	// BigNumericFieldType is a numeric field type that supports values of larger precision
	// and scale than the NumericFieldType.
	BigNumericFieldType FieldType = "BIGNUMERIC"
	// IntervalFieldType is a representation of a duration or an amount of time.
	IntervalFieldType FieldType = "INTERVAL"
	// JSONFieldType is a representation of a json object.
	JSONFieldType FieldType = "JSON"
	// RangeFieldType represents a continuous range of values.
	RangeFieldType FieldType = "RANGE"
)
   315  
var (
	// errEmptyJSONSchema is returned by SchemaFromJSON when it is given
	// zero bytes of input.
	errEmptyJSONSchema = errors.New("bigquery: empty JSON schema")
	// fieldTypes is the set of canonical field type names; validateKnownType
	// accepts these as-is.
	fieldTypes = map[FieldType]bool{
		StringFieldType:     true,
		BytesFieldType:      true,
		IntegerFieldType:    true,
		FloatFieldType:      true,
		BooleanFieldType:    true,
		TimestampFieldType:  true,
		RecordFieldType:     true,
		DateFieldType:       true,
		TimeFieldType:       true,
		DateTimeFieldType:   true,
		NumericFieldType:    true,
		GeographyFieldType:  true,
		BigNumericFieldType: true,
		IntervalFieldType:   true,
		JSONFieldType:       true,
		RangeFieldType:      true,
	}
	// The API will accept alias names for the types based on the Standard SQL type names.
	// fieldAliases maps each alias to the canonical FieldType it resolves to.
	fieldAliases = map[FieldType]FieldType{
		"BOOL":       BooleanFieldType,
		"FLOAT64":    FloatFieldType,
		"INT64":      IntegerFieldType,
		"STRUCT":     RecordFieldType,
		"DECIMAL":    NumericFieldType,
		"BIGDECIMAL": BigNumericFieldType,
	}
)
   346  
// typeOfByteSlice is the reflect.Type of []byte; schema inference compares
// field types against it to recognize BYTES fields.
var typeOfByteSlice = reflect.TypeOf([]byte{})
   348  
   349  // InferSchema tries to derive a BigQuery schema from the supplied struct value.
   350  // Each exported struct field is mapped to a field in the schema.
   351  //
   352  // The following BigQuery types are inferred from the corresponding Go types.
   353  // (This is the same mapping as that used for RowIterator.Next.) Fields inferred
   354  // from these types are marked required (non-nullable).
   355  //
   356  //	STRING      string
   357  //	BOOL        bool
   358  //	INTEGER     int, int8, int16, int32, int64, uint8, uint16, uint32
   359  //	FLOAT       float32, float64
   360  //	BYTES       []byte
   361  //	TIMESTAMP   time.Time
   362  //	DATE        civil.Date
   363  //	TIME        civil.Time
   364  //	DATETIME    civil.DateTime
   365  //	NUMERIC     *big.Rat
   366  //	JSON        map[string]interface{}
   367  //
   368  // The big.Rat type supports numbers of arbitrary size and precision. Values
   369  // will be rounded to 9 digits after the decimal point before being transmitted
   370  // to BigQuery. See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type
   371  // for more on NUMERIC.
   372  //
   373  // A Go slice or array type is inferred to be a BigQuery repeated field of the
   374  // element type. The element type must be one of the above listed types.
   375  //
   376  // Due to lack of unique native Go type for GEOGRAPHY, there is no schema
   377  // inference to GEOGRAPHY at this time.
   378  //
   379  // This package also provides some value types for expressing the corresponding SQL types.
   380  //
   381  // INTERVAL		*IntervalValue
   382  // RANGE    	*RangeValue
   383  //
   384  // In the case of RANGE types, a RANGE represents a continuous set of values of a given
   385  // element type (DATE, DATETIME, or TIMESTAMP).  InferSchema does not attempt to determine
   386  // the element type, as it uses generic Value types to denote the start/end of the range.
   387  //
   388  // Nullable fields are inferred from the NullXXX types, declared in this package:
   389  //
   390  //	STRING      NullString
   391  //	BOOL        NullBool
   392  //	INTEGER     NullInt64
   393  //	FLOAT       NullFloat64
   394  //	TIMESTAMP   NullTimestamp
   395  //	DATE        NullDate
   396  //	TIME        NullTime
   397  //	DATETIME    NullDateTime
   398  //	GEOGRAPHY   NullGeography
   399  //
   400  // For a nullable BYTES field, use the type []byte and tag the field "nullable" (see below).
   401  // For a nullable NUMERIC field, use the type *big.Rat and tag the field "nullable".
   402  //
   403  // A struct field that is of struct type is inferred to be a required field of type
   404  // RECORD with a schema inferred recursively. For backwards compatibility, a field of
   405  // type pointer to struct is also inferred to be required. To get a nullable RECORD
   406  // field, use the "nullable" tag (see below).
   407  //
   408  // InferSchema returns an error if any of the examined fields is of type uint,
   409  // uint64, uintptr, map, interface, complex64, complex128, func, or chan. Future
   410  // versions may handle these cases without error.
   411  //
   412  // Recursively defined structs are also disallowed.
   413  //
   414  // Struct fields may be tagged in a way similar to the encoding/json package.
   415  // A tag of the form
   416  //
   417  //	bigquery:"name"
   418  //
   419  // uses "name" instead of the struct field name as the BigQuery field name.
   420  // A tag of the form
   421  //
   422  //	bigquery:"-"
   423  //
   424  // omits the field from the inferred schema.
   425  // The "nullable" option marks the field as nullable (not required). It is only
   426  // needed for []byte, *big.Rat and pointer-to-struct fields, and cannot appear on other
   427  // fields. In this example, the Go name of the field is retained:
   428  //
   429  //	bigquery:",nullable"
   430  func InferSchema(st interface{}) (Schema, error) {
   431  	return inferSchemaReflectCached(reflect.TypeOf(st))
   432  }
   433  
// schemaCache memoizes schema inference results. inferSchemaReflectCached
// stores a cacheVal in it keyed by the reflect.Type that was inferred.
var schemaCache sync.Map
   435  
// cacheVal is the value stored in schemaCache: the outcome of one schema
// inference, whether it succeeded or failed.
type cacheVal struct {
	schema Schema // inferred schema, as returned by inferSchemaReflect
	err    error  // inference error, as returned by inferSchemaReflect
}
   440  
   441  func inferSchemaReflectCached(t reflect.Type) (Schema, error) {
   442  	var cv cacheVal
   443  	v, ok := schemaCache.Load(t)
   444  	if ok {
   445  		cv = v.(cacheVal)
   446  	} else {
   447  		s, err := inferSchemaReflect(t)
   448  		cv = cacheVal{s, err}
   449  		schemaCache.Store(t, cv)
   450  	}
   451  	return cv.schema, cv.err
   452  }
   453  
   454  func inferSchemaReflect(t reflect.Type) (Schema, error) {
   455  	rec, err := hasRecursiveType(t, nil)
   456  	if err != nil {
   457  		return nil, err
   458  	}
   459  	if rec {
   460  		return nil, fmt.Errorf("bigquery: schema inference for recursive type %s", t)
   461  	}
   462  	return inferStruct(t)
   463  }
   464  
   465  func inferStruct(t reflect.Type) (Schema, error) {
   466  	switch t.Kind() {
   467  	case reflect.Ptr:
   468  		if t.Elem().Kind() != reflect.Struct {
   469  			return nil, noStructError{t}
   470  		}
   471  		t = t.Elem()
   472  		fallthrough
   473  
   474  	case reflect.Struct:
   475  		return inferFields(t)
   476  	default:
   477  		return nil, noStructError{t}
   478  	}
   479  }
   480  
// inferFieldSchema infers the FieldSchema for a Go type.
//
// fieldName is used only to build error values; the returned FieldSchema has
// no Name set, so callers assign it themselves. nullable and json report
// whether the field carried the corresponding tag options.
//
// The checks below are order-dependent: exact type matches come first, then
// the NullXXX types, then supported integer kinds, and finally a switch on
// the reflect kind.
func inferFieldSchema(fieldName string, rt reflect.Type, nullable, json bool) (*FieldSchema, error) {
	// Only []byte and struct pointers can be tagged nullable.
	if nullable && !(rt == typeOfByteSlice || rt.Kind() == reflect.Ptr && rt.Elem().Kind() == reflect.Struct) {
		return nil, badNullableError{fieldName, rt}
	}
	// Only structs and struct pointers can be tagged as json.
	if json && !(rt.Kind() == reflect.Struct || rt.Kind() == reflect.Ptr && rt.Elem().Kind() == reflect.Struct) {
		return nil, badJSONError{fieldName, rt}
	}
	// Exact type matches. These must be handled before the kind switch
	// below, which would otherwise treat them by their underlying kind.
	switch rt {
	case typeOfByteSlice:
		return &FieldSchema{Required: !nullable, Type: BytesFieldType}, nil
	case typeOfGoTime:
		return &FieldSchema{Required: true, Type: TimestampFieldType}, nil
	case typeOfDate:
		return &FieldSchema{Required: true, Type: DateFieldType}, nil
	case typeOfTime:
		return &FieldSchema{Required: true, Type: TimeFieldType}, nil
	case typeOfDateTime:
		return &FieldSchema{Required: true, Type: DateTimeFieldType}, nil
	case typeOfRat:
		// We automatically infer big.Rat values as NUMERIC as we cannot
		// determine precision/scale from the type.  Users who want the
		// larger precision of BIGNUMERIC need to manipulate the inferred
		// schema.
		return &FieldSchema{Required: !nullable, Type: NumericFieldType}, nil
	case typeOfIntervalValue:
		return &FieldSchema{Required: !nullable, Type: IntervalFieldType}, nil
	case typeOfRangeValue:
		// We can't fully infer the element type of a range without additional
		// information, and don't set the RangeElementType when inferred.
		return &FieldSchema{Required: !nullable, Type: RangeFieldType}, nil
	}
	// A non-empty result means nullableFieldType recognized rt; such fields
	// are never required.
	if ft := nullableFieldType(rt); ft != "" {
		return &FieldSchema{Required: false, Type: ft}, nil
	}
	if isSupportedIntType(rt) || isSupportedUintType(rt) {
		return &FieldSchema{Required: true, Type: IntegerFieldType}, nil
	}
	switch rt.Kind() {
	case reflect.Slice, reflect.Array:
		et := rt.Elem()
		if et != typeOfByteSlice && (et.Kind() == reflect.Slice || et.Kind() == reflect.Array) {
			// Multi dimensional slices/arrays are not supported by BigQuery
			return nil, unsupportedFieldTypeError{fieldName, rt}
		}
		if nullableFieldType(et) != "" {
			// Repeated nullable types are not supported by BigQuery.
			return nil, unsupportedFieldTypeError{fieldName, rt}
		}
		// Infer the element's schema (tags do not apply to the element),
		// then mark the result as repeated rather than required.
		f, err := inferFieldSchema(fieldName, et, false, false)
		if err != nil {
			return nil, err
		}
		f.Repeated = true
		f.Required = false
		return f, nil
	case reflect.Ptr:
		// Only pointers to structs are supported; they share the struct
		// handling below.
		if rt.Elem().Kind() != reflect.Struct {
			return nil, unsupportedFieldTypeError{fieldName, rt}
		}
		fallthrough
	case reflect.Struct:
		// A struct tagged json becomes a JSON column instead of a RECORD.
		if json {
			return &FieldSchema{Required: !nullable, Type: JSONFieldType}, nil
		}

		nested, err := inferStruct(rt)
		if err != nil {
			return nil, err
		}
		return &FieldSchema{Required: !nullable, Type: RecordFieldType, Schema: nested}, nil
	case reflect.String:
		return &FieldSchema{Required: !nullable, Type: StringFieldType}, nil
	case reflect.Bool:
		return &FieldSchema{Required: !nullable, Type: BooleanFieldType}, nil
	case reflect.Float32, reflect.Float64:
		return &FieldSchema{Required: !nullable, Type: FloatFieldType}, nil
	case reflect.Map:
		// Only maps keyed by strings are inferred, as JSON.
		if rt.Key().Kind() != reflect.String {
			return nil, unsupportedFieldTypeError{fieldName, rt}
		}
		return &FieldSchema{Required: !nullable, Type: JSONFieldType}, nil
	default:
		return nil, unsupportedFieldTypeError{fieldName, rt}
	}
}
   569  
   570  // inferFields extracts all exported field types from struct type.
   571  func inferFields(rt reflect.Type) (Schema, error) {
   572  	var s Schema
   573  	fields, err := fieldCache.Fields(rt)
   574  	if err != nil {
   575  		return nil, err
   576  	}
   577  	for _, field := range fields {
   578  		var nullable, json bool
   579  		for _, opt := range field.ParsedTag.([]string) {
   580  			if opt == nullableTagOption {
   581  				nullable = true
   582  			}
   583  			if opt == jsonTagOption {
   584  				json = true
   585  			}
   586  		}
   587  		f, err := inferFieldSchema(field.Name, field.Type, nullable, json)
   588  		if err != nil {
   589  			return nil, err
   590  		}
   591  		f.Name = field.Name
   592  		s = append(s, f)
   593  	}
   594  	return s, nil
   595  }
   596  
   597  // isSupportedIntType reports whether t is an int type that can be properly
   598  // represented by the BigQuery INTEGER/INT64 type.
   599  func isSupportedIntType(t reflect.Type) bool {
   600  	switch t.Kind() {
   601  	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
   602  		return true
   603  	default:
   604  		return false
   605  	}
   606  }
   607  
   608  // isSupportedIntType reports whether t is a uint type that can be properly
   609  // represented by the BigQuery INTEGER/INT64 type.
   610  func isSupportedUintType(t reflect.Type) bool {
   611  	switch t.Kind() {
   612  	case reflect.Uint8, reflect.Uint16, reflect.Uint32:
   613  		return true
   614  	default:
   615  		return false
   616  	}
   617  }
   618  
   619  // typeList is a linked list of reflect.Types.
   620  type typeList struct {
   621  	t    reflect.Type
   622  	next *typeList
   623  }
   624  
   625  func (l *typeList) has(t reflect.Type) bool {
   626  	for l != nil {
   627  		if l.t == t {
   628  			return true
   629  		}
   630  		l = l.next
   631  	}
   632  	return false
   633  }
   634  
   635  // hasRecursiveType reports whether t or any type inside t refers to itself, directly or indirectly,
   636  // via exported fields. (Schema inference ignores unexported fields.)
   637  func hasRecursiveType(t reflect.Type, seen *typeList) (bool, error) {
   638  	for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice || t.Kind() == reflect.Array {
   639  		t = t.Elem()
   640  	}
   641  	if t.Kind() != reflect.Struct {
   642  		return false, nil
   643  	}
   644  	if seen.has(t) {
   645  		return true, nil
   646  	}
   647  	fields, err := fieldCache.Fields(t)
   648  	if err != nil {
   649  		return false, err
   650  	}
   651  	seen = &typeList{t, seen}
   652  	// Because seen is a linked list, additions to it from one field's
   653  	// recursive call will not affect the value for subsequent fields' calls.
   654  	for _, field := range fields {
   655  		ok, err := hasRecursiveType(field.Type, seen)
   656  		if err != nil {
   657  			return false, err
   658  		}
   659  		if ok {
   660  			return true, nil
   661  		}
   662  	}
   663  	return false, nil
   664  }
   665  
   666  // validateKnownType ensures a type is known (or alias of a known type).
   667  func validateKnownType(in FieldType) (FieldType, error) {
   668  	if _, ok := fieldTypes[in]; !ok {
   669  		// not a defined type, check aliases.
   670  		if resolved, ok := fieldAliases[in]; ok {
   671  			return resolved, nil
   672  		}
   673  		return "", fmt.Errorf("unknown field type (%s)", in)
   674  	}
   675  	return in, nil
   676  }
   677  
   678  // SchemaFromJSON takes a native JSON BigQuery table schema definition and converts it to
   679  // a populated Schema.  The native API definition is used by tools such as the BQ CLI and
   680  // https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema.
   681  //
   682  // The expected format is a JSON array of TableFieldSchema objects from the underlying API:
   683  // https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema
   684  func SchemaFromJSON(schemaJSON []byte) (Schema, error) {
   685  
   686  	// Make sure we actually have some content:
   687  	if len(schemaJSON) == 0 {
   688  		return nil, errEmptyJSONSchema
   689  	}
   690  
   691  	var rawSchema []*bq.TableFieldSchema
   692  
   693  	if err := json.Unmarshal(schemaJSON, &rawSchema); err != nil {
   694  		return nil, err
   695  	}
   696  
   697  	convertedSchema := Schema{}
   698  	for _, f := range rawSchema {
   699  		convField := bqToFieldSchema(f)
   700  		// Normalize the types.
   701  		validType, err := validateKnownType(convField.Type)
   702  		if err != nil {
   703  			return nil, err
   704  		}
   705  		convField.Type = validType
   706  		convertedSchema = append(convertedSchema, convField)
   707  	}
   708  	return convertedSchema, nil
   709  }
   710  
   711  type noStructError struct {
   712  	typ reflect.Type
   713  }
   714  
   715  func (e noStructError) Error() string {
   716  	return fmt.Sprintf("bigquery: can only infer schema from struct or pointer to struct, not %s", e.typ)
   717  }
   718  
   719  type badNullableError struct {
   720  	name string
   721  	typ  reflect.Type
   722  }
   723  
   724  func (e badNullableError) Error() string {
   725  	return fmt.Sprintf(`bigquery: field %q of type %s: use "nullable" only for []byte and struct pointers; for all other types, use a NullXXX type`, e.name, e.typ)
   726  }
   727  
   728  type badJSONError struct {
   729  	name string
   730  	typ  reflect.Type
   731  }
   732  
   733  func (e badJSONError) Error() string {
   734  	return fmt.Sprintf(`bigquery: field %q of type %s: use "json" only for struct and struct pointers`, e.name, e.typ)
   735  }
   736  
   737  type unsupportedFieldTypeError struct {
   738  	name string
   739  	typ  reflect.Type
   740  }
   741  
   742  func (e unsupportedFieldTypeError) Error() string {
   743  	return fmt.Sprintf("bigquery: field %q: type %s is not supported", e.name, e.typ)
   744  }
   745  

View as plain text