...

Source file src/cloud.google.com/go/bigquery/extract.go

Documentation: cloud.google.com/go/bigquery

     1  // Copyright 2016 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bigquery
    16  
    17  import (
    18  	"context"
    19  	"time"
    20  
    21  	"cloud.google.com/go/internal/trace"
    22  	bq "google.golang.org/api/bigquery/v2"
    23  )
    24  
    25  // ExtractConfig holds the configuration for an extract job.
    26  type ExtractConfig struct {
    27  	// Src is the table from which data will be extracted.
    28  	// Only one of Src or SrcModel should be specified.
    29  	Src *Table
    30  
    31  	// SrcModel is the ML model from which the data will be extracted.
    32  	// Only one of Src or SrcModel should be specified.
    33  	SrcModel *Model
    34  
    35  	// Dst is the destination into which the data will be extracted.
    36  	Dst *GCSReference
    37  
    38  	// DisableHeader disables the printing of a header row in exported data.
    39  	DisableHeader bool
    40  
    41  	// The labels associated with this job.
    42  	Labels map[string]string
    43  
    44  	// For Avro-based extracts, controls whether logical type annotations are generated.
    45  	//
    46  	// Example:  With this enabled, writing a BigQuery TIMESTAMP column will result in
    47  	// an integer column annotated with the appropriate timestamp-micros/millis annotation
    48  	// in the resulting Avro files.
    49  	UseAvroLogicalTypes bool
    50  
    51  	// Sets a best-effort deadline on a specific job.  If job execution exceeds this
    52  	// timeout, BigQuery may attempt to cancel this work automatically.
    53  	//
    54  	// This deadline cannot be adjusted or removed once the job is created.  Consider
    55  	// using Job.Cancel in situations where you need more dynamic behavior.
    56  	//
    57  	// Experimental: this option is experimental and may be modified or removed in future versions,
    58  	// regardless of any other documented package stability guarantees.
    59  	JobTimeout time.Duration
    60  }
    61  
    62  func (e *ExtractConfig) toBQ() *bq.JobConfiguration {
    63  	var printHeader *bool
    64  	if e.DisableHeader {
    65  		f := false
    66  		printHeader = &f
    67  	}
    68  	cfg := &bq.JobConfiguration{
    69  		Labels: e.Labels,
    70  		Extract: &bq.JobConfigurationExtract{
    71  			DestinationUris:   append([]string{}, e.Dst.URIs...),
    72  			Compression:       string(e.Dst.Compression),
    73  			DestinationFormat: string(e.Dst.DestinationFormat),
    74  			FieldDelimiter:    e.Dst.FieldDelimiter,
    75  
    76  			PrintHeader:         printHeader,
    77  			UseAvroLogicalTypes: e.UseAvroLogicalTypes,
    78  		},
    79  		JobTimeoutMs: e.JobTimeout.Milliseconds(),
    80  	}
    81  	if e.Src != nil {
    82  		cfg.Extract.SourceTable = e.Src.toBQ()
    83  	}
    84  	if e.SrcModel != nil {
    85  		cfg.Extract.SourceModel = e.SrcModel.toBQ()
    86  	}
    87  	return cfg
    88  }
    89  
    90  func bqToExtractConfig(q *bq.JobConfiguration, c *Client) *ExtractConfig {
    91  	qe := q.Extract
    92  	return &ExtractConfig{
    93  		Labels: q.Labels,
    94  		Dst: &GCSReference{
    95  			URIs:              qe.DestinationUris,
    96  			Compression:       Compression(qe.Compression),
    97  			DestinationFormat: DataFormat(qe.DestinationFormat),
    98  			FileConfig: FileConfig{
    99  				CSVOptions: CSVOptions{
   100  					FieldDelimiter: qe.FieldDelimiter,
   101  				},
   102  			},
   103  		},
   104  		DisableHeader:       qe.PrintHeader != nil && !*qe.PrintHeader,
   105  		Src:                 bqToTable(qe.SourceTable, c),
   106  		SrcModel:            bqToModel(qe.SourceModel, c),
   107  		UseAvroLogicalTypes: qe.UseAvroLogicalTypes,
   108  		JobTimeout:          time.Duration(q.JobTimeoutMs) * time.Millisecond,
   109  	}
   110  }
   111  
   112  // An Extractor extracts data from a BigQuery table into Google Cloud Storage.
   113  type Extractor struct {
   114  	JobIDConfig
   115  	ExtractConfig
   116  	c *Client
   117  }
   118  
   119  // ExtractorTo returns an Extractor which can be used to extract data from a
   120  // BigQuery table into Google Cloud Storage.
   121  // The returned Extractor may optionally be further configured before its Run method is called.
   122  func (t *Table) ExtractorTo(dst *GCSReference) *Extractor {
   123  	return &Extractor{
   124  		c: t.c,
   125  		ExtractConfig: ExtractConfig{
   126  			Src: t,
   127  			Dst: dst,
   128  		},
   129  	}
   130  }
   131  
   132  // ExtractorTo returns an Extractor which can be persist a BigQuery Model into
   133  // Google Cloud Storage.
   134  // The returned Extractor may be further configured before its Run method is called.
   135  func (m *Model) ExtractorTo(dst *GCSReference) *Extractor {
   136  	return &Extractor{
   137  		c: m.c,
   138  		ExtractConfig: ExtractConfig{
   139  			SrcModel: m,
   140  			Dst:      dst,
   141  		},
   142  	}
   143  }
   144  
   145  // Run initiates an extract job.
   146  func (e *Extractor) Run(ctx context.Context) (j *Job, err error) {
   147  	ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Extractor.Run")
   148  	defer func() { trace.EndSpan(ctx, err) }()
   149  
   150  	return e.c.insertJob(ctx, e.newJob(), nil)
   151  }
   152  
   153  func (e *Extractor) newJob() *bq.Job {
   154  	return &bq.Job{
   155  		JobReference:  e.JobIDConfig.createJobRef(e.c),
   156  		Configuration: e.ExtractConfig.toBQ(),
   157  	}
   158  }
   159  

View as plain text