// Copyright 2015 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
Package bigquery provides a client for the BigQuery service.

The following assumes a basic familiarity with BigQuery concepts.
See https://cloud.google.com/bigquery/docs.

See https://godoc.org/cloud.google.com/go for authentication, timeouts,
connection pooling and similar aspects of this package.

# Creating a Client

To start working with this package, create a client:

    ctx := context.Background()
    client, err := bigquery.NewClient(ctx, projectID)
    if err != nil {
        // TODO: Handle error.
    }

# Querying

To query existing tables, create a Query and call its Read method, which starts the
query and waits for it to complete:

    q := client.Query(`
        SELECT year, SUM(number) as num
        FROM bigquery-public-data.usa_names.usa_1910_2013
        WHERE name = @name
        GROUP BY year
        ORDER BY year
    `)
    q.Parameters = []bigquery.QueryParameter{
        {Name: "name", Value: "William"},
    }
    it, err := q.Read(ctx)
    if err != nil {
        // TODO: Handle error.
    }

Then iterate through the resulting rows. You can store a row using
anything that implements the ValueLoader interface, or with a slice or map of
bigquery.Value. A slice is simplest (a map example appears at the end of this section):

    for {
        var values []bigquery.Value
        err := it.Next(&values)
        if err == iterator.Done {
            break
        }
        if err != nil {
            // TODO: Handle error.
        }
        fmt.Println(values)
    }

You can also use a struct whose exported fields match the query:

    type Count struct {
        Year int
        Num  int
    }
    for {
        var c Count
        err := it.Next(&c)
        if err == iterator.Done {
            break
        }
        if err != nil {
            // TODO: Handle error.
        }
        fmt.Println(c)
    }

You can also start the query running and get the results later.
Create the query as above, but call Run instead of Read. This returns a Job,
which represents an asynchronous operation.

    job, err := q.Run(ctx)
    if err != nil {
        // TODO: Handle error.
    }

Get the job's ID, a printable string. You can save this string to retrieve
the results at a later time, even in another process.

    jobID := job.ID()
    fmt.Printf("The job ID is %s\n", jobID)

To retrieve the job's results from the ID, first look up the Job:

    job, err = client.JobFromID(ctx, jobID)
    if err != nil {
        // TODO: Handle error.
    }

Use the Job.Read method to obtain an iterator, and loop over the rows.
Calling Query.Read is preferred for queries with a relatively small result set,
as it calls the BigQuery jobs.query API for an optimized query path. If the query
doesn't meet those criteria, the method simply combines Query.Run and Job.Read.

    it, err = job.Read(ctx)
    if err != nil {
        // TODO: Handle error.
    }
    // Proceed with iteration as above.
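As mentioned above, you can also load each row into a map of bigquery.Value
keyed by column name, which is useful when you want names alongside values.
A short sketch, continuing with the iterator from the examples above; the keys
match the query's column aliases (year, num):

    for {
        // Next allocates a fresh map for each row, keyed by column name.
        var row map[string]bigquery.Value
        err := it.Next(&row)
        if err == iterator.Done {
            break
        }
        if err != nil {
            // TODO: Handle error.
        }
        fmt.Println(row["year"], row["num"])
    }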
# Datasets and Tables

You can refer to datasets in the client's project with the Dataset method, and
in other projects with the DatasetInProject method:

    myDataset := client.Dataset("my_dataset")
    yourDataset := client.DatasetInProject("your-project-id", "your_dataset")

These methods create references to datasets, not the datasets themselves. You can have
a dataset reference even if the dataset doesn't exist yet. Use Dataset.Create to
create a dataset from a reference:

    if err := myDataset.Create(ctx, nil); err != nil {
        // TODO: Handle error.
    }

You can refer to tables with Dataset.Table. Like bigquery.Dataset, bigquery.Table is a reference
to an object in BigQuery that may or may not exist.

    table := myDataset.Table("my_table")

You can create, delete and update the metadata of tables with methods on Table
(a metadata update sketch appears after the next section).
For instance, you could create a temporary table with:

    err = myDataset.Table("temp").Create(ctx, &bigquery.TableMetadata{
        ExpirationTime: time.Now().Add(1 * time.Hour)})
    if err != nil {
        // TODO: Handle error.
    }

We'll see how to create a table with a schema in the next section.

# Schemas

There are two ways to construct schemas with this package.
You can build a schema by hand, like so:

    schema1 := bigquery.Schema{
        {Name: "Name", Required: true, Type: bigquery.StringFieldType},
        {Name: "Grades", Repeated: true, Type: bigquery.IntegerFieldType},
        {Name: "Optional", Required: false, Type: bigquery.IntegerFieldType},
    }

Or you can infer the schema from a struct:

    type student struct {
        Name     string
        Grades   []int
        Optional bigquery.NullInt64
    }
    schema2, err := bigquery.InferSchema(student{})
    if err != nil {
        // TODO: Handle error.
    }
    // schema1 and schema2 are identical.

Struct inference supports tags like those of the encoding/json package, so you can
change names, ignore fields, or mark a field as nullable (non-required). Fields
declared as one of the Null types (NullInt64, NullFloat64, NullString, NullBool,
NullTimestamp, NullDate, NullTime, NullDateTime, and NullGeography) are
automatically inferred as nullable, so the "nullable" tag is only needed for []byte,
*big.Rat and pointer-to-struct fields.

    type student2 struct {
        Name     string `bigquery:"full_name"`
        Grades   []int
        Secret   string `bigquery:"-"`
        Optional []byte `bigquery:",nullable"`
    }
    schema3, err := bigquery.InferSchema(student2{})
    if err != nil {
        // TODO: Handle error.
    }
    // schema3 has required fields "full_name" and "Grades", and nullable BYTES field "Optional".

Having constructed a schema, you can create a table with it like so:

    if err := table.Create(ctx, &bigquery.TableMetadata{Schema: schema1}); err != nil {
        // TODO: Handle error.
    }
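Table.Update, mentioned earlier, changes a table's metadata, including its
schema. A minimal sketch of adding a nullable column to the table from above;
the column name "num" is only illustrative:

    md, err := table.Metadata(ctx)
    if err != nil {
        // TODO: Handle error.
    }
    // Append the new column to the existing schema. Added columns must be
    // nullable, which is the FieldSchema default.
    update := bigquery.TableMetadataToUpdate{
        Schema: append(md.Schema, &bigquery.FieldSchema{Name: "num", Type: bigquery.IntegerFieldType}),
    }
    // Passing the ETag makes the update conditional on the metadata not having
    // changed since it was read; pass "" to update unconditionally.
    if _, err := table.Update(ctx, update, md.ETag); err != nil {
        // TODO: Handle error.
    }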
# Copying

You can copy one or more tables to another table. Begin by constructing a Copier
describing the copy. Then set any desired copy options, and finally call Run to get a Job:

    copier := myDataset.Table("dest").CopierFrom(myDataset.Table("src"))
    copier.WriteDisposition = bigquery.WriteTruncate
    job, err = copier.Run(ctx)
    if err != nil {
        // TODO: Handle error.
    }

You can chain the call to Run if you don't want to set options:

    job, err = myDataset.Table("dest").CopierFrom(myDataset.Table("src")).Run(ctx)
    if err != nil {
        // TODO: Handle error.
    }

You can wait for your job to complete:

    status, err := job.Wait(ctx)
    if err != nil {
        // TODO: Handle error.
    }

Job.Wait polls with exponential backoff. You can also poll yourself, if you
wish:

    for {
        status, err := job.Status(ctx)
        if err != nil {
            // TODO: Handle error.
        }
        if status.Done() {
            if status.Err() != nil {
                log.Fatalf("Job failed with error %v", status.Err())
            }
            break
        }
        time.Sleep(pollInterval)
    }

# Loading and Uploading

There are two ways to populate a table with this package: load the data from a Google Cloud Storage
object, or upload rows directly from your program.

For loading, first create a GCSReference, configuring it if desired. Then make a Loader, optionally configure
it as well, and call its Run method.

    gcsRef := bigquery.NewGCSReference("gs://my-bucket/my-object")
    gcsRef.AllowJaggedRows = true
    loader := myDataset.Table("dest").LoaderFrom(gcsRef)
    loader.CreateDisposition = bigquery.CreateNever
    job, err = loader.Run(ctx)
    // Poll the job for completion if desired, as above.

To upload, first define a type that implements the ValueSaver interface, which has a single method named Save.
Then create an Inserter, and call its Put method with a slice of values.

    u := table.Inserter()
    // Item implements the ValueSaver interface.
    items := []*Item{
        {Name: "n1", Size: 32.6, Count: 7},
        {Name: "n2", Size: 4, Count: 2},
        {Name: "n3", Size: 101.5, Count: 1},
    }
    if err := u.Put(ctx, items); err != nil {
        // TODO: Handle error.
    }

You can also upload a struct that doesn't implement ValueSaver. Use the StructSaver type
to specify the schema and insert ID by hand, or just supply the struct or struct pointer
directly and the schema will be inferred:

    type Item2 struct {
        Name  string
        Size  float64
        Count int
    }
    // Item2 doesn't implement ValueSaver; its schema will be inferred.
    items2 := []*Item2{
        {Name: "n1", Size: 32.6, Count: 7},
        {Name: "n2", Size: 4, Count: 2},
        {Name: "n3", Size: 101.5, Count: 1},
    }
    if err := u.Put(ctx, items2); err != nil {
        // TODO: Handle error.
    }

BigQuery allows for higher throughput when omitting insertion IDs. To enable this,
specify the sentinel `NoDedupeID` value for the insertion ID when implementing a ValueSaver.
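A minimal sketch of such a Save method for the Item type used above, assuming
the destination table's columns are named Name, Size and Count; the signature
is fixed by the ValueSaver interface:

    // Save implements bigquery.ValueSaver. Returning NoDedupeID as the
    // insert ID opts this row out of best-effort deduplication, allowing
    // higher ingest throughput.
    func (i *Item) Save() (map[string]bigquery.Value, string, error) {
        return map[string]bigquery.Value{
            "Name":  i.Name,
            "Size":  i.Size,
            "Count": i.Count,
        }, bigquery.NoDedupeID, nil
    }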
# Extracting

If you've been following so far, extracting data from a BigQuery table
into a Google Cloud Storage object will feel familiar. First create an
Extractor, then optionally configure it, and lastly call its Run method.

    extractor := table.ExtractorTo(gcsRef)
    extractor.DisableHeader = true
    job, err = extractor.Run(ctx)
    // Poll the job for completion if desired, as above.

# Errors

Errors returned by this client are often of the type googleapi.Error: https://godoc.org/google.golang.org/api/googleapi#Error

These errors can be introspected for more information by using `xerrors.As` with the richer *googleapi.Error type. For example:

    var e *googleapi.Error
    if ok := xerrors.As(err, &e); ok {
        if e.Code == 409 { ... }
    }

In some cases, your client may receive unstructured googleapi.Error responses. In such cases, it is likely that
you have exceeded BigQuery request limits, documented at: https://cloud.google.com/bigquery/quotas
*/
package bigquery // import "cloud.google.com/go/bigquery"