...

Source file src/github.com/google/go-containerregistry/pkg/v1/tarball/write.go

Documentation: github.com/google/go-containerregistry/pkg/v1/tarball

     1  // Copyright 2018 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //    http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tarball
    16  
    17  import (
    18  	"archive/tar"
    19  	"bytes"
    20  	"encoding/json"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"sort"
    26  	"strings"
    27  
    28  	"github.com/google/go-containerregistry/pkg/name"
    29  	v1 "github.com/google/go-containerregistry/pkg/v1"
    30  	"github.com/google/go-containerregistry/pkg/v1/partial"
    31  )
    32  
    33  // WriteToFile writes in the compressed format to a tarball, on disk.
    34  // This is just syntactic sugar wrapping tarball.Write with a new file.
    35  func WriteToFile(p string, ref name.Reference, img v1.Image, opts ...WriteOption) error {
    36  	w, err := os.Create(p)
    37  	if err != nil {
    38  		return err
    39  	}
    40  	defer w.Close()
    41  
    42  	return Write(ref, img, w, opts...)
    43  }
    44  
    45  // MultiWriteToFile writes in the compressed format to a tarball, on disk.
    46  // This is just syntactic sugar wrapping tarball.MultiWrite with a new file.
    47  func MultiWriteToFile(p string, tagToImage map[name.Tag]v1.Image, opts ...WriteOption) error {
    48  	refToImage := make(map[name.Reference]v1.Image, len(tagToImage))
    49  	for i, d := range tagToImage {
    50  		refToImage[i] = d
    51  	}
    52  	return MultiRefWriteToFile(p, refToImage, opts...)
    53  }
    54  
    55  // MultiRefWriteToFile writes in the compressed format to a tarball, on disk.
    56  // This is just syntactic sugar wrapping tarball.MultiRefWrite with a new file.
    57  func MultiRefWriteToFile(p string, refToImage map[name.Reference]v1.Image, opts ...WriteOption) error {
    58  	w, err := os.Create(p)
    59  	if err != nil {
    60  		return err
    61  	}
    62  	defer w.Close()
    63  
    64  	return MultiRefWrite(refToImage, w, opts...)
    65  }
    66  
    67  // Write is a wrapper to write a single image and tag to a tarball.
    68  func Write(ref name.Reference, img v1.Image, w io.Writer, opts ...WriteOption) error {
    69  	return MultiRefWrite(map[name.Reference]v1.Image{ref: img}, w, opts...)
    70  }
    71  
    72  // MultiWrite writes the contents of each image to the provided writer, in the compressed format.
    73  // The contents are written in the following format:
    74  // One manifest.json file at the top level containing information about several images.
    75  // One file for each layer, named after the layer's SHA.
    76  // One file for the config blob, named after its SHA.
    77  func MultiWrite(tagToImage map[name.Tag]v1.Image, w io.Writer, opts ...WriteOption) error {
    78  	refToImage := make(map[name.Reference]v1.Image, len(tagToImage))
    79  	for i, d := range tagToImage {
    80  		refToImage[i] = d
    81  	}
    82  	return MultiRefWrite(refToImage, w, opts...)
    83  }
    84  
    85  // MultiRefWrite writes the contents of each image to the provided writer, in the compressed format.
    86  // The contents are written in the following format:
    87  // One manifest.json file at the top level containing information about several images.
    88  // One file for each layer, named after the layer's SHA.
    89  // One file for the config blob, named after its SHA.
    90  func MultiRefWrite(refToImage map[name.Reference]v1.Image, w io.Writer, opts ...WriteOption) error {
    91  	// process options
    92  	o := &writeOptions{
    93  		updates: nil,
    94  	}
    95  	for _, option := range opts {
    96  		if err := option(o); err != nil {
    97  			return err
    98  		}
    99  	}
   100  
   101  	imageToTags := dedupRefToImage(refToImage)
   102  	size, mBytes, err := getSizeAndManifest(imageToTags)
   103  	if err != nil {
   104  		return sendUpdateReturn(o, err)
   105  	}
   106  
   107  	return writeImagesToTar(imageToTags, mBytes, size, w, o)
   108  }
   109  
   110  // sendUpdateReturn return the passed in error message, also sending on update channel, if it exists
   111  func sendUpdateReturn(o *writeOptions, err error) error {
   112  	if o != nil && o.updates != nil {
   113  		o.updates <- v1.Update{
   114  			Error: err,
   115  		}
   116  	}
   117  	return err
   118  }
   119  
   120  // sendProgressWriterReturn return the passed in error message, also sending on update channel, if it exists, along with downloaded information
   121  func sendProgressWriterReturn(pw *progressWriter, err error) error {
   122  	if pw != nil {
   123  		return pw.Error(err)
   124  	}
   125  	return err
   126  }
   127  
   128  // writeImagesToTar writes the images to the tarball
   129  func writeImagesToTar(imageToTags map[v1.Image][]string, m []byte, size int64, w io.Writer, o *writeOptions) (err error) {
   130  	if w == nil {
   131  		return sendUpdateReturn(o, errors.New("must pass valid writer"))
   132  	}
   133  
   134  	tw := w
   135  	var pw *progressWriter
   136  
   137  	// we only calculate the sizes and use a progressWriter if we were provided
   138  	// an option with a progress channel
   139  	if o != nil && o.updates != nil {
   140  		pw = &progressWriter{
   141  			w:       w,
   142  			updates: o.updates,
   143  			size:    size,
   144  		}
   145  		tw = pw
   146  	}
   147  
   148  	tf := tar.NewWriter(tw)
   149  	defer tf.Close()
   150  
   151  	seenLayerDigests := make(map[string]struct{})
   152  
   153  	for img := range imageToTags {
   154  		// Write the config.
   155  		cfgName, err := img.ConfigName()
   156  		if err != nil {
   157  			return sendProgressWriterReturn(pw, err)
   158  		}
   159  		cfgBlob, err := img.RawConfigFile()
   160  		if err != nil {
   161  			return sendProgressWriterReturn(pw, err)
   162  		}
   163  		if err := writeTarEntry(tf, cfgName.String(), bytes.NewReader(cfgBlob), int64(len(cfgBlob))); err != nil {
   164  			return sendProgressWriterReturn(pw, err)
   165  		}
   166  
   167  		// Write the layers.
   168  		layers, err := img.Layers()
   169  		if err != nil {
   170  			return sendProgressWriterReturn(pw, err)
   171  		}
   172  		layerFiles := make([]string, len(layers))
   173  		for i, l := range layers {
   174  			d, err := l.Digest()
   175  			if err != nil {
   176  				return sendProgressWriterReturn(pw, err)
   177  			}
   178  			// Munge the file name to appease ancient technology.
   179  			//
   180  			// tar assumes anything with a colon is a remote tape drive:
   181  			// https://www.gnu.org/software/tar/manual/html_section/tar_45.html
   182  			// Drop the algorithm prefix, e.g. "sha256:"
   183  			hex := d.Hex
   184  
   185  			// gunzip expects certain file extensions:
   186  			// https://www.gnu.org/software/gzip/manual/html_node/Overview.html
   187  			layerFiles[i] = fmt.Sprintf("%s.tar.gz", hex)
   188  
   189  			if _, ok := seenLayerDigests[hex]; ok {
   190  				continue
   191  			}
   192  			seenLayerDigests[hex] = struct{}{}
   193  
   194  			r, err := l.Compressed()
   195  			if err != nil {
   196  				return sendProgressWriterReturn(pw, err)
   197  			}
   198  			blobSize, err := l.Size()
   199  			if err != nil {
   200  				return sendProgressWriterReturn(pw, err)
   201  			}
   202  
   203  			if err := writeTarEntry(tf, layerFiles[i], r, blobSize); err != nil {
   204  				return sendProgressWriterReturn(pw, err)
   205  			}
   206  		}
   207  	}
   208  	if err := writeTarEntry(tf, "manifest.json", bytes.NewReader(m), int64(len(m))); err != nil {
   209  		return sendProgressWriterReturn(pw, err)
   210  	}
   211  
   212  	// be sure to close the tar writer so everything is flushed out before we send our EOF
   213  	if err := tf.Close(); err != nil {
   214  		return sendProgressWriterReturn(pw, err)
   215  	}
   216  	// send an EOF to indicate finished on the channel, but nil as our return error
   217  	_ = sendProgressWriterReturn(pw, io.EOF)
   218  	return nil
   219  }
   220  
   221  // calculateManifest calculates the manifest and optionally the size of the tar file
   222  func calculateManifest(imageToTags map[v1.Image][]string) (m Manifest, err error) {
   223  	if len(imageToTags) == 0 {
   224  		return nil, errors.New("set of images is empty")
   225  	}
   226  
   227  	for img, tags := range imageToTags {
   228  		cfgName, err := img.ConfigName()
   229  		if err != nil {
   230  			return nil, err
   231  		}
   232  
   233  		// Store foreign layer info.
   234  		layerSources := make(map[v1.Hash]v1.Descriptor)
   235  
   236  		// Write the layers.
   237  		layers, err := img.Layers()
   238  		if err != nil {
   239  			return nil, err
   240  		}
   241  		layerFiles := make([]string, len(layers))
   242  		for i, l := range layers {
   243  			d, err := l.Digest()
   244  			if err != nil {
   245  				return nil, err
   246  			}
   247  			// Munge the file name to appease ancient technology.
   248  			//
   249  			// tar assumes anything with a colon is a remote tape drive:
   250  			// https://www.gnu.org/software/tar/manual/html_section/tar_45.html
   251  			// Drop the algorithm prefix, e.g. "sha256:"
   252  			hex := d.Hex
   253  
   254  			// gunzip expects certain file extensions:
   255  			// https://www.gnu.org/software/gzip/manual/html_node/Overview.html
   256  			layerFiles[i] = fmt.Sprintf("%s.tar.gz", hex)
   257  
   258  			// Add to LayerSources if it's a foreign layer.
   259  			desc, err := partial.BlobDescriptor(img, d)
   260  			if err != nil {
   261  				return nil, err
   262  			}
   263  			if !desc.MediaType.IsDistributable() {
   264  				diffid, err := partial.BlobToDiffID(img, d)
   265  				if err != nil {
   266  					return nil, err
   267  				}
   268  				layerSources[diffid] = *desc
   269  			}
   270  		}
   271  
   272  		// Generate the tar descriptor and write it.
   273  		m = append(m, Descriptor{
   274  			Config:       cfgName.String(),
   275  			RepoTags:     tags,
   276  			Layers:       layerFiles,
   277  			LayerSources: layerSources,
   278  		})
   279  	}
   280  	// sort by name of the repotags so it is consistent. Alternatively, we could sort by hash of the
   281  	// descriptor, but that would make it hard for humans to process
   282  	sort.Slice(m, func(i, j int) bool {
   283  		return strings.Join(m[i].RepoTags, ",") < strings.Join(m[j].RepoTags, ",")
   284  	})
   285  
   286  	return m, nil
   287  }
   288  
   289  // CalculateSize calculates the expected complete size of the output tar file
   290  func CalculateSize(refToImage map[name.Reference]v1.Image) (size int64, err error) {
   291  	imageToTags := dedupRefToImage(refToImage)
   292  	size, _, err = getSizeAndManifest(imageToTags)
   293  	return size, err
   294  }
   295  
   296  func getSizeAndManifest(imageToTags map[v1.Image][]string) (int64, []byte, error) {
   297  	m, err := calculateManifest(imageToTags)
   298  	if err != nil {
   299  		return 0, nil, fmt.Errorf("unable to calculate manifest: %w", err)
   300  	}
   301  	mBytes, err := json.Marshal(m)
   302  	if err != nil {
   303  		return 0, nil, fmt.Errorf("could not marshall manifest to bytes: %w", err)
   304  	}
   305  
   306  	size, err := calculateTarballSize(imageToTags, mBytes)
   307  	if err != nil {
   308  		return 0, nil, fmt.Errorf("error calculating tarball size: %w", err)
   309  	}
   310  	return size, mBytes, nil
   311  }
   312  
   313  // calculateTarballSize calculates the size of the tar file
   314  func calculateTarballSize(imageToTags map[v1.Image][]string, mBytes []byte) (size int64, err error) {
   315  	seenLayerDigests := make(map[string]struct{})
   316  	for img, name := range imageToTags {
   317  		manifest, err := img.Manifest()
   318  		if err != nil {
   319  			return size, fmt.Errorf("unable to get manifest for img %s: %w", name, err)
   320  		}
   321  		size += calculateSingleFileInTarSize(manifest.Config.Size)
   322  		for _, l := range manifest.Layers {
   323  			hex := l.Digest.Hex
   324  			if _, ok := seenLayerDigests[hex]; ok {
   325  				continue
   326  			}
   327  			seenLayerDigests[hex] = struct{}{}
   328  			size += calculateSingleFileInTarSize(l.Size)
   329  		}
   330  	}
   331  	// add the manifest
   332  	size += calculateSingleFileInTarSize(int64(len(mBytes)))
   333  
   334  	// add the two padding blocks that indicate end of a tar file
   335  	size += 1024
   336  	return size, nil
   337  }
   338  
   339  func dedupRefToImage(refToImage map[name.Reference]v1.Image) map[v1.Image][]string {
   340  	imageToTags := make(map[v1.Image][]string)
   341  
   342  	for ref, img := range refToImage {
   343  		if tag, ok := ref.(name.Tag); ok {
   344  			if tags, ok := imageToTags[img]; !ok || tags == nil {
   345  				imageToTags[img] = []string{}
   346  			}
   347  			// Docker cannot load tarballs without an explicit tag:
   348  			// https://github.com/google/go-containerregistry/issues/890
   349  			//
   350  			// We can't use the fully qualified tag.Name() because of rules_docker:
   351  			// https://github.com/google/go-containerregistry/issues/527
   352  			//
   353  			// If the tag is "latest", but tag.String() doesn't end in ":latest",
   354  			// just append it. Kind of gross, but should work for now.
   355  			ts := tag.String()
   356  			if tag.Identifier() == name.DefaultTag && !strings.HasSuffix(ts, ":"+name.DefaultTag) {
   357  				ts = fmt.Sprintf("%s:%s", ts, name.DefaultTag)
   358  			}
   359  			imageToTags[img] = append(imageToTags[img], ts)
   360  		} else if _, ok := imageToTags[img]; !ok {
   361  			imageToTags[img] = nil
   362  		}
   363  	}
   364  
   365  	return imageToTags
   366  }
   367  
   368  // writeTarEntry writes a file to the provided writer with a corresponding tar header
   369  func writeTarEntry(tf *tar.Writer, path string, r io.Reader, size int64) error {
   370  	hdr := &tar.Header{
   371  		Mode:     0644,
   372  		Typeflag: tar.TypeReg,
   373  		Size:     size,
   374  		Name:     path,
   375  	}
   376  	if err := tf.WriteHeader(hdr); err != nil {
   377  		return err
   378  	}
   379  	_, err := io.Copy(tf, r)
   380  	return err
   381  }
   382  
   383  // ComputeManifest get the manifest.json that will be written to the tarball
   384  // for multiple references
   385  func ComputeManifest(refToImage map[name.Reference]v1.Image) (Manifest, error) {
   386  	imageToTags := dedupRefToImage(refToImage)
   387  	return calculateManifest(imageToTags)
   388  }
   389  
// WriteOption is a functional option to pass to Write() and the other write
// functions in this file. It mutates the supplied settings and may return an
// error, which aborts the write.
type WriteOption func(*writeOptions) error

// writeOptions holds the settings that a WriteOption can configure.
type writeOptions struct {
	// updates, when non-nil, receives progress and error updates while the
	// tarball is written.
	updates chan<- v1.Update
}
   395  
   396  // WithProgress create a WriteOption for passing to Write() that enables
   397  // a channel to receive updates as they are downloaded and written to disk.
   398  func WithProgress(updates chan<- v1.Update) WriteOption {
   399  	return func(o *writeOptions) error {
   400  		o.updates = updates
   401  		return nil
   402  	}
   403  }
   404  
   405  // progressWriter is a writer which will send the download progress
   406  type progressWriter struct {
   407  	w              io.Writer
   408  	updates        chan<- v1.Update
   409  	size, complete int64
   410  }
   411  
   412  func (pw *progressWriter) Write(p []byte) (int, error) {
   413  	n, err := pw.w.Write(p)
   414  	if err != nil {
   415  		return n, err
   416  	}
   417  
   418  	pw.complete += int64(n)
   419  
   420  	pw.updates <- v1.Update{
   421  		Total:    pw.size,
   422  		Complete: pw.complete,
   423  	}
   424  
   425  	return n, err
   426  }
   427  
   428  func (pw *progressWriter) Error(err error) error {
   429  	pw.updates <- v1.Update{
   430  		Total:    pw.size,
   431  		Complete: pw.complete,
   432  		Error:    err,
   433  	}
   434  	return err
   435  }
   436  
   437  func (pw *progressWriter) Close() error {
   438  	pw.updates <- v1.Update{
   439  		Total:    pw.size,
   440  		Complete: pw.complete,
   441  		Error:    io.EOF,
   442  	}
   443  	return io.EOF
   444  }
   445  
   446  // calculateSingleFileInTarSize calculate the size a file will take up in a tar archive,
   447  // given the input data. Provided by rounding up to nearest whole block (512)
   448  // and adding header 512
   449  func calculateSingleFileInTarSize(in int64) (out int64) {
   450  	// doing this manually, because math.Round() works with float64
   451  	out += in
   452  	if remainder := out % 512; remainder != 0 {
   453  		out += (512 - remainder)
   454  	}
   455  	out += 512
   456  	return out
   457  }
   458  

View as plain text