/*
   Copyright The containerd Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

/*
   Copyright 2019 The Go Authors. All rights reserved.
   Use of this source code is governed by a BSD-style
   license that can be found in the LICENSE file.
*/

package estargz

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"fmt"
	"hash"
	"io"
	"os"
	"path"
	"time"

	digest "github.com/opencontainers/go-digest"
)

const (
	// TOCTarName is the name of the JSON file in the tar archive in the
	// table of contents gzip stream.
	TOCTarName = "stargz.index.json"

	// FooterSize is the number of bytes in the footer.
	//
	// The footer is an empty gzip stream with no compression and an Extra
	// header of the form "%016xSTARGZ", where the 64-bit hex-encoded
	// number is the offset to the gzip stream of JSON TOC.
	//
	// 51 comes from:
	//
	// 10 bytes  gzip header
	// 2  bytes  XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ"))
	// 2  bytes  Extra: SI1 = 'S', SI2 = 'G'
	// 2  bytes  Extra: LEN = 22 (16 hex digits + len("STARGZ"))
	// 22 bytes  Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC)
	// 5  bytes  flate header
	// 8  bytes  gzip footer
	// (End of the eStargz blob)
	//
	// NOTE: For Extra fields, the subfield IDs SI1='S' SI2='G' are used for eStargz.
	FooterSize = 51

	// legacyFooterSize is the number of bytes in the legacy stargz footer.
	//
	// 47 comes from:
	//
	// 10 bytes  gzip header
	// 2  bytes  (LE16) length of extra, encoding 22 (16 hex digits + len("STARGZ")) == "\x16\x00"
	// 22 bytes  extra (fmt.Sprintf("%016xSTARGZ", tocGzipOffset))
	// 5  bytes  flate header
	// 8  bytes  gzip footer (two little endian uint32s: digest, size)
	legacyFooterSize = 47

	// TOCJSONDigestAnnotation is an annotation for an image layer. This stores the
	// digest of the TOC JSON.
	// This annotation is valid only when it is specified in `.[]layers.annotations`
	// of an image manifest.
	TOCJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest"

	// StoreUncompressedSizeAnnotation is an additional annotation key for eStargz to enable lazy
	// pulling on containers/storage. Stargz Store is required to expose the layer's uncompressed size
	// to the runtime, but the current OCI image spec doesn't ship this information by default, so we
	// store it in this special annotation.
	StoreUncompressedSizeAnnotation = "io.containers.estargz.uncompressed-size"

	// PrefetchLandmark is a file entry which indicates the end position of
	// prefetch in the stargz file.
	PrefetchLandmark = ".prefetch.landmark"

	// NoPrefetchLandmark is a file entry which indicates that no prefetch should
	// occur in the stargz file.
	NoPrefetchLandmark = ".no.prefetch.landmark"

	// landmarkContents is the single byte stored as the contents of the
	// landmark file entries.
	landmarkContents = 0xf
)
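// footerBytes is an illustrative sketch rather than part of the original file:
// it shows how the 51-byte footer described above could be assembled with the
// standard library. The function name is hypothetical; the layout (an empty,
// uncompressed gzip stream whose Extra field carries "%016xSTARGZ" under
// subfield IDs SI1='S', SI2='G') is taken from the FooterSize comment.
func footerBytes(tocOff int64) []byte {
	buf := bytes.NewBuffer(make([]byte, 0, FooterSize))
	gz, _ := gzip.NewWriterLevel(buf, gzip.NoCompression) // empty stream, no compression
	// Extra: SI1='S', SI2='G', LEN=22 as a little-endian uint16, then the
	// 22-byte subfield fmt.Sprintf("%016xSTARGZ", tocOff).
	gz.Extra = append([]byte{'S', 'G', 22, 0}, fmt.Sprintf("%016xSTARGZ", tocOff)...)
	gz.Close()
	return buf.Bytes() // 10+2+2+2+22+5+8 = 51 bytes
}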
// JTOC is the JSON-serialized table of contents index of the files in the stargz file.
type JTOC struct {
	Version int         `json:"version"`
	Entries []*TOCEntry `json:"entries"`
}

// TOCEntry is an entry in the stargz file's TOC (Table of Contents).
type TOCEntry struct {
	// Name is the tar entry's name. It is the complete path
	// stored in the tar file, not just the base name.
	Name string `json:"name"`

	// Type is one of "dir", "reg", "symlink", "hardlink", "char",
	// "block", "fifo", or "chunk".
	// The "chunk" type is used for regular file data chunks past the first
	// TOCEntry; the 2nd chunk and on have only Type ("chunk"), Offset,
	// ChunkOffset, and ChunkSize populated.
	Type string `json:"type"`

	// Size, for regular files, is the logical size of the file.
	Size int64 `json:"size,omitempty"`

	// ModTime3339 is the modification time of the tar entry. Empty
	// means zero or unknown. Otherwise it's in UTC RFC3339
	// format. Use the ModTime method to access the time.Time value.
	ModTime3339 string `json:"modtime,omitempty"`
	modTime     time.Time

	// LinkName, for symlinks and hardlinks, is the link target.
	LinkName string `json:"linkName,omitempty"`

	// Mode is the permission and mode bits.
	Mode int64 `json:"mode,omitempty"`

	// UID is the user ID of the owner.
	UID int `json:"uid,omitempty"`

	// GID is the group ID of the owner.
	GID int `json:"gid,omitempty"`

	// Uname is the username of the owner.
	//
	// In the serialized JSON, this field may only be present for
	// the first entry with the same UID.
	Uname string `json:"userName,omitempty"`

	// Gname is the group name of the owner.
	//
	// In the serialized JSON, this field may only be present for
	// the first entry with the same GID.
	Gname string `json:"groupName,omitempty"`

	// Offset, for regular files, provides the offset in the
	// stargz file to the file's data bytes. See ChunkOffset and
	// ChunkSize.
	Offset int64 `json:"offset,omitempty"`

	// InnerOffset is an optional field that indicates the uncompressed
	// offset of this "reg" or "chunk" payload within the stream that
	// starts at Offset. This field makes it possible to pack multiple
	// "reg" or "chunk" payloads into one chunk, sharing the same Offset
	// but with different InnerOffset values.
	InnerOffset int64 `json:"innerOffset,omitempty"`

	nextOffset int64 // the Offset of the next entry with a non-zero Offset

	// DevMajor is the major device number for "char" and "block" types.
	DevMajor int `json:"devMajor,omitempty"`

	// DevMinor is the minor device number for "char" and "block" types.
	DevMinor int `json:"devMinor,omitempty"`

	// NumLink is the number of entry names pointing to this entry.
	// Zero means one name references this entry.
	// This field is calculated at runtime and not recorded in TOC JSON.
	NumLink int `json:"-"`

	// Xattrs are the extended attributes for the entry.
	Xattrs map[string][]byte `json:"xattrs,omitempty"`

	// Digest stores the OCI checksum of the regular file's payload.
	// It has the form "sha256:abcdef01234....".
	Digest string `json:"digest,omitempty"`

	// ChunkOffset is non-zero if this is a chunk of a large,
	// regular file. If so, the Offset is where the gzip header of
	// ChunkSize bytes at ChunkOffset in Name begins.
	//
	// In serialized form, a "chunkSize" JSON field of zero means
	// that the chunk goes to the end of the file. After reading
	// from the stargz TOC, though, the ChunkSize is initialized
	// to a non-zero value for when Type is either "reg" or
	// "chunk".
	ChunkOffset int64 `json:"chunkOffset,omitempty"`
	ChunkSize   int64 `json:"chunkSize,omitempty"`

	// ChunkDigest stores an OCI digest of the chunk. This must be formed
	// as "sha256:0123abcd...".
	ChunkDigest string `json:"chunkDigest,omitempty"`

	children map[string]*TOCEntry

	// chunkTopIndex is the index of the entry where Offset starts in the blob.
	chunkTopIndex int
}
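// initModTime is an illustrative sketch rather than part of the original file
// (the name is hypothetical): it spells out the relationship documented on
// ModTime3339 between the serialized RFC3339 string and the cached modTime
// value that the ModTime method returns.
func initModTime(e *TOCEntry) error {
	if e.ModTime3339 == "" {
		e.modTime = time.Time{} // empty means zero or unknown
		return nil
	}
	t, err := time.Parse(time.RFC3339, e.ModTime3339)
	if err != nil {
		return err
	}
	e.modTime = t.UTC()
	return nil
}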
// ModTime returns the entry's modification time.
func (e *TOCEntry) ModTime() time.Time { return e.modTime }

// NextOffset returns the position (relative to the start of the
// stargz file) of the next gzip boundary after e.Offset.
func (e *TOCEntry) NextOffset() int64 { return e.nextOffset }

func (e *TOCEntry) addChild(baseName string, child *TOCEntry) {
	if e.children == nil {
		e.children = make(map[string]*TOCEntry)
	}
	if child.Type == "dir" {
		e.NumLink++ // Entry ".." in the subdirectory links to this directory
	}
	e.children[baseName] = child
}

// isDataType reports whether TOCEntry is a regular file or chunk (something that
// contains regular file data).
func (e *TOCEntry) isDataType() bool { return e.Type == "reg" || e.Type == "chunk" }

// Stat returns a FileInfo value representing e.
func (e *TOCEntry) Stat() os.FileInfo { return fileInfo{e} }

// ForeachChild calls f for each child item. If f returns false, iteration ends.
// If e is not a directory, f is not called.
func (e *TOCEntry) ForeachChild(f func(baseName string, ent *TOCEntry) bool) {
	for name, ent := range e.children {
		if !f(name, ent) {
			return
		}
	}
}

// LookupChild returns the directory e's child by its base name.
func (e *TOCEntry) LookupChild(baseName string) (child *TOCEntry, ok bool) {
	child, ok = e.children[baseName]
	return
}

// fileInfo implements os.FileInfo using the wrapped *TOCEntry.
type fileInfo struct{ e *TOCEntry }

var _ os.FileInfo = fileInfo{}

func (fi fileInfo) Name() string       { return path.Base(fi.e.Name) }
func (fi fileInfo) IsDir() bool        { return fi.e.Type == "dir" }
func (fi fileInfo) Size() int64        { return fi.e.Size }
func (fi fileInfo) ModTime() time.Time { return fi.e.ModTime() }
func (fi fileInfo) Sys() interface{}   { return fi.e }
func (fi fileInfo) Mode() (m os.FileMode) {
	// TOCEntry.Mode is tar.Header.Mode, so we can interpret these bits using the tar package.
	m = (&tar.Header{Mode: fi.e.Mode}).FileInfo().Mode() &
		(os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky)
	switch fi.e.Type {
	case "dir":
		m |= os.ModeDir
	case "symlink":
		m |= os.ModeSymlink
	case "char":
		m |= os.ModeDevice | os.ModeCharDevice
	case "block":
		m |= os.ModeDevice
	case "fifo":
		m |= os.ModeNamedPipe
	}
	return m
}

// TOCEntryVerifier holds verifiers that are usable for verifying chunks contained
// in an eStargz blob.
type TOCEntryVerifier interface {

	// Verifier provides a content verifier that can be used for verifying the
	// contents of the specified TOCEntry.
	Verifier(ce *TOCEntry) (digest.Verifier, error)
}
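// verifyEntry is an illustrative sketch rather than part of the original file
// (the name is hypothetical): it shows the intended use of TOCEntryVerifier,
// namely obtaining a digest.Verifier for an entry, streaming the chunk payload
// through it, and checking Verified().
func verifyEntry(tv TOCEntryVerifier, ent *TOCEntry, payload io.Reader) error {
	v, err := tv.Verifier(ent)
	if err != nil {
		return err
	}
	if _, err := io.Copy(v, payload); err != nil {
		return err
	}
	if !v.Verified() {
		return fmt.Errorf("digest mismatch for %q", ent.Name)
	}
	return nil
}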
// Compression provides the compression helper to be used for creating and parsing eStargz.
// This package provides gzip-based Compression by default, but any compression
// algorithm (e.g. zstd) can be used as long as it implements Compression.
type Compression interface {
	Compressor
	Decompressor
}

// Compressor represents the helper methods to be used for creating eStargz.
type Compressor interface {
	// Writer returns a WriteCloser to be used for writing a chunk to eStargz.
	// Every time a chunk is written, the WriteCloser is closed and Writer is
	// called again for writing the next chunk.
	//
	// The returned writer should implement a "Flush() error" method that flushes
	// any pending compressed data to the underlying writer.
	Writer(w io.Writer) (WriteFlushCloser, error)

	// WriteTOCAndFooter is called to write JTOC to the passed Writer.
	// diffHash calculates the DiffID (uncompressed sha256 hash) of the blob.
	// WriteTOCAndFooter can optionally write anything that affects DiffID calculation
	// (e.g. uncompressed TOC JSON).
	//
	// This function returns tocDgst, the digest of the TOC, which will be used
	// to verify this blob when it's parsed.
	WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (tocDgst digest.Digest, err error)
}

// Decompressor represents the helper methods to be used for parsing eStargz.
type Decompressor interface {
	// Reader returns a ReadCloser to be used for decompressing file payload.
	Reader(r io.Reader) (io.ReadCloser, error)

	// FooterSize returns the size of the footer of this blob.
	FooterSize() int64

	// ParseFooter parses the footer and returns the offset and (compressed) size of the TOC.
	// blobPayloadSize is the (compressed) size of the blob payload (i.e. the size from
	// the top of the blob to the TOC JSON).
	//
	// If tocOffset < 0, we assume that the TOC isn't contained in the blob and pass a nil
	// reader to ParseTOC. ParseTOC is then expected to acquire the TOC from an external
	// location and return it.
	//
	// tocSize is optional. If tocSize <= 0, it defaults to the size of the range from
	// tocOffset to the beginning of the footer (blob size - tocOffset - FooterSize).
	// If blobPayloadSize < 0, blobPayloadSize becomes the blob size.
	ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)

	// ParseTOC parses the TOC from the passed reader. The reader provides the partial
	// contents of the underlying blob in the range specified by the ParseFooter method.
	//
	// This function returns tocDgst, the digest of the TOC, which will be used to
	// verify this blob. It must match the value returned by the
	// Compressor.WriteTOCAndFooter that was used when creating this blob.
	//
	// If the tocOffset returned by ParseFooter is < 0, we assume that the TOC isn't
	// contained in the blob and pass a nil reader to ParseTOC; ParseTOC is then expected
	// to acquire the TOC from an external location and return it.
	ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)
}

// WriteFlushCloser is the interface that groups io.WriteCloser with the
// Flush method required of the writers returned by Compressor.Writer.
type WriteFlushCloser interface {
	io.WriteCloser
	Flush() error
}
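// readTOC is an illustrative sketch rather than part of the original file (the
// name is hypothetical): it walks through the Decompressor flow documented
// above, assuming sr reads the whole blob. It reads FooterSize bytes from the
// end, locates the TOC via ParseFooter, then hands that range to ParseTOC.
func readTOC(d Decompressor, sr *io.SectionReader) (*JTOC, digest.Digest, error) {
	fSize := d.FooterSize()
	footer := make([]byte, fSize)
	if _, err := sr.ReadAt(footer, sr.Size()-fSize); err != nil && err != io.EOF {
		return nil, "", err
	}
	_, tocOff, tocSize, err := d.ParseFooter(footer)
	if err != nil {
		return nil, "", err
	}
	if tocOff < 0 {
		// The TOC lives outside the blob; ParseTOC is expected to fetch it
		// from the external location when given a nil reader.
		return d.ParseTOC(nil)
	}
	if tocSize <= 0 {
		// Default: the range from tocOff up to the beginning of the footer.
		tocSize = sr.Size() - tocOff - fSize
	}
	return d.ParseTOC(io.NewSectionReader(sr, tocOff, tocSize))
}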