
Source file src/github.com/containerd/stargz-snapshotter/estargz/types.go

Documentation: github.com/containerd/stargz-snapshotter/estargz

     1  /*
     2     Copyright The containerd Authors.
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     8         http://www.apache.org/licenses/LICENSE-2.0
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    17  /*
    18     Copyright 2019 The Go Authors. All rights reserved.
    19     Use of this source code is governed by a BSD-style
    20     license that can be found in the LICENSE file.
    21  */
    23  package estargz
    25  import (
    26  	"archive/tar"
    27  	"hash"
    28  	"io"
    29  	"os"
    30  	"path"
    31  	"time"
    33  	digest "github.com/opencontainers/go-digest"
    34  )
const (
	// TOCTarName is the name of the JSON file in the tar archive in the
	// table of contents gzip stream.
	TOCTarName = "stargz.index.json"

	// FooterSize is the number of bytes in the footer.
	//
	// The footer is an empty gzip stream with no compression and an Extra
	// header of the form "%016xSTARGZ", where the 64 bit hex-encoded
	// number is the offset to the gzip stream of JSON TOC.
	//
	// 51 comes from:
	//
	// 10 bytes  gzip header
	// 2  bytes  XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ"))
	// 2  bytes  Extra: SI1 = 'S', SI2 = 'G'
	// 2  bytes  Extra: LEN = 22 (16 hex digits + len("STARGZ"))
	// 22 bytes  Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC)
	// 5  bytes  flate header
	// 8  bytes  gzip footer
	// (End of the eStargz blob)
	//
	// NOTE: For Extra fields, the subfield IDs SI1='S' SI2='G' are used for eStargz.
	FooterSize = 51

	// legacyFooterSize is the number of bytes in the legacy stargz footer.
	//
	// 47 comes from:
	//
	//   10 byte gzip header +
	//   2 byte (LE16) length of extra, encoding 22 (16 hex digits + len("STARGZ")) == "\x16\x00" +
	//   22 bytes of extra (fmt.Sprintf("%016xSTARGZ", tocGzipOffset))
	//   5 byte flate header
	//   8 byte gzip footer (two little endian uint32s: digest, size)
	legacyFooterSize = 47

	// TOCJSONDigestAnnotation is an annotation for an image layer. This stores the
	// digest of the TOC JSON.
	// This annotation is valid only when it is specified in `.[]layers.annotations`
	// of an image manifest.
	TOCJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest"

	// StoreUncompressedSizeAnnotation is an additional annotation key for eStargz to enable lazy
	// pulling on containers/storage. Stargz Store is required to expose the layer's uncompressed size
	// to the runtime but the current OCI image doesn't ship this information by default. So we store
	// it in this special annotation.
	StoreUncompressedSizeAnnotation = "io.containers.estargz.uncompressed-size"

	// PrefetchLandmark is a file entry which indicates the end position of
	// prefetch in the stargz file.
	PrefetchLandmark = ".prefetch.landmark"

	// NoPrefetchLandmark is a file entry which indicates that no prefetch should
	// occur in the stargz file.
	NoPrefetchLandmark = ".no.prefetch.landmark"

	// landmarkContents is an untyped constant with the value 0xf.
	// NOTE(review): presumably the single payload byte stored in the
	// PrefetchLandmark / NoPrefetchLandmark entries — confirm against the
	// code that writes and checks the landmarks.
	landmarkContents = 0xf
)
// JTOC is the JSON-serialized table of contents index of the files in the stargz file.
type JTOC struct {
	// Version is serialized under the "version" JSON key.
	// NOTE(review): presumably the TOC format version — confirm against the
	// code that writes the TOC.
	Version int `json:"version"`

	// Entries holds the TOC entries, serialized under the "entries" JSON key.
	Entries []*TOCEntry `json:"entries"`
}
// TOCEntry is an entry in the stargz file's TOC (Table of Contents).
type TOCEntry struct {
	// Name is the tar entry's name. It is the complete path
	// stored in the tar file, not just the base name.
	Name string `json:"name"`

	// Type is one of "dir", "reg", "symlink", "hardlink", "char",
	// "block", "fifo", or "chunk".
	// The "chunk" type is used for regular file data chunks past the first
	// TOCEntry; the 2nd chunk and on have only Type ("chunk"), Offset,
	// ChunkOffset, and ChunkSize populated.
	Type string `json:"type"`

	// Size, for regular files, is the logical size of the file.
	Size int64 `json:"size,omitempty"`

	// ModTime3339 is the modification time of the tar entry. Empty
	// means zero or unknown. Otherwise it's in UTC RFC3339
	// format. Use the ModTime method to access the time.Time value,
	// which is kept in the unexported modTime field.
	ModTime3339 string `json:"modtime,omitempty"`
	modTime     time.Time

	// LinkName, for symlinks and hardlinks, is the link target.
	LinkName string `json:"linkName,omitempty"`

	// Mode is the permission and mode bits.
	Mode int64 `json:"mode,omitempty"`

	// UID is the user ID of the owner.
	UID int `json:"uid,omitempty"`

	// GID is the group ID of the owner.
	GID int `json:"gid,omitempty"`

	// Uname is the username of the owner.
	//
	// In the serialized JSON, this field may only be present for
	// the first entry with the same UID.
	Uname string `json:"userName,omitempty"`

	// Gname is the group name of the owner.
	//
	// In the serialized JSON, this field may only be present for
	// the first entry with the same GID.
	Gname string `json:"groupName,omitempty"`

	// Offset, for regular files, provides the offset in the
	// stargz file to the file's data bytes. See ChunkOffset and
	// ChunkSize.
	Offset int64 `json:"offset,omitempty"`

	// InnerOffset is an optional field that indicates the uncompressed offset
	// of this "reg" or "chunk" payload in a stream that starts from Offset.
	// This field makes it possible to put multiple "reg" or "chunk" payloads
	// in one chunk, sharing the same Offset but having different InnerOffset.
	InnerOffset int64 `json:"innerOffset,omitempty"`

	nextOffset int64 // the Offset of the next entry with a non-zero Offset

	// DevMajor is the major device number for "char" and "block" types.
	DevMajor int `json:"devMajor,omitempty"`

	// DevMinor is the minor device number for "char" and "block" types.
	DevMinor int `json:"devMinor,omitempty"`

	// NumLink is the number of entry names pointing to this entry.
	// Zero means one name references this entry.
	// This field is calculated during runtime and not recorded in TOC JSON.
	NumLink int `json:"-"`

	// Xattrs are the extended attributes for the entry.
	Xattrs map[string][]byte `json:"xattrs,omitempty"`

	// Digest stores the OCI checksum for regular files payload.
	// It has the form "sha256:abcdef01234....".
	Digest string `json:"digest,omitempty"`

	// ChunkOffset is non-zero if this is a chunk of a large,
	// regular file. If so, the Offset is where the gzip header of
	// ChunkSize bytes at ChunkOffset in Name begin.
	//
	// In serialized form, a "chunkSize" JSON field of zero means
	// that the chunk goes to the end of the file. After reading
	// from the stargz TOC, though, the ChunkSize is initialized
	// to a non-zero value for when Type is either "reg" or
	// "chunk".
	ChunkOffset int64 `json:"chunkOffset,omitempty"`
	ChunkSize   int64 `json:"chunkSize,omitempty"`

	// ChunkDigest stores an OCI digest of the chunk. This must be formed
	// as "sha256:0123abcd...".
	ChunkDigest string `json:"chunkDigest,omitempty"`

	// children maps a base name to the child entry registered through
	// addChild; ForeachChild and LookupChild read from it. It is nil until
	// the first child is added.
	children map[string]*TOCEntry

	// chunkTopIndex is index of the entry where Offset starts in the blob.
	chunkTopIndex int
}
   200  // ModTime returns the entry's modification time.
   201  func (e *TOCEntry) ModTime() time.Time { return e.modTime }
   203  // NextOffset returns the position (relative to the start of the
   204  // stargz file) of the next gzip boundary after e.Offset.
   205  func (e *TOCEntry) NextOffset() int64 { return e.nextOffset }
   207  func (e *TOCEntry) addChild(baseName string, child *TOCEntry) {
   208  	if e.children == nil {
   209  		e.children = make(map[string]*TOCEntry)
   210  	}
   211  	if child.Type == "dir" {
   212  		e.NumLink++ // Entry ".." in the subdirectory links to this directory
   213  	}
   214  	e.children[baseName] = child
   215  }
   217  // isDataType reports whether TOCEntry is a regular file or chunk (something that
   218  // contains regular file data).
   219  func (e *TOCEntry) isDataType() bool { return e.Type == "reg" || e.Type == "chunk" }
   221  // Stat returns a FileInfo value representing e.
   222  func (e *TOCEntry) Stat() os.FileInfo { return fileInfo{e} }
   224  // ForeachChild calls f for each child item. If f returns false, iteration ends.
   225  // If e is not a directory, f is not called.
   226  func (e *TOCEntry) ForeachChild(f func(baseName string, ent *TOCEntry) bool) {
   227  	for name, ent := range e.children {
   228  		if !f(name, ent) {
   229  			return
   230  		}
   231  	}
   232  }
   234  // LookupChild returns the directory e's child by its base name.
   235  func (e *TOCEntry) LookupChild(baseName string) (child *TOCEntry, ok bool) {
   236  	child, ok = e.children[baseName]
   237  	return
   238  }
   240  // fileInfo implements os.FileInfo using the wrapped *TOCEntry.
   241  type fileInfo struct{ e *TOCEntry }
   243  var _ os.FileInfo = fileInfo{}
   245  func (fi fileInfo) Name() string       { return path.Base(fi.e.Name) }
   246  func (fi fileInfo) IsDir() bool        { return fi.e.Type == "dir" }
   247  func (fi fileInfo) Size() int64        { return fi.e.Size }
   248  func (fi fileInfo) ModTime() time.Time { return fi.e.ModTime() }
   249  func (fi fileInfo) Sys() interface{}   { return fi.e }
   250  func (fi fileInfo) Mode() (m os.FileMode) {
   251  	// TOCEntry.Mode is tar.Header.Mode so we can understand the these bits using `tar` pkg.
   252  	m = (&tar.Header{Mode: fi.e.Mode}).FileInfo().Mode() &
   253  		(os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky)
   254  	switch fi.e.Type {
   255  	case "dir":
   256  		m |= os.ModeDir
   257  	case "symlink":
   258  		m |= os.ModeSymlink
   259  	case "char":
   260  		m |= os.ModeDevice | os.ModeCharDevice
   261  	case "block":
   262  		m |= os.ModeDevice
   263  	case "fifo":
   264  		m |= os.ModeNamedPipe
   265  	}
   266  	return m
   267  }
// TOCEntryVerifier holds verifiers that are usable for verifying chunks contained
// in an eStargz blob.
type TOCEntryVerifier interface {

	// Verifier provides a content verifier that can be used for verifying the
	// contents of the specified TOCEntry.
	Verifier(ce *TOCEntry) (digest.Verifier, error)
}
// Compression provides the compression helper to be used for creating and parsing
// eStargz. It combines the Compressor and Decompressor interfaces.
// This package provides gzip-based Compression by default, but any compression
// algorithm (e.g. zstd) can be used as long as it implements Compression.
type Compression interface {
	Compressor
	Decompressor
}
// Compressor represents the helper methods to be used for creating eStargz.
type Compressor interface {
	// Writer returns WriteCloser to be used for writing a chunk to eStargz.
	// Every time a chunk is written, the WriteCloser is closed and Writer is
	// called again for writing the next chunk.
	//
	// The returned writer should implement "Flush() error" function that flushes
	// any pending compressed data to the underlying writer.
	Writer(w io.Writer) (WriteFlushCloser, error)

	// WriteTOCAndFooter is called to write JTOC to the passed Writer.
	// diffHash calculates the DiffID (uncompressed sha256 hash) of the blob.
	// WriteTOCAndFooter can optionally write anything that affects DiffID calculation
	// (e.g. uncompressed TOC JSON).
	//
	// This function returns tocDgst that represents the digest of TOC that will be used
	// to verify this blob when it's parsed.
	WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (tocDgst digest.Digest, err error)
}
// Decompressor represents the helper methods to be used for parsing eStargz.
type Decompressor interface {
	// Reader returns ReadCloser to be used for decompressing file payload.
	Reader(r io.Reader) (io.ReadCloser, error)

	// FooterSize returns the size of the footer of this blob.
	FooterSize() int64

	// ParseFooter parses the footer and returns the offset and (compressed) size of TOC.
	// blobPayloadSize is the (compressed) size of the blob payload (i.e. the size between
	// the top until the TOC JSON).
	//
	// If tocOffset < 0, we assume that TOC isn't contained in the blob and pass nil reader
	// to ParseTOC. We expect that ParseTOC acquires TOC from the external location and returns it.
	//
	// tocSize is optional. If tocSize <= 0, it defaults to the size of the range from tocOffset
	// until the beginning of the footer (blob size - tocOffset - FooterSize).
	// If blobPayloadSize < 0, blobPayloadSize becomes the blob size.
	ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)

	// ParseTOC parses TOC from the passed reader. The reader provides the partial contents
	// of the underlying blob that has the range specified by ParseFooter method.
	//
	// This function returns tocDgst that represents the digest of TOC that will be used
	// to verify this blob. This must match the value returned from
	// Compressor.WriteTOCAndFooter that is used when creating this blob.
	//
	// If tocOffset returned by ParseFooter is < 0, we assume that TOC isn't contained in the blob.
	// A nil reader is then passed to ParseTOC, and we expect that ParseTOC acquires TOC from the
	// external location and returns it.
	ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)
}
// WriteFlushCloser is an io.WriteCloser that additionally provides a Flush
// method. It is the writer type returned by Compressor.Writer.
type WriteFlushCloser interface {
	io.WriteCloser

	// Flush reports an error if flushing fails.
	// NOTE(review): presumably flushes any pending compressed data to the
	// underlying writer, as the Compressor.Writer comment asks — confirm
	// against the implementations.
	Flush() error
}

View as plain text