Package estargz

const (
    // TOCTarName is the name of the JSON file in the tar archive in the
    // table of contents gzip stream.
    TOCTarName = "stargz.index.json"

    // FooterSize is the number of bytes in the footer.
    //
    // The footer is an empty gzip stream with no compression and an Extra
    // header of the form "%016xSTARGZ", where the 64 bit hex-encoded
    // number is the offset to the gzip stream of JSON TOC.
    //
    // 51 comes from:
    //
    // 10 bytes  gzip header
    // 2  bytes  XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ"))
    // 2  bytes  Extra: SI1 = 'S', SI2 = 'G'
    // 2  bytes  Extra: LEN = 22 (16 hex digits + len("STARGZ"))
    // 22 bytes  Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC)
    // 5  bytes  flate header
    // 8  bytes  gzip footer
    // (End of the eStargz blob)
    //
    // NOTE: For Extra fields, the subfield IDs SI1='S' and SI2='G' are used for eStargz.
    FooterSize = 51

    // TOCJSONDigestAnnotation is an annotation for an image layer. This stores the
    // digest of the TOC JSON.
    // This annotation is valid only when it is specified in `.layers[].annotations`
    // of an image manifest.
    TOCJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest"

    // StoreUncompressedSizeAnnotation is an additional annotation key for eStargz to enable lazy
    // pulling on containers/storage. Stargz Store is required to expose the layer's uncompressed size
    // to the runtime, but the current OCI image doesn't ship this information by default. So we store
    // it in this special annotation.
    StoreUncompressedSizeAnnotation = "io.containers.estargz.uncompressed-size"

    // PrefetchLandmark is a file entry which indicates the end position of
    // prefetch in the stargz file.
    PrefetchLandmark = ".prefetch.landmark"

    // NoPrefetchLandmark is a file entry which indicates that no prefetch should
    // occur in the stargz file.
    NoPrefetchLandmark = ".no.prefetch.landmark"
)

func CheckGzipHasStreams ¶

func CheckGzipHasStreams(t *testing.T, b []byte, streams []int64)

func CompressionTestSuite ¶

func CompressionTestSuite(t *testing.T, controllers ...TestingControllerFactory)

CompressionTestSuite tests that this package, combined with the given controllers, can build valid eStargz blobs and parse them.

func GzipDiffIDOf ¶

func GzipDiffIDOf(t *testing.T, b []byte) string

func OpenFooter ¶

func OpenFooter(sr *io.SectionReader) (tocOffset int64, footerSize int64, rErr error)

OpenFooter extracts and parses the footer from the given blob. It only supports gzip-based eStargz.

func Unpack ¶

func Unpack(sr *io.SectionReader, c Decompressor) (io.ReadCloser, error)

Unpack decompresses the given estargz blob and returns a ReadCloser of the tar blob. TOC JSON and footer are removed.

type Blob ¶

Blob is an eStargz blob.

type Blob struct {
    io.ReadCloser
    // contains filtered or unexported fields
}

func Build ¶

func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error)

Build builds an eStargz blob, which is an extended version of stargz, from a blob (gzip, zstd or plain tar) passed through the argument. If prioritized files are listed in the option, these files are grouped as "prioritized" and can be used for runtime optimization (e.g. prefetch). This function builds a blob in parallel by dividing that blob into several (at least the number of runtime.GOMAXPROCS(0)) sub-blobs.

func (*Blob) DiffID ¶

func (b *Blob) DiffID() digest.Digest

DiffID returns the digest of uncompressed blob. It is only valid to call DiffID after Close.

func (*Blob) TOCDigest ¶

func (b *Blob) TOCDigest() digest.Digest

TOCDigest returns the digest of uncompressed TOC JSON.

type Compression ¶

Compression provides the compression helper to be used for creating and parsing eStargz. This package provides gzip-based Compression by default, but any compression algorithm (e.g. zstd) can be used as long as it implements Compression.

type Compression interface {
    Compressor
    Decompressor
}

type Compressor ¶

Compressor represents the helper methods to be used for creating eStargz.

type Compressor interface {
    // Writer returns WriteCloser to be used for writing a chunk to eStargz.
    // Every time a chunk is written, the WriteCloser is closed and Writer is
    // called again for writing the next chunk.
    //
    // The returned writer should implement "Flush() error" function that flushes
    // any pending compressed data to the underlying writer.
    Writer(w io.Writer) (WriteFlushCloser, error)

    // WriteTOCAndFooter is called to write JTOC to the passed Writer.
    // diffHash calculates the DiffID (uncompressed sha256 hash) of the blob.
    // WriteTOCAndFooter can optionally write anything that affects DiffID calculation
    // (e.g. uncompressed TOC JSON).
    //
    // This function returns tocDgst that represents the digest of TOC that will be used
    // to verify this blob when it's parsed.
    WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (tocDgst digest.Digest, err error)
}

type Decompressor ¶

Decompressor represents the helper methods to be used for parsing eStargz.

type Decompressor interface {
    // Reader returns ReadCloser to be used for decompressing file payload.
    Reader(r io.Reader) (io.ReadCloser, error)

    // FooterSize returns the size of the footer of this blob.
    FooterSize() int64

    // ParseFooter parses the footer and returns the offset and (compressed) size of TOC.
    // blobPayloadSize is the (compressed) size of the blob payload (i.e. the size from
    // the top of the blob until the TOC JSON).
    //
    // If tocOffset < 0, we assume that TOC isn't contained in the blob and pass nil reader
    // to ParseTOC. We expect that ParseTOC acquires TOC from the external location and returns it.
    //
    // tocSize is optional. If tocSize <= 0, it defaults to the size of the range from tocOffset
    // until the beginning of the footer (blob size - tocOffset - FooterSize).
    // If blobPayloadSize < 0, blobPayloadSize becomes the blob size.
    ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)

    // ParseTOC parses TOC from the passed reader. The reader provides the partial contents
    // of the underlying blob that has the range specified by ParseFooter method.
    //
    // This function returns tocDgst that represents the digest of TOC that will be used
    // to verify this blob. This must match the value returned from
    // Compressor.WriteTOCAndFooter that is used when creating this blob.
    //
    // If tocOffset returned by ParseFooter is < 0, we assume that TOC isn't contained in the blob.
    // A nil reader is then passed to ParseTOC, and we expect that ParseTOC acquires TOC from the
    // external location and returns it.
    ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)
}

type GzipCompressor ¶

type GzipCompressor struct {
    // contains filtered or unexported fields
}

func NewGzipCompressor ¶

func NewGzipCompressor() *GzipCompressor

func NewGzipCompressorWithLevel ¶

func NewGzipCompressorWithLevel(level int) *GzipCompressor

func (*GzipCompressor) WriteTOCAndFooter ¶

func (gc *GzipCompressor) WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (digest.Digest, error)

func (*GzipCompressor) Writer ¶

func (gc *GzipCompressor) Writer(w io.Writer) (WriteFlushCloser, error)

type GzipDecompressor ¶

type GzipDecompressor struct{}

func (*GzipDecompressor) DecompressTOC ¶

func (gz *GzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error)

func (*GzipDecompressor) FooterSize ¶

func (gz *GzipDecompressor) FooterSize() int64

func (*GzipDecompressor) ParseFooter ¶

func (gz *GzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)

func (*GzipDecompressor) ParseTOC ¶

func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)

func (*GzipDecompressor) Reader ¶

func (gz *GzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error)

type JTOC ¶

JTOC is the JSON-serialized table of contents index of the files in the stargz file.

type JTOC struct {
    Version int         `json:"version"`
    Entries []*TOCEntry `json:"entries"`
}

type LegacyGzipDecompressor ¶

type LegacyGzipDecompressor struct{}

func (*LegacyGzipDecompressor) DecompressTOC ¶

func (gz *LegacyGzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error)

func (*LegacyGzipDecompressor) FooterSize ¶

func (gz *LegacyGzipDecompressor) FooterSize() int64

func (*LegacyGzipDecompressor) ParseFooter ¶

func (gz *LegacyGzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)

func (*LegacyGzipDecompressor) ParseTOC ¶

func (gz *LegacyGzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)

func (*LegacyGzipDecompressor) Reader ¶

func (gz *LegacyGzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error)

type MeasureLatencyHook ¶

MeasureLatencyHook is a func which takes start time and records the diff

type MeasureLatencyHook func(time.Time)

type OpenOption ¶

OpenOption is an option used during opening the layer

type OpenOption func(o *openOpts) error

func WithDecompressors ¶

func WithDecompressors(decompressors ...Decompressor) OpenOption

WithDecompressors option specifies decompressors to use. Default is gzip-based decompressor.

func WithTOCOffset ¶

func WithTOCOffset(tocOffset int64) OpenOption

WithTOCOffset option specifies the offset of TOC

func WithTelemetry ¶

func WithTelemetry(telemetry *Telemetry) OpenOption

WithTelemetry option specifies the telemetry hooks

type Option ¶

type Option func(o *options) error

func WithAllowPrioritizeNotFound ¶

func WithAllowPrioritizeNotFound(missedFiles *[]string) Option

WithAllowPrioritizeNotFound makes Build continue the execution even if some of prioritized files specified by WithPrioritizedFiles option aren't found in the input tar. Instead, this records all missed file names to the passed slice.

func WithChunkSize ¶

func WithChunkSize(chunkSize int) Option

WithChunkSize option specifies the chunk size of eStargz blob to build.

func WithCompression ¶

func WithCompression(compression Compression) Option

WithCompression specifies compression algorithm to be used. Default is gzip.

func WithCompressionLevel ¶

func WithCompressionLevel(level int) Option

WithCompressionLevel option specifies the gzip compression level. The default is gzip.BestCompression. This option will be ignored if WithCompression option is used. See also: https://godoc.org/compress/gzip#pkg-constants

func WithContext ¶

func WithContext(ctx context.Context) Option

WithContext specifies a context that can be used for clean cancellation.

func WithMinChunkSize ¶

func WithMinChunkSize(minChunkSize int) Option

WithMinChunkSize option specifies the minimal number of bytes of data that must be written in one gzip stream. By increasing this number, one gzip stream can contain multiple files, which hopefully leads to a smaller resulting blob. NOTE: This adds a TOC property that old readers don't understand.

func WithPrioritizedFiles ¶

func WithPrioritizedFiles(files []string) Option

WithPrioritizedFiles option specifies the list of prioritized files. These files must be complete paths that are absolute or relative to "/". For example, all of "foo/bar", "/foo/bar", "./foo/bar" and "../foo/bar" are treated as "/foo/bar".

type Reader ¶

A Reader permits random access reads from a stargz file.

type Reader struct {
    // contains filtered or unexported fields
}

func Open ¶

func Open(sr *io.SectionReader, opt ...OpenOption) (*Reader, error)

Open opens a stargz file for reading. The behavior is configurable using options.

Note that each entry name is normalized as the path that is relative to root.

func (*Reader) ChunkEntryForOffset ¶

func (r *Reader) ChunkEntryForOffset(name string, offset int64) (e *TOCEntry, ok bool)

ChunkEntryForOffset returns the TOCEntry containing the byte of the named file at the given offset within the file. Name must be absolute path or one that is relative to root.

func (*Reader) Lookup ¶

func (r *Reader) Lookup(path string) (e *TOCEntry, ok bool)

Lookup returns the Table of Contents entry for the given path.

To get the root directory, use the empty string. Path must be absolute path or one that is relative to root.

func (*Reader) OpenFile ¶

func (r *Reader) OpenFile(name string) (*io.SectionReader, error)

OpenFile returns the reader of the specified file payload.

Name must be absolute path or one that is relative to root.

func (*Reader) OpenFileWithPreReader ¶

func (r *Reader) OpenFileWithPreReader(name string, preRead func(*TOCEntry, io.Reader) error) (*io.SectionReader, error)

func (*Reader) TOCDigest ¶

func (r *Reader) TOCDigest() digest.Digest

func (*Reader) Verifiers ¶

func (r *Reader) Verifiers() (TOCEntryVerifier, error)

Verifiers returns TOCEntryVerifier of this chunk. Use VerifyTOC instead in most cases because this doesn't verify TOC.

func (*Reader) VerifyTOC ¶

func (r *Reader) VerifyTOC(tocDigest digest.Digest) (TOCEntryVerifier, error)

VerifyTOC checks that the TOC JSON in the passed blob matches the passed digest and that the TOC JSON contains digests for all chunks contained in the blob. If the verification succeeds, this function returns TOCEntryVerifier which holds all chunk digests in the stargz blob.

type TOCEntry ¶

TOCEntry is an entry in the stargz file's TOC (Table of Contents).

type TOCEntry struct {
    // Name is the tar entry's name. It is the complete path
    // stored in the tar file, not just the base name.
    Name string `json:"name"`

    // Type is one of "dir", "reg", "symlink", "hardlink", "char",
    // "block", "fifo", or "chunk".
    // The "chunk" type is used for regular file data chunks past the first
    // TOCEntry; the 2nd chunk and on have only Type ("chunk"), Offset,
    // ChunkOffset, and ChunkSize populated.
    Type string `json:"type"`

    // Size, for regular files, is the logical size of the file.
    Size int64 `json:"size,omitempty"`

    // ModTime3339 is the modification time of the tar entry. Empty
    // means zero or unknown. Otherwise it's in UTC RFC3339
    // format. Use the ModTime method to access the time.Time value.
    ModTime3339 string `json:"modtime,omitempty"`

    // LinkName, for symlinks and hardlinks, is the link target.
    LinkName string `json:"linkName,omitempty"`

    // Mode is the permission and mode bits.
    Mode int64 `json:"mode,omitempty"`

    // UID is the user ID of the owner.
    UID int `json:"uid,omitempty"`

    // GID is the group ID of the owner.
    GID int `json:"gid,omitempty"`

    // Uname is the username of the owner.
    //
    // In the serialized JSON, this field may only be present for
    // the first entry with the same UID.
    Uname string `json:"userName,omitempty"`

    // Gname is the group name of the owner.
    //
    // In the serialized JSON, this field may only be present for
    // the first entry with the same GID.
    Gname string `json:"groupName,omitempty"`

    // Offset, for regular files, provides the offset in the
    // stargz file to the file's data bytes. See ChunkOffset and
    // ChunkSize.
    Offset int64 `json:"offset,omitempty"`

    // InnerOffset is an optional field that indicates the uncompressed
    // offset of this "reg" or "chunk" payload in the stream that starts
    // at Offset. This field makes it possible to put multiple "reg" or
    // "chunk" payloads in one chunk, sharing the same Offset but having
    // different InnerOffset values.
    InnerOffset int64 `json:"innerOffset,omitempty"`

    // DevMajor is the major device number for "char" and "block" types.
    DevMajor int `json:"devMajor,omitempty"`

    // DevMinor is the minor device number for "char" and "block" types.
    DevMinor int `json:"devMinor,omitempty"`

    // NumLink is the number of entry names pointing to this entry.
    // Zero means one name references this entry.
    // This field is calculated during runtime and not recorded in TOC JSON.
    NumLink int `json:"-"`

    // Xattrs are the extended attributes for the entry.
    Xattrs map[string][]byte `json:"xattrs,omitempty"`

    // Digest stores the OCI checksum for regular files payload.
    // It has the form "sha256:abcdef01234....".
    Digest string `json:"digest,omitempty"`

    // ChunkOffset is non-zero if this is a chunk of a large,
    // regular file. If so, the Offset is where the gzip header of
    // ChunkSize bytes at ChunkOffset in Name begins.
    //
    // In serialized form, a "chunkSize" JSON field of zero means
    // that the chunk goes to the end of the file. After reading
    // from the stargz TOC, though, the ChunkSize is initialized
    // to a non-zero value when Type is either "reg" or
    // "chunk".
    ChunkOffset int64 `json:"chunkOffset,omitempty"`
    ChunkSize   int64 `json:"chunkSize,omitempty"`

    // ChunkDigest stores an OCI digest of the chunk. This must be formed
    // as "sha256:0123abcd...".
    ChunkDigest string `json:"chunkDigest,omitempty"`
    // contains filtered or unexported fields
}

func (*TOCEntry) ForeachChild ¶

func (e *TOCEntry) ForeachChild(f func(baseName string, ent *TOCEntry) bool)

ForeachChild calls f for each child item. If f returns false, iteration ends. If e is not a directory, f is not called.

func (*TOCEntry) LookupChild ¶

func (e *TOCEntry) LookupChild(baseName string) (child *TOCEntry, ok bool)

LookupChild returns the directory e's child by its base name.

func (*TOCEntry) ModTime ¶

func (e *TOCEntry) ModTime() time.Time

ModTime returns the entry's modification time.

func (*TOCEntry) NextOffset ¶

func (e *TOCEntry) NextOffset() int64

NextOffset returns the position (relative to the start of the stargz file) of the next gzip boundary after e.Offset.

func (*TOCEntry) Stat ¶

func (e *TOCEntry) Stat() os.FileInfo

Stat returns a FileInfo value representing e.

type TOCEntryVerifier ¶

TOCEntryVerifier holds verifiers that are usable for verifying chunks contained in an eStargz blob.

type TOCEntryVerifier interface {

    // Verifier provides a content verifier that can be used for verifying the
    // contents of the specified TOCEntry.
    Verifier(ce *TOCEntry) (digest.Verifier, error)
}

type Telemetry ¶

Telemetry is a struct which defines telemetry hooks. By implementing these hooks you should be able to record the latency metrics of the respective steps of the estargz open operation. To be used with estargz.Open(sr, estargz.WithTelemetry(...)).

type Telemetry struct {
    GetFooterLatency      MeasureLatencyHook // measures the time to get the stargz footer (in milliseconds)
    GetTocLatency         MeasureLatencyHook // measures the time to get the TOC JSON (in milliseconds)
    DeserializeTocLatency MeasureLatencyHook // measures the time to deserialize the TOC JSON (in milliseconds)
}

type TestingController ¶

TestingController is Compression with some helper methods necessary for testing.

type TestingController interface {
    Compression
    TestStreams(t *testing.T, b []byte, streams []int64)
    DiffIDOf(*testing.T, []byte) string
    String() string
}

type TestingControllerFactory ¶

type TestingControllerFactory func() TestingController

type WriteFlushCloser ¶

// WriteFlushCloser is an io.WriteCloser that can additionally flush its
// buffered data. It is the writer type returned by Compressor.Writer.
type WriteFlushCloser interface {
    io.WriteCloser
    // Flush flushes any pending compressed data to the underlying writer.
    Flush() error
}

type Writer ¶

A Writer writes stargz files.

Use NewWriter to create a new Writer.

type Writer struct {

    // ChunkSize optionally controls the maximum number of bytes
    // of data of a regular file that can be written in one gzip
    // stream before a new gzip stream is started.
    // Zero means to use a default, currently 4 MiB.
    ChunkSize int

    // MinChunkSize optionally controls the minimum number of bytes
    // of data that must be written in one gzip stream before a new
    // gzip stream is started.
    // NOTE: This adds a TOC property that stargz snapshotter < v0.13.0 doesn't understand.
    MinChunkSize int
    // contains filtered or unexported fields
}

func NewWriter ¶

func NewWriter(w io.Writer) *Writer

NewWriter returns a new stargz writer (gzip-based) writing to w.

The writer must be closed to write its trailing table of contents.

func NewWriterLevel ¶

func NewWriterLevel(w io.Writer, compressionLevel int) *Writer

NewWriterLevel returns a new stargz writer (gzip-based) writing to w. The compression level is configurable.

The writer must be closed to write its trailing table of contents.

func NewWriterWithCompressor ¶

func NewWriterWithCompressor(w io.Writer, c Compressor) *Writer

NewWriterWithCompressor returns a new stargz writer writing to w. The compression method is configurable.

The writer must be closed to write its trailing table of contents.

func (*Writer) AppendTar ¶

func (w *Writer) AppendTar(r io.Reader) error

AppendTar reads the tar or tar.gz file from r and appends each of its contents to w.

The input r can optionally be gzip compressed but the output will always be compressed by the specified compressor.

func (*Writer) AppendTarLossLess ¶

func (w *Writer) AppendTarLossLess(r io.Reader) error

AppendTarLossLess reads the tar or tar.gz file from r and appends each of its contents to w.

The input r can optionally be gzip compressed but the output will always be compressed by the specified compressor.

The difference of this func with AppendTar is that this writes the input tar stream into w without any modification (e.g. to header bytes).

Note that if the input tar stream already contains TOC JSON, this returns an error because w cannot overwrite the TOC JSON with the one generated by w without lossy modification. To avoid this error, if the input stream is known to be stargz/estargz, you should decompress it and remove the TOC JSON in advance.

func (*Writer) Close ¶

func (w *Writer) Close() (digest.Digest, error)

Close writes the stargz's table of contents and flushes all the buffers, returning any error.

func (*Writer) DiffID ¶

func (w *Writer) DiffID() string

DiffID returns the SHA-256 of the uncompressed tar bytes. It is only valid to call DiffID after Close.

Subdirectories

Name	Synopsis
..
errorutil
externaltoc
zstdchunked