const ( // TOCTarName is the name of the JSON file in the tar archive in the // table of contents gzip stream. TOCTarName = "stargz.index.json" // FooterSize is the number of bytes in the footer // // The footer is an empty gzip stream with no compression and an Extra // header of the form "%016xSTARGZ", where the 64 bit hex-encoded // number is the offset to the gzip stream of JSON TOC. // // 51 comes from: // // 10 bytes gzip header // 2 bytes XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ")) // 2 bytes Extra: SI1 = 'S', SI2 = 'G' // 2 bytes Extra: LEN = 22 (16 hex digits + len("STARGZ")) // 22 bytes Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC) // 5 bytes flate header // 8 bytes gzip footer // (End of the eStargz blob) // // NOTE: For Extra fields, subfield IDs SI1='S' SI2='G' is used for eStargz. FooterSize = 51 // TOCJSONDigestAnnotation is an annotation for an image layer. This stores the // digest of the TOC JSON. // This annotation is valid only when it is specified in `.[]layers.annotations` // of an image manifest. TOCJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest" // StoreUncompressedSizeAnnotation is an additional annotation key for eStargz to enable lazy // pulling on containers/storage. Stargz Store is required to expose the layer's uncompressed size // to the runtime but current OCI image doesn't ship this information by default. So we store this // to the special annotation. StoreUncompressedSizeAnnotation = "io.containers.estargz.uncompressed-size" // PrefetchLandmark is a file entry which indicates the end position of // prefetch in the stargz file. PrefetchLandmark = ".prefetch.landmark" // NoPrefetchLandmark is a file entry which indicates that no prefetch should // occur in the stargz file. NoPrefetchLandmark = ".no.prefetch.landmark" )
func CheckGzipHasStreams(t *testing.T, b []byte, streams []int64)
func CompressionTestSuite(t *testing.T, controllers ...TestingControllerFactory)
CompressionTestSuite tests that this package, using the given controllers, can build valid eStargz blobs and parse them.
func GzipDiffIDOf(t *testing.T, b []byte) string
func OpenFooter(sr *io.SectionReader) (tocOffset int64, footerSize int64, rErr error)
OpenFooter extracts and parses the footer from the given blob. It only supports gzip-based eStargz.
func Unpack(sr *io.SectionReader, c Decompressor) (io.ReadCloser, error)
Unpack decompresses the given estargz blob and returns a ReadCloser of the tar blob. TOC JSON and footer are removed.
Blob is an eStargz blob.
type Blob struct { io.ReadCloser // contains filtered or unexported fields }
func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error)
Build builds an eStargz blob, which is an extended version of stargz, from a blob (gzip, zstd or plain tar) passed through the argument. If some prioritized files are listed in the option, these files are grouped as "prioritized" and can be used for runtime optimization (e.g. prefetch). This function builds a blob in parallel, by dividing that blob into several (at least the number of runtime.GOMAXPROCS(0)) sub-blobs.
func (b *Blob) DiffID() digest.Digest
DiffID returns the digest of uncompressed blob. It is only valid to call DiffID after Close.
func (b *Blob) TOCDigest() digest.Digest
TOCDigest returns the digest of uncompressed TOC JSON.
Compression provides the compression helper to be used creating and parsing eStargz. This package provides gzip-based Compression by default, but any compression algorithm (e.g. zstd) can be used as long as it implements Compression.
type Compression interface { Compressor Decompressor }
Compressor represents the helper methods to be used for creating eStargz.
type Compressor interface { // Writer returns WriteCloser to be used for writing a chunk to eStargz. // Every time a chunk is written, the WriteCloser is closed and Writer is // called again for writing the next chunk. // // The returned writer should implement "Flush() error" function that flushes // any pending compressed data to the underlying writer. Writer(w io.Writer) (WriteFlushCloser, error) // WriteTOCAndFooter is called to write JTOC to the passed Writer. // diffHash calculates the DiffID (uncompressed sha256 hash) of the blob // WriteTOCAndFooter can optionally write anything that affects DiffID calculation // (e.g. uncompressed TOC JSON). // // This function returns tocDgst that represents the digest of TOC that will be used // to verify this blob when it's parsed. WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (tocDgst digest.Digest, err error) }
Decompressor represents the helper methods to be used for parsing eStargz.
type Decompressor interface { // Reader returns ReadCloser to be used for decompressing file payload. Reader(r io.Reader) (io.ReadCloser, error) // FooterSize returns the size of the footer of this blob. FooterSize() int64 // ParseFooter parses the footer and returns the offset and (compressed) size of TOC. // payloadBlobSize is the (compressed) size of the blob payload (i.e. the size between // the top until the TOC JSON). // // If tocOffset < 0, we assume that TOC isn't contained in the blob and pass nil reader // to ParseTOC. We expect that ParseTOC acquire TOC from the external location and return it. // // tocSize is optional. If tocSize <= 0, it's by default the size of the range from tocOffset until the beginning of the // footer (blob size - tocOff - FooterSize). // If blobPayloadSize < 0, blobPayloadSize become the blob size. ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) // ParseTOC parses TOC from the passed reader. The reader provides the partial contents // of the underlying blob that has the range specified by ParseFooter method. // // This function returns tocDgst that represents the digest of TOC that will be used // to verify this blob. This must match to the value returned from // Compressor.WriteTOCAndFooter that is used when creating this blob. // // If tocOffset returned by ParseFooter is < 0, we assume that TOC isn't contained in the blob. // Pass nil reader to ParseTOC then we expect that ParseTOC acquire TOC from the external location // and return it. ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) }
type GzipCompressor struct {
// contains filtered or unexported fields
}
func NewGzipCompressor() *GzipCompressor
func NewGzipCompressorWithLevel(level int) *GzipCompressor
func (gc *GzipCompressor) WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (digest.Digest, error)
func (gc *GzipCompressor) Writer(w io.Writer) (WriteFlushCloser, error)
type GzipDecompressor struct{}
func (gz *GzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error)
func (gz *GzipDecompressor) FooterSize() int64
func (gz *GzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)
func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)
func (gz *GzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error)
JTOC is the JSON-serialized table of contents index of the files in the stargz file.
type JTOC struct { Version int `json:"version"` Entries []*TOCEntry `json:"entries"` }
type LegacyGzipDecompressor struct{}
func (gz *LegacyGzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error)
func (gz *LegacyGzipDecompressor) FooterSize() int64
func (gz *LegacyGzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)
func (gz *LegacyGzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)
func (gz *LegacyGzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error)
MeasureLatencyHook is a func which takes start time and records the diff
type MeasureLatencyHook func(time.Time)
OpenOption is an option used during opening the layer
type OpenOption func(o *openOpts) error
func WithDecompressors(decompressors ...Decompressor) OpenOption
WithDecompressors option specifies decompressors to use. Default is gzip-based decompressor.
func WithTOCOffset(tocOffset int64) OpenOption
WithTOCOffset option specifies the offset of TOC
func WithTelemetry(telemetry *Telemetry) OpenOption
WithTelemetry option specifies the telemetry hooks
type Option func(o *options) error
func WithAllowPrioritizeNotFound(missedFiles *[]string) Option
WithAllowPrioritizeNotFound makes Build continue the execution even if some of prioritized files specified by WithPrioritizedFiles option aren't found in the input tar. Instead, this records all missed file names to the passed slice.
func WithChunkSize(chunkSize int) Option
WithChunkSize option specifies the chunk size of eStargz blob to build.
func WithCompression(compression Compression) Option
WithCompression specifies compression algorithm to be used. Default is gzip.
func WithCompressionLevel(level int) Option
WithCompressionLevel option specifies the gzip compression level. The default is gzip.BestCompression. This option will be ignored if WithCompression option is used. See also: https://godoc.org/compress/gzip#pkg-constants
func WithContext(ctx context.Context) Option
WithContext specifies a context that can be used for clean cancellation.
func WithMinChunkSize(minChunkSize int) Option
WithMinChunkSize option specifies the minimal number of bytes of data that must be written in one gzip stream. By increasing this number, one gzip stream can contain multiple files and it hopefully leads to smaller result blob. NOTE: This adds a TOC property that old reader doesn't understand.
func WithPrioritizedFiles(files []string) Option
WithPrioritizedFiles option specifies the list of prioritized files. These files must be complete paths that are absolute or relative to "/". For example, all of "foo/bar", "/foo/bar", "./foo/bar" and "../foo/bar" are treated as "/foo/bar".
A Reader permits random access reads from a stargz file.
type Reader struct {
// contains filtered or unexported fields
}
func Open(sr *io.SectionReader, opt ...OpenOption) (*Reader, error)
Open opens a stargz file for reading. The behavior is configurable using options.
Note that each entry name is normalized as the path that is relative to root.
func (r *Reader) ChunkEntryForOffset(name string, offset int64) (e *TOCEntry, ok bool)
ChunkEntryForOffset returns the TOCEntry containing the byte of the named file at the given offset within the file. Name must be absolute path or one that is relative to root.
func (r *Reader) Lookup(path string) (e *TOCEntry, ok bool)
Lookup returns the Table of Contents entry for the given path.
To get the root directory, use the empty string. Path must be absolute path or one that is relative to root.
func (r *Reader) OpenFile(name string) (*io.SectionReader, error)
OpenFile returns the reader of the specified file payload.
Name must be absolute path or one that is relative to root.
func (r *Reader) OpenFileWithPreReader(name string, preRead func(*TOCEntry, io.Reader) error) (*io.SectionReader, error)
func (r *Reader) TOCDigest() digest.Digest
func (r *Reader) Verifiers() (TOCEntryVerifier, error)
Verifiers returns TOCEntryVerifier of this chunk. Use VerifyTOC instead in most cases because this doesn't verify TOC.
func (r *Reader) VerifyTOC(tocDigest digest.Digest) (TOCEntryVerifier, error)
VerifyTOC checks that the TOC JSON in the passed blob matches the passed digests and that the TOC JSON contains digests for all chunks contained in the blob. If the verification succeeds, this function returns TOCEntryVerifier which holds all chunk digests in the stargz blob.
TOCEntry is an entry in the stargz file's TOC (Table of Contents).
type TOCEntry struct { // Name is the tar entry's name. It is the complete path // stored in the tar file, not just the base name. Name string `json:"name"` // Type is one of "dir", "reg", "symlink", "hardlink", "char", // "block", "fifo", or "chunk". // The "chunk" type is used for regular file data chunks past the first // TOCEntry; the 2nd chunk and on have only Type ("chunk"), Offset, // ChunkOffset, and ChunkSize populated. Type string `json:"type"` // Size, for regular files, is the logical size of the file. Size int64 `json:"size,omitempty"` // ModTime3339 is the modification time of the tar entry. Empty // means zero or unknown. Otherwise it's in UTC RFC3339 // format. Use the ModTime method to access the time.Time value. ModTime3339 string `json:"modtime,omitempty"` // LinkName, for symlinks and hardlinks, is the link target. LinkName string `json:"linkName,omitempty"` // Mode is the permission and mode bits. Mode int64 `json:"mode,omitempty"` // UID is the user ID of the owner. UID int `json:"uid,omitempty"` // GID is the group ID of the owner. GID int `json:"gid,omitempty"` // Uname is the username of the owner. // // In the serialized JSON, this field may only be present for // the first entry with the same UID. Uname string `json:"userName,omitempty"` // Gname is the group name of the owner. // // In the serialized JSON, this field may only be present for // the first entry with the same GID. Gname string `json:"groupName,omitempty"` // Offset, for regular files, provides the offset in the // stargz file to the file's data bytes. See ChunkOffset and // ChunkSize. Offset int64 `json:"offset,omitempty"` // InnerOffset is an optional field that indicates the uncompressed offset // of this "reg" or "chunk" payload in a stream that starts from Offset. // This field enables putting multiple "reg" or "chunk" payloads // in one chunk, having the same Offset but different InnerOffset.
InnerOffset int64 `json:"innerOffset,omitempty"` // DevMajor is the major device number for "char" and "block" types. DevMajor int `json:"devMajor,omitempty"` // DevMinor is the minor device number for "char" and "block" types. DevMinor int `json:"devMinor,omitempty"` // NumLink is the number of entry names pointing to this entry. // Zero means one name references this entry. // This field is calculated during runtime and not recorded in TOC JSON. NumLink int `json:"-"` // Xattrs are the extended attribute for the entry. Xattrs map[string][]byte `json:"xattrs,omitempty"` // Digest stores the OCI checksum for regular files payload. // It has the form "sha256:abcdef01234....". Digest string `json:"digest,omitempty"` // ChunkOffset is non-zero if this is a chunk of a large, // regular file. If so, the Offset is where the gzip header of // ChunkSize bytes at ChunkOffset in Name begin. // // In serialized form, a "chunkSize" JSON field of zero means // that the chunk goes to the end of the file. After reading // from the stargz TOC, though, the ChunkSize is initialized // to a non-zero value for when Type is either "reg" or // "chunk". ChunkOffset int64 `json:"chunkOffset,omitempty"` ChunkSize int64 `json:"chunkSize,omitempty"` // ChunkDigest stores an OCI digest of the chunk. This must be formed // as "sha256:0123abcd...". ChunkDigest string `json:"chunkDigest,omitempty"` // contains filtered or unexported fields }
func (e *TOCEntry) ForeachChild(f func(baseName string, ent *TOCEntry) bool)
ForeachChild calls f for each child item. If f returns false, iteration ends. If e is not a directory, f is not called.
func (e *TOCEntry) LookupChild(baseName string) (child *TOCEntry, ok bool)
LookupChild returns the directory e's child by its base name.
func (e *TOCEntry) ModTime() time.Time
ModTime returns the entry's modification time.
func (e *TOCEntry) NextOffset() int64
NextOffset returns the position (relative to the start of the stargz file) of the next gzip boundary after e.Offset.
func (e *TOCEntry) Stat() os.FileInfo
Stat returns a FileInfo value representing e.
TOCEntryVerifier holds verifiers that are usable for verifying chunks contained in a eStargz blob.
type TOCEntryVerifier interface { // Verifier provides a content verifier that can be used for verifying the // contents of the specified TOCEntry. Verifier(ce *TOCEntry) (digest.Verifier, error) }
Telemetry is a struct which defines telemetry hooks. By implementing these hooks you should be able to record the latency metrics of the respective steps of estargz open operation. To be used with estargz.Open(..., estargz.WithTelemetry(...))
type Telemetry struct { GetFooterLatency MeasureLatencyHook // measure time to get stargz footer (in milliseconds) GetTocLatency MeasureLatencyHook // measure time to GET TOC JSON (in milliseconds) DeserializeTocLatency MeasureLatencyHook // measure time to deserialize TOC JSON (in milliseconds) }
TestingController is Compression with some helper methods necessary for testing.
type TestingController interface { Compression TestStreams(t *testing.T, b []byte, streams []int64) DiffIDOf(*testing.T, []byte) string String() string }
type TestingControllerFactory func() TestingController
type WriteFlushCloser interface { io.WriteCloser Flush() error }
A Writer writes stargz files.
Use NewWriter to create a new Writer.
type Writer struct { // ChunkSize optionally controls the maximum number of bytes // of data of a regular file that can be written in one gzip // stream before a new gzip stream is started. // Zero means to use a default, currently 4 MiB. ChunkSize int // MinChunkSize optionally controls the minimum number of bytes // of data that must be written in one gzip stream before a new gzip // stream is started. // NOTE: This adds a TOC property that stargz snapshotter < v0.13.0 doesn't understand. MinChunkSize int // contains filtered or unexported fields }
func NewWriter(w io.Writer) *Writer
NewWriter returns a new stargz writer (gzip-based) writing to w.
The writer must be closed to write its trailing table of contents.
func NewWriterLevel(w io.Writer, compressionLevel int) *Writer
NewWriterLevel returns a new stargz writer (gzip-based) writing to w. The compression level is configurable.
The writer must be closed to write its trailing table of contents.
func NewWriterWithCompressor(w io.Writer, c Compressor) *Writer
NewWriterWithCompressor returns a new stargz writer writing to w. The compression method is configurable.
The writer must be closed to write its trailing table of contents.
func (w *Writer) AppendTar(r io.Reader) error
AppendTar reads the tar or tar.gz file from r and appends each of its contents to w.
The input r can optionally be gzip compressed but the output will always be compressed by the specified compressor.
func (w *Writer) AppendTarLossLess(r io.Reader) error
AppendTarLossLess reads the tar or tar.gz file from r and appends each of its contents to w.
The input r can optionally be gzip compressed but the output will always be compressed by the specified compressor.
The difference of this func with AppendTar is that this writes the input tar stream into w without any modification (e.g. to header bytes).
Note that if the input tar stream already contains TOC JSON, this returns an error because w cannot overwrite the TOC JSON with the one generated by w without lossy modification. To avoid this error, if the input stream is known to be stargz/estargz, you should decompress it and remove TOC JSON in advance.
func (w *Writer) Close() (digest.Digest, error)
Close writes the stargz's table of contents and flushes all the buffers, returning any error.
func (w *Writer) DiffID() string
DiffID returns the SHA-256 of the uncompressed tar bytes. It is only valid to call DiffID after Close.
Name | Synopsis |
---|---|
.. | |
errorutil | |
externaltoc | |
zstdchunked |