package estargz

import (
	"bufio"
	"bytes"
	"compress/gzip"
	"crypto/sha256"
	"errors"
	"fmt"
	"hash"
	"io"
	"os"
	"path"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/containerd/stargz-snapshotter/estargz/errorutil"
	digest "github.com/opencontainers/go-digest"
	"github.com/vbatts/tar-split/archive/tar"
)

// A Reader permits random access reads from a stargz file.
type Reader struct {
	sr        *io.SectionReader
	toc       *JTOC
	tocDigest digest.Digest

	// m stores all non-chunk entries, keyed by entry name.
	m map[string]*TOCEntry

	// chunks stores all TOCEntry values for regular files that are split up.
	// For chunked files, the first TOCEntry is the "reg" entry covering the
	// beginning of the file and the following ones are its "chunk" entries.
	chunks map[string][]*TOCEntry

	decompressor Decompressor
}

type openOpts struct {
	tocOffset     int64
	decompressors []Decompressor
	telemetry     *Telemetry
}

// OpenOption is an option used during opening the layer.
type OpenOption func(o *openOpts) error

// WithTOCOffset option specifies the offset of TOC.
func WithTOCOffset(tocOffset int64) OpenOption {
	return func(o *openOpts) error {
		o.tocOffset = tocOffset
		return nil
	}
}

// WithDecompressors option specifies decompressors to use.
// Default is gzip-based decompressor.
func WithDecompressors(decompressors ...Decompressor) OpenOption {
	return func(o *openOpts) error {
		o.decompressors = decompressors
		return nil
	}
}

// WithTelemetry option specifies the telemetry hooks.
func WithTelemetry(telemetry *Telemetry) OpenOption {
	return func(o *openOpts) error {
		o.telemetry = telemetry
		return nil
	}
}

// MeasureLatencyHook is a func which takes a start time and records the elapsed time.
type MeasureLatencyHook func(time.Time)

// Telemetry defines hooks for recording the latency of each step of opening an
// eStargz layer: getting the footer, getting the TOC, and deserializing the TOC.
type Telemetry struct {
	GetFooterLatency      MeasureLatencyHook // measures time to get the stargz footer
	GetTocLatency         MeasureLatencyHook // measures time to get the TOC JSON
	DeserializeTocLatency MeasureLatencyHook // measures time to deserialize the TOC JSON
}
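
// A minimal sketch of wiring these hooks when opening a layer. The duration
// slices used as a metrics sink here are hypothetical and stand in for
// whatever metrics system the caller uses; sr is the blob's io.SectionReader:
//
//	var footerLat, tocLat, deserializeLat []time.Duration
//	tel := &estargz.Telemetry{
//		GetFooterLatency:      func(start time.Time) { footerLat = append(footerLat, time.Since(start)) },
//		GetTocLatency:         func(start time.Time) { tocLat = append(tocLat, time.Since(start)) },
//		DeserializeTocLatency: func(start time.Time) { deserializeLat = append(deserializeLat, time.Since(start)) },
//	}
//	r, err := estargz.Open(sr, estargz.WithTelemetry(tel))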

// Open opens a stargz file for reading.
// The behavior is configurable using options.
//
// Note that each entry name is normalized as the path that is relative to root.
func Open(sr *io.SectionReader, opt ...OpenOption) (*Reader, error) {
	var opts openOpts
	for _, o := range opt {
		if err := o(&opts); err != nil {
			return nil, err
		}
	}

	gzipCompressors := []Decompressor{new(GzipDecompressor), new(LegacyGzipDecompressor)}
	decompressors := append(gzipCompressors, opts.decompressors...)

	// Determine the size to fetch. Try to fetch as many bytes as possible.
	fetchSize := maxFooterSize(sr.Size(), decompressors...)
	if maybeTocOffset := opts.tocOffset; maybeTocOffset > fetchSize {
		if maybeTocOffset > sr.Size() {
			return nil, fmt.Errorf("blob size %d is smaller than the toc offset", sr.Size())
		}
		fetchSize = sr.Size() - maybeTocOffset
	}

	start := time.Now() // before getting the layer footer
	footer := make([]byte, fetchSize)
	if _, err := sr.ReadAt(footer, sr.Size()-fetchSize); err != nil {
		return nil, fmt.Errorf("error reading footer: %v", err)
	}
	if opts.telemetry != nil && opts.telemetry.GetFooterLatency != nil {
		opts.telemetry.GetFooterLatency(start)
	}

	var allErr []error
	var found bool
	var r *Reader
	for _, d := range decompressors {
		fSize := d.FooterSize()
		fOffset := positive(int64(len(footer)) - fSize)
		maybeTocBytes := footer[:fOffset]
		_, tocOffset, tocSize, err := d.ParseFooter(footer[fOffset:])
		if err != nil {
			allErr = append(allErr, err)
			continue
		}
		if tocOffset >= 0 && tocSize <= 0 {
			tocSize = sr.Size() - tocOffset - fSize
		}
		if tocOffset >= 0 && tocSize < int64(len(maybeTocBytes)) {
			maybeTocBytes = maybeTocBytes[:tocSize]
		}
		r, err = parseTOC(d, sr, tocOffset, tocSize, maybeTocBytes, opts)
		if err == nil {
			found = true
			break
		}
		allErr = append(allErr, err)
	}
	if !found {
		return nil, errorutil.Aggregate(allErr)
	}
	if err := r.initFields(); err != nil {
		return nil, fmt.Errorf("failed to initialize fields of entries: %v", err)
	}
	return r, nil
}
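
// A minimal usage sketch of Open, assuming the eStargz blob is available as a
// local file (the file name is hypothetical; remote blobs are typically
// wrapped in an io.SectionReader backed by range requests instead):
//
//	f, err := os.Open("layer.stargz")
//	if err != nil { /* handle error */ }
//	defer f.Close()
//	fi, err := f.Stat()
//	if err != nil { /* handle error */ }
//	r, err := estargz.Open(io.NewSectionReader(f, 0, fi.Size()))
//	if err != nil { /* handle error */ }
//	e, ok := r.Lookup("etc/passwd") // entry names are relative to root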

// OpenFooter extracts and parses the footer from the given blob.
// It only supports gzip-based eStargz.
func OpenFooter(sr *io.SectionReader) (tocOffset int64, footerSize int64, rErr error) {
	if sr.Size() < FooterSize && sr.Size() < legacyFooterSize {
		return 0, 0, fmt.Errorf("blob size %d is smaller than the footer size", sr.Size())
	}
	var footer [FooterSize]byte
	if _, err := sr.ReadAt(footer[:], sr.Size()-FooterSize); err != nil {
		return 0, 0, fmt.Errorf("error reading footer: %v", err)
	}
	var allErr []error
	for _, d := range []Decompressor{new(GzipDecompressor), new(LegacyGzipDecompressor)} {
		fSize := d.FooterSize()
		fOffset := positive(int64(len(footer)) - fSize)
		_, tocOffset, _, err := d.ParseFooter(footer[fOffset:])
		if err == nil {
			return tocOffset, fSize, err
		}
		allErr = append(allErr, err)
	}
	return 0, 0, errorutil.Aggregate(allErr)
}

// initFields populates the Reader from r.toc after decoding it from JSON.
//
// Unexported fields are populated and TOCEntry fields that were implicit in
// the JSON are populated.
func (r *Reader) initFields() error {
	r.m = make(map[string]*TOCEntry, len(r.toc.Entries))
	r.chunks = make(map[string][]*TOCEntry)
	var lastPath string
	uname := map[int]string{}
	gname := map[int]string{}
	var lastRegEnt *TOCEntry
	var chunkTopIndex int
	for i, ent := range r.toc.Entries {
		ent.Name = cleanEntryName(ent.Name)
		switch ent.Type {
		case "reg", "chunk":
			if ent.Offset != r.toc.Entries[chunkTopIndex].Offset {
				chunkTopIndex = i
			}
			ent.chunkTopIndex = chunkTopIndex
		}
		if ent.Type == "reg" {
			lastRegEnt = ent
		}
		if ent.Type == "chunk" {
			ent.Name = lastPath
			r.chunks[ent.Name] = append(r.chunks[ent.Name], ent)
			if ent.ChunkSize == 0 && lastRegEnt != nil {
				ent.ChunkSize = lastRegEnt.Size - ent.ChunkOffset
			}
		} else {
			lastPath = ent.Name

			if ent.Uname != "" {
				uname[ent.UID] = ent.Uname
			} else {
				ent.Uname = uname[ent.UID]
			}
			if ent.Gname != "" {
				gname[ent.GID] = ent.Gname
			} else {
				ent.Gname = gname[ent.GID]
			}

			ent.modTime, _ = time.Parse(time.RFC3339, ent.ModTime3339)

			if ent.Type == "dir" {
				ent.NumLink++ // Parent dir links to this directory
			}
			r.m[ent.Name] = ent
		}
		if ent.Type == "reg" && ent.ChunkSize > 0 && ent.ChunkSize < ent.Size {
			r.chunks[ent.Name] = make([]*TOCEntry, 0, ent.Size/ent.ChunkSize+1)
			r.chunks[ent.Name] = append(r.chunks[ent.Name], ent)
		}
		if ent.ChunkSize == 0 && ent.Size != 0 {
			ent.ChunkSize = ent.Size
		}
	}

	// Populate children, add implicit directories:
	for _, ent := range r.toc.Entries {
		if ent.Type == "chunk" {
			continue
		}
		// add "foo/":
		//    add "foo" child to "" (creating "" if necessary)
		//
		// add "foo/bar/":
		//    add "bar" child to "foo" (creating "foo" if necessary)
		//
		// add "foo/bar.txt":
		//    add "bar.txt" child to "foo" (creating "foo" if necessary)
		//
		// add "a/b/c/d/e/f.txt":
		//    create "a/b/c/d/e" node
		//    add "f.txt" child to "e"

		name := ent.Name
		pdirName := parentDir(name)
		if name == pdirName {
			// This entry and its parent are the same.
			// Ignore this to avoid an infinite reference loop.
			// The example case where this can occur is when the tar contains the
			// root directory itself (e.g. "./", "/").
			continue
		}
		pdir := r.getOrCreateDir(pdirName)
		ent.NumLink++ // at least one name (ent.Name) references this entry.
		if ent.Type == "hardlink" {
			org, err := r.getSource(ent)
			if err != nil {
				return err
			}
			org.NumLink++ // the original entry is also referenced by ent.Name.
			ent = org
		}
		pdir.addChild(path.Base(name), ent)
	}

	lastOffset := r.sr.Size()
	for i := len(r.toc.Entries) - 1; i >= 0; i-- {
		e := r.toc.Entries[i]
		if e.isDataType() {
			e.nextOffset = lastOffset
		}
		if e.Offset != 0 && e.InnerOffset == 0 {
			lastOffset = e.Offset
		}
	}

	return nil
}

func (r *Reader) getSource(ent *TOCEntry) (_ *TOCEntry, err error) {
	if ent.Type == "hardlink" {
		org, ok := r.m[cleanEntryName(ent.LinkName)]
		if !ok {
			return nil, fmt.Errorf("%q is a hardlink but the linkname %q isn't found", ent.Name, ent.LinkName)
		}
		ent, err = r.getSource(org)
		if err != nil {
			return nil, err
		}
	}
	return ent, nil
}

func parentDir(p string) string {
	dir, _ := path.Split(p)
	return strings.TrimSuffix(dir, "/")
}

func (r *Reader) getOrCreateDir(d string) *TOCEntry {
	e, ok := r.m[d]
	if !ok {
		e = &TOCEntry{
			Name:    d,
			Type:    "dir",
			Mode:    0755,
			NumLink: 2,
		}
		r.m[d] = e
		if d != "" {
			pdir := r.getOrCreateDir(parentDir(d))
			pdir.addChild(path.Base(d), e)
		}
	}
	return e
}

// TOCDigest returns the digest of the TOC JSON of this eStargz.
func (r *Reader) TOCDigest() digest.Digest {
	return r.tocDigest
}

// VerifyTOC checks that the TOC JSON in the passed blob matches the
// passed digest and that the TOC JSON contains digests for all chunks
// contained in the blob. If the verification succeeds, this function
// returns a TOCEntryVerifier which holds all chunk digests in the stargz blob.
func (r *Reader) VerifyTOC(tocDigest digest.Digest) (TOCEntryVerifier, error) {
	// Verify the digest of the TOC JSON against the passed one.
	if r.tocDigest != tocDigest {
		return nil, fmt.Errorf("invalid TOC JSON %q; want %q", r.tocDigest, tocDigest)
	}
	return r.Verifiers()
}
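
// A sketch of a typical verification flow: the expected TOC digest comes from
// a trusted source such as the layer descriptor annotation
// (TOCJSONDigestAnnotation); desc here stands for the layer's OCI descriptor
// and r is a *Reader obtained from Open:
//
//	expected, err := digest.Parse(desc.Annotations[estargz.TOCJSONDigestAnnotation])
//	if err != nil { /* handle error */ }
//	ev, err := r.VerifyTOC(expected)
//	if err != nil { /* TOC is corrupted or incomplete */ }
//	// ev.Verifier(chunkEntry) can now be used while reading chunks.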

// Verifiers returns a TOCEntryVerifier of this eStargz. Use VerifyTOC instead
// in most cases because this doesn't verify the TOC itself.
func (r *Reader) Verifiers() (TOCEntryVerifier, error) {
	chunkDigestMap := make(map[int64]digest.Digest) // map from chunk offset to the chunk digest
	regDigestMap := make(map[int64]digest.Digest)   // map from chunk offset to the reg file digest
	var chunkDigestMapIncomplete bool
	var regDigestMapIncomplete bool
	var containsChunk bool
	for _, e := range r.toc.Entries {
		if e.Type != "reg" && e.Type != "chunk" {
			continue
		}

		// offset must be unique in a stargz blob
		_, dOK := chunkDigestMap[e.Offset]
		_, rOK := regDigestMap[e.Offset]
		if dOK || rOK {
			return nil, fmt.Errorf("offset %d found twice", e.Offset)
		}

		if e.Type == "reg" {
			if e.Size == 0 {
				continue // ignore empty files
			}

			// record the digest of the regular file payload
			if e.Digest != "" {
				d, err := digest.Parse(e.Digest)
				if err != nil {
					return nil, fmt.Errorf("failed to parse regular file digest %q: %w", e.Digest, err)
				}
				regDigestMap[e.Offset] = d
			} else {
				regDigestMapIncomplete = true
			}
		} else {
			containsChunk = true // this layer contains "chunk" entries.
		}

		// "reg" entries can also contain ChunkDigest (e.g. when "reg" is the
		// first entry of a chunked file).
		if e.ChunkDigest != "" {
			d, err := digest.Parse(e.ChunkDigest)
			if err != nil {
				return nil, fmt.Errorf("failed to parse chunk digest %q: %w", e.ChunkDigest, err)
			}
			chunkDigestMap[e.Offset] = d
		} else {
			chunkDigestMapIncomplete = true
		}
	}

	if chunkDigestMapIncomplete {
		// Though some chunk digests are not found, if this layer doesn't contain
		// "chunk" entries and all digests of "reg" files are recorded, we can use them instead.
		if !containsChunk && !regDigestMapIncomplete {
			return &verifier{digestMap: regDigestMap}, nil
		}
		return nil, fmt.Errorf("some ChunkDigest not found in TOC JSON")
	}

	return &verifier{digestMap: chunkDigestMap}, nil
}

// verifier is an implementation of TOCEntryVerifier which holds verifiers keyed by
// offset of the chunk.
type verifier struct {
	digestMap   map[int64]digest.Digest
	digestMapMu sync.Mutex
}

// Verifier returns a content verifier specified by the TOCEntry.
func (v *verifier) Verifier(ce *TOCEntry) (digest.Verifier, error) {
	v.digestMapMu.Lock()
	defer v.digestMapMu.Unlock()
	d, ok := v.digestMap[ce.Offset]
	if !ok {
		return nil, fmt.Errorf("verifier for offset=%d,size=%d hasn't been registered",
			ce.Offset, ce.ChunkSize)
	}
	return d.Verifier(), nil
}

// ChunkEntryForOffset returns the TOCEntry containing the byte of the
// named file at the given offset within the file.
// Name must be an absolute path or one that is relative to root.
func (r *Reader) ChunkEntryForOffset(name string, offset int64) (e *TOCEntry, ok bool) {
	name = cleanEntryName(name)
	e, ok = r.Lookup(name)
	if !ok || !e.isDataType() {
		return nil, false
	}
	ents := r.chunks[name]
	if len(ents) < 2 {
		if offset >= e.ChunkSize {
			return nil, false
		}
		return e, true
	}
	i := sort.Search(len(ents), func(i int) bool {
		e := ents[i]
		return e.ChunkOffset >= offset || (offset > e.ChunkOffset && offset < e.ChunkOffset+e.ChunkSize)
	})
	if i == len(ents) {
		return nil, false
	}
	return ents[i], true
}
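
// A sketch of reading and verifying a file chunk-by-chunk using
// ChunkEntryForOffset together with a TOCEntryVerifier. The file name is
// hypothetical, "ev" is the verifier returned by VerifyTOC, and error
// handling is omitted:
//
//	sr, _ := r.OpenFile("usr/bin/some-binary")
//	for off := int64(0); ; {
//		ce, ok := r.ChunkEntryForOffset("usr/bin/some-binary", off)
//		if !ok {
//			break
//		}
//		buf := make([]byte, ce.ChunkSize)
//		sr.ReadAt(buf, ce.ChunkOffset)
//		v, _ := ev.Verifier(ce)
//		v.Write(buf)
//		if !v.Verified() {
//			// chunk content doesn't match the digest recorded in the TOC
//		}
//		off = ce.ChunkOffset + ce.ChunkSize
//	}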

// Lookup returns the Table of Contents entry for the given path.
//
// To get the root directory, use the empty string.
// Path must be an absolute path or one that is relative to root.
func (r *Reader) Lookup(path string) (e *TOCEntry, ok bool) {
	path = cleanEntryName(path)
	if r == nil {
		return
	}
	e, ok = r.m[path]
	if ok && e.Type == "hardlink" {
		var err error
		e, err = r.getSource(e)
		if err != nil {
			return nil, false
		}
	}
	return
}

// OpenFile returns the reader of the specified file payload.
//
// Name must be an absolute path or one that is relative to root.
func (r *Reader) OpenFile(name string) (*io.SectionReader, error) {
	fr, err := r.newFileReader(name)
	if err != nil {
		return nil, err
	}
	return io.NewSectionReader(fr, 0, fr.size), nil
}
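
// A small sketch of reading a whole file payload through OpenFile (the entry
// name is hypothetical); the returned io.SectionReader also supports ReadAt,
// so partial reads of large files don't require fetching the whole payload:
//
//	sr, err := r.OpenFile("etc/os-release")
//	if err != nil { /* handle error */ }
//	data, err := io.ReadAll(sr)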

func (r *Reader) newFileReader(name string) (*fileReader, error) {
	name = cleanEntryName(name)
	ent, ok := r.Lookup(name)
	if !ok {
		// TODO: come up with some error plan. This is lazy:
		return nil, &os.PathError{
			Path: name,
			Op:   "OpenFile",
			Err:  os.ErrNotExist,
		}
	}
	if ent.Type != "reg" {
		return nil, &os.PathError{
			Path: name,
			Op:   "OpenFile",
			Err:  errors.New("not a regular file"),
		}
	}
	return &fileReader{
		r:    r,
		size: ent.Size,
		ents: r.getChunks(ent),
	}, nil
}

// OpenFileWithPreReader is like OpenFile, but preRead is called for every other
// chunk that gets decompressed from the same compressed stream while reading
// the target chunk.
func (r *Reader) OpenFileWithPreReader(name string, preRead func(*TOCEntry, io.Reader) error) (*io.SectionReader, error) {
	fr, err := r.newFileReader(name)
	if err != nil {
		return nil, err
	}
	fr.preRead = preRead
	return io.NewSectionReader(fr, 0, fr.size), nil
}

func (r *Reader) getChunks(ent *TOCEntry) []*TOCEntry {
	if ents, ok := r.chunks[ent.Name]; ok {
		return ents
	}
	return []*TOCEntry{ent}
}

type fileReader struct {
	r       *Reader
	size    int64
	ents    []*TOCEntry // 1 or more reg/chunk entries
	preRead func(*TOCEntry, io.Reader) error
}

func (fr *fileReader) ReadAt(p []byte, off int64) (n int, err error) {
	if off >= fr.size {
		return 0, io.EOF
	}
	if off < 0 {
		return 0, errors.New("invalid offset")
	}
	var i int
	if len(fr.ents) > 1 {
		i = sort.Search(len(fr.ents), func(i int) bool {
			return fr.ents[i].ChunkOffset >= off
		})
		if i == len(fr.ents) {
			i = len(fr.ents) - 1
		}
	}
	ent := fr.ents[i]
	if ent.ChunkOffset > off {
		if i == 0 {
			return 0, errors.New("internal error; first chunk offset is non-zero")
		}
		ent = fr.ents[i-1]
	}

	// If ent is a chunk of a large file, adjust the ReadAt
	// position by the chunk's offset.
	off -= ent.ChunkOffset

	finalEnt := fr.ents[len(fr.ents)-1]
	compressedOff := ent.Offset
	// compressedBytesRemain is the number of compressed bytes in this
	// file remaining, over 1+ chunks.
	compressedBytesRemain := finalEnt.NextOffset() - compressedOff

	sr := io.NewSectionReader(fr.r.sr, compressedOff, compressedBytesRemain)

	const maxRead = 2 << 20
	var bufSize = maxRead
	if compressedBytesRemain < maxRead {
		bufSize = int(compressedBytesRemain)
	}

	br := bufio.NewReaderSize(sr, bufSize)
	if _, err := br.Peek(bufSize); err != nil {
		return 0, fmt.Errorf("fileReader.ReadAt.peek: %v", err)
	}

	dr, err := fr.r.decompressor.Reader(br)
	if err != nil {
		return 0, fmt.Errorf("fileReader.ReadAt.decompressor.Reader: %v", err)
	}
	defer dr.Close()

	if fr.preRead == nil {
		if n, err := io.CopyN(io.Discard, dr, ent.InnerOffset+off); n != ent.InnerOffset+off || err != nil {
			return 0, fmt.Errorf("discard of %d bytes != %v, %v", ent.InnerOffset+off, n, err)
		}
		return io.ReadFull(dr, p)
	}

	var retN int
	var retErr error
	var found bool
	var nr int64
	for _, e := range fr.r.toc.Entries[ent.chunkTopIndex:] {
		if !e.isDataType() {
			continue
		}
		if e.Offset != fr.r.toc.Entries[ent.chunkTopIndex].Offset {
			break
		}
		if in, err := io.CopyN(io.Discard, dr, e.InnerOffset-nr); err != nil || in != e.InnerOffset-nr {
			return 0, fmt.Errorf("discard of remaining %d bytes != %v, %v", e.InnerOffset-nr, in, err)
		}
		nr = e.InnerOffset
		if e == ent {
			found = true
			if n, err := io.CopyN(io.Discard, dr, off); n != off || err != nil {
				return 0, fmt.Errorf("discard of offset %d bytes != %v, %v", off, n, err)
			}
			retN, retErr = io.ReadFull(dr, p)
			nr += off + int64(retN)
			continue
		}
		cr := &countReader{r: io.LimitReader(dr, e.ChunkSize)}
		if err := fr.preRead(e, cr); err != nil {
			return 0, fmt.Errorf("failed to pre read: %w", err)
		}
		nr += cr.n
	}
	if !found {
		return 0, fmt.Errorf("fileReader.ReadAt: target entry not found")
	}
	return retN, retErr
}

// A Writer writes stargz files.
//
// Use NewWriter to create a new Writer.
type Writer struct {
	bw       *bufio.Writer
	cw       *countWriter
	toc      *JTOC
	diffHash hash.Hash // SHA-256 of the uncompressed tar

	closed        bool
	gz            io.WriteCloser
	lastUsername  map[int]string
	lastGroupname map[int]string
	compressor    Compressor

	uncompressedCounter *countWriteFlusher

	// ChunkSize optionally controls the maximum number of bytes
	// of data of a regular file that can be written in one gzip
	// stream before a new gzip stream is started.
	// Zero means to use a default, currently 4 MiB.
	ChunkSize int

	// MinChunkSize optionally controls the minimum number of compressed bytes
	// that must be written since the last stream boundary before a new gzip
	// stream is started for the next chunk; chunks written before that
	// threshold share the previous stream and are recorded via InnerOffset.
	MinChunkSize int

	needsOpenGzEntries map[string]struct{}
}

// currentCompressionWriter writes to the current w.gz field, which can
// change throughout writing a tar entry.
//
// Additionally, it updates w's SHA-256 of the uncompressed bytes
// of the tar file.
type currentCompressionWriter struct{ w *Writer }

func (ccw currentCompressionWriter) Write(p []byte) (int, error) {
	ccw.w.diffHash.Write(p)
	if ccw.w.gz == nil {
		if err := ccw.w.condOpenGz(); err != nil {
			return 0, err
		}
	}
	return ccw.w.gz.Write(p)
}

func (w *Writer) chunkSize() int {
	if w.ChunkSize <= 0 {
		return 4 << 20
	}
	return w.ChunkSize
}

// Unpack decompresses the given estargz blob and returns a ReadCloser of the tar blob.
// TOC JSON and footer are removed.
func Unpack(sr *io.SectionReader, c Decompressor) (io.ReadCloser, error) {
	footerSize := c.FooterSize()
	if sr.Size() < footerSize {
		return nil, fmt.Errorf("blob is too small; %d < %d", sr.Size(), footerSize)
	}
	footerOffset := sr.Size() - footerSize
	footer := make([]byte, footerSize)
	if _, err := sr.ReadAt(footer, footerOffset); err != nil {
		return nil, err
	}
	blobPayloadSize, _, _, err := c.ParseFooter(footer)
	if err != nil {
		return nil, fmt.Errorf("failed to parse footer: %w", err)
	}
	if blobPayloadSize < 0 {
		blobPayloadSize = sr.Size()
	}
	return c.Reader(io.LimitReader(sr, blobPayloadSize))
}
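
// A sketch of using Unpack to recover the original tar stream from a
// gzip-based eStargz blob (f and size are assumed to describe the blob):
//
//	rc, err := estargz.Unpack(io.NewSectionReader(f, 0, size), new(estargz.GzipDecompressor))
//	if err != nil { /* handle error */ }
//	defer rc.Close()
//	tr := tar.NewReader(rc)
//	for {
//		h, err := tr.Next()
//		if err == io.EOF {
//			break
//		}
//		// process h ...
//	}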

// NewWriter returns a new stargz writer (gzip-based) writing to w.
//
// The writer must be closed to write its trailing table of contents.
func NewWriter(w io.Writer) *Writer {
	return NewWriterLevel(w, gzip.BestCompression)
}

// NewWriterLevel returns a new stargz writer (gzip-based) writing to w.
// The compression level is configurable.
//
// The writer must be closed to write its trailing table of contents.
func NewWriterLevel(w io.Writer, compressionLevel int) *Writer {
	return NewWriterWithCompressor(w, NewGzipCompressorWithLevel(compressionLevel))
}

// NewWriterWithCompressor returns a new stargz writer writing to w.
// The compression method is configurable.
//
// The writer must be closed to write its trailing table of contents.
func NewWriterWithCompressor(w io.Writer, c Compressor) *Writer {
	bw := bufio.NewWriter(w)
	cw := &countWriter{w: bw}
	return &Writer{
		bw:                  bw,
		cw:                  cw,
		toc:                 &JTOC{Version: 1},
		diffHash:            sha256.New(),
		compressor:          c,
		uncompressedCounter: &countWriteFlusher{},
	}
}
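
// A minimal sketch of building an eStargz blob from an existing tar (or
// tar.gz) layer; the file names are hypothetical:
//
//	in, _ := os.Open("layer.tar")
//	out, _ := os.Create("layer.stargz")
//	w := estargz.NewWriter(out) // gzip with BestCompression
//	if err := w.AppendTar(in); err != nil { /* handle error */ }
//	tocDigest, err := w.Close() // digest of the TOC JSON
//	if err != nil { /* handle error */ }
//	diffID := w.DiffID() // sha256 of the uncompressed tar, valid after Close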

// Close writes the stargz's table of contents and flushes all the
// buffers, returning the digest of the TOC JSON.
func (w *Writer) Close() (digest.Digest, error) {
	if w.closed {
		return "", nil
	}
	defer func() { w.closed = true }()

	if err := w.closeGz(); err != nil {
		return "", err
	}

	// Write the TOC index and footer.
	tocDigest, err := w.compressor.WriteTOCAndFooter(w.cw, w.cw.n, w.toc, w.diffHash)
	if err != nil {
		return "", err
	}
	if err := w.bw.Flush(); err != nil {
		return "", err
	}

	return tocDigest, nil
}

func (w *Writer) closeGz() error {
	if w.closed {
		return errors.New("write on closed Writer")
	}
	if w.gz != nil {
		if err := w.gz.Close(); err != nil {
			return err
		}
		w.gz = nil
	}
	return nil
}

func (w *Writer) flushGz() error {
	if w.closed {
		return errors.New("flush on closed Writer")
	}
	if w.gz != nil {
		if f, ok := w.gz.(interface {
			Flush() error
		}); ok {
			return f.Flush()
		}
	}
	return nil
}

// nameIfChanged returns name, unless it was already the value of (*mp)[id],
// in which case it returns the empty string.
func (w *Writer) nameIfChanged(mp *map[int]string, id int, name string) string {
	if name == "" {
		return ""
	}
	if *mp == nil {
		*mp = make(map[int]string)
	}
	if (*mp)[id] == name {
		return ""
	}
	(*mp)[id] = name
	return name
}

func (w *Writer) condOpenGz() (err error) {
	if w.gz == nil {
		w.gz, err = w.compressor.Writer(w.cw)
		if w.gz != nil {
			w.gz = w.uncompressedCounter.register(w.gz)
		}
	}
	return
}

// AppendTar reads the tar or tar.gz file from r and appends
// each of its contents to w.
//
// The input r can optionally be gzip compressed but the output will
// always be compressed by the compression method specified in NewWriterWithCompressor.
func (w *Writer) AppendTar(r io.Reader) error {
	return w.appendTar(r, false)
}

// AppendTarLossLess reads the tar or tar.gz file from r and appends
// each of its contents to w.
//
// The input r can optionally be gzip compressed but the output will
// always be compressed by the compression method specified in NewWriterWithCompressor.
//
// The difference with AppendTar is that this writes the input tar stream
// into w without any modification (e.g. to losslessly preserve the original
// tar format). Note that this can't be used when the given tar stream is
// compressed with a non-gzip algorithm; only gzip and uncompressed streams
// are supported.
func (w *Writer) AppendTarLossLess(r io.Reader) error {
	return w.appendTar(r, true)
}

func (w *Writer) appendTar(r io.Reader, lossless bool) error {
	var src io.Reader
	br := bufio.NewReader(r)
	if isGzip(br) {
		zr, _ := gzip.NewReader(br)
		src = zr
	} else {
		src = io.Reader(br)
	}
	dst := currentCompressionWriter{w}
	var tw *tar.Writer
	if !lossless {
		tw = tar.NewWriter(dst) // use the tar writer only when this isn't lossless mode.
	}
	tr := tar.NewReader(src)
	if lossless {
		tr.RawAccounting = true
	}
	prevOffset := w.cw.n
	var prevOffsetUncompressed int64
	for {
		h, err := tr.Next()
		if err == io.EOF {
			if lossless {
				if remain := tr.RawBytes(); len(remain) > 0 {
					// Collect the remaining null bytes (the trailing padding
					// of the input tar stream).
					if _, err := dst.Write(remain); err != nil {
						return err
					}
				}
			}
			break
		}
		if err != nil {
			return fmt.Errorf("error reading from source tar: tar.Reader.Next: %v", err)
		}
		if cleanEntryName(h.Name) == TOCTarName {
			// It is possible for a layer to be "stargzified" twice during the
			// distribution lifecycle. So we reserve the TOC entry name and skip
			// any TOC that is already contained in the input.
			if lossless {
				// A pre-existing TOC cannot be preserved losslessly.
				return fmt.Errorf("existing TOC JSON is not allowed; decompress layer before append")
			}
			continue
		}

		xattrs := make(map[string][]byte)
		const xattrPAXRecordsPrefix = "SCHILY.xattr."
		if h.PAXRecords != nil {
			for k, v := range h.PAXRecords {
				if strings.HasPrefix(k, xattrPAXRecordsPrefix) {
					xattrs[k[len(xattrPAXRecordsPrefix):]] = []byte(v)
				}
			}
		}
		ent := &TOCEntry{
			Name:        h.Name,
			Mode:        h.Mode,
			UID:         h.Uid,
			GID:         h.Gid,
			Uname:       w.nameIfChanged(&w.lastUsername, h.Uid, h.Uname),
			Gname:       w.nameIfChanged(&w.lastGroupname, h.Gid, h.Gname),
			ModTime3339: formatModtime(h.ModTime),
			Xattrs:      xattrs,
		}
		if err := w.condOpenGz(); err != nil {
			return err
		}
		if tw != nil {
			if err := tw.WriteHeader(h); err != nil {
				return err
			}
		} else {
			if _, err := dst.Write(tr.RawBytes()); err != nil {
				return err
			}
		}
		switch h.Typeflag {
		case tar.TypeLink:
			ent.Type = "hardlink"
			ent.LinkName = h.Linkname
		case tar.TypeSymlink:
			ent.Type = "symlink"
			ent.LinkName = h.Linkname
		case tar.TypeDir:
			ent.Type = "dir"
		case tar.TypeReg:
			ent.Type = "reg"
			ent.Size = h.Size
		case tar.TypeChar:
			ent.Type = "char"
			ent.DevMajor = int(h.Devmajor)
			ent.DevMinor = int(h.Devminor)
		case tar.TypeBlock:
			ent.Type = "block"
			ent.DevMajor = int(h.Devmajor)
			ent.DevMinor = int(h.Devminor)
		case tar.TypeFifo:
			ent.Type = "fifo"
		default:
			return fmt.Errorf("unsupported input tar entry %q", h.Typeflag)
		}

		// For regular files, keep a reference to the TOC entry and hash the
		// whole payload so the Digest field can be filled in after the content
		// is written.
		var regFileEntry *TOCEntry
		var payloadDigest digest.Digester
		if h.Typeflag == tar.TypeReg {
			regFileEntry = ent
			payloadDigest = digest.Canonical.Digester()
		}

		if h.Typeflag == tar.TypeReg && ent.Size > 0 {
			var written int64
			totalSize := ent.Size
			tee := io.TeeReader(tr, payloadDigest.Hash())
			for written < totalSize {
				chunkSize := int64(w.chunkSize())
				remain := totalSize - written
				if remain < chunkSize {
					chunkSize = remain
				} else {
					ent.ChunkSize = chunkSize
				}

				// We flush the underlying compression writer here to correctly calculate "w.cw.n".
				if err := w.flushGz(); err != nil {
					return err
				}
				if w.needsOpenGz(ent) || w.cw.n-prevOffset >= int64(w.MinChunkSize) {
					if err := w.closeGz(); err != nil {
						return err
					}
					ent.Offset = w.cw.n
					prevOffset = ent.Offset
					prevOffsetUncompressed = w.uncompressedCounter.n
				} else {
					ent.Offset = prevOffset
					ent.InnerOffset = w.uncompressedCounter.n - prevOffsetUncompressed
				}

				ent.ChunkOffset = written
				chunkDigest := digest.Canonical.Digester()

				if err := w.condOpenGz(); err != nil {
					return err
				}

				teeChunk := io.TeeReader(tee, chunkDigest.Hash())
				var out io.Writer
				if tw != nil {
					out = tw
				} else {
					out = dst
				}
				if _, err := io.CopyN(out, teeChunk, chunkSize); err != nil {
					return fmt.Errorf("error copying %q: %v", h.Name, err)
				}
				ent.ChunkDigest = chunkDigest.Digest().String()
				w.toc.Entries = append(w.toc.Entries, ent)
				written += chunkSize
				ent = &TOCEntry{
					Name: h.Name,
					Type: "chunk",
				}
			}
		} else {
			w.toc.Entries = append(w.toc.Entries, ent)
		}
		if payloadDigest != nil {
			regFileEntry.Digest = payloadDigest.Digest().String()
		}
		if tw != nil {
			if err := tw.Flush(); err != nil {
				return err
			}
		}
	}
	remainDest := io.Discard
	if lossless {
		remainDest = dst
	}
	_, err := io.Copy(remainDest, src)
	return err
}

func (w *Writer) needsOpenGz(ent *TOCEntry) bool {
	if ent.Type != "reg" {
		return false
	}
	if w.needsOpenGzEntries == nil {
		return false
	}
	_, ok := w.needsOpenGzEntries[ent.Name]
	return ok
}

// DiffID returns the SHA-256 of the uncompressed tar bytes.
// It is only valid to call DiffID after Close.
func (w *Writer) DiffID() string {
	return fmt.Sprintf("sha256:%x", w.diffHash.Sum(nil))
}

func maxFooterSize(blobSize int64, decompressors ...Decompressor) (res int64) {
	for _, d := range decompressors {
		if s := d.FooterSize(); res < s && s <= blobSize {
			res = s
		}
	}
	return
}

func parseTOC(d Decompressor, sr *io.SectionReader, tocOff, tocSize int64, tocBytes []byte, opts openOpts) (*Reader, error) {
	if tocOff < 0 {
		// This means that the TOC isn't contained in the blob.
		// We pass a nil reader to ParseTOC and expect that ParseTOC acquires
		// the TOC from an external location.
		start := time.Now()
		toc, tocDgst, err := d.ParseTOC(nil)
		if err != nil {
			return nil, err
		}
		if opts.telemetry != nil && opts.telemetry.GetTocLatency != nil {
			opts.telemetry.GetTocLatency(start)
		}
		if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil {
			opts.telemetry.DeserializeTocLatency(start)
		}
		return &Reader{
			sr:           sr,
			toc:          toc,
			tocDigest:    tocDgst,
			decompressor: d,
		}, nil
	}
	if len(tocBytes) > 0 {
		start := time.Now()
		toc, tocDgst, err := d.ParseTOC(bytes.NewReader(tocBytes))
		if err == nil {
			if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil {
				opts.telemetry.DeserializeTocLatency(start)
			}
			return &Reader{
				sr:           sr,
				toc:          toc,
				tocDigest:    tocDgst,
				decompressor: d,
			}, nil
		}
	}

	start := time.Now()
	tocBytes = make([]byte, tocSize)
	if _, err := sr.ReadAt(tocBytes, tocOff); err != nil {
		return nil, fmt.Errorf("error reading %d byte TOC targz: %v", len(tocBytes), err)
	}
	if opts.telemetry != nil && opts.telemetry.GetTocLatency != nil {
		opts.telemetry.GetTocLatency(start)
	}
	start = time.Now()
	toc, tocDgst, err := d.ParseTOC(bytes.NewReader(tocBytes))
	if err != nil {
		return nil, err
	}
	if opts.telemetry != nil && opts.telemetry.DeserializeTocLatency != nil {
		opts.telemetry.DeserializeTocLatency(start)
	}
	return &Reader{
		sr:           sr,
		toc:          toc,
		tocDigest:    tocDgst,
		decompressor: d,
	}, nil
}

func formatModtime(t time.Time) string {
	if t.IsZero() || t.Unix() == 0 {
		return ""
	}
	return t.UTC().Round(time.Second).Format(time.RFC3339)
}

func cleanEntryName(name string) string {
	// Use path.Clean to consistently deal with path separators across platforms.
	return strings.TrimPrefix(path.Clean("/"+name), "/")
}

// countWriter counts how many bytes have been written to its wrapped
// io.Writer.
type countWriter struct {
	w io.Writer
	n int64
}

func (cw *countWriter) Write(p []byte) (n int, err error) {
	n, err = cw.w.Write(p)
	cw.n += int64(n)
	return
}

type countWriteFlusher struct {
	io.WriteCloser
	n int64
}

func (wc *countWriteFlusher) register(w io.WriteCloser) io.WriteCloser {
	wc.WriteCloser = w
	return wc
}

func (wc *countWriteFlusher) Write(p []byte) (n int, err error) {
	n, err = wc.WriteCloser.Write(p)
	wc.n += int64(n)
	return
}

func (wc *countWriteFlusher) Flush() error {
	if f, ok := wc.WriteCloser.(interface {
		Flush() error
	}); ok {
		return f.Flush()
	}
	return nil
}

func (wc *countWriteFlusher) Close() error {
	err := wc.WriteCloser.Close()
	wc.WriteCloser = nil
	return err
}

// isGzip reports whether br is positioned right before an upcoming gzip stream.
// It does not consume any bytes from br.
func isGzip(br *bufio.Reader) bool {
	const (
		gzipID1     = 0x1f
		gzipID2     = 0x8b
		gzipDeflate = 8
	)
	peek, _ := br.Peek(3)
	return len(peek) >= 3 && peek[0] == gzipID1 && peek[1] == gzipID2 && peek[2] == gzipDeflate
}

func positive(n int64) int64 {
	if n < 0 {
		return 0
	}
	return n
}

type countReader struct {
	r io.Reader
	n int64
}

func (cr *countReader) Read(p []byte) (n int, err error) {
	n, err = cr.r.Read(p)
	cr.n += int64(n)
	return
}