1
2
3
4
5 package zip
6
7 import (
8 "bufio"
9 "encoding/binary"
10 "errors"
11 "hash"
12 "hash/crc32"
13 "io"
14 "io/fs"
15 "os"
16 "path"
17 "sort"
18 "strings"
19 "sync"
20 "time"
21 )
22
// Sentinel errors returned by this package.
var (
	// ErrFormat is returned when the input cannot be parsed as a zip archive.
	ErrFormat = errors.New("zip: not a valid zip file")
	// ErrAlgorithm is returned by File.Open when no decompressor is
	// available for the file's compression method.
	ErrAlgorithm = errors.New("zip: unsupported compression algorithm")
	// ErrChecksum is returned when a CRC-32 comparison fails while reading
	// a file's content or its data descriptor.
	ErrChecksum = errors.New("zip: checksum error")
)
28
29
// A Reader serves content from a zip archive backed by an io.ReaderAt.
type Reader struct {
	r             io.ReaderAt
	File          []*File
	Comment       string
	decompressors map[uint16]Decompressor

	// baseOffset is the offset added to every header offset read from the
	// central directory (see init). It is computed by readDirectoryEnd and
	// may be reset to zero by init.
	baseOffset int64

	// fileList is the sorted list of entries used by Open. It is built at
	// most once, by initFileList, under fileListOnce.
	fileListOnce sync.Once
	fileList     []fileListEntry
}
45
46
// A ReadCloser is a Reader that owns the *os.File it reads from; Close
// closes that file. It is created by OpenReader.
type ReadCloser struct {
	f *os.File
	Reader
}
51
52
53
54
// A File is a single entry of a zip archive, described by its embedded
// FileHeader. Its content is accessed through Open (decompressed and
// checksummed) or OpenRaw (stored bytes).
type File struct {
	FileHeader
	zip          *Reader     // owning Reader; consulted for decompressors
	zipr         io.ReaderAt // underlying archive data
	headerOffset int64       // offset in zipr of the entry's local file header (base offset included)
	zip64        bool        // set when the directory entry carries a zip64 extra field
}
62
63
64 func OpenReader(name string) (*ReadCloser, error) {
65 f, err := os.Open(name)
66 if err != nil {
67 return nil, err
68 }
69 fi, err := f.Stat()
70 if err != nil {
71 f.Close()
72 return nil, err
73 }
74 r := new(ReadCloser)
75 if err := r.init(f, fi.Size()); err != nil {
76 f.Close()
77 return nil, err
78 }
79 r.f = f
80 return r, nil
81 }
82
83
84
85 func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
86 if size < 0 {
87 return nil, errors.New("zip: size cannot be negative")
88 }
89 zr := new(Reader)
90 if err := zr.init(r, size); err != nil {
91 return nil, err
92 }
93 return zr, nil
94 }
95
// init locates and parses the central directory of the archive held in r
// (size bytes long) and fills in z.r, z.baseOffset, z.Comment and z.File.
//
// It returns any error from readDirectoryEnd or from seeking to the
// directory. Directory headers are read until one fails; that failure is
// only reported when the number of headers read disagrees, modulo 65536,
// with the record count stored in the end-of-directory record.
func (z *Reader) init(r io.ReaderAt, size int64) error {
	end, baseOffset, err := readDirectoryEnd(r, size)
	if err != nil {
		return err
	}
	z.r = r
	z.baseOffset = baseOffset

	// directoryRecords comes straight from the archive and is not
	// validated, so it may be absurdly large. Preallocate z.File only when
	// the bytes outside the directory could actually hold that many
	// entries at a minimum of 30 bytes each.
	if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
		z.File = make([]*File, 0, end.directoryRecords)
	}
	z.Comment = end.comment
	rs := io.NewSectionReader(r, 0, size)
	if _, err = rs.Seek(z.baseOffset+int64(end.directoryOffset), io.SeekStart); err != nil {
		return err
	}
	buf := bufio.NewReader(rs)

	// Keep reading headers until one is malformed or truncated. The count
	// check after the loop decides whether that terminating error matters.
	for {
		f := &File{zip: z, zipr: r}
		err = readDirectoryHeader(f, buf)

		// If the very first header cannot be parsed with a non-zero base
		// offset, retry once from the directory offset with a base offset
		// of zero.
		if err == ErrFormat && z.baseOffset != 0 && len(z.File) == 0 {
			z.baseOffset = 0
			if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
				return err
			}
			buf.Reset(rs)
			continue
		}

		if err == ErrFormat || err == io.ErrUnexpectedEOF {
			break
		}
		if err != nil {
			return err
		}
		f.headerOffset += z.baseOffset
		z.File = append(z.File, f)
	}
	// Only the low 16 bits of the two counts are compared.
	if uint16(len(z.File)) != uint16(end.directoryRecords) {
		// Wrong number of entries: surface the error that ended the loop
		// (ErrFormat or io.ErrUnexpectedEOF).
		return err
	}
	return nil
}
156
157
158
159
160 func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) {
161 if z.decompressors == nil {
162 z.decompressors = make(map[uint16]Decompressor)
163 }
164 z.decompressors[method] = dcomp
165 }
166
167 func (z *Reader) decompressor(method uint16) Decompressor {
168 dcomp := z.decompressors[method]
169 if dcomp == nil {
170 dcomp = decompressor(method)
171 }
172 return dcomp
173 }
174
175
176 func (rc *ReadCloser) Close() error {
177 return rc.f.Close()
178 }
179
180
181
182
183
184
185 func (f *File) DataOffset() (offset int64, err error) {
186 bodyOffset, err := f.findBodyOffset()
187 if err != nil {
188 return
189 }
190 return f.headerOffset + bodyOffset, nil
191 }
192
193
194
195 func (f *File) Open() (io.ReadCloser, error) {
196 bodyOffset, err := f.findBodyOffset()
197 if err != nil {
198 return nil, err
199 }
200 size := int64(f.CompressedSize64)
201 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
202 dcomp := f.zip.decompressor(f.Method)
203 if dcomp == nil {
204 return nil, ErrAlgorithm
205 }
206 var rc io.ReadCloser = dcomp(r)
207 var desr io.Reader
208 if f.hasDataDescriptor() {
209 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
210 }
211 rc = &checksumReader{
212 rc: rc,
213 hash: crc32.NewIEEE(),
214 f: f,
215 desr: desr,
216 }
217 return rc, nil
218 }
219
220
221
222 func (f *File) OpenRaw() (io.Reader, error) {
223 bodyOffset, err := f.findBodyOffset()
224 if err != nil {
225 return nil, err
226 }
227 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64))
228 return r, nil
229 }
230
// checksumReader wraps a decompressing reader and checks the amount of
// data and its CRC-32 against the values recorded for the file.
type checksumReader struct {
	rc    io.ReadCloser // decompressed content
	hash  hash.Hash32   // running CRC-32 of everything read so far
	nread uint64        // number of bytes of content yielded
	f     *File         // file whose size and CRC32 are checked against
	desr  io.Reader     // if non-nil, where to read the data descriptor
	err   error         // sticky error returned by subsequent Reads
}
239
240 func (r *checksumReader) Stat() (fs.FileInfo, error) {
241 return headerFileInfo{&r.f.FileHeader}, nil
242 }
243
244 func (r *checksumReader) Read(b []byte) (n int, err error) {
245 if r.err != nil {
246 return 0, r.err
247 }
248 n, err = r.rc.Read(b)
249 r.hash.Write(b[:n])
250 r.nread += uint64(n)
251 if r.nread > r.f.UncompressedSize64 {
252 return 0, ErrFormat
253 }
254 if err == nil {
255 return
256 }
257 if err == io.EOF {
258 if r.nread != r.f.UncompressedSize64 {
259 return 0, io.ErrUnexpectedEOF
260 }
261 if r.desr != nil {
262 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
263 if err1 == io.EOF {
264 err = io.ErrUnexpectedEOF
265 } else {
266 err = err1
267 }
268 } else if r.hash.Sum32() != r.f.CRC32 {
269 err = ErrChecksum
270 }
271 } else {
272
273
274
275 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
276 err = ErrChecksum
277 }
278 }
279 }
280 r.err = err
281 return
282 }
283
284 func (r *checksumReader) Close() error { return r.rc.Close() }
285
286
287
288 func (f *File) findBodyOffset() (int64, error) {
289 var buf [fileHeaderLen]byte
290 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
291 return 0, err
292 }
293 b := readBuf(buf[:])
294 if sig := b.uint32(); sig != fileHeaderSignature {
295 return 0, ErrFormat
296 }
297 b = b[22:]
298 filenameLen := int(b.uint16())
299 extraLen := int(b.uint16())
300 return int64(fileHeaderLen + filenameLen + extraLen), nil
301 }
302
303
304
305
// readDirectoryHeader reads one central directory header from r and
// stores its fields in f.
//
// It returns ErrFormat when the header signature is wrong, when a zip64
// extra field is too short for a value it must supply, or when the
// compressed size or header offset is saturated (0xffffffff) and no zip64
// extra field provides the real value. Read errors from r are returned
// unchanged.
func readDirectoryHeader(f *File, r io.Reader) error {
	var buf [directoryHeaderLen]byte
	if _, err := io.ReadFull(r, buf[:]); err != nil {
		return err
	}
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != directoryHeaderSignature {
		return ErrFormat
	}
	f.CreatorVersion = b.uint16()
	f.ReaderVersion = b.uint16()
	f.Flags = b.uint16()
	f.Method = b.uint16()
	f.ModifiedTime = b.uint16()
	f.ModifiedDate = b.uint16()
	f.CRC32 = b.uint32()
	f.CompressedSize = b.uint32()
	f.UncompressedSize = b.uint32()
	f.CompressedSize64 = uint64(f.CompressedSize)
	f.UncompressedSize64 = uint64(f.UncompressedSize)
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	commentLen := int(b.uint16())
	// Skip four bytes that are not stored in f (in the ZIP central
	// directory layout: disk number start and internal attributes).
	b = b[4:]
	f.ExternalAttrs = b.uint32()
	f.headerOffset = int64(b.uint32())
	// The variable-length name, extra field and comment follow the fixed
	// header, in that order.
	d := make([]byte, filenameLen+extraLen+commentLen)
	if _, err := io.ReadFull(r, d); err != nil {
		return err
	}
	f.Name = string(d[:filenameLen])
	f.Extra = d[filenameLen : filenameLen+extraLen]
	f.Comment = string(d[filenameLen+extraLen:])

	// Decide whether the name and comment should be reported as non-UTF-8.
	utf8Valid1, utf8Require1 := detectUTF8(f.Name)
	utf8Valid2, utf8Require2 := detectUTF8(f.Comment)
	switch {
	case !utf8Valid1 || !utf8Valid2:
		// At least one of the two is not valid UTF-8.
		f.NonUTF8 = true
	case !utf8Require1 && !utf8Require2:
		// Neither string needs UTF-8 to be represented.
		f.NonUTF8 = false
	default:
		// Valid UTF-8 that requires UTF-8: defer to header flag bit 0x800,
		// treating the strings as UTF-8 only when it is set.
		f.NonUTF8 = f.Flags&0x800 == 0
	}

	// A 32-bit field holding all ones means the real value must come from
	// the zip64 extra field.
	needUSize := f.UncompressedSize == ^uint32(0)
	needCSize := f.CompressedSize == ^uint32(0)
	needHeaderOffset := f.headerOffset == int64(^uint32(0))

	// Walk the extra fields. Each is a 2-byte tag and a 2-byte size
	// followed by size bytes of payload. modified keeps the timestamp from
	// the last timestamp-bearing field that parsed.
	var modified time.Time
parseExtras:
	for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
		fieldTag := extra.uint16()
		fieldSize := int(extra.uint16())
		if len(extra) < fieldSize {
			break
		}
		fieldBuf := extra.sub(fieldSize)

		switch fieldTag {
		case zip64ExtraID:
			f.zip64 = true

			// The 64-bit values appear in this fixed order, and each one
			// is consumed only if the matching 32-bit header field was
			// saturated.
			if needUSize {
				needUSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.UncompressedSize64 = fieldBuf.uint64()
			}
			if needCSize {
				needCSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.CompressedSize64 = fieldBuf.uint64()
			}
			if needHeaderOffset {
				needHeaderOffset = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.headerOffset = int64(fieldBuf.uint64())
			}
		case ntfsExtraID:
			if len(fieldBuf) < 4 {
				continue parseExtras
			}
			fieldBuf.uint32()        // leading 4 bytes are read and discarded
			for len(fieldBuf) >= 4 { // need at least tag and size
				attrTag := fieldBuf.uint16()
				attrSize := int(fieldBuf.uint16())
				if len(fieldBuf) < attrSize {
					continue parseExtras
				}
				attrBuf := fieldBuf.sub(attrSize)
				if attrTag != 1 || attrSize != 24 {
					continue // only attribute tag 1 with a 24-byte payload is used
				}

				// The first 8 bytes count 1e7-per-second ticks since
				// 1601-01-01 UTC; convert to a Unix-based time.Time.
				const ticksPerSecond = 1e7
				ts := int64(attrBuf.uint64())
				secs := int64(ts / ticksPerSecond)
				nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
				epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
				modified = time.Unix(epoch.Unix()+secs, nsecs)
			}
		case unixExtraID, infoZipUnixExtraID:
			if len(fieldBuf) < 8 {
				continue parseExtras
			}
			fieldBuf.uint32()              // first 4 bytes are read and discarded
			ts := int64(fieldBuf.uint32()) // next 4 bytes: seconds since the Unix epoch
			modified = time.Unix(ts, 0)
		case extTimeExtraID:
			// The first byte is a flags byte; bit 0 must be set for the
			// following 4-byte timestamp to be used.
			if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
				continue parseExtras
			}
			ts := int64(fieldBuf.uint32()) // seconds since the Unix epoch
			modified = time.Unix(ts, 0)
		}
	}

	// Default to the MS-DOS date/time from the fixed header; prefer an
	// extra-field timestamp when one was found.
	msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
	f.Modified = msdosModified
	if !modified.IsZero() {
		f.Modified = modified.UTC()

		// When the MS-DOS fields are set as well, present the extra-field
		// instant in a zone derived (via timeZone) from the difference
		// between the MS-DOS time and that instant.
		if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
			f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
		}
	}

	// A saturated uncompressed size with no zip64 replacement is
	// deliberately tolerated here, unlike the compressed size and header
	// offset checked below.
	_ = needUSize

	if needCSize || needHeaderOffset {
		return ErrFormat
	}

	return nil
}
477
478 func readDataDescriptor(r io.Reader, f *File) error {
479 var buf [dataDescriptorLen]byte
480
481
482
483
484
485
486
487
488
489
490 if _, err := io.ReadFull(r, buf[:4]); err != nil {
491 return err
492 }
493 off := 0
494 maybeSig := readBuf(buf[:4])
495 if maybeSig.uint32() != dataDescriptorSignature {
496
497
498 off += 4
499 }
500 if _, err := io.ReadFull(r, buf[off:12]); err != nil {
501 return err
502 }
503 b := readBuf(buf[:12])
504 if b.uint32() != f.CRC32 {
505 return ErrChecksum
506 }
507
508
509
510
511
512
513
514 return nil
515 }
516
517 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) {
518
519 var buf []byte
520 var directoryEndOffset int64
521 for i, bLen := range []int64{1024, 65 * 1024} {
522 if bLen > size {
523 bLen = size
524 }
525 buf = make([]byte, int(bLen))
526 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
527 return nil, 0, err
528 }
529 if p := findSignatureInBlock(buf); p >= 0 {
530 buf = buf[p:]
531 directoryEndOffset = size - bLen + int64(p)
532 break
533 }
534 if i == 1 || bLen == size {
535 return nil, 0, ErrFormat
536 }
537 }
538
539
540 b := readBuf(buf[4:])
541 d := &directoryEnd{
542 diskNbr: uint32(b.uint16()),
543 dirDiskNbr: uint32(b.uint16()),
544 dirRecordsThisDisk: uint64(b.uint16()),
545 directoryRecords: uint64(b.uint16()),
546 directorySize: uint64(b.uint32()),
547 directoryOffset: uint64(b.uint32()),
548 commentLen: b.uint16(),
549 }
550 l := int(d.commentLen)
551 if l > len(b) {
552 return nil, 0, errors.New("zip: invalid comment length")
553 }
554 d.comment = string(b[:l])
555
556
557 if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
558 p, err := findDirectory64End(r, directoryEndOffset)
559 if err == nil && p >= 0 {
560 directoryEndOffset = p
561 err = readDirectory64End(r, p, d)
562 }
563 if err != nil {
564 return nil, 0, err
565 }
566 }
567
568 baseOffset = directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset)
569
570
571 if o := baseOffset + int64(d.directoryOffset); o < 0 || o >= size {
572 return nil, 0, ErrFormat
573 }
574 return d, baseOffset, nil
575 }
576
577
578
579
580 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
581 locOffset := directoryEndOffset - directory64LocLen
582 if locOffset < 0 {
583 return -1, nil
584 }
585 buf := make([]byte, directory64LocLen)
586 if _, err := r.ReadAt(buf, locOffset); err != nil {
587 return -1, err
588 }
589 b := readBuf(buf)
590 if sig := b.uint32(); sig != directory64LocSignature {
591 return -1, nil
592 }
593 if b.uint32() != 0 {
594 return -1, nil
595 }
596 p := b.uint64()
597 if b.uint32() != 1 {
598 return -1, nil
599 }
600 return int64(p), nil
601 }
602
603
604
605 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
606 buf := make([]byte, directory64EndLen)
607 if _, err := r.ReadAt(buf, offset); err != nil {
608 return err
609 }
610
611 b := readBuf(buf)
612 if sig := b.uint32(); sig != directory64EndSignature {
613 return ErrFormat
614 }
615
616 b = b[12:]
617 d.diskNbr = b.uint32()
618 d.dirDiskNbr = b.uint32()
619 d.dirRecordsThisDisk = b.uint64()
620 d.directoryRecords = b.uint64()
621 d.directorySize = b.uint64()
622 d.directoryOffset = b.uint64()
623
624 return nil
625 }
626
627 func findSignatureInBlock(b []byte) int {
628 for i := len(b) - directoryEndLen; i >= 0; i-- {
629
630 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
631
632 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
633 if n+directoryEndLen+i <= len(b) {
634 return i
635 }
636 }
637 }
638 return -1
639 }
640
// readBuf is a byte slice consumed from the front. Each method decodes a
// value from the start of the slice and advances the slice past it. The
// methods panic if the slice is too short.
type readBuf []byte

// uint8 returns the first byte and advances by one.
func (b *readBuf) uint8() uint8 {
	data := *b
	first := data[0]
	*b = data[1:]
	return first
}

// uint16 decodes a little-endian uint16 and advances by two bytes.
func (b *readBuf) uint16() uint16 {
	data := *b
	value := binary.LittleEndian.Uint16(data)
	*b = data[2:]
	return value
}

// uint32 decodes a little-endian uint32 and advances by four bytes.
func (b *readBuf) uint32() uint32 {
	data := *b
	value := binary.LittleEndian.Uint32(data)
	*b = data[4:]
	return value
}

// uint64 decodes a little-endian uint64 and advances by eight bytes.
func (b *readBuf) uint64() uint64 {
	data := *b
	value := binary.LittleEndian.Uint64(data)
	*b = data[8:]
	return value
}

// sub returns the first n bytes as their own readBuf and advances past
// them. The result shares storage with the receiver.
func (b *readBuf) sub(n int) readBuf {
	data := *b
	head, tail := data[:n], data[n:]
	*b = tail
	return head
}
672
673
674
// A fileListEntry is one entry of the sorted list a Reader builds to
// serve Open. It is either a file from the archive or a directory, and a
// directory may have no archive entry of its own.
type fileListEntry struct {
	name  string // cleaned name, as produced by toValidName
	file  *File  // archive entry; nil for a directory known only from the paths of other entries
	isDir bool   // whether the entry is treated as a directory
	isDup bool   // set when another entry resolved to the same name; stat then fails
}
681
// fileInfoDirEntry is implemented by values that can act as both an
// fs.FileInfo and an fs.DirEntry; it is the result type of
// fileListEntry.stat.
type fileInfoDirEntry interface {
	fs.FileInfo
	fs.DirEntry
}
686
687 func (e *fileListEntry) stat() (fileInfoDirEntry, error) {
688 if e.isDup {
689 return nil, errors.New(e.name + ": duplicate entries in zip file")
690 }
691 if !e.isDir {
692 return headerFileInfo{&e.file.FileHeader}, nil
693 }
694 return e, nil
695 }
696
697
698 func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem }
699 func (f *fileListEntry) Size() int64 { return 0 }
700 func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 }
701 func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir }
702 func (f *fileListEntry) IsDir() bool { return true }
703 func (f *fileListEntry) Sys() interface{} { return nil }
704
705 func (f *fileListEntry) ModTime() time.Time {
706 if f.file == nil {
707 return time.Time{}
708 }
709 return f.file.FileHeader.Modified.UTC()
710 }
711
712 func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil }
713
714
// toValidName coerces an archive entry name into a form usable as a
// path for fs.FS.Open: backslashes become slashes, the path is cleaned,
// a leading "/" is dropped, and every leading "../" element is removed.
func toValidName(name string) string {
	name = strings.ReplaceAll(name, `\`, `/`)
	p := path.Clean(name)
	p = strings.TrimPrefix(p, "/")
	// path.Clean keeps any number of leading "../" elements on a relative
	// path, so removing only one would let "../../x" through as "../x".
	for strings.HasPrefix(p, "../") {
		p = p[len("../"):]
	}
	return p
}
722
723 func (r *Reader) initFileList() {
724 r.fileListOnce.Do(func() {
725
726
727
728 files := make(map[string]int)
729 knownDirs := make(map[string]int)
730
731
732
733 dirs := make(map[string]bool)
734
735 for _, file := range r.File {
736 isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
737 name := toValidName(file.Name)
738 if name == "" {
739 continue
740 }
741
742 if idx, ok := files[name]; ok {
743 r.fileList[idx].isDup = true
744 continue
745 }
746 if idx, ok := knownDirs[name]; ok {
747 r.fileList[idx].isDup = true
748 continue
749 }
750
751 for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
752 dirs[dir] = true
753 }
754
755 idx := len(r.fileList)
756 entry := fileListEntry{
757 name: name,
758 file: file,
759 isDir: isDir,
760 }
761 r.fileList = append(r.fileList, entry)
762 if isDir {
763 knownDirs[name] = idx
764 } else {
765 files[name] = idx
766 }
767 }
768 for dir := range dirs {
769 if _, ok := knownDirs[dir]; !ok {
770 if idx, ok := files[dir]; ok {
771 r.fileList[idx].isDup = true
772 } else {
773 entry := fileListEntry{
774 name: dir,
775 file: nil,
776 isDir: true,
777 }
778 r.fileList = append(r.fileList, entry)
779 }
780 }
781 }
782
783 sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) })
784 })
785 }
786
787 func fileEntryLess(x, y string) bool {
788 xdir, xelem, _ := split(x)
789 ydir, yelem, _ := split(y)
790 return xdir < ydir || xdir == ydir && xelem < yelem
791 }
792
793
794
795
796
797 func (r *Reader) Open(name string) (fs.File, error) {
798 r.initFileList()
799
800 if !fs.ValidPath(name) {
801 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
802 }
803 e := r.openLookup(name)
804 if e == nil {
805 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
806 }
807 if e.isDir {
808 return &openDir{e, r.openReadDir(name), 0}, nil
809 }
810 rc, err := e.file.Open()
811 if err != nil {
812 return nil, err
813 }
814 return rc.(fs.File), nil
815 }
816
// split breaks name into its directory and final element. One trailing
// slash, if present, is removed first and reported through isDir. A name
// without any remaining slash has directory ".".
func split(name string) (dir, elem string, isDir bool) {
	isDir = strings.HasSuffix(name, "/")
	name = strings.TrimSuffix(name, "/")

	cut := strings.LastIndexByte(name, '/')
	if cut < 0 {
		return ".", name, isDir
	}
	return name[:cut], name[cut+1:], isDir
}
831
// dotFile is the synthetic directory entry that openLookup returns for
// the name ".".
var dotFile = &fileListEntry{name: "./", isDir: true}
833
834 func (r *Reader) openLookup(name string) *fileListEntry {
835 if name == "." {
836 return dotFile
837 }
838
839 dir, elem, _ := split(name)
840 files := r.fileList
841 i := sort.Search(len(files), func(i int) bool {
842 idir, ielem, _ := split(files[i].name)
843 return idir > dir || idir == dir && ielem >= elem
844 })
845 if i < len(files) {
846 fname := files[i].name
847 if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name {
848 return &files[i]
849 }
850 }
851 return nil
852 }
853
854 func (r *Reader) openReadDir(dir string) []fileListEntry {
855 files := r.fileList
856 i := sort.Search(len(files), func(i int) bool {
857 idir, _, _ := split(files[i].name)
858 return idir >= dir
859 })
860 j := sort.Search(len(files), func(j int) bool {
861 jdir, _, _ := split(files[j].name)
862 return jdir > dir
863 })
864 return files[i:j]
865 }
866
// openDir is the fs.File returned by Reader.Open for a directory.
type openDir struct {
	e      *fileListEntry  // the directory itself
	files  []fileListEntry // entries directly inside the directory
	offset int             // number of entries already returned by ReadDir
}
872
873 func (d *openDir) Close() error { return nil }
874 func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() }
875
876 func (d *openDir) Read([]byte) (int, error) {
877 return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
878 }
879
880 func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
881 n := len(d.files) - d.offset
882 if count > 0 && n > count {
883 n = count
884 }
885 if n == 0 {
886 if count <= 0 {
887 return nil, nil
888 }
889 return nil, io.EOF
890 }
891 list := make([]fs.DirEntry, n)
892 for i := range list {
893 s, err := d.files[d.offset+i].stat()
894 if err != nil {
895 return nil, err
896 }
897 list[i] = s
898 }
899 d.offset += n
900 return list, nil
901 }
902
View as plain text