...

Source file src/github.com/gabriel-vasile/mimetype/internal/magic/archive.go

Documentation: github.com/gabriel-vasile/mimetype/internal/magic

     1  package magic
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  )
     7  
     8  var (
     9  	// SevenZ matches a 7z archive.
    10  	SevenZ = prefix([]byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C})
    11  	// Gzip matches gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer.
    12  	Gzip = prefix([]byte{0x1f, 0x8b})
    13  	// Fits matches an Flexible Image Transport System file.
    14  	Fits = prefix([]byte{
    15  		0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, 0x20, 0x3D, 0x20,
    16  		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    17  		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54,
    18  	})
    19  	// Xar matches an eXtensible ARchive format file.
    20  	Xar = prefix([]byte{0x78, 0x61, 0x72, 0x21})
    21  	// Bz2 matches a bzip2 file.
    22  	Bz2 = prefix([]byte{0x42, 0x5A, 0x68})
    23  	// Ar matches an ar (Unix) archive file.
    24  	Ar = prefix([]byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E})
    25  	// Deb matches a Debian package file.
    26  	Deb = offset([]byte{
    27  		0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D,
    28  		0x62, 0x69, 0x6E, 0x61, 0x72, 0x79,
    29  	}, 8)
    30  	// Warc matches a Web ARChive file.
    31  	Warc = prefix([]byte("WARC/1.0"), []byte("WARC/1.1"))
    32  	// Cab matches a Microsoft Cabinet archive file.
    33  	Cab = prefix([]byte("MSCF\x00\x00\x00\x00"))
    34  	// Xz matches an xz compressed stream based on https://tukaani.org/xz/xz-file-format.txt.
    35  	Xz = prefix([]byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00})
    36  	// Lzip matches an Lzip compressed file.
    37  	Lzip = prefix([]byte{0x4c, 0x5a, 0x49, 0x50})
    38  	// RPM matches an RPM or Delta RPM package file.
    39  	RPM = prefix([]byte{0xed, 0xab, 0xee, 0xdb}, []byte("drpm"))
    40  	// Cpio matches a cpio archive file.
    41  	Cpio = prefix([]byte("070707"), []byte("070701"), []byte("070702"))
    42  	// RAR matches a RAR archive file.
    43  	RAR = prefix([]byte("Rar!\x1A\x07\x00"), []byte("Rar!\x1A\x07\x01\x00"))
    44  )
    45  
    46  // InstallShieldCab matches an InstallShield Cabinet archive file.
    47  func InstallShieldCab(raw []byte, _ uint32) bool {
    48  	return len(raw) > 7 &&
    49  		bytes.Equal(raw[0:4], []byte("ISc(")) &&
    50  		raw[6] == 0 &&
    51  		(raw[7] == 1 || raw[7] == 2 || raw[7] == 4)
    52  }
    53  
    54  // Zstd matches a Zstandard archive file.
    55  func Zstd(raw []byte, limit uint32) bool {
    56  	return len(raw) >= 4 &&
    57  		(0x22 <= raw[0] && raw[0] <= 0x28 || raw[0] == 0x1E) && // Different Zstandard versions.
    58  		bytes.HasPrefix(raw[1:], []byte{0xB5, 0x2F, 0xFD})
    59  }
    60  
    61  // CRX matches a Chrome extension file: a zip archive prepended by a package header.
    62  func CRX(raw []byte, limit uint32) bool {
    63  	const minHeaderLen = 16
    64  	if len(raw) < minHeaderLen || !bytes.HasPrefix(raw, []byte("Cr24")) {
    65  		return false
    66  	}
    67  	pubkeyLen := binary.LittleEndian.Uint32(raw[8:12])
    68  	sigLen := binary.LittleEndian.Uint32(raw[12:16])
    69  	zipOffset := minHeaderLen + pubkeyLen + sigLen
    70  	if uint32(len(raw)) < zipOffset {
    71  		return false
    72  	}
    73  	return Zip(raw[zipOffset:], limit)
    74  }
    75  
    76  // Tar matches a (t)ape (ar)chive file.
    77  func Tar(raw []byte, _ uint32) bool {
    78  	// The "magic" header field for files in in UStar (POSIX IEEE P1003.1) archives
    79  	// has the prefix "ustar". The values of the remaining bytes in this field vary
    80  	// by archiver implementation.
    81  	if len(raw) >= 512 && bytes.HasPrefix(raw[257:], []byte{0x75, 0x73, 0x74, 0x61, 0x72}) {
    82  		return true
    83  	}
    84  
    85  	if len(raw) < 256 {
    86  		return false
    87  	}
    88  
    89  	// The older v7 format has no "magic" field, and therefore must be identified
    90  	// with heuristics based on legal ranges of values for other header fields:
    91  	// https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures
    92  	rules := []struct {
    93  		min, max uint8
    94  		i        int
    95  	}{
    96  		{0x21, 0xEF, 0},
    97  		{0x30, 0x37, 105},
    98  		{0x20, 0x37, 106},
    99  		{0x00, 0x00, 107},
   100  		{0x30, 0x37, 113},
   101  		{0x20, 0x37, 114},
   102  		{0x00, 0x00, 115},
   103  		{0x30, 0x37, 121},
   104  		{0x20, 0x37, 122},
   105  		{0x00, 0x00, 123},
   106  		{0x30, 0x37, 134},
   107  		{0x30, 0x37, 146},
   108  		{0x30, 0x37, 153},
   109  		{0x00, 0x37, 154},
   110  	}
   111  	for _, r := range rules {
   112  		if raw[r.i] < r.min || raw[r.i] > r.max {
   113  			return false
   114  		}
   115  	}
   116  
   117  	for _, i := range []uint8{135, 147, 155} {
   118  		if raw[i] != 0x00 && raw[i] != 0x20 {
   119  			return false
   120  		}
   121  	}
   122  
   123  	return true
   124  }
   125  

View as plain text