...

Source file src/github.com/gabriel-vasile/mimetype/internal/magic/binary.go

Documentation: github.com/gabriel-vasile/mimetype/internal/magic

     1  package magic
     2  
     3  import (
     4  	"bytes"
     5  	"debug/macho"
     6  	"encoding/binary"
     7  )
     8  
     9  var (
    10  	// Lnk matches Microsoft lnk binary format.
    11  	Lnk = prefix([]byte{0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00})
    12  	// Wasm matches a web assembly File Format file.
    13  	Wasm = prefix([]byte{0x00, 0x61, 0x73, 0x6D})
    14  	// Exe matches a Windows/DOS executable file.
    15  	Exe = prefix([]byte{0x4D, 0x5A})
    16  	// Elf matches an Executable and Linkable Format file.
    17  	Elf = prefix([]byte{0x7F, 0x45, 0x4C, 0x46})
    18  	// Nes matches a Nintendo Entertainment system ROM file.
    19  	Nes = prefix([]byte{0x4E, 0x45, 0x53, 0x1A})
    20  	// SWF matches an Adobe Flash swf file.
    21  	SWF = prefix([]byte("CWS"), []byte("FWS"), []byte("ZWS"))
    22  	// Torrent has bencoded text in the beginning.
    23  	Torrent = prefix([]byte("d8:announce"))
    24  )
    25  
    26  // Java bytecode and Mach-O binaries share the same magic number.
    27  // More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
    28  func classOrMachOFat(in []byte) bool {
    29  	// There should be at least 8 bytes for both of them because the only way to
    30  	// quickly distinguish them is by comparing byte at position 7
    31  	if len(in) < 8 {
    32  		return false
    33  	}
    34  
    35  	return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE})
    36  }
    37  
    38  // Class matches a java class file.
    39  func Class(raw []byte, limit uint32) bool {
    40  	return classOrMachOFat(raw) && raw[7] > 30
    41  }
    42  
    43  // MachO matches Mach-O binaries format.
    44  func MachO(raw []byte, limit uint32) bool {
    45  	if classOrMachOFat(raw) && raw[7] < 20 {
    46  		return true
    47  	}
    48  
    49  	if len(raw) < 4 {
    50  		return false
    51  	}
    52  
    53  	be := binary.BigEndian.Uint32(raw)
    54  	le := binary.LittleEndian.Uint32(raw)
    55  
    56  	return be == macho.Magic32 ||
    57  		le == macho.Magic32 ||
    58  		be == macho.Magic64 ||
    59  		le == macho.Magic64
    60  }
    61  
    62  // Dbf matches a dBase file.
    63  // https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
    64  func Dbf(raw []byte, limit uint32) bool {
    65  	if len(raw) < 68 {
    66  		return false
    67  	}
    68  
    69  	// 3rd and 4th bytes contain the last update month and day of month.
    70  	if !(0 < raw[2] && raw[2] < 13 && 0 < raw[3] && raw[3] < 32) {
    71  		return false
    72  	}
    73  
    74  	// 12, 13, 30, 31 are reserved bytes and always filled with 0x00.
    75  	if raw[12] != 0x00 || raw[13] != 0x00 || raw[30] != 0x00 || raw[31] != 0x00 {
    76  		return false
    77  	}
    78  	// Production MDX flag;
    79  	// 0x01 if a production .MDX file exists for this table;
    80  	// 0x00 if no .MDX file exists.
    81  	if raw[28] > 0x01 {
    82  		return false
    83  	}
    84  
    85  	// dbf type is dictated by the first byte.
    86  	dbfTypes := []byte{
    87  		0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
    88  		0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB,
    89  	}
    90  	for _, b := range dbfTypes {
    91  		if raw[0] == b {
    92  			return true
    93  		}
    94  	}
    95  
    96  	return false
    97  }
    98  
    99  // ElfObj matches an object file.
   100  func ElfObj(raw []byte, limit uint32) bool {
   101  	return len(raw) > 17 && ((raw[16] == 0x01 && raw[17] == 0x00) ||
   102  		(raw[16] == 0x00 && raw[17] == 0x01))
   103  }
   104  
   105  // ElfExe matches an executable file.
   106  func ElfExe(raw []byte, limit uint32) bool {
   107  	return len(raw) > 17 && ((raw[16] == 0x02 && raw[17] == 0x00) ||
   108  		(raw[16] == 0x00 && raw[17] == 0x02))
   109  }
   110  
   111  // ElfLib matches a shared library file.
   112  func ElfLib(raw []byte, limit uint32) bool {
   113  	return len(raw) > 17 && ((raw[16] == 0x03 && raw[17] == 0x00) ||
   114  		(raw[16] == 0x00 && raw[17] == 0x03))
   115  }
   116  
   117  // ElfDump matches a core dump file.
   118  func ElfDump(raw []byte, limit uint32) bool {
   119  	return len(raw) > 17 && ((raw[16] == 0x04 && raw[17] == 0x00) ||
   120  		(raw[16] == 0x00 && raw[17] == 0x04))
   121  }
   122  
   123  // Dcm matches a DICOM medical format file.
   124  func Dcm(raw []byte, limit uint32) bool {
   125  	return len(raw) > 131 &&
   126  		bytes.Equal(raw[128:132], []byte{0x44, 0x49, 0x43, 0x4D})
   127  }
   128  
   129  // Marc matches a MARC21 (MAchine-Readable Cataloging) file.
   130  func Marc(raw []byte, limit uint32) bool {
   131  	// File is at least 24 bytes ("leader" field size).
   132  	if len(raw) < 24 {
   133  		return false
   134  	}
   135  
   136  	// Fixed bytes at offset 20.
   137  	if !bytes.Equal(raw[20:24], []byte("4500")) {
   138  		return false
   139  	}
   140  
   141  	// First 5 bytes are ASCII digits.
   142  	for i := 0; i < 5; i++ {
   143  		if raw[i] < '0' || raw[i] > '9' {
   144  			return false
   145  		}
   146  	}
   147  
   148  	// Field terminator is present in first 2048 bytes.
   149  	return bytes.Contains(raw[:min(2048, len(raw))], []byte{0x1E})
   150  }
   151  
   152  // Glb matches a glTF model format file.
   153  // GLB is the binary file format representation of 3D models saved in
   154  // the GL transmission Format (glTF).
   155  // GLB uses little endian and its header structure is as follows:
   156  //
   157  // 	<-- 12-byte header                             -->
   158  // 	| magic            | version          | length   |
   159  // 	| (uint32)         | (uint32)         | (uint32) |
   160  // 	| \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ...      |
   161  // 	| g   l   T   F    | 1                | ...      |
   162  //
   163  // Visit [glTF specification] and [IANA glTF entry] for more details.
   164  //
   165  // [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html
   166  // [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf-binary
   167  var Glb = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"),
   168  	[]byte("\x67\x6C\x54\x46\x01\x00\x00\x00"))
   169  
   170  // TzIf matches a Time Zone Information Format (TZif) file.
   171  // See more: https://tools.ietf.org/id/draft-murchison-tzdist-tzif-00.html#rfc.section.3
   172  // Its header structure is shown below:
   173  // 	+---------------+---+
   174  // 	|  magic    (4) | <-+-- version (1)
   175  // 	+---------------+---+---------------------------------------+
   176  // 	|           [unused - reserved for future use] (15)         |
   177  // 	+---------------+---------------+---------------+-----------+
   178  // 	|  isutccnt (4) |  isstdcnt (4) |  leapcnt  (4) |
   179  // 	+---------------+---------------+---------------+
   180  // 	|  timecnt  (4) |  typecnt  (4) |  charcnt  (4) |
   181  func TzIf(raw []byte, limit uint32) bool {
   182  	// File is at least 44 bytes (header size).
   183  	if len(raw) < 44 {
   184  		return false
   185  	}
   186  
   187  	if !bytes.HasPrefix(raw, []byte("TZif")) {
   188  		return false
   189  	}
   190  
   191  	// Field "typecnt" MUST not be zero.
   192  	if binary.BigEndian.Uint32(raw[36:40]) == 0 {
   193  		return false
   194  	}
   195  
   196  	// Version has to be NUL (0x00), '2' (0x32) or '3' (0x33).
   197  	return raw[4] == 0x00 || raw[4] == 0x32 || raw[4] == 0x33
   198  }
   199  

View as plain text