magic.go

Documentation: github.com/gabriel-vasile/mimetype/internal/magic

     1  // Package magic holds the matching functions used to find MIME types.
     2  package magic
     3  
     4  import (
     5  	"bytes"
     6  	"fmt"
     7  )
     8  
     9  type (
    10  	// Detector receiveѕ the raw data of a file and returns whether the data
    11  	// meets any conditions. The limit parameter is an upper limit to the number
    12  	// of bytes received and is used to tell if the byte slice represents the
    13  	// whole file or is just the header of a file: len(raw) < limit or len(raw)>limit.
    14  	Detector func(raw []byte, limit uint32) bool
    15  	xmlSig   struct {
    16  		// the local name of the root tag
    17  		localName []byte
    18  		// the namespace of the XML document
    19  		xmlns []byte
    20  	}
    21  )
    22  
    23  // prefix creates a Detector which returns true if any of the provided signatures
    24  // is the prefix of the raw input.
    25  func prefix(sigs ...[]byte) Detector {
    26  	return func(raw []byte, limit uint32) bool {
    27  		for _, s := range sigs {
    28  			if bytes.HasPrefix(raw, s) {
    29  				return true
    30  			}
    31  		}
    32  		return false
    33  	}
    34  }
    35  
    36  // offset creates a Detector which returns true if the provided signature can be
    37  // found at offset in the raw input.
    38  func offset(sig []byte, offset int) Detector {
    39  	return func(raw []byte, limit uint32) bool {
    40  		return len(raw) > offset && bytes.HasPrefix(raw[offset:], sig)
    41  	}
    42  }
    43  
    44  // ciPrefix is like prefix but the check is case insensitive.
    45  func ciPrefix(sigs ...[]byte) Detector {
    46  	return func(raw []byte, limit uint32) bool {
    47  		for _, s := range sigs {
    48  			if ciCheck(s, raw) {
    49  				return true
    50  			}
    51  		}
    52  		return false
    53  	}
    54  }
    55  func ciCheck(sig, raw []byte) bool {
    56  	if len(raw) < len(sig)+1 {
    57  		return false
    58  	}
    59  	// perform case insensitive check
    60  	for i, b := range sig {
    61  		db := raw[i]
    62  		if 'A' <= b && b <= 'Z' {
    63  			db &= 0xDF
    64  		}
    65  		if b != db {
    66  			return false
    67  		}
    68  	}
    69  
    70  	return true
    71  }
    72  
    73  // xml creates a Detector which returns true if any of the provided XML signatures
    74  // matches the raw input.
    75  func xml(sigs ...xmlSig) Detector {
    76  	return func(raw []byte, limit uint32) bool {
    77  		raw = trimLWS(raw)
    78  		if len(raw) == 0 {
    79  			return false
    80  		}
    81  		for _, s := range sigs {
    82  			if xmlCheck(s, raw) {
    83  				return true
    84  			}
    85  		}
    86  		return false
    87  	}
    88  }
    89  func xmlCheck(sig xmlSig, raw []byte) bool {
    90  	raw = raw[:min(len(raw), 512)]
    91  
    92  	if len(sig.localName) == 0 {
    93  		return bytes.Index(raw, sig.xmlns) > 0
    94  	}
    95  	if len(sig.xmlns) == 0 {
    96  		return bytes.Index(raw, sig.localName) > 0
    97  	}
    98  
    99  	localNameIndex := bytes.Index(raw, sig.localName)
   100  	return localNameIndex != -1 && localNameIndex < bytes.Index(raw, sig.xmlns)
   101  }
   102  
   103  // markup creates a Detector which returns true is any of the HTML signatures
   104  // matches the raw input.
   105  func markup(sigs ...[]byte) Detector {
   106  	return func(raw []byte, limit uint32) bool {
   107  		if bytes.HasPrefix(raw, []byte{0xEF, 0xBB, 0xBF}) {
   108  			// We skip the UTF-8 BOM if present to ensure we correctly
   109  			// process any leading whitespace. The presence of the BOM
   110  			// is taken into account during charset detection in charset.go.
   111  			raw = trimLWS(raw[3:])
   112  		} else {
   113  			raw = trimLWS(raw)
   114  		}
   115  		if len(raw) == 0 {
   116  			return false
   117  		}
   118  		for _, s := range sigs {
   119  			if markupCheck(s, raw) {
   120  				return true
   121  			}
   122  		}
   123  		return false
   124  	}
   125  }
   126  func markupCheck(sig, raw []byte) bool {
   127  	if len(raw) < len(sig)+1 {
   128  		return false
   129  	}
   130  
   131  	// perform case insensitive check
   132  	for i, b := range sig {
   133  		db := raw[i]
   134  		if 'A' <= b && b <= 'Z' {
   135  			db &= 0xDF
   136  		}
   137  		if b != db {
   138  			return false
   139  		}
   140  	}
   141  	// Next byte must be space or right angle bracket.
   142  	if db := raw[len(sig)]; db != ' ' && db != '>' {
   143  		return false
   144  	}
   145  
   146  	return true
   147  }
   148  
   149  // ftyp creates a Detector which returns true if any of the FTYP signatures
   150  // matches the raw input.
   151  func ftyp(sigs ...[]byte) Detector {
   152  	return func(raw []byte, limit uint32) bool {
   153  		if len(raw) < 12 {
   154  			return false
   155  		}
   156  		for _, s := range sigs {
   157  			if bytes.Equal(raw[4:12], append([]byte("ftyp"), s...)) {
   158  				return true
   159  			}
   160  		}
   161  		return false
   162  	}
   163  }
   164  
   165  func newXMLSig(localName, xmlns string) xmlSig {
   166  	ret := xmlSig{xmlns: []byte(xmlns)}
   167  	if localName != "" {
   168  		ret.localName = []byte(fmt.Sprintf("<%s", localName))
   169  	}
   170  
   171  	return ret
   172  }
   173  
   174  // A valid shebang starts with the "#!" characters,
   175  // followed by any number of spaces,
   176  // followed by the path to the interpreter,
   177  // and, optionally, followed by the arguments for the interpreter.
   178  //
   179  // Ex:
   180  //
   181  //	#! /usr/bin/env php
   182  //
   183  // /usr/bin/env is the interpreter, php is the first and only argument.
   184  func shebang(sigs ...[]byte) Detector {
   185  	return func(raw []byte, limit uint32) bool {
   186  		for _, s := range sigs {
   187  			if shebangCheck(s, firstLine(raw)) {
   188  				return true
   189  			}
   190  		}
   191  		return false
   192  	}
   193  }
   194  
   195  func shebangCheck(sig, raw []byte) bool {
   196  	if len(raw) < len(sig)+2 {
   197  		return false
   198  	}
   199  	if raw[0] != '#' || raw[1] != '!' {
   200  		return false
   201  	}
   202  
   203  	return bytes.Equal(trimLWS(trimRWS(raw[2:])), sig)
   204  }
   205  
   206  // trimLWS trims whitespace from beginning of the input.
   207  func trimLWS(in []byte) []byte {
   208  	firstNonWS := 0
   209  	for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
   210  	}
   211  
   212  	return in[firstNonWS:]
   213  }
   214  
   215  // trimRWS trims whitespace from the end of the input.
   216  func trimRWS(in []byte) []byte {
   217  	lastNonWS := len(in) - 1
   218  	for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
   219  	}
   220  
   221  	return in[:lastNonWS+1]
   222  }
   223  
   224  func firstLine(in []byte) []byte {
   225  	lineEnd := 0
   226  	for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ {
   227  	}
   228  
   229  	return in[:lineEnd]
   230  }
   231  
   232  func isWS(b byte) bool {
   233  	return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
   234  }
   235  
   236  func min(a, b int) int {
   237  	if a < b {
   238  		return a
   239  	}
   240  	return b
   241  }
   242
View as plain text