fnmatch.go

Documentation: github.com/danwakefield/fnmatch

     1  // Provide string-matching based on fnmatch.3
     2  package fnmatch
     3  
     4  // There are a few issues that I believe to be bugs, but this implementation is
     5  // based as closely as possible on BSD fnmatch. These bugs are present in the
     6  // source of BSD fnmatch, and so are replicated here. The issues are as follows:
     7  //
     8  // * FNM_PERIOD is no longer observed after the first * in a pattern
     9  //   This only applies to matches done with FNM_PATHNAME as well
    10  // * FNM_PERIOD doesn't apply to ranges. According to the documentation,
    11  //   a period must be matched explicitly, but a range will match it too
    12  
    13  import (
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
    18  const (
    19  	FNM_NOESCAPE = (1 << iota)
    20  	FNM_PATHNAME
    21  	FNM_PERIOD
    22  
    23  	FNM_LEADING_DIR
    24  	FNM_CASEFOLD
    25  
    26  	FNM_IGNORECASE = FNM_CASEFOLD
    27  	FNM_FILE_NAME  = FNM_PATHNAME
    28  )
    29  
    30  func unpackRune(str *string) rune {
    31  	rune, size := utf8.DecodeRuneInString(*str)
    32  	*str = (*str)[size:]
    33  	return rune
    34  }
    35  
    36  // Matches the pattern against the string, with the given flags,
    37  // and returns true if the match is successful.
    38  // This function should match fnmatch.3 as closely as possible.
    39  func Match(pattern, s string, flags int) bool {
    40  	// The implementation for this function was patterned after the BSD fnmatch.c
    41  	// source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html
    42  	noescape := (flags&FNM_NOESCAPE != 0)
    43  	pathname := (flags&FNM_PATHNAME != 0)
    44  	period := (flags&FNM_PERIOD != 0)
    45  	leadingdir := (flags&FNM_LEADING_DIR != 0)
    46  	casefold := (flags&FNM_CASEFOLD != 0)
    47  	// the following is some bookkeeping that the original fnmatch.c implementation did not do
    48  	// We are forced to do this because we're not keeping indexes into C strings but rather
    49  	// processing utf8-encoded strings. Use a custom unpacker to maintain our state for us
    50  	sAtStart := true
    51  	sLastAtStart := true
    52  	sLastSlash := false
    53  	sLastUnpacked := rune(0)
    54  	unpackS := func() rune {
    55  		sLastSlash = (sLastUnpacked == '/')
    56  		sLastUnpacked = unpackRune(&s)
    57  		sLastAtStart = sAtStart
    58  		sAtStart = false
    59  		return sLastUnpacked
    60  	}
    61  	for len(pattern) > 0 {
    62  		c := unpackRune(&pattern)
    63  		switch c {
    64  		case '?':
    65  			if len(s) == 0 {
    66  				return false
    67  			}
    68  			sc := unpackS()
    69  			if pathname && sc == '/' {
    70  				return false
    71  			}
    72  			if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) {
    73  				return false
    74  			}
    75  		case '*':
    76  			// collapse multiple *'s
    77  			// don't use unpackRune here, the only char we care to detect is ASCII
    78  			for len(pattern) > 0 && pattern[0] == '*' {
    79  				pattern = pattern[1:]
    80  			}
    81  			if period && s[0] == '.' && (sAtStart || (pathname && sLastUnpacked == '/')) {
    82  				return false
    83  			}
    84  			// optimize for patterns with * at end or before /
    85  			if len(pattern) == 0 {
    86  				if pathname {
    87  					return leadingdir || (strchr(s, '/') == -1)
    88  				} else {
    89  					return true
    90  				}
    91  				return !(pathname && strchr(s, '/') >= 0)
    92  			} else if pathname && pattern[0] == '/' {
    93  				offset := strchr(s, '/')
    94  				if offset == -1 {
    95  					return false
    96  				} else {
    97  					// we already know our pattern and string have a /, skip past it
    98  					s = s[offset:] // use unpackS here to maintain our bookkeeping state
    99  					unpackS()
   100  					pattern = pattern[1:] // we know / is one byte long
   101  					break
   102  				}
   103  			}
   104  			// general case, recurse
   105  			for test := s; len(test) > 0; unpackRune(&test) {
   106  				// I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified
   107  				// but this follows exactly from how fnmatch.c implements it
   108  				if Match(pattern, test, (flags &^ FNM_PERIOD)) {
   109  					return true
   110  				} else if pathname && test[0] == '/' {
   111  					break
   112  				}
   113  			}
   114  			return false
   115  		case '[':
   116  			if len(s) == 0 {
   117  				return false
   118  			}
   119  			if pathname && s[0] == '/' {
   120  				return false
   121  			}
   122  			sc := unpackS()
   123  			if !rangematch(&pattern, sc, flags) {
   124  				return false
   125  			}
   126  		case '\\':
   127  			if !noescape {
   128  				if len(pattern) > 0 {
   129  					c = unpackRune(&pattern)
   130  				}
   131  			}
   132  			fallthrough
   133  		default:
   134  			if len(s) == 0 {
   135  				return false
   136  			}
   137  			sc := unpackS()
   138  			switch {
   139  			case sc == c:
   140  			case casefold && unicode.ToLower(sc) == unicode.ToLower(c):
   141  			default:
   142  				return false
   143  			}
   144  		}
   145  	}
   146  	return len(s) == 0 || (leadingdir && s[0] == '/')
   147  }
   148  
   149  func rangematch(pattern *string, test rune, flags int) bool {
   150  	if len(*pattern) == 0 {
   151  		return false
   152  	}
   153  	casefold := (flags&FNM_CASEFOLD != 0)
   154  	noescape := (flags&FNM_NOESCAPE != 0)
   155  	if casefold {
   156  		test = unicode.ToLower(test)
   157  	}
   158  	var negate, matched bool
   159  	if (*pattern)[0] == '^' || (*pattern)[0] == '!' {
   160  		negate = true
   161  		(*pattern) = (*pattern)[1:]
   162  	}
   163  	for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' {
   164  		c := unpackRune(pattern)
   165  		if !noescape && c == '\\' {
   166  			if len(*pattern) > 1 {
   167  				c = unpackRune(pattern)
   168  			} else {
   169  				return false
   170  			}
   171  		}
   172  		if casefold {
   173  			c = unicode.ToLower(c)
   174  		}
   175  		if (*pattern)[0] == '-' && len(*pattern) > 1 && (*pattern)[1] != ']' {
   176  			unpackRune(pattern) // skip the -
   177  			c2 := unpackRune(pattern)
   178  			if !noescape && c2 == '\\' {
   179  				if len(*pattern) > 0 {
   180  					c2 = unpackRune(pattern)
   181  				} else {
   182  					return false
   183  				}
   184  			}
   185  			if casefold {
   186  				c2 = unicode.ToLower(c2)
   187  			}
   188  			// this really should be more intelligent, but it looks like
   189  			// fnmatch.c does simple int comparisons, therefore we will as well
   190  			if c <= test && test <= c2 {
   191  				matched = true
   192  			}
   193  		} else if c == test {
   194  			matched = true
   195  		}
   196  	}
   197  	// skip past the rest of the pattern
   198  	ok := false
   199  	for !ok && len(*pattern) > 0 {
   200  		c := unpackRune(pattern)
   201  		if c == '\\' && len(*pattern) > 0 {
   202  			unpackRune(pattern)
   203  		} else if c == ']' {
   204  			ok = true
   205  		}
   206  	}
   207  	return ok && matched != negate
   208  }
   209  
   210  // define strchr because strings.Index() seems a bit overkill
   211  // returns the index of c in s, or -1 if there is no match
   212  func strchr(s string, c rune) int {
   213  	for i, sc := range s {
   214  		if sc == c {
   215  			return i
   216  		}
   217  	}
   218  	return -1
   219  }
   220
View as plain text