gitignore.go

Documentation: github.com/shibumi/go-pathspec

     1  //
     2  // Copyright 2014, Sander van Harmelen
     3  // Copyright 2020, Christian Rebischke
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  //
    17  
    18  // Package pathspec implements git compatible gitignore pattern matching.
    19  // See the description below, if you are unfamiliar with it:
    20  //
    21  // A blank line matches no files, so it can serve as a separator for readability.
    22  //
    23  // A line starting with # serves as a comment. Put a backslash ("\") in front of
    24  // the first hash for patterns that begin with a hash.
    25  //
    26  // An optional prefix "!" which negates the pattern; any matching file excluded
    27  // by a previous pattern will become included again. If a negated pattern matches,
    28  // this will override lower precedence patterns sources. Put a backslash ("\") in
    29  // front of the first "!" for patterns that begin with a literal "!", for example,
    30  // "\!important!.txt".
    31  //
    32  // If the pattern ends with a slash, it is removed for the purpose of the following
    33  // description, but it would only find a match with a directory. In other words,
    34  // foo/ will match a directory foo and paths underneath it, but will not match a
    35  // regular file or a symbolic link foo (this is consistent with the way how pathspec
    36  // works in general in Git).
    37  //
    38  // If the pattern does not contain a slash /, Git treats it as a shell glob pattern
    39  // and checks for a match against the pathname relative to the location of the
    40  // .gitignore file (relative to the toplevel of the work tree if not from a
    41  // .gitignore file).
    42  //
    43  // Otherwise, Git treats the pattern as a shell glob suitable for consumption by
    44  // fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will not match
    45  // a / in the pathname. For example, "Documentation/*.html" matches
    46  // "Documentation/git.html" but not "Documentation/ppc/ppc.html" or
    47  // "tools/perf/Documentation/perf.html".
    48  //
    49  // A leading slash matches the beginning of the pathname. For example, "/*.c"
    50  // matches "cat-file.c" but not "mozilla-sha1/sha1.c".
    51  //
    52  // Two consecutive asterisks ("**") in patterns matched against full pathname
    53  // may have special meaning:
    54  //
    55  // A leading "**" followed by a slash means match in all directories. For example,
    56  // "**/foo" matches file or directory "foo" anywhere, the same as pattern "foo".
    57  // "**/foo/bar" matches file or directory "bar" anywhere that is directly under
    58  // directory "foo".
    59  //
    60  // A trailing "/" matches everything inside. For example, "abc/" matches all files
    61  // inside directory "abc", relative to the location of the .gitignore file, with
    62  // infinite depth.
    63  //
    64  // A slash followed by two consecutive asterisks then a slash matches zero or more
    65  // directories. For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on.
    66  //
    67  // Other consecutive asterisks are considered invalid.
    68  package pathspec
    69  
    70  import (
    71  	"bufio"
    72  	"bytes"
    73  	"io"
    74  	"path/filepath"
    75  	"regexp"
    76  	"strings"
    77  )
    78  
// gitIgnorePattern is the parsed form of a single gitignore pattern as
// produced by parsePattern.
type gitIgnorePattern struct {
	// Regex is the regular expression source built from the pattern. It is
	// stored as text and handed to regexp.MatchString by the callers.
	Regex   string
	// Include is true when the pattern was prefixed with "!" (a negated
	// pattern), false otherwise.
	Include bool
}
    83  
    84  // GitIgnore uses a string slice of patterns for matching on a filepath string.
    85  // On match it returns true, otherwise false. On error it passes the error through.
    86  func GitIgnore(patterns []string, name string) (ignore bool, err error) {
    87  	for _, pattern := range patterns {
    88  		p := parsePattern(pattern)
    89  		// Convert Windows paths to Unix paths
    90  		name = filepath.ToSlash(name)
    91  		match, err := regexp.MatchString(p.Regex, name)
    92  		if err != nil {
    93  			return ignore, err
    94  		}
    95  		if match {
    96  			if p.Include {
    97  				return false, nil
    98  			}
    99  			ignore = true
   100  		}
   101  	}
   102  	return ignore, nil
   103  }
   104  
   105  // ReadGitIgnore implements the io.Reader interface for reading a gitignore file
   106  // line by line. It behaves exactly like the GitIgnore function. The only difference
   107  // is that GitIgnore works on a string slice.
   108  //
   109  // ReadGitIgnore returns a boolean value if we match or not and an error.
   110  func ReadGitIgnore(content io.Reader, name string) (ignore bool, err error) {
   111  	scanner := bufio.NewScanner(content)
   112  
   113  	for scanner.Scan() {
   114  		pattern := strings.TrimSpace(scanner.Text())
   115  		if len(pattern) == 0 || pattern[0] == '#' {
   116  			continue
   117  		}
   118  		p := parsePattern(pattern)
   119  		// Convert Windows paths to Unix paths
   120  		name = filepath.ToSlash(name)
   121  		match, err := regexp.MatchString(p.Regex, name)
   122  		if err != nil {
   123  			return ignore, err
   124  		}
   125  		if match {
   126  			if p.Include {
   127  				return false, scanner.Err()
   128  			}
   129  			ignore = true
   130  		}
   131  	}
   132  	return ignore, scanner.Err()
   133  }
   134  
   135  func parsePattern(pattern string) *gitIgnorePattern {
   136  	p := &gitIgnorePattern{}
   137  
   138  	// An optional prefix "!" which negates the pattern; any matching file
   139  	// excluded by a previous pattern will become included again.
   140  	if strings.HasPrefix(pattern, "!") {
   141  		pattern = pattern[1:]
   142  		p.Include = true
   143  	} else {
   144  		p.Include = false
   145  	}
   146  
   147  	// Remove leading back-slash escape for escaped hash ('#') or
   148  	// exclamation mark ('!').
   149  	if strings.HasPrefix(pattern, "\\") {
   150  		pattern = pattern[1:]
   151  	}
   152  
   153  	// Split pattern into segments.
   154  	patternSegs := strings.Split(pattern, "/")
   155  
   156  	// A pattern beginning with a slash ('/') will only match paths
   157  	// directly on the root directory instead of any descendant paths.
   158  	// So remove empty first segment to make pattern absoluut to root.
   159  	// A pattern without a beginning slash ('/') will match any
   160  	// descendant path. This is equivilent to "**/{pattern}". So
   161  	// prepend with double-asterisks to make pattern relative to
   162  	// root.
   163  	if patternSegs[0] == "" {
   164  		patternSegs = patternSegs[1:]
   165  	} else if patternSegs[0] != "**" {
   166  		patternSegs = append([]string{"**"}, patternSegs...)
   167  	}
   168  
   169  	// A pattern ending with a slash ('/') will match all descendant
   170  	// paths of if it is a directory but not if it is a regular file.
   171  	// This is equivalent to "{pattern}/**". So, set last segment to
   172  	// double asterisks to include all descendants.
   173  	if patternSegs[len(patternSegs)-1] == "" {
   174  		patternSegs[len(patternSegs)-1] = "**"
   175  	}
   176  
   177  	// Build regular expression from pattern.
   178  	var expr bytes.Buffer
   179  	expr.WriteString("^")
   180  	needSlash := false
   181  
   182  	for i, seg := range patternSegs {
   183  		switch seg {
   184  		case "**":
   185  			switch {
   186  			case i == 0 && i == len(patternSegs)-1:
   187  				// A pattern consisting solely of double-asterisks ('**')
   188  				// will match every path.
   189  				expr.WriteString(".+")
   190  			case i == 0:
   191  				// A normalized pattern beginning with double-asterisks
   192  				// ('**') will match any leading path segments.
   193  				expr.WriteString("(?:.+/)?")
   194  				needSlash = false
   195  			case i == len(patternSegs)-1:
   196  				// A normalized pattern ending with double-asterisks ('**')
   197  				// will match any trailing path segments.
   198  				expr.WriteString("/.+")
   199  			default:
   200  				// A pattern with inner double-asterisks ('**') will match
   201  				// multiple (or zero) inner path segments.
   202  				expr.WriteString("(?:/.+)?")
   203  				needSlash = true
   204  			}
   205  		case "*":
   206  			// Match single path segment.
   207  			if needSlash {
   208  				expr.WriteString("/")
   209  			}
   210  			expr.WriteString("[^/]+")
   211  			needSlash = true
   212  		default:
   213  			// Match segment glob pattern.
   214  			if needSlash {
   215  				expr.WriteString("/")
   216  			}
   217  			expr.WriteString(translateGlob(seg))
   218  			needSlash = true
   219  		}
   220  	}
   221  	expr.WriteString("$")
   222  	p.Regex = expr.String()
   223  	return p
   224  }
   225  
   226  // NOTE: This is derived from `fnmatch.translate()` and is similar to
   227  // the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
   228  func translateGlob(glob string) string {
   229  	var regex bytes.Buffer
   230  	escape := false
   231  
   232  	for i := 0; i < len(glob); i++ {
   233  		char := glob[i]
   234  		// Escape the character.
   235  		switch {
   236  		case escape:
   237  			escape = false
   238  			regex.WriteString(regexp.QuoteMeta(string(char)))
   239  		case char == '\\':
   240  			// Escape character, escape next character.
   241  			escape = true
   242  		case char == '*':
   243  			// Multi-character wildcard. Match any string (except slashes),
   244  			// including an empty string.
   245  			regex.WriteString("[^/]*")
   246  		case char == '?':
   247  			// Single-character wildcard. Match any single character (except
   248  			// a slash).
   249  			regex.WriteString("[^/]")
   250  		case char == '[':
   251  			regex.WriteString(translateBracketExpression(&i, glob))
   252  		default:
   253  			// Regular character, escape it for regex.
   254  			regex.WriteString(regexp.QuoteMeta(string(char)))
   255  		}
   256  	}
   257  	return regex.String()
   258  }
   259  
   260  // Bracket expression wildcard. Except for the beginning
   261  // exclamation mark, the whole bracket expression can be used
   262  // directly as regex but we have to find where the expression
   263  // ends.
   264  // - "[][!]" matches ']', '[' and '!'.
   265  // - "[]-]" matches ']' and '-'.
   266  // - "[!]a-]" matches any character except ']', 'a' and '-'.
   267  func translateBracketExpression(i *int, glob string) string {
   268  	regex := string(glob[*i])
   269  	*i++
   270  	j := *i
   271  
   272  	// Pass bracket expression negation.
   273  	if j < len(glob) && glob[j] == '!' {
   274  		j++
   275  	}
   276  	// Pass first closing bracket if it is at the beginning of the
   277  	// expression.
   278  	if j < len(glob) && glob[j] == ']' {
   279  		j++
   280  	}
   281  	// Find closing bracket. Stop once we reach the end or find it.
   282  	for j < len(glob) && glob[j] != ']' {
   283  		j++
   284  	}
   285  
   286  	if j < len(glob) {
   287  		if glob[*i] == '!' {
   288  			regex = regex + "^"
   289  			*i++
   290  		}
   291  		regex = regexp.QuoteMeta(glob[*i:j])
   292  		*i = j
   293  	} else {
   294  		// Failed to find closing bracket, treat opening bracket as a
   295  		// bracket literal instead of as an expression.
   296  		regex = regexp.QuoteMeta(string(glob[*i]))
   297  	}
   298  	return "[" + regex + "]"
   299  }
   300
View as plain text