//
// Copyright 2014, Sander van Harmelen
// Copyright 2020, Christian Rebischke
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Package pathspec implements git compatible gitignore pattern matching.
// See the description below, if you are unfamiliar with it:
//
// A blank line matches no files, so it can serve as a separator for readability.
//
// A line starting with # serves as a comment. Put a backslash ("\") in front of
// the first hash for patterns that begin with a hash.
//
// An optional prefix "!" which negates the pattern; any matching file excluded
// by a previous pattern will become included again. If a negated pattern matches,
// this will override lower precedence patterns sources. Put a backslash ("\") in
// front of the first "!" for patterns that begin with a literal "!", for example,
// "\!important!.txt".
//
// If the pattern ends with a slash, it is removed for the purpose of the following
// description, but it would only find a match with a directory. In other words,
// foo/ will match a directory foo and paths underneath it, but will not match a
// regular file or a symbolic link foo (this is consistent with the way how pathspec
// works in general in Git).
//
// If the pattern does not contain a slash /, Git treats it as a shell glob pattern
// and checks for a match against the pathname relative to the location of the
// .gitignore file (relative to the toplevel of the work tree if not from a
// .gitignore file).
//
// Otherwise, Git treats the pattern as a shell glob suitable for consumption by
// fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will not match
// a / in the pathname. For example, "Documentation/*.html" matches
// "Documentation/git.html" but not "Documentation/ppc/ppc.html" or
// "tools/perf/Documentation/perf.html".
//
// A leading slash matches the beginning of the pathname. For example, "/*.c"
// matches "cat-file.c" but not "mozilla-sha1/sha1.c".
//
// Two consecutive asterisks ("**") in patterns matched against full pathname
// may have special meaning:
//
// A leading "**" followed by a slash means match in all directories. For example,
// "**/foo" matches file or directory "foo" anywhere, the same as pattern "foo".
// "**/foo/bar" matches file or directory "bar" anywhere that is directly under
// directory "foo".
//
// A trailing "/" matches everything inside. For example, "abc/" matches all files
// inside directory "abc", relative to the location of the .gitignore file, with
// infinite depth.
//
// A slash followed by two consecutive asterisks then a slash matches zero or more
// directories. For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on.
//
// Other consecutive asterisks are considered invalid.
package pathspec

import (
	"bufio"
	"bytes"
	"io"
	"path/filepath"
	"regexp"
	"strings"
)

// gitIgnorePattern is the parsed form of a single gitignore pattern, as
// produced by parsePattern.
type gitIgnorePattern struct {
	// Regex is the source text of the regular expression that the pattern
	// was translated to; it is handed to regexp.MatchString together with
	// the slash-separated path.
	Regex string
	// Include is true when the pattern carried the "!" prefix, i.e. a match
	// means the path is included again rather than ignored.
	Include bool
}

// GitIgnore uses a string slice of patterns for matching on a filepath string.
// On match it returns true, otherwise false. On error it passes the error through.
86 func GitIgnore(patterns []string, name string) (ignore bool, err error) { 87 for _, pattern := range patterns { 88 p := parsePattern(pattern) 89 // Convert Windows paths to Unix paths 90 name = filepath.ToSlash(name) 91 match, err := regexp.MatchString(p.Regex, name) 92 if err != nil { 93 return ignore, err 94 } 95 if match { 96 if p.Include { 97 return false, nil 98 } 99 ignore = true 100 } 101 } 102 return ignore, nil 103 } 104 105 // ReadGitIgnore implements the io.Reader interface for reading a gitignore file 106 // line by line. It behaves exactly like the GitIgnore function. The only difference 107 // is that GitIgnore works on a string slice. 108 // 109 // ReadGitIgnore returns a boolean value if we match or not and an error. 110 func ReadGitIgnore(content io.Reader, name string) (ignore bool, err error) { 111 scanner := bufio.NewScanner(content) 112 113 for scanner.Scan() { 114 pattern := strings.TrimSpace(scanner.Text()) 115 if len(pattern) == 0 || pattern[0] == '#' { 116 continue 117 } 118 p := parsePattern(pattern) 119 // Convert Windows paths to Unix paths 120 name = filepath.ToSlash(name) 121 match, err := regexp.MatchString(p.Regex, name) 122 if err != nil { 123 return ignore, err 124 } 125 if match { 126 if p.Include { 127 return false, scanner.Err() 128 } 129 ignore = true 130 } 131 } 132 return ignore, scanner.Err() 133 } 134 135 func parsePattern(pattern string) *gitIgnorePattern { 136 p := &gitIgnorePattern{} 137 138 // An optional prefix "!" which negates the pattern; any matching file 139 // excluded by a previous pattern will become included again. 140 if strings.HasPrefix(pattern, "!") { 141 pattern = pattern[1:] 142 p.Include = true 143 } else { 144 p.Include = false 145 } 146 147 // Remove leading back-slash escape for escaped hash ('#') or 148 // exclamation mark ('!'). 149 if strings.HasPrefix(pattern, "\\") { 150 pattern = pattern[1:] 151 } 152 153 // Split pattern into segments. 
154 patternSegs := strings.Split(pattern, "/") 155 156 // A pattern beginning with a slash ('/') will only match paths 157 // directly on the root directory instead of any descendant paths. 158 // So remove empty first segment to make pattern absoluut to root. 159 // A pattern without a beginning slash ('/') will match any 160 // descendant path. This is equivilent to "**/{pattern}". So 161 // prepend with double-asterisks to make pattern relative to 162 // root. 163 if patternSegs[0] == "" { 164 patternSegs = patternSegs[1:] 165 } else if patternSegs[0] != "**" { 166 patternSegs = append([]string{"**"}, patternSegs...) 167 } 168 169 // A pattern ending with a slash ('/') will match all descendant 170 // paths of if it is a directory but not if it is a regular file. 171 // This is equivalent to "{pattern}/**". So, set last segment to 172 // double asterisks to include all descendants. 173 if patternSegs[len(patternSegs)-1] == "" { 174 patternSegs[len(patternSegs)-1] = "**" 175 } 176 177 // Build regular expression from pattern. 178 var expr bytes.Buffer 179 expr.WriteString("^") 180 needSlash := false 181 182 for i, seg := range patternSegs { 183 switch seg { 184 case "**": 185 switch { 186 case i == 0 && i == len(patternSegs)-1: 187 // A pattern consisting solely of double-asterisks ('**') 188 // will match every path. 189 expr.WriteString(".+") 190 case i == 0: 191 // A normalized pattern beginning with double-asterisks 192 // ('**') will match any leading path segments. 193 expr.WriteString("(?:.+/)?") 194 needSlash = false 195 case i == len(patternSegs)-1: 196 // A normalized pattern ending with double-asterisks ('**') 197 // will match any trailing path segments. 198 expr.WriteString("/.+") 199 default: 200 // A pattern with inner double-asterisks ('**') will match 201 // multiple (or zero) inner path segments. 202 expr.WriteString("(?:/.+)?") 203 needSlash = true 204 } 205 case "*": 206 // Match single path segment. 
207 if needSlash { 208 expr.WriteString("/") 209 } 210 expr.WriteString("[^/]+") 211 needSlash = true 212 default: 213 // Match segment glob pattern. 214 if needSlash { 215 expr.WriteString("/") 216 } 217 expr.WriteString(translateGlob(seg)) 218 needSlash = true 219 } 220 } 221 expr.WriteString("$") 222 p.Regex = expr.String() 223 return p 224 } 225 226 // NOTE: This is derived from `fnmatch.translate()` and is similar to 227 // the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set. 228 func translateGlob(glob string) string { 229 var regex bytes.Buffer 230 escape := false 231 232 for i := 0; i < len(glob); i++ { 233 char := glob[i] 234 // Escape the character. 235 switch { 236 case escape: 237 escape = false 238 regex.WriteString(regexp.QuoteMeta(string(char))) 239 case char == '\\': 240 // Escape character, escape next character. 241 escape = true 242 case char == '*': 243 // Multi-character wildcard. Match any string (except slashes), 244 // including an empty string. 245 regex.WriteString("[^/]*") 246 case char == '?': 247 // Single-character wildcard. Match any single character (except 248 // a slash). 249 regex.WriteString("[^/]") 250 case char == '[': 251 regex.WriteString(translateBracketExpression(&i, glob)) 252 default: 253 // Regular character, escape it for regex. 254 regex.WriteString(regexp.QuoteMeta(string(char))) 255 } 256 } 257 return regex.String() 258 } 259 260 // Bracket expression wildcard. Except for the beginning 261 // exclamation mark, the whole bracket expression can be used 262 // directly as regex but we have to find where the expression 263 // ends. 264 // - "[][!]" matches ']', '[' and '!'. 265 // - "[]-]" matches ']' and '-'. 266 // - "[!]a-]" matches any character except ']', 'a' and '-'. 267 func translateBracketExpression(i *int, glob string) string { 268 regex := string(glob[*i]) 269 *i++ 270 j := *i 271 272 // Pass bracket expression negation. 273 if j < len(glob) && glob[j] == '!' 
{ 274 j++ 275 } 276 // Pass first closing bracket if it is at the beginning of the 277 // expression. 278 if j < len(glob) && glob[j] == ']' { 279 j++ 280 } 281 // Find closing bracket. Stop once we reach the end or find it. 282 for j < len(glob) && glob[j] != ']' { 283 j++ 284 } 285 286 if j < len(glob) { 287 if glob[*i] == '!' { 288 regex = regex + "^" 289 *i++ 290 } 291 regex = regexp.QuoteMeta(glob[*i:j]) 292 *i = j 293 } else { 294 // Failed to find closing bracket, treat opening bracket as a 295 // bracket literal instead of as an expression. 296 regex = regexp.QuoteMeta(string(glob[*i])) 297 } 298 return "[" + regex + "]" 299 } 300