// Copyright 2019 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package regexp implements regular expression search.
//
// The syntax of the regular expressions accepted is the same
// general syntax used by Perl, Python, and other languages.
// More precisely, it is the syntax accepted by RE2 and described at
// https://golang.org/s/re2syntax, except for \C.
// For an overview of the syntax, run
//
//	go doc regexp/syntax
//
// The regexp implementation provided by this package is
// guaranteed to run in time linear in the size of the input.
// (This is a property not guaranteed by most open source
// implementations of regular expressions.) For more information
// about this property, see
//
//	https://swtch.com/~rsc/regexp/regexp1.html
//
// or any book about automata theory.
//
// All characters are UTF-8-encoded code points.
//
// The regexp package functions match a regular expression and identify
// the matched text. Their names are matched by this regular expression:
//
//	Find(All)?(Submatch)?
//
// If 'All' is present, the routine matches successive non-overlapping
// matches of the entire expression. Empty matches abutting a preceding
// match are ignored. The return value is a slice containing the successive
// return values of the corresponding non-'All' routine.
These routines take 46 // an extra integer argument, n. If n >= 0, the function returns at most n 47 // matches/submatches; otherwise, it returns all of them. 48 // 49 // If 'Submatch' is present, the return value is a slice identifying the 50 // successive submatches of the expression. Submatches are matches of 51 // parenthesized subexpressions (also known as capturing groups) within the 52 // regular expression, numbered from left to right in order of opening 53 // parenthesis. Submatch 0 is the match of the entire expression, submatch 1 54 // the match of the first parenthesized subexpression, and so on. 55 package regexp 56 57 import ( 58 "regexp" 59 60 "cuelang.org/go/cue/errors" 61 ) 62 63 var errNoMatch = errors.New("no match") 64 65 // Find returns a list holding the text of the leftmost match in b of the regular expression. 66 // A return value of bottom indicates no match. 67 func Find(pattern, s string) (string, error) { 68 re, err := regexp.Compile(pattern) 69 if err != nil { 70 return "", err 71 } 72 m := re.FindStringIndex(s) 73 if m == nil { 74 return "", errNoMatch 75 } 76 return s[m[0]:m[1]], nil 77 } 78 79 // FindAll is the 'All' version of Find; it returns a list of all successive 80 // matches of the expression, as defined by the 'All' description in the 81 // package comment. 82 // A return value of bottom indicates no match. 83 func FindAll(pattern, s string, n int) ([]string, error) { 84 re, err := regexp.Compile(pattern) 85 if err != nil { 86 return nil, err 87 } 88 m := re.FindAllString(s, n) 89 if m == nil { 90 return nil, errNoMatch 91 } 92 return m, nil 93 } 94 95 // FindAllNamedSubmatch is like FindAllSubmatch, but returns a list of maps 96 // with the named used in capturing groups. See FindNamedSubmatch for an 97 // example on how to use named groups. 
98 func FindAllNamedSubmatch(pattern, s string, n int) ([]map[string]string, error) { 99 re, err := regexp.Compile(pattern) 100 if err != nil { 101 return nil, err 102 } 103 names := re.SubexpNames() 104 if len(names) == 0 { 105 return nil, errNoNamedGroup 106 } 107 m := re.FindAllStringSubmatch(s, n) 108 if m == nil { 109 return nil, errNoMatch 110 } 111 result := make([]map[string]string, len(m)) 112 for i, m := range m { 113 r := make(map[string]string, len(names)-1) 114 for k, name := range names { 115 if name != "" { 116 r[name] = m[k] 117 } 118 } 119 result[i] = r 120 } 121 return result, nil 122 } 123 124 var errNoNamedGroup = errors.New("no named groups") 125 126 // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a list 127 // of all successive matches of the expression, as defined by the 'All' 128 // description in the package comment. 129 // A return value of bottom indicates no match. 130 func FindAllSubmatch(pattern, s string, n int) ([][]string, error) { 131 re, err := regexp.Compile(pattern) 132 if err != nil { 133 return nil, err 134 } 135 m := re.FindAllStringSubmatch(s, n) 136 if m == nil { 137 return nil, errNoMatch 138 } 139 return m, nil 140 } 141 142 // FindNamedSubmatch is like FindSubmatch, but returns a map with the names used 143 // in capturing groups. 
144 // 145 // Example: 146 // 147 // regexp.FindNamedSubmatch(#"Hello (?P<person>\w*)!"#, "Hello World!") 148 // 149 // Output: 150 // 151 // [{person: "World"}] 152 func FindNamedSubmatch(pattern, s string) (map[string]string, error) { 153 re, err := regexp.Compile(pattern) 154 if err != nil { 155 return nil, err 156 } 157 names := re.SubexpNames() 158 if len(names) == 0 { 159 return nil, errNoNamedGroup 160 } 161 m := re.FindStringSubmatch(s) 162 if m == nil { 163 return nil, errNoMatch 164 } 165 r := make(map[string]string, len(names)-1) 166 for k, name := range names { 167 if name != "" { 168 r[name] = m[k] 169 } 170 } 171 return r, nil 172 } 173 174 // FindSubmatch returns a list of lists holding the text of the leftmost 175 // match of the regular expression in b and the matches, if any, of its 176 // subexpressions, as defined by the 'Submatch' descriptions in the package 177 // comment. 178 // A return value of bottom indicates no match. 179 func FindSubmatch(pattern, s string) ([]string, error) { 180 re, err := regexp.Compile(pattern) 181 if err != nil { 182 return nil, err 183 } 184 m := re.FindStringSubmatch(s) 185 if m == nil { 186 return nil, errNoMatch 187 } 188 return m, nil 189 } 190 191 // ReplaceAll returns a copy of src, replacing variables in repl with 192 // corresponding matches drawn from src, according to the following rules. 193 // 194 // In the template repl, a variable is denoted by a substring of the form $name 195 // or ${name}, where name is a non-empty sequence of letters, digits, and 196 // underscores. A purely numeric name like $1 refers to the submatch with the 197 // corresponding index; other names refer to capturing parentheses named with 198 // the (?P<name>...) syntax. A reference to an out of range or unmatched index 199 // or a name that is not present in the regular expression is replaced with an 200 // empty slice. 
//
// In the $name form, name is taken to be as long as possible: $1x is
// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
//
// To insert a literal $ in the output, use $$ in the template.
func ReplaceAll(pattern, src, repl string) (string, error) {
	var replaced string
	compiled, compileErr := regexp.Compile(pattern)
	if compileErr == nil {
		replaced = compiled.ReplaceAllString(src, repl)
	}
	// On a compile failure replaced is still the empty string.
	return replaced, compileErr
}

// ReplaceAllLiteral returns a copy of src, replacing matches of the regexp
// pattern with the replacement string repl. The replacement repl is substituted
// directly, without expanding $ variables. If pattern does not compile, the
// empty string and the compile error are returned.
func ReplaceAllLiteral(pattern, src, repl string) (string, error) {
	var replaced string
	compiled, compileErr := regexp.Compile(pattern)
	if compileErr == nil {
		replaced = compiled.ReplaceAllLiteralString(src, repl)
	}
	// On a compile failure replaced is still the empty string.
	return replaced, compileErr
}

// Valid reports whether the given regular expression is valid. For an
// invalid pattern it returns false together with the compile error.
func Valid(pattern string) (bool, error) {
	if _, compileErr := regexp.Compile(pattern); compileErr != nil {
		return false, compileErr
	}
	return true, nil
}