...

Source file src/cuelang.org/go/pkg/regexp/manual.go

Documentation: cuelang.org/go/pkg/regexp

     1  // Copyright 2019 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package regexp implements regular expression search.
    16  //
    17  // The syntax of the regular expressions accepted is the same
    18  // general syntax used by Perl, Python, and other languages.
    19  // More precisely, it is the syntax accepted by RE2 and described at
    20  // https://golang.org/s/re2syntax, except for \C.
    21  // For an overview of the syntax, run
    22  //
    23  //	go doc regexp/syntax
    24  //
    25  // The regexp implementation provided by this package is
    26  // guaranteed to run in time linear in the size of the input.
    27  // (This is a property not guaranteed by most open source
    28  // implementations of regular expressions.) For more information
    29  // about this property, see
    30  //
    31  //	https://swtch.com/~rsc/regexp/regexp1.html
    32  //
    33  // or any book about automata theory.
    34  //
    35  // All characters are UTF-8-encoded code points.
    36  //
    37  // The regexp package functions match a regular expression and identify
    38  // the matched text. Their names are matched by this regular expression:
    39  //
    40  //	Find(All)?(Submatch)?
    41  //
    42  // If 'All' is present, the routine matches successive non-overlapping
    43  // matches of the entire expression. Empty matches abutting a preceding
    44  // match are ignored. The return value is a slice containing the successive
    45  // return values of the corresponding non-'All' routine. These routines take
    46  // an extra integer argument, n. If n >= 0, the function returns at most n
    47  // matches/submatches; otherwise, it returns all of them.
    48  //
    49  // If 'Submatch' is present, the return value is a slice identifying the
    50  // successive submatches of the expression. Submatches are matches of
    51  // parenthesized subexpressions (also known as capturing groups) within the
    52  // regular expression, numbered from left to right in order of opening
    53  // parenthesis. Submatch 0 is the match of the entire expression, submatch 1
    54  // the match of the first parenthesized subexpression, and so on.
    55  package regexp
    56  
    57  import (
    58  	"regexp"
    59  
    60  	"cuelang.org/go/cue/errors"
    61  )
    62  
    63  var errNoMatch = errors.New("no match")
    64  
    65  // Find returns a list holding the text of the leftmost match in b of the regular expression.
    66  // A return value of bottom indicates no match.
    67  func Find(pattern, s string) (string, error) {
    68  	re, err := regexp.Compile(pattern)
    69  	if err != nil {
    70  		return "", err
    71  	}
    72  	m := re.FindStringIndex(s)
    73  	if m == nil {
    74  		return "", errNoMatch
    75  	}
    76  	return s[m[0]:m[1]], nil
    77  }
    78  
    79  // FindAll is the 'All' version of Find; it returns a list of all successive
    80  // matches of the expression, as defined by the 'All' description in the
    81  // package comment.
    82  // A return value of bottom indicates no match.
    83  func FindAll(pattern, s string, n int) ([]string, error) {
    84  	re, err := regexp.Compile(pattern)
    85  	if err != nil {
    86  		return nil, err
    87  	}
    88  	m := re.FindAllString(s, n)
    89  	if m == nil {
    90  		return nil, errNoMatch
    91  	}
    92  	return m, nil
    93  }
    94  
    95  // FindAllNamedSubmatch is like FindAllSubmatch, but returns a list of maps
    96  // with the named used in capturing groups. See FindNamedSubmatch for an
    97  // example on how to use named groups.
    98  func FindAllNamedSubmatch(pattern, s string, n int) ([]map[string]string, error) {
    99  	re, err := regexp.Compile(pattern)
   100  	if err != nil {
   101  		return nil, err
   102  	}
   103  	names := re.SubexpNames()
   104  	if len(names) == 0 {
   105  		return nil, errNoNamedGroup
   106  	}
   107  	m := re.FindAllStringSubmatch(s, n)
   108  	if m == nil {
   109  		return nil, errNoMatch
   110  	}
   111  	result := make([]map[string]string, len(m))
   112  	for i, m := range m {
   113  		r := make(map[string]string, len(names)-1)
   114  		for k, name := range names {
   115  			if name != "" {
   116  				r[name] = m[k]
   117  			}
   118  		}
   119  		result[i] = r
   120  	}
   121  	return result, nil
   122  }
   123  
   124  var errNoNamedGroup = errors.New("no named groups")
   125  
   126  // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a list
   127  // of all successive matches of the expression, as defined by the 'All'
   128  // description in the package comment.
   129  // A return value of bottom indicates no match.
   130  func FindAllSubmatch(pattern, s string, n int) ([][]string, error) {
   131  	re, err := regexp.Compile(pattern)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  	m := re.FindAllStringSubmatch(s, n)
   136  	if m == nil {
   137  		return nil, errNoMatch
   138  	}
   139  	return m, nil
   140  }
   141  
   142  // FindNamedSubmatch is like FindSubmatch, but returns a map with the names used
   143  // in capturing groups.
   144  //
   145  // Example:
   146  //
   147  //	regexp.FindNamedSubmatch(#"Hello (?P<person>\w*)!"#, "Hello World!")
   148  //
   149  // Output:
   150  //
   151  //	[{person: "World"}]
   152  func FindNamedSubmatch(pattern, s string) (map[string]string, error) {
   153  	re, err := regexp.Compile(pattern)
   154  	if err != nil {
   155  		return nil, err
   156  	}
   157  	names := re.SubexpNames()
   158  	if len(names) == 0 {
   159  		return nil, errNoNamedGroup
   160  	}
   161  	m := re.FindStringSubmatch(s)
   162  	if m == nil {
   163  		return nil, errNoMatch
   164  	}
   165  	r := make(map[string]string, len(names)-1)
   166  	for k, name := range names {
   167  		if name != "" {
   168  			r[name] = m[k]
   169  		}
   170  	}
   171  	return r, nil
   172  }
   173  
   174  // FindSubmatch returns a list of lists holding the text of the leftmost
   175  // match of the regular expression in b and the matches, if any, of its
   176  // subexpressions, as defined by the 'Submatch' descriptions in the package
   177  // comment.
   178  // A return value of bottom indicates no match.
   179  func FindSubmatch(pattern, s string) ([]string, error) {
   180  	re, err := regexp.Compile(pattern)
   181  	if err != nil {
   182  		return nil, err
   183  	}
   184  	m := re.FindStringSubmatch(s)
   185  	if m == nil {
   186  		return nil, errNoMatch
   187  	}
   188  	return m, nil
   189  }
   190  
   191  // ReplaceAll returns a copy of src, replacing variables in repl with
   192  // corresponding matches drawn from src, according to the following rules.
   193  //
   194  // In the template repl, a variable is denoted by a substring of the form $name
   195  // or ${name}, where name is a non-empty sequence of letters, digits, and
   196  // underscores. A purely numeric name like $1 refers to the submatch with the
   197  // corresponding index; other names refer to capturing parentheses named with
   198  // the (?P<name>...) syntax. A reference to an out of range or unmatched index
   199  // or a name that is not present in the regular expression is replaced with an
   200  // empty slice.
   201  //
   202  // In the $name form, name is taken to be as long as possible: $1x is
   203  // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
   204  //
   205  // To insert a literal $ in the output, use $$ in the template.
   206  func ReplaceAll(pattern, src, repl string) (string, error) {
   207  	re, err := regexp.Compile(pattern)
   208  	if err != nil {
   209  		return "", err
   210  	}
   211  	return re.ReplaceAllString(src, repl), nil
   212  }
   213  
   214  // ReplaceAllLiteral returns a copy of src, replacing matches of the regexp
   215  // pattern with the replacement string repl. The replacement repl is substituted
   216  // directly.
   217  func ReplaceAllLiteral(pattern, src, repl string) (string, error) {
   218  	re, err := regexp.Compile(pattern)
   219  	if err != nil {
   220  		return "", err
   221  	}
   222  	return re.ReplaceAllLiteralString(src, repl), nil
   223  }
   224  
   225  // Valid reports whether the given regular expression
   226  // is valid.
   227  func Valid(pattern string) (bool, error) {
   228  	_, err := regexp.Compile(pattern)
   229  	return err == nil, err
   230  }
   231  

View as plain text