...

Source file src/github.com/dlclark/regexp2/syntax/parser.go

Documentation: github.com/dlclark/regexp2/syntax

     1  package syntax
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"os"
     7  	"sort"
     8  	"strconv"
     9  	"unicode"
    10  )
    11  
    12  type RegexOptions int32
    13  
    14  const (
    15  	IgnoreCase              RegexOptions = 0x0001 // "i"
    16  	Multiline                            = 0x0002 // "m"
    17  	ExplicitCapture                      = 0x0004 // "n"
    18  	Compiled                             = 0x0008 // "c"
    19  	Singleline                           = 0x0010 // "s"
    20  	IgnorePatternWhitespace              = 0x0020 // "x"
    21  	RightToLeft                          = 0x0040 // "r"
    22  	Debug                                = 0x0080 // "d"
    23  	ECMAScript                           = 0x0100 // "e"
    24  	RE2                                  = 0x0200 // RE2 compat mode
    25  	Unicode                              = 0x0400 // "u"
    26  )
    27  
    28  func optionFromCode(ch rune) RegexOptions {
    29  	// case-insensitive
    30  	switch ch {
    31  	case 'i', 'I':
    32  		return IgnoreCase
    33  	case 'r', 'R':
    34  		return RightToLeft
    35  	case 'm', 'M':
    36  		return Multiline
    37  	case 'n', 'N':
    38  		return ExplicitCapture
    39  	case 's', 'S':
    40  		return Singleline
    41  	case 'x', 'X':
    42  		return IgnorePatternWhitespace
    43  	case 'd', 'D':
    44  		return Debug
    45  	case 'e', 'E':
    46  		return ECMAScript
    47  	case 'u', 'U':
    48  		return Unicode
    49  	default:
    50  		return 0
    51  	}
    52  }
    53  
    54  // An Error describes a failure to parse a regular expression
    55  // and gives the offending expression.
    56  type Error struct {
    57  	Code ErrorCode
    58  	Expr string
    59  	Args []interface{}
    60  }
    61  
    62  func (e *Error) Error() string {
    63  	if len(e.Args) == 0 {
    64  		return "error parsing regexp: " + e.Code.String() + " in `" + e.Expr + "`"
    65  	}
    66  	return "error parsing regexp: " + fmt.Sprintf(e.Code.String(), e.Args...) + " in `" + e.Expr + "`"
    67  }
    68  
    69  // An ErrorCode describes a failure to parse a regular expression.
    70  type ErrorCode string
    71  
    72  const (
    73  	// internal issue
    74  	ErrInternalError ErrorCode = "regexp/syntax: internal error"
    75  	// Parser errors
    76  	ErrUnterminatedComment        = "unterminated comment"
    77  	ErrInvalidCharRange           = "invalid character class range"
    78  	ErrInvalidRepeatSize          = "invalid repeat count"
    79  	ErrInvalidUTF8                = "invalid UTF-8"
    80  	ErrCaptureGroupOutOfRange     = "capture group number out of range"
    81  	ErrUnexpectedParen            = "unexpected )"
    82  	ErrMissingParen               = "missing closing )"
    83  	ErrMissingBrace               = "missing closing }"
    84  	ErrInvalidRepeatOp            = "invalid nested repetition operator"
    85  	ErrMissingRepeatArgument      = "missing argument to repetition operator"
    86  	ErrConditionalExpression      = "illegal conditional (?(...)) expression"
    87  	ErrTooManyAlternates          = "too many | in (?()|)"
    88  	ErrUnrecognizedGrouping       = "unrecognized grouping construct: (%v"
    89  	ErrInvalidGroupName           = "invalid group name: group names must begin with a word character and have a matching terminator"
    90  	ErrCapNumNotZero              = "capture number cannot be zero"
    91  	ErrUndefinedBackRef           = "reference to undefined group number %v"
    92  	ErrUndefinedNameRef           = "reference to undefined group name %v"
    93  	ErrAlternationCantCapture     = "alternation conditions do not capture and cannot be named"
    94  	ErrAlternationCantHaveComment = "alternation conditions cannot be comments"
    95  	ErrMalformedReference         = "(?(%v) ) malformed"
    96  	ErrUndefinedReference         = "(?(%v) ) reference to undefined group"
    97  	ErrIllegalEndEscape           = "illegal \\ at end of pattern"
    98  	ErrMalformedSlashP            = "malformed \\p{X} character escape"
    99  	ErrIncompleteSlashP           = "incomplete \\p{X} character escape"
   100  	ErrUnknownSlashP              = "unknown unicode category, script, or property '%v'"
   101  	ErrUnrecognizedEscape         = "unrecognized escape sequence \\%v"
   102  	ErrMissingControl             = "missing control character"
   103  	ErrUnrecognizedControl        = "unrecognized control character"
   104  	ErrTooFewHex                  = "insufficient hexadecimal digits"
   105  	ErrInvalidHex                 = "hex values may not be larger than 0x10FFFF"
   106  	ErrMalformedNameRef           = "malformed \\k<...> named back reference"
   107  	ErrBadClassInCharRange        = "cannot include class \\%v in character range"
   108  	ErrUnterminatedBracket        = "unterminated [] set"
   109  	ErrSubtractionMustBeLast      = "a subtraction must be the last element in a character class"
   110  	ErrReversedCharRange          = "[%c-%c] range in reverse order"
   111  )
   112  
   113  func (e ErrorCode) String() string {
   114  	return string(e)
   115  }
   116  
   117  type parser struct {
   118  	stack         *regexNode
   119  	group         *regexNode
   120  	alternation   *regexNode
   121  	concatenation *regexNode
   122  	unit          *regexNode
   123  
   124  	patternRaw string
   125  	pattern    []rune
   126  
   127  	currentPos  int
   128  	specialCase *unicode.SpecialCase
   129  
   130  	autocap  int
   131  	capcount int
   132  	captop   int
   133  	capsize  int
   134  
   135  	caps     map[int]int
   136  	capnames map[string]int
   137  
   138  	capnumlist  []int
   139  	capnamelist []string
   140  
   141  	options         RegexOptions
   142  	optionsStack    []RegexOptions
   143  	ignoreNextParen bool
   144  }
   145  
   146  const (
   147  	maxValueDiv10 int = math.MaxInt32 / 10
   148  	maxValueMod10     = math.MaxInt32 % 10
   149  )
   150  
   151  // Parse converts a regex string into a parse tree
   152  func Parse(re string, op RegexOptions) (*RegexTree, error) {
   153  	p := parser{
   154  		options: op,
   155  		caps:    make(map[int]int),
   156  	}
   157  	p.setPattern(re)
   158  
   159  	if err := p.countCaptures(); err != nil {
   160  		return nil, err
   161  	}
   162  
   163  	p.reset(op)
   164  	root, err := p.scanRegex()
   165  
   166  	if err != nil {
   167  		return nil, err
   168  	}
   169  	tree := &RegexTree{
   170  		root:       root,
   171  		caps:       p.caps,
   172  		capnumlist: p.capnumlist,
   173  		captop:     p.captop,
   174  		Capnames:   p.capnames,
   175  		Caplist:    p.capnamelist,
   176  		options:    op,
   177  	}
   178  
   179  	if tree.options&Debug > 0 {
   180  		os.Stdout.WriteString(tree.Dump())
   181  	}
   182  
   183  	return tree, nil
   184  }
   185  
   186  func (p *parser) setPattern(pattern string) {
   187  	p.patternRaw = pattern
   188  	p.pattern = make([]rune, 0, len(pattern))
   189  
   190  	//populate our rune array to handle utf8 encoding
   191  	for _, r := range pattern {
   192  		p.pattern = append(p.pattern, r)
   193  	}
   194  }
   195  func (p *parser) getErr(code ErrorCode, args ...interface{}) error {
   196  	return &Error{Code: code, Expr: p.patternRaw, Args: args}
   197  }
   198  
   199  func (p *parser) noteCaptureSlot(i, pos int) {
   200  	if _, ok := p.caps[i]; !ok {
   201  		// the rhs of the hashtable isn't used in the parser
   202  		p.caps[i] = pos
   203  		p.capcount++
   204  
   205  		if p.captop <= i {
   206  			if i == math.MaxInt32 {
   207  				p.captop = i
   208  			} else {
   209  				p.captop = i + 1
   210  			}
   211  		}
   212  	}
   213  }
   214  
   215  func (p *parser) noteCaptureName(name string, pos int) {
   216  	if p.capnames == nil {
   217  		p.capnames = make(map[string]int)
   218  	}
   219  
   220  	if _, ok := p.capnames[name]; !ok {
   221  		p.capnames[name] = pos
   222  		p.capnamelist = append(p.capnamelist, name)
   223  	}
   224  }
   225  
   226  func (p *parser) assignNameSlots() {
   227  	if p.capnames != nil {
   228  		for _, name := range p.capnamelist {
   229  			for p.isCaptureSlot(p.autocap) {
   230  				p.autocap++
   231  			}
   232  			pos := p.capnames[name]
   233  			p.capnames[name] = p.autocap
   234  			p.noteCaptureSlot(p.autocap, pos)
   235  
   236  			p.autocap++
   237  		}
   238  	}
   239  
   240  	// if the caps array has at least one gap, construct the list of used slots
   241  	if p.capcount < p.captop {
   242  		p.capnumlist = make([]int, p.capcount)
   243  		i := 0
   244  
   245  		for k := range p.caps {
   246  			p.capnumlist[i] = k
   247  			i++
   248  		}
   249  
   250  		sort.Ints(p.capnumlist)
   251  	}
   252  
   253  	// merge capsnumlist into capnamelist
   254  	if p.capnames != nil || p.capnumlist != nil {
   255  		var oldcapnamelist []string
   256  		var next int
   257  		var k int
   258  
   259  		if p.capnames == nil {
   260  			oldcapnamelist = nil
   261  			p.capnames = make(map[string]int)
   262  			p.capnamelist = []string{}
   263  			next = -1
   264  		} else {
   265  			oldcapnamelist = p.capnamelist
   266  			p.capnamelist = []string{}
   267  			next = p.capnames[oldcapnamelist[0]]
   268  		}
   269  
   270  		for i := 0; i < p.capcount; i++ {
   271  			j := i
   272  			if p.capnumlist != nil {
   273  				j = p.capnumlist[i]
   274  			}
   275  
   276  			if next == j {
   277  				p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
   278  				k++
   279  
   280  				if k == len(oldcapnamelist) {
   281  					next = -1
   282  				} else {
   283  					next = p.capnames[oldcapnamelist[k]]
   284  				}
   285  
   286  			} else {
   287  				//feature: culture?
   288  				str := strconv.Itoa(j)
   289  				p.capnamelist = append(p.capnamelist, str)
   290  				p.capnames[str] = j
   291  			}
   292  		}
   293  	}
   294  }
   295  
   296  func (p *parser) consumeAutocap() int {
   297  	r := p.autocap
   298  	p.autocap++
   299  	return r
   300  }
   301  
   302  // CountCaptures is a prescanner for deducing the slots used for
   303  // captures by doing a partial tokenization of the pattern.
   304  func (p *parser) countCaptures() error {
   305  	var ch rune
   306  
   307  	p.noteCaptureSlot(0, 0)
   308  
   309  	p.autocap = 1
   310  
   311  	for p.charsRight() > 0 {
   312  		pos := p.textpos()
   313  		ch = p.moveRightGetChar()
   314  		switch ch {
   315  		case '\\':
   316  			if p.charsRight() > 0 {
   317  				p.scanBackslash(true)
   318  			}
   319  
   320  		case '#':
   321  			if p.useOptionX() {
   322  				p.moveLeft()
   323  				p.scanBlank()
   324  			}
   325  
   326  		case '[':
   327  			p.scanCharSet(false, true)
   328  
   329  		case ')':
   330  			if !p.emptyOptionsStack() {
   331  				p.popOptions()
   332  			}
   333  
   334  		case '(':
   335  			if p.charsRight() >= 2 && p.rightChar(1) == '#' && p.rightChar(0) == '?' {
   336  				p.moveLeft()
   337  				p.scanBlank()
   338  			} else {
   339  				p.pushOptions()
   340  				if p.charsRight() > 0 && p.rightChar(0) == '?' {
   341  					// we have (?...
   342  					p.moveRight(1)
   343  
   344  					if p.charsRight() > 1 && (p.rightChar(0) == '<' || p.rightChar(0) == '\'') {
   345  						// named group: (?<... or (?'...
   346  
   347  						p.moveRight(1)
   348  						ch = p.rightChar(0)
   349  
   350  						if ch != '0' && IsWordChar(ch) {
   351  							if ch >= '1' && ch <= '9' {
   352  								dec, err := p.scanDecimal()
   353  								if err != nil {
   354  									return err
   355  								}
   356  								p.noteCaptureSlot(dec, pos)
   357  							} else {
   358  								p.noteCaptureName(p.scanCapname(), pos)
   359  							}
   360  						}
   361  					} else if p.useRE2() && p.charsRight() > 2 && (p.rightChar(0) == 'P' && p.rightChar(1) == '<') {
   362  						// RE2-compat (?P<)
   363  						p.moveRight(2)
   364  						ch = p.rightChar(0)
   365  						if IsWordChar(ch) {
   366  							p.noteCaptureName(p.scanCapname(), pos)
   367  						}
   368  
   369  					} else {
   370  						// (?...
   371  
   372  						// get the options if it's an option construct (?cimsx-cimsx...)
   373  						p.scanOptions()
   374  
   375  						if p.charsRight() > 0 {
   376  							if p.rightChar(0) == ')' {
   377  								// (?cimsx-cimsx)
   378  								p.moveRight(1)
   379  								p.popKeepOptions()
   380  							} else if p.rightChar(0) == '(' {
   381  								// alternation construct: (?(foo)yes|no)
   382  								// ignore the next paren so we don't capture the condition
   383  								p.ignoreNextParen = true
   384  
   385  								// break from here so we don't reset ignoreNextParen
   386  								continue
   387  							}
   388  						}
   389  					}
   390  				} else {
   391  					if !p.useOptionN() && !p.ignoreNextParen {
   392  						p.noteCaptureSlot(p.consumeAutocap(), pos)
   393  					}
   394  				}
   395  			}
   396  
   397  			p.ignoreNextParen = false
   398  
   399  		}
   400  	}
   401  
   402  	p.assignNameSlots()
   403  	return nil
   404  }
   405  
   406  func (p *parser) reset(topopts RegexOptions) {
   407  	p.currentPos = 0
   408  	p.autocap = 1
   409  	p.ignoreNextParen = false
   410  
   411  	if len(p.optionsStack) > 0 {
   412  		p.optionsStack = p.optionsStack[:0]
   413  	}
   414  
   415  	p.options = topopts
   416  	p.stack = nil
   417  }
   418  
   419  func (p *parser) scanRegex() (*regexNode, error) {
   420  	ch := '@' // nonspecial ch, means at beginning
   421  	isQuant := false
   422  
   423  	p.startGroup(newRegexNodeMN(ntCapture, p.options, 0, -1))
   424  
   425  	for p.charsRight() > 0 {
   426  		wasPrevQuantifier := isQuant
   427  		isQuant = false
   428  
   429  		if err := p.scanBlank(); err != nil {
   430  			return nil, err
   431  		}
   432  
   433  		startpos := p.textpos()
   434  
   435  		// move past all of the normal characters.  We'll stop when we hit some kind of control character,
   436  		// or if IgnorePatternWhiteSpace is on, we'll stop when we see some whitespace.
   437  		if p.useOptionX() {
   438  			for p.charsRight() > 0 {
   439  				ch = p.rightChar(0)
   440  				//UGLY: clean up, this is ugly
   441  				if !(!isStopperX(ch) || (ch == '{' && !p.isTrueQuantifier())) {
   442  					break
   443  				}
   444  				p.moveRight(1)
   445  			}
   446  		} else {
   447  			for p.charsRight() > 0 {
   448  				ch = p.rightChar(0)
   449  				if !(!isSpecial(ch) || ch == '{' && !p.isTrueQuantifier()) {
   450  					break
   451  				}
   452  				p.moveRight(1)
   453  			}
   454  		}
   455  
   456  		endpos := p.textpos()
   457  
   458  		p.scanBlank()
   459  
   460  		if p.charsRight() == 0 {
   461  			ch = '!' // nonspecial, means at end
   462  		} else if ch = p.rightChar(0); isSpecial(ch) {
   463  			isQuant = isQuantifier(ch)
   464  			p.moveRight(1)
   465  		} else {
   466  			ch = ' ' // nonspecial, means at ordinary char
   467  		}
   468  
   469  		if startpos < endpos {
   470  			cchUnquantified := endpos - startpos
   471  			if isQuant {
   472  				cchUnquantified--
   473  			}
   474  			wasPrevQuantifier = false
   475  
   476  			if cchUnquantified > 0 {
   477  				p.addToConcatenate(startpos, cchUnquantified, false)
   478  			}
   479  
   480  			if isQuant {
   481  				p.addUnitOne(p.charAt(endpos - 1))
   482  			}
   483  		}
   484  
   485  		switch ch {
   486  		case '!':
   487  			goto BreakOuterScan
   488  
   489  		case ' ':
   490  			goto ContinueOuterScan
   491  
   492  		case '[':
   493  			cc, err := p.scanCharSet(p.useOptionI(), false)
   494  			if err != nil {
   495  				return nil, err
   496  			}
   497  			p.addUnitSet(cc)
   498  
   499  		case '(':
   500  			p.pushOptions()
   501  
   502  			if grouper, err := p.scanGroupOpen(); err != nil {
   503  				return nil, err
   504  			} else if grouper == nil {
   505  				p.popKeepOptions()
   506  			} else {
   507  				p.pushGroup()
   508  				p.startGroup(grouper)
   509  			}
   510  
   511  			continue
   512  
   513  		case '|':
   514  			p.addAlternate()
   515  			goto ContinueOuterScan
   516  
   517  		case ')':
   518  			if p.emptyStack() {
   519  				return nil, p.getErr(ErrUnexpectedParen)
   520  			}
   521  
   522  			if err := p.addGroup(); err != nil {
   523  				return nil, err
   524  			}
   525  			if err := p.popGroup(); err != nil {
   526  				return nil, err
   527  			}
   528  			p.popOptions()
   529  
   530  			if p.unit == nil {
   531  				goto ContinueOuterScan
   532  			}
   533  
   534  		case '\\':
   535  			n, err := p.scanBackslash(false)
   536  			if err != nil {
   537  				return nil, err
   538  			}
   539  			p.addUnitNode(n)
   540  
   541  		case '^':
   542  			if p.useOptionM() {
   543  				p.addUnitType(ntBol)
   544  			} else {
   545  				p.addUnitType(ntBeginning)
   546  			}
   547  
   548  		case '$':
   549  			if p.useOptionM() {
   550  				p.addUnitType(ntEol)
   551  			} else {
   552  				p.addUnitType(ntEndZ)
   553  			}
   554  
   555  		case '.':
   556  			if p.useOptionE() {
   557  				p.addUnitSet(ECMAAnyClass())
   558  			} else if p.useOptionS() {
   559  				p.addUnitSet(AnyClass())
   560  			} else {
   561  				p.addUnitNotone('\n')
   562  			}
   563  
   564  		case '{', '*', '+', '?':
   565  			if p.unit == nil {
   566  				if wasPrevQuantifier {
   567  					return nil, p.getErr(ErrInvalidRepeatOp)
   568  				} else {
   569  					return nil, p.getErr(ErrMissingRepeatArgument)
   570  				}
   571  			}
   572  			p.moveLeft()
   573  
   574  		default:
   575  			return nil, p.getErr(ErrInternalError)
   576  		}
   577  
   578  		if err := p.scanBlank(); err != nil {
   579  			return nil, err
   580  		}
   581  
   582  		if p.charsRight() > 0 {
   583  			isQuant = p.isTrueQuantifier()
   584  		}
   585  		if p.charsRight() == 0 || !isQuant {
   586  			//maintain odd C# assignment order -- not sure if required, could clean up?
   587  			p.addConcatenate()
   588  			goto ContinueOuterScan
   589  		}
   590  
   591  		ch = p.moveRightGetChar()
   592  
   593  		// Handle quantifiers
   594  		for p.unit != nil {
   595  			var min, max int
   596  			var lazy bool
   597  
   598  			switch ch {
   599  			case '*':
   600  				min = 0
   601  				max = math.MaxInt32
   602  
   603  			case '?':
   604  				min = 0
   605  				max = 1
   606  
   607  			case '+':
   608  				min = 1
   609  				max = math.MaxInt32
   610  
   611  			case '{':
   612  				{
   613  					var err error
   614  					startpos = p.textpos()
   615  					if min, err = p.scanDecimal(); err != nil {
   616  						return nil, err
   617  					}
   618  					max = min
   619  					if startpos < p.textpos() {
   620  						if p.charsRight() > 0 && p.rightChar(0) == ',' {
   621  							p.moveRight(1)
   622  							if p.charsRight() == 0 || p.rightChar(0) == '}' {
   623  								max = math.MaxInt32
   624  							} else {
   625  								if max, err = p.scanDecimal(); err != nil {
   626  									return nil, err
   627  								}
   628  							}
   629  						}
   630  					}
   631  
   632  					if startpos == p.textpos() || p.charsRight() == 0 || p.moveRightGetChar() != '}' {
   633  						p.addConcatenate()
   634  						p.textto(startpos - 1)
   635  						goto ContinueOuterScan
   636  					}
   637  				}
   638  
   639  			default:
   640  				return nil, p.getErr(ErrInternalError)
   641  			}
   642  
   643  			if err := p.scanBlank(); err != nil {
   644  				return nil, err
   645  			}
   646  
   647  			if p.charsRight() == 0 || p.rightChar(0) != '?' {
   648  				lazy = false
   649  			} else {
   650  				p.moveRight(1)
   651  				lazy = true
   652  			}
   653  
   654  			if min > max {
   655  				return nil, p.getErr(ErrInvalidRepeatSize)
   656  			}
   657  
   658  			p.addConcatenate3(lazy, min, max)
   659  		}
   660  
   661  	ContinueOuterScan:
   662  	}
   663  
   664  BreakOuterScan:
   665  	;
   666  
   667  	if !p.emptyStack() {
   668  		return nil, p.getErr(ErrMissingParen)
   669  	}
   670  
   671  	if err := p.addGroup(); err != nil {
   672  		return nil, err
   673  	}
   674  
   675  	return p.unit, nil
   676  
   677  }
   678  
   679  /*
   680   * Simple parsing for replacement patterns
   681   */
   682  func (p *parser) scanReplacement() (*regexNode, error) {
   683  	var c, startpos int
   684  
   685  	p.concatenation = newRegexNode(ntConcatenate, p.options)
   686  
   687  	for {
   688  		c = p.charsRight()
   689  		if c == 0 {
   690  			break
   691  		}
   692  
   693  		startpos = p.textpos()
   694  
   695  		for c > 0 && p.rightChar(0) != '$' {
   696  			p.moveRight(1)
   697  			c--
   698  		}
   699  
   700  		p.addToConcatenate(startpos, p.textpos()-startpos, true)
   701  
   702  		if c > 0 {
   703  			if p.moveRightGetChar() == '$' {
   704  				n, err := p.scanDollar()
   705  				if err != nil {
   706  					return nil, err
   707  				}
   708  				p.addUnitNode(n)
   709  			}
   710  			p.addConcatenate()
   711  		}
   712  	}
   713  
   714  	return p.concatenation, nil
   715  }
   716  
   717  /*
   718   * Scans $ patterns recognized within replacement patterns
   719   */
   720  func (p *parser) scanDollar() (*regexNode, error) {
   721  	if p.charsRight() == 0 {
   722  		return newRegexNodeCh(ntOne, p.options, '$'), nil
   723  	}
   724  
   725  	ch := p.rightChar(0)
   726  	angled := false
   727  	backpos := p.textpos()
   728  	lastEndPos := backpos
   729  
   730  	// Note angle
   731  
   732  	if ch == '{' && p.charsRight() > 1 {
   733  		angled = true
   734  		p.moveRight(1)
   735  		ch = p.rightChar(0)
   736  	}
   737  
   738  	// Try to parse backreference: \1 or \{1} or \{cap}
   739  
   740  	if ch >= '0' && ch <= '9' {
   741  		if !angled && p.useOptionE() {
   742  			capnum := -1
   743  			newcapnum := int(ch - '0')
   744  			p.moveRight(1)
   745  			if p.isCaptureSlot(newcapnum) {
   746  				capnum = newcapnum
   747  				lastEndPos = p.textpos()
   748  			}
   749  
   750  			for p.charsRight() > 0 {
   751  				ch = p.rightChar(0)
   752  				if ch < '0' || ch > '9' {
   753  					break
   754  				}
   755  				digit := int(ch - '0')
   756  				if newcapnum > maxValueDiv10 || (newcapnum == maxValueDiv10 && digit > maxValueMod10) {
   757  					return nil, p.getErr(ErrCaptureGroupOutOfRange)
   758  				}
   759  
   760  				newcapnum = newcapnum*10 + digit
   761  
   762  				p.moveRight(1)
   763  				if p.isCaptureSlot(newcapnum) {
   764  					capnum = newcapnum
   765  					lastEndPos = p.textpos()
   766  				}
   767  			}
   768  			p.textto(lastEndPos)
   769  			if capnum >= 0 {
   770  				return newRegexNodeM(ntRef, p.options, capnum), nil
   771  			}
   772  		} else {
   773  			capnum, err := p.scanDecimal()
   774  			if err != nil {
   775  				return nil, err
   776  			}
   777  			if !angled || p.charsRight() > 0 && p.moveRightGetChar() == '}' {
   778  				if p.isCaptureSlot(capnum) {
   779  					return newRegexNodeM(ntRef, p.options, capnum), nil
   780  				}
   781  			}
   782  		}
   783  	} else if angled && IsWordChar(ch) {
   784  		capname := p.scanCapname()
   785  
   786  		if p.charsRight() > 0 && p.moveRightGetChar() == '}' {
   787  			if p.isCaptureName(capname) {
   788  				return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
   789  			}
   790  		}
   791  	} else if !angled {
   792  		capnum := 1
   793  
   794  		switch ch {
   795  		case '$':
   796  			p.moveRight(1)
   797  			return newRegexNodeCh(ntOne, p.options, '$'), nil
   798  		case '&':
   799  			capnum = 0
   800  		case '`':
   801  			capnum = replaceLeftPortion
   802  		case '\'':
   803  			capnum = replaceRightPortion
   804  		case '+':
   805  			capnum = replaceLastGroup
   806  		case '_':
   807  			capnum = replaceWholeString
   808  		}
   809  
   810  		if capnum != 1 {
   811  			p.moveRight(1)
   812  			return newRegexNodeM(ntRef, p.options, capnum), nil
   813  		}
   814  	}
   815  
   816  	// unrecognized $: literalize
   817  
   818  	p.textto(backpos)
   819  	return newRegexNodeCh(ntOne, p.options, '$'), nil
   820  }
   821  
   822  // scanGroupOpen scans chars following a '(' (not counting the '('), and returns
   823  // a RegexNode for the type of group scanned, or nil if the group
   824  // simply changed options (?cimsx-cimsx) or was a comment (#...).
   825  func (p *parser) scanGroupOpen() (*regexNode, error) {
   826  	var ch rune
   827  	var nt nodeType
   828  	var err error
   829  	close := '>'
   830  	start := p.textpos()
   831  
   832  	// just return a RegexNode if we have:
   833  	// 1. "(" followed by nothing
   834  	// 2. "(x" where x != ?
   835  	// 3. "(?)"
   836  	if p.charsRight() == 0 || p.rightChar(0) != '?' || (p.rightChar(0) == '?' && (p.charsRight() > 1 && p.rightChar(1) == ')')) {
   837  		if p.useOptionN() || p.ignoreNextParen {
   838  			p.ignoreNextParen = false
   839  			return newRegexNode(ntGroup, p.options), nil
   840  		}
   841  		return newRegexNodeMN(ntCapture, p.options, p.consumeAutocap(), -1), nil
   842  	}
   843  
   844  	p.moveRight(1)
   845  
   846  	for {
   847  		if p.charsRight() == 0 {
   848  			break
   849  		}
   850  
   851  		switch ch = p.moveRightGetChar(); ch {
   852  		case ':':
   853  			nt = ntGroup
   854  
   855  		case '=':
   856  			p.options &= ^RightToLeft
   857  			nt = ntRequire
   858  
   859  		case '!':
   860  			p.options &= ^RightToLeft
   861  			nt = ntPrevent
   862  
   863  		case '>':
   864  			nt = ntGreedy
   865  
   866  		case '\'':
   867  			close = '\''
   868  			fallthrough
   869  
   870  		case '<':
   871  			if p.charsRight() == 0 {
   872  				goto BreakRecognize
   873  			}
   874  
   875  			switch ch = p.moveRightGetChar(); ch {
   876  			case '=':
   877  				if close == '\'' {
   878  					goto BreakRecognize
   879  				}
   880  
   881  				p.options |= RightToLeft
   882  				nt = ntRequire
   883  
   884  			case '!':
   885  				if close == '\'' {
   886  					goto BreakRecognize
   887  				}
   888  
   889  				p.options |= RightToLeft
   890  				nt = ntPrevent
   891  
   892  			default:
   893  				p.moveLeft()
   894  				capnum := -1
   895  				uncapnum := -1
   896  				proceed := false
   897  
   898  				// grab part before -
   899  
   900  				if ch >= '0' && ch <= '9' {
   901  					if capnum, err = p.scanDecimal(); err != nil {
   902  						return nil, err
   903  					}
   904  
   905  					if !p.isCaptureSlot(capnum) {
   906  						capnum = -1
   907  					}
   908  
   909  					// check if we have bogus characters after the number
   910  					if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
   911  						return nil, p.getErr(ErrInvalidGroupName)
   912  					}
   913  					if capnum == 0 {
   914  						return nil, p.getErr(ErrCapNumNotZero)
   915  					}
   916  				} else if IsWordChar(ch) {
   917  					capname := p.scanCapname()
   918  
   919  					if p.isCaptureName(capname) {
   920  						capnum = p.captureSlotFromName(capname)
   921  					}
   922  
   923  					// check if we have bogus character after the name
   924  					if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
   925  						return nil, p.getErr(ErrInvalidGroupName)
   926  					}
   927  				} else if ch == '-' {
   928  					proceed = true
   929  				} else {
   930  					// bad group name - starts with something other than a word character and isn't a number
   931  					return nil, p.getErr(ErrInvalidGroupName)
   932  				}
   933  
   934  				// grab part after - if any
   935  
   936  				if (capnum != -1 || proceed == true) && p.charsRight() > 0 && p.rightChar(0) == '-' {
   937  					p.moveRight(1)
   938  
   939  					//no more chars left, no closing char, etc
   940  					if p.charsRight() == 0 {
   941  						return nil, p.getErr(ErrInvalidGroupName)
   942  					}
   943  
   944  					ch = p.rightChar(0)
   945  					if ch >= '0' && ch <= '9' {
   946  						if uncapnum, err = p.scanDecimal(); err != nil {
   947  							return nil, err
   948  						}
   949  
   950  						if !p.isCaptureSlot(uncapnum) {
   951  							return nil, p.getErr(ErrUndefinedBackRef, uncapnum)
   952  						}
   953  
   954  						// check if we have bogus characters after the number
   955  						if p.charsRight() > 0 && p.rightChar(0) != close {
   956  							return nil, p.getErr(ErrInvalidGroupName)
   957  						}
   958  					} else if IsWordChar(ch) {
   959  						uncapname := p.scanCapname()
   960  
   961  						if !p.isCaptureName(uncapname) {
   962  							return nil, p.getErr(ErrUndefinedNameRef, uncapname)
   963  						}
   964  						uncapnum = p.captureSlotFromName(uncapname)
   965  
   966  						// check if we have bogus character after the name
   967  						if p.charsRight() > 0 && p.rightChar(0) != close {
   968  							return nil, p.getErr(ErrInvalidGroupName)
   969  						}
   970  					} else {
   971  						// bad group name - starts with something other than a word character and isn't a number
   972  						return nil, p.getErr(ErrInvalidGroupName)
   973  					}
   974  				}
   975  
   976  				// actually make the node
   977  
   978  				if (capnum != -1 || uncapnum != -1) && p.charsRight() > 0 && p.moveRightGetChar() == close {
   979  					return newRegexNodeMN(ntCapture, p.options, capnum, uncapnum), nil
   980  				}
   981  				goto BreakRecognize
   982  			}
   983  
   984  		case '(':
   985  			// alternation construct (?(...) | )
   986  
   987  			parenPos := p.textpos()
   988  			if p.charsRight() > 0 {
   989  				ch = p.rightChar(0)
   990  
   991  				// check if the alternation condition is a backref
   992  				if ch >= '0' && ch <= '9' {
   993  					var capnum int
   994  					if capnum, err = p.scanDecimal(); err != nil {
   995  						return nil, err
   996  					}
   997  					if p.charsRight() > 0 && p.moveRightGetChar() == ')' {
   998  						if p.isCaptureSlot(capnum) {
   999  							return newRegexNodeM(ntTestref, p.options, capnum), nil
  1000  						}
  1001  						return nil, p.getErr(ErrUndefinedReference, capnum)
  1002  					}
  1003  
  1004  					return nil, p.getErr(ErrMalformedReference, capnum)
  1005  
  1006  				} else if IsWordChar(ch) {
  1007  					capname := p.scanCapname()
  1008  
  1009  					if p.isCaptureName(capname) && p.charsRight() > 0 && p.moveRightGetChar() == ')' {
  1010  						return newRegexNodeM(ntTestref, p.options, p.captureSlotFromName(capname)), nil
  1011  					}
  1012  				}
  1013  			}
  1014  			// not a backref
  1015  			nt = ntTestgroup
  1016  			p.textto(parenPos - 1)   // jump to the start of the parentheses
  1017  			p.ignoreNextParen = true // but make sure we don't try to capture the insides
  1018  
  1019  			charsRight := p.charsRight()
  1020  			if charsRight >= 3 && p.rightChar(1) == '?' {
  1021  				rightchar2 := p.rightChar(2)
  1022  				// disallow comments in the condition
  1023  				if rightchar2 == '#' {
  1024  					return nil, p.getErr(ErrAlternationCantHaveComment)
  1025  				}
  1026  
  1027  				// disallow named capture group (?<..>..) in the condition
  1028  				if rightchar2 == '\'' {
  1029  					return nil, p.getErr(ErrAlternationCantCapture)
  1030  				}
  1031  
  1032  				if charsRight >= 4 && (rightchar2 == '<' && p.rightChar(3) != '!' && p.rightChar(3) != '=') {
  1033  					return nil, p.getErr(ErrAlternationCantCapture)
  1034  				}
  1035  			}
  1036  
  1037  		case 'P':
  1038  			if p.useRE2() {
  1039  				// support for P<name> syntax
  1040  				if p.charsRight() < 3 {
  1041  					goto BreakRecognize
  1042  				}
  1043  
  1044  				ch = p.moveRightGetChar()
  1045  				if ch != '<' {
  1046  					goto BreakRecognize
  1047  				}
  1048  
  1049  				ch = p.moveRightGetChar()
  1050  				p.moveLeft()
  1051  
  1052  				if IsWordChar(ch) {
  1053  					capnum := -1
  1054  					capname := p.scanCapname()
  1055  
  1056  					if p.isCaptureName(capname) {
  1057  						capnum = p.captureSlotFromName(capname)
  1058  					}
  1059  
  1060  					// check if we have bogus character after the name
  1061  					if p.charsRight() > 0 && p.rightChar(0) != '>' {
  1062  						return nil, p.getErr(ErrInvalidGroupName)
  1063  					}
  1064  
  1065  					// actually make the node
  1066  
  1067  					if capnum != -1 && p.charsRight() > 0 && p.moveRightGetChar() == '>' {
  1068  						return newRegexNodeMN(ntCapture, p.options, capnum, -1), nil
  1069  					}
  1070  					goto BreakRecognize
  1071  
  1072  				} else {
  1073  					// bad group name - starts with something other than a word character and isn't a number
  1074  					return nil, p.getErr(ErrInvalidGroupName)
  1075  				}
  1076  			}
  1077  			// if we're not using RE2 compat mode then
  1078  			// we just behave like normal
  1079  			fallthrough
  1080  
  1081  		default:
  1082  			p.moveLeft()
  1083  
  1084  			nt = ntGroup
  1085  			// disallow options in the children of a testgroup node
  1086  			if p.group.t != ntTestgroup {
  1087  				p.scanOptions()
  1088  			}
  1089  			if p.charsRight() == 0 {
  1090  				goto BreakRecognize
  1091  			}
  1092  
  1093  			if ch = p.moveRightGetChar(); ch == ')' {
  1094  				return nil, nil
  1095  			}
  1096  
  1097  			if ch != ':' {
  1098  				goto BreakRecognize
  1099  			}
  1100  
  1101  		}
  1102  
  1103  		return newRegexNode(nt, p.options), nil
  1104  	}
  1105  
  1106  BreakRecognize:
  1107  
  1108  	// break Recognize comes here
  1109  
  1110  	return nil, p.getErr(ErrUnrecognizedGrouping, string(p.pattern[start:p.textpos()]))
  1111  }
  1112  
  1113  // scans backslash specials and basics
  1114  func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {
  1115  
  1116  	if p.charsRight() == 0 {
  1117  		return nil, p.getErr(ErrIllegalEndEscape)
  1118  	}
  1119  
  1120  	switch ch := p.rightChar(0); ch {
  1121  	case 'b', 'B', 'A', 'G', 'Z', 'z':
  1122  		p.moveRight(1)
  1123  		return newRegexNode(p.typeFromCode(ch), p.options), nil
  1124  
  1125  	case 'w':
  1126  		p.moveRight(1)
  1127  		if p.useOptionE() || p.useRE2() {
  1128  			return newRegexNodeSet(ntSet, p.options, ECMAWordClass()), nil
  1129  		}
  1130  		return newRegexNodeSet(ntSet, p.options, WordClass()), nil
  1131  
  1132  	case 'W':
  1133  		p.moveRight(1)
  1134  		if p.useOptionE() || p.useRE2() {
  1135  			return newRegexNodeSet(ntSet, p.options, NotECMAWordClass()), nil
  1136  		}
  1137  		return newRegexNodeSet(ntSet, p.options, NotWordClass()), nil
  1138  
  1139  	case 's':
  1140  		p.moveRight(1)
  1141  		if p.useOptionE() {
  1142  			return newRegexNodeSet(ntSet, p.options, ECMASpaceClass()), nil
  1143  		} else if p.useRE2() {
  1144  			return newRegexNodeSet(ntSet, p.options, RE2SpaceClass()), nil
  1145  		}
  1146  		return newRegexNodeSet(ntSet, p.options, SpaceClass()), nil
  1147  
  1148  	case 'S':
  1149  		p.moveRight(1)
  1150  		if p.useOptionE() {
  1151  			return newRegexNodeSet(ntSet, p.options, NotECMASpaceClass()), nil
  1152  		} else if p.useRE2() {
  1153  			return newRegexNodeSet(ntSet, p.options, NotRE2SpaceClass()), nil
  1154  		}
  1155  		return newRegexNodeSet(ntSet, p.options, NotSpaceClass()), nil
  1156  
  1157  	case 'd':
  1158  		p.moveRight(1)
  1159  		if p.useOptionE() || p.useRE2() {
  1160  			return newRegexNodeSet(ntSet, p.options, ECMADigitClass()), nil
  1161  		}
  1162  		return newRegexNodeSet(ntSet, p.options, DigitClass()), nil
  1163  
  1164  	case 'D':
  1165  		p.moveRight(1)
  1166  		if p.useOptionE() || p.useRE2() {
  1167  			return newRegexNodeSet(ntSet, p.options, NotECMADigitClass()), nil
  1168  		}
  1169  		return newRegexNodeSet(ntSet, p.options, NotDigitClass()), nil
  1170  
  1171  	case 'p', 'P':
  1172  		p.moveRight(1)
  1173  		prop, err := p.parseProperty()
  1174  		if err != nil {
  1175  			return nil, err
  1176  		}
  1177  		cc := &CharSet{}
  1178  		cc.addCategory(prop, (ch != 'p'), p.useOptionI(), p.patternRaw)
  1179  		if p.useOptionI() {
  1180  			cc.addLowercase()
  1181  		}
  1182  
  1183  		return newRegexNodeSet(ntSet, p.options, cc), nil
  1184  
  1185  	default:
  1186  		return p.scanBasicBackslash(scanOnly)
  1187  	}
  1188  }
  1189  
  1190  // Scans \-style backreferences and character escapes
  1191  func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
  1192  	if p.charsRight() == 0 {
  1193  		return nil, p.getErr(ErrIllegalEndEscape)
  1194  	}
  1195  	angled := false
  1196  	k := false
  1197  	close := '\x00'
  1198  
  1199  	backpos := p.textpos()
  1200  	ch := p.rightChar(0)
  1201  
  1202  	// Allow \k<foo> instead of \<foo>, which is now deprecated.
  1203  
  1204  	// According to ECMAScript specification, \k<name> is only parsed as a named group reference if
  1205  	// there is at least one group name in the regexp.
  1206  	// See https://www.ecma-international.org/ecma-262/#sec-isvalidregularexpressionliteral, step 7.
  1207  	// Note, during the first (scanOnly) run we may not have all group names scanned, but that's ok.
  1208  	if ch == 'k' && (!p.useOptionE() || len(p.capnames) > 0) {
  1209  		if p.charsRight() >= 2 {
  1210  			p.moveRight(1)
  1211  			ch = p.moveRightGetChar()
  1212  
  1213  			if ch == '<' || (!p.useOptionE() && ch == '\'') { // No support for \k'name' in ECMAScript
  1214  				angled = true
  1215  				if ch == '\'' {
  1216  					close = '\''
  1217  				} else {
  1218  					close = '>'
  1219  				}
  1220  			}
  1221  		}
  1222  
  1223  		if !angled || p.charsRight() <= 0 {
  1224  			return nil, p.getErr(ErrMalformedNameRef)
  1225  		}
  1226  
  1227  		ch = p.rightChar(0)
  1228  		k = true
  1229  
  1230  	} else if !p.useOptionE() && (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
  1231  		angled = true
  1232  		if ch == '\'' {
  1233  			close = '\''
  1234  		} else {
  1235  			close = '>'
  1236  		}
  1237  
  1238  		p.moveRight(1)
  1239  		ch = p.rightChar(0)
  1240  	}
  1241  
  1242  	// Try to parse backreference: \<1> or \<cap>
  1243  
  1244  	if angled && ch >= '0' && ch <= '9' {
  1245  		capnum, err := p.scanDecimal()
  1246  		if err != nil {
  1247  			return nil, err
  1248  		}
  1249  
  1250  		if p.charsRight() > 0 && p.moveRightGetChar() == close {
  1251  			if p.isCaptureSlot(capnum) {
  1252  				return newRegexNodeM(ntRef, p.options, capnum), nil
  1253  			}
  1254  			return nil, p.getErr(ErrUndefinedBackRef, capnum)
  1255  		}
  1256  	} else if !angled && ch >= '1' && ch <= '9' { // Try to parse backreference or octal: \1
  1257  		capnum, err := p.scanDecimal()
  1258  		if err != nil {
  1259  			return nil, err
  1260  		}
  1261  
  1262  		if scanOnly {
  1263  			return nil, nil
  1264  		}
  1265  
  1266  		if p.isCaptureSlot(capnum) {
  1267  			return newRegexNodeM(ntRef, p.options, capnum), nil
  1268  		}
  1269  		if capnum <= 9 && !p.useOptionE() {
  1270  			return nil, p.getErr(ErrUndefinedBackRef, capnum)
  1271  		}
  1272  
  1273  	} else if angled {
  1274  		capname := p.scanCapname()
  1275  
  1276  		if capname != "" && p.charsRight() > 0 && p.moveRightGetChar() == close {
  1277  
  1278  			if scanOnly {
  1279  				return nil, nil
  1280  			}
  1281  
  1282  			if p.isCaptureName(capname) {
  1283  				return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
  1284  			}
  1285  			return nil, p.getErr(ErrUndefinedNameRef, capname)
  1286  		} else {
  1287  			if k {
  1288  				return nil, p.getErr(ErrMalformedNameRef)
  1289  			}
  1290  		}
  1291  	}
  1292  
  1293  	// Not backreference: must be char code
  1294  
  1295  	p.textto(backpos)
  1296  	ch, err := p.scanCharEscape()
  1297  	if err != nil {
  1298  		return nil, err
  1299  	}
  1300  
  1301  	if scanOnly {
  1302  		return nil, nil
  1303  	}
  1304  
  1305  	if p.useOptionI() {
  1306  		ch = unicode.ToLower(ch)
  1307  	}
  1308  
  1309  	return newRegexNodeCh(ntOne, p.options, ch), nil
  1310  }
  1311  
  1312  // Scans X for \p{X} or \P{X}
  1313  func (p *parser) parseProperty() (string, error) {
  1314  	// RE2 and PCRE supports \pX syntax (no {} and only 1 letter unicode cats supported)
  1315  	// since this is purely additive syntax it's not behind a flag
  1316  	if p.charsRight() >= 1 && p.rightChar(0) != '{' {
  1317  		ch := string(p.moveRightGetChar())
  1318  		// check if it's a valid cat
  1319  		if !isValidUnicodeCat(ch) {
  1320  			return "", p.getErr(ErrUnknownSlashP, ch)
  1321  		}
  1322  		return ch, nil
  1323  	}
  1324  
  1325  	if p.charsRight() < 3 {
  1326  		return "", p.getErr(ErrIncompleteSlashP)
  1327  	}
  1328  	ch := p.moveRightGetChar()
  1329  	if ch != '{' {
  1330  		return "", p.getErr(ErrMalformedSlashP)
  1331  	}
  1332  
  1333  	startpos := p.textpos()
  1334  	for p.charsRight() > 0 {
  1335  		ch = p.moveRightGetChar()
  1336  		if !(IsWordChar(ch) || ch == '-') {
  1337  			p.moveLeft()
  1338  			break
  1339  		}
  1340  	}
  1341  	capname := string(p.pattern[startpos:p.textpos()])
  1342  
  1343  	if p.charsRight() == 0 || p.moveRightGetChar() != '}' {
  1344  		return "", p.getErr(ErrIncompleteSlashP)
  1345  	}
  1346  
  1347  	if !isValidUnicodeCat(capname) {
  1348  		return "", p.getErr(ErrUnknownSlashP, capname)
  1349  	}
  1350  
  1351  	return capname, nil
  1352  }
  1353  
  1354  // Returns ReNode type for zero-length assertions with a \ code.
  1355  func (p *parser) typeFromCode(ch rune) nodeType {
  1356  	switch ch {
  1357  	case 'b':
  1358  		if p.useOptionE() {
  1359  			return ntECMABoundary
  1360  		}
  1361  		return ntBoundary
  1362  	case 'B':
  1363  		if p.useOptionE() {
  1364  			return ntNonECMABoundary
  1365  		}
  1366  		return ntNonboundary
  1367  	case 'A':
  1368  		return ntBeginning
  1369  	case 'G':
  1370  		return ntStart
  1371  	case 'Z':
  1372  		return ntEndZ
  1373  	case 'z':
  1374  		return ntEnd
  1375  	default:
  1376  		return ntNothing
  1377  	}
  1378  }
  1379  
  1380  // Scans whitespace or x-mode comments.
  1381  func (p *parser) scanBlank() error {
  1382  	if p.useOptionX() {
  1383  		for {
  1384  			for p.charsRight() > 0 && isSpace(p.rightChar(0)) {
  1385  				p.moveRight(1)
  1386  			}
  1387  
  1388  			if p.charsRight() == 0 {
  1389  				break
  1390  			}
  1391  
  1392  			if p.rightChar(0) == '#' {
  1393  				for p.charsRight() > 0 && p.rightChar(0) != '\n' {
  1394  					p.moveRight(1)
  1395  				}
  1396  			} else if p.charsRight() >= 3 && p.rightChar(2) == '#' &&
  1397  				p.rightChar(1) == '?' && p.rightChar(0) == '(' {
  1398  				for p.charsRight() > 0 && p.rightChar(0) != ')' {
  1399  					p.moveRight(1)
  1400  				}
  1401  				if p.charsRight() == 0 {
  1402  					return p.getErr(ErrUnterminatedComment)
  1403  				}
  1404  				p.moveRight(1)
  1405  			} else {
  1406  				break
  1407  			}
  1408  		}
  1409  	} else {
  1410  		for {
  1411  			if p.charsRight() < 3 || p.rightChar(2) != '#' ||
  1412  				p.rightChar(1) != '?' || p.rightChar(0) != '(' {
  1413  				return nil
  1414  			}
  1415  
  1416  			for p.charsRight() > 0 && p.rightChar(0) != ')' {
  1417  				p.moveRight(1)
  1418  			}
  1419  			if p.charsRight() == 0 {
  1420  				return p.getErr(ErrUnterminatedComment)
  1421  			}
  1422  			p.moveRight(1)
  1423  		}
  1424  	}
  1425  	return nil
  1426  }
  1427  
  1428  func (p *parser) scanCapname() string {
  1429  	startpos := p.textpos()
  1430  
  1431  	for p.charsRight() > 0 {
  1432  		if !IsWordChar(p.moveRightGetChar()) {
  1433  			p.moveLeft()
  1434  			break
  1435  		}
  1436  	}
  1437  
  1438  	return string(p.pattern[startpos:p.textpos()])
  1439  }
  1440  
  1441  // Scans contents of [] (not including []'s), and converts to a set.
  1442  func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
  1443  	ch := '\x00'
  1444  	chPrev := '\x00'
  1445  	inRange := false
  1446  	firstChar := true
  1447  	closed := false
  1448  
  1449  	var cc *CharSet
  1450  	if !scanOnly {
  1451  		cc = &CharSet{}
  1452  	}
  1453  
  1454  	if p.charsRight() > 0 && p.rightChar(0) == '^' {
  1455  		p.moveRight(1)
  1456  		if !scanOnly {
  1457  			cc.negate = true
  1458  		}
  1459  	}
  1460  
  1461  	for ; p.charsRight() > 0; firstChar = false {
  1462  		fTranslatedChar := false
  1463  		ch = p.moveRightGetChar()
  1464  		if ch == ']' {
  1465  			if !firstChar {
  1466  				closed = true
  1467  				break
  1468  			} else if p.useOptionE() {
  1469  				if !scanOnly {
  1470  					cc.addRanges(NoneClass().ranges)
  1471  				}
  1472  				closed = true
  1473  				break
  1474  			}
  1475  
  1476  		} else if ch == '\\' && p.charsRight() > 0 {
  1477  			switch ch = p.moveRightGetChar(); ch {
  1478  			case 'D', 'd':
  1479  				if !scanOnly {
  1480  					if inRange {
  1481  						return nil, p.getErr(ErrBadClassInCharRange, ch)
  1482  					}
  1483  					cc.addDigit(p.useOptionE() || p.useRE2(), ch == 'D', p.patternRaw)
  1484  				}
  1485  				continue
  1486  
  1487  			case 'S', 's':
  1488  				if !scanOnly {
  1489  					if inRange {
  1490  						return nil, p.getErr(ErrBadClassInCharRange, ch)
  1491  					}
  1492  					cc.addSpace(p.useOptionE(), p.useRE2(), ch == 'S')
  1493  				}
  1494  				continue
  1495  
  1496  			case 'W', 'w':
  1497  				if !scanOnly {
  1498  					if inRange {
  1499  						return nil, p.getErr(ErrBadClassInCharRange, ch)
  1500  					}
  1501  
  1502  					cc.addWord(p.useOptionE() || p.useRE2(), ch == 'W')
  1503  				}
  1504  				continue
  1505  
  1506  			case 'p', 'P':
  1507  				if !scanOnly {
  1508  					if inRange {
  1509  						return nil, p.getErr(ErrBadClassInCharRange, ch)
  1510  					}
  1511  					prop, err := p.parseProperty()
  1512  					if err != nil {
  1513  						return nil, err
  1514  					}
  1515  					cc.addCategory(prop, (ch != 'p'), caseInsensitive, p.patternRaw)
  1516  				} else {
  1517  					p.parseProperty()
  1518  				}
  1519  
  1520  				continue
  1521  
  1522  			case '-':
  1523  				if !scanOnly {
  1524  					cc.addRange(ch, ch)
  1525  				}
  1526  				continue
  1527  
  1528  			default:
  1529  				p.moveLeft()
  1530  				var err error
  1531  				ch, err = p.scanCharEscape() // non-literal character
  1532  				if err != nil {
  1533  					return nil, err
  1534  				}
  1535  				fTranslatedChar = true
  1536  				break // this break will only break out of the switch
  1537  			}
  1538  		} else if ch == '[' {
  1539  			// This is code for Posix style properties - [:Ll:] or [:IsTibetan:].
  1540  			// It currently doesn't do anything other than skip the whole thing!
  1541  			if p.charsRight() > 0 && p.rightChar(0) == ':' && !inRange {
  1542  				savePos := p.textpos()
  1543  
  1544  				p.moveRight(1)
  1545  				negate := false
  1546  				if p.charsRight() > 1 && p.rightChar(0) == '^' {
  1547  					negate = true
  1548  					p.moveRight(1)
  1549  				}
  1550  
  1551  				nm := p.scanCapname() // snag the name
  1552  				if !scanOnly && p.useRE2() {
  1553  					// look up the name since these are valid for RE2
  1554  					// add the group based on the name
  1555  					if ok := cc.addNamedASCII(nm, negate); !ok {
  1556  						return nil, p.getErr(ErrInvalidCharRange)
  1557  					}
  1558  				}
  1559  				if p.charsRight() < 2 || p.moveRightGetChar() != ':' || p.moveRightGetChar() != ']' {
  1560  					p.textto(savePos)
  1561  				} else if p.useRE2() {
  1562  					// move on
  1563  					continue
  1564  				}
  1565  			}
  1566  		}
  1567  
  1568  		if inRange {
  1569  			inRange = false
  1570  			if !scanOnly {
  1571  				if ch == '[' && !fTranslatedChar && !firstChar {
  1572  					// We thought we were in a range, but we're actually starting a subtraction.
  1573  					// In that case, we'll add chPrev to our char class, skip the opening [, and
  1574  					// scan the new character class recursively.
  1575  					cc.addChar(chPrev)
  1576  					sub, err := p.scanCharSet(caseInsensitive, false)
  1577  					if err != nil {
  1578  						return nil, err
  1579  					}
  1580  					cc.addSubtraction(sub)
  1581  
  1582  					if p.charsRight() > 0 && p.rightChar(0) != ']' {
  1583  						return nil, p.getErr(ErrSubtractionMustBeLast)
  1584  					}
  1585  				} else {
  1586  					// a regular range, like a-z
  1587  					if chPrev > ch {
  1588  						return nil, p.getErr(ErrReversedCharRange, chPrev, ch)
  1589  					}
  1590  					cc.addRange(chPrev, ch)
  1591  				}
  1592  			}
  1593  		} else if p.charsRight() >= 2 && p.rightChar(0) == '-' && p.rightChar(1) != ']' {
  1594  			// this could be the start of a range
  1595  			chPrev = ch
  1596  			inRange = true
  1597  			p.moveRight(1)
  1598  		} else if p.charsRight() >= 1 && ch == '-' && !fTranslatedChar && p.rightChar(0) == '[' && !firstChar {
  1599  			// we aren't in a range, and now there is a subtraction.  Usually this happens
  1600  			// only when a subtraction follows a range, like [a-z-[b]]
  1601  			if !scanOnly {
  1602  				p.moveRight(1)
  1603  				sub, err := p.scanCharSet(caseInsensitive, false)
  1604  				if err != nil {
  1605  					return nil, err
  1606  				}
  1607  				cc.addSubtraction(sub)
  1608  
  1609  				if p.charsRight() > 0 && p.rightChar(0) != ']' {
  1610  					return nil, p.getErr(ErrSubtractionMustBeLast)
  1611  				}
  1612  			} else {
  1613  				p.moveRight(1)
  1614  				p.scanCharSet(caseInsensitive, true)
  1615  			}
  1616  		} else {
  1617  			if !scanOnly {
  1618  				cc.addRange(ch, ch)
  1619  			}
  1620  		}
  1621  	}
  1622  
  1623  	if !closed {
  1624  		return nil, p.getErr(ErrUnterminatedBracket)
  1625  	}
  1626  
  1627  	if !scanOnly && caseInsensitive {
  1628  		cc.addLowercase()
  1629  	}
  1630  
  1631  	return cc, nil
  1632  }
  1633  
  1634  // Scans any number of decimal digits (pegs value at 2^31-1 if too large)
  1635  func (p *parser) scanDecimal() (int, error) {
  1636  	i := 0
  1637  	var d int
  1638  
  1639  	for p.charsRight() > 0 {
  1640  		d = int(p.rightChar(0) - '0')
  1641  		if d < 0 || d > 9 {
  1642  			break
  1643  		}
  1644  		p.moveRight(1)
  1645  
  1646  		if i > maxValueDiv10 || (i == maxValueDiv10 && d > maxValueMod10) {
  1647  			return 0, p.getErr(ErrCaptureGroupOutOfRange)
  1648  		}
  1649  
  1650  		i *= 10
  1651  		i += d
  1652  	}
  1653  
  1654  	return int(i), nil
  1655  }
  1656  
  1657  // Returns true for options allowed only at the top level
  1658  func isOnlyTopOption(option RegexOptions) bool {
  1659  	return option == RightToLeft || option == ECMAScript || option == RE2
  1660  }
  1661  
  1662  // Scans cimsx-cimsx option string, stops at the first unrecognized char.
  1663  func (p *parser) scanOptions() {
  1664  
  1665  	for off := false; p.charsRight() > 0; p.moveRight(1) {
  1666  		ch := p.rightChar(0)
  1667  
  1668  		if ch == '-' {
  1669  			off = true
  1670  		} else if ch == '+' {
  1671  			off = false
  1672  		} else {
  1673  			option := optionFromCode(ch)
  1674  			if option == 0 || isOnlyTopOption(option) {
  1675  				return
  1676  			}
  1677  
  1678  			if off {
  1679  				p.options &= ^option
  1680  			} else {
  1681  				p.options |= option
  1682  			}
  1683  		}
  1684  	}
  1685  }
  1686  
  1687  // Scans \ code for escape codes that map to single unicode chars.
  1688  func (p *parser) scanCharEscape() (r rune, err error) {
  1689  
  1690  	ch := p.moveRightGetChar()
  1691  
  1692  	if ch >= '0' && ch <= '7' {
  1693  		p.moveLeft()
  1694  		return p.scanOctal(), nil
  1695  	}
  1696  
  1697  	pos := p.textpos()
  1698  
  1699  	switch ch {
  1700  	case 'x':
  1701  		// support for \x{HEX} syntax from Perl and PCRE
  1702  		if p.charsRight() > 0 && p.rightChar(0) == '{' {
  1703  			if p.useOptionE() {
  1704  				return ch, nil
  1705  			}
  1706  			p.moveRight(1)
  1707  			return p.scanHexUntilBrace()
  1708  		} else {
  1709  			r, err = p.scanHex(2)
  1710  		}
  1711  	case 'u':
  1712  		// ECMAscript suppot \u{HEX} only if `u` is also set
  1713  		if p.useOptionE() && p.useOptionU() && p.charsRight() > 0 && p.rightChar(0) == '{' {
  1714  			p.moveRight(1)
  1715  			return p.scanHexUntilBrace()
  1716  		} else {
  1717  			r, err = p.scanHex(4)
  1718  		}
  1719  	case 'a':
  1720  		return '\u0007', nil
  1721  	case 'b':
  1722  		return '\b', nil
  1723  	case 'e':
  1724  		return '\u001B', nil
  1725  	case 'f':
  1726  		return '\f', nil
  1727  	case 'n':
  1728  		return '\n', nil
  1729  	case 'r':
  1730  		return '\r', nil
  1731  	case 't':
  1732  		return '\t', nil
  1733  	case 'v':
  1734  		return '\u000B', nil
  1735  	case 'c':
  1736  		r, err = p.scanControl()
  1737  	default:
  1738  		if !p.useOptionE() && !p.useRE2() && IsWordChar(ch) {
  1739  			return 0, p.getErr(ErrUnrecognizedEscape, string(ch))
  1740  		}
  1741  		return ch, nil
  1742  	}
  1743  	if err != nil && p.useOptionE() {
  1744  		p.textto(pos)
  1745  		return ch, nil
  1746  	}
  1747  	return
  1748  }
  1749  
  1750  // Grabs and converts an ascii control character
  1751  func (p *parser) scanControl() (rune, error) {
  1752  	if p.charsRight() <= 0 {
  1753  		return 0, p.getErr(ErrMissingControl)
  1754  	}
  1755  
  1756  	ch := p.moveRightGetChar()
  1757  
  1758  	// \ca interpreted as \cA
  1759  
  1760  	if ch >= 'a' && ch <= 'z' {
  1761  		ch = (ch - ('a' - 'A'))
  1762  	}
  1763  	ch = (ch - '@')
  1764  	if ch >= 0 && ch < ' ' {
  1765  		return ch, nil
  1766  	}
  1767  
  1768  	return 0, p.getErr(ErrUnrecognizedControl)
  1769  
  1770  }
  1771  
  1772  // Scan hex digits until we hit a closing brace.
  1773  // Non-hex digits, hex value too large for UTF-8, or running out of chars are errors
  1774  func (p *parser) scanHexUntilBrace() (rune, error) {
  1775  	// PCRE spec reads like unlimited hex digits are allowed, but unicode has a limit
  1776  	// so we can enforce that
  1777  	i := 0
  1778  	hasContent := false
  1779  
  1780  	for p.charsRight() > 0 {
  1781  		ch := p.moveRightGetChar()
  1782  		if ch == '}' {
  1783  			// hit our close brace, we're done here
  1784  			// prevent \x{}
  1785  			if !hasContent {
  1786  				return 0, p.getErr(ErrTooFewHex)
  1787  			}
  1788  			return rune(i), nil
  1789  		}
  1790  		hasContent = true
  1791  		// no brace needs to be hex digit
  1792  		d := hexDigit(ch)
  1793  		if d < 0 {
  1794  			return 0, p.getErr(ErrMissingBrace)
  1795  		}
  1796  
  1797  		i *= 0x10
  1798  		i += d
  1799  
  1800  		if i > unicode.MaxRune {
  1801  			return 0, p.getErr(ErrInvalidHex)
  1802  		}
  1803  	}
  1804  
  1805  	// we only make it here if we run out of digits without finding the brace
  1806  	return 0, p.getErr(ErrMissingBrace)
  1807  }
  1808  
  1809  // Scans exactly c hex digits (c=2 for \xFF, c=4 for \uFFFF)
  1810  func (p *parser) scanHex(c int) (rune, error) {
  1811  
  1812  	i := 0
  1813  
  1814  	if p.charsRight() >= c {
  1815  		for c > 0 {
  1816  			d := hexDigit(p.moveRightGetChar())
  1817  			if d < 0 {
  1818  				break
  1819  			}
  1820  			i *= 0x10
  1821  			i += d
  1822  			c--
  1823  		}
  1824  	}
  1825  
  1826  	if c > 0 {
  1827  		return 0, p.getErr(ErrTooFewHex)
  1828  	}
  1829  
  1830  	return rune(i), nil
  1831  }
  1832  
  1833  // Returns n <= 0xF for a hex digit.
  1834  func hexDigit(ch rune) int {
  1835  
  1836  	if d := uint(ch - '0'); d <= 9 {
  1837  		return int(d)
  1838  	}
  1839  
  1840  	if d := uint(ch - 'a'); d <= 5 {
  1841  		return int(d + 0xa)
  1842  	}
  1843  
  1844  	if d := uint(ch - 'A'); d <= 5 {
  1845  		return int(d + 0xa)
  1846  	}
  1847  
  1848  	return -1
  1849  }
  1850  
  1851  // Scans up to three octal digits (stops before exceeding 0377).
  1852  func (p *parser) scanOctal() rune {
  1853  	// Consume octal chars only up to 3 digits and value 0377
  1854  
  1855  	c := 3
  1856  
  1857  	if c > p.charsRight() {
  1858  		c = p.charsRight()
  1859  	}
  1860  
  1861  	//we know the first char is good because the caller had to check
  1862  	i := 0
  1863  	d := int(p.rightChar(0) - '0')
  1864  	for c > 0 && d <= 7 && d >= 0 {
  1865  		if i >= 0x20 && p.useOptionE() {
  1866  			break
  1867  		}
  1868  		i *= 8
  1869  		i += d
  1870  		c--
  1871  
  1872  		p.moveRight(1)
  1873  		if !p.rightMost() {
  1874  			d = int(p.rightChar(0) - '0')
  1875  		}
  1876  	}
  1877  
  1878  	// Octal codes only go up to 255.  Any larger and the behavior that Perl follows
  1879  	// is simply to truncate the high bits.
  1880  	i &= 0xFF
  1881  
  1882  	return rune(i)
  1883  }
  1884  
  1885  // Returns the current parsing position.
  1886  func (p *parser) textpos() int {
  1887  	return p.currentPos
  1888  }
  1889  
  1890  // Zaps to a specific parsing position.
  1891  func (p *parser) textto(pos int) {
  1892  	p.currentPos = pos
  1893  }
  1894  
  1895  // Returns the char at the right of the current parsing position and advances to the right.
  1896  func (p *parser) moveRightGetChar() rune {
  1897  	ch := p.pattern[p.currentPos]
  1898  	p.currentPos++
  1899  	return ch
  1900  }
  1901  
  1902  // Moves the current position to the right.
  1903  func (p *parser) moveRight(i int) {
  1904  	// default would be 1
  1905  	p.currentPos += i
  1906  }
  1907  
  1908  // Moves the current parsing position one to the left.
  1909  func (p *parser) moveLeft() {
  1910  	p.currentPos--
  1911  }
  1912  
  1913  // Returns the char left of the current parsing position.
  1914  func (p *parser) charAt(i int) rune {
  1915  	return p.pattern[i]
  1916  }
  1917  
  1918  // Returns the char i chars right of the current parsing position.
  1919  func (p *parser) rightChar(i int) rune {
  1920  	// default would be 0
  1921  	return p.pattern[p.currentPos+i]
  1922  }
  1923  
  1924  // Number of characters to the right of the current parsing position.
  1925  func (p *parser) charsRight() int {
  1926  	return len(p.pattern) - p.currentPos
  1927  }
  1928  
  1929  func (p *parser) rightMost() bool {
  1930  	return p.currentPos == len(p.pattern)
  1931  }
  1932  
  1933  // Looks up the slot number for a given name
  1934  func (p *parser) captureSlotFromName(capname string) int {
  1935  	return p.capnames[capname]
  1936  }
  1937  
  1938  // True if the capture slot was noted
  1939  func (p *parser) isCaptureSlot(i int) bool {
  1940  	if p.caps != nil {
  1941  		_, ok := p.caps[i]
  1942  		return ok
  1943  	}
  1944  
  1945  	return (i >= 0 && i < p.capsize)
  1946  }
  1947  
  1948  // Looks up the slot number for a given name
  1949  func (p *parser) isCaptureName(capname string) bool {
  1950  	if p.capnames == nil {
  1951  		return false
  1952  	}
  1953  
  1954  	_, ok := p.capnames[capname]
  1955  	return ok
  1956  }
  1957  
  1958  // option shortcuts
  1959  
  1960  // True if N option disabling '(' autocapture is on.
  1961  func (p *parser) useOptionN() bool {
  1962  	return (p.options & ExplicitCapture) != 0
  1963  }
  1964  
  1965  // True if I option enabling case-insensitivity is on.
  1966  func (p *parser) useOptionI() bool {
  1967  	return (p.options & IgnoreCase) != 0
  1968  }
  1969  
  1970  // True if M option altering meaning of $ and ^ is on.
  1971  func (p *parser) useOptionM() bool {
  1972  	return (p.options & Multiline) != 0
  1973  }
  1974  
  1975  // True if S option altering meaning of . is on.
  1976  func (p *parser) useOptionS() bool {
  1977  	return (p.options & Singleline) != 0
  1978  }
  1979  
  1980  // True if X option enabling whitespace/comment mode is on.
  1981  func (p *parser) useOptionX() bool {
  1982  	return (p.options & IgnorePatternWhitespace) != 0
  1983  }
  1984  
  1985  // True if E option enabling ECMAScript behavior on.
  1986  func (p *parser) useOptionE() bool {
  1987  	return (p.options & ECMAScript) != 0
  1988  }
  1989  
  1990  // true to use RE2 compatibility parsing behavior.
  1991  func (p *parser) useRE2() bool {
  1992  	return (p.options & RE2) != 0
  1993  }
  1994  
  1995  // True if U option enabling ECMAScript's Unicode behavior on.
  1996  func (p *parser) useOptionU() bool {
  1997  	return (p.options & Unicode) != 0
  1998  }
  1999  
  2000  // True if options stack is empty.
  2001  func (p *parser) emptyOptionsStack() bool {
  2002  	return len(p.optionsStack) == 0
  2003  }
  2004  
  2005  // Finish the current quantifiable (when a quantifier is not found or is not possible)
  2006  func (p *parser) addConcatenate() {
  2007  	// The first (| inside a Testgroup group goes directly to the group
  2008  	p.concatenation.addChild(p.unit)
  2009  	p.unit = nil
  2010  }
  2011  
  2012  // Finish the current quantifiable (when a quantifier is found)
  2013  func (p *parser) addConcatenate3(lazy bool, min, max int) {
  2014  	p.concatenation.addChild(p.unit.makeQuantifier(lazy, min, max))
  2015  	p.unit = nil
  2016  }
  2017  
  2018  // Sets the current unit to a single char node
  2019  func (p *parser) addUnitOne(ch rune) {
  2020  	if p.useOptionI() {
  2021  		ch = unicode.ToLower(ch)
  2022  	}
  2023  
  2024  	p.unit = newRegexNodeCh(ntOne, p.options, ch)
  2025  }
  2026  
  2027  // Sets the current unit to a single inverse-char node
  2028  func (p *parser) addUnitNotone(ch rune) {
  2029  	if p.useOptionI() {
  2030  		ch = unicode.ToLower(ch)
  2031  	}
  2032  
  2033  	p.unit = newRegexNodeCh(ntNotone, p.options, ch)
  2034  }
  2035  
  2036  // Sets the current unit to a single set node
  2037  func (p *parser) addUnitSet(set *CharSet) {
  2038  	p.unit = newRegexNodeSet(ntSet, p.options, set)
  2039  }
  2040  
  2041  // Sets the current unit to a subtree
  2042  func (p *parser) addUnitNode(node *regexNode) {
  2043  	p.unit = node
  2044  }
  2045  
  2046  // Sets the current unit to an assertion of the specified type
  2047  func (p *parser) addUnitType(t nodeType) {
  2048  	p.unit = newRegexNode(t, p.options)
  2049  }
  2050  
  2051  // Finish the current group (in response to a ')' or end)
  2052  func (p *parser) addGroup() error {
  2053  	if p.group.t == ntTestgroup || p.group.t == ntTestref {
  2054  		p.group.addChild(p.concatenation.reverseLeft())
  2055  		if (p.group.t == ntTestref && len(p.group.children) > 2) || len(p.group.children) > 3 {
  2056  			return p.getErr(ErrTooManyAlternates)
  2057  		}
  2058  	} else {
  2059  		p.alternation.addChild(p.concatenation.reverseLeft())
  2060  		p.group.addChild(p.alternation)
  2061  	}
  2062  
  2063  	p.unit = p.group
  2064  	return nil
  2065  }
  2066  
  2067  // Pops the option stack, but keeps the current options unchanged.
  2068  func (p *parser) popKeepOptions() {
  2069  	lastIdx := len(p.optionsStack) - 1
  2070  	p.optionsStack = p.optionsStack[:lastIdx]
  2071  }
  2072  
  2073  // Recalls options from the stack.
  2074  func (p *parser) popOptions() {
  2075  	lastIdx := len(p.optionsStack) - 1
  2076  	// get the last item on the stack and then remove it by reslicing
  2077  	p.options = p.optionsStack[lastIdx]
  2078  	p.optionsStack = p.optionsStack[:lastIdx]
  2079  }
  2080  
  2081  // Saves options on a stack.
  2082  func (p *parser) pushOptions() {
  2083  	p.optionsStack = append(p.optionsStack, p.options)
  2084  }
  2085  
  2086  // Add a string to the last concatenate.
  2087  func (p *parser) addToConcatenate(pos, cch int, isReplacement bool) {
  2088  	var node *regexNode
  2089  
  2090  	if cch == 0 {
  2091  		return
  2092  	}
  2093  
  2094  	if cch > 1 {
  2095  		str := make([]rune, cch)
  2096  		copy(str, p.pattern[pos:pos+cch])
  2097  
  2098  		if p.useOptionI() && !isReplacement {
  2099  			// We do the ToLower character by character for consistency.  With surrogate chars, doing
  2100  			// a ToLower on the entire string could actually change the surrogate pair.  This is more correct
  2101  			// linguistically, but since Regex doesn't support surrogates, it's more important to be
  2102  			// consistent.
  2103  			for i := 0; i < len(str); i++ {
  2104  				str[i] = unicode.ToLower(str[i])
  2105  			}
  2106  		}
  2107  
  2108  		node = newRegexNodeStr(ntMulti, p.options, str)
  2109  	} else {
  2110  		ch := p.charAt(pos)
  2111  
  2112  		if p.useOptionI() && !isReplacement {
  2113  			ch = unicode.ToLower(ch)
  2114  		}
  2115  
  2116  		node = newRegexNodeCh(ntOne, p.options, ch)
  2117  	}
  2118  
  2119  	p.concatenation.addChild(node)
  2120  }
  2121  
  2122  // Push the parser state (in response to an open paren)
  2123  func (p *parser) pushGroup() {
  2124  	p.group.next = p.stack
  2125  	p.alternation.next = p.group
  2126  	p.concatenation.next = p.alternation
  2127  	p.stack = p.concatenation
  2128  }
  2129  
  2130  // Remember the pushed state (in response to a ')')
  2131  func (p *parser) popGroup() error {
  2132  	p.concatenation = p.stack
  2133  	p.alternation = p.concatenation.next
  2134  	p.group = p.alternation.next
  2135  	p.stack = p.group.next
  2136  
  2137  	// The first () inside a Testgroup group goes directly to the group
  2138  	if p.group.t == ntTestgroup && len(p.group.children) == 0 {
  2139  		if p.unit == nil {
  2140  			return p.getErr(ErrConditionalExpression)
  2141  		}
  2142  
  2143  		p.group.addChild(p.unit)
  2144  		p.unit = nil
  2145  	}
  2146  	return nil
  2147  }
  2148  
  2149  // True if the group stack is empty.
  2150  func (p *parser) emptyStack() bool {
  2151  	return p.stack == nil
  2152  }
  2153  
  2154  // Start a new round for the parser state (in response to an open paren or string start)
  2155  func (p *parser) startGroup(openGroup *regexNode) {
  2156  	p.group = openGroup
  2157  	p.alternation = newRegexNode(ntAlternate, p.options)
  2158  	p.concatenation = newRegexNode(ntConcatenate, p.options)
  2159  }
  2160  
  2161  // Finish the current concatenation (in response to a |)
  2162  func (p *parser) addAlternate() {
  2163  	// The | parts inside a Testgroup group go directly to the group
  2164  
  2165  	if p.group.t == ntTestgroup || p.group.t == ntTestref {
  2166  		p.group.addChild(p.concatenation.reverseLeft())
  2167  	} else {
  2168  		p.alternation.addChild(p.concatenation.reverseLeft())
  2169  	}
  2170  
  2171  	p.concatenation = newRegexNode(ntConcatenate, p.options)
  2172  }
  2173  
  2174  // For categorizing ascii characters.
  2175  
  2176  const (
  2177  	Q byte = 5 // quantifier
  2178  	S      = 4 // ordinary stopper
  2179  	Z      = 3 // ScanBlank stopper
  2180  	X      = 2 // whitespace
  2181  	E      = 1 // should be escaped
  2182  )
  2183  
  2184  var _category = []byte{
  2185  	//01  2  3  4  5  6  7  8  9  A  B  C  D  E  F  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
  2186  	0, 0, 0, 0, 0, 0, 0, 0, 0, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2187  	// !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
  2188  	X, 0, 0, Z, S, 0, 0, 0, S, S, Q, Q, 0, 0, S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q,
  2189  	//@A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
  2190  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, S, 0,
  2191  	//'a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~
  2192  	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q, S, 0, 0, 0,
  2193  }
  2194  
  2195  func isSpace(ch rune) bool {
  2196  	return (ch <= ' ' && _category[ch] == X)
  2197  }
  2198  
  2199  // Returns true for those characters that terminate a string of ordinary chars.
  2200  func isSpecial(ch rune) bool {
  2201  	return (ch <= '|' && _category[ch] >= S)
  2202  }
  2203  
  2204  // Returns true for those characters that terminate a string of ordinary chars.
  2205  func isStopperX(ch rune) bool {
  2206  	return (ch <= '|' && _category[ch] >= X)
  2207  }
  2208  
  2209  // Returns true for those characters that begin a quantifier.
  2210  func isQuantifier(ch rune) bool {
  2211  	return (ch <= '{' && _category[ch] >= Q)
  2212  }
  2213  
  2214  func (p *parser) isTrueQuantifier() bool {
  2215  	nChars := p.charsRight()
  2216  	if nChars == 0 {
  2217  		return false
  2218  	}
  2219  
  2220  	startpos := p.textpos()
  2221  	ch := p.charAt(startpos)
  2222  	if ch != '{' {
  2223  		return ch <= '{' && _category[ch] >= Q
  2224  	}
  2225  
  2226  	//UGLY: this is ugly -- the original code was ugly too
  2227  	pos := startpos
  2228  	for {
  2229  		nChars--
  2230  		if nChars <= 0 {
  2231  			break
  2232  		}
  2233  		pos++
  2234  		ch = p.charAt(pos)
  2235  		if ch < '0' || ch > '9' {
  2236  			break
  2237  		}
  2238  	}
  2239  
  2240  	if nChars == 0 || pos-startpos == 1 {
  2241  		return false
  2242  	}
  2243  	if ch == '}' {
  2244  		return true
  2245  	}
  2246  	if ch != ',' {
  2247  		return false
  2248  	}
  2249  	for {
  2250  		nChars--
  2251  		if nChars <= 0 {
  2252  			break
  2253  		}
  2254  		pos++
  2255  		ch = p.charAt(pos)
  2256  		if ch < '0' || ch > '9' {
  2257  			break
  2258  		}
  2259  	}
  2260  
  2261  	return nChars > 0 && ch == '}'
  2262  }
  2263  

View as plain text