...

Source file src/github.com/dlclark/regexp2/match.go

Documentation: github.com/dlclark/regexp2

     1  package regexp2
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  )
     7  
     8  // Match is a single regex result match that contains groups and repeated captures
     9  // 	-Groups
    10  //    -Capture
    11  type Match struct {
    12  	Group //embeded group 0
    13  
    14  	regex       *Regexp
    15  	otherGroups []Group
    16  
    17  	// input to the match
    18  	textpos   int
    19  	textstart int
    20  
    21  	capcount   int
    22  	caps       []int
    23  	sparseCaps map[int]int
    24  
    25  	// output from the match
    26  	matches    [][]int
    27  	matchcount []int
    28  
    29  	// whether we've done any balancing with this match.  If we
    30  	// have done balancing, we'll need to do extra work in Tidy().
    31  	balancing bool
    32  }
    33  
    34  // Group is an explicit or implit (group 0) matched group within the pattern
    35  type Group struct {
    36  	Capture // the last capture of this group is embeded for ease of use
    37  
    38  	Name     string    // group name
    39  	Captures []Capture // captures of this group
    40  }
    41  
    42  // Capture is a single capture of text within the larger original string
    43  type Capture struct {
    44  	// the original string
    45  	text []rune
    46  	// the position in the original string where the first character of
    47  	// captured substring was found.
    48  	Index int
    49  	// the length of the captured substring.
    50  	Length int
    51  }
    52  
    53  // String returns the captured text as a String
    54  func (c *Capture) String() string {
    55  	return string(c.text[c.Index : c.Index+c.Length])
    56  }
    57  
    58  // Runes returns the captured text as a rune slice
    59  func (c *Capture) Runes() []rune {
    60  	return c.text[c.Index : c.Index+c.Length]
    61  }
    62  
    63  func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
    64  	m := Match{
    65  		regex:      regex,
    66  		matchcount: make([]int, capcount),
    67  		matches:    make([][]int, capcount),
    68  		textstart:  startpos,
    69  		balancing:  false,
    70  	}
    71  	m.Name = "0"
    72  	m.text = text
    73  	m.matches[0] = make([]int, 2)
    74  	return &m
    75  }
    76  
    77  func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
    78  	m := newMatch(regex, capcount, text, startpos)
    79  	m.sparseCaps = caps
    80  	return m
    81  }
    82  
    83  func (m *Match) reset(text []rune, textstart int) {
    84  	m.text = text
    85  	m.textstart = textstart
    86  	for i := 0; i < len(m.matchcount); i++ {
    87  		m.matchcount[i] = 0
    88  	}
    89  	m.balancing = false
    90  }
    91  
    92  func (m *Match) tidy(textpos int) {
    93  
    94  	interval := m.matches[0]
    95  	m.Index = interval[0]
    96  	m.Length = interval[1]
    97  	m.textpos = textpos
    98  	m.capcount = m.matchcount[0]
    99  	//copy our root capture to the list
   100  	m.Group.Captures = []Capture{m.Group.Capture}
   101  
   102  	if m.balancing {
   103  		// The idea here is that we want to compact all of our unbalanced captures.  To do that we
   104  		// use j basically as a count of how many unbalanced captures we have at any given time
   105  		// (really j is an index, but j/2 is the count).  First we skip past all of the real captures
   106  		// until we find a balance captures.  Then we check each subsequent entry.  If it's a balance
   107  		// capture (it's negative), we decrement j.  If it's a real capture, we increment j and copy
   108  		// it down to the last free position.
   109  		for cap := 0; cap < len(m.matchcount); cap++ {
   110  			limit := m.matchcount[cap] * 2
   111  			matcharray := m.matches[cap]
   112  
   113  			var i, j int
   114  
   115  			for i = 0; i < limit; i++ {
   116  				if matcharray[i] < 0 {
   117  					break
   118  				}
   119  			}
   120  
   121  			for j = i; i < limit; i++ {
   122  				if matcharray[i] < 0 {
   123  					// skip negative values
   124  					j--
   125  				} else {
   126  					// but if we find something positive (an actual capture), copy it back to the last
   127  					// unbalanced position.
   128  					if i != j {
   129  						matcharray[j] = matcharray[i]
   130  					}
   131  					j++
   132  				}
   133  			}
   134  
   135  			m.matchcount[cap] = j / 2
   136  		}
   137  
   138  		m.balancing = false
   139  	}
   140  }
   141  
   142  // isMatched tells if a group was matched by capnum
   143  func (m *Match) isMatched(cap int) bool {
   144  	return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
   145  }
   146  
   147  // matchIndex returns the index of the last specified matched group by capnum
   148  func (m *Match) matchIndex(cap int) int {
   149  	i := m.matches[cap][m.matchcount[cap]*2-2]
   150  	if i >= 0 {
   151  		return i
   152  	}
   153  
   154  	return m.matches[cap][-3-i]
   155  }
   156  
   157  // matchLength returns the length of the last specified matched group by capnum
   158  func (m *Match) matchLength(cap int) int {
   159  	i := m.matches[cap][m.matchcount[cap]*2-1]
   160  	if i >= 0 {
   161  		return i
   162  	}
   163  
   164  	return m.matches[cap][-3-i]
   165  }
   166  
   167  // Nonpublic builder: add a capture to the group specified by "c"
   168  func (m *Match) addMatch(c, start, l int) {
   169  
   170  	if m.matches[c] == nil {
   171  		m.matches[c] = make([]int, 2)
   172  	}
   173  
   174  	capcount := m.matchcount[c]
   175  
   176  	if capcount*2+2 > len(m.matches[c]) {
   177  		oldmatches := m.matches[c]
   178  		newmatches := make([]int, capcount*8)
   179  		copy(newmatches, oldmatches[:capcount*2])
   180  		m.matches[c] = newmatches
   181  	}
   182  
   183  	m.matches[c][capcount*2] = start
   184  	m.matches[c][capcount*2+1] = l
   185  	m.matchcount[c] = capcount + 1
   186  	//log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
   187  }
   188  
   189  // Nonpublic builder: Add a capture to balance the specified group.  This is used by the
   190  //                     balanced match construct. (?<foo-foo2>...)
   191  //
   192  // If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
   193  // However, since we have backtracking, we need to keep track of everything.
   194  func (m *Match) balanceMatch(c int) {
   195  	m.balancing = true
   196  
   197  	// we'll look at the last capture first
   198  	capcount := m.matchcount[c]
   199  	target := capcount*2 - 2
   200  
   201  	// first see if it is negative, and therefore is a reference to the next available
   202  	// capture group for balancing.  If it is, we'll reset target to point to that capture.
   203  	if m.matches[c][target] < 0 {
   204  		target = -3 - m.matches[c][target]
   205  	}
   206  
   207  	// move back to the previous capture
   208  	target -= 2
   209  
   210  	// if the previous capture is a reference, just copy that reference to the end.  Otherwise, point to it.
   211  	if target >= 0 && m.matches[c][target] < 0 {
   212  		m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
   213  	} else {
   214  		m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
   215  	}
   216  }
   217  
   218  // Nonpublic builder: removes a group match by capnum
   219  func (m *Match) removeMatch(c int) {
   220  	m.matchcount[c]--
   221  }
   222  
   223  // GroupCount returns the number of groups this match has matched
   224  func (m *Match) GroupCount() int {
   225  	return len(m.matchcount)
   226  }
   227  
   228  // GroupByName returns a group based on the name of the group, or nil if the group name does not exist
   229  func (m *Match) GroupByName(name string) *Group {
   230  	num := m.regex.GroupNumberFromName(name)
   231  	if num < 0 {
   232  		return nil
   233  	}
   234  	return m.GroupByNumber(num)
   235  }
   236  
   237  // GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
   238  func (m *Match) GroupByNumber(num int) *Group {
   239  	// check our sparse map
   240  	if m.sparseCaps != nil {
   241  		if newNum, ok := m.sparseCaps[num]; ok {
   242  			num = newNum
   243  		}
   244  	}
   245  	if num >= len(m.matchcount) || num < 0 {
   246  		return nil
   247  	}
   248  
   249  	if num == 0 {
   250  		return &m.Group
   251  	}
   252  
   253  	m.populateOtherGroups()
   254  
   255  	return &m.otherGroups[num-1]
   256  }
   257  
   258  // Groups returns all the capture groups, starting with group 0 (the full match)
   259  func (m *Match) Groups() []Group {
   260  	m.populateOtherGroups()
   261  	g := make([]Group, len(m.otherGroups)+1)
   262  	g[0] = m.Group
   263  	copy(g[1:], m.otherGroups)
   264  	return g
   265  }
   266  
   267  func (m *Match) populateOtherGroups() {
   268  	// Construct all the Group objects first time called
   269  	if m.otherGroups == nil {
   270  		m.otherGroups = make([]Group, len(m.matchcount)-1)
   271  		for i := 0; i < len(m.otherGroups); i++ {
   272  			m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
   273  		}
   274  	}
   275  }
   276  
   277  func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
   278  	c := m.matchcount[groupnum]
   279  	if c == 0 {
   280  		return
   281  	}
   282  
   283  	matches := m.matches[groupnum]
   284  
   285  	index := matches[(c-1)*2]
   286  	last := index + matches[(c*2)-1]
   287  
   288  	for ; index < last; index++ {
   289  		buf.WriteRune(m.text[index])
   290  	}
   291  }
   292  
   293  func newGroup(name string, text []rune, caps []int, capcount int) Group {
   294  	g := Group{}
   295  	g.text = text
   296  	if capcount > 0 {
   297  		g.Index = caps[(capcount-1)*2]
   298  		g.Length = caps[(capcount*2)-1]
   299  	}
   300  	g.Name = name
   301  	g.Captures = make([]Capture, capcount)
   302  	for i := 0; i < capcount; i++ {
   303  		g.Captures[i] = Capture{
   304  			text:   text,
   305  			Index:  caps[i*2],
   306  			Length: caps[i*2+1],
   307  		}
   308  	}
   309  	//log.Printf("newGroup! capcount %v, %+v", capcount, g)
   310  
   311  	return g
   312  }
   313  
   314  func (m *Match) dump() string {
   315  	buf := &bytes.Buffer{}
   316  	buf.WriteRune('\n')
   317  	if len(m.sparseCaps) > 0 {
   318  		for k, v := range m.sparseCaps {
   319  			fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
   320  		}
   321  	}
   322  
   323  	for i, g := range m.Groups() {
   324  		fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
   325  
   326  		for _, c := range g.Captures {
   327  			fmt.Fprintf(buf, "  (%v, %v) %v\n", c.Index, c.Length, c.String())
   328  		}
   329  	}
   330  	/*
   331  		for i := 0; i < len(m.matchcount); i++ {
   332  			fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
   333  
   334  			for j := 0; j < m.matchcount[i]; j++ {
   335  				text := ""
   336  
   337  				if m.matches[i][j*2] >= 0 {
   338  					start := m.matches[i][j*2]
   339  					text = m.text[start : start+m.matches[i][j*2+1]]
   340  				}
   341  
   342  				fmt.Fprintf(buf, "  (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
   343  			}
   344  		}
   345  	*/
   346  	return buf.String()
   347  }
   348  

View as plain text