1 package regexp2
2
3 import (
4 "bufio"
5 "bytes"
6 "fmt"
7 "log"
8 "os"
9 "regexp"
10 "strconv"
11 "strings"
12 "testing"
13 "time"
14 )
15
16
// Running tallies shared by the tests in this file.
var (
	// totalCount is incremented once for every pattern read from the transcript.
	totalCount = 0
	// failCount is incremented by problem each time an expectation fails.
	failCount = 0
)
18
19 func TestPcre_Basics(t *testing.T) {
20 defer func() {
21 if failCount > 0 {
22 t.Logf("%v of %v patterns failed", failCount, totalCount)
23 }
24 }()
25
26
27 file, err := os.Open("testoutput1")
28 if err != nil {
29 log.Fatal(err)
30 }
31 defer file.Close()
32
33
34
35
36
37
38
39
40
41
42
43
44 scanner := bufio.NewScanner(file)
45
46 for scanner.Scan() {
47
48 line := scanner.Text()
49
50 if trim := strings.TrimSpace(line); trim == "" || strings.HasPrefix(trim, "#") {
51
52 continue
53 }
54
55 patternStart := line[0]
56 if patternStart != '/' && patternStart != '"' {
57
58 t.Fatalf("Unknown file format, expected line to start with '/' or '\"', line in: %v", line)
59 }
60
61
62 pattern := line
63 totalCount++
64
65
66
67
68 allowFirst := false
69 for !containsEnder(line, patternStart, allowFirst) {
70 if !scanner.Scan() {
71
72 t.Fatalf("Unknown file format, expected more pattern text, but got EOF, pattern so far: %v", pattern)
73 }
74 line = scanner.Text()
75 pattern += fmt.Sprintf("\n%s", line)
76 allowFirst = true
77 }
78
79
80 re := compileRawPattern(t, pattern)
81
82 var (
83 capsIdx map[int]int
84 m *Match
85 toMatch string
86 )
87
88
89
90 for scanner.Scan() {
91 line = scanner.Text()
92
93
94 if strings.TrimSpace(line) == "" {
95 break
96 }
97
98
99 if strings.HasPrefix(line, "\\= Expect") {
100 continue
101 } else if strings.HasPrefix(line, " ") {
102
103 toMatch = line[4:]
104
105 toMatch = strings.TrimRight(toMatch, " ")
106
107 m = matchString(t, re, toMatch)
108
109 capsIdx = make(map[int]int)
110 continue
111
112 } else if strings.HasPrefix(line, "No match") {
113 validateNoMatch(t, re, m)
114
115 continue
116 } else if subs := matchGroup.FindStringSubmatch(line); len(subs) == 3 {
117 gIdx, _ := strconv.Atoi(subs[1])
118 if _, ok := capsIdx[gIdx]; !ok {
119 capsIdx[gIdx] = 0
120 }
121 validateMatch(t, re, m, toMatch, subs[2], gIdx, capsIdx[gIdx])
122 capsIdx[gIdx]++
123 continue
124 } else {
125
126 t.Fatalf("Unknown file format, expected match or match group but got '%v'", line)
127 }
128 }
129
130 }
131
132 if err := scanner.Err(); err != nil {
133 log.Fatal(err)
134 }
135 }
136
var (
	// matchGroup recognizes a transcript result line of the form "N: value",
	// optionally preceded by whitespace. Submatch 1 is the group number and
	// submatch 2 the expected text.
	matchGroup = regexp.MustCompile(`^\s*(\d+): (.*)`)
)
138
139 func problem(t *testing.T, input string, args ...interface{}) {
140 failCount++
141 t.Errorf(input, args...)
142 }
143
144 func validateNoMatch(t *testing.T, re *Regexp, m *Match) {
145 if re == nil || m == nil {
146 return
147 }
148
149 problem(t, "Expected no match for pattern '%v', but got '%v'", re.pattern, m.String())
150 }
151
152 func validateMatch(t *testing.T, re *Regexp, m *Match, toMatch, value string, idx, capIdx int) {
153 if re == nil {
154
155 return
156 }
157
158 if m == nil {
159
160 problem(t, "Expected match for pattern '%v' with input '%v', but got no match", re.pattern, toMatch)
161 return
162 }
163
164 g := m.Groups()
165 if len(g) <= idx {
166 problem(t, "Expected group %v does not exist in pattern '%v' with input '%v'", idx, re.pattern, toMatch)
167 return
168 }
169
170 if value == "<unset>" {
171
172 if len(g[idx].Captures) > 0 {
173 problem(t, "Expected no cap %v in group %v in pattern '%v' with input '%v'", g[idx].Captures[capIdx].String(), idx, re.pattern, toMatch)
174 }
175
176 return
177 }
178
179 if len(g[idx].Captures) <= capIdx {
180 problem(t, "Expected cap %v does not exist in group %v in pattern '%v' with input '%v'", capIdx, idx, re.pattern, toMatch)
181 return
182 }
183
184 escp := unEscapeGroup(g[idx].String())
185
186 if escp != value {
187 problem(t, "Expected '%v' but got '%v' for cap %v, group %v for pattern '%v' with input '%v'", value, escp, capIdx, idx, re.pattern, toMatch)
188 return
189 }
190 }
191
192 func compileRawPattern(t *testing.T, pattern string) *Regexp {
193
194 index := strings.LastIndexAny(pattern, "/\"")
195
196
197
198 var opts RegexOptions
199
200 if index+1 < len(pattern) {
201 textOptions := pattern[index+1:]
202 pattern = pattern[:index+1]
203
204 for _, textOpt := range strings.Split(textOptions, ",") {
205 switch textOpt {
206 case "dupnames":
207
208 default:
209 if strings.Contains(textOpt, "i") {
210 opts |= IgnoreCase
211 }
212 if strings.Contains(textOpt, "s") {
213 opts |= Singleline
214 }
215 if strings.Contains(textOpt, "m") {
216 opts |= Multiline
217 }
218 if strings.Contains(textOpt, "x") {
219 opts |= IgnorePatternWhitespace
220 }
221 }
222 }
223
224 }
225
226
227 pattern = pattern[1 : len(pattern)-1]
228
229 defer func() {
230 if rec := recover(); rec != nil {
231 problem(t, "PANIC in compiling \"%v\": %v", pattern, rec)
232 }
233 }()
234 re, err := Compile(pattern, opts)
235 if err != nil {
236 problem(t, "Error parsing \"%v\": %v", pattern, err)
237 }
238 return re
239 }
240
241 func matchString(t *testing.T, re *Regexp, toMatch string) *Match {
242 if re == nil {
243 return nil
244 }
245
246 re.MatchTimeout = time.Second * 1
247
248 escp := ""
249 var err error
250 if toMatch != "\\" {
251 escp = unEscapeToMatch(toMatch)
252 }
253 m, err := re.FindStringMatch(escp)
254 if err != nil {
255 problem(t, "Error matching \"%v\" in pattern \"%v\": %v", toMatch, re.pattern, err)
256 }
257 return m
258 }
259
// containsEnder reports whether line contains the closing delimiter ender.
// An occurrence at index 0 only counts when allowFirst is true, because on
// the first line of a pattern that position holds the opening delimiter.
func containsEnder(line string, ender byte, allowFirst bool) bool {
	switch last := strings.LastIndexByte(line, ender); {
	case last > 0:
		return true
	case last == 0:
		return allowFirst
	default:
		return false
	}
}
269
270 func unEscapeToMatch(line string) string {
271 idx := strings.IndexRune(line, '\\')
272
273 if idx == -1 {
274 return line
275 }
276
277 buf := bytes.NewBufferString(line[:idx])
278
279
280 inEscape := false
281
282 for i := idx; i < len(line); i++ {
283 ch := line[i]
284 if ch == '\\' {
285 if inEscape {
286 buf.WriteByte(ch)
287 }
288 inEscape = !inEscape
289 continue
290 }
291 if inEscape {
292 switch ch {
293 case 'x':
294 buf.WriteByte(scanHex(line, &i))
295 case 'a':
296 buf.WriteByte(0x07)
297 case 'b':
298 buf.WriteByte('\b')
299 case 'e':
300 buf.WriteByte(0x1b)
301 case 'f':
302 buf.WriteByte('\f')
303 case 'n':
304 buf.WriteByte('\n')
305 case 'r':
306 buf.WriteByte('\r')
307 case 't':
308 buf.WriteByte('\t')
309 case 'v':
310 buf.WriteByte(0x0b)
311 default:
312 if ch >= '0' && ch <= '7' {
313 buf.WriteByte(scanOctal(line, &i))
314 } else {
315 buf.WriteByte(ch)
316
317 }
318 }
319 inEscape = false
320 } else {
321 buf.WriteByte(ch)
322 }
323 }
324
325 return buf.String()
326 }
327
// unEscapeGroup renders val the way the transcript writes captured text:
// bytes in the printable ASCII range 0x20-0x7e are copied as they are, and
// every other byte is written as \x followed by two lowercase hex digits.
func unEscapeGroup(val string) string {
	var out bytes.Buffer

	for _, b := range []byte(val) {
		if printable := b > 0x1f && b < 0x7f; printable {
			out.WriteByte(b)
			continue
		}
		fmt.Fprintf(&out, "\\x%.2x", b)
	}

	return out.String()
}
345
346 func scanHex(line string, idx *int) byte {
347 if *idx >= len(line)-2 {
348 panic(fmt.Sprintf("not enough hex chars in %v at %v", line, *idx))
349 }
350 (*idx)++
351 d1 := hexDigit(line[*idx])
352 (*idx)++
353 d2 := hexDigit(line[*idx])
354 if d1 < 0 || d2 < 0 {
355 panic("bad hex chars")
356 }
357
358 return byte(d1*0x10 + d2)
359 }
360
361
// hexDigit returns the numeric value (0-15) of the hex digit ch, accepting
// both lowercase and uppercase letters, or -1 when ch is not a hex digit.
func hexDigit(ch byte) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= ch && ch <= 'f':
		return int(ch-'a') + 0xa
	case 'A' <= ch && ch <= 'F':
		return int(ch-'A') + 0xa
	}
	return -1
}
378
379
// scanOctal decodes up to three octal digits of line starting at *idx and
// returns the value truncated to its low eight bits. On return *idx points
// at the last digit consumed, or at *idx-1 when the byte at *idx is not an
// octal digit. Decoding stops early at the first non-octal byte or at the
// end of line.
func scanOctal(line string, idx *int) byte {
	start := *idx
	// Access the first byte unconditionally so that an out-of-range *idx
	// panics before anything is modified.
	_ = line[start]

	end := start + 3
	if end > len(line) {
		end = len(line)
	}

	val := 0
	pos := start
	for ; pos < end; pos++ {
		// Byte arithmetic wraps, so anything below '0' also lands above 7.
		digit := line[pos] - '0'
		if digit > 7 {
			break
		}
		val = val*8 + int(digit)
	}

	*idx = pos - 1

	// Three octal digits can reach 0777; keep only the low byte.
	return byte(val & 0xFF)
}
410
View as plain text