1 package parser
2
3 import (
4 "regexp"
5 "testing"
6 )
7
8 func TestRegExp(t *testing.T) {
9 tt(t, func() {
10 {
11
12 test := func(input string, expect interface{}) {
13 _, err := TransformRegExp(input)
14 _, incompat := err.(RegexpErrorIncompatible)
15 is(incompat, false)
16 is(err, expect)
17 }
18
19 test("[", "Unterminated character class")
20
21 test("(", "Unterminated group")
22
23 test("\\(?=)", "Unmatched ')'")
24
25 test(")", "Unmatched ')'")
26 test("0:(?)", "Invalid group")
27 test("(?)", "Invalid group")
28 test("(?U)", "Invalid group")
29 test("(?)|(?i)", "Invalid group")
30 test("(?P<w>)(?P<w>)(?P<D>)", "Invalid group")
31 }
32
33 {
34
35 test := func(input string, expectErr interface{}) {
36 _, err := TransformRegExp(input)
37 _, incompat := err.(RegexpErrorIncompatible)
38 is(incompat, true)
39 is(err, expectErr)
40 }
41
42 test(`<%([\s\S]+?)%>`, "S in class")
43
44 test("(?<=y)x", "re2: Invalid (?<) <lookbehind>")
45
46 test(`(?!test)`, "re2: Invalid (?!) <lookahead>")
47
48 test(`\1`, "re2: Invalid \\1 <backreference>")
49
50 test(`\8`, "re2: Invalid \\8 <backreference>")
51
52 }
53
54 {
55
56 test := func(input string, expect string) {
57 result, err := TransformRegExp(input)
58 is(err, nil)
59 _, incompat := err.(RegexpErrorIncompatible)
60 is(incompat, false)
61 is(result, expect)
62 _, err = regexp.Compile(result)
63 is(err, nil)
64 }
65
66 test("", "")
67
68 test("abc", "abc")
69
70 test(`\abc`, `abc`)
71
72 test(`\a\b\c`, `a\bc`)
73
74 test(`\x`, `x`)
75
76 test(`\c`, `c`)
77
78 test(`\cA`, `\x01`)
79
80 test(`\cz`, `\x1a`)
81
82 test(`\ca`, `\x01`)
83
84 test(`\cj`, `\x0a`)
85
86 test(`\ck`, `\x0b`)
87
88 test(`\+`, `\+`)
89
90 test(`[\b]`, `[\x08]`)
91
92 test(`\u0z01\x\undefined`, `u0z01xundefined`)
93
94 test(`\\|'|\r|\n|\t|\u2028|\u2029`, `\\|'|\r|\n|\t|\x{2028}|\x{2029}`)
95
96 test("]", "]")
97
98 test("}", "}")
99
100 test("%", "%")
101
102 test("(%)", "(%)")
103
104 test("(?:[%\\s])", "(?:[%"+WhitespaceChars+"])")
105
106 test("[[]", "[[]")
107
108 test("\\101", "\\x41")
109
110 test("\\51", "\\x29")
111
112 test("\\051", "\\x29")
113
114 test("\\175", "\\x7d")
115
116 test("\\0", "\\0")
117
118 test("\\04", "\\x04")
119
120 test(`(.)^`, "("+Re2Dot+")^")
121
122 test(`\$`, `\$`)
123
124 test(`[G-b]`, `[G-b]`)
125
126 test(`[G-b\0]`, `[G-b\0]`)
127
128 test(`\k`, `k`)
129
130 test(`\x20`, `\x20`)
131
132 test(`😊`, `😊`)
133
134 test(`^.*`, `^`+Re2Dot+`*`)
135
136 test(`(\n)`, `(\n)`)
137
138 test(`(a(bc))`, `(a(bc))`)
139
140 test(`[]`, "[^\u0000-\U0001FFFF]")
141
142 test(`[^]`, "[\u0000-\U0001FFFF]")
143
144 test(`\s+`, "["+WhitespaceChars+"]+")
145
146 test(`\S+`, "[^"+WhitespaceChars+"]+")
147
148 }
149 })
150 }
151
152 func TestTransformRegExp(t *testing.T) {
153 tt(t, func() {
154 pattern, err := TransformRegExp(`\s+abc\s+`)
155 is(err, nil)
156 is(pattern, `[`+WhitespaceChars+`]+abc[`+WhitespaceChars+`]+`)
157 is(regexp.MustCompile(pattern).MatchString("\t abc def"), true)
158 })
159 tt(t, func() {
160 pattern, err := TransformRegExp(`\u{1d306}`)
161 is(err, nil)
162 is(pattern, `\x{1d306}`)
163 })
164 tt(t, func() {
165 pattern, err := TransformRegExp(`\u1234`)
166 is(err, nil)
167 is(pattern, `\x{1234}`)
168 })
169 }
170
171 func BenchmarkTransformRegExp(b *testing.B) {
172 f := func(reStr string, b *testing.B) {
173 b.ResetTimer()
174 b.ReportAllocs()
175 for i := 0; i < b.N; i++ {
176 _, _ = TransformRegExp(reStr)
177 }
178 }
179
180 b.Run("Re", func(b *testing.B) {
181 f(`^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$`, b)
182 })
183
184 b.Run("Re2-1", func(b *testing.B) {
185 f(`(?=)^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$`, b)
186 })
187
188 b.Run("Re2-1", func(b *testing.B) {
189 f(`^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$(?=)`, b)
190 })
191 }
192
View as plain text