1 package goja
2
3 import (
4 "fmt"
5 "github.com/dlclark/regexp2"
6 "github.com/dop251/goja/unistring"
7 "io"
8 "regexp"
9 "sort"
10 "strings"
11 "unicode/utf16"
12 )
13
// regexp2MatchCache holds the decoded form of the most recently matched
// target string so that a following match against the same string can reuse it.
type regexp2MatchCache struct {
	// target is the string the cached data was decoded from; lookups compare
	// it to the new input with SameAs.
	target String
	// runes is the rune slice handed to regexp2 for target.
	runes []rune
	// posMap maps rune indexes in runes back to positions in target. It is
	// only populated by the Unicode matching path; the UTF-16 path leaves it
	// nil, and each path uses that to recognise its own cache entries.
	posMap []int
}
19
20
// regexp2Wrapper pairs a compiled regexp2 pattern with a single-entry match
// cache. The find methods assign to cache without any locking; clone()
// returns a wrapper that shares rx but starts with an empty cache.
type regexp2Wrapper struct {
	rx    *regexp2.Regexp
	cache *regexp2MatchCache
}
25
// regexpWrapper is the standard library regexp.Regexp under a local name so
// that the matching helpers in this file can be attached to it as methods.
type regexpWrapper regexp.Regexp
27
// positionMapItem associates an offset in a converted string (src) with the
// corresponding offset in the string it was converted from (dst).
type positionMapItem struct {
	src, dst int
}

// positionMap is a list of positionMapItem ordered by ascending src.
type positionMap []positionMapItem

// get translates src into its recorded dst offset.
//
// Values that are zero or negative are returned unchanged, which lets the
// "no match" marker -1 and the start-of-string offset pass straight through.
// Any other value must be present in the map; a missing entry panics.
func (m positionMap) get(src int) int {
	if src <= 0 {
		return src
	}
	// Binary search for the first entry whose src is not below the wanted one.
	idx := sort.Search(len(m), func(i int) bool {
		return m[i].src >= src
	})
	if idx < len(m) && m[idx].src == src {
		return m[idx].dst
	}
	panic("index not found")
}
43
// arrayRuneReader serves the runes of an in-memory slice one at a time
// through a ReadRune method.
type arrayRuneReader struct {
	runes []rune
	pos   int
}

// ReadRune returns the next rune of the slice and advances the reader.
// Every rune is reported with size 1, so offsets accumulated by a consumer
// are indexes into the rune slice. Once the slice is exhausted it returns
// io.EOF with a zero rune and size.
func (rd *arrayRuneReader) ReadRune() (r rune, size int, err error) {
	if rd.pos >= len(rd.runes) {
		return 0, 0, io.EOF
	}
	r = rd.runes[rd.pos]
	rd.pos++
	return r, 1, nil
}
59
60
// regexpPattern is a compiled pattern together with its flags. It may hold a
// standard-library matcher, a regexp2 matcher, or both; the regexp2 matcher
// is created on demand by createRegexp2 when only the standard one exists.
// Matching through the regexp2 wrapper mutates that wrapper's cache, so use
// clone() to obtain an independent copy.
type regexpPattern struct {
	// src is the pattern text passed to compileRegexp2 when the regexp2
	// matcher has to be built lazily.
	src string

	global, ignoreCase, multiline, sticky, unicode bool

	// regexpWrapper is nil when the pattern is handled by regexp2 only.
	regexpWrapper *regexpWrapper
	// regexp2Wrapper is nil until it is first needed (see createRegexp2).
	regexp2Wrapper *regexp2Wrapper
}
69
70 func compileRegexp2(src string, multiline, ignoreCase bool) (*regexp2Wrapper, error) {
71 var opts regexp2.RegexOptions = regexp2.ECMAScript
72 if multiline {
73 opts |= regexp2.Multiline
74 }
75 if ignoreCase {
76 opts |= regexp2.IgnoreCase
77 }
78 regexp2Pattern, err1 := regexp2.Compile(src, opts)
79 if err1 != nil {
80 return nil, fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", src, err1)
81 }
82
83 return ®exp2Wrapper{rx: regexp2Pattern}, nil
84 }
85
86 func (p *regexpPattern) createRegexp2() {
87 if p.regexp2Wrapper != nil {
88 return
89 }
90 rx, err := compileRegexp2(p.src, p.multiline, p.ignoreCase)
91 if err != nil {
92
93 panic(err)
94 }
95 p.regexp2Wrapper = rx
96 }
97
98 func buildUTF8PosMap(s unicodeString) (positionMap, string) {
99 pm := make(positionMap, 0, s.Length())
100 rd := s.Reader()
101 sPos, utf8Pos := 0, 0
102 var sb strings.Builder
103 for {
104 r, size, err := rd.ReadRune()
105 if err == io.EOF {
106 break
107 }
108 if err != nil {
109
110 return nil, ""
111 }
112 utf8Size, _ := sb.WriteRune(r)
113 sPos += size
114 utf8Pos += utf8Size
115 pm = append(pm, positionMapItem{src: utf8Pos, dst: sPos})
116 }
117 return pm, sb.String()
118 }
119
120 func (p *regexpPattern) findSubmatchIndex(s String, start int) []int {
121 if p.regexpWrapper == nil {
122 return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode, p.global || p.sticky)
123 }
124 if start != 0 {
125
126
127
128 p.createRegexp2()
129 return p.regexp2Wrapper.findSubmatchIndex(s, start, p.unicode, p.global || p.sticky)
130 }
131 return p.regexpWrapper.findSubmatchIndex(s, p.unicode)
132 }
133
134 func (p *regexpPattern) findAllSubmatchIndex(s String, start int, limit int, sticky bool) [][]int {
135 if p.regexpWrapper == nil {
136 return p.regexp2Wrapper.findAllSubmatchIndex(s, start, limit, sticky, p.unicode)
137 }
138 if start == 0 {
139 a, u := devirtualizeString(s)
140 if u == nil {
141 return p.regexpWrapper.findAllSubmatchIndex(string(a), limit, sticky)
142 }
143 if limit == 1 {
144 result := p.regexpWrapper.findSubmatchIndexUnicode(u, p.unicode)
145 if result == nil {
146 return nil
147 }
148 return [][]int{result}
149 }
150
151
152 if p.unicode {
153
154 pm, str := buildUTF8PosMap(u)
155 if pm != nil {
156 res := p.regexpWrapper.findAllSubmatchIndex(str, limit, sticky)
157 for _, result := range res {
158 for i, idx := range result {
159 result[i] = pm.get(idx)
160 }
161 }
162 return res
163 }
164 }
165 }
166
167 p.createRegexp2()
168 return p.regexp2Wrapper.findAllSubmatchIndex(s, start, limit, sticky, p.unicode)
169 }
170
171
172 func (p *regexpPattern) clone() *regexpPattern {
173 ret := ®expPattern{
174 src: p.src,
175 global: p.global,
176 ignoreCase: p.ignoreCase,
177 multiline: p.multiline,
178 sticky: p.sticky,
179 unicode: p.unicode,
180 }
181 if p.regexpWrapper != nil {
182 ret.regexpWrapper = p.regexpWrapper.clone()
183 }
184 if p.regexp2Wrapper != nil {
185 ret.regexp2Wrapper = p.regexp2Wrapper.clone()
186 }
187 return ret
188 }
189
// regexpObject is the object representation of a regular expression: a base
// object extended with the compiled pattern and the pattern's source string.
type regexpObject struct {
	baseObject
	pattern *regexpPattern
	source  String

	// standard is set to true by init and cleared by the overriding methods
	// below when the prototype is replaced, a string property is defined or
	// deleted, "exec" is assigned, or one of the Symbol.match / matchAll /
	// search / split / replace properties is set or defined on the object.
	standard bool
}
197
198 func (r *regexp2Wrapper) findSubmatchIndex(s String, start int, fullUnicode, doCache bool) (result []int) {
199 if fullUnicode {
200 return r.findSubmatchIndexUnicode(s, start, doCache)
201 }
202 return r.findSubmatchIndexUTF16(s, start, doCache)
203 }
204
205 func (r *regexp2Wrapper) findUTF16Cached(s String, start int, doCache bool) (match *regexp2.Match, runes []rune, err error) {
206 wrapped := r.rx
207 cache := r.cache
208 if cache != nil && cache.posMap == nil && cache.target.SameAs(s) {
209 runes = cache.runes
210 } else {
211 runes = s.utf16Runes()
212 cache = nil
213 }
214 match, err = wrapped.FindRunesMatchStartingAt(runes, start)
215 if doCache && match != nil && err == nil {
216 if cache == nil {
217 if r.cache == nil {
218 r.cache = new(regexp2MatchCache)
219 }
220 *r.cache = regexp2MatchCache{
221 target: s,
222 runes: runes,
223 }
224 }
225 } else {
226 r.cache = nil
227 }
228 return
229 }
230
231 func (r *regexp2Wrapper) findSubmatchIndexUTF16(s String, start int, doCache bool) (result []int) {
232 match, _, err := r.findUTF16Cached(s, start, doCache)
233 if err != nil {
234 return
235 }
236
237 if match == nil {
238 return
239 }
240 groups := match.Groups()
241
242 result = make([]int, 0, len(groups)<<1)
243 for _, group := range groups {
244 if len(group.Captures) > 0 {
245 result = append(result, group.Index, group.Index+group.Length)
246 } else {
247 result = append(result, -1, 0)
248 }
249 }
250 return
251 }
252
// findUnicodeCached runs the pattern against the decoded rune form of s and
// returns the match together with posMap, which maps rune indexes (as
// reported by the match) back to positions in s.
//
// start is a position in s. It is translated to a rune index either through
// the cached posMap or while building a fresh one. A cache entry is reused
// only if it was produced by this Unicode path (posMap non-nil) and its
// target is the same string as s.
//
// When doCache is set and a match was found without error, the runes and
// posMap are kept in the cache for the next call; in every other case the
// cache is dropped.
func (r *regexp2Wrapper) findUnicodeCached(s String, start int, doCache bool) (match *regexp2.Match, posMap []int, err error) {
	var (
		runes       []rune
		mappedStart int
		splitPair   bool
		savedRune   rune
	)
	wrapped := r.rx
	cache := r.cache
	if cache != nil && cache.posMap != nil && cache.target.SameAs(s) {
		runes, posMap = cache.runes, cache.posMap
		mappedStart, splitPair = posMapReverseLookup(posMap, start)
	} else {
		// NOTE(review): lenientUtf16Decoder is defined elsewhere; presumably it
		// decodes surrogate pairs and tolerates unpaired surrogates — confirm.
		posMap, runes, mappedStart, splitPair = buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader()}, s.Length(), start)
		cache = nil
	}
	if splitPair {
		// start falls inside the rune at mappedStart: temporarily replace that
		// rune with the second value returned by utf16.EncodeRune (the trailing
		// surrogate) so matching begins there, remembering the original rune.
		_, second := utf16.EncodeRune(runes[mappedStart])
		savedRune, runes[mappedStart] = runes[mappedStart], second
	}
	match, err = wrapped.FindRunesMatchStartingAt(runes, mappedStart)
	if doCache && match != nil && err == nil {
		if splitPair {
			// The runes are about to be kept (or already are) in the cache, so
			// undo the temporary substitution.
			runes[mappedStart] = savedRune
		}
		if cache == nil {
			// Reuse the existing cache allocation when there is one.
			if r.cache == nil {
				r.cache = new(regexp2MatchCache)
			}
			*r.cache = regexp2MatchCache{
				target: s,
				runes:  runes,
				posMap: posMap,
			}
		}
	} else {
		// Nothing is cached on this path, so a substituted rune is not restored.
		r.cache = nil
	}

	return
}
295
296 func (r *regexp2Wrapper) findSubmatchIndexUnicode(s String, start int, doCache bool) (result []int) {
297 match, posMap, err := r.findUnicodeCached(s, start, doCache)
298 if match == nil || err != nil {
299 return
300 }
301
302 groups := match.Groups()
303
304 result = make([]int, 0, len(groups)<<1)
305 for _, group := range groups {
306 if len(group.Captures) > 0 {
307 result = append(result, posMap[group.Index], posMap[group.Index+group.Length])
308 } else {
309 result = append(result, -1, 0)
310 }
311 }
312 return
313 }
314
315 func (r *regexp2Wrapper) findAllSubmatchIndexUTF16(s String, start, limit int, sticky bool) [][]int {
316 wrapped := r.rx
317 match, runes, err := r.findUTF16Cached(s, start, false)
318 if match == nil || err != nil {
319 return nil
320 }
321 if limit < 0 {
322 limit = len(runes) + 1
323 }
324 results := make([][]int, 0, limit)
325 for match != nil {
326 groups := match.Groups()
327
328 result := make([]int, 0, len(groups)<<1)
329
330 for _, group := range groups {
331 if len(group.Captures) > 0 {
332 startPos := group.Index
333 endPos := group.Index + group.Length
334 result = append(result, startPos, endPos)
335 } else {
336 result = append(result, -1, 0)
337 }
338 }
339
340 if sticky && len(result) > 1 {
341 if result[0] != start {
342 break
343 }
344 start = result[1]
345 }
346
347 results = append(results, result)
348 limit--
349 if limit <= 0 {
350 break
351 }
352 match, err = wrapped.FindNextMatch(match)
353 if err != nil {
354 return nil
355 }
356 }
357 return results
358 }
359
// buildPosMap drains rd and returns:
//
//   - runes: every rune read, in order;
//   - posMap: for each rune index, the source position at which that rune
//     begins (the running sum of the sizes rd reported), followed by one
//     final entry holding the total size, so len(posMap) == len(runes)+1;
//   - mappedStart: the rune index whose source position equals start;
//   - splitPair: true when start falls strictly inside a rune, in which case
//     mappedStart is the index of that rune.
//
// l is a capacity hint for the number of runes. Reading stops at the first
// error from rd, whatever it is. If start lies beyond the end of the input,
// mappedStart is 0 and splitPair is false.
func buildPosMap(rd io.RuneReader, l, start int) (posMap []int, runes []rune, mappedStart int, splitPair bool) {
	posMap = make([]int, 0, l+1)
	runes = make([]rune, 0, l)
	offset := 0
	searching := true
	for {
		if searching {
			switch {
			case offset == start:
				mappedStart = len(runes)
				searching = false
			case offset > start:
				// The previous rune spans the requested position.
				mappedStart = len(runes) - 1
				splitPair = true
				searching = false
			}
		}
		ch, width, err := rd.ReadRune()
		if err != nil {
			// Terminating entry: the end position of the last rune.
			posMap = append(posMap, offset)
			return
		}
		runes = append(runes, ch)
		posMap = append(posMap, offset)
		offset += width
	}
}
389
// posMapReverseLookup translates a source position into an index of posMap,
// which must be sorted in ascending order.
//
// If pos is present, its index is returned with false. If pos is larger than
// every entry, len(posMap) is returned with false. Otherwise pos falls
// between two entries: the index of the entry just below it is returned with
// true, signalling that pos lies inside that element.
func posMapReverseLookup(posMap []int, pos int) (int, bool) {
	idx := sort.SearchInts(posMap, pos)
	if idx >= len(posMap) || posMap[idx] == pos {
		return idx, false
	}
	return idx - 1, true
}
397
398 func (r *regexp2Wrapper) findAllSubmatchIndexUnicode(s unicodeString, start, limit int, sticky bool) [][]int {
399 wrapped := r.rx
400 if limit < 0 {
401 limit = len(s) + 1
402 }
403 results := make([][]int, 0, limit)
404 match, posMap, err := r.findUnicodeCached(s, start, false)
405 if err != nil {
406 return nil
407 }
408 for match != nil {
409 groups := match.Groups()
410
411 result := make([]int, 0, len(groups)<<1)
412
413 for _, group := range groups {
414 if len(group.Captures) > 0 {
415 start := posMap[group.Index]
416 end := posMap[group.Index+group.Length]
417 result = append(result, start, end)
418 } else {
419 result = append(result, -1, 0)
420 }
421 }
422
423 if sticky && len(result) > 1 {
424 if result[0] != start {
425 break
426 }
427 start = result[1]
428 }
429
430 results = append(results, result)
431 match, err = wrapped.FindNextMatch(match)
432 if err != nil {
433 return nil
434 }
435 }
436 return results
437 }
438
439 func (r *regexp2Wrapper) findAllSubmatchIndex(s String, start, limit int, sticky, fullUnicode bool) [][]int {
440 a, u := devirtualizeString(s)
441 if u != nil {
442 if fullUnicode {
443 return r.findAllSubmatchIndexUnicode(u, start, limit, sticky)
444 }
445 return r.findAllSubmatchIndexUTF16(u, start, limit, sticky)
446 }
447 return r.findAllSubmatchIndexUTF16(a, start, limit, sticky)
448 }
449
450 func (r *regexp2Wrapper) clone() *regexp2Wrapper {
451 return ®exp2Wrapper{
452 rx: r.rx,
453 }
454 }
455
456 func (r *regexpWrapper) findAllSubmatchIndex(s string, limit int, sticky bool) (results [][]int) {
457 wrapped := (*regexp.Regexp)(r)
458 results = wrapped.FindAllStringSubmatchIndex(s, limit)
459 pos := 0
460 if sticky {
461 for i, result := range results {
462 if len(result) > 1 {
463 if result[0] != pos {
464 return results[:i]
465 }
466 pos = result[1]
467 }
468 }
469 }
470 return
471 }
472
473 func (r *regexpWrapper) findSubmatchIndex(s String, fullUnicode bool) []int {
474 a, u := devirtualizeString(s)
475 if u != nil {
476 return r.findSubmatchIndexUnicode(u, fullUnicode)
477 }
478 return r.findSubmatchIndexASCII(string(a))
479 }
480
481 func (r *regexpWrapper) findSubmatchIndexASCII(s string) []int {
482 wrapped := (*regexp.Regexp)(r)
483 return wrapped.FindStringSubmatchIndex(s)
484 }
485
486 func (r *regexpWrapper) findSubmatchIndexUnicode(s unicodeString, fullUnicode bool) (result []int) {
487 wrapped := (*regexp.Regexp)(r)
488 if fullUnicode {
489 posMap, runes, _, _ := buildPosMap(&lenientUtf16Decoder{utf16Reader: s.utf16Reader()}, s.Length(), 0)
490 res := wrapped.FindReaderSubmatchIndex(&arrayRuneReader{runes: runes})
491 for i, item := range res {
492 if item >= 0 {
493 res[i] = posMap[item]
494 }
495 }
496 return res
497 }
498 return wrapped.FindReaderSubmatchIndex(s.utf16RuneReader())
499 }
500
// clone returns the receiver itself; no copy is made. The wrapper holds no
// state beyond the compiled regexp.Regexp.
// NOTE(review): presumably sharing is acceptable because regexp.Regexp is
// documented as safe for concurrent use — confirm no configuration methods
// (e.g. Longest) are called on it.
func (r *regexpWrapper) clone() *regexpWrapper {
	return r
}
504
505 func (r *regexpObject) execResultToArray(target String, result []int) Value {
506 captureCount := len(result) >> 1
507 valueArray := make([]Value, captureCount)
508 matchIndex := result[0]
509 valueArray[0] = target.Substring(result[0], result[1])
510 lowerBound := 0
511 for index := 1; index < captureCount; index++ {
512 offset := index << 1
513 if result[offset] >= 0 && result[offset+1] >= lowerBound {
514 valueArray[index] = target.Substring(result[offset], result[offset+1])
515 lowerBound = result[offset]
516 } else {
517 valueArray[index] = _undefined
518 }
519 }
520 match := r.val.runtime.newArrayValues(valueArray)
521 match.self.setOwnStr("input", target, false)
522 match.self.setOwnStr("index", intToValue(int64(matchIndex)), false)
523 return match
524 }
525
526 func (r *regexpObject) getLastIndex() int64 {
527 lastIndex := toLength(r.getStr("lastIndex", nil))
528 if !r.pattern.global && !r.pattern.sticky {
529 return 0
530 }
531 return lastIndex
532 }
533
534 func (r *regexpObject) updateLastIndex(index int64, firstResult, lastResult []int) bool {
535 if r.pattern.sticky {
536 if firstResult == nil || int64(firstResult[0]) != index {
537 r.setOwnStr("lastIndex", intToValue(0), true)
538 return false
539 }
540 } else {
541 if firstResult == nil {
542 if r.pattern.global {
543 r.setOwnStr("lastIndex", intToValue(0), true)
544 }
545 return false
546 }
547 }
548
549 if r.pattern.global || r.pattern.sticky {
550 r.setOwnStr("lastIndex", intToValue(int64(lastResult[1])), true)
551 }
552 return true
553 }
554
555 func (r *regexpObject) execRegexp(target String) (match bool, result []int) {
556 index := r.getLastIndex()
557 if index >= 0 && index <= int64(target.Length()) {
558 result = r.pattern.findSubmatchIndex(target, int(index))
559 }
560 match = r.updateLastIndex(index, result, result)
561 return
562 }
563
564 func (r *regexpObject) exec(target String) Value {
565 match, result := r.execRegexp(target)
566 if match {
567 return r.execResultToArray(target, result)
568 }
569 return _null
570 }
571
572 func (r *regexpObject) test(target String) bool {
573 match, _ := r.execRegexp(target)
574 return match
575 }
576
577 func (r *regexpObject) clone() *regexpObject {
578 r1 := r.val.runtime.newRegexpObject(r.prototype)
579 r1.source = r.source
580 r1.pattern = r.pattern
581
582 return r1
583 }
584
// init initialises the embedded base object, marks the regexp as standard
// and installs the "lastIndex" property with an initial value of 0.
func (r *regexpObject) init() {
	r.baseObject.init()
	r.standard = true
	// NOTE(review): the three flags are presumably writable, enumerable and
	// configurable, i.e. a writable, non-enumerable, non-configurable
	// property — confirm against _putProp.
	r._putProp("lastIndex", intToValue(0), true, false, false)
}
590
591 func (r *regexpObject) setProto(proto *Object, throw bool) bool {
592 res := r.baseObject.setProto(proto, throw)
593 if res {
594 r.standard = false
595 }
596 return res
597 }
598
599 func (r *regexpObject) defineOwnPropertyStr(name unistring.String, desc PropertyDescriptor, throw bool) bool {
600 res := r.baseObject.defineOwnPropertyStr(name, desc, throw)
601 if res {
602 r.standard = false
603 }
604 return res
605 }
606
607 func (r *regexpObject) defineOwnPropertySym(name *Symbol, desc PropertyDescriptor, throw bool) bool {
608 res := r.baseObject.defineOwnPropertySym(name, desc, throw)
609 if res && r.standard {
610 switch name {
611 case SymMatch, SymMatchAll, SymSearch, SymSplit, SymReplace:
612 r.standard = false
613 }
614 }
615 return res
616 }
617
618 func (r *regexpObject) deleteStr(name unistring.String, throw bool) bool {
619 res := r.baseObject.deleteStr(name, throw)
620 if res {
621 r.standard = false
622 }
623 return res
624 }
625
626 func (r *regexpObject) setOwnStr(name unistring.String, value Value, throw bool) bool {
627 res := r.baseObject.setOwnStr(name, value, throw)
628 if res && r.standard && name == "exec" {
629 r.standard = false
630 }
631 return res
632 }
633
634 func (r *regexpObject) setOwnSym(name *Symbol, value Value, throw bool) bool {
635 res := r.baseObject.setOwnSym(name, value, throw)
636 if res && r.standard {
637 switch name {
638 case SymMatch, SymMatchAll, SymSearch, SymSplit, SymReplace:
639 r.standard = false
640 }
641 }
642 return res
643 }
644
View as plain text