1 package goja
2
3 import (
4 "fmt"
5 "github.com/dop251/goja/parser"
6 "regexp"
7 "strings"
8 "unicode/utf16"
9 "unicode/utf8"
10 )
11
12 func (r *Runtime) newRegexpObject(proto *Object) *regexpObject {
13 v := &Object{runtime: r}
14
15 o := ®expObject{}
16 o.class = classRegExp
17 o.val = v
18 o.extensible = true
19 v.self = o
20 o.prototype = proto
21 o.init()
22 return o
23 }
24
25 func (r *Runtime) newRegExpp(pattern *regexpPattern, patternStr String, proto *Object) *regexpObject {
26 o := r.newRegexpObject(proto)
27
28 o.pattern = pattern
29 o.source = patternStr
30
31 return o
32 }
33
// decodeHex interprets s as a sequence of hexadecimal digits (either case)
// and returns the resulting value. It returns (0, false) as soon as a
// non-hex byte is found; an empty string decodes to (0, true).
func decodeHex(s string) (int, bool) {
	value := 0
	for _, c := range []byte(s) {
		var digit byte
		if c >= '0' && c <= '9' {
			digit = c - '0'
		} else if c >= 'a' && c <= 'f' {
			digit = c - 'a' + 10
		} else if c >= 'A' && c <= 'F' {
			digit = c - 'A' + 10
		} else {
			return 0, false
		}
		value = value*16 + int(digit)
	}
	return value, true
}
53
54 func writeHex4(b *strings.Builder, i int) {
55 b.WriteByte(hex[i>>12])
56 b.WriteByte(hex[(i>>8)&0xF])
57 b.WriteByte(hex[(i>>4)&0xF])
58 b.WriteByte(hex[i&0xF])
59 }
60
61
62 func convertRegexpToUnicode(patternStr string) string {
63 var sb strings.Builder
64 pos := 0
65 for i := 0; i < len(patternStr)-11; {
66 r, size := utf8.DecodeRuneInString(patternStr[i:])
67 if r == '\\' {
68 i++
69 if patternStr[i] == 'u' && patternStr[i+5] == '\\' && patternStr[i+6] == 'u' {
70 if first, ok := decodeHex(patternStr[i+1 : i+5]); ok {
71 if isUTF16FirstSurrogate(uint16(first)) {
72 if second, ok := decodeHex(patternStr[i+7 : i+11]); ok {
73 if isUTF16SecondSurrogate(uint16(second)) {
74 r = utf16.DecodeRune(rune(first), rune(second))
75 sb.WriteString(patternStr[pos : i-1])
76 sb.WriteRune(r)
77 i += 11
78 pos = i
79 continue
80 }
81 }
82 }
83 }
84 }
85 i++
86 } else {
87 i += size
88 }
89 }
90 if pos > 0 {
91 sb.WriteString(patternStr[pos:])
92 return sb.String()
93 }
94 return patternStr
95 }
96
97
98 func convertRegexpToUtf16(patternStr string) string {
99 var sb strings.Builder
100 pos := 0
101 var prevRune rune
102 for i := 0; i < len(patternStr); {
103 r, size := utf8.DecodeRuneInString(patternStr[i:])
104 if r > 0xFFFF {
105 sb.WriteString(patternStr[pos:i])
106 if prevRune == '\\' {
107 sb.WriteRune('\\')
108 }
109 first, second := utf16.EncodeRune(r)
110 sb.WriteString(`\u`)
111 writeHex4(&sb, int(first))
112 sb.WriteString(`\u`)
113 writeHex4(&sb, int(second))
114 pos = i + size
115 }
116 i += size
117 prevRune = r
118 }
119 if pos > 0 {
120 sb.WriteString(patternStr[pos:])
121 return sb.String()
122 }
123 return patternStr
124 }
125
126
127 func escapeInvalidUtf16(s String) string {
128 if imported, ok := s.(*importedString); ok {
129 return imported.s
130 }
131 if ascii, ok := s.(asciiString); ok {
132 return ascii.String()
133 }
134 var sb strings.Builder
135 rd := &lenientUtf16Decoder{utf16Reader: s.utf16Reader()}
136 pos := 0
137 utf8Size := 0
138 var utf8Buf [utf8.UTFMax]byte
139 for {
140 c, size, err := rd.ReadRune()
141 if err != nil {
142 break
143 }
144 if utf16.IsSurrogate(c) {
145 if sb.Len() == 0 {
146 sb.Grow(utf8Size + 7)
147 hrd := s.Reader()
148 var c rune
149 for p := 0; p < pos; {
150 var size int
151 var err error
152 c, size, err = hrd.ReadRune()
153 if err != nil {
154
155 panic(fmt.Errorf("error while reading string head %q, pos: %d: %w", s.String(), pos, err))
156 }
157 sb.WriteRune(c)
158 p += size
159 }
160 if c == '\\' {
161 sb.WriteRune(c)
162 }
163 }
164 sb.WriteString(`\u`)
165 writeHex4(&sb, int(c))
166 } else {
167 if sb.Len() > 0 {
168 sb.WriteRune(c)
169 } else {
170 utf8Size += utf8.EncodeRune(utf8Buf[:], c)
171 pos += size
172 }
173 }
174 }
175 if sb.Len() > 0 {
176 return sb.String()
177 }
178 return s.String()
179 }
180
181 func compileRegexpFromValueString(patternStr String, flags string) (*regexpPattern, error) {
182 return compileRegexp(escapeInvalidUtf16(patternStr), flags)
183 }
184
185 func compileRegexp(patternStr, flags string) (p *regexpPattern, err error) {
186 var global, ignoreCase, multiline, sticky, unicode bool
187 var wrapper *regexpWrapper
188 var wrapper2 *regexp2Wrapper
189
190 if flags != "" {
191 invalidFlags := func() {
192 err = fmt.Errorf("Invalid flags supplied to RegExp constructor '%s'", flags)
193 }
194 for _, chr := range flags {
195 switch chr {
196 case 'g':
197 if global {
198 invalidFlags()
199 return
200 }
201 global = true
202 case 'm':
203 if multiline {
204 invalidFlags()
205 return
206 }
207 multiline = true
208 case 'i':
209 if ignoreCase {
210 invalidFlags()
211 return
212 }
213 ignoreCase = true
214 case 'y':
215 if sticky {
216 invalidFlags()
217 return
218 }
219 sticky = true
220 case 'u':
221 if unicode {
222 invalidFlags()
223 }
224 unicode = true
225 default:
226 invalidFlags()
227 return
228 }
229 }
230 }
231
232 if unicode {
233 patternStr = convertRegexpToUnicode(patternStr)
234 } else {
235 patternStr = convertRegexpToUtf16(patternStr)
236 }
237
238 re2Str, err1 := parser.TransformRegExp(patternStr)
239 if err1 == nil {
240 re2flags := ""
241 if multiline {
242 re2flags += "m"
243 }
244 if ignoreCase {
245 re2flags += "i"
246 }
247 if len(re2flags) > 0 {
248 re2Str = fmt.Sprintf("(?%s:%s)", re2flags, re2Str)
249 }
250
251 pattern, err1 := regexp.Compile(re2Str)
252 if err1 != nil {
253 err = fmt.Errorf("Invalid regular expression (re2): %s (%v)", re2Str, err1)
254 return
255 }
256 wrapper = (*regexpWrapper)(pattern)
257 } else {
258 if _, incompat := err1.(parser.RegexpErrorIncompatible); !incompat {
259 err = err1
260 return
261 }
262 wrapper2, err = compileRegexp2(patternStr, multiline, ignoreCase)
263 if err != nil {
264 err = fmt.Errorf("Invalid regular expression (regexp2): %s (%v)", patternStr, err)
265 return
266 }
267 }
268
269 p = ®expPattern{
270 src: patternStr,
271 regexpWrapper: wrapper,
272 regexp2Wrapper: wrapper2,
273 global: global,
274 ignoreCase: ignoreCase,
275 multiline: multiline,
276 sticky: sticky,
277 unicode: unicode,
278 }
279 return
280 }
281
282 func (r *Runtime) _newRegExp(patternStr String, flags string, proto *Object) *regexpObject {
283 pattern, err := compileRegexpFromValueString(patternStr, flags)
284 if err != nil {
285 panic(r.newSyntaxError(err.Error(), -1))
286 }
287 return r.newRegExpp(pattern, patternStr, proto)
288 }
289
290 func (r *Runtime) builtin_newRegExp(args []Value, proto *Object) *Object {
291 var patternVal, flagsVal Value
292 if len(args) > 0 {
293 patternVal = args[0]
294 }
295 if len(args) > 1 {
296 flagsVal = args[1]
297 }
298 return r.newRegExp(patternVal, flagsVal, proto).val
299 }
300
301 func (r *Runtime) newRegExp(patternVal, flagsVal Value, proto *Object) *regexpObject {
302 var pattern String
303 var flags string
304 if isRegexp(patternVal) {
305 if obj, ok := patternVal.(*Object); ok {
306 if rx, ok := obj.self.(*regexpObject); ok {
307 if flagsVal == nil || flagsVal == _undefined {
308 return rx.clone()
309 } else {
310 return r._newRegExp(rx.source, flagsVal.toString().String(), proto)
311 }
312 } else {
313 pattern = nilSafe(obj.self.getStr("source", nil)).toString()
314 if flagsVal == nil || flagsVal == _undefined {
315 flags = nilSafe(obj.self.getStr("flags", nil)).toString().String()
316 } else {
317 flags = flagsVal.toString().String()
318 }
319 goto exit
320 }
321 }
322 }
323
324 if patternVal != nil && patternVal != _undefined {
325 pattern = patternVal.toString()
326 }
327 if flagsVal != nil && flagsVal != _undefined {
328 flags = flagsVal.toString().String()
329 }
330
331 if pattern == nil {
332 pattern = stringEmpty
333 }
334 exit:
335 return r._newRegExp(pattern, flags, proto)
336 }
337
338 func (r *Runtime) builtin_RegExp(call FunctionCall) Value {
339 pattern := call.Argument(0)
340 patternIsRegExp := isRegexp(pattern)
341 flags := call.Argument(1)
342 if patternIsRegExp && flags == _undefined {
343 if obj, ok := call.Argument(0).(*Object); ok {
344 patternConstructor := obj.self.getStr("constructor", nil)
345 if patternConstructor == r.global.RegExp {
346 return pattern
347 }
348 }
349 }
350 return r.newRegExp(pattern, flags, r.getRegExpPrototype()).val
351 }
352
353 func (r *Runtime) regexpproto_compile(call FunctionCall) Value {
354 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
355 var (
356 pattern *regexpPattern
357 source String
358 flags string
359 err error
360 )
361 patternVal := call.Argument(0)
362 flagsVal := call.Argument(1)
363 if o, ok := patternVal.(*Object); ok {
364 if p, ok := o.self.(*regexpObject); ok {
365 if flagsVal != _undefined {
366 panic(r.NewTypeError("Cannot supply flags when constructing one RegExp from another"))
367 }
368 this.pattern = p.pattern
369 this.source = p.source
370 goto exit
371 }
372 }
373 if patternVal != _undefined {
374 source = patternVal.toString()
375 } else {
376 source = stringEmpty
377 }
378 if flagsVal != _undefined {
379 flags = flagsVal.toString().String()
380 }
381 pattern, err = compileRegexpFromValueString(source, flags)
382 if err != nil {
383 panic(r.newSyntaxError(err.Error(), -1))
384 }
385 this.pattern = pattern
386 this.source = source
387 exit:
388 this.setOwnStr("lastIndex", intToValue(0), true)
389 return call.This
390 }
391
392 panic(r.NewTypeError("Method RegExp.prototype.compile called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
393 }
394
395 func (r *Runtime) regexpproto_exec(call FunctionCall) Value {
396 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
397 return this.exec(call.Argument(0).toString())
398 } else {
399 r.typeErrorResult(true, "Method RegExp.prototype.exec called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This}))
400 return nil
401 }
402 }
403
404 func (r *Runtime) regexpproto_test(call FunctionCall) Value {
405 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
406 if this.test(call.Argument(0).toString()) {
407 return valueTrue
408 } else {
409 return valueFalse
410 }
411 } else {
412 panic(r.NewTypeError("Method RegExp.prototype.test called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
413 }
414 }
415
416 func (r *Runtime) regexpproto_toString(call FunctionCall) Value {
417 obj := r.toObject(call.This)
418 if this := r.checkStdRegexp(obj); this != nil {
419 var sb StringBuilder
420 sb.WriteRune('/')
421 if !this.writeEscapedSource(&sb) {
422 sb.WriteString(this.source)
423 }
424 sb.WriteRune('/')
425 if this.pattern.global {
426 sb.WriteRune('g')
427 }
428 if this.pattern.ignoreCase {
429 sb.WriteRune('i')
430 }
431 if this.pattern.multiline {
432 sb.WriteRune('m')
433 }
434 if this.pattern.unicode {
435 sb.WriteRune('u')
436 }
437 if this.pattern.sticky {
438 sb.WriteRune('y')
439 }
440 return sb.String()
441 }
442 pattern := nilSafe(obj.self.getStr("source", nil)).toString()
443 flags := nilSafe(obj.self.getStr("flags", nil)).toString()
444 var sb StringBuilder
445 sb.WriteRune('/')
446 sb.WriteString(pattern)
447 sb.WriteRune('/')
448 sb.WriteString(flags)
449 return sb.String()
450 }
451
452 func (r *regexpObject) writeEscapedSource(sb *StringBuilder) bool {
453 if r.source.Length() == 0 {
454 sb.WriteString(asciiString("(?:)"))
455 return true
456 }
457 pos := 0
458 lastPos := 0
459 rd := &lenientUtf16Decoder{utf16Reader: r.source.utf16Reader()}
460 L:
461 for {
462 c, size, err := rd.ReadRune()
463 if err != nil {
464 break
465 }
466 switch c {
467 case '\\':
468 pos++
469 _, size, err = rd.ReadRune()
470 if err != nil {
471 break L
472 }
473 case '/', '\u000a', '\u000d', '\u2028', '\u2029':
474 sb.WriteSubstring(r.source, lastPos, pos)
475 sb.WriteRune('\\')
476 switch c {
477 case '\u000a':
478 sb.WriteRune('n')
479 case '\u000d':
480 sb.WriteRune('r')
481 default:
482 sb.WriteRune('u')
483 sb.WriteRune(rune(hex[c>>12]))
484 sb.WriteRune(rune(hex[(c>>8)&0xF]))
485 sb.WriteRune(rune(hex[(c>>4)&0xF]))
486 sb.WriteRune(rune(hex[c&0xF]))
487 }
488 lastPos = pos + size
489 }
490 pos += size
491 }
492 if lastPos > 0 {
493 sb.WriteSubstring(r.source, lastPos, r.source.Length())
494 return true
495 }
496 return false
497 }
498
499 func (r *Runtime) regexpproto_getSource(call FunctionCall) Value {
500 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
501 var sb StringBuilder
502 if this.writeEscapedSource(&sb) {
503 return sb.String()
504 }
505 return this.source
506 } else if call.This == r.global.RegExpPrototype {
507 return asciiString("(?:)")
508 } else {
509 panic(r.NewTypeError("Method RegExp.prototype.source getter called on incompatible receiver"))
510 }
511 }
512
513 func (r *Runtime) regexpproto_getGlobal(call FunctionCall) Value {
514 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
515 if this.pattern.global {
516 return valueTrue
517 } else {
518 return valueFalse
519 }
520 } else if call.This == r.global.RegExpPrototype {
521 return _undefined
522 } else {
523 panic(r.NewTypeError("Method RegExp.prototype.global getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
524 }
525 }
526
527 func (r *Runtime) regexpproto_getMultiline(call FunctionCall) Value {
528 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
529 if this.pattern.multiline {
530 return valueTrue
531 } else {
532 return valueFalse
533 }
534 } else if call.This == r.global.RegExpPrototype {
535 return _undefined
536 } else {
537 panic(r.NewTypeError("Method RegExp.prototype.multiline getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
538 }
539 }
540
541 func (r *Runtime) regexpproto_getIgnoreCase(call FunctionCall) Value {
542 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
543 if this.pattern.ignoreCase {
544 return valueTrue
545 } else {
546 return valueFalse
547 }
548 } else if call.This == r.global.RegExpPrototype {
549 return _undefined
550 } else {
551 panic(r.NewTypeError("Method RegExp.prototype.ignoreCase getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
552 }
553 }
554
555 func (r *Runtime) regexpproto_getUnicode(call FunctionCall) Value {
556 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
557 if this.pattern.unicode {
558 return valueTrue
559 } else {
560 return valueFalse
561 }
562 } else if call.This == r.global.RegExpPrototype {
563 return _undefined
564 } else {
565 panic(r.NewTypeError("Method RegExp.prototype.unicode getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
566 }
567 }
568
569 func (r *Runtime) regexpproto_getSticky(call FunctionCall) Value {
570 if this, ok := r.toObject(call.This).self.(*regexpObject); ok {
571 if this.pattern.sticky {
572 return valueTrue
573 } else {
574 return valueFalse
575 }
576 } else if call.This == r.global.RegExpPrototype {
577 return _undefined
578 } else {
579 panic(r.NewTypeError("Method RegExp.prototype.sticky getter called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: call.This})))
580 }
581 }
582
583 func (r *Runtime) regexpproto_getFlags(call FunctionCall) Value {
584 var global, ignoreCase, multiline, sticky, unicode bool
585
586 thisObj := r.toObject(call.This)
587 size := 0
588 if v := thisObj.self.getStr("global", nil); v != nil {
589 global = v.ToBoolean()
590 if global {
591 size++
592 }
593 }
594 if v := thisObj.self.getStr("ignoreCase", nil); v != nil {
595 ignoreCase = v.ToBoolean()
596 if ignoreCase {
597 size++
598 }
599 }
600 if v := thisObj.self.getStr("multiline", nil); v != nil {
601 multiline = v.ToBoolean()
602 if multiline {
603 size++
604 }
605 }
606 if v := thisObj.self.getStr("sticky", nil); v != nil {
607 sticky = v.ToBoolean()
608 if sticky {
609 size++
610 }
611 }
612 if v := thisObj.self.getStr("unicode", nil); v != nil {
613 unicode = v.ToBoolean()
614 if unicode {
615 size++
616 }
617 }
618
619 var sb strings.Builder
620 sb.Grow(size)
621 if global {
622 sb.WriteByte('g')
623 }
624 if ignoreCase {
625 sb.WriteByte('i')
626 }
627 if multiline {
628 sb.WriteByte('m')
629 }
630 if unicode {
631 sb.WriteByte('u')
632 }
633 if sticky {
634 sb.WriteByte('y')
635 }
636
637 return asciiString(sb.String())
638 }
639
640 func (r *Runtime) regExpExec(execFn func(FunctionCall) Value, rxObj *Object, arg Value) Value {
641 res := execFn(FunctionCall{
642 This: rxObj,
643 Arguments: []Value{arg},
644 })
645
646 if res != _null {
647 if _, ok := res.(*Object); !ok {
648 panic(r.NewTypeError("RegExp exec method returned something other than an Object or null"))
649 }
650 }
651
652 return res
653 }
654
655 func (r *Runtime) getGlobalRegexpMatches(rxObj *Object, s String) []Value {
656 fullUnicode := nilSafe(rxObj.self.getStr("unicode", nil)).ToBoolean()
657 rxObj.self.setOwnStr("lastIndex", intToValue(0), true)
658 execFn, ok := r.toObject(rxObj.self.getStr("exec", nil)).self.assertCallable()
659 if !ok {
660 panic(r.NewTypeError("exec is not a function"))
661 }
662 var a []Value
663 for {
664 res := r.regExpExec(execFn, rxObj, s)
665 if res == _null {
666 break
667 }
668 a = append(a, res)
669 matchStr := nilSafe(r.toObject(res).self.getIdx(valueInt(0), nil)).toString()
670 if matchStr.Length() == 0 {
671 thisIndex := toLength(rxObj.self.getStr("lastIndex", nil))
672 rxObj.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(s, thisIndex, fullUnicode)), true)
673 }
674 }
675
676 return a
677 }
678
679 func (r *Runtime) regexpproto_stdMatcherGeneric(rxObj *Object, s String) Value {
680 rx := rxObj.self
681 global := rx.getStr("global", nil)
682 if global != nil && global.ToBoolean() {
683 a := r.getGlobalRegexpMatches(rxObj, s)
684 if len(a) == 0 {
685 return _null
686 }
687 ar := make([]Value, 0, len(a))
688 for _, result := range a {
689 obj := r.toObject(result)
690 matchStr := nilSafe(obj.self.getIdx(valueInt(0), nil)).ToString()
691 ar = append(ar, matchStr)
692 }
693 return r.newArrayValues(ar)
694 }
695
696 execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
697 if !ok {
698 panic(r.NewTypeError("exec is not a function"))
699 }
700
701 return r.regExpExec(execFn, rxObj, s)
702 }
703
704 func (r *Runtime) checkStdRegexp(rxObj *Object) *regexpObject {
705 if deoptimiseRegexp {
706 return nil
707 }
708
709 rx, ok := rxObj.self.(*regexpObject)
710 if !ok {
711 return nil
712 }
713
714 if !rx.standard || rx.prototype == nil || rx.prototype.self != r.global.stdRegexpProto {
715 return nil
716 }
717
718 return rx
719 }
720
721 func (r *Runtime) regexpproto_stdMatcher(call FunctionCall) Value {
722 thisObj := r.toObject(call.This)
723 s := call.Argument(0).toString()
724 rx := r.checkStdRegexp(thisObj)
725 if rx == nil {
726 return r.regexpproto_stdMatcherGeneric(thisObj, s)
727 }
728 if rx.pattern.global {
729 res := rx.pattern.findAllSubmatchIndex(s, 0, -1, rx.pattern.sticky)
730 if len(res) == 0 {
731 rx.setOwnStr("lastIndex", intToValue(0), true)
732 return _null
733 }
734 a := make([]Value, 0, len(res))
735 for _, result := range res {
736 a = append(a, s.Substring(result[0], result[1]))
737 }
738 rx.setOwnStr("lastIndex", intToValue(int64(res[len(res)-1][1])), true)
739 return r.newArrayValues(a)
740 } else {
741 return rx.exec(s)
742 }
743 }
744
745 func (r *Runtime) regexpproto_stdSearchGeneric(rxObj *Object, arg String) Value {
746 rx := rxObj.self
747 previousLastIndex := nilSafe(rx.getStr("lastIndex", nil))
748 zero := intToValue(0)
749 if !previousLastIndex.SameAs(zero) {
750 rx.setOwnStr("lastIndex", zero, true)
751 }
752 execFn, ok := r.toObject(rx.getStr("exec", nil)).self.assertCallable()
753 if !ok {
754 panic(r.NewTypeError("exec is not a function"))
755 }
756
757 result := r.regExpExec(execFn, rxObj, arg)
758 currentLastIndex := nilSafe(rx.getStr("lastIndex", nil))
759 if !currentLastIndex.SameAs(previousLastIndex) {
760 rx.setOwnStr("lastIndex", previousLastIndex, true)
761 }
762
763 if result == _null {
764 return intToValue(-1)
765 }
766
767 return r.toObject(result).self.getStr("index", nil)
768 }
769
770 func (r *Runtime) regexpproto_stdMatcherAll(call FunctionCall) Value {
771 thisObj := r.toObject(call.This)
772 s := call.Argument(0).toString()
773 flags := nilSafe(thisObj.self.getStr("flags", nil)).toString()
774 c := r.speciesConstructorObj(call.This.(*Object), r.getRegExp())
775 matcher := r.toConstructor(c)([]Value{call.This, flags}, nil)
776 matcher.self.setOwnStr("lastIndex", valueInt(toLength(thisObj.self.getStr("lastIndex", nil))), true)
777 flagsStr := flags.String()
778 global := strings.Contains(flagsStr, "g")
779 fullUnicode := strings.Contains(flagsStr, "u")
780 return r.createRegExpStringIterator(matcher, s, global, fullUnicode)
781 }
782
783 func (r *Runtime) createRegExpStringIterator(matcher *Object, s String, global, fullUnicode bool) Value {
784 o := &Object{runtime: r}
785
786 ri := ®ExpStringIterObject{
787 matcher: matcher,
788 s: s,
789 global: global,
790 fullUnicode: fullUnicode,
791 }
792 ri.class = classObject
793 ri.val = o
794 ri.extensible = true
795 o.self = ri
796 ri.prototype = r.getRegExpStringIteratorPrototype()
797 ri.init()
798
799 return o
800 }
801
802 type regExpStringIterObject struct {
803 baseObject
804 matcher *Object
805 s String
806 global, fullUnicode, done bool
807 }
808
809
810 func regExpExec(r *Object, s String) Value {
811 exec := r.self.getStr("exec", nil)
812 if execObject, ok := exec.(*Object); ok {
813 if execFn, ok := execObject.self.assertCallable(); ok {
814 return r.runtime.regExpExec(execFn, r, s)
815 }
816 }
817 if rx, ok := r.self.(*regexpObject); ok {
818 return rx.exec(s)
819 }
820 panic(r.runtime.NewTypeError("no RegExpMatcher internal slot"))
821 }
822
823 func (ri *regExpStringIterObject) next() (v Value) {
824 if ri.done {
825 return ri.val.runtime.createIterResultObject(_undefined, true)
826 }
827
828 match := regExpExec(ri.matcher, ri.s)
829 if IsNull(match) {
830 ri.done = true
831 return ri.val.runtime.createIterResultObject(_undefined, true)
832 }
833 if !ri.global {
834 ri.done = true
835 return ri.val.runtime.createIterResultObject(match, false)
836 }
837
838 matchStr := nilSafe(ri.val.runtime.toObject(match).self.getIdx(valueInt(0), nil)).toString()
839 if matchStr.Length() == 0 {
840 thisIndex := toLength(ri.matcher.self.getStr("lastIndex", nil))
841 ri.matcher.self.setOwnStr("lastIndex", valueInt(advanceStringIndex64(ri.s, thisIndex, ri.fullUnicode)), true)
842 }
843 return ri.val.runtime.createIterResultObject(match, false)
844 }
845
846 func (r *Runtime) regexpproto_stdSearch(call FunctionCall) Value {
847 thisObj := r.toObject(call.This)
848 s := call.Argument(0).toString()
849 rx := r.checkStdRegexp(thisObj)
850 if rx == nil {
851 return r.regexpproto_stdSearchGeneric(thisObj, s)
852 }
853
854 previousLastIndex := rx.getStr("lastIndex", nil)
855 rx.setOwnStr("lastIndex", intToValue(0), true)
856
857 match, result := rx.execRegexp(s)
858 rx.setOwnStr("lastIndex", previousLastIndex, true)
859
860 if !match {
861 return intToValue(-1)
862 }
863 return intToValue(int64(result[0]))
864 }
865
866 func (r *Runtime) regexpproto_stdSplitterGeneric(splitter *Object, s String, limit Value, unicodeMatching bool) Value {
867 var a []Value
868 var lim int64
869 if limit == nil || limit == _undefined {
870 lim = maxInt - 1
871 } else {
872 lim = toLength(limit)
873 }
874 if lim == 0 {
875 return r.newArrayValues(a)
876 }
877 size := s.Length()
878 p := 0
879 execFn := toMethod(splitter.ToObject(r).self.getStr("exec", nil))
880
881 if size == 0 {
882 if r.regExpExec(execFn, splitter, s) == _null {
883 a = append(a, s)
884 }
885 return r.newArrayValues(a)
886 }
887
888 q := p
889 for q < size {
890 splitter.self.setOwnStr("lastIndex", intToValue(int64(q)), true)
891 z := r.regExpExec(execFn, splitter, s)
892 if z == _null {
893 q = advanceStringIndex(s, q, unicodeMatching)
894 } else {
895 z := r.toObject(z)
896 e := toLength(splitter.self.getStr("lastIndex", nil))
897 if e == int64(p) {
898 q = advanceStringIndex(s, q, unicodeMatching)
899 } else {
900 a = append(a, s.Substring(p, q))
901 if int64(len(a)) == lim {
902 return r.newArrayValues(a)
903 }
904 if e > int64(size) {
905 p = size
906 } else {
907 p = int(e)
908 }
909 numberOfCaptures := max(toLength(z.self.getStr("length", nil))-1, 0)
910 for i := int64(1); i <= numberOfCaptures; i++ {
911 a = append(a, nilSafe(z.self.getIdx(valueInt(i), nil)))
912 if int64(len(a)) == lim {
913 return r.newArrayValues(a)
914 }
915 }
916 q = p
917 }
918 }
919 }
920 a = append(a, s.Substring(p, size))
921 return r.newArrayValues(a)
922 }
923
924 func advanceStringIndex(s String, pos int, unicode bool) int {
925 next := pos + 1
926 if !unicode {
927 return next
928 }
929 l := s.Length()
930 if next >= l {
931 return next
932 }
933 if !isUTF16FirstSurrogate(s.CharAt(pos)) {
934 return next
935 }
936 if !isUTF16SecondSurrogate(s.CharAt(next)) {
937 return next
938 }
939 return next + 1
940 }
941
942 func advanceStringIndex64(s String, pos int64, unicode bool) int64 {
943 next := pos + 1
944 if !unicode {
945 return next
946 }
947 l := int64(s.Length())
948 if next >= l {
949 return next
950 }
951 if !isUTF16FirstSurrogate(s.CharAt(int(pos))) {
952 return next
953 }
954 if !isUTF16SecondSurrogate(s.CharAt(int(next))) {
955 return next
956 }
957 return next + 1
958 }
959
960 func (r *Runtime) regexpproto_stdSplitter(call FunctionCall) Value {
961 rxObj := r.toObject(call.This)
962 s := call.Argument(0).toString()
963 limitValue := call.Argument(1)
964 var splitter *Object
965 search := r.checkStdRegexp(rxObj)
966 c := r.speciesConstructorObj(rxObj, r.getRegExp())
967 if search == nil || c != r.global.RegExp {
968 flags := nilSafe(rxObj.self.getStr("flags", nil)).toString()
969 flagsStr := flags.String()
970
971
972 if !strings.Contains(flagsStr, "y") {
973 flags = flags.Concat(asciiString("y"))
974 }
975 splitter = r.toConstructor(c)([]Value{rxObj, flags}, nil)
976 search = r.checkStdRegexp(splitter)
977 if search == nil {
978 return r.regexpproto_stdSplitterGeneric(splitter, s, limitValue, strings.Contains(flagsStr, "u"))
979 }
980 }
981
982 pattern := search.pattern
983 limit := -1
984 if limitValue != _undefined {
985 limit = int(toUint32(limitValue))
986 }
987
988 if limit == 0 {
989 return r.newArrayValues(nil)
990 }
991
992 targetLength := s.Length()
993 var valueArray []Value
994 lastIndex := 0
995 found := 0
996
997 result := pattern.findAllSubmatchIndex(s, 0, -1, false)
998 if targetLength == 0 {
999 if result == nil {
1000 valueArray = append(valueArray, s)
1001 }
1002 goto RETURN
1003 }
1004
1005 for _, match := range result {
1006 if match[0] == match[1] {
1007
1008 if match[0] == 0 || match[0] == targetLength {
1009 continue
1010 }
1011 }
1012
1013 if lastIndex != match[0] {
1014 valueArray = append(valueArray, s.Substring(lastIndex, match[0]))
1015 found++
1016 } else if lastIndex == match[0] {
1017 if lastIndex != -1 {
1018 valueArray = append(valueArray, stringEmpty)
1019 found++
1020 }
1021 }
1022
1023 lastIndex = match[1]
1024 if found == limit {
1025 goto RETURN
1026 }
1027
1028 captureCount := len(match) / 2
1029 for index := 1; index < captureCount; index++ {
1030 offset := index * 2
1031 var value Value
1032 if match[offset] != -1 {
1033 value = s.Substring(match[offset], match[offset+1])
1034 } else {
1035 value = _undefined
1036 }
1037 valueArray = append(valueArray, value)
1038 found++
1039 if found == limit {
1040 goto RETURN
1041 }
1042 }
1043 }
1044
1045 if found != limit {
1046 if lastIndex != targetLength {
1047 valueArray = append(valueArray, s.Substring(lastIndex, targetLength))
1048 } else {
1049 valueArray = append(valueArray, stringEmpty)
1050 }
1051 }
1052
1053 RETURN:
1054 return r.newArrayValues(valueArray)
1055 }
1056
1057 func (r *Runtime) regexpproto_stdReplacerGeneric(rxObj *Object, s, replaceStr String, rcall func(FunctionCall) Value) Value {
1058 var results []Value
1059 if nilSafe(rxObj.self.getStr("global", nil)).ToBoolean() {
1060 results = r.getGlobalRegexpMatches(rxObj, s)
1061 } else {
1062 execFn := toMethod(rxObj.self.getStr("exec", nil))
1063 result := r.regExpExec(execFn, rxObj, s)
1064 if result != _null {
1065 results = append(results, result)
1066 }
1067 }
1068 lengthS := s.Length()
1069 nextSourcePosition := 0
1070 var resultBuf StringBuilder
1071 for _, result := range results {
1072 obj := r.toObject(result)
1073 nCaptures := max(toLength(obj.self.getStr("length", nil))-1, 0)
1074 matched := nilSafe(obj.self.getIdx(valueInt(0), nil)).toString()
1075 matchLength := matched.Length()
1076 position := toIntStrict(max(min(nilSafe(obj.self.getStr("index", nil)).ToInteger(), int64(lengthS)), 0))
1077 var captures []Value
1078 if rcall != nil {
1079 captures = make([]Value, 0, nCaptures+3)
1080 } else {
1081 captures = make([]Value, 0, nCaptures+1)
1082 }
1083 captures = append(captures, matched)
1084 for n := int64(1); n <= nCaptures; n++ {
1085 capN := nilSafe(obj.self.getIdx(valueInt(n), nil))
1086 if capN != _undefined {
1087 capN = capN.ToString()
1088 }
1089 captures = append(captures, capN)
1090 }
1091 var replacement String
1092 if rcall != nil {
1093 captures = append(captures, intToValue(int64(position)), s)
1094 replacement = rcall(FunctionCall{
1095 This: _undefined,
1096 Arguments: captures,
1097 }).toString()
1098 if position >= nextSourcePosition {
1099 resultBuf.WriteString(s.Substring(nextSourcePosition, position))
1100 resultBuf.WriteString(replacement)
1101 nextSourcePosition = position + matchLength
1102 }
1103 } else {
1104 if position >= nextSourcePosition {
1105 resultBuf.WriteString(s.Substring(nextSourcePosition, position))
1106 writeSubstitution(s, position, len(captures), func(idx int) String {
1107 capture := captures[idx]
1108 if capture != _undefined {
1109 return capture.toString()
1110 }
1111 return stringEmpty
1112 }, replaceStr, &resultBuf)
1113 nextSourcePosition = position + matchLength
1114 }
1115 }
1116 }
1117 if nextSourcePosition < lengthS {
1118 resultBuf.WriteString(s.Substring(nextSourcePosition, lengthS))
1119 }
1120 return resultBuf.String()
1121 }
1122
1123 func writeSubstitution(s String, position int, numCaptures int, getCapture func(int) String, replaceStr String, buf *StringBuilder) {
1124 l := s.Length()
1125 rl := replaceStr.Length()
1126 matched := getCapture(0)
1127 tailPos := position + matched.Length()
1128
1129 for i := 0; i < rl; i++ {
1130 c := replaceStr.CharAt(i)
1131 if c == '$' && i < rl-1 {
1132 ch := replaceStr.CharAt(i + 1)
1133 switch ch {
1134 case '$':
1135 buf.WriteRune('$')
1136 case '`':
1137 buf.WriteString(s.Substring(0, position))
1138 case '\'':
1139 if tailPos < l {
1140 buf.WriteString(s.Substring(tailPos, l))
1141 }
1142 case '&':
1143 buf.WriteString(matched)
1144 default:
1145 matchNumber := 0
1146 j := i + 1
1147 for j < rl {
1148 ch := replaceStr.CharAt(j)
1149 if ch >= '0' && ch <= '9' {
1150 m := matchNumber*10 + int(ch-'0')
1151 if m >= numCaptures {
1152 break
1153 }
1154 matchNumber = m
1155 j++
1156 } else {
1157 break
1158 }
1159 }
1160 if matchNumber > 0 {
1161 buf.WriteString(getCapture(matchNumber))
1162 i = j - 1
1163 continue
1164 } else {
1165 buf.WriteRune('$')
1166 buf.WriteRune(rune(ch))
1167 }
1168 }
1169 i++
1170 } else {
1171 buf.WriteRune(rune(c))
1172 }
1173 }
1174 }
1175
1176 func (r *Runtime) regexpproto_stdReplacer(call FunctionCall) Value {
1177 rxObj := r.toObject(call.This)
1178 s := call.Argument(0).toString()
1179 replaceStr, rcall := getReplaceValue(call.Argument(1))
1180
1181 rx := r.checkStdRegexp(rxObj)
1182 if rx == nil {
1183 return r.regexpproto_stdReplacerGeneric(rxObj, s, replaceStr, rcall)
1184 }
1185
1186 var index int64
1187 find := 1
1188 if rx.pattern.global {
1189 find = -1
1190 rx.setOwnStr("lastIndex", intToValue(0), true)
1191 } else {
1192 index = rx.getLastIndex()
1193 }
1194 found := rx.pattern.findAllSubmatchIndex(s, toIntStrict(index), find, rx.pattern.sticky)
1195 if len(found) > 0 {
1196 if !rx.updateLastIndex(index, found[0], found[len(found)-1]) {
1197 found = nil
1198 }
1199 } else {
1200 rx.updateLastIndex(index, nil, nil)
1201 }
1202
1203 return stringReplace(s, found, replaceStr, rcall)
1204 }
1205
1206 func (r *Runtime) regExpStringIteratorProto_next(call FunctionCall) Value {
1207 thisObj := r.toObject(call.This)
1208 if iter, ok := thisObj.self.(*regExpStringIterObject); ok {
1209 return iter.next()
1210 }
1211 panic(r.NewTypeError("Method RegExp String Iterator.prototype.next called on incompatible receiver %s", r.objectproto_toString(FunctionCall{This: thisObj})))
1212 }
1213
1214 func (r *Runtime) createRegExpStringIteratorPrototype(val *Object) objectImpl {
1215 o := newBaseObjectObj(val, r.getIteratorPrototype(), classObject)
1216
1217 o._putProp("next", r.newNativeFunc(r.regExpStringIteratorProto_next, "next", 0), true, false, true)
1218 o._putSym(SymToStringTag, valueProp(asciiString(classRegExpStringIterator), false, false, true))
1219
1220 return o
1221 }
1222
1223 func (r *Runtime) getRegExpStringIteratorPrototype() *Object {
1224 var o *Object
1225 if o = r.global.RegExpStringIteratorPrototype; o == nil {
1226 o = &Object{runtime: r}
1227 r.global.RegExpStringIteratorPrototype = o
1228 o.self = r.createRegExpStringIteratorPrototype(o)
1229 }
1230 return o
1231 }
1232
1233 func (r *Runtime) getRegExp() *Object {
1234 ret := r.global.RegExp
1235 if ret == nil {
1236 ret = &Object{runtime: r}
1237 r.global.RegExp = ret
1238 proto := r.getRegExpPrototype()
1239 r.newNativeFuncAndConstruct(ret, r.builtin_RegExp,
1240 r.wrapNativeConstruct(r.builtin_newRegExp, ret, proto), proto, "RegExp", intToValue(2))
1241 rx := ret.self
1242 r.putSpeciesReturnThis(rx)
1243 }
1244 return ret
1245 }
1246
1247 func (r *Runtime) getRegExpPrototype() *Object {
1248 ret := r.global.RegExpPrototype
1249 if ret == nil {
1250 o := r.newGuardedObject(r.global.ObjectPrototype, classObject)
1251 ret = o.val
1252 r.global.RegExpPrototype = ret
1253 r.global.stdRegexpProto = o
1254
1255 o._putProp("constructor", r.getRegExp(), true, false, true)
1256 o._putProp("compile", r.newNativeFunc(r.regexpproto_compile, "compile", 2), true, false, true)
1257 o._putProp("exec", r.newNativeFunc(r.regexpproto_exec, "exec", 1), true, false, true)
1258 o._putProp("test", r.newNativeFunc(r.regexpproto_test, "test", 1), true, false, true)
1259 o._putProp("toString", r.newNativeFunc(r.regexpproto_toString, "toString", 0), true, false, true)
1260 o.setOwnStr("source", &valueProperty{
1261 configurable: true,
1262 getterFunc: r.newNativeFunc(r.regexpproto_getSource, "get source", 0),
1263 accessor: true,
1264 }, false)
1265 o.setOwnStr("global", &valueProperty{
1266 configurable: true,
1267 getterFunc: r.newNativeFunc(r.regexpproto_getGlobal, "get global", 0),
1268 accessor: true,
1269 }, false)
1270 o.setOwnStr("multiline", &valueProperty{
1271 configurable: true,
1272 getterFunc: r.newNativeFunc(r.regexpproto_getMultiline, "get multiline", 0),
1273 accessor: true,
1274 }, false)
1275 o.setOwnStr("ignoreCase", &valueProperty{
1276 configurable: true,
1277 getterFunc: r.newNativeFunc(r.regexpproto_getIgnoreCase, "get ignoreCase", 0),
1278 accessor: true,
1279 }, false)
1280 o.setOwnStr("unicode", &valueProperty{
1281 configurable: true,
1282 getterFunc: r.newNativeFunc(r.regexpproto_getUnicode, "get unicode", 0),
1283 accessor: true,
1284 }, false)
1285 o.setOwnStr("sticky", &valueProperty{
1286 configurable: true,
1287 getterFunc: r.newNativeFunc(r.regexpproto_getSticky, "get sticky", 0),
1288 accessor: true,
1289 }, false)
1290 o.setOwnStr("flags", &valueProperty{
1291 configurable: true,
1292 getterFunc: r.newNativeFunc(r.regexpproto_getFlags, "get flags", 0),
1293 accessor: true,
1294 }, false)
1295
1296 o._putSym(SymMatch, valueProp(r.newNativeFunc(r.regexpproto_stdMatcher, "[Symbol.match]", 1), true, false, true))
1297 o._putSym(SymMatchAll, valueProp(r.newNativeFunc(r.regexpproto_stdMatcherAll, "[Symbol.matchAll]", 1), true, false, true))
1298 o._putSym(SymSearch, valueProp(r.newNativeFunc(r.regexpproto_stdSearch, "[Symbol.search]", 1), true, false, true))
1299 o._putSym(SymSplit, valueProp(r.newNativeFunc(r.regexpproto_stdSplitter, "[Symbol.split]", 2), true, false, true))
1300 o._putSym(SymReplace, valueProp(r.newNativeFunc(r.regexpproto_stdReplacer, "[Symbol.replace]", 2), true, false, true))
1301 o.guard("exec", "global", "multiline", "ignoreCase", "unicode", "sticky")
1302 }
1303 return ret
1304 }
1305
View as plain text