1 package regexp2
2
3 import (
4 "bytes"
5 "errors"
6 "fmt"
7 "math"
8 "strconv"
9 "strings"
10 "time"
11 "unicode"
12
13 "github.com/dlclark/regexp2/syntax"
14 )
15
16 type runner struct {
17 re *Regexp
18 code *syntax.Code
19
20 runtextstart int
21
22 runtext []rune
23 runtextpos int
24 runtextend int
25
26
27
28
29
30
31
32
33
34
35
36
37
38 runtrack []int
39 runtrackpos int
40
41
42
43
44
45
46
47
48 runstack []int
49 runstackpos int
50
51
52
53
54 runcrawl []int
55 runcrawlpos int
56
57 runtrackcount int
58
59 runmatch *Match
60
61 ignoreTimeout bool
62 timeout time.Duration
63 deadline fasttime
64
65 operator syntax.InstOp
66 codepos int
67 rightToLeft bool
68 caseInsensitive bool
69 }
70
71
72
73
74
75
76 func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
77
78
79 runner := re.getRunner()
80 defer re.putRunner(runner)
81
82 if textstart < 0 {
83 if re.RightToLeft() {
84 textstart = len(input)
85 } else {
86 textstart = 0
87 }
88 }
89
90 return runner.scan(input, textstart, quick, re.MatchTimeout)
91 }
92
93
94
95
96
97
98
99
100
101
102
103 func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
104 r.timeout = timeout
105 r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
106 r.runtextstart = textstart
107 r.runtext = rt
108 r.runtextend = len(rt)
109
110 stoppos := r.runtextend
111 bump := 1
112
113 if r.re.RightToLeft() {
114 bump = -1
115 stoppos = 0
116 }
117
118 r.runtextpos = textstart
119 initted := false
120
121 r.startTimeoutWatch()
122 for {
123 if r.re.Debug() {
124
125 fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
126 fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
127 }
128
129 if r.findFirstChar() {
130 if err := r.checkTimeout(); err != nil {
131 return nil, err
132 }
133
134 if !initted {
135 r.initMatch()
136 initted = true
137 }
138
139 if r.re.Debug() {
140 fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
141 }
142
143 if err := r.execute(); err != nil {
144 return nil, err
145 }
146
147 if r.runmatch.matchcount[0] > 0 {
148
149 return r.tidyMatch(quick), nil
150 }
151
152
153 r.runtrackpos = len(r.runtrack)
154 r.runstackpos = len(r.runstack)
155 r.runcrawlpos = len(r.runcrawl)
156 }
157
158
159
160 if r.runtextpos == stoppos {
161 r.tidyMatch(true)
162 return nil, nil
163 }
164
165
166
167
168
169 r.runtextpos += bump
170 }
171
172 }
173
174 func (r *runner) execute() error {
175
176 r.goTo(0)
177
178 for {
179
180 if r.re.Debug() {
181 r.dumpState()
182 }
183
184 if err := r.checkTimeout(); err != nil {
185 return err
186 }
187
188 switch r.operator {
189 case syntax.Stop:
190 return nil
191
192 case syntax.Nothing:
193 break
194
195 case syntax.Goto:
196 r.goTo(r.operand(0))
197 continue
198
199 case syntax.Testref:
200 if !r.runmatch.isMatched(r.operand(0)) {
201 break
202 }
203 r.advance(1)
204 continue
205
206 case syntax.Lazybranch:
207 r.trackPush1(r.textPos())
208 r.advance(1)
209 continue
210
211 case syntax.Lazybranch | syntax.Back:
212 r.trackPop()
213 r.textto(r.trackPeek())
214 r.goTo(r.operand(0))
215 continue
216
217 case syntax.Setmark:
218 r.stackPush(r.textPos())
219 r.trackPush()
220 r.advance(0)
221 continue
222
223 case syntax.Nullmark:
224 r.stackPush(-1)
225 r.trackPush()
226 r.advance(0)
227 continue
228
229 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
230 r.stackPop()
231 break
232
233 case syntax.Getmark:
234 r.stackPop()
235 r.trackPush1(r.stackPeek())
236 r.textto(r.stackPeek())
237 r.advance(0)
238 continue
239
240 case syntax.Getmark | syntax.Back:
241 r.trackPop()
242 r.stackPush(r.trackPeek())
243 break
244
245 case syntax.Capturemark:
246 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
247 break
248 }
249 r.stackPop()
250 if r.operand(1) != -1 {
251 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
252 } else {
253 r.capture(r.operand(0), r.stackPeek(), r.textPos())
254 }
255 r.trackPush1(r.stackPeek())
256
257 r.advance(2)
258
259 continue
260
261 case syntax.Capturemark | syntax.Back:
262 r.trackPop()
263 r.stackPush(r.trackPeek())
264 r.uncapture()
265 if r.operand(0) != -1 && r.operand(1) != -1 {
266 r.uncapture()
267 }
268
269 break
270
271 case syntax.Branchmark:
272 r.stackPop()
273
274 matched := r.textPos() - r.stackPeek()
275
276 if matched != 0 {
277 r.trackPush2(r.stackPeek(), r.textPos())
278 r.stackPush(r.textPos())
279 r.goTo(r.operand(0))
280 } else {
281 r.trackPushNeg1(r.stackPeek())
282 r.advance(1)
283 }
284 continue
285
286 case syntax.Branchmark | syntax.Back:
287 r.trackPopN(2)
288 r.stackPop()
289 r.textto(r.trackPeekN(1))
290 r.trackPushNeg1(r.trackPeek())
291 r.advance(1)
292 continue
293
294 case syntax.Branchmark | syntax.Back2:
295 r.trackPop()
296 r.stackPush(r.trackPeek())
297 break
298
299 case syntax.Lazybranchmark:
300 {
301
302
303
304 r.stackPop()
305
306 oldMarkPos := r.stackPeek()
307
308 if r.textPos() != oldMarkPos {
309 if oldMarkPos != -1 {
310 r.trackPush2(oldMarkPos, r.textPos())
311 } else {
312 r.trackPush2(r.textPos(), r.textPos())
313 }
314 } else {
315
316
317
318
319 r.stackPush(oldMarkPos)
320
321 r.trackPushNeg1(r.stackPeek())
322 }
323 r.advance(1)
324 continue
325 }
326
327 case syntax.Lazybranchmark | syntax.Back:
328
329
330
331
332
333
334
335 r.trackPopN(2)
336 pos := r.trackPeekN(1)
337 r.trackPushNeg1(r.trackPeek())
338 r.stackPush(pos)
339 r.textto(pos)
340 r.goTo(r.operand(0))
341 continue
342
343 case syntax.Lazybranchmark | syntax.Back2:
344
345
346 r.stackPop()
347 r.trackPop()
348 r.stackPush(r.trackPeek())
349 break
350
351 case syntax.Setcount:
352 r.stackPush2(r.textPos(), r.operand(0))
353 r.trackPush()
354 r.advance(1)
355 continue
356
357 case syntax.Nullcount:
358 r.stackPush2(-1, r.operand(0))
359 r.trackPush()
360 r.advance(1)
361 continue
362
363 case syntax.Setcount | syntax.Back:
364 r.stackPopN(2)
365 break
366
367 case syntax.Nullcount | syntax.Back:
368 r.stackPopN(2)
369 break
370
371 case syntax.Branchcount:
372
373
374
375
376 r.stackPopN(2)
377 mark := r.stackPeek()
378 count := r.stackPeekN(1)
379 matched := r.textPos() - mark
380
381 if count >= r.operand(1) || (matched == 0 && count >= 0) {
382 r.trackPushNeg2(mark, count)
383 r.advance(2)
384 } else {
385 r.trackPush1(mark)
386 r.stackPush2(r.textPos(), count+1)
387 r.goTo(r.operand(0))
388 }
389 continue
390
391 case syntax.Branchcount | syntax.Back:
392
393
394
395
396
397 r.trackPop()
398 r.stackPopN(2)
399 if r.stackPeekN(1) > 0 {
400 r.textto(r.stackPeek())
401 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1)
402 r.advance(2)
403 continue
404 }
405 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1)
406 break
407
408 case syntax.Branchcount | syntax.Back2:
409
410
411
412 r.trackPopN(2)
413 r.stackPush2(r.trackPeek(), r.trackPeekN(1))
414 break
415
416 case syntax.Lazybranchcount:
417
418
419
420
421 r.stackPopN(2)
422 mark := r.stackPeek()
423 count := r.stackPeekN(1)
424
425 if count < 0 {
426 r.trackPushNeg1(mark)
427 r.stackPush2(r.textPos(), count+1)
428 r.goTo(r.operand(0))
429 } else {
430 r.trackPush3(mark, count, r.textPos())
431 r.advance(2)
432 }
433 continue
434
435 case syntax.Lazybranchcount | syntax.Back:
436
437
438
439
440
441 r.trackPopN(3)
442 mark := r.trackPeek()
443 textpos := r.trackPeekN(2)
444
445 if r.trackPeekN(1) < r.operand(1) && textpos != mark {
446 r.textto(textpos)
447 r.stackPush2(textpos, r.trackPeekN(1)+1)
448 r.trackPushNeg1(mark)
449 r.goTo(r.operand(0))
450 continue
451 } else {
452 r.stackPush2(r.trackPeek(), r.trackPeekN(1))
453 break
454 }
455
456 case syntax.Lazybranchcount | syntax.Back2:
457
458
459
460
461
462 r.trackPop()
463 r.stackPopN(2)
464 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1)
465 break
466
467 case syntax.Setjump:
468 r.stackPush2(r.trackpos(), r.crawlpos())
469 r.trackPush()
470 r.advance(0)
471 continue
472
473 case syntax.Setjump | syntax.Back:
474 r.stackPopN(2)
475 break
476
477 case syntax.Backjump:
478
479
480
481 r.stackPopN(2)
482 r.trackto(r.stackPeek())
483
484 for r.crawlpos() != r.stackPeekN(1) {
485 r.uncapture()
486 }
487
488 break
489
490 case syntax.Forejump:
491
492
493
494 r.stackPopN(2)
495 r.trackto(r.stackPeek())
496 r.trackPush1(r.stackPeekN(1))
497 r.advance(0)
498 continue
499
500 case syntax.Forejump | syntax.Back:
501
502
503 r.trackPop()
504
505 for r.crawlpos() != r.trackPeek() {
506 r.uncapture()
507 }
508
509 break
510
511 case syntax.Bol:
512 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
513 break
514 }
515 r.advance(0)
516 continue
517
518 case syntax.Eol:
519 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
520 break
521 }
522 r.advance(0)
523 continue
524
525 case syntax.Boundary:
526 if !r.isBoundary(r.textPos(), 0, r.runtextend) {
527 break
528 }
529 r.advance(0)
530 continue
531
532 case syntax.Nonboundary:
533 if r.isBoundary(r.textPos(), 0, r.runtextend) {
534 break
535 }
536 r.advance(0)
537 continue
538
539 case syntax.ECMABoundary:
540 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
541 break
542 }
543 r.advance(0)
544 continue
545
546 case syntax.NonECMABoundary:
547 if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
548 break
549 }
550 r.advance(0)
551 continue
552
553 case syntax.Beginning:
554 if r.leftchars() > 0 {
555 break
556 }
557 r.advance(0)
558 continue
559
560 case syntax.Start:
561 if r.textPos() != r.textstart() {
562 break
563 }
564 r.advance(0)
565 continue
566
567 case syntax.EndZ:
568 rchars := r.rightchars()
569 if rchars > 1 {
570 break
571 }
572
573
574 if (r.re.options & (RE2 | ECMAScript)) != 0 {
575
576 if rchars > 0 {
577 break
578 }
579 } else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
580
581 break
582 }
583
584 r.advance(0)
585 continue
586
587 case syntax.End:
588 if r.rightchars() > 0 {
589 break
590 }
591 r.advance(0)
592 continue
593
594 case syntax.One:
595 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
596 break
597 }
598
599 r.advance(1)
600 continue
601
602 case syntax.Notone:
603 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
604 break
605 }
606
607 r.advance(1)
608 continue
609
610 case syntax.Set:
611
612 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
613 break
614 }
615
616 r.advance(1)
617 continue
618
619 case syntax.Multi:
620 if !r.runematch(r.code.Strings[r.operand(0)]) {
621 break
622 }
623
624 r.advance(1)
625 continue
626
627 case syntax.Ref:
628
629 capnum := r.operand(0)
630
631 if r.runmatch.isMatched(capnum) {
632 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
633 break
634 }
635 } else {
636 if (r.re.options & ECMAScript) == 0 {
637 break
638 }
639 }
640
641 r.advance(1)
642 continue
643
644 case syntax.Onerep:
645
646 c := r.operand(1)
647
648 if r.forwardchars() < c {
649 break
650 }
651
652 ch := rune(r.operand(0))
653
654 for c > 0 {
655 if r.forwardcharnext() != ch {
656 goto BreakBackward
657 }
658 c--
659 }
660
661 r.advance(2)
662 continue
663
664 case syntax.Notonerep:
665
666 c := r.operand(1)
667
668 if r.forwardchars() < c {
669 break
670 }
671 ch := rune(r.operand(0))
672
673 for c > 0 {
674 if r.forwardcharnext() == ch {
675 goto BreakBackward
676 }
677 c--
678 }
679
680 r.advance(2)
681 continue
682
683 case syntax.Setrep:
684
685 c := r.operand(1)
686
687 if r.forwardchars() < c {
688 break
689 }
690
691 set := r.code.Sets[r.operand(0)]
692
693 for c > 0 {
694 if !set.CharIn(r.forwardcharnext()) {
695 goto BreakBackward
696 }
697 c--
698 }
699
700 r.advance(2)
701 continue
702
703 case syntax.Oneloop:
704
705 c := r.operand(1)
706
707 if c > r.forwardchars() {
708 c = r.forwardchars()
709 }
710
711 ch := rune(r.operand(0))
712 i := c
713
714 for ; i > 0; i-- {
715 if r.forwardcharnext() != ch {
716 r.backwardnext()
717 break
718 }
719 }
720
721 if c > i {
722 r.trackPush2(c-i-1, r.textPos()-r.bump())
723 }
724
725 r.advance(2)
726 continue
727
728 case syntax.Notoneloop:
729
730 c := r.operand(1)
731
732 if c > r.forwardchars() {
733 c = r.forwardchars()
734 }
735
736 ch := rune(r.operand(0))
737 i := c
738
739 for ; i > 0; i-- {
740 if r.forwardcharnext() == ch {
741 r.backwardnext()
742 break
743 }
744 }
745
746 if c > i {
747 r.trackPush2(c-i-1, r.textPos()-r.bump())
748 }
749
750 r.advance(2)
751 continue
752
753 case syntax.Setloop:
754
755 c := r.operand(1)
756
757 if c > r.forwardchars() {
758 c = r.forwardchars()
759 }
760
761 set := r.code.Sets[r.operand(0)]
762 i := c
763
764 for ; i > 0; i-- {
765 if !set.CharIn(r.forwardcharnext()) {
766 r.backwardnext()
767 break
768 }
769 }
770
771 if c > i {
772 r.trackPush2(c-i-1, r.textPos()-r.bump())
773 }
774
775 r.advance(2)
776 continue
777
778 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
779
780 r.trackPopN(2)
781 i := r.trackPeek()
782 pos := r.trackPeekN(1)
783
784 r.textto(pos)
785
786 if i > 0 {
787 r.trackPush2(i-1, pos-r.bump())
788 }
789
790 r.advance(2)
791 continue
792
793 case syntax.Setloop | syntax.Back:
794
795 r.trackPopN(2)
796 i := r.trackPeek()
797 pos := r.trackPeekN(1)
798
799 r.textto(pos)
800
801 if i > 0 {
802 r.trackPush2(i-1, pos-r.bump())
803 }
804
805 r.advance(2)
806 continue
807
808 case syntax.Onelazy, syntax.Notonelazy:
809
810 c := r.operand(1)
811
812 if c > r.forwardchars() {
813 c = r.forwardchars()
814 }
815
816 if c > 0 {
817 r.trackPush2(c-1, r.textPos())
818 }
819
820 r.advance(2)
821 continue
822
823 case syntax.Setlazy:
824
825 c := r.operand(1)
826
827 if c > r.forwardchars() {
828 c = r.forwardchars()
829 }
830
831 if c > 0 {
832 r.trackPush2(c-1, r.textPos())
833 }
834
835 r.advance(2)
836 continue
837
838 case syntax.Onelazy | syntax.Back:
839
840 r.trackPopN(2)
841 pos := r.trackPeekN(1)
842 r.textto(pos)
843
844 if r.forwardcharnext() != rune(r.operand(0)) {
845 break
846 }
847
848 i := r.trackPeek()
849
850 if i > 0 {
851 r.trackPush2(i-1, pos+r.bump())
852 }
853
854 r.advance(2)
855 continue
856
857 case syntax.Notonelazy | syntax.Back:
858
859 r.trackPopN(2)
860 pos := r.trackPeekN(1)
861 r.textto(pos)
862
863 if r.forwardcharnext() == rune(r.operand(0)) {
864 break
865 }
866
867 i := r.trackPeek()
868
869 if i > 0 {
870 r.trackPush2(i-1, pos+r.bump())
871 }
872
873 r.advance(2)
874 continue
875
876 case syntax.Setlazy | syntax.Back:
877
878 r.trackPopN(2)
879 pos := r.trackPeekN(1)
880 r.textto(pos)
881
882 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
883 break
884 }
885
886 i := r.trackPeek()
887
888 if i > 0 {
889 r.trackPush2(i-1, pos+r.bump())
890 }
891
892 r.advance(2)
893 continue
894
895 default:
896 return errors.New("unknown state in regex runner")
897 }
898
899 BreakBackward:
900 ;
901
902
903 r.backtrack()
904 }
905 }
906
907
908 func (r *runner) ensureStorage() {
909 if r.runstackpos < r.runtrackcount*4 {
910 doubleIntSlice(&r.runstack, &r.runstackpos)
911 }
912 if r.runtrackpos < r.runtrackcount*4 {
913 doubleIntSlice(&r.runtrack, &r.runtrackpos)
914 }
915 }
916
917 func doubleIntSlice(s *[]int, pos *int) {
918 oldLen := len(*s)
919 newS := make([]int, oldLen*2)
920
921 copy(newS[oldLen:], *s)
922 *pos += oldLen
923 *s = newS
924 }
925
926
927 func (r *runner) crawl(i int) {
928 if r.runcrawlpos == 0 {
929 doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
930 }
931 r.runcrawlpos--
932 r.runcrawl[r.runcrawlpos] = i
933 }
934
935
936 func (r *runner) popcrawl() int {
937 val := r.runcrawl[r.runcrawlpos]
938 r.runcrawlpos++
939 return val
940 }
941
942
943 func (r *runner) crawlpos() int {
944 return len(r.runcrawl) - r.runcrawlpos
945 }
946
947 func (r *runner) advance(i int) {
948 r.codepos += (i + 1)
949 r.setOperator(r.code.Codes[r.codepos])
950 }
951
952 func (r *runner) goTo(newpos int) {
953
954 if newpos <= r.codepos {
955 r.ensureStorage()
956 }
957
958 r.setOperator(r.code.Codes[newpos])
959 r.codepos = newpos
960 }
961
962 func (r *runner) textto(newpos int) {
963 r.runtextpos = newpos
964 }
965
966 func (r *runner) trackto(newpos int) {
967 r.runtrackpos = len(r.runtrack) - newpos
968 }
969
970 func (r *runner) textstart() int {
971 return r.runtextstart
972 }
973
974 func (r *runner) textPos() int {
975 return r.runtextpos
976 }
977
978
979 func (r *runner) trackpos() int {
980 return len(r.runtrack) - r.runtrackpos
981 }
982
983 func (r *runner) trackPush() {
984 r.runtrackpos--
985 r.runtrack[r.runtrackpos] = r.codepos
986 }
987
988 func (r *runner) trackPush1(I1 int) {
989 r.runtrackpos--
990 r.runtrack[r.runtrackpos] = I1
991 r.runtrackpos--
992 r.runtrack[r.runtrackpos] = r.codepos
993 }
994
995 func (r *runner) trackPush2(I1, I2 int) {
996 r.runtrackpos--
997 r.runtrack[r.runtrackpos] = I1
998 r.runtrackpos--
999 r.runtrack[r.runtrackpos] = I2
1000 r.runtrackpos--
1001 r.runtrack[r.runtrackpos] = r.codepos
1002 }
1003
1004 func (r *runner) trackPush3(I1, I2, I3 int) {
1005 r.runtrackpos--
1006 r.runtrack[r.runtrackpos] = I1
1007 r.runtrackpos--
1008 r.runtrack[r.runtrackpos] = I2
1009 r.runtrackpos--
1010 r.runtrack[r.runtrackpos] = I3
1011 r.runtrackpos--
1012 r.runtrack[r.runtrackpos] = r.codepos
1013 }
1014
1015 func (r *runner) trackPushNeg1(I1 int) {
1016 r.runtrackpos--
1017 r.runtrack[r.runtrackpos] = I1
1018 r.runtrackpos--
1019 r.runtrack[r.runtrackpos] = -r.codepos
1020 }
1021
1022 func (r *runner) trackPushNeg2(I1, I2 int) {
1023 r.runtrackpos--
1024 r.runtrack[r.runtrackpos] = I1
1025 r.runtrackpos--
1026 r.runtrack[r.runtrackpos] = I2
1027 r.runtrackpos--
1028 r.runtrack[r.runtrackpos] = -r.codepos
1029 }
1030
1031 func (r *runner) backtrack() {
1032 newpos := r.runtrack[r.runtrackpos]
1033 r.runtrackpos++
1034
1035 if r.re.Debug() {
1036 if newpos < 0 {
1037 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
1038 } else {
1039 fmt.Printf(" Backtracking to code position %v\n", newpos)
1040 }
1041 }
1042
1043 if newpos < 0 {
1044 newpos = -newpos
1045 r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1046 } else {
1047 r.setOperator(r.code.Codes[newpos] | syntax.Back)
1048 }
1049
1050
1051 if newpos < r.codepos {
1052 r.ensureStorage()
1053 }
1054
1055 r.codepos = newpos
1056 }
1057
1058 func (r *runner) setOperator(op int) {
1059 r.caseInsensitive = (0 != (op & syntax.Ci))
1060 r.rightToLeft = (0 != (op & syntax.Rtl))
1061 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1062 }
1063
1064 func (r *runner) trackPop() {
1065 r.runtrackpos++
1066 }
1067
1068
1069 func (r *runner) trackPopN(framesize int) {
1070 r.runtrackpos += framesize
1071 }
1072
1073
1074
1075
1076
1077 func (r *runner) trackPeek() int {
1078 return r.runtrack[r.runtrackpos-1]
1079 }
1080
1081
1082 func (r *runner) trackPeekN(i int) int {
1083 return r.runtrack[r.runtrackpos-i-1]
1084 }
1085
1086
1087 func (r *runner) stackPush(I1 int) {
1088 r.runstackpos--
1089 r.runstack[r.runstackpos] = I1
1090 }
1091
1092 func (r *runner) stackPush2(I1, I2 int) {
1093 r.runstackpos--
1094 r.runstack[r.runstackpos] = I1
1095 r.runstackpos--
1096 r.runstack[r.runstackpos] = I2
1097 }
1098
1099 func (r *runner) stackPop() {
1100 r.runstackpos++
1101 }
1102
1103
1104 func (r *runner) stackPopN(framesize int) {
1105 r.runstackpos += framesize
1106 }
1107
1108
1109
1110
1111
1112 func (r *runner) stackPeek() int {
1113 return r.runstack[r.runstackpos-1]
1114 }
1115
1116
1117 func (r *runner) stackPeekN(i int) int {
1118 return r.runstack[r.runstackpos-i-1]
1119 }
1120
1121 func (r *runner) operand(i int) int {
1122 return r.code.Codes[r.codepos+i+1]
1123 }
1124
1125 func (r *runner) leftchars() int {
1126 return r.runtextpos
1127 }
1128
1129 func (r *runner) rightchars() int {
1130 return r.runtextend - r.runtextpos
1131 }
1132
1133 func (r *runner) bump() int {
1134 if r.rightToLeft {
1135 return -1
1136 }
1137 return 1
1138 }
1139
1140 func (r *runner) forwardchars() int {
1141 if r.rightToLeft {
1142 return r.runtextpos
1143 }
1144 return r.runtextend - r.runtextpos
1145 }
1146
1147 func (r *runner) forwardcharnext() rune {
1148 var ch rune
1149 if r.rightToLeft {
1150 r.runtextpos--
1151 ch = r.runtext[r.runtextpos]
1152 } else {
1153 ch = r.runtext[r.runtextpos]
1154 r.runtextpos++
1155 }
1156
1157 if r.caseInsensitive {
1158 return unicode.ToLower(ch)
1159 }
1160 return ch
1161 }
1162
1163 func (r *runner) runematch(str []rune) bool {
1164 var pos int
1165
1166 c := len(str)
1167 if !r.rightToLeft {
1168 if r.runtextend-r.runtextpos < c {
1169 return false
1170 }
1171
1172 pos = r.runtextpos + c
1173 } else {
1174 if r.runtextpos-0 < c {
1175 return false
1176 }
1177
1178 pos = r.runtextpos
1179 }
1180
1181 if !r.caseInsensitive {
1182 for c != 0 {
1183 c--
1184 pos--
1185 if str[c] != r.runtext[pos] {
1186 return false
1187 }
1188 }
1189 } else {
1190 for c != 0 {
1191 c--
1192 pos--
1193 if str[c] != unicode.ToLower(r.runtext[pos]) {
1194 return false
1195 }
1196 }
1197 }
1198
1199 if !r.rightToLeft {
1200 pos += len(str)
1201 }
1202
1203 r.runtextpos = pos
1204
1205 return true
1206 }
1207
1208 func (r *runner) refmatch(index, len int) bool {
1209 var c, pos, cmpos int
1210
1211 if !r.rightToLeft {
1212 if r.runtextend-r.runtextpos < len {
1213 return false
1214 }
1215
1216 pos = r.runtextpos + len
1217 } else {
1218 if r.runtextpos-0 < len {
1219 return false
1220 }
1221
1222 pos = r.runtextpos
1223 }
1224 cmpos = index + len
1225
1226 c = len
1227
1228 if !r.caseInsensitive {
1229 for c != 0 {
1230 c--
1231 cmpos--
1232 pos--
1233 if r.runtext[cmpos] != r.runtext[pos] {
1234 return false
1235 }
1236
1237 }
1238 } else {
1239 for c != 0 {
1240 c--
1241 cmpos--
1242 pos--
1243
1244 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1245 return false
1246 }
1247 }
1248 }
1249
1250 if !r.rightToLeft {
1251 pos += len
1252 }
1253
1254 r.runtextpos = pos
1255
1256 return true
1257 }
1258
1259 func (r *runner) backwardnext() {
1260 if r.rightToLeft {
1261 r.runtextpos++
1262 } else {
1263 r.runtextpos--
1264 }
1265 }
1266
1267 func (r *runner) charAt(j int) rune {
1268 return r.runtext[j]
1269 }
1270
1271 func (r *runner) findFirstChar() bool {
1272
1273 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1274 if !r.code.RightToLeft {
1275 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1276 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1277 r.runtextpos = r.runtextend
1278 return false
1279 }
1280 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1281 r.runtextpos = r.runtextend - 1
1282 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1283 r.runtextpos = r.runtextend
1284 }
1285 } else {
1286 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1287 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1288 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1289 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1290 r.runtextpos = 0
1291 return false
1292 }
1293 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1294 r.runtextpos = 0
1295 }
1296 }
1297
1298 if r.code.BmPrefix != nil {
1299 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1300 }
1301
1302 return true
1303 } else if r.code.BmPrefix != nil {
1304 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1305
1306 if r.runtextpos == -1 {
1307 if r.code.RightToLeft {
1308 r.runtextpos = 0
1309 } else {
1310 r.runtextpos = r.runtextend
1311 }
1312 return false
1313 }
1314
1315 return true
1316 } else if r.code.FcPrefix == nil {
1317 return true
1318 }
1319
1320 r.rightToLeft = r.code.RightToLeft
1321 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1322
1323 set := r.code.FcPrefix.PrefixSet
1324 if set.IsSingleton() {
1325 ch := set.SingletonChar()
1326 for i := r.forwardchars(); i > 0; i-- {
1327 if ch == r.forwardcharnext() {
1328 r.backwardnext()
1329 return true
1330 }
1331 }
1332 } else {
1333 for i := r.forwardchars(); i > 0; i-- {
1334 n := r.forwardcharnext()
1335
1336 if set.CharIn(n) {
1337 r.backwardnext()
1338 return true
1339 }
1340 }
1341 }
1342
1343 return false
1344 }
1345
1346 func (r *runner) initMatch() {
1347
1348
1349 if r.runmatch == nil {
1350 if r.re.caps != nil {
1351 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1352 } else {
1353 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1354 }
1355 } else {
1356 r.runmatch.reset(r.runtext, r.runtextstart)
1357 }
1358
1359
1360
1361
1362
1363
1364 if r.runcrawl != nil {
1365 r.runtrackpos = len(r.runtrack)
1366 r.runstackpos = len(r.runstack)
1367 r.runcrawlpos = len(r.runcrawl)
1368 return
1369 }
1370
1371 r.initTrackCount()
1372
1373 tracksize := r.runtrackcount * 8
1374 stacksize := r.runtrackcount * 8
1375
1376 if tracksize < 32 {
1377 tracksize = 32
1378 }
1379 if stacksize < 16 {
1380 stacksize = 16
1381 }
1382
1383 r.runtrack = make([]int, tracksize)
1384 r.runtrackpos = tracksize
1385
1386 r.runstack = make([]int, stacksize)
1387 r.runstackpos = stacksize
1388
1389 r.runcrawl = make([]int, 32)
1390 r.runcrawlpos = 32
1391 }
1392
1393 func (r *runner) tidyMatch(quick bool) *Match {
1394 if !quick {
1395 match := r.runmatch
1396
1397 r.runmatch = nil
1398
1399 match.tidy(r.runtextpos)
1400 return match
1401 } else {
1402
1403
1404 return r.runmatch
1405 }
1406 }
1407
1408
1409
1410
1411 func (r *runner) capture(capnum, start, end int) {
1412 if end < start {
1413 T := end
1414 end = start
1415 start = T
1416 }
1417
1418 r.crawl(capnum)
1419 r.runmatch.addMatch(capnum, start, end-start)
1420 }
1421
1422
1423
1424
1425 func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1426 var start2, end2 int
1427
1428
1429
1430 if end < start {
1431 T := end
1432 end = start
1433 start = T
1434 }
1435
1436 start2 = r.runmatch.matchIndex(uncapnum)
1437 end2 = start2 + r.runmatch.matchLength(uncapnum)
1438
1439
1440
1441 if start >= end2 {
1442 end = start
1443 start = end2
1444 } else if end <= start2 {
1445 start = start2
1446 } else {
1447 if end > end2 {
1448 end = end2
1449 }
1450 if start2 > start {
1451 start = start2
1452 }
1453 }
1454
1455 r.crawl(uncapnum)
1456 r.runmatch.balanceMatch(uncapnum)
1457
1458 if capnum != -1 {
1459 r.crawl(capnum)
1460 r.runmatch.addMatch(capnum, start, end-start)
1461 }
1462 }
1463
1464
1465 func (r *runner) uncapture() {
1466 capnum := r.popcrawl()
1467 r.runmatch.removeMatch(capnum)
1468 }
1469
1470
1471
1472 func (r *runner) dumpState() {
1473 back := ""
1474 if r.operator&syntax.Back != 0 {
1475 back = " Back"
1476 }
1477 if r.operator&syntax.Back2 != 0 {
1478 back += " Back2"
1479 }
1480 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
1481 r.textposDescription(),
1482 r.stackDescription(r.runtrack, r.runtrackpos),
1483 r.stackDescription(r.runstack, r.runstackpos),
1484 r.code.OpcodeDescription(r.codepos),
1485 back)
1486 }
1487
1488 func (r *runner) stackDescription(a []int, index int) string {
1489 buf := &bytes.Buffer{}
1490
1491 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1492 if buf.Len() < 8 {
1493 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1494 }
1495
1496 buf.WriteRune('(')
1497 for i := index; i < len(a); i++ {
1498 if i > index {
1499 buf.WriteRune(' ')
1500 }
1501
1502 buf.WriteString(strconv.Itoa(a[i]))
1503 }
1504
1505 buf.WriteRune(')')
1506
1507 return buf.String()
1508 }
1509
1510 func (r *runner) textposDescription() string {
1511 buf := &bytes.Buffer{}
1512
1513 buf.WriteString(strconv.Itoa(r.runtextpos))
1514
1515 if buf.Len() < 8 {
1516 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1517 }
1518
1519 if r.runtextpos > 0 {
1520 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1521 } else {
1522 buf.WriteRune('^')
1523 }
1524
1525 buf.WriteRune('>')
1526
1527 for i := r.runtextpos; i < r.runtextend; i++ {
1528 buf.WriteString(syntax.CharDescription(r.runtext[i]))
1529 }
1530 if buf.Len() >= 64 {
1531 buf.Truncate(61)
1532 buf.WriteString("...")
1533 } else {
1534 buf.WriteRune('$')
1535 }
1536
1537 return buf.String()
1538 }
1539
1540
1541
1542
1543 func (r *runner) isBoundary(index, startpos, endpos int) bool {
1544 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1545 (index < endpos && syntax.IsWordChar(r.runtext[index]))
1546 }
1547
1548 func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1549 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1550 (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1551 }
1552
1553 func (r *runner) startTimeoutWatch() {
1554 if r.ignoreTimeout {
1555 return
1556 }
1557 r.deadline = makeDeadline(r.timeout)
1558 }
1559
1560 func (r *runner) checkTimeout() error {
1561 if r.ignoreTimeout || !r.deadline.reached() {
1562 return nil
1563 }
1564
1565 if r.re.Debug() {
1566
1567
1568
1569
1570
1571
1572
1573 }
1574
1575 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1576 }
1577
1578 func (r *runner) initTrackCount() {
1579 r.runtrackcount = r.code.TrackCount
1580 }
1581
1582
1583
1584
1585 func (re *Regexp) getRunner() *runner {
1586 re.muRun.Lock()
1587 if n := len(re.runner); n > 0 {
1588 z := re.runner[n-1]
1589 re.runner = re.runner[:n-1]
1590 re.muRun.Unlock()
1591 return z
1592 }
1593 re.muRun.Unlock()
1594 z := &runner{
1595 re: re,
1596 code: re.code,
1597 }
1598 return z
1599 }
1600
1601
1602
1603
1604
1605 func (re *Regexp) putRunner(r *runner) {
1606 re.muRun.Lock()
1607 re.runner = append(re.runner, r)
1608 re.muRun.Unlock()
1609 }
1610
View as plain text