1 package parser
2
3 import (
4 "errors"
5 "fmt"
6 "strconv"
7 "strings"
8 "unicode"
9 "unicode/utf16"
10 "unicode/utf8"
11
12 "golang.org/x/text/unicode/rangetable"
13
14 "github.com/dop251/goja/file"
15 "github.com/dop251/goja/token"
16 "github.com/dop251/goja/unistring"
17 )
18
// Unicode range tables used to classify identifier characters outside the
// ASCII range (see isIdStartUnicode and isIdPartUnicode).
var (
	// unicodeRangeIdNeg holds runes that are rejected by both predicates even
	// when they appear in one of the positive tables below.
	unicodeRangeIdNeg = rangetable.Merge(unicode.Pattern_Syntax, unicode.Pattern_White_Space)
	// unicodeRangeIdStartPos holds runes accepted as the first character of an identifier.
	unicodeRangeIdStartPos = rangetable.Merge(unicode.Letter, unicode.Nl, unicode.Other_ID_Start)
	// unicodeRangeIdContPos extends the start table with the marks, digits and
	// connector punctuation accepted after the first character.
	unicodeRangeIdContPos = rangetable.Merge(unicodeRangeIdStartPos, unicode.Mn, unicode.Mc, unicode.Nd, unicode.Pc, unicode.Other_ID_Continue)
)
24
// isDecimalDigit reports whether chr is one of the ASCII digits '0' through '9'.
func isDecimalDigit(chr rune) bool {
	if chr < '0' {
		return false
	}
	return chr <= '9'
}
28
29 func IsIdentifier(s string) bool {
30 if s == "" {
31 return false
32 }
33 r, size := utf8.DecodeRuneInString(s)
34 if !isIdentifierStart(r) {
35 return false
36 }
37 for _, r := range s[size:] {
38 if !isIdentifierPart(r) {
39 return false
40 }
41 }
42 return true
43 }
44
// digitValue returns the numeric value of chr interpreted as a hexadecimal
// digit (0-15, upper or lower case). Any other rune yields 16, which is
// larger than every legal digit in the bases used by the lexer.
func digitValue(chr rune) int {
	if chr >= '0' && chr <= '9' {
		return int(chr - '0')
	}
	if chr >= 'a' && chr <= 'f' {
		return 10 + int(chr-'a')
	}
	if chr >= 'A' && chr <= 'F' {
		return 10 + int(chr-'A')
	}
	return 16
}
56
57 func isDigit(chr rune, base int) bool {
58 return digitValue(chr) < base
59 }
60
61 func isIdStartUnicode(r rune) bool {
62 return unicode.Is(unicodeRangeIdStartPos, r) && !unicode.Is(unicodeRangeIdNeg, r)
63 }
64
65 func isIdPartUnicode(r rune) bool {
66 return unicode.Is(unicodeRangeIdContPos, r) && !unicode.Is(unicodeRangeIdNeg, r) || r == '\u200C' || r == '\u200D'
67 }
68
69 func isIdentifierStart(chr rune) bool {
70 return chr == '$' || chr == '_' || chr == '\\' ||
71 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
72 chr >= utf8.RuneSelf && isIdStartUnicode(chr)
73 }
74
75 func isIdentifierPart(chr rune) bool {
76 return chr == '$' || chr == '_' || chr == '\\' ||
77 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
78 '0' <= chr && chr <= '9' ||
79 chr >= utf8.RuneSelf && isIdPartUnicode(chr)
80 }
81
// scanIdentifier consumes identifier characters starting at the current
// character. It returns, in order: the raw source text of the identifier, the
// parsed name with any \u escapes resolved, whether the source contained an
// escape, and an error message (empty on success).
func (self *_parser) scanIdentifier() (string, unistring.String, bool, string) {
	offset := self.chrOffset
	hasEscape := false
	isUnicode := false
	// length counts the UTF-16 code units of the parsed name; it is handed to
	// parseStringLiteral, which checks it against what it actually produces.
	length := 0
	for isIdentifierPart(self.chr) {
		r := self.chr
		length++
		if r == '\\' {
			hasEscape = true
			// distance is 0 when the escape is the very first character of the identifier.
			distance := self.chrOffset - offset
			self.read()
			if self.chr != 'u' {
				return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
			}
			var value rune
			if self._peek() == '{' {
				// \u{X...} form: accumulate hex digits up to the closing brace.
				self.read()
				value = -1 // sentinel: no digit seen yet
				for value <= utf8.MaxRune {
					self.read()
					if self.chr == '}' {
						break
					}
					decimal, ok := hex2decimal(byte(self.chr))
					if !ok {
						return "", "", false, "Invalid Unicode escape sequence"
					}
					if value == -1 {
						value = decimal
					} else {
						value = value<<4 | decimal
					}
				}
				if value == -1 {
					// "\u{}" with no digits.
					return "", "", false, "Invalid Unicode escape sequence"
				}
			} else {
				// \uXXXX form: exactly four hex digits.
				for j := 0; j < 4; j++ {
					self.read()
					decimal, ok := hex2decimal(byte(self.chr))
					if !ok {
						return "", "", false, fmt.Sprintf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
					}
					value = value<<4 | decimal
				}
			}
			// The escaped value must itself be legal at this position, and may
			// not be a backslash.
			if value == '\\' {
				return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
			} else if distance == 0 {
				if !isIdentifierStart(value) {
					return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
				}
			} else if distance > 0 {
				if !isIdentifierPart(value) {
					return "", "", false, fmt.Sprintf("Invalid identifier escape value: %c (%s)", value, string(value))
				}
			}
			r = value
		}
		if r >= utf8.RuneSelf {
			isUnicode = true
			if r > 0xFFFF {
				// Runes above 0xFFFF are counted as two units.
				length++
			}
		}
		self.read()
	}

	literal := self.str[offset:self.chrOffset]
	var parsed unistring.String
	if hasEscape || isUnicode {
		var err string
		// Resolve escapes / non-ASCII runes into the parsed form.
		parsed, err = parseStringLiteral(literal, length, isUnicode, false)
		if err != "" {
			return "", "", false, err
		}
	} else {
		// Plain ASCII without escapes: the source text is the name.
		parsed = unistring.String(literal)
	}

	return literal, parsed, hasEscape, ""
}
166
167
// isLineWhiteSpace reports whether chr is white space that does not end a
// line. Line terminators (LF, CR, U+2028, U+2029) and U+0085 are explicitly
// rejected; tab, VT, FF, space, NBSP and U+FEFF are explicitly accepted;
// everything else is decided by unicode.IsSpace.
func isLineWhiteSpace(chr rune) bool {
	switch chr {
	case '\u000a', '\u000d', '\u2028', '\u2029', '\u0085':
		return false
	case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
		return true
	default:
		return unicode.IsSpace(chr)
	}
}
179
180
// isLineTerminator reports whether chr is LF, CR, U+2028 (line separator) or
// U+2029 (paragraph separator).
func isLineTerminator(chr rune) bool {
	return chr == '\u000a' || chr == '\u000d' || chr == '\u2028' || chr == '\u2029'
}
188
// parserState is a snapshot of the parser fields that mark captures and
// restore reinstates.
type parserState struct {
	// Copies of the parser's idx, token, literal and parsedLiteral fields.
	idx           file.Idx
	tok           token.Token
	literal       string
	parsedLiteral unistring.String
	// Copies of the two automatic-semicolon flags.
	implicitSemicolon, insertSemicolon bool
	// Current character of the scanner at the time of the snapshot.
	chr rune
	// Byte offset of chr and byte offset of the next character to read.
	chrOffset, offset int
	// Number of recorded errors at the time of the snapshot; restore
	// truncates the parser's error list back to this length.
	errorCount int
}
199
200 func (self *_parser) mark(state *parserState) *parserState {
201 if state == nil {
202 state = &parserState{}
203 }
204 state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset =
205 self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
206
207 state.errorCount = len(self.errors)
208 return state
209 }
210
211 func (self *_parser) restore(state *parserState) {
212 self.idx, self.token, self.literal, self.parsedLiteral, self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset =
213 state.idx, state.tok, state.literal, state.parsedLiteral, state.implicitSemicolon, state.insertSemicolon, state.chr, state.chrOffset, state.offset
214 self.errors = self.errors[:state.errorCount]
215 }
216
217 func (self *_parser) peek() token.Token {
218 implicitSemicolon, insertSemicolon, chr, chrOffset, offset := self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset
219 tok, _, _, _ := self.scan()
220 self.implicitSemicolon, self.insertSemicolon, self.chr, self.chrOffset, self.offset = implicitSemicolon, insertSemicolon, chr, chrOffset, offset
221 return tok
222 }
223
// scan reads the next token starting at the current character. It returns the
// token kind, the raw literal text and the parsed literal (both set only by
// the branches that produce them: identifiers, numbers, strings and private
// identifiers), and the index of the token's first character.
//
// It also maintains the two semicolon flags on the parser: implicitSemicolon
// is cleared on entry and set when a line terminator (or end of input) is
// reached while insertSemicolon is set, or when a multi-line comment contains
// a line terminator; insertSemicolon is set after the tokens marked below and
// cleared after all others.
func (self *_parser) scan() (tkn token.Token, literal string, parsedLiteral unistring.String, idx file.Idx) {

	self.implicitSemicolon = false

	for {
		self.skipWhiteSpace()

		idx = self.idxOf(self.chrOffset)
		// Value assigned to self.insertSemicolon by branches that fall out of
		// the switch; branches that return directly set the field themselves.
		insertSemicolon := false

		switch chr := self.chr; {
		case isIdentifierStart(chr):
			var err string
			var hasEscape bool
			literal, parsedLiteral, hasEscape, err = self.scanIdentifier()
			if err != "" {
				tkn = token.ILLEGAL
				break
			}
			if len(parsedLiteral) > 1 {
				// Single-character names skip the keyword lookup entirely.
				var strict bool
				tkn, strict = token.IsKeyword(string(parsedLiteral))
				if hasEscape {
					// A name written with escapes is never returned as a plain
					// keyword token.
					self.insertSemicolon = true
					if tkn == 0 || self.isBindingId(tkn) {
						tkn = token.IDENTIFIER
					} else {
						tkn = token.ESCAPED_RESERVED_WORD
					}
					return
				}
				switch tkn {
				case 0:
					// Not a keyword: fall out and return an IDENTIFIER below.
				case token.KEYWORD:
					if strict {
						// Treated as an identifier below.
						break
					}
					return

				case
					token.BOOLEAN,
					token.NULL,
					token.THIS,
					token.BREAK,
					token.THROW,
					token.YIELD,
					token.RETURN,
					token.CONTINUE,
					token.DEBUGGER:
					self.insertSemicolon = true
					return

				case token.ASYNC:
					// When a line terminator follows, "async" is returned as a
					// plain identifier instead.
					if self.skipWhiteSpaceCheckLineTerminator() {
						self.insertSemicolon = true
						tkn = token.IDENTIFIER
					}
					return
				default:
					return

				}
			}
			self.insertSemicolon = true
			tkn = token.IDENTIFIER
			return
		case '0' <= chr && chr <= '9':
			self.insertSemicolon = true
			tkn, literal = self.scanNumericLiteral(false)
			return
		default:
			// Consume chr; from here on self.chr is the character after it.
			self.read()
			switch chr {
			case -1:
				if self.insertSemicolon {
					self.insertSemicolon = false
					self.implicitSemicolon = true
				}
				tkn = token.EOF
			case '\r', '\n', '\u2028', '\u2029':
				// skipWhiteSpace only stops on a line terminator when
				// insertSemicolon is set, so record the implicit semicolon and
				// keep scanning.
				self.insertSemicolon = false
				self.implicitSemicolon = true
				continue
			case ':':
				tkn = token.COLON
			case '.':
				if digitValue(self.chr) < 10 {
					// ".5"-style number.
					insertSemicolon = true
					tkn, literal = self.scanNumericLiteral(true)
				} else {
					if self.chr == '.' {
						self.read()
						if self.chr == '.' {
							self.read()
							tkn = token.ELLIPSIS
						} else {
							// Exactly two dots.
							tkn = token.ILLEGAL
						}
					} else {
						tkn = token.PERIOD
					}
				}
			case ',':
				tkn = token.COMMA
			case ';':
				tkn = token.SEMICOLON
			case '(':
				tkn = token.LEFT_PARENTHESIS
			case ')':
				tkn = token.RIGHT_PARENTHESIS
				insertSemicolon = true
			case '[':
				tkn = token.LEFT_BRACKET
			case ']':
				tkn = token.RIGHT_BRACKET
				insertSemicolon = true
			case '{':
				tkn = token.LEFT_BRACE
			case '}':
				tkn = token.RIGHT_BRACE
				insertSemicolon = true
			case '+':
				tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
				if tkn == token.INCREMENT {
					insertSemicolon = true
				}
			case '-':
				tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
				if tkn == token.DECREMENT {
					insertSemicolon = true
				}
			case '*':
				if self.chr == '*' {
					self.read()
					tkn = self.switch2(token.EXPONENT, token.EXPONENT_ASSIGN)
				} else {
					tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
				}
			case '/':
				if self.chr == '/' {
					self.skipSingleLineComment()
					continue
				} else if self.chr == '*' {
					if self.skipMultiLineComment() {
						// The comment contained a line terminator.
						self.insertSemicolon = false
						self.implicitSemicolon = true
					}
					continue
				} else {
					// A slash or "/=" token; no regular expression is scanned here.
					tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
					insertSemicolon = true
				}
			case '%':
				tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
			case '^':
				tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
			case '<':
				tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
			case '>':
				tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
			case '=':
				if self.chr == '>' {
					self.read()
					// "=>" is rejected when implicitSemicolon was set earlier
					// in this call.
					if self.implicitSemicolon {
						tkn = token.ILLEGAL
					} else {
						tkn = token.ARROW
					}
				} else {
					tkn = self.switch2(token.ASSIGN, token.EQUAL)
					if tkn == token.EQUAL && self.chr == '=' {
						self.read()
						tkn = token.STRICT_EQUAL
					}
				}
			case '!':
				tkn = self.switch2(token.NOT, token.NOT_EQUAL)
				if tkn == token.NOT_EQUAL && self.chr == '=' {
					self.read()
					tkn = token.STRICT_NOT_EQUAL
				}
			case '&':
				tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
			case '|':
				tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
			case '~':
				tkn = token.BITWISE_NOT
			case '?':
				// "?." followed by a decimal digit is not QUESTION_DOT, so
				// "a?.5:b" scans as '?' followed by the number ".5".
				if self.chr == '.' && !isDecimalDigit(self._peek()) {
					self.read()
					tkn = token.QUESTION_DOT
				} else if self.chr == '?' {
					self.read()
					tkn = token.COALESCE
				} else {
					tkn = token.QUESTION_MARK
				}
			case '"', '\'':
				insertSemicolon = true
				tkn = token.STRING
				var err string
				// chrOffset-1 is the offset of the opening quote just consumed.
				literal, parsedLiteral, err = self.scanString(self.chrOffset-1, true)
				if err != "" {
					tkn = token.ILLEGAL
				}
			case '`':
				tkn = token.BACKTICK
			case '#':
				// "#!" at the very start of the input is skipped like a
				// single-line comment.
				if self.chrOffset == 1 && self.chr == '!' {
					self.skipSingleLineComment()
					continue
				}

				var err string
				literal, parsedLiteral, _, err = self.scanIdentifier()
				if err != "" || literal == "" {
					tkn = token.ILLEGAL
					break
				}
				self.insertSemicolon = true
				tkn = token.PRIVATE_IDENTIFIER
				return
			default:
				self.errorUnexpected(idx, chr)
				tkn = token.ILLEGAL
			}
		}
		self.insertSemicolon = insertSemicolon
		return
	}
}
460
461 func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
462 if self.chr == '=' {
463 self.read()
464 return tkn1
465 }
466 return tkn0
467 }
468
469 func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
470 if self.chr == '=' {
471 self.read()
472 return tkn1
473 }
474 if self.chr == chr2 {
475 self.read()
476 return tkn2
477 }
478 return tkn0
479 }
480
481 func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
482 if self.chr == '=' {
483 self.read()
484 return tkn1
485 }
486 if self.chr == chr2 {
487 self.read()
488 if self.chr == '=' {
489 self.read()
490 return tkn3
491 }
492 return tkn2
493 }
494 return tkn0
495 }
496
497 func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
498 if self.chr == '=' {
499 self.read()
500 return tkn1
501 }
502 if self.chr == chr2 {
503 self.read()
504 if self.chr == '=' {
505 self.read()
506 return tkn3
507 }
508 if self.chr == chr3 {
509 self.read()
510 if self.chr == '=' {
511 self.read()
512 return tkn5
513 }
514 return tkn4
515 }
516 return tkn2
517 }
518 return tkn0
519 }
520
521 func (self *_parser) _peek() rune {
522 if self.offset < self.length {
523 return rune(self.str[self.offset])
524 }
525 return -1
526 }
527
528 func (self *_parser) read() {
529 if self.offset < self.length {
530 self.chrOffset = self.offset
531 chr, width := rune(self.str[self.offset]), 1
532 if chr >= utf8.RuneSelf {
533 chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
534 if chr == utf8.RuneError && width == 1 {
535 self.error(self.chrOffset, "Invalid UTF-8 character")
536 }
537 }
538 self.offset += width
539 self.chr = chr
540 } else {
541 self.chrOffset = self.length
542 self.chr = -1
543 }
544 }
545
546 func (self *_parser) skipSingleLineComment() {
547 for self.chr != -1 {
548 self.read()
549 if isLineTerminator(self.chr) {
550 return
551 }
552 }
553 }
554
555 func (self *_parser) skipMultiLineComment() (hasLineTerminator bool) {
556 self.read()
557 for self.chr >= 0 {
558 chr := self.chr
559 if chr == '\r' || chr == '\n' || chr == '\u2028' || chr == '\u2029' {
560 hasLineTerminator = true
561 break
562 }
563 self.read()
564 if chr == '*' && self.chr == '/' {
565 self.read()
566 return
567 }
568 }
569 for self.chr >= 0 {
570 chr := self.chr
571 self.read()
572 if chr == '*' && self.chr == '/' {
573 self.read()
574 return
575 }
576 }
577
578 self.errorUnexpected(0, self.chr)
579 return
580 }
581
582 func (self *_parser) skipWhiteSpaceCheckLineTerminator() bool {
583 for {
584 switch self.chr {
585 case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
586 self.read()
587 continue
588 case '\r':
589 if self._peek() == '\n' {
590 self.read()
591 }
592 fallthrough
593 case '\u2028', '\u2029', '\n':
594 return true
595 }
596 if self.chr >= utf8.RuneSelf {
597 if unicode.IsSpace(self.chr) {
598 self.read()
599 continue
600 }
601 }
602 break
603 }
604 return false
605 }
606
607 func (self *_parser) skipWhiteSpace() {
608 for {
609 switch self.chr {
610 case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
611 self.read()
612 continue
613 case '\r':
614 if self._peek() == '\n' {
615 self.read()
616 }
617 fallthrough
618 case '\u2028', '\u2029', '\n':
619 if self.insertSemicolon {
620 return
621 }
622 self.read()
623 continue
624 }
625 if self.chr >= utf8.RuneSelf {
626 if unicode.IsSpace(self.chr) {
627 self.read()
628 continue
629 }
630 }
631 break
632 }
633 }
634
635 func (self *_parser) scanMantissa(base int) {
636 for digitValue(self.chr) < base {
637 self.read()
638 }
639 }
640
// scanEscape consumes the remainder of an escape sequence. The callers in this
// file invoke it with the current character on the one following the
// backslash, after handling line continuations themselves. quote is the
// character that terminates the enclosing literal; digit scanning stops at it.
//
// It returns the number of units the escape adds to the caller's length count
// (2 for a value above 0xFFFF, and for the "\r\n" pair; otherwise 1) and
// whether the result is outside the ASCII range.
func (self *_parser) scanEscape(quote rune) (int, bool) {

	// length is the maximum number of digits to read (0 means "until '}'");
	// base stays 0 for escapes that carry no numeric payload.
	var length, base uint32
	chr := self.chr
	switch chr {
	case '0', '1', '2', '3', '4', '5', '6', '7':
		// Octal escape: up to three digits. The first digit is not consumed
		// here; the digit loop below reads it.
		length, base = 3, 8
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'':
		self.read()
		return 1, false
	case '\r':
		self.read()
		if self.chr == '\n' {
			self.read()
			return 2, false
		}
		return 1, false
	case '\n':
		self.read()
		return 1, false
	case '\u2028', '\u2029':
		self.read()
		return 1, true
	case 'x':
		self.read()
		length, base = 2, 16
	case 'u':
		self.read()
		if self.chr == '{' {
			// \u{...}: variable number of hex digits.
			self.read()
			length, base = 0, 16
		} else {
			length, base = 4, 16
		}
	default:
		// Any other character escapes to itself.
		self.read()
	}

	if base > 0 {
		var value uint32
		if length > 0 {
			// Fixed-width form: stop early on the quote, end of input or a
			// non-digit. No error is raised here for short sequences.
			for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
				digit := uint32(digitValue(self.chr))
				if digit >= base {
					break
				}
				value = value*base + digit
				self.read()
			}
		} else {
			// Braced form: read digits until '}' (which is consumed).
			for self.chr != quote && self.chr >= 0 && value < utf8.MaxRune {
				if self.chr == '}' {
					self.read()
					break
				}
				digit := uint32(digitValue(self.chr))
				if digit >= base {
					break
				}
				value = value*base + digit
				self.read()
			}
		}
		chr = rune(value)
	}
	if chr >= utf8.RuneSelf {
		if chr > 0xFFFF {
			return 2, true
		}
		return 1, true
	}
	return 1, false
}
715
// scanString scans a quoted literal whose opening delimiter is at byte offset
// offset in the source; the current character must be the one right after that
// delimiter. The delimiter may be '"', '\'' or '/' (the latter gets regular
// expression handling: '/' inside a [...] class does not terminate, and
// U+2028/U+2029 count as line breaks).
//
// It returns the raw text including both delimiters and, when parse is true,
// the parsed contents with escapes resolved. On an unterminated literal it
// returns empty strings and an error message.
func (self *_parser) scanString(offset int, parse bool) (literal string, parsed unistring.String, err string) {
	// " ' /
	quote := rune(self.str[offset])
	// length counts the units of the parsed value; it is passed to
	// parseStringLiteral together with isUnicode.
	length := 0
	isUnicode := false
	for self.chr != quote {
		chr := self.chr
		if chr == '\n' || chr == '\r' || chr < 0 {
			goto newline
		}
		if quote == '/' && (self.chr == '\u2028' || self.chr == '\u2029') {
			goto newline
		}
		self.read()
		if chr == '\\' {
			if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
				if quote == '/' {
					goto newline
				}
				// Line continuation: skip the line break, adding nothing to length.
				self.scanNewline()
			} else {
				l, u := self.scanEscape(quote)
				length += l
				if u {
					isUnicode = true
				}
			}
			continue
		} else if chr == '[' && quote == '/' {
			// Entering a regular expression character class: set quote to -1
			// so that a '/' inside the class does not end the loop.
			quote = -1
		} else if chr == ']' && quote == -1 {
			quote = '/'
		}
		if chr >= utf8.RuneSelf {
			isUnicode = true
			if chr > 0xFFFF {
				// Counted as two units.
				length++
			}
		}
		length++
	}

	// Consume the closing delimiter.
	self.read()
	literal = self.str[offset:self.chrOffset]
	if parse {
		// Strip the delimiters before resolving escapes.
		parsed, err = parseStringLiteral(literal[1:len(literal)-1], length, isUnicode, false)
	}
	return

newline:
	self.scanNewline()
	errStr := "String not terminated"
	if quote == '/' {
		// Only the regular expression case records the error on the parser
		// here; for strings the message is just returned.
		errStr = "Invalid regular expression: missing /"
		self.error(self.idxOf(offset), errStr)
	}
	return "", "", errStr
}
778
779 func (self *_parser) scanNewline() {
780 if self.chr == '\u2028' || self.chr == '\u2029' {
781 self.read()
782 return
783 }
784 if self.chr == '\r' {
785 self.read()
786 if self.chr != '\n' {
787 return
788 }
789 }
790 self.read()
791 }
792
// parseTemplateCharacters scans one run of template literal text, starting at
// the current character and stopping after either the closing backtick or a
// "${" that opens a substitution.
//
// Results:
//   - literal: the raw text of the run, with CR and CRLF replaced by LF;
//   - parsed: the text with escapes resolved (left empty when parseErr is set);
//   - finished: true when the closing backtick was reached (or the input ended);
//   - parseErr: a problem found while interpreting escapes;
//   - err: set to err_UnexpectedEndOfInput when the input ends first.
//
// On success it also sets insertSemicolon on the parser.
func (self *_parser) parseTemplateCharacters() (literal string, parsed unistring.String, finished bool, parseErr, err string) {
	offset := self.chrOffset
	var end int
	// length counts the units of the parsed value; it is passed to
	// parseStringLiteral together with isUnicode.
	length := 0
	isUnicode := false
	hasCR := false
	for {
		chr := self.chr
		if chr < 0 {
			goto unterminated
		}
		self.read()
		if chr == '`' {
			finished = true
			// Exclude the backtick just consumed.
			end = self.chrOffset - 1
			break
		}
		if chr == '\\' {
			if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
				// Line continuation: contributes nothing to length.
				if self.chr == '\r' {
					hasCR = true
				}
				self.scanNewline()
			} else {
				if self.chr == '8' || self.chr == '9' {
					// Keep only the first parse error encountered.
					if parseErr == "" {
						parseErr = "\\8 and \\9 are not allowed in template strings."
					}
				}
				l, u := self.scanEscape('`')
				length += l
				if u {
					isUnicode = true
				}
			}
			continue
		}
		if chr == '$' && self.chr == '{' {
			self.read()
			// Exclude the "${" just consumed.
			end = self.chrOffset - 2
			break
		}
		if chr >= utf8.RuneSelf {
			isUnicode = true
			if chr > 0xFFFF {
				// Counted as two units.
				length++
			}
		} else if chr == '\r' {
			hasCR = true
			if self.chr == '\n' {
				// "\r\n" becomes a single '\n' after normalisation, so cancel
				// the increment below for the '\r'.
				length--
			}
		}
		length++
	}
	literal = self.str[offset:end]
	if hasCR {
		literal = normaliseCRLF(literal)
	}
	if parseErr == "" {
		// The final argument selects strict mode, in which octal escapes are rejected.
		parsed, parseErr = parseStringLiteral(literal, length, isUnicode, true)
	}
	self.insertSemicolon = true
	return
unterminated:
	err = err_UnexpectedEndOfInput
	finished = true
	return
}
862
// normaliseCRLF returns s with every "\r\n" pair and every lone '\r' replaced
// by a single '\n'. All other bytes are copied through unchanged.
func normaliseCRLF(s string) string {
	var out strings.Builder
	out.Grow(len(s))
	for len(s) > 0 {
		cr := strings.IndexByte(s, '\r')
		if cr < 0 {
			// No carriage return left: the tail is copied verbatim.
			out.WriteString(s)
			break
		}
		out.WriteString(s[:cr])
		out.WriteByte('\n')
		s = s[cr+1:]
		// Swallow the '\n' of a "\r\n" pair; it has already been emitted.
		if len(s) > 0 && s[0] == '\n' {
			s = s[1:]
		}
	}
	return out.String()
}
878
// hex2decimal converts a single ASCII hexadecimal digit (either case) to its
// numeric value. ok is false, and value 0, for any other byte.
func hex2decimal(chr byte) (value rune, ok bool) {
	switch c := rune(chr); {
	case c >= '0' && c <= '9':
		return c - '0', true
	case c >= 'a' && c <= 'f':
		return 10 + (c - 'a'), true
	case c >= 'A' && c <= 'F':
		return 10 + (c - 'A'), true
	}
	return 0, false
}
893
894 func parseNumberLiteral(literal string) (value interface{}, err error) {
895
896 value, err = strconv.ParseInt(literal, 0, 64)
897 if err == nil {
898 return
899 }
900
901 parseIntErr := err
902
903 value, err = strconv.ParseFloat(literal, 64)
904 if err == nil {
905 return
906 } else if err.(*strconv.NumError).Err == strconv.ErrRange {
907
908 return value, nil
909 }
910
911 err = parseIntErr
912
913 if err.(*strconv.NumError).Err == strconv.ErrRange {
914 if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
915
916 var value float64
917 literal = literal[2:]
918 for _, chr := range literal {
919 digit := digitValue(chr)
920 if digit >= 16 {
921 goto error
922 }
923 value = value*16 + float64(digit)
924 }
925 return value, nil
926 }
927 }
928
929 error:
930 return nil, errors.New("Illegal numeric literal")
931 }
932
// parseStringLiteral resolves the escape sequences in literal (the text
// between the delimiters) and returns the resulting string, or an empty string
// and an error message.
//
//   - length is the expected size of the result as counted by the scanner:
//     bytes when unicode is false, UTF-16 code units when it is true. A
//     mismatch is treated as an internal error and panics.
//   - unicode selects the output representation: a UTF-16 buffer prefixed with
//     unistring.BOM when true, a plain byte string when false. Note that the
//     parameter shadows the "unicode" package inside this function.
//   - strict rejects octal escape sequences.
func parseStringLiteral(literal string, length int, unicode, strict bool) (unistring.String, string) {
	var sb strings.Builder
	var chars []uint16
	if unicode {
		chars = make([]uint16, 1, length+1)
		chars[0] = unistring.BOM
	} else {
		sb.Grow(length)
	}
	str := literal
	for len(str) > 0 {
		switch chr := str[0]; {
		// The first two cases copy ordinary characters and continue the loop;
		// only a backslash falls out of the switch to the escape handling below.
		case chr >= utf8.RuneSelf:
			// Non-ASCII source character. Always written to chars, so callers
			// are expected to pass unicode == true for such input.
			chr, size := utf8.DecodeRuneInString(str)
			if chr <= 0xFFFF {
				chars = append(chars, uint16(chr))
			} else {
				first, second := utf16.EncodeRune(chr)
				chars = append(chars, uint16(first), uint16(second))
			}
			str = str[size:]
			continue
		case chr != '\\':
			if unicode {
				chars = append(chars, uint16(chr))
			} else {
				sb.WriteByte(chr)
			}
			str = str[1:]
			continue
		}

		// str[0] is a backslash; the scanner guarantees something follows it.
		if len(str) <= 1 {
			panic("len(str) <= 1")
		}
		chr := str[1]
		var value rune
		if chr >= utf8.RuneSelf {
			// Backslash followed by a non-ASCII character: the character
			// stands for itself, except that an escaped U+2028/U+2029 is a
			// line continuation and produces nothing.
			str = str[1:]
			var size int
			value, size = utf8.DecodeRuneInString(str)
			str = str[size:]
			if value == '\u2028' || value == '\u2029' {
				continue
			}
		} else {
			// Drop the backslash and the escape character.
			str = str[2:]
			switch chr {
			case 'b':
				value = '\b'
			case 'f':
				value = '\f'
			case 'n':
				value = '\n'
			case 'r':
				value = '\r'
			case 't':
				value = '\t'
			case 'v':
				value = '\v'
			case 'x', 'u':
				// size is the number of hex digits for the fixed-width forms;
				// it stays 0 for the braced \u{...} form.
				size := 0
				switch chr {
				case 'x':
					size = 2
				case 'u':
					if str == "" || str[0] != '{' {
						size = 4
					}
				}
				if size > 0 {
					if len(str) < size {
						return "", fmt.Sprintf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
					}
					for j := 0; j < size; j++ {
						decimal, ok := hex2decimal(str[j])
						if !ok {
							return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size])
						}
						value = value<<4 | decimal
					}
				} else {
					// Braced form: skip '{', then read hex digits up to '}'.
					// From here size counts the bytes consumed, including the
					// closing brace.
					str = str[1:]
					var val rune
					value = -1 // stays -1 if no closing brace is found
					for ; size < len(str); size++ {
						if str[size] == '}' {
							if size == 0 {
								// "\u{}" with no digits.
								return "", fmt.Sprintf("invalid escape: \\%s", string(chr))
							}
							size++
							value = val
							break
						}
						decimal, ok := hex2decimal(str[size])
						if !ok {
							return "", fmt.Sprintf("invalid escape: \\%s: %q", string(chr), str[:size+1])
						}
						val = val<<4 | decimal
						if val > utf8.MaxRune {
							return "", fmt.Sprintf("undefined Unicode code-point: %q", str[:size+1])
						}
					}
					if value == -1 {
						return "", fmt.Sprintf("unterminated \\u{: %q", str)
					}
				}
				str = str[size:]
				if chr == 'x' {
					break
				}
				if value > utf8.MaxRune {
					panic("value > utf8.MaxRune")
				}
			case '0':
				// "\0" not followed by an octal digit is the NUL character;
				// otherwise it starts an octal escape.
				if len(str) == 0 || '0' > str[0] || str[0] > '7' {
					value = 0
					break
				}
				fallthrough
			case '1', '2', '3', '4', '5', '6', '7':
				if strict {
					return "", "Octal escape sequences are not allowed in this context"
				}
				value = rune(chr) - '0'
				// Up to two further octal digits.
				j := 0
				for ; j < 2; j++ {
					if len(str) < j+1 {
						break
					}
					chr := str[j]
					if '0' > chr || chr > '7' {
						break
					}
					decimal := rune(str[j]) - '0'
					value = (value << 3) | decimal
				}
				str = str[j:]
			case '\\':
				value = '\\'
			case '\'', '"':
				value = rune(chr)
			case '\r':
				// Line continuation; a following '\n' belongs to the same break.
				if len(str) > 0 {
					if str[0] == '\n' {
						str = str[1:]
					}
				}
				fallthrough
			case '\n':
				continue
			default:
				// Any other escaped character stands for itself.
				value = rune(chr)
			}
		}
		if unicode {
			if value <= 0xFFFF {
				chars = append(chars, uint16(value))
			} else {
				first, second := utf16.EncodeRune(value)
				chars = append(chars, uint16(first), uint16(second))
			}
		} else {
			if value >= utf8.RuneSelf {
				return "", "Unexpected unicode character"
			}
			sb.WriteByte(byte(value))
		}
	}

	// Cross-check the result against the length counted by the scanner.
	if unicode {
		if len(chars) != length+1 {
			panic(fmt.Errorf("unexpected unicode length while parsing '%s'", literal))
		}
		return unistring.FromUtf16(chars), ""
	}
	if sb.Len() != length {
		panic(fmt.Errorf("unexpected length while parsing '%s'", literal))
	}
	return unistring.String(sb.String()), ""
}
1117
// scanNumericLiteral scans a numeric literal and returns token.NUMBER with its
// source text, or token.ILLEGAL with the text consumed so far when the literal
// is malformed.
//
// decimalPoint is true when the caller has already consumed a leading '.', so
// that the current character is the first fractional digit; the returned text
// then starts at that '.'.
func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {

	offset := self.chrOffset
	tkn := token.NUMBER

	if decimalPoint {
		// Step back so the literal text includes the '.' already consumed.
		offset--
		self.scanMantissa(10)
	} else {
		if self.chr == '0' {
			self.read()
			base := 0
			switch self.chr {
			case 'x', 'X':
				base = 16
			case 'o', 'O':
				base = 8
			case 'b', 'B':
				base = 2
			case '.', 'e', 'E':
				// A decimal literal that starts with 0 ("0.5", "0e1"):
				// continue with the fraction / exponent handling below.
			default:
				// A leading 0 followed by anything else: consume octal digits
				// (possibly none) and finish.
				self.scanMantissa(8)
				goto end
			}
			if base > 0 {
				// Skip the base letter; at least one digit must follow.
				self.read()
				if !isDigit(self.chr, base) {
					return token.ILLEGAL, self.str[offset:self.chrOffset]
				}
				self.scanMantissa(base)
				goto end
			}
		} else {
			self.scanMantissa(10)
		}
		if self.chr == '.' {
			self.read()
			self.scanMantissa(10)
		}
	}

	if self.chr == 'e' || self.chr == 'E' {
		self.read()
		if self.chr == '-' || self.chr == '+' {
			self.read()
		}
		// The exponent needs at least one digit.
		if isDecimalDigit(self.chr) {
			self.read()
			self.scanMantissa(10)
		} else {
			return token.ILLEGAL, self.str[offset:self.chrOffset]
		}
	}
end:
	// A literal may not be immediately followed by an identifier start or
	// another decimal digit (e.g. "3in" or "0b12").
	if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
		return token.ILLEGAL, self.str[offset:self.chrOffset]
	}

	return tkn, self.str[offset:self.chrOffset]
}
1180
View as plain text