1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package scanner
16
17 import (
18 "fmt"
19 "os"
20 "path/filepath"
21 "runtime"
22 "strings"
23 "testing"
24
25 "github.com/google/go-cmp/cmp"
26
27 "cuelang.org/go/cue/errors"
28 "cuelang.org/go/cue/token"
29 )
30
// Token classes used by the tests to check that the scanner categorizes
// each token correctly (see tokenclass).
const (
	special = iota
	literal
	operator
	keyword
)
37
38 func tokenclass(tok token.Token) int {
39 switch {
40 case tok.IsLiteral():
41 return literal
42 case tok.IsOperator():
43 return operator
44 case tok.IsKeyword():
45 return keyword
46 }
47 return special
48 }
49
// elt describes one scanner test case: the expected token, the literal
// source text that should scan to it, and the expected token class.
type elt struct {
	tok   token.Token // expected token
	lit   string      // source text to scan
	class int         // expected class (special, literal, operator, keyword)
}
55
// testTokens pairs each token with a source literal that must scan back
// to it. TestScan joins all literals (separated by whitespace) into one
// source and verifies token, class, literal, and position for each.
var testTokens = [...]elt{
	// Special tokens: comments.
	{token.COMMENT, "// a comment \n", special},
	{token.COMMENT, "//\r\n", special},

	// Attributes; the literal includes the argument list verbatim.
	{token.ATTRIBUTE, "@foo()", special},
	{token.ATTRIBUTE, "@foo(,,)", special},
	{token.ATTRIBUTE, "@foo(a)", special},
	{token.ATTRIBUTE, "@foo(aa=b)", special},
	{token.ATTRIBUTE, "@foo(,a=b)", special},
	{token.ATTRIBUTE, `@foo(",a=b")`, special},
	{token.ATTRIBUTE, `@foo(##"\(),a=b"##)`, special},
	{token.ATTRIBUTE, `@foo("",a="")`, special},
	{token.ATTRIBUTE, `@foo(2,bytes,a.b=c)`, special},
	{token.ATTRIBUTE, `@foo([{()}]())`, special},
	{token.ATTRIBUTE, `@foo("{")`, special},

	// Bottom.
	{token.BOTTOM, "_|_", literal},

	// Identifiers, including $- , #- and _-prefixed forms and Unicode letters.
	{token.IDENT, "foobar", literal},
	{token.IDENT, "$foobar", literal},
	{token.IDENT, "#foobar", literal},
	{token.IDENT, "#", literal},
	{token.IDENT, "_foobar", literal},
	{token.IDENT, "__foobar", literal},
	{token.IDENT, "#_foobar", literal},
	{token.IDENT, "_#foobar", literal},
	{token.IDENT, "__#foobar", literal},
	{token.IDENT, "a۰۱۸", literal},
	{token.IDENT, "foo६४", literal},
	{token.IDENT, "bar9876", literal},
	{token.IDENT, "ŝ", literal},
	{token.IDENT, "ŝfoo", literal},

	// Integers: decimal, underscore-separated, SI/IEC multipliers, hex,
	// binary, and octal forms.
	{token.INT, "0", literal},
	{token.INT, "1", literal},
	{token.INT, "123456789012345678890", literal},
	{token.INT, "12345_67890_12345_6788_90", literal},
	{token.INT, "1234567M", literal},
	{token.INT, "1234567Mi", literal},
	{token.INT, "1234567", literal},
	{token.INT, ".3Mi", literal},
	{token.INT, "3.3Mi", literal},
	{token.INT, "0xcafebabe", literal},
	{token.INT, "0b1100_1001", literal},
	{token.INT, "0o1234567", literal},

	// Floats, including exponent forms.
	{token.FLOAT, "0.", literal},
	{token.FLOAT, ".0", literal},
	{token.FLOAT, "3.14159265", literal},
	{token.FLOAT, "1e0", literal},
	{token.FLOAT, "1e+100", literal},
	{token.FLOAT, "1e-100", literal},
	{token.FLOAT, "1E+100", literal},
	{token.FLOAT, "1E-100", literal},
	{token.FLOAT, "0e-5", literal},
	{token.FLOAT, "0e+100", literal},
	{token.FLOAT, "0e-100", literal},
	{token.FLOAT, "0E+100", literal},
	{token.FLOAT, "0E-100", literal},
	{token.FLOAT, "2.71828e-1000", literal},

	// Strings: single-quoted (bytes), escapes, hash-delimited (raw-ish)
	// forms with varying numbers of '#', and multiline variants.
	{token.STRING, "'a'", literal},
	{token.STRING, "'\\000'", literal},
	{token.STRING, "'\\xFF'", literal},
	{token.STRING, "'\\uff16'", literal},
	{token.STRING, "'\\uD801'", literal},
	{token.STRING, "'\\U0000ff16'", literal},
	{token.STRING, "'foobar'", literal},
	{token.STRING, `'foo\/bar'`, literal},
	{token.STRING, `#" ""#`, literal},
	{token.STRING, `#"" "#`, literal},
	{token.STRING, `#""hello""#`, literal},
	{token.STRING, `##""# "##`, literal},
	{token.STRING, `####""###"####`, literal},
	{token.STRING, "##\"\"\"\n\"\"\"#\n\"\"\"##", literal},
	{token.STRING, `##"####"##`, literal},
	{token.STRING, `#"foobar"#`, literal},
	{token.STRING, `#" """#`, literal},
	{token.STRING, `#"\r"#`, literal},
	{token.STRING, `#"\("#`, literal},
	{token.STRING, `#"\q"#`, literal},
	{token.STRING, `###"\##q"###`, literal},
	{token.STRING, "'" + `\r` + "'", literal},
	{token.STRING, "'foo" + `\r\n` + "bar'", literal},
	{token.STRING, `"foobar"`, literal},
	{token.STRING, "\"\"\"\n foobar\n \"\"\"", literal},
	{token.STRING, "#\"\"\"\n \\(foobar\n \"\"\"#", literal},

	// Multiline string containing a CRLF line terminator.
	{token.STRING, "#\"\"\"\r\n \\(foobar\n \"\"\"#", literal},

	// Operators and delimiters.
	{token.ADD, "+", operator},
	{token.SUB, "-", operator},
	{token.MUL, "*", operator},
	{token.QUO, "/", operator},

	{token.AND, "&", operator},
	{token.OR, "|", operator},

	{token.LAND, "&&", operator},
	{token.LOR, "||", operator},

	{token.EQL, "==", operator},
	{token.LSS, "<", operator},
	{token.GTR, ">", operator},
	{token.BIND, "=", operator},
	{token.NOT, "!", operator},

	{token.NEQ, "!=", operator},
	{token.LEQ, "<=", operator},
	{token.GEQ, ">=", operator},
	{token.ELLIPSIS, "...", operator},

	{token.MAT, "=~", operator},
	{token.NMAT, "!~", operator},

	{token.LPAREN, "(", operator},
	{token.LBRACK, "[", operator},
	{token.LBRACE, "{", operator},
	{token.COMMA, ",", operator},
	{token.PERIOD, ".", operator},
	{token.OPTION, "?", operator},

	{token.RPAREN, ")", operator},
	{token.RBRACK, "]", operator},
	{token.RBRACE, "}", operator},
	{token.COLON, ":", operator},

	// Keywords.
	{token.TRUE, "true", keyword},
	{token.FALSE, "false", keyword},
	{token.NULL, "null", keyword},

	{token.FOR, "for", keyword},
	{token.IF, "if", keyword},
	{token.IN, "in", keyword},
}
196
197 const whitespace = " \t \n\n\n"
198
199 var source = func() []byte {
200 var src []byte
201 for _, t := range testTokens {
202 src = append(src, t.lit...)
203 src = append(src, whitespace...)
204 }
205 return src
206 }()
207
// newlineCount returns the number of '\n' bytes in s.
func newlineCount(s string) int {
	// strings.Count with a single-byte separator is the idiomatic (and
	// optimized) replacement for a manual byte loop.
	return strings.Count(s, "\n")
}
217
218 func checkPosScan(t *testing.T, lit string, p token.Pos, expected token.Position) {
219 pos := p.Position()
220 if pos.Filename != expected.Filename {
221 t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename)
222 }
223 if pos.Offset != expected.Offset {
224 t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset)
225 }
226 if pos.Line != expected.Line {
227 t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line)
228 }
229 if pos.Column != expected.Column {
230 t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column)
231 }
232 }
233
234
// TestScan scans the concatenated testTokens source and verifies, for
// every token, the token kind, its class, its literal text, and its
// exact source position.
func TestScan(t *testing.T) {
	whitespace_linecount := newlineCount(whitespace)

	// The scanner must not report any errors on this input; treat every
	// handler invocation as a test failure.
	eh := func(_ token.Pos, msg string, args []interface{}) {
		t.Errorf("error handler called (msg = %s)", fmt.Sprintf(msg, args...))
	}

	// Comma insertion is disabled so the token stream matches testTokens
	// one-to-one; comments are kept so COMMENT entries are observable.
	var s Scanner
	s.Init(token.NewFile("", -1, len(source)), source, eh, ScanComments|DontInsertCommas)

	// epos tracks the expected position of the next token; it is advanced
	// by the literal plus separator length after each iteration.
	epos := token.Position{
		Filename: "",
		Offset:   0,
		Line:     1,
		Column:   1,
	}

	index := 0
	for {
		pos, tok, lit := s.Scan()

		// EOF sits after the final whitespace run; only line/column need
		// adjusting (offset was already advanced past the last separator).
		if tok == token.EOF {
			epos.Line = newlineCount(string(source))
			epos.Column = 2
		}
		checkPosScan(t, lit, pos, epos)

		// Expected token: the next table entry, or EOF once exhausted.
		e := elt{token.EOF, "", special}
		if index < len(testTokens) {
			e = testTokens[index]
			index++
		}
		if tok != e.tok {
			t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
		}

		// Check token class.
		if tokenclass(tok) != e.class {
			t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
		}

		// Compute the expected literal: the scanner normalizes some
		// token texts relative to their source form.
		elit := ""
		switch e.tok {
		case token.COMMENT:
			// Comments lose carriage returns, and line comments lose
			// their trailing newline.
			elit = string(stripCR([]byte(e.lit)))
			if elit[1] == '/' {
				elit = elit[0 : len(elit)-1]
			}
		case token.ATTRIBUTE:
			elit = e.lit
		case token.IDENT:
			elit = e.lit
		case token.COMMA:
			elit = ","
		default:
			if e.tok.IsLiteral() {
				// Literals are reported verbatim; the backquote branch
				// strips CRs from raw strings (NOTE(review): no test
				// literal starts with '`', so this looks vestigial —
				// confirm before removing).
				elit = e.lit
				if elit[0] == '`' {
					elit = string(stripCR([]byte(elit)))
				}
			} else if e.tok.IsKeyword() {
				elit = e.lit
			}
		}
		if lit != elit {
			t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
		}

		if tok == token.EOF {
			break
		}

		// Advance the expected position past this literal and the
		// whitespace separator that follows it in source.
		epos.Offset += len(e.lit) + len(whitespace)
		epos.Line += newlineCount(e.lit) + whitespace_linecount
	}

	if s.ErrorCount != 0 {
		t.Errorf("found %d errors", s.ErrorCount)
	}
}
327
// checkComma scans line with the given mode and verifies comma handling.
// The input uses marker characters that the scanner reports as ILLEGAL:
// '~' marks a spot where an explicit comma (literal ",") must follow,
// and any other marker (the tests use '^') marks a spot where an
// auto-inserted comma (literal "\n") must follow. A COMMA with no
// preceding marker is a failure.
func checkComma(t *testing.T, line string, mode Mode) {
	var S Scanner
	file := token.NewFile("TestCommas", -1, len(line))
	S.Init(file, []byte(line), nil, mode)
	pos, tok, lit := S.Scan()
	for tok != token.EOF {
		if tok == token.ILLEGAL {
			// The marker character tells us which comma literal to
			// expect next: "," for '~', "\n" (elided) otherwise.
			commaLit := "\n"
			if lit[0] == '~' {
				commaLit = ","
			}

			// The comma must be reported at the position just past the marker.
			commaPos := file.Position(pos)
			commaPos.Offset++
			commaPos.Column++
			pos, tok, lit = S.Scan()
			if tok == token.COMMA {
				if lit != commaLit {
					t.Errorf(`bad literal for %q: got %q (%q), expected %q`, line, lit, tok, commaLit)
				}
				checkPosScan(t, line, pos, commaPos)
			} else {
				t.Errorf("bad token for %q: got %s, expected ','", line, tok)
			}
		} else if tok == token.COMMA {
			// A comma with no marker before it should not have been produced.
			t.Errorf("bad token for %q: got ',', expected no ','", line)
		}
		pos, tok, lit = S.Scan()
	}
}
360
// lines holds inputs for TestCommas. Within each line, '~' marks a spot
// where an explicit comma must be scanned next and '^' marks a spot
// where an automatically inserted (elided) comma must be scanned next;
// see checkComma for how the markers are interpreted.
var lines = []string{
	// Empty input and byte-order-mark handling.
	"",
	"\ufeff~,", // first BOM is ignored
	"~,",

	// Tokens after which a comma is auto-inserted at end of line.
	"foo^\n",
	"_foo^\n",
	"123^\n",
	"1.2^\n",
	"'x'^\n",
	"_|_^\n",
	"_|_^\n",
	`"x"` + "^\n",
	"#'x'#^\n",
	`"""
foo
"""` + "^\n",
	// Multiline bytes literal also triggers comma insertion.
	`'''
foo
'''` + "^\n",

	// Operators at end of line: no comma is inserted.
	"+\n",
	"-\n",
	"*\n",
	"/\n",

	"&\n",

	"|\n",

	"&&\n",
	"||\n",
	"<-\n",
	"->\n",

	"==\n",
	"<\n",
	">\n",
	"=\n",
	"!\n",

	"!=\n",
	"<=\n",
	">=\n",
	":=\n",
	"...^\n",

	// Opening delimiters: no comma; closing delimiters: comma inserted.
	"(\n",
	"[\n",
	"[[\n",
	"{\n",
	"{{\n",
	"~,\n",
	".\n",

	")^\n",
	"]^\n",
	"]]^\n",
	"}^\n",
	"}}^\n",
	":\n",
	"::\n",
	";^\n",

	// Keywords that end an expression.
	"true^\n",
	"false^\n",
	"null^\n",

	// Comments and EOF interact with comma insertion as well.
	"foo^//comment\n",
	"foo^//comment",

	"foo ^// comment\n",
	"foo ^// comment",

	"foo ^",
	"foo ^//",

	"package main^\n\nfoo: bar^",
	"package main^",
}
445
446 func TestCommas(t *testing.T) {
447 for _, line := range lines {
448 checkComma(t, line, 0)
449 checkComma(t, line, ScanComments)
450
451
452
453 for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
454 checkComma(t, line[0:i], 0)
455 checkComma(t, line[0:i], ScanComments)
456 }
457 }
458 }
459
// TestRelative verifies the relative-position classification attached to
// each scanned token (newline, blank, nospace, section, elided) for a
// small representative CUE snippet, including elided commas and comment
// placement.
func TestRelative(t *testing.T) {
	test := `
package foo

// comment
a: 1 // a
b : 5
// line one
// line two
c
: "dfs"
, d: "foo"
`
	// One entry per scanned token: "<relpos> <token> <literal>".
	want := []string{
		`newline IDENT package`,
		`blank IDENT foo`,
		"elided , \n",
		`section COMMENT // comment`,
		`newline IDENT a`,
		`nospace : `,
		`blank INT 1`,
		"elided , \n",
		`blank COMMENT // a`,
		`newline IDENT b`,
		`blank : `,
		`blank INT 5`,
		"elided , \n",
		"newline COMMENT // line one",
		"newline COMMENT // line two",
		`newline IDENT c`,
		`newline : `,
		`blank STRING "dfs"`,
		"newline , ,",
		"blank IDENT d",
		"nospace : ",
		`blank STRING "foo"`,
		"elided , \n",
	}
	var S Scanner
	f := token.NewFile("TestCommas", -1, len(test))
	S.Init(f, []byte(test), nil, ScanComments)
	pos, tok, lit := S.Scan()
	got := []string{}
	for tok != token.EOF {
		got = append(got, fmt.Sprintf("%-7s %-8s %s", pos.RelPos(), tok, lit))
		pos, tok, lit = S.Scan()
	}
	if diff := cmp.Diff(got, want); diff != "" {
		t.Error(diff)
	}
}
511
// segment describes a chunk of source text together with the filename
// and line the scanner should report for the token in that chunk after
// applying any //line directives it contains.
type segment struct {
	srcline  string // a line of source text
	filename string // filename expected for the token in this segment
	line     int    // line number expected for the token in this segment
}
517
// segments exercises //line directive handling. The srcline fields are
// concatenated into one source; TestLineComments scans one token per
// segment and asserts the reported filename and line.
var segments = []segment{
	// exactly one token per line since the test consumes one token per segment
	{" line1", filepath.Join("dir", "TestLineComments"), 1},
	{"\nline2", filepath.Join("dir", "TestLineComments"), 2},
	// A //line comment that is not at the start of a line has no effect.
	{"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3},
	{"\nline4", filepath.Join("dir", "TestLineComments"), 4},
	// Well-formed directives relocate filename and line.
	{"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100},
	// An empty filename clears the reported filename.
	{"\n//line \t :42\n line1", "", 42},
	{"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200},
	{"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42},
	// Malformed directives (leading space, missing colon, trailing text)
	// are ignored; the previous relocation stays in effect.
	{"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44},
	{"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46},
	{"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48},
	// Relative paths are resolved against the file's directory.
	{"\n//line ./foo:42\n line42", filepath.Join("dir", "foo"), 42},
	{"\n//line a/b/c/File1.go:100\n line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100},
}
534
// unixsegments holds //line directives with Unix-style absolute paths;
// only run on non-Windows hosts.
var unixsegments = []segment{
	{"\n//line /bar:42\n line42", "/bar", 42},
}
538
// winsegments holds //line directives with Windows-style absolute paths;
// only run on Windows hosts.
var winsegments = []segment{
	{"\n//line c:\\bar:42\n line42", "c:\\bar", 42},
	{"\n//line c:\\dir\\File1.go:100\n line100", "c:\\dir\\File1.go", 100},
}
543
544
// TestLineComments verifies that //line directives relocate the reported
// filename and line number of subsequent tokens.
func TestLineComments(t *testing.T) {
	segs := segments
	// Absolute-path segments are OS-specific.
	if runtime.GOOS == "windows" {
		segs = append(segs, winsegments...)
	} else {
		segs = append(segs, unixsegments...)
	}

	// Concatenate all segments into one source.
	var src string
	for _, e := range segs {
		src += e.srcline
	}

	// Scan one token per segment; only filename and line are asserted —
	// offset and column are taken from the scanner itself.
	var S Scanner
	f := token.NewFile(filepath.Join("dir", "TestLineComments"), -1, len(src))
	S.Init(f, []byte(src), nil, DontInsertCommas)
	for _, s := range segs {
		p, _, lit := S.Scan()
		pos := f.Position(p)
		checkPosScan(t, lit, p, token.Position{
			Filename: s.filename,
			Offset:   pos.Offset,
			Line:     s.line,
			Column:   pos.Column,
		})
	}

	if S.ErrorCount != 0 {
		t.Errorf("found %d errors", S.ErrorCount)
	}
}
578
579
580 func TestInit(t *testing.T) {
581 var s Scanner
582
583
584 src1 := "false true { }"
585 f1 := token.NewFile("src1", -1, len(src1))
586 s.Init(f1, []byte(src1), nil, DontInsertCommas)
587 if f1.Size() != len(src1) {
588 t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1))
589 }
590 s.Scan()
591 s.Scan()
592 _, tok, _ := s.Scan()
593 if tok != token.LBRACE {
594 t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE)
595 }
596
597
598 src2 := "null true { ]"
599 f2 := token.NewFile("src2", -1, len(src2))
600 s.Init(f2, []byte(src2), nil, DontInsertCommas)
601 if f2.Size() != len(src2) {
602 t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2))
603 }
604 _, tok, _ = s.Scan()
605 if tok != token.NULL {
606 t.Errorf("bad token: got %s, expected %s", tok, token.NULL)
607 }
608
609 if s.ErrorCount != 0 {
610 t.Errorf("found %d errors", s.ErrorCount)
611 }
612 }
613
// TestScanInterpolation verifies scanning of string interpolations: the
// scanner pauses at each \( ... ), and ResumeInterpolation continues
// the enclosing string after the closing parenthesis.
func TestScanInterpolation(t *testing.T) {
	// Any scan error is a test failure.
	eh := func(pos token.Pos, msg string, args []interface{}) {
		msg = fmt.Sprintf(msg, args...)
		t.Errorf("error handler called (pos = %v, msg = %s)", pos, msg)
	}
	// trim strips quote/hash/escape punctuation so an interpolation
	// chunk reduces to the bare word it contains.
	trim := func(s string) string { return strings.Trim(s, `#"\()`) }

	// Each source embeds identifiers whose names equal the surrounding
	// chunk text (e.g. "first\(first)..."), so chunk and IDENT can be
	// compared directly below.
	sources := []string{
		`"first\(first)\\second\(second)"`,
		`#"first\#(first)\second\#(second)"#`,
		`"level\( ["foo", "level", level ][2] )end\( end )"`,
		`##"level\##( ["foo", "level", level ][2] )end\##( end )"##`,
		`"level\( { "foo": 1, "bar": level } )end\(end)"`,
	}
	for i, src := range sources {
		name := fmt.Sprintf("tsrc%d", i)
		t.Run(name, func(t *testing.T) {
			f := token.NewFile(name, -1, len(src))

			var s Scanner
			s.Init(f, []byte(src), eh, ScanComments)

			count := 0 // parenthesis nesting depth inside the current interpolation
			var lit, str string
			for tok := token.ILLEGAL; tok != token.EOF; {
				switch tok {
				case token.LPAREN:
					count++
				case token.RPAREN:
					// Outermost closing paren ends the interpolation:
					// resume scanning the enclosing string.
					if count--; count == 0 {
						str = trim(s.ResumeInterpolation())
					}
				case token.INTERPOLATION:
					str = trim(lit)
				case token.IDENT:
					// The identifier must match the chunk text around it.
					if lit != str {
						t.Errorf("str: got %v; want %v", lit, str)
					}
				}
				_, tok, lit = s.Scan()
			}
		})
	}
}
660
// TestStdErrorHander checks that every scan error (each '~' is an
// illegal character) is delivered to the installed handler, and that
// errors.Sanitize collapses same-position duplicates.
// NOTE(review): "Hander" is a long-standing typo in the test name; kept
// as-is to preserve the test's identity.
func TestStdErrorHander(t *testing.T) {
	const src = "~\n" +
		"~ ~\n" +
		"//line File2:20\n" +
		"~\n" +
		"//line File2:1\n" +
		"~ ~\n" +
		"//line File1:1\n" +
		"~ ~ ~"

	// Collect every reported error into a single errors.Error list.
	var list errors.Error
	eh := func(pos token.Pos, msg string, args []interface{}) {
		list = errors.Append(list, errors.Newf(pos, msg, args...))
	}

	var s Scanner
	s.Init(token.NewFile("File1", -1, len(src)), []byte(src), eh, DontInsertCommas)
	// Drain the token stream; errors are reported via eh as a side effect.
	for {
		if _, tok, _ := s.Scan(); tok == token.EOF {
			break
		}
	}

	// The handler must have been called once per scanner error.
	n := len(errors.Errors(list))
	if n != s.ErrorCount {
		t.Errorf("found %d errors, expected %d", n, s.ErrorCount)
	}

	// One raw error per '~' in src.
	if n != 9 {
		t.Errorf("found %d raw errors, expected 9", n)
		errors.Print(os.Stderr, list, nil)
	}

	// Sanitize is expected to drop one duplicate.
	n = len(errors.Errors(errors.Sanitize(list)))
	if n != 8 {
		t.Errorf("found %d one-per-line errors, expected 8", n)
		errors.Print(os.Stderr, list, nil)
	}
}
700
// errorCollector records how many errors a scanner error handler
// received, along with the most recent message and position.
type errorCollector struct {
	cnt int       // number of errors encountered
	msg string    // last error message encountered
	pos token.Pos // last error position encountered
}
706
707 func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) {
708 t.Helper()
709 var s Scanner
710 var h errorCollector
711 eh := func(pos token.Pos, msg string, args []interface{}) {
712 h.cnt++
713 h.msg = fmt.Sprintf(msg, args...)
714 h.pos = pos
715 }
716 s.Init(token.NewFile("", -1, len(src)), []byte(src), eh, ScanComments|DontInsertCommas)
717 _, tok0, lit0 := s.Scan()
718 if tok0 != tok {
719 t.Errorf("%q: got %s, expected %s", src, tok0, tok)
720 }
721 if tok0 != token.ILLEGAL && lit0 != lit {
722 t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
723 }
724 cnt := 0
725 if err != "" {
726 cnt = 1
727 }
728 if h.cnt != cnt {
729 t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt)
730 }
731 if h.msg != err {
732 t.Errorf("%q: got msg %q, expected %q", src, h.msg, err)
733 }
734 if h.pos.Offset() != pos {
735 t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset(), pos)
736 }
737 }
738
// errorTests lists malformed (and a few well-formed) inputs together
// with the expected first token, expected error offset, literal, and
// error message ("" means no error); driven by TestScanErrors.
var errorTests = []struct {
	src string
	tok token.Token
	pos int // expected error offset
	lit string
	err string // expected error message; "" if none
}{
	// Illegal characters.
	{"`", token.ILLEGAL, 0, "", "illegal character U+0060 '`'"},
	{"\a", token.ILLEGAL, 0, "", "illegal character U+0007"},
	{`^`, token.ILLEGAL, 0, "", "illegal character U+005E '^'"},
	{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
	{`_|`, token.ILLEGAL, 0, "", "illegal token '_|'; expected '_'"},

	// Malformed attributes.
	{`@`, token.ATTRIBUTE, 1, `@`, "invalid attribute: expected '('"},
	{`@foo`, token.ATTRIBUTE, 4, `@foo`, "invalid attribute: expected '('"},
	{`@foo(`, token.ATTRIBUTE, 5, `@foo(`, "attribute missing ')'"},
	{`@foo( `, token.ATTRIBUTE, 6, `@foo( `, "attribute missing ')'"},
	{`@foo( ""])`, token.ATTRIBUTE, 9, `@foo( ""])`, "unexpected ']'"},
	{`@foo(3})`, token.ATTRIBUTE, 7, `@foo(3})`, "unexpected '}'"},
	{`@foo(["")])`, token.ATTRIBUTE, 9, `@foo(["")])`, "unexpected ')'"},
	{`@foo(""`, token.ATTRIBUTE, 7, `@foo(""`, "attribute missing ')'"},
	{`@foo(aa`, token.ATTRIBUTE, 7, `@foo(aa`, "attribute missing ')'"},
	{`@foo("\(())")`, token.ATTRIBUTE, 7, `@foo("\(())")`, "interpolation not allowed in attribute"},

	// Escape sequences.
	{`"\8"`, token.STRING, 2, `"\8"`, "unknown escape sequence"},
	{`"\08"`, token.STRING, 3, `"\08"`, "illegal character U+0038 '8' in escape sequence"},
	{`"\x"`, token.STRING, 3, `"\x"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\x0"`, token.STRING, 4, `"\x0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\x0g"`, token.STRING, 4, `"\x0g"`, "illegal character U+0067 'g' in escape sequence"},
	{`"\u"`, token.STRING, 3, `"\u"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u0"`, token.STRING, 4, `"\u0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u00"`, token.STRING, 5, `"\u00"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\u000"`, token.STRING, 6, `"\u000"`, "illegal character U+0022 '\"' in escape sequence"},
	// Complete \u escape: no error.
	{`"\u0000"`, token.STRING, 0, `"\u0000"`, ""},
	{`"\U"`, token.STRING, 3, `"\U"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0"`, token.STRING, 4, `"\U0"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U00"`, token.STRING, 5, `"\U00"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U000"`, token.STRING, 6, `"\U000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0000"`, token.STRING, 7, `"\U0000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U00000"`, token.STRING, 8, `"\U00000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U000000"`, token.STRING, 9, `"\U000000"`, "illegal character U+0022 '\"' in escape sequence"},
	{`"\U0000000"`, token.STRING, 10, `"\U0000000"`, "illegal character U+0022 '\"' in escape sequence"},
	// Complete \U escape: no error.
	{`"\U00000000"`, token.STRING, 0, `"\U00000000"`, ""},
	{`"\Uffffffff"`, token.STRING, 2, `"\Uffffffff"`, "escape sequence is invalid Unicode code point"},

	// String termination.
	{`'`, token.STRING, 0, `'`, "string literal not terminated"},
	{`"`, token.STRING, 0, `"`, "string literal not terminated"},
	{`""`, token.STRING, 0, `""`, ""},
	{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
	{`""abc`, token.STRING, 0, `""`, ""},
	{"\"\"\"\nabc", token.STRING, 0, "\"\"\"\nabc", "string literal not terminated"},
	{"'''\nabc", token.STRING, 0, "'''\nabc", "string literal not terminated"},
	{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\r\n ", token.STRING, 0, "\"abc\r", "string literal not terminated"},
	{`#""`, token.STRING, 0, `#""`, "string literal not terminated"},
	{`#"""`, token.STRING, 0, `#"""`, `expected newline after multiline quote #"""`},
	{`#""#`, token.STRING, 0, `#""#`, ""},
	{"#'", token.STRING, 0, "#'", "string literal not terminated"},
	{"''", token.STRING, 0, "''", ""},
	{"'", token.STRING, 0, "'", "string literal not terminated"},

	// Interpolation and hash-guarded escapes.
	{`"\("`, token.INTERPOLATION, 0, `"\(`, ""},
	{`#"\("#`, token.STRING, 0, `#"\("#`, ""},
	{`#"\#("#`, token.INTERPOLATION, 0, `#"\#(`, ""},
	{`"\q"`, token.STRING, 2, `"\q"`, "unknown escape sequence"},
	{`#"\q"#`, token.STRING, 0, `#"\q"#`, ""},
	{`#"\#q"#`, token.STRING, 4, `#"\#q"#`, "unknown escape sequence"},

	// Numbers: legacy octal forms are rejected as integers but valid as
	// the prefix of a float; empty hex/binary/octal are errors.
	{"0", token.INT, 0, "0", ""},
	{"077", token.INT, 0, "077", "illegal integer number"},
	{"078.", token.FLOAT, 0, "078.", ""},
	{"07801234567.", token.FLOAT, 0, "07801234567.", ""},
	{"078e0", token.FLOAT, 0, "078e0", ""},
	{"078", token.INT, 0, "078", "illegal integer number"},
	{"07800000009", token.INT, 0, "07800000009", "illegal integer number"},
	{"0x", token.INT, 0, "0x", "illegal hexadecimal number"},
	{"0X", token.INT, 0, "0X", "illegal hexadecimal number"},
	{"0Xbeef_", token.INT, 6, "0Xbeef_", "illegal '_' in number"},
	{"0Xbeef__beef", token.INT, 7, "0Xbeef__beef", "illegal '_' in number"},
	{"0b", token.INT, 0, "0b", "illegal binary number"},
	{"0o", token.INT, 0, "0o", "illegal octal number"},

	// Invalid bytes inside strings and byte-order marks.
	{"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"},
	{"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"},
	{"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"},
	{"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"},
	{`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"},
}
833
834 func TestScanErrors(t *testing.T) {
835 for _, e := range errorTests {
836 t.Run(e.src, func(t *testing.T) {
837 checkError(t, e.src, e.tok, e.pos, e.lit, e.err)
838 })
839 }
840 }
841
842
// TestNoLiteralComments verifies that with comment scanning disabled
// (mode 0), Scan returns a non-empty literal only for keywords,
// literals, and commas — in particular, comment tokens carry no text.
func TestNoLiteralComments(t *testing.T) {
	var src = `
a: {
A: 1 // foo
}

b :: {
B: 2
// foo
}

c: 3 // foo

d: 4
// foo

b anycode(): {
// foo
}
`
	var s Scanner
	s.Init(token.NewFile("", -1, len(src)), []byte(src), nil, 0)
	for {
		pos, tok, lit := s.Scan()
		class := tokenclass(tok)
		// Only keywords, literals, and commas may carry literal text.
		if lit != "" && class != keyword && class != literal && tok != token.COMMA {
			t.Errorf("%s: tok = %s, lit = %q", pos, tok, lit)
		}
		if tok <= token.EOF {
			break
		}
	}
}
876
877 func BenchmarkScan(b *testing.B) {
878 b.StopTimer()
879 file := token.NewFile("", -1, len(source))
880 var s Scanner
881 b.StartTimer()
882 for i := 0; i < b.N; i++ {
883 s.Init(file, source, nil, ScanComments)
884 for {
885 _, tok, _ := s.Scan()
886 if tok == token.EOF {
887 break
888 }
889 }
890 }
891 }
892
// BenchmarkScanFile measures scanning an on-disk file end to end,
// reporting throughput via b.SetBytes.
// NOTE(review): the fixture filename "go" looks wrong or machine-local —
// ReadFile will panic if it does not exist; confirm the intended path.
func BenchmarkScanFile(b *testing.B) {
	b.StopTimer()
	const filename = "go"
	src, err := os.ReadFile(filename)
	if err != nil {
		panic(err)
	}
	file := token.NewFile(filename, -1, len(src))
	b.SetBytes(int64(len(src)))
	var s Scanner
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		s.Init(file, src, nil, ScanComments)
		for {
			_, tok, _ := s.Scan()
			if tok == token.EOF {
				break
			}
		}
	}
}
914
View as plain text