1 package toml
2
3 import (
4 "fmt"
5 "reflect"
6 "runtime"
7 "strings"
8 "unicode"
9 "unicode/utf8"
10 )
11
// itemType identifies the kind of an item produced by the lexer.
type itemType int

// The item kinds. Values are assigned by iota, so the order of this list
// determines the numeric value of each kind.
const (
	itemError itemType = iota // an error; the item's err field is set
	itemNIL
	itemEOF  // emitted when the end of the input is reached
	itemText // bare names and comment text
	itemString
	itemRawString
	itemMultilineString
	itemRawMultilineString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArray // the start of an array, emitted for '['
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemKeyEnd
	itemCommentStart
	itemInlineTableStart
	itemInlineTableEnd
)
39
// eof is the rune that lexer.next returns once the input is exhausted.
const eof = 0
41
// stateFn is one state of the lexer: it consumes input from lx and returns the
// state to run next. The error helpers and the end-of-input paths return nil.
type stateFn func(lx *lexer) stateFn
43
44 func (p Position) String() string {
45 return fmt.Sprintf("at line %d; start %d; length %d", p.Line, p.Start, p.Len)
46 }
47
// lexer holds the state of a single pass over a TOML input string.
type lexer struct {
	input    string    // the text being lexed
	start    int       // byte offset in input where the current item begins
	pos      int       // byte offset in input of the next rune to read
	line     int       // current line number; lex starts it at 1
	state    stateFn   // state function that nextItem runs next
	items    chan item // buffered channel that lexed items are sent on
	tomlNext bool      // switches on the branches guarded by lx.tomlNext

	// prevWidths holds the byte widths of the most recently read runes, newest
	// first, which is what lets backup step back over up to four runes.
	// nprev counts how many of those entries can currently be backed up over.
	prevWidths [4]int
	nprev      int
	atEOF      bool // set by next at the end of the input, cleared by backup

	// stack holds the states to return to once a nested construct (key,
	// value, comment, ...) has been lexed. States are added with push and
	// taken off again with pop.
	//
	//
	//
	stack []stateFn
}
71
// item is a single unit of lexer output.
type item struct {
	typ itemType // kind of the item
	val string   // text of the item; left empty on error items
	err error    // set on itemError items
	pos Position // location of the item in the input
}
78
79 func (lx *lexer) nextItem() item {
80 for {
81 select {
82 case item := <-lx.items:
83 return item
84 default:
85 lx.state = lx.state(lx)
86
87 }
88 }
89 }
90
91 func lex(input string, tomlNext bool) *lexer {
92 lx := &lexer{
93 input: input,
94 state: lexTop,
95 items: make(chan item, 10),
96 stack: make([]stateFn, 0, 10),
97 line: 1,
98 tomlNext: tomlNext,
99 }
100 return lx
101 }
102
103 func (lx *lexer) push(state stateFn) {
104 lx.stack = append(lx.stack, state)
105 }
106
107 func (lx *lexer) pop() stateFn {
108 if len(lx.stack) == 0 {
109 return lx.errorf("BUG in lexer: no states to pop")
110 }
111 last := lx.stack[len(lx.stack)-1]
112 lx.stack = lx.stack[0 : len(lx.stack)-1]
113 return last
114 }
115
116 func (lx *lexer) current() string {
117 return lx.input[lx.start:lx.pos]
118 }
119
120 func (lx lexer) getPos() Position {
121 p := Position{
122 Line: lx.line,
123 Start: lx.start,
124 Len: lx.pos - lx.start,
125 }
126 if p.Len <= 0 {
127 p.Len = 1
128 }
129 return p
130 }
131
132 func (lx *lexer) emit(typ itemType) {
133
134 if lx.start > lx.pos {
135 lx.error(errLexUTF8{lx.input[lx.pos]})
136 return
137 }
138 lx.items <- item{typ: typ, pos: lx.getPos(), val: lx.current()}
139 lx.start = lx.pos
140 }
141
142 func (lx *lexer) emitTrim(typ itemType) {
143 lx.items <- item{typ: typ, pos: lx.getPos(), val: strings.TrimSpace(lx.current())}
144 lx.start = lx.pos
145 }
146
147 func (lx *lexer) next() (r rune) {
148 if lx.atEOF {
149 panic("BUG in lexer: next called after EOF")
150 }
151 if lx.pos >= len(lx.input) {
152 lx.atEOF = true
153 return eof
154 }
155
156 if lx.input[lx.pos] == '\n' {
157 lx.line++
158 }
159 lx.prevWidths[3] = lx.prevWidths[2]
160 lx.prevWidths[2] = lx.prevWidths[1]
161 lx.prevWidths[1] = lx.prevWidths[0]
162 if lx.nprev < 4 {
163 lx.nprev++
164 }
165
166 r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
167 if r == utf8.RuneError {
168 lx.error(errLexUTF8{lx.input[lx.pos]})
169 return utf8.RuneError
170 }
171
172
173 if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) {
174 lx.errorControlChar(r)
175 return utf8.RuneError
176 }
177
178 lx.prevWidths[0] = w
179 lx.pos += w
180 return r
181 }
182
183
184 func (lx *lexer) ignore() {
185 lx.start = lx.pos
186 }
187
188
189 func (lx *lexer) backup() {
190 if lx.atEOF {
191 lx.atEOF = false
192 return
193 }
194 if lx.nprev < 1 {
195 panic("BUG in lexer: backed up too far")
196 }
197 w := lx.prevWidths[0]
198 lx.prevWidths[0] = lx.prevWidths[1]
199 lx.prevWidths[1] = lx.prevWidths[2]
200 lx.prevWidths[2] = lx.prevWidths[3]
201 lx.nprev--
202
203 lx.pos -= w
204 if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
205 lx.line--
206 }
207 }
208
209
210 func (lx *lexer) accept(valid rune) bool {
211 if lx.next() == valid {
212 return true
213 }
214 lx.backup()
215 return false
216 }
217
218
219 func (lx *lexer) peek() rune {
220 r := lx.next()
221 lx.backup()
222 return r
223 }
224
225
226 func (lx *lexer) skip(pred func(rune) bool) {
227 for {
228 r := lx.next()
229 if pred(r) {
230 continue
231 }
232 lx.backup()
233 lx.ignore()
234 return
235 }
236 }
237
238
239
240
241
242 func (lx *lexer) error(err error) stateFn {
243 if lx.atEOF {
244 return lx.errorPrevLine(err)
245 }
246 lx.items <- item{typ: itemError, pos: lx.getPos(), err: err}
247 return nil
248 }
249
250
251
252
253
254 func (lx *lexer) errorPrevLine(err error) stateFn {
255 pos := lx.getPos()
256 pos.Line--
257 pos.Len = 1
258 pos.Start = lx.pos - 1
259 lx.items <- item{typ: itemError, pos: pos, err: err}
260 return nil
261 }
262
263
264 func (lx *lexer) errorPos(start, length int, err error) stateFn {
265 pos := lx.getPos()
266 pos.Start = start
267 pos.Len = length
268 lx.items <- item{typ: itemError, pos: pos, err: err}
269 return nil
270 }
271
272
273 func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
274 if lx.atEOF {
275 pos := lx.getPos()
276 pos.Line--
277 pos.Len = 1
278 pos.Start = lx.pos - 1
279 lx.items <- item{typ: itemError, pos: pos, err: fmt.Errorf(format, values...)}
280 return nil
281 }
282 lx.items <- item{typ: itemError, pos: lx.getPos(), err: fmt.Errorf(format, values...)}
283 return nil
284 }
285
286 func (lx *lexer) errorControlChar(cc rune) stateFn {
287 return lx.errorPos(lx.pos-1, 1, errLexControl{cc})
288 }
289
290
291 func lexTop(lx *lexer) stateFn {
292 r := lx.next()
293 if isWhitespace(r) || isNL(r) {
294 return lexSkip(lx, lexTop)
295 }
296 switch r {
297 case '#':
298 lx.push(lexTop)
299 return lexCommentStart
300 case '[':
301 return lexTableStart
302 case eof:
303 if lx.pos > lx.start {
304 return lx.errorf("unexpected EOF")
305 }
306 lx.emit(itemEOF)
307 return nil
308 }
309
310
311
312 lx.backup()
313 lx.push(lexTopEnd)
314 return lexKeyStart
315 }
316
317
318
319
320 func lexTopEnd(lx *lexer) stateFn {
321 r := lx.next()
322 switch {
323 case r == '#':
324
325 lx.push(lexTop)
326 return lexCommentStart
327 case isWhitespace(r):
328 return lexTopEnd
329 case isNL(r):
330 lx.ignore()
331 return lexTop
332 case r == eof:
333 lx.emit(itemEOF)
334 return nil
335 }
336 return lx.errorf(
337 "expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
338 r)
339 }
340
341
342
343
344
345
346 func lexTableStart(lx *lexer) stateFn {
347 if lx.peek() == '[' {
348 lx.next()
349 lx.emit(itemArrayTableStart)
350 lx.push(lexArrayTableEnd)
351 } else {
352 lx.emit(itemTableStart)
353 lx.push(lexTableEnd)
354 }
355 return lexTableNameStart
356 }
357
358 func lexTableEnd(lx *lexer) stateFn {
359 lx.emit(itemTableEnd)
360 return lexTopEnd
361 }
362
363 func lexArrayTableEnd(lx *lexer) stateFn {
364 if r := lx.next(); r != ']' {
365 return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r)
366 }
367 lx.emit(itemArrayTableEnd)
368 return lexTopEnd
369 }
370
371 func lexTableNameStart(lx *lexer) stateFn {
372 lx.skip(isWhitespace)
373 switch r := lx.peek(); {
374 case r == ']' || r == eof:
375 return lx.errorf("unexpected end of table name (table names cannot be empty)")
376 case r == '.':
377 return lx.errorf("unexpected table separator (table names cannot be empty)")
378 case r == '"' || r == '\'':
379 lx.ignore()
380 lx.push(lexTableNameEnd)
381 return lexQuotedName
382 default:
383 lx.push(lexTableNameEnd)
384 return lexBareName
385 }
386 }
387
388
389
390 func lexTableNameEnd(lx *lexer) stateFn {
391 lx.skip(isWhitespace)
392 switch r := lx.next(); {
393 case isWhitespace(r):
394 return lexTableNameEnd
395 case r == '.':
396 lx.ignore()
397 return lexTableNameStart
398 case r == ']':
399 return lx.pop()
400 default:
401 return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
402 }
403 }
404
405
406
407
408
409
410
411 func lexBareName(lx *lexer) stateFn {
412 r := lx.next()
413 if isBareKeyChar(r, lx.tomlNext) {
414 return lexBareName
415 }
416 lx.backup()
417 lx.emit(itemText)
418 return lx.pop()
419 }
420
421
422
423
424
425
426
427 func lexQuotedName(lx *lexer) stateFn {
428 r := lx.next()
429 switch {
430 case isWhitespace(r):
431 return lexSkip(lx, lexValue)
432 case r == '"':
433 lx.ignore()
434 return lexString
435 case r == '\'':
436 lx.ignore()
437 return lexRawString
438 case r == eof:
439 return lx.errorf("unexpected EOF; expected value")
440 default:
441 return lx.errorf("expected value but found %q instead", r)
442 }
443 }
444
445
446 func lexKeyStart(lx *lexer) stateFn {
447 lx.skip(isWhitespace)
448 switch r := lx.peek(); {
449 case r == '=' || r == eof:
450 return lx.errorf("unexpected '=': key name appears blank")
451 case r == '.':
452 return lx.errorf("unexpected '.': keys cannot start with a '.'")
453 case r == '"' || r == '\'':
454 lx.ignore()
455 fallthrough
456 default:
457 lx.emit(itemKeyStart)
458 return lexKeyNameStart
459 }
460 }
461
462 func lexKeyNameStart(lx *lexer) stateFn {
463 lx.skip(isWhitespace)
464 switch r := lx.peek(); {
465 case r == '=' || r == eof:
466 return lx.errorf("unexpected '='")
467 case r == '.':
468 return lx.errorf("unexpected '.'")
469 case r == '"' || r == '\'':
470 lx.ignore()
471 lx.push(lexKeyEnd)
472 return lexQuotedName
473 default:
474 lx.push(lexKeyEnd)
475 return lexBareName
476 }
477 }
478
479
480
481 func lexKeyEnd(lx *lexer) stateFn {
482 lx.skip(isWhitespace)
483 switch r := lx.next(); {
484 case isWhitespace(r):
485 return lexSkip(lx, lexKeyEnd)
486 case r == eof:
487 return lx.errorf("unexpected EOF; expected key separator '='")
488 case r == '.':
489 lx.ignore()
490 return lexKeyNameStart
491 case r == '=':
492 lx.emit(itemKeyEnd)
493 return lexSkip(lx, lexValue)
494 default:
495 return lx.errorf("expected '.' or '=', but got %q instead", r)
496 }
497 }
498
499
500
501
// lexValue lexes the start of a value and dispatches on its first rune to the
// state for that kind of value: number or datetime, array, inline table, one
// of the four string forms, the special floats inf/nan, a signed number, or a
// boolean. The states it hands off to pop the stack when the value is done.
func lexValue(lx *lexer) stateFn {
	// Newlines are not skipped here; only spaces and tabs are, so a newline
	// in value position ends up in the error at the bottom.
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexValue)
	case isDigit(r):
		lx.backup() // the number states re-read the first digit
		return lexNumberOrDateStart
	}
	switch r {
	case '[':
		lx.ignore()
		lx.emit(itemArray)
		return lexArrayValue
	case '{':
		lx.ignore()
		lx.emit(itemInlineTableStart)
		return lexInlineTableValue
	case '"':
		// Three quotes open a multiline string. Exactly two quotes are an
		// empty basic string, so the second one is put back for lexString.
		if lx.accept('"') {
			if lx.accept('"') {
				lx.ignore() // drop the opening """
				return lexMultilineString
			}
			lx.backup()
		}
		lx.ignore() // drop the opening '"'
		return lexString
	case '\'':
		// Same scheme as above, for the raw string forms.
		if lx.accept('\'') {
			if lx.accept('\'') {
				lx.ignore() // drop the opening '''
				return lexMultilineRawString
			}
			lx.backup()
		}
		lx.ignore() // drop the opening "'"
		return lexRawString
	case '.':
		return lx.errorf("floats must start with a digit, not '.'")
	case 'i', 'n':
		// "inf" or "nan". If neither matches, control leaves the switch and
		// the letter check below takes over.
		if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
			lx.emit(itemFloat)
			return lx.pop()
		}
	case '-', '+':
		return lexDecimalNumberStart
	}
	if unicode.IsLetter(r) {
		// Any other value that starts with a letter is handed to lexBool,
		// which accepts only "true" and "false" and reports everything else
		// as an error.
		//
		lx.backup()
		return lexBool
	}
	if r == eof {
		return lx.errorf("unexpected EOF; expected value")
	}
	return lx.errorf("expected value but found %q instead", r)
}
565
566
567
568 func lexArrayValue(lx *lexer) stateFn {
569 r := lx.next()
570 switch {
571 case isWhitespace(r) || isNL(r):
572 return lexSkip(lx, lexArrayValue)
573 case r == '#':
574 lx.push(lexArrayValue)
575 return lexCommentStart
576 case r == ',':
577 return lx.errorf("unexpected comma")
578 case r == ']':
579 return lexArrayEnd
580 }
581
582 lx.backup()
583 lx.push(lexArrayValueEnd)
584 return lexValue
585 }
586
587
588
589
590 func lexArrayValueEnd(lx *lexer) stateFn {
591 switch r := lx.next(); {
592 case isWhitespace(r) || isNL(r):
593 return lexSkip(lx, lexArrayValueEnd)
594 case r == '#':
595 lx.push(lexArrayValueEnd)
596 return lexCommentStart
597 case r == ',':
598 lx.ignore()
599 return lexArrayValue
600 case r == ']':
601 return lexArrayEnd
602 default:
603 return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r))
604 }
605 }
606
607
608
609 func lexArrayEnd(lx *lexer) stateFn {
610 lx.ignore()
611 lx.emit(itemArrayEnd)
612 return lx.pop()
613 }
614
615
616
617 func lexInlineTableValue(lx *lexer) stateFn {
618 r := lx.next()
619 switch {
620 case isWhitespace(r):
621 return lexSkip(lx, lexInlineTableValue)
622 case isNL(r):
623 if lx.tomlNext {
624 return lexSkip(lx, lexInlineTableValue)
625 }
626 return lx.errorPrevLine(errLexInlineTableNL{})
627 case r == '#':
628 lx.push(lexInlineTableValue)
629 return lexCommentStart
630 case r == ',':
631 return lx.errorf("unexpected comma")
632 case r == '}':
633 return lexInlineTableEnd
634 }
635 lx.backup()
636 lx.push(lexInlineTableValueEnd)
637 return lexKeyStart
638 }
639
640
641
642
643 func lexInlineTableValueEnd(lx *lexer) stateFn {
644 switch r := lx.next(); {
645 case isWhitespace(r):
646 return lexSkip(lx, lexInlineTableValueEnd)
647 case isNL(r):
648 if lx.tomlNext {
649 return lexSkip(lx, lexInlineTableValueEnd)
650 }
651 return lx.errorPrevLine(errLexInlineTableNL{})
652 case r == '#':
653 lx.push(lexInlineTableValueEnd)
654 return lexCommentStart
655 case r == ',':
656 lx.ignore()
657 lx.skip(isWhitespace)
658 if lx.peek() == '}' {
659 if lx.tomlNext {
660 return lexInlineTableValueEnd
661 }
662 return lx.errorf("trailing comma not allowed in inline tables")
663 }
664 return lexInlineTableValue
665 case r == '}':
666 return lexInlineTableEnd
667 default:
668 return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r))
669 }
670 }
671
672 func runeOrEOF(r rune) string {
673 if r == eof {
674 return "end of file"
675 }
676 return "'" + string(r) + "'"
677 }
678
679
680
681 func lexInlineTableEnd(lx *lexer) stateFn {
682 lx.ignore()
683 lx.emit(itemInlineTableEnd)
684 return lx.pop()
685 }
686
687
688
689 func lexString(lx *lexer) stateFn {
690 r := lx.next()
691 switch {
692 case r == eof:
693 return lx.errorf(`unexpected EOF; expected '"'`)
694 case isNL(r):
695 return lx.errorPrevLine(errLexStringNL{})
696 case r == '\\':
697 lx.push(lexString)
698 return lexStringEscape
699 case r == '"':
700 lx.backup()
701 lx.emit(itemString)
702 lx.next()
703 lx.ignore()
704 return lx.pop()
705 }
706 return lexString
707 }
708
709
710
// lexMultilineString consumes the body of a multiline basic string one rune
// per call, the opening """ having been consumed already. A backslash is
// handed to lexMultilineStringEscape. Three quotes that are not followed by a
// fourth close the string, which is emitted without them.
func lexMultilineString(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	default:
		return lexMultilineString
	case eof:
		return lx.errorf(`unexpected EOF; expected '"""'`)
	case '\\':
		return lexMultilineStringEscape
	case '"':
		// One quote read; try to read two more.
		if lx.accept('"') {
			if lx.accept('"') {
				// Three quotes read. If a fourth follows, the earliest quotes
				// may still be string content, because a string can end in one
				// or two quotes right before the closing delimiter.
				if lx.peek() == '"' {
					// If five quotes have been consumed already, the peeked
					// one is the sixth in a row, which is an error.
					//
					// The second condition exempts the case where the first
					// of those quotes is escaped with a backslash:
					//
					//   """text \""""""
					//
					// Here the backslash escapes one quote, so what follows
					// is five quotes: two of content and the closing three.
					//
					//
					//
					if strings.HasSuffix(lx.current(), `"""""`) && !strings.HasSuffix(lx.current(), `\"""""`) {
						return lx.errorf(`unexpected '""""""'`)
					}
					// Put two quotes back so that the first one counts as
					// content and the scan restarts on the next call.
					lx.backup()
					lx.backup()
					return lexMultilineString
				}

				lx.backup() // keep the closing """ out of the item
				lx.backup()
				lx.backup()
				lx.emit(itemMultilineString)
				lx.next() // read the """ again and discard it
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			lx.backup()
		}
		return lexMultilineString
	}
}
763
764
765
766 func lexRawString(lx *lexer) stateFn {
767 r := lx.next()
768 switch {
769 default:
770 return lexRawString
771 case r == eof:
772 return lx.errorf(`unexpected EOF; expected "'"`)
773 case isNL(r):
774 return lx.errorPrevLine(errLexStringNL{})
775 case r == '\'':
776 lx.backup()
777 lx.emit(itemRawString)
778 lx.next()
779 lx.ignore()
780 return lx.pop()
781 }
782 }
783
784
785
786
// lexMultilineRawString consumes the body of a multiline raw string one rune
// per call, the opening ''' having been consumed already. Three quotes that
// are not followed by a fourth close the string, which is emitted without
// them. Backslashes get no special treatment.
func lexMultilineRawString(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	default:
		return lexMultilineRawString
	case eof:
		return lx.errorf(`unexpected EOF; expected "'''"`)
	case '\'':
		// One quote read; try to read two more.
		if lx.accept('\'') {
			if lx.accept('\'') {
				// Three quotes read. If a fourth follows, the earliest quotes
				// may still be string content, because a string can end in one
				// or two quotes right before the closing delimiter.
				if lx.peek() == '\'' {
					// If five quotes have been consumed already, the peeked
					// one is the sixth in a row, which is an error.
					if strings.HasSuffix(lx.current(), "'''''") {
						return lx.errorf(`unexpected "''''''"`)
					}
					// Put two quotes back so that the first one counts as
					// content and the scan restarts on the next call.
					lx.backup()
					lx.backup()
					return lexMultilineRawString
				}

				lx.backup() // keep the closing ''' out of the item
				lx.backup()
				lx.backup()
				lx.emit(itemRawMultilineString)
				lx.next() // read the ''' again and discard it
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			lx.backup()
		}
		return lexMultilineRawString
	}
}
827
828
829
830 func lexMultilineStringEscape(lx *lexer) stateFn {
831 if isNL(lx.next()) {
832 return lexMultilineString
833 }
834 lx.backup()
835 lx.push(lexMultilineString)
836 return lexStringEscape(lx)
837 }
838
839 func lexStringEscape(lx *lexer) stateFn {
840 r := lx.next()
841 switch r {
842 case 'e':
843 if !lx.tomlNext {
844 return lx.error(errLexEscape{r})
845 }
846 fallthrough
847 case 'b':
848 fallthrough
849 case 't':
850 fallthrough
851 case 'n':
852 fallthrough
853 case 'f':
854 fallthrough
855 case 'r':
856 fallthrough
857 case '"':
858 fallthrough
859 case ' ', '\t':
860
861
862 fallthrough
863 case '\\':
864 return lx.pop()
865 case 'x':
866 if !lx.tomlNext {
867 return lx.error(errLexEscape{r})
868 }
869 return lexHexEscape
870 case 'u':
871 return lexShortUnicodeEscape
872 case 'U':
873 return lexLongUnicodeEscape
874 }
875 return lx.error(errLexEscape{r})
876 }
877
878 func lexHexEscape(lx *lexer) stateFn {
879 var r rune
880 for i := 0; i < 2; i++ {
881 r = lx.next()
882 if !isHexadecimal(r) {
883 return lx.errorf(
884 `expected two hexadecimal digits after '\x', but got %q instead`,
885 lx.current())
886 }
887 }
888 return lx.pop()
889 }
890
891 func lexShortUnicodeEscape(lx *lexer) stateFn {
892 var r rune
893 for i := 0; i < 4; i++ {
894 r = lx.next()
895 if !isHexadecimal(r) {
896 return lx.errorf(
897 `expected four hexadecimal digits after '\u', but got %q instead`,
898 lx.current())
899 }
900 }
901 return lx.pop()
902 }
903
904 func lexLongUnicodeEscape(lx *lexer) stateFn {
905 var r rune
906 for i := 0; i < 8; i++ {
907 r = lx.next()
908 if !isHexadecimal(r) {
909 return lx.errorf(
910 `expected eight hexadecimal digits after '\U', but got %q instead`,
911 lx.current())
912 }
913 }
914 return lx.pop()
915 }
916
917
918
919
920
921 func lexNumberOrDateStart(lx *lexer) stateFn {
922 r := lx.next()
923 switch r {
924 case '0':
925 return lexBaseNumberOrDate
926 }
927
928 if !isDigit(r) {
929
930
931
932 return lx.errorf("expected a digit but got %q", r)
933 }
934
935 return lexNumberOrDate
936 }
937
938
939 func lexNumberOrDate(lx *lexer) stateFn {
940 r := lx.next()
941 if isDigit(r) {
942 return lexNumberOrDate
943 }
944 switch r {
945 case '-', ':':
946 return lexDatetime
947 case '_':
948 return lexDecimalNumber
949 case '.', 'e', 'E':
950 return lexFloat
951 }
952
953 lx.backup()
954 lx.emit(itemInteger)
955 return lx.pop()
956 }
957
958
959
960 func lexDatetime(lx *lexer) stateFn {
961 r := lx.next()
962 if isDigit(r) {
963 return lexDatetime
964 }
965 switch r {
966 case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
967 return lexDatetime
968 }
969
970 lx.backup()
971 lx.emitTrim(itemDatetime)
972 return lx.pop()
973 }
974
975
976 func lexHexInteger(lx *lexer) stateFn {
977 r := lx.next()
978 if isHexadecimal(r) {
979 return lexHexInteger
980 }
981 switch r {
982 case '_':
983 return lexHexInteger
984 }
985
986 lx.backup()
987 lx.emit(itemInteger)
988 return lx.pop()
989 }
990
991
992 func lexOctalInteger(lx *lexer) stateFn {
993 r := lx.next()
994 if isOctal(r) {
995 return lexOctalInteger
996 }
997 switch r {
998 case '_':
999 return lexOctalInteger
1000 }
1001
1002 lx.backup()
1003 lx.emit(itemInteger)
1004 return lx.pop()
1005 }
1006
1007
1008 func lexBinaryInteger(lx *lexer) stateFn {
1009 r := lx.next()
1010 if isBinary(r) {
1011 return lexBinaryInteger
1012 }
1013 switch r {
1014 case '_':
1015 return lexBinaryInteger
1016 }
1017
1018 lx.backup()
1019 lx.emit(itemInteger)
1020 return lx.pop()
1021 }
1022
1023
1024 func lexDecimalNumber(lx *lexer) stateFn {
1025 r := lx.next()
1026 if isDigit(r) {
1027 return lexDecimalNumber
1028 }
1029 switch r {
1030 case '.', 'e', 'E':
1031 return lexFloat
1032 case '_':
1033 return lexDecimalNumber
1034 }
1035
1036 lx.backup()
1037 lx.emit(itemInteger)
1038 return lx.pop()
1039 }
1040
1041
1042
1043
1044
1045
1046 func lexDecimalNumberStart(lx *lexer) stateFn {
1047 r := lx.next()
1048
1049
1050 switch r {
1051 case 'i':
1052 if !lx.accept('n') || !lx.accept('f') {
1053 return lx.errorf("invalid float: '%s'", lx.current())
1054 }
1055 lx.emit(itemFloat)
1056 return lx.pop()
1057 case 'n':
1058 if !lx.accept('a') || !lx.accept('n') {
1059 return lx.errorf("invalid float: '%s'", lx.current())
1060 }
1061 lx.emit(itemFloat)
1062 return lx.pop()
1063 case '0':
1064 p := lx.peek()
1065 switch p {
1066 case 'b', 'o', 'x':
1067 return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
1068 }
1069 case '.':
1070 return lx.errorf("floats must start with a digit, not '.'")
1071 }
1072
1073 if isDigit(r) {
1074 return lexDecimalNumber
1075 }
1076
1077 return lx.errorf("expected a digit but got %q", r)
1078 }
1079
1080
1081
1082
1083 func lexBaseNumberOrDate(lx *lexer) stateFn {
1084 r := lx.next()
1085
1086
1087 if isDigit(r) {
1088 return lexNumberOrDate
1089 }
1090 switch r {
1091 case '_':
1092
1093
1094
1095 return lexDecimalNumber
1096 case '.', 'e', 'E':
1097 return lexFloat
1098 case 'b':
1099 r = lx.peek()
1100 if !isBinary(r) {
1101 lx.errorf("not a binary number: '%s%c'", lx.current(), r)
1102 }
1103 return lexBinaryInteger
1104 case 'o':
1105 r = lx.peek()
1106 if !isOctal(r) {
1107 lx.errorf("not an octal number: '%s%c'", lx.current(), r)
1108 }
1109 return lexOctalInteger
1110 case 'x':
1111 r = lx.peek()
1112 if !isHexadecimal(r) {
1113 lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
1114 }
1115 return lexHexInteger
1116 }
1117
1118 lx.backup()
1119 lx.emit(itemInteger)
1120 return lx.pop()
1121 }
1122
1123
1124
1125
1126 func lexFloat(lx *lexer) stateFn {
1127 r := lx.next()
1128 if isDigit(r) {
1129 return lexFloat
1130 }
1131 switch r {
1132 case '_', '.', '-', '+', 'e', 'E':
1133 return lexFloat
1134 }
1135
1136 lx.backup()
1137 lx.emit(itemFloat)
1138 return lx.pop()
1139 }
1140
1141
1142 func lexBool(lx *lexer) stateFn {
1143 var rs []rune
1144 for {
1145 r := lx.next()
1146 if !unicode.IsLetter(r) {
1147 lx.backup()
1148 break
1149 }
1150 rs = append(rs, r)
1151 }
1152 s := string(rs)
1153 switch s {
1154 case "true", "false":
1155 lx.emit(itemBool)
1156 return lx.pop()
1157 }
1158 return lx.errorf("expected value but found %q instead", s)
1159 }
1160
1161
1162
1163 func lexCommentStart(lx *lexer) stateFn {
1164 lx.ignore()
1165 lx.emit(itemCommentStart)
1166 return lexComment
1167 }
1168
1169
1170
1171
1172 func lexComment(lx *lexer) stateFn {
1173 switch r := lx.next(); {
1174 case isNL(r) || r == eof:
1175 lx.backup()
1176 lx.emit(itemText)
1177 return lx.pop()
1178 default:
1179 return lexComment
1180 }
1181 }
1182
1183
1184 func lexSkip(lx *lexer, nextState stateFn) stateFn {
1185 lx.ignore()
1186 return nextState
1187 }
1188
1189 func (s stateFn) String() string {
1190 name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
1191 if i := strings.LastIndexByte(name, '.'); i > -1 {
1192 name = name[i+1:]
1193 }
1194 if s == nil {
1195 name = "<nil>"
1196 }
1197 return name + "()"
1198 }
1199
1200 func (itype itemType) String() string {
1201 switch itype {
1202 case itemError:
1203 return "Error"
1204 case itemNIL:
1205 return "NIL"
1206 case itemEOF:
1207 return "EOF"
1208 case itemText:
1209 return "Text"
1210 case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
1211 return "String"
1212 case itemBool:
1213 return "Bool"
1214 case itemInteger:
1215 return "Integer"
1216 case itemFloat:
1217 return "Float"
1218 case itemDatetime:
1219 return "DateTime"
1220 case itemTableStart:
1221 return "TableStart"
1222 case itemTableEnd:
1223 return "TableEnd"
1224 case itemKeyStart:
1225 return "KeyStart"
1226 case itemKeyEnd:
1227 return "KeyEnd"
1228 case itemArray:
1229 return "Array"
1230 case itemArrayEnd:
1231 return "ArrayEnd"
1232 case itemCommentStart:
1233 return "CommentStart"
1234 case itemInlineTableStart:
1235 return "InlineTableStart"
1236 case itemInlineTableEnd:
1237 return "InlineTableEnd"
1238 }
1239 panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
1240 }
1241
1242 func (item item) String() string {
1243 return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
1244 }
1245
// isWhitespace reports whether r is a tab or a space.
func isWhitespace(r rune) bool {
	switch r {
	case '\t', ' ':
		return true
	}
	return false
}
// isNL reports whether r is a line feed or a carriage return.
func isNL(r rune) bool {
	switch r {
	case '\n', '\r':
		return true
	}
	return false
}
// isControl reports whether r is a control character, that is, in the range
// 0x00-0x1f or equal to 0x7f. Tab, carriage return and line feed are excluded.
func isControl(r rune) bool {
	if r == '\t' || r == '\r' || r == '\n' {
		return false
	}
	return r == 0x7f || (r >= 0x00 && r <= 0x1f)
}
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}
// isBinary reports whether r is a binary digit, '0' or '1'.
func isBinary(r rune) bool {
	switch r {
	case '0', '1':
		return true
	}
	return false
}
// isOctal reports whether r is an octal digit, '0' through '7'.
func isOctal(r rune) bool {
	return '0' <= r && r <= '7'
}
// isHexadecimal reports whether r is a hexadecimal digit, in either case.
func isHexadecimal(r rune) bool {
	switch {
	case r >= '0' && r <= '9':
		return true
	case r >= 'a' && r <= 'f':
		return true
	case r >= 'A' && r <= 'F':
		return true
	}
	return false
}
1262
// isBareKeyChar reports whether r may appear in a bare key. ASCII letters,
// digits, '_' and '-' are always allowed. With tomlNext set, the additional
// non-ASCII ranges listed below are allowed as well.
func isBareKeyChar(r rune, tomlNext bool) bool {
	switch {
	case r >= 'A' && r <= 'Z',
		r >= 'a' && r <= 'z',
		r >= '0' && r <= '9',
		r == '_', r == '-':
		return true
	case !tomlNext:
		return false
	}

	return r == 0xb2 || r == 0xb3 || r == 0xb9 || (r >= 0xbc && r <= 0xbe) ||
		(r >= 0xc0 && r <= 0xd6) || (r >= 0xd8 && r <= 0xf6) || (r >= 0xf8 && r <= 0x037d) ||
		(r >= 0x037f && r <= 0x1fff) ||
		(r >= 0x200c && r <= 0x200d) || (r >= 0x203f && r <= 0x2040) ||
		(r >= 0x2070 && r <= 0x218f) || (r >= 0x2460 && r <= 0x24ff) ||
		(r >= 0x2c00 && r <= 0x2fef) || (r >= 0x3001 && r <= 0xd7ff) ||
		(r >= 0xf900 && r <= 0xfdcf) || (r >= 0xfdf0 && r <= 0xfffd) ||
		(r >= 0x10000 && r <= 0xeffff)
}
1284
View as plain text