1 package toml
2
3 import (
4 "fmt"
5 "os"
6 "strconv"
7 "strings"
8 "time"
9 "unicode/utf8"
10
11 "github.com/BurntSushi/toml/internal"
12 )
13
// parser holds the state used while turning the lexer's item stream into
// nested key/value data.
type parser struct {
	lx         *lexer   // Source of items; read through next().
	context    Key      // Key path of the table currently in scope.
	currentKey string   // Last part of the key whose value is being read; reset to "" afterwards.
	pos        Position // Position of the item most recently read via nextPos().
	tomlNext   bool     // True when BURNTSUSHI_TOML_110 is present in the environment.

	ordered []Key // Keys in the order they were encountered.

	keyInfo   map[string]keyInfo     // Key path (Key.String()) → type and position.
	mapping   map[string]interface{} // Parsed data, nested by key.
	implicits map[string]struct{}    // Key paths currently recorded as implicitly created.
}
27
// keyInfo is the per-key metadata recorded by setType.
type keyInfo struct {
	pos      Position // Position passed to setType for this key.
	tomlType tomlType // TOML type passed to setType for this key.
}
32
33 func parse(data string) (p *parser, err error) {
34 _, tomlNext := os.LookupEnv("BURNTSUSHI_TOML_110")
35
36 defer func() {
37 if r := recover(); r != nil {
38 if pErr, ok := r.(ParseError); ok {
39 pErr.input = data
40 err = pErr
41 return
42 }
43 panic(r)
44 }
45 }()
46
47
48
49
50 if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") {
51 data = data[2:]
52 } else if strings.HasPrefix(data, "\xef\xbb\xbf") {
53 data = data[3:]
54 }
55
56
57
58
59 ex := 6
60 if len(data) < 6 {
61 ex = len(data)
62 }
63 if i := strings.IndexRune(data[:ex], 0); i > -1 {
64 return nil, ParseError{
65 Message: "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8",
66 Position: Position{Line: 1, Start: i, Len: 1},
67 Line: 1,
68 input: data,
69 }
70 }
71
72 p = &parser{
73 keyInfo: make(map[string]keyInfo),
74 mapping: make(map[string]interface{}),
75 lx: lex(data, tomlNext),
76 ordered: make([]Key, 0),
77 implicits: make(map[string]struct{}),
78 tomlNext: tomlNext,
79 }
80 for {
81 item := p.next()
82 if item.typ == itemEOF {
83 break
84 }
85 p.topLevel(item)
86 }
87
88 return p, nil
89 }
90
91 func (p *parser) panicErr(it item, err error) {
92 panic(ParseError{
93 err: err,
94 Position: it.pos,
95 Line: it.pos.Len,
96 LastKey: p.current(),
97 })
98 }
99
100 func (p *parser) panicItemf(it item, format string, v ...interface{}) {
101 panic(ParseError{
102 Message: fmt.Sprintf(format, v...),
103 Position: it.pos,
104 Line: it.pos.Len,
105 LastKey: p.current(),
106 })
107 }
108
109 func (p *parser) panicf(format string, v ...interface{}) {
110 panic(ParseError{
111 Message: fmt.Sprintf(format, v...),
112 Position: p.pos,
113 Line: p.pos.Line,
114 LastKey: p.current(),
115 })
116 }
117
118 func (p *parser) next() item {
119 it := p.lx.nextItem()
120
121 if it.typ == itemError {
122 if it.err != nil {
123 panic(ParseError{
124 Position: it.pos,
125 Line: it.pos.Line,
126 LastKey: p.current(),
127 err: it.err,
128 })
129 }
130
131 p.panicItemf(it, "%s", it.val)
132 }
133 return it
134 }
135
136 func (p *parser) nextPos() item {
137 it := p.next()
138 p.pos = it.pos
139 return it
140 }
141
142 func (p *parser) bug(format string, v ...interface{}) {
143 panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
144 }
145
146 func (p *parser) expect(typ itemType) item {
147 it := p.next()
148 p.assertEqual(typ, it.typ)
149 return it
150 }
151
152 func (p *parser) assertEqual(expected, got itemType) {
153 if expected != got {
154 p.bug("Expected '%s' but got '%s'.", expected, got)
155 }
156 }
157
158 func (p *parser) topLevel(item item) {
159 switch item.typ {
160 case itemCommentStart:
161 p.expect(itemText)
162 case itemTableStart:
163 name := p.nextPos()
164
165 var key Key
166 for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
167 key = append(key, p.keyString(name))
168 }
169 p.assertEqual(itemTableEnd, name.typ)
170
171 p.addContext(key, false)
172 p.setType("", tomlHash, item.pos)
173 p.ordered = append(p.ordered, key)
174 case itemArrayTableStart:
175 name := p.nextPos()
176
177 var key Key
178 for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
179 key = append(key, p.keyString(name))
180 }
181 p.assertEqual(itemArrayTableEnd, name.typ)
182
183 p.addContext(key, true)
184 p.setType("", tomlArrayHash, item.pos)
185 p.ordered = append(p.ordered, key)
186 case itemKeyStart:
187 outerContext := p.context
188
189 k := p.nextPos()
190 var key Key
191 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
192 key = append(key, p.keyString(k))
193 }
194 p.assertEqual(itemKeyEnd, k.typ)
195
196
197 p.currentKey = key[len(key)-1]
198
199
200
201 context := key[:len(key)-1]
202 for i := range context {
203 p.addImplicitContext(append(p.context, context[i:i+1]...))
204 }
205 p.ordered = append(p.ordered, p.context.add(p.currentKey))
206
207
208 vItem := p.next()
209 val, typ := p.value(vItem, false)
210 p.set(p.currentKey, val, typ, vItem.pos)
211
212
213 p.context = outerContext
214 p.currentKey = ""
215 default:
216 p.bug("Unexpected type at top level: %s", item.typ)
217 }
218 }
219
220
221 func (p *parser) keyString(it item) string {
222 switch it.typ {
223 case itemText:
224 return it.val
225 case itemString, itemMultilineString,
226 itemRawString, itemRawMultilineString:
227 s, _ := p.value(it, false)
228 return s.(string)
229 default:
230 p.bug("Unexpected key type: %s", it.typ)
231 }
232 panic("unreachable")
233 }
234
// datetimeRepl rewrites a lowercase "z" or "t" to uppercase and a space to
// "T"; valueDatetime applies it before trying the layouts in dtTypes.
var datetimeRepl = strings.NewReplacer(
	"z", "Z",
	"t", "T",
	" ", "T")
239
240
241
242 func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
243 switch it.typ {
244 case itemString:
245 return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
246 case itemMultilineString:
247 return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
248 case itemRawString:
249 return it.val, p.typeOfPrimitive(it)
250 case itemRawMultilineString:
251 return stripFirstNewline(it.val), p.typeOfPrimitive(it)
252 case itemInteger:
253 return p.valueInteger(it)
254 case itemFloat:
255 return p.valueFloat(it)
256 case itemBool:
257 switch it.val {
258 case "true":
259 return true, p.typeOfPrimitive(it)
260 case "false":
261 return false, p.typeOfPrimitive(it)
262 default:
263 p.bug("Expected boolean value, but got '%s'.", it.val)
264 }
265 case itemDatetime:
266 return p.valueDatetime(it)
267 case itemArray:
268 return p.valueArray(it)
269 case itemInlineTableStart:
270 return p.valueInlineTable(it, parentIsArray)
271 default:
272 p.bug("Unexpected value type: %s", it.typ)
273 }
274 panic("unreachable")
275 }
276
277 func (p *parser) valueInteger(it item) (interface{}, tomlType) {
278 if !numUnderscoresOK(it.val) {
279 p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val)
280 }
281 if numHasLeadingZero(it.val) {
282 p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val)
283 }
284
285 num, err := strconv.ParseInt(it.val, 0, 64)
286 if err != nil {
287
288
289
290
291
292 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
293 p.panicErr(it, errParseRange{i: it.val, size: "int64"})
294 } else {
295 p.bug("Expected integer value, but got '%s'.", it.val)
296 }
297 }
298 return num, p.typeOfPrimitive(it)
299 }
300
301 func (p *parser) valueFloat(it item) (interface{}, tomlType) {
302 parts := strings.FieldsFunc(it.val, func(r rune) bool {
303 switch r {
304 case '.', 'e', 'E':
305 return true
306 }
307 return false
308 })
309 for _, part := range parts {
310 if !numUnderscoresOK(part) {
311 p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val)
312 }
313 }
314 if len(parts) > 0 && numHasLeadingZero(parts[0]) {
315 p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val)
316 }
317 if !numPeriodsOK(it.val) {
318
319
320
321
322 p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val)
323 }
324 val := strings.Replace(it.val, "_", "", -1)
325 if val == "+nan" || val == "-nan" {
326 val = "nan"
327 }
328 num, err := strconv.ParseFloat(val, 64)
329 if err != nil {
330 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
331 p.panicErr(it, errParseRange{i: it.val, size: "float64"})
332 } else {
333 p.panicItemf(it, "Invalid float value: %q", it.val)
334 }
335 }
336 return num, p.typeOfPrimitive(it)
337 }
338
// dtTypes lists the layouts that valueDatetime tries, in order, together with
// the location passed to time.ParseInLocation for each one. Entries with next
// set are skipped unless the parser's tomlNext flag is on.
var dtTypes = []struct {
	fmt  string
	zone *time.Location
	next bool
}{
	{time.RFC3339Nano, time.Local, false},
	{"2006-01-02T15:04:05.999999999", internal.LocalDatetime, false},
	{"2006-01-02", internal.LocalDate, false},
	{"15:04:05.999999999", internal.LocalTime, false},

	// Layouts without a seconds component; only tried when tomlNext is set.
	{"2006-01-02T15:04Z07:00", time.Local, true},
	{"2006-01-02T15:04", internal.LocalDatetime, true},
	{"15:04", internal.LocalTime, true},
}
354
355 func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
356 it.val = datetimeRepl.Replace(it.val)
357 var (
358 t time.Time
359 ok bool
360 err error
361 )
362 for _, dt := range dtTypes {
363 if dt.next && !p.tomlNext {
364 continue
365 }
366 t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
367 if err == nil {
368 ok = true
369 break
370 }
371 }
372 if !ok {
373 p.panicItemf(it, "Invalid TOML Datetime: %q.", it.val)
374 }
375 return t, p.typeOfPrimitive(it)
376 }
377
378 func (p *parser) valueArray(it item) (interface{}, tomlType) {
379 p.setType(p.currentKey, tomlArray, it.pos)
380
381 var (
382 types []tomlType
383
384
385
386
387 array = []interface{}{}
388 )
389 for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
390 if it.typ == itemCommentStart {
391 p.expect(itemText)
392 continue
393 }
394
395 val, typ := p.value(it, true)
396 array = append(array, val)
397 types = append(types, typ)
398
399
400
401
402
403
404 _ = types
405 }
406 return array, tomlArray
407 }
408
409 func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) {
410 var (
411 hash = make(map[string]interface{})
412 outerContext = p.context
413 outerKey = p.currentKey
414 )
415
416 p.context = append(p.context, p.currentKey)
417 prevContext := p.context
418 p.currentKey = ""
419
420 p.addImplicit(p.context)
421 p.addContext(p.context, parentIsArray)
422
423
424 for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
425 if it.typ == itemCommentStart {
426 p.expect(itemText)
427 continue
428 }
429
430
431 k := p.nextPos()
432 var key Key
433 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
434 key = append(key, p.keyString(k))
435 }
436 p.assertEqual(itemKeyEnd, k.typ)
437
438
439 p.currentKey = key[len(key)-1]
440
441
442
443 context := key[:len(key)-1]
444 for i := range context {
445 p.addImplicitContext(append(p.context, context[i:i+1]...))
446 }
447 p.ordered = append(p.ordered, p.context.add(p.currentKey))
448
449
450 val, typ := p.value(p.next(), false)
451 p.set(p.currentKey, val, typ, it.pos)
452 hash[p.currentKey] = val
453
454
455 p.context = prevContext
456 }
457 p.context = outerContext
458 p.currentKey = outerKey
459 return hash, tomlHash
460 }
461
462
463
// numHasLeadingZero reports whether the number s starts with a superfluous
// zero. A lone "0", a sign followed by a lone "0", and the prefixes 0b, 0o
// and 0x are not considered leading zeroes.
func numHasLeadingZero(s string) bool {
	if len(s) < 2 {
		return false
	}
	switch s[0] {
	case '0':
		// A zero followed by a base marker is a prefix, not a leading zero.
		return s[1] != 'b' && s[1] != 'o' && s[1] != 'x'
	case '-', '+':
		return len(s) > 2 && s[1] == '0'
	}
	return false
}
473
474
475
476 func numUnderscoresOK(s string) bool {
477 switch s {
478 case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
479 return true
480 }
481 accept := false
482 for _, r := range s {
483 if r == '_' {
484 if !accept {
485 return false
486 }
487 }
488
489
490
491 accept = isHexadecimal(r)
492 }
493 return accept
494 }
495
496
497 func numPeriodsOK(s string) bool {
498 period := false
499 for _, r := range s {
500 if period && !isDigit(r) {
501 return false
502 }
503 period = r == '.'
504 }
505 return !period
506 }
507
508
509
510
511
512
513 func (p *parser) addContext(key Key, array bool) {
514 var ok bool
515
516
517 hashContext := p.mapping
518 keyContext := make(Key, 0)
519
520
521 for _, k := range key[0 : len(key)-1] {
522 _, ok = hashContext[k]
523 keyContext = append(keyContext, k)
524
525
526 if !ok {
527 p.addImplicit(keyContext)
528 hashContext[k] = make(map[string]interface{})
529 }
530
531
532
533
534
535
536 switch t := hashContext[k].(type) {
537 case []map[string]interface{}:
538 hashContext = t[len(t)-1]
539 case map[string]interface{}:
540 hashContext = t
541 default:
542 p.panicf("Key '%s' was already created as a hash.", keyContext)
543 }
544 }
545
546 p.context = keyContext
547 if array {
548
549
550 k := key[len(key)-1]
551 if _, ok := hashContext[k]; !ok {
552 hashContext[k] = make([]map[string]interface{}, 0, 4)
553 }
554
555
556
557 if hash, ok := hashContext[k].([]map[string]interface{}); ok {
558 hashContext[k] = append(hash, make(map[string]interface{}))
559 } else {
560 p.panicf("Key '%s' was already created and cannot be used as an array.", key)
561 }
562 } else {
563 p.setValue(key[len(key)-1], make(map[string]interface{}))
564 }
565 p.context = append(p.context, key[len(key)-1])
566 }
567
568
// set stores val under key in the current context (setValue) and then records
// its type and position (setType).
func (p *parser) set(key string, val interface{}, typ tomlType, pos Position) {
	p.setValue(key, val)
	p.setType(key, typ, pos)
}
573
574
575
576
577 func (p *parser) setValue(key string, value interface{}) {
578 var (
579 tmpHash interface{}
580 ok bool
581 hash = p.mapping
582 keyContext Key
583 )
584 for _, k := range p.context {
585 keyContext = append(keyContext, k)
586 if tmpHash, ok = hash[k]; !ok {
587 p.bug("Context for key '%s' has not been established.", keyContext)
588 }
589 switch t := tmpHash.(type) {
590 case []map[string]interface{}:
591
592
593 hash = t[len(t)-1]
594 case map[string]interface{}:
595 hash = t
596 default:
597 p.panicf("Key '%s' has already been defined.", keyContext)
598 }
599 }
600 keyContext = append(keyContext, key)
601
602 if _, ok := hash[key]; ok {
603
604
605
606
607
608
609
610
611
612 if p.isArray(keyContext) {
613 p.removeImplicit(keyContext)
614 hash[key] = value
615 return
616 }
617 if p.isImplicit(keyContext) {
618 p.removeImplicit(keyContext)
619 return
620 }
621
622
623
624 p.panicf("Key '%s' has already been defined.", keyContext)
625 }
626
627 hash[key] = value
628 }
629
630
631
632
633
634
635 func (p *parser) setType(key string, typ tomlType, pos Position) {
636 keyContext := make(Key, 0, len(p.context)+1)
637 keyContext = append(keyContext, p.context...)
638 if len(key) > 0 {
639 keyContext = append(keyContext, key)
640 }
641
642
643
644 if len(keyContext) == 0 {
645 keyContext = Key{""}
646 }
647 p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos}
648 }
649
650
651
652 func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = struct{}{} }
653 func (p *parser) removeImplicit(key Key) { delete(p.implicits, key.String()) }
654 func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok }
655 func (p *parser) isArray(key Key) bool { return p.keyInfo[key.String()].tomlType == tomlArray }
656 func (p *parser) addImplicitContext(key Key) { p.addImplicit(key); p.addContext(key, false) }
657
658
659 func (p *parser) current() string {
660 if len(p.currentKey) == 0 {
661 return p.context.String()
662 }
663 if len(p.context) == 0 {
664 return p.currentKey
665 }
666 return fmt.Sprintf("%s.%s", p.context, p.currentKey)
667 }
668
// stripFirstNewline removes a single leading line break ("\n" or "\r\n")
// from s, if there is one.
func stripFirstNewline(s string) string {
	switch {
	case strings.HasPrefix(s, "\n"):
		return s[1:]
	case strings.HasPrefix(s, "\r\n"):
		return s[2:]
	}
	return s
}
678
679
680
681
682
683
684
685 func (p *parser) stripEscapedNewlines(s string) string {
686 var b strings.Builder
687 var i int
688 for {
689 ix := strings.Index(s[i:], `\`)
690 if ix < 0 {
691 b.WriteString(s)
692 return b.String()
693 }
694 i += ix
695
696 if len(s) > i+1 && s[i+1] == '\\' {
697
698 i += 2
699 continue
700 }
701
702 j := i + 1
703 whitespaceLoop:
704 for ; j < len(s); j++ {
705 switch s[j] {
706 case ' ', '\t', '\r', '\n':
707 default:
708 break whitespaceLoop
709 }
710 }
711 if j == i+1 {
712
713 i++
714 continue
715 }
716 if !strings.Contains(s[i:j], "\n") {
717
718
719
720 i++
721 continue
722 }
723 b.WriteString(s[:i])
724 s = s[j:]
725 i = 0
726 }
727 }
728
729 func (p *parser) replaceEscapes(it item, str string) string {
730 replaced := make([]rune, 0, len(str))
731 s := []byte(str)
732 r := 0
733 for r < len(s) {
734 if s[r] != '\\' {
735 c, size := utf8.DecodeRune(s[r:])
736 r += size
737 replaced = append(replaced, c)
738 continue
739 }
740 r += 1
741 if r >= len(s) {
742 p.bug("Escape sequence at end of string.")
743 return ""
744 }
745 switch s[r] {
746 default:
747 p.bug("Expected valid escape code after \\, but got %q.", s[r])
748 case ' ', '\t':
749 p.panicItemf(it, "invalid escape: '\\%c'", s[r])
750 case 'b':
751 replaced = append(replaced, rune(0x0008))
752 r += 1
753 case 't':
754 replaced = append(replaced, rune(0x0009))
755 r += 1
756 case 'n':
757 replaced = append(replaced, rune(0x000A))
758 r += 1
759 case 'f':
760 replaced = append(replaced, rune(0x000C))
761 r += 1
762 case 'r':
763 replaced = append(replaced, rune(0x000D))
764 r += 1
765 case 'e':
766 if p.tomlNext {
767 replaced = append(replaced, rune(0x001B))
768 r += 1
769 }
770 case '"':
771 replaced = append(replaced, rune(0x0022))
772 r += 1
773 case '\\':
774 replaced = append(replaced, rune(0x005C))
775 r += 1
776 case 'x':
777 if p.tomlNext {
778 escaped := p.asciiEscapeToUnicode(it, s[r+1:r+3])
779 replaced = append(replaced, escaped)
780 r += 3
781 }
782 case 'u':
783
784
785
786 escaped := p.asciiEscapeToUnicode(it, s[r+1:r+5])
787 replaced = append(replaced, escaped)
788 r += 5
789 case 'U':
790
791
792
793 escaped := p.asciiEscapeToUnicode(it, s[r+1:r+9])
794 replaced = append(replaced, escaped)
795 r += 9
796 }
797 }
798 return string(replaced)
799 }
800
801 func (p *parser) asciiEscapeToUnicode(it item, bs []byte) rune {
802 s := string(bs)
803 hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
804 if err != nil {
805 p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)
806 }
807 if !utf8.ValidRune(rune(hex)) {
808 p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s)
809 }
810 return rune(hex)
811 }
812
View as plain text