1
2
3
4
5 package json
6
7 import (
8 "bytes"
9 "errors"
10 "io"
11 "math"
12 "strconv"
13 "unicode/utf16"
14 "unicode/utf8"
15 )
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// DecodeOptions configures how a Decoder parses JSON.
// The zero value is what the package-level NewDecoder uses.
type DecodeOptions struct {
	// NOTE(review): both embedded types are declared elsewhere in the package;
	// judging by their names they force keyed literals and forbid comparison —
	// confirm against their definitions.
	requireKeyedLiterals
	nonComparable

	// AllowDuplicateNames turns off the duplicate-name check on object members.
	// When false, ReadToken and ReadValue report a repeated name within the
	// same object as a SyntacticError.
	AllowDuplicateNames bool

	// AllowInvalidUTF8 turns off UTF-8 validation of JSON strings.
	// Its negation is passed as the validateUTF8 argument when consuming strings.
	AllowInvalidUTF8 bool
}
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Decoder is a streaming decoder that reads JSON tokens and values
// from either an io.Reader or an in-memory buffer.
type Decoder struct {
	// state is declared elsewhere in the package; the methods in this file
	// reach the token stack (tokens), the name stack (names), and the
	// duplicate-name tracking (namespaces) through it.
	state
	// decodeBuffer holds the input buffer and the read/peek positions.
	decodeBuffer
	// options is the configuration captured at the last reset.
	options DecodeOptions

	// stringCache is not referenced by the code in this file.
	stringCache *stringCache
}
104
105
106
107
108
109
110
111
112
113
114
// decodeBuffer is the input buffer of a Decoder together with the positions
// that describe what has already been consumed from it.
type decodeBuffer struct {
	// peekPos is non-zero while a PeekKind result is cached: a positive value
	// is the index in buf of the first byte of the next token, and -1 means
	// that peekErr holds the error to report on the next read.
	peekPos int
	peekErr error

	buf       []byte // buffered input; buf[prevEnd:] has not been consumed yet
	prevStart int    // start index in buf of the previously read token or value
	prevEnd   int    // end index in buf of the previously read token or value

	// baseOffset is added to a position within buf to obtain the
	// absolute offset relative to the start of the input stream.
	baseOffset int64

	// rd is the source of additional data; when nil, fetch cannot obtain
	// more input and reports io.ErrUnexpectedEOF.
	rd io.Reader
}
129
130
131
132
133
134
// NewDecoder constructs a new streaming Decoder reading from r,
// configured with the zero-value DecodeOptions.
// It panics if r is nil (see DecodeOptions.ResetDecoder).
func NewDecoder(r io.Reader) *Decoder {
	return DecodeOptions{}.NewDecoder(r)
}
138
139
140
141 func (o DecodeOptions) NewDecoder(r io.Reader) *Decoder {
142 d := new(Decoder)
143 o.ResetDecoder(d, r)
144 return d
145 }
146
147
148
149 func (o DecodeOptions) ResetDecoder(d *Decoder, r io.Reader) {
150 if d == nil {
151 panic("json: invalid nil Decoder")
152 }
153 if r == nil {
154 panic("json: invalid nil io.Reader")
155 }
156 d.reset(nil, r, o)
157 }
158
159 func (d *Decoder) reset(b []byte, r io.Reader, o DecodeOptions) {
160 d.state.reset()
161 d.decodeBuffer = decodeBuffer{buf: b, rd: r}
162 d.options = o
163 }
164
165
166
// Reset resets the decoder so that it reads from r,
// keeping the options it was previously configured with.
// It panics if r is nil (see DecodeOptions.ResetDecoder).
func (d *Decoder) Reset(r io.Reader) {
	d.options.ResetDecoder(d, r)
}
170
// errBufferWriteAfterNext is reported (wrapped in an ioError) by fetch when
// the underlying *bytes.Buffer has new data although the decoder already
// took ownership of its earlier contents via bytes.Buffer.Next.
var errBufferWriteAfterNext = errors.New("invalid bytes.Buffer.Write call after calling bytes.Buffer.Next")
172
173
174
// fetch reads at least one more byte from the underlying io.Reader into buf.
// It returns io.ErrUnexpectedEOF if no more data can be obtained.
//
// fetch may move the buffered data and change baseOffset, so any position
// into buf that the caller holds must be re-derived from its absolute
// offset after the call.
func (d *Decoder) fetch() error {
	if d.rd == nil {
		return io.ErrUnexpectedEOF
	}

	// Inform the object name stack that the buffer content is about to change.
	d.names.copyQuotedBuffer(d.buf)

	// Specialize bytes.Buffer: take its unread bytes directly without copying.
	if bb, ok := d.rd.(*bytes.Buffer); ok {
		switch {
		case bb.Len() == 0:
			return io.ErrUnexpectedEOF
		case len(d.buf) == 0:
			d.buf = bb.Next(bb.Len()) // "read" all data in the buffer
			return nil
		default:
			// The decoder already holds data taken from the bytes.Buffer,
			// yet the bytes.Buffer contains more data. That only happens when
			// something wrote to the bytes.Buffer after the decoder called
			// Next on it. The slice returned by Next is only valid until the
			// next write, so the data held in d.buf may have been overwritten.
			// Report this misuse instead of continuing.
			return &ioError{action: "read", err: errBufferWriteAfterNext}
		}
	}

	// Allocate the initial buffer if empty.
	if cap(d.buf) == 0 {
		d.buf = make([]byte, 0, 64)
	}

	// Decide whether to grow the buffer.
	const maxBufferSize = 4 << 10
	const growthSizeFactor = 2 // factor by which the capacity is multiplied
	const growthRateFactor = 2 // divisor applied to the consumed byte count
	// By default, grow only while doubling stays within maxBufferSize.
	grow := cap(d.buf) <= maxBufferSize/growthSizeFactor
	// Additionally require that enough input has already been processed
	// relative to the current capacity.
	grow = grow && int64(cap(d.buf)) < d.previousOffsetEnd()/growthRateFactor
	// If prevStart==0, nothing can be discarded from the front of the buffer.
	// Grow if less than 25% of the capacity remains available.
	// Note that this may cause the buffer to exceed maxBufferSize.
	grow = grow || (d.prevStart == 0 && len(d.buf) >= 3*cap(d.buf)/4)

	if grow {
		// Allocate a new buffer and copy over the data from prevStart onward.
		buf := make([]byte, 0, cap(d.buf)*growthSizeFactor)
		d.buf = append(buf, d.buf[d.prevStart:]...)
	} else {
		// Move the data from prevStart onward to the front of the buffer.
		n := copy(d.buf[:cap(d.buf)], d.buf[d.prevStart:])
		d.buf = d.buf[:n]
	}
	// The first prevStart bytes were dropped; shift all positions accordingly.
	d.baseOffset += int64(d.prevStart)
	d.prevEnd -= d.prevStart
	d.prevStart = 0

	// Read more data into the unused capacity of the buffer.
	for {
		n, err := d.rd.Read(d.buf[len(d.buf):cap(d.buf)])
		switch {
		case n > 0:
			d.buf = d.buf[:len(d.buf)+n]
			return nil // ignore errors if any bytes are read
		case err == io.EOF:
			return io.ErrUnexpectedEOF
		case err != nil:
			return &ioError{action: "read", err: err}
		default:
			continue // Read returned (0, nil); try again
		}
	}
}
254
// invalidateBufferByte is the byte that invalidatePreviousRead writes over
// the first byte of the previously returned token or value.
const invalidateBufferByte = '#'
256
257
258
259
260
261 func (d *decodeBuffer) invalidatePreviousRead() {
262
263
264 isBytesBuffer := func(r io.Reader) bool {
265 _, ok := r.(*bytes.Buffer)
266 return ok
267 }
268 if d.rd != nil && !isBytesBuffer(d.rd) && d.prevStart < d.prevEnd && uint(d.prevStart) < uint(len(d.buf)) {
269 d.buf[d.prevStart] = invalidateBufferByte
270 d.prevStart = d.prevEnd
271 }
272 }
273
274
// needMore reports whether pos is exactly at the end of the buffered data,
// meaning more input is needed before the byte at pos can be examined.
func (d *decodeBuffer) needMore(pos int) bool {
	// NOTE: The equality check is deliberately simple; pos is expected
	// never to exceed len(d.buf).
	return pos == len(d.buf)
}
279
280
281
282
283 func (d *decodeBuffer) injectSyntacticErrorWithPosition(err error, pos int) error {
284 if serr, ok := err.(*SyntacticError); ok {
285 return serr.withOffset(d.baseOffset + int64(pos))
286 }
287 return err
288 }
289
// previousOffsetStart returns the absolute input offset at which
// the previously read token or value starts.
func (d *decodeBuffer) previousOffsetStart() int64 { return d.baseOffset + int64(d.prevStart) }

// previousOffsetEnd returns the absolute input offset at which
// the previously read token or value ends.
func (d *decodeBuffer) previousOffsetEnd() int64 { return d.baseOffset + int64(d.prevEnd) }

// previousBuffer returns the buffered bytes of the previously read token or value.
func (d *decodeBuffer) previousBuffer() []byte { return d.buf[d.prevStart:d.prevEnd] }

// unreadBuffer returns the buffered bytes that follow the previously read token or value.
func (d *decodeBuffer) unreadBuffer() []byte { return d.buf[d.prevEnd:len(d.buf)] }
294
295
296
// PeekKind returns the kind of the next token without consuming it.
// If an error occurs, it returns invalidKind and caches the error so that
// the next ReadToken or ReadValue call reports it.
func (d *Decoder) PeekKind() Kind {
	// Check whether we have a cached peek result.
	if d.peekPos > 0 {
		return Kind(d.buf[d.peekPos]).normalize()
	}

	var err error
	d.invalidatePreviousRead()
	pos := d.prevEnd

	// Consume leading whitespace.
	pos += consumeWhitespace(d.buf[pos:])
	if d.needMore(pos) {
		if pos, err = d.consumeWhitespace(pos); err != nil {
			// Running out of input at the top level (StackDepth of 0)
			// is a clean end of the stream, not a truncation.
			if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
				err = io.EOF
			}
			d.peekPos, d.peekErr = -1, err
			return invalidKind
		}
	}

	// Consume a colon or comma, followed by any whitespace.
	var delim byte
	if c := d.buf[pos]; c == ':' || c == ',' {
		delim = c
		pos += 1
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				d.peekPos, d.peekErr = -1, err
				return invalidKind
			}
		}
	}
	next := Kind(d.buf[pos]).normalize()
	if d.tokens.needDelim(next) != delim {
		// The delimiter found does not match the one required here.
		// Report the error at the first non-whitespace byte after
		// the previous token.
		pos = d.prevEnd // restore position to right after leading whitespace
		pos += consumeWhitespace(d.buf[pos:])
		err = d.tokens.checkDelim(delim, next)
		err = d.injectSyntacticErrorWithPosition(err, pos)
		d.peekPos, d.peekErr = -1, err
		return invalidKind
	}

	// Cache the position of the next token so that ReadToken and readValue
	// can skip the whitespace and delimiter handling. A successful peek
	// always yields pos > 0 whenever a delimiter or whitespace was consumed;
	// a pos of 0 is simply treated as "nothing cached" by the readers,
	// which then redo this work.
	d.peekPos, d.peekErr = pos, nil
	return next
}
349
350
351
352 func (d *Decoder) SkipValue() error {
353 switch d.PeekKind() {
354 case '{', '[':
355
356
357 depth := d.tokens.depth()
358 for {
359 if _, err := d.ReadToken(); err != nil {
360 return err
361 }
362 if depth >= d.tokens.depth() {
363 return nil
364 }
365 }
366 default:
367
368
369 if _, err := d.ReadValue(); err != nil {
370 return err
371 }
372 return nil
373 }
374 }
375
376
377
378
// ReadToken reads the next Token, advancing the read position.
// String and number tokens refer to the decoder's internal buffer, which is
// invalidated or moved by subsequent read, peek, or skip calls.
// It returns io.EOF if the input ends cleanly at the top level.
func (d *Decoder) ReadToken() (Token, error) {
	// Determine the next kind.
	var err error
	var next Kind
	pos := d.peekPos
	if pos != 0 {
		// Use the cached peek result.
		if d.peekErr != nil {
			err := d.peekErr
			d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
			return Token{}, err
		}
		next = Kind(d.buf[pos]).normalize()
		d.peekPos = 0 // reset cache
	} else {
		d.invalidatePreviousRead()
		pos = d.prevEnd

		// Consume leading whitespace.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				// Running out of input at the top level is a clean EOF.
				if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
					err = io.EOF
				}
				return Token{}, err
			}
		}

		// Consume a colon or comma, followed by any whitespace.
		var delim byte
		if c := d.buf[pos]; c == ':' || c == ',' {
			delim = c
			pos += 1
			pos += consumeWhitespace(d.buf[pos:])
			if d.needMore(pos) {
				if pos, err = d.consumeWhitespace(pos); err != nil {
					return Token{}, err
				}
			}
		}
		next = Kind(d.buf[pos]).normalize()
		if d.tokens.needDelim(next) != delim {
			pos = d.prevEnd // restore position to right after leading whitespace
			pos += consumeWhitespace(d.buf[pos:])
			err = d.tokens.checkDelim(delim, next)
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
	}

	// Handle the next token.
	var n int
	switch next {
	case 'n':
		// Try the non-fetching fast path first; fall back to the
		// method that can fetch more input.
		if consumeNull(d.buf[pos:]) == 0 {
			pos, err = d.consumeLiteral(pos, "null")
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += len("null")
		}
		if err = d.tokens.appendLiteral(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("null")) // report position at start of literal
		}
		d.prevStart, d.prevEnd = pos, pos
		return Null, nil

	case 'f':
		if consumeFalse(d.buf[pos:]) == 0 {
			pos, err = d.consumeLiteral(pos, "false")
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += len("false")
		}
		if err = d.tokens.appendLiteral(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("false")) // report position at start of literal
		}
		d.prevStart, d.prevEnd = pos, pos
		return False, nil

	case 't':
		if consumeTrue(d.buf[pos:]) == 0 {
			pos, err = d.consumeLiteral(pos, "true")
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += len("true")
		}
		if err = d.tokens.appendLiteral(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("true")) // report position at start of literal
		}
		d.prevStart, d.prevEnd = pos, pos
		return True, nil

	case '"':
		var flags valueFlags
		if n = consumeSimpleString(d.buf[pos:]); n == 0 {
			// The slow path may fetch and thereby shift the buffer, so the
			// string length is computed from absolute offsets.
			oldAbsPos := d.baseOffset + int64(pos)
			pos, err = d.consumeString(&flags, pos)
			newAbsPos := d.baseOffset + int64(pos)
			n = int(newAbsPos - oldAbsPos)
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += n
		}
		// Check for duplicate names if this string is an object name.
		if !d.options.AllowDuplicateNames && d.tokens.last.needObjectName() {
			if !d.tokens.last.isValidNamespace() {
				return Token{}, errInvalidNamespace
			}
			if d.tokens.last.isActiveNamespace() && !d.namespaces.last().insertQuoted(d.buf[pos-n:pos], flags.isVerbatim()) {
				err = &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string
			}
			d.names.replaceLastQuotedOffset(pos - n) // only reached if the name was not a duplicate
		}
		if err = d.tokens.appendString(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string
		}
		d.prevStart, d.prevEnd = pos-n, pos
		// The token carries the absolute start offset of the string.
		return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil

	case '0':
		// The fast path cannot be trusted when the number runs up to the end
		// of the buffered data, since more digits may follow in the input.
		if n = consumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
			oldAbsPos := d.baseOffset + int64(pos)
			pos, err = d.consumeNumber(pos)
			newAbsPos := d.baseOffset + int64(pos)
			n = int(newAbsPos - oldAbsPos)
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += n
		}
		if err = d.tokens.appendNumber(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of number
		}
		d.prevStart, d.prevEnd = pos-n, pos
		// The token carries the absolute start offset of the number.
		return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil

	case '{':
		if err = d.tokens.pushObject(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
		if !d.options.AllowDuplicateNames {
			d.names.push()
			d.namespaces.push()
		}
		pos += 1
		d.prevStart, d.prevEnd = pos, pos
		return ObjectStart, nil

	case '}':
		if err = d.tokens.popObject(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
		if !d.options.AllowDuplicateNames {
			d.names.pop()
			d.namespaces.pop()
		}
		pos += 1
		d.prevStart, d.prevEnd = pos, pos
		return ObjectEnd, nil

	case '[':
		if err = d.tokens.pushArray(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
		pos += 1
		d.prevStart, d.prevEnd = pos, pos
		return ArrayStart, nil

	case ']':
		if err = d.tokens.popArray(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
		pos += 1
		d.prevStart, d.prevEnd = pos, pos
		return ArrayEnd, nil

	default:
		err = newInvalidCharacterError(d.buf[pos:], "at start of token")
		return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
	}
}
571
// valueFlags is a bit set of properties observed while consuming a JSON
// value. The zero value means "verbatim and canonical".
type valueFlags uint

const (
	// stringNonVerbatim is set when a string cannot be used as-is,
	// e.g. because it contains an escape sequence or invalid UTF-8.
	stringNonVerbatim valueFlags = 1 << iota
	// stringNonCanonical is set when a string is not in canonical form.
	stringNonCanonical
)

// set turns on every bit of f2 in f.
func (f *valueFlags) set(f2 valueFlags) {
	*f = *f | f2
}

// isVerbatim reports whether stringNonVerbatim is unset.
func (f valueFlags) isVerbatim() bool {
	return f&stringNonVerbatim == 0
}

// isCanonical reports whether stringNonCanonical is unset.
func (f valueFlags) isCanonical() bool {
	return f&stringNonCanonical == 0
}
585
586
587
588
589
590
591
592
// ReadValue reads the next complete JSON value (including nested objects
// and arrays) and returns its raw bytes. The returned slice aliases the
// decoder's internal buffer, which later read, peek, or skip calls may
// invalidate or move. The flags gathered while parsing are discarded.
func (d *Decoder) ReadValue() (RawValue, error) {
	var flags valueFlags
	return d.readValue(&flags)
}
// readValue is the implementation of ReadValue. In addition to returning
// the raw value, it records properties observed while parsing in flags.
func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) {
	// Determine the next kind.
	var err error
	var next Kind
	pos := d.peekPos
	if pos != 0 {
		// Use the cached peek result.
		if d.peekErr != nil {
			err := d.peekErr
			d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
			return nil, err
		}
		next = Kind(d.buf[pos]).normalize()
		d.peekPos = 0 // reset cache
	} else {
		d.invalidatePreviousRead()
		pos = d.prevEnd

		// Consume leading whitespace.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				// Running out of input at the top level is a clean EOF.
				if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
					err = io.EOF
				}
				return nil, err
			}
		}

		// Consume a colon or comma, followed by any whitespace.
		var delim byte
		if c := d.buf[pos]; c == ':' || c == ',' {
			delim = c
			pos += 1
			pos += consumeWhitespace(d.buf[pos:])
			if d.needMore(pos) {
				if pos, err = d.consumeWhitespace(pos); err != nil {
					return nil, err
				}
			}
		}
		next = Kind(d.buf[pos]).normalize()
		if d.tokens.needDelim(next) != delim {
			pos = d.prevEnd // restore position to right after leading whitespace
			pos += consumeWhitespace(d.buf[pos:])
			err = d.tokens.checkDelim(delim, next)
			return nil, d.injectSyntacticErrorWithPosition(err, pos)
		}
	}

	// Consume the whole value. consumeValue may fetch and thereby shift the
	// buffer, so the value length is computed from absolute offsets.
	oldAbsPos := d.baseOffset + int64(pos)
	pos, err = d.consumeValue(flags, pos)
	newAbsPos := d.baseOffset + int64(pos)
	n := int(newAbsPos - oldAbsPos)
	if err != nil {
		return nil, d.injectSyntacticErrorWithPosition(err, pos)
	}
	// Record the value in the token state machine.
	switch next {
	case 'n', 't', 'f':
		err = d.tokens.appendLiteral()
	case '"':
		// Check for duplicate names if this string is an object name.
		if !d.options.AllowDuplicateNames && d.tokens.last.needObjectName() {
			if !d.tokens.last.isValidNamespace() {
				err = errInvalidNamespace
				break
			}
			if d.tokens.last.isActiveNamespace() && !d.namespaces.last().insertQuoted(d.buf[pos-n:pos], flags.isVerbatim()) {
				err = &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
				break
			}
			d.names.replaceLastQuotedOffset(pos - n) // only reached if the name was not a duplicate
		}
		err = d.tokens.appendString()
	case '0':
		err = d.tokens.appendNumber()
	case '{':
		// The entire object was consumed at once, so push and immediately pop.
		if err = d.tokens.pushObject(); err != nil {
			break
		}
		if err = d.tokens.popObject(); err != nil {
			panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
		}
	case '[':
		// The entire array was consumed at once, so push and immediately pop.
		if err = d.tokens.pushArray(); err != nil {
			break
		}
		if err = d.tokens.popArray(); err != nil {
			panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
		}
	}
	if err != nil {
		return nil, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of value
	}
	d.prevEnd = pos
	d.prevStart = pos - n
	// Cap the capacity so that appending to the result cannot
	// overwrite unread data in the buffer.
	return d.buf[pos-n : pos : pos], nil
}
695
696
697 func (d *Decoder) checkEOF() error {
698 switch pos, err := d.consumeWhitespace(d.prevEnd); err {
699 case nil:
700 return newInvalidCharacterError(d.buf[pos:], "after top-level value")
701 case io.ErrUnexpectedEOF:
702 return nil
703 default:
704 return err
705 }
706 }
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725 func (d *Decoder) consumeWhitespace(pos int) (newPos int, err error) {
726 for {
727 pos += consumeWhitespace(d.buf[pos:])
728 if d.needMore(pos) {
729 absPos := d.baseOffset + int64(pos)
730 err = d.fetch()
731 pos = int(absPos - d.baseOffset)
732 if err != nil {
733 return pos, err
734 }
735 continue
736 }
737 return pos, nil
738 }
739 }
740
741
742
743 func (d *Decoder) consumeValue(flags *valueFlags, pos int) (newPos int, err error) {
744 for {
745 var n int
746 var err error
747 switch next := Kind(d.buf[pos]).normalize(); next {
748 case 'n':
749 if n = consumeNull(d.buf[pos:]); n == 0 {
750 n, err = consumeLiteral(d.buf[pos:], "null")
751 }
752 case 'f':
753 if n = consumeFalse(d.buf[pos:]); n == 0 {
754 n, err = consumeLiteral(d.buf[pos:], "false")
755 }
756 case 't':
757 if n = consumeTrue(d.buf[pos:]); n == 0 {
758 n, err = consumeLiteral(d.buf[pos:], "true")
759 }
760 case '"':
761 if n = consumeSimpleString(d.buf[pos:]); n == 0 {
762 return d.consumeString(flags, pos)
763 }
764 case '0':
765
766
767 if n = consumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
768 return d.consumeNumber(pos)
769 }
770 case '{':
771 return d.consumeObject(flags, pos)
772 case '[':
773 return d.consumeArray(flags, pos)
774 default:
775 return pos, newInvalidCharacterError(d.buf[pos:], "at start of value")
776 }
777 if err == io.ErrUnexpectedEOF {
778 absPos := d.baseOffset + int64(pos)
779 err = d.fetch()
780 pos = int(absPos - d.baseOffset)
781 if err != nil {
782 return pos, err
783 }
784 continue
785 }
786 return pos + n, err
787 }
788 }
789
790
791
792 func (d *Decoder) consumeLiteral(pos int, lit string) (newPos int, err error) {
793 for {
794 n, err := consumeLiteral(d.buf[pos:], lit)
795 if err == io.ErrUnexpectedEOF {
796 absPos := d.baseOffset + int64(pos)
797 err = d.fetch()
798 pos = int(absPos - d.baseOffset)
799 if err != nil {
800 return pos, err
801 }
802 continue
803 }
804 return pos + n, err
805 }
806 }
807
808
809
810 func (d *Decoder) consumeString(flags *valueFlags, pos int) (newPos int, err error) {
811 var n int
812 for {
813 n, err = consumeStringResumable(flags, d.buf[pos:], n, !d.options.AllowInvalidUTF8)
814 if err == io.ErrUnexpectedEOF {
815 absPos := d.baseOffset + int64(pos)
816 err = d.fetch()
817 pos = int(absPos - d.baseOffset)
818 if err != nil {
819 return pos, err
820 }
821 continue
822 }
823 return pos + n, err
824 }
825 }
826
827
828
829 func (d *Decoder) consumeNumber(pos int) (newPos int, err error) {
830 var n int
831 var state consumeNumberState
832 for {
833 n, state, err = consumeNumberResumable(d.buf[pos:], n, state)
834
835
836 if err == io.ErrUnexpectedEOF || d.needMore(pos+n) {
837 mayTerminate := err == nil
838 absPos := d.baseOffset + int64(pos)
839 err = d.fetch()
840 pos = int(absPos - d.baseOffset)
841 if err != nil {
842 if mayTerminate && err == io.ErrUnexpectedEOF {
843 return pos + n, nil
844 }
845 return pos, err
846 }
847 continue
848 }
849 return pos + n, err
850 }
851 }
852
853
854
// consumeObject consumes a complete JSON object starting at d.buf[pos:],
// fetching more input as needed, and returns the position just past the
// closing '}'. Flags observed in names and nested values are merged into
// flags. It panics if d.buf[pos] is not '{'.
func (d *Decoder) consumeObject(flags *valueFlags, pos int) (newPos int, err error) {
	var n int
	var names *objectNamespace
	if !d.options.AllowDuplicateNames {
		// Track the names of this object for the duration of the call.
		d.namespaces.push()
		defer d.namespaces.pop()
		names = d.namespaces.last()
	}

	// Handle before start.
	if d.buf[pos] != '{' {
		panic("BUG: consumeObject must be called with a buffer that starts with '{'")
	}
	pos++

	// Handle after start: an empty object ends right here.
	pos += consumeWhitespace(d.buf[pos:])
	if d.needMore(pos) {
		if pos, err = d.consumeWhitespace(pos); err != nil {
			return pos, err
		}
	}
	if d.buf[pos] == '}' {
		pos++
		return pos, nil
	}

	for {
		// Handle before name.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		var flags2 valueFlags
		if n = consumeSimpleString(d.buf[pos:]); n == 0 {
			// The slow path may fetch and thereby shift the buffer, so the
			// name length is computed from absolute offsets.
			oldAbsPos := d.baseOffset + int64(pos)
			pos, err = d.consumeString(&flags2, pos)
			newAbsPos := d.baseOffset + int64(pos)
			n = int(newAbsPos - oldAbsPos)
			flags.set(flags2)
			if err != nil {
				return pos, err
			}
		} else {
			pos += n
		}
		// names is only non-nil (and only consulted) when duplicates are disallowed.
		if !d.options.AllowDuplicateNames && !names.insertQuoted(d.buf[pos-n:pos], flags2.isVerbatim()) {
			return pos - n, &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
		}

		// Handle after name.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		if d.buf[pos] != ':' {
			return pos, newInvalidCharacterError(d.buf[pos:], "after object name (expecting ':')")
		}
		pos++

		// Handle before value.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		pos, err = d.consumeValue(flags, pos)
		if err != nil {
			return pos, err
		}

		// Handle after value.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		switch d.buf[pos] {
		case ',':
			pos++
			continue
		case '}':
			pos++
			return pos, nil
		default:
			return pos, newInvalidCharacterError(d.buf[pos:], "after object value (expecting ',' or '}')")
		}
	}
}
950
951
952
// consumeArray consumes a complete JSON array starting at d.buf[pos:],
// fetching more input as needed, and returns the position just past the
// closing ']'. Flags observed in nested values are merged into flags.
// It panics if d.buf[pos] is not '['.
func (d *Decoder) consumeArray(flags *valueFlags, pos int) (newPos int, err error) {
	// Handle before start.
	if d.buf[pos] != '[' {
		panic("BUG: consumeArray must be called with a buffer that starts with '['")
	}
	pos++

	// Handle after start: an empty array ends right here.
	pos += consumeWhitespace(d.buf[pos:])
	if d.needMore(pos) {
		if pos, err = d.consumeWhitespace(pos); err != nil {
			return pos, err
		}
	}
	if d.buf[pos] == ']' {
		pos++
		return pos, nil
	}

	for {
		// Handle before value.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		pos, err = d.consumeValue(flags, pos)
		if err != nil {
			return pos, err
		}

		// Handle after value.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		switch d.buf[pos] {
		case ',':
			pos++
			continue
		case ']':
			pos++
			return pos, nil
		default:
			return pos, newInvalidCharacterError(d.buf[pos:], "after array value (expecting ',' or ']')")
		}
	}
}
1004
1005
1006
1007
1008
// InputOffset returns the absolute input offset immediately after
// the most recently read token or value.
func (d *Decoder) InputOffset() int64 {
	return d.previousOffsetEnd()
}
1012
1013
1014
1015
1016
// UnreadBuffer returns the data still held in the internal buffer after the
// most recently read token or value. The slice aliases the decoder's buffer,
// which later decoder calls may overwrite or move.
func (d *Decoder) UnreadBuffer() []byte {
	return d.unreadBuffer()
}
1020
1021
1022
1023
1024
1025
// StackDepth returns the current nesting depth of the decoder,
// which is one less than the depth of the internal token stack
// (the internal stack counts the top level as its first entry).
func (d *Decoder) StackDepth() int {
	// NOTE(review): presumably mirrored by an Encoder counterpart — keep in sync if so.
	return d.tokens.depth() - 1
}
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043 func (d *Decoder) StackIndex(i int) (Kind, int) {
1044
1045 switch s := d.tokens.index(i); {
1046 case i > 0 && s.isObject():
1047 return '{', s.length()
1048 case i > 0 && s.isArray():
1049 return '[', s.length()
1050 default:
1051 return 0, s.length()
1052 }
1053 }
1054
1055
1056
1057
// StackPointer returns the string built by appendStackPointer for the
// decoder's current position.
// NOTE(review): presumably a JSON Pointer (RFC 6901) to the most recently
// read value — confirm against appendStackPointer.
func (d *Decoder) StackPointer() string {
	// Let the name stack copy what it needs out of the current buffer first.
	d.names.copyQuotedBuffer(d.buf)
	return string(d.appendStackPointer(nil))
}
1062
1063
// consumeWhitespace returns the number of leading JSON whitespace bytes
// (space, tab, carriage return, line feed) in b.
func consumeWhitespace(b []byte) (n int) {
	for n < len(b) {
		if c := b[n]; c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			break
		}
		n++
	}
	return n
}
1071
1072
1073
// consumeNull returns the length of the literal "null" if b starts with it,
// and 0 otherwise (including when b is too short).
func consumeNull(b []byte) int {
	const want = "null"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
1082
1083
1084
// consumeFalse returns the length of the literal "false" if b starts with it,
// and 0 otherwise (including when b is too short).
func consumeFalse(b []byte) int {
	const want = "false"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
1093
1094
1095
// consumeTrue returns the length of the literal "true" if b starts with it,
// and 0 otherwise (including when b is too short).
func consumeTrue(b []byte) int {
	const want = "true"
	if len(b) < len(want) || string(b[:len(want)]) != want {
		return 0
	}
	return len(want)
}
1104
1105
1106
1107 func consumeLiteral(b []byte, lit string) (n int, err error) {
1108 for i := 0; i < len(b) && i < len(lit); i++ {
1109 if b[i] != lit[i] {
1110 return i, newInvalidCharacterError(b[i:], "within literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
1111 }
1112 }
1113 if len(b) < len(lit) {
1114 return len(b), io.ErrUnexpectedEOF
1115 }
1116 return len(lit), nil
1117 }
1118
1119
1120
1121
1122
// consumeSimpleString returns the length of the JSON string at the start of
// b, provided the string is complete and consists solely of printable ASCII
// without escape sequences. It returns 0 in every other case, which tells
// the caller to fall back to the full string parser.
func consumeSimpleString(b []byte) (n int) {
	if len(b) == 0 || b[0] != '"' {
		return 0
	}
	for n = 1; n < len(b); n++ {
		c := b[n]
		if c == '"' {
			return n + 1 // include the closing quote
		}
		if c < ' ' || c == '\\' || c >= utf8.RuneSelf {
			return 0 // needs the full parser
		}
	}
	return 0 // no closing quote within b
}
1137
1138
1139
1140
1141
1142
// consumeString consumes the JSON string at the start of b and returns its
// length in bytes. It is consumeStringResumable started from offset 0:
// it returns io.ErrUnexpectedEOF if b holds only part of a string, and
// validates UTF-8 only if validateUTF8 is set. Properties observed while
// parsing are recorded in flags.
func consumeString(flags *valueFlags, b []byte, validateUTF8 bool) (n int, err error) {
	return consumeStringResumable(flags, b, 0, validateUTF8)
}
1146
1147
1148
// consumeStringResumable consumes the JSON string at the start of b,
// continuing from resumeOffset, which must be either 0 or the n returned by
// a previous call that reported io.ErrUnexpectedEOF on a shorter prefix of
// the same input.
//
// It returns the number of bytes consumed and nil once the closing quote is
// found. When b is truncated it returns io.ErrUnexpectedEOF together with an
// offset from which parsing can safely resume (never inside an escape
// sequence). Properties observed while parsing are recorded in flags.
func consumeStringResumable(flags *valueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
	// Consume the leading double quote.
	switch {
	case resumeOffset > 0:
		n = resumeOffset // already handled the leading quote
	case uint(len(b)) == 0:
		return n, io.ErrUnexpectedEOF
	case b[0] == '"':
		n++
	default:
		return n, newInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
	}

	// Consume every character in the string.
	for uint(len(b)) > uint(n) {
		// Optimize for long sequences of unescaped ASCII characters.
		noEscape := func(c byte) bool {
			return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
		}
		for uint(len(b)) > uint(n) && noEscape(b[n]) {
			n++
		}
		if uint(len(b)) <= uint(n) {
			return n, io.ErrUnexpectedEOF
		}

		// Check for the terminating double quote.
		if b[n] == '"' {
			n++
			return n, nil
		}

		switch r, rn := utf8.DecodeRune(b[n:]); {
		// Handle a multi-byte UTF-8 sequence.
		// ASCII was handled above, so any validly encoded rune
		// at this point is at least 2 bytes long.
		case rn > 1:
			n += rn
		// Handle an escape sequence.
		case r == '\\':
			flags.set(stringNonVerbatim)
			resumeOffset = n // on truncation, resume at the start of the escape
			if uint(len(b)) < uint(n+2) {
				return resumeOffset, io.ErrUnexpectedEOF
			}
			switch r := b[n+1]; r {
			case '/':
				// An escaped forward slash is accepted,
				// but marks the string as non-canonical.
				flags.set(stringNonCanonical)
				n += 2
			case '"', '\\', 'b', 'f', 'n', 'r', 't':
				n += 2
			case 'u':
				if uint(len(b)) < uint(n+6) {
					// Distinguish a truncated \uXXXX from one that is already invalid.
					if !hasEscapeSequencePrefix(b[n:]) {
						flags.set(stringNonCanonical)
						return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:])) + " within string"}
					}
					return resumeOffset, io.ErrUnexpectedEOF
				}
				v1, ok := parseHexUint16(b[n+2 : n+6])
				if !ok {
					flags.set(stringNonCanonical)
					return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+6])) + " within string"}
				}
				// The \uXXXX notation is only treated as canonical
				// for certain control characters.
				switch v1 {
				// These characters have a dedicated short escape.
				case '\b', '\f', '\n', '\r', '\t':
					flags.set(stringNonCanonical)
				default:
					// Anything that is not a control character is non-canonical.
					if v1 >= ' ' {
						flags.set(stringNonCanonical)
					} else {
						// For control characters, upper-case hex digits are non-canonical.
						for _, c := range b[n+2 : n+6] {
							if 'A' <= c && c <= 'F' {
								flags.set(stringNonCanonical)
							}
						}
					}
				}
				n += 6

				// A surrogate half must be followed by a second \uXXXX escape
				// that forms a valid surrogate pair with it.
				if validateUTF8 && utf16.IsSurrogate(rune(v1)) {
					if uint(len(b)) >= uint(n+2) && (b[n] != '\\' || b[n+1] != 'u') {
						return n, &SyntacticError{str: "invalid unpaired surrogate half within string"}
					}
					if uint(len(b)) < uint(n+6) {
						if !hasEscapeSequencePrefix(b[n:]) {
							flags.set(stringNonCanonical)
							return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:])) + " within string"}
						}
						return resumeOffset, io.ErrUnexpectedEOF
					}
					v2, ok := parseHexUint16(b[n+2 : n+6])
					if !ok {
						return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+6])) + " within string"}
					}
					if utf16.DecodeRune(rune(v1), rune(v2)) == utf8.RuneError {
						return n, &SyntacticError{str: "invalid surrogate pair in string"}
					}
					n += 6
				}
			default:
				flags.set(stringNonCanonical)
				return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+2])) + " within string"}
			}
		// Handle invalid UTF-8.
		case r == utf8.RuneError:
			if !utf8.FullRune(b[n:]) {
				return n, io.ErrUnexpectedEOF // the rune may simply be truncated
			}
			flags.set(stringNonVerbatim | stringNonCanonical)
			if validateUTF8 {
				return n, &SyntacticError{str: "invalid UTF-8 within string"}
			}
			n++
		// Handle an invalid control character.
		case r < ' ':
			flags.set(stringNonVerbatim | stringNonCanonical)
			return n, newInvalidCharacterError(b[n:], "within string (expecting non-control character)")
		default:
			panic("BUG: unhandled character " + quoteRune(b[n:]))
		}
	}
	return n, io.ErrUnexpectedEOF
}
1280
1281
1282
// hasEscapeSequencePrefix reports whether b could be the beginning of a
// \uXXXX escape sequence: a backslash, then 'u', then up to four hex digits.
// Bytes past the sixth are not examined, and an empty b yields true.
func hasEscapeSequencePrefix(b []byte) bool {
	for i, c := range b {
		switch i {
		case 0:
			if c != '\\' {
				return false
			}
		case 1:
			if c != 'u' {
				return false
			}
		case 2, 3, 4, 5:
			isHex := ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
			if !isHex {
				return false
			}
		}
	}
	return true
}
1296
1297
1298
1299
1300 func unescapeString(dst, src []byte) (v []byte, ok bool) {
1301
1302 if uint(len(src)) == 0 || src[0] != '"' {
1303 return dst, false
1304 }
1305 i, n := 1, 1
1306
1307
1308 for uint(len(src)) > uint(n) {
1309
1310 noEscape := func(c byte) bool {
1311 return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
1312 }
1313 for uint(len(src)) > uint(n) && noEscape(src[n]) {
1314 n++
1315 }
1316 if uint(len(src)) <= uint(n) {
1317 break
1318 }
1319
1320
1321 if src[n] == '"' {
1322 dst = append(dst, src[i:n]...)
1323 n++
1324 return dst, len(src) == n
1325 }
1326
1327 switch r, rn := utf8.DecodeRune(src[n:]); {
1328
1329
1330
1331 case rn > 1:
1332 n += rn
1333
1334 case r == '\\':
1335 dst = append(dst, src[i:n]...)
1336 if r < ' ' {
1337 return dst, false
1338 }
1339
1340
1341 if uint(len(src)) < uint(n+2) {
1342 return dst, false
1343 }
1344 switch r := src[n+1]; r {
1345 case '"', '\\', '/':
1346 dst = append(dst, r)
1347 n += 2
1348 case 'b':
1349 dst = append(dst, '\b')
1350 n += 2
1351 case 'f':
1352 dst = append(dst, '\f')
1353 n += 2
1354 case 'n':
1355 dst = append(dst, '\n')
1356 n += 2
1357 case 'r':
1358 dst = append(dst, '\r')
1359 n += 2
1360 case 't':
1361 dst = append(dst, '\t')
1362 n += 2
1363 case 'u':
1364 if uint(len(src)) < uint(n+6) {
1365 return dst, false
1366 }
1367 v1, ok := parseHexUint16(src[n+2 : n+6])
1368 if !ok {
1369 return dst, false
1370 }
1371 n += 6
1372
1373
1374 r := rune(v1)
1375 if utf16.IsSurrogate(r) {
1376 r = utf8.RuneError
1377 if uint(len(src)) >= uint(n+6) && src[n+0] == '\\' && src[n+1] == 'u' {
1378 if v2, ok := parseHexUint16(src[n+2 : n+6]); ok {
1379 if r = utf16.DecodeRune(rune(v1), rune(v2)); r != utf8.RuneError {
1380 n += 6
1381 }
1382 }
1383 }
1384 }
1385
1386 dst = utf8.AppendRune(dst, r)
1387 default:
1388 return dst, false
1389 }
1390 i = n
1391
1392 case r == utf8.RuneError:
1393
1394
1395 dst = append(dst, src[i:n]...)
1396 dst = append(dst, "\uFFFD"...)
1397 n += rn
1398 i = n
1399
1400 case r < ' ':
1401 dst = append(dst, src[i:n]...)
1402 return dst, false
1403 default:
1404 panic("BUG: unhandled character " + quoteRune(src[n:]))
1405 }
1406 }
1407 dst = append(dst, src[i:n]...)
1408 return dst, false
1409 }
1410
1411
1412
1413
1414
1415 func unescapeStringMayCopy(b []byte, isVerbatim bool) []byte {
1416
1417 if isVerbatim {
1418 return b[len(`"`) : len(b)-len(`"`)]
1419 }
1420 b, _ = unescapeString(make([]byte, 0, len(b)), b)
1421 return b
1422 }
1423
1424
1425
1426
1427
// consumeSimpleNumber returns the length of the non-negative integer at the
// start of b, i.e. either a single '0' or a run of digits beginning with
// '1'-'9'. It returns 0 if b does not start with such an integer or if the
// integer is directly followed by '.', 'e', or 'E', which tells the caller
// to fall back to the full number parser.
func consumeSimpleNumber(b []byte) (n int) {
	if len(b) == 0 {
		return 0
	}
	switch first := b[0]; {
	case first == '0':
		n = 1
	case '1' <= first && first <= '9':
		n = 1
		for n < len(b) && '0' <= b[n] && b[n] <= '9' {
			n++
		}
	default:
		return 0
	}
	if n < len(b) {
		switch b[n] {
		case '.', 'e', 'E':
			return 0 // fraction or exponent follows
		}
	}
	return n
}
1447
// consumeNumberState records how far consumeNumberResumable got, so that
// parsing can resume once more input is available.
type consumeNumberState uint

// The states are ordered such that each "withinX" state is immediately
// followed by the "beforeY" state of the next component;
// consumeNumberResumable relies on this when it increments the state.
const (
	consumeNumberInit      consumeNumberState = iota // nothing consumed yet
	beforeIntegerDigits                              // need the integer digits (after an optional '-')
	withinIntegerDigits                              // consumed one or more integer digits; more may follow
	beforeFractionalDigits                           // an optional fraction may follow
	withinFractionalDigits                           // consumed one or more fractional digits; more may follow
	beforeExponentDigits                             // an optional exponent may follow
	withinExponentDigits                             // consumed one or more exponent digits; more may follow
)
1459
1460
1461
1462
1463
1464
1465
1466
// consumeNumber consumes the JSON number at the start of b and returns its
// length in bytes. It is consumeNumberResumable started from the initial
// state, with the resulting state discarded. It returns
// io.ErrUnexpectedEOF if b ends where a digit is required. A number that
// runs to the end of b is reported as complete, so the caller must decide
// whether more digits could still follow.
func consumeNumber(b []byte) (n int, err error) {
	n, _, err = consumeNumberResumable(b, 0, consumeNumberInit)
	return n, err
}
1471
1472
1473
// consumeNumberResumable consumes the JSON number at the start of b,
// continuing from resumeOffset in the given state. Both must be either zero
// values or the results of a previous call on a shorter prefix of the same
// input.
//
// It returns the number of bytes consumed and the state reached. When b ends
// where a digit is required, it returns io.ErrUnexpectedEOF along with the
// offset and state from which to resume once more input is available.
func consumeNumberResumable(b []byte, resumeOffset int, state consumeNumberState) (n int, _ consumeNumberState, err error) {
	// Jump to the right state when resuming from a partial consumption.
	n = resumeOffset
	if state > consumeNumberInit {
		switch state {
		case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
			// Consume any further digits of the current component.
			for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
				n++
			}
			if len(b) == n {
				return n, state, nil // still within the same state
			}
			state++ // switches "withinX" to "beforeY" where Y is the state after X
		}
		switch state {
		case beforeIntegerDigits:
			goto beforeInteger
		case beforeFractionalDigits:
			goto beforeFractional
		case beforeExponentDigits:
			goto beforeExponent
		default:
			return n, state, nil
		}
	}

	// Consume the required integer component (with optional minus sign).
beforeInteger:
	resumeOffset = n
	if len(b) > 0 && b[0] == '-' {
		n++
	}
	switch {
	case len(b) == n:
		return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
	case b[n] == '0':
		// A leading zero cannot be followed by further integer digits.
		n++
		state = beforeFractionalDigits
	case '1' <= b[n] && b[n] <= '9':
		n++
		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinIntegerDigits
	default:
		return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
	}

	// Consume the optional fractional component.
beforeFractional:
	if len(b) > n && b[n] == '.' {
		resumeOffset = n // on truncation, resume at the '.'
		n++
		switch {
		case len(b) == n:
			return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
		case '0' <= b[n] && b[n] <= '9':
			n++
		default:
			return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
		}
		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinFractionalDigits
	}

	// Consume the optional exponent component.
beforeExponent:
	if len(b) > n && (b[n] == 'e' || b[n] == 'E') {
		resumeOffset = n // on truncation, resume at the 'e' or 'E'
		n++
		if len(b) > n && (b[n] == '-' || b[n] == '+') {
			n++
		}
		switch {
		case len(b) == n:
			return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
		case '0' <= b[n] && b[n] <= '9':
			n++
		default:
			return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
		}
		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinExponentDigits
	}

	return n, state, nil
}
1566
1567
1568
1569
// parseHexUint16 parses b as exactly four hexadecimal digits (upper or
// lower case) and returns the value. ok is false, and v is 0, if b does not
// have length 4 or contains a non-hex byte.
func parseHexUint16(b []byte) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for i := 0; i < 4; i++ {
		var digit byte
		switch c := b[i]; {
		case '0' <= c && c <= '9':
			digit = c - '0'
		case 'a' <= c && c <= 'f':
			digit = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			digit = c - 'A' + 10
		default:
			return 0, false
		}
		// Four hex digits fill exactly 16 bits, so this cannot overflow.
		v = v<<4 | uint16(digit)
	}
	return v, true
}
1589
1590
1591
1592
1593
1594
// parseDecUint parses b as an unsigned decimal integer.
// It returns (0, false) if b is empty or contains any non-digit byte,
// and (math.MaxUint64, false) if b is all digits but the value overflows
// a uint64.
func parseDecUint(b []byte) (v uint64, ok bool) {
	// Once v reaches cutoff, multiplying it by 10 overflows a uint64.
	const cutoff = math.MaxUint64/10 + 1

	if len(b) == 0 {
		return 0, false
	}
	overflowed := false
	for _, c := range b {
		if c < '0' || c > '9' {
			return 0, false
		}
		if v >= cutoff {
			overflowed = true
		}
		v *= 10

		// Wrap-around on the addition shows up as a sum smaller than v.
		sum := v + uint64(c-'0')
		if sum < v {
			overflowed = true
		}
		v = sum
	}
	if overflowed {
		return math.MaxUint64, false
	}
	return v, true
}
1620
1621
1622
1623
1624
1625
1626
1627 func parseFloat(b []byte, bits int) (v float64, ok bool) {
1628
1629
1630 var negLen int
1631 if len(b) > 0 && b[0] == '-' {
1632 negLen = 1
1633 }
1634 u, ok := parseDecUint(b[negLen:])
1635 if ok && ((bits == 32 && u <= 1<<24) || (bits == 64 && u <= 1<<53)) {
1636 return math.Copysign(float64(u), float64(-1*negLen)), true
1637 }
1638
1639
1640
1641 fv, err := strconv.ParseFloat(string(b), bits)
1642 if math.IsInf(fv, 0) {
1643 switch {
1644 case bits == 32 && math.IsInf(fv, +1):
1645 return +math.MaxFloat32, true
1646 case bits == 64 && math.IsInf(fv, +1):
1647 return +math.MaxFloat64, true
1648 case bits == 32 && math.IsInf(fv, -1):
1649 return -math.MaxFloat32, true
1650 case bits == 64 && math.IsInf(fv, -1):
1651 return -math.MaxFloat64, true
1652 }
1653 }
1654 return fv, err == nil
1655 }
1656
View as plain text