1
2
3
4 package jreader
5
6
7
8
9
10
11
12 import (
13 "bytes"
14 "io"
15 "strconv"
16 "unicode"
17 "unicode/utf8"
18 )
19
// Byte forms of the three JSON literal keywords. tokenReader.next compares
// the lowercase identifier it scans against these to recognize null, true,
// and false.
var (
	tokenNull  = []byte("null")
	tokenTrue  = []byte("true")
	tokenFalse = []byte("false")
)
25
// token is a single lexical element produced by tokenReader.next. Only the
// value field that corresponds to kind is populated; the others are left at
// their zero values.
type token struct {
	kind        tokenKind // category of this token
	boolValue   bool      // value when kind is boolToken
	numberValue float64   // value when kind is numberToken
	stringValue []byte    // content when kind is stringToken; may alias the reader's input (see readString)
	delimiter   byte      // the character when kind is delimiterToken: one of [ ] { } : ,
}
33
// tokenKind identifies the category of a token.
type tokenKind int

// The token categories, numbered consecutively from zero. nullToken is the
// zero value, so a zero token has kind nullToken.
const (
	nullToken tokenKind = iota
	boolToken
	numberToken
	stringToken
	delimiterToken
)
43
44 func (t token) valueKind() ValueKind {
45 if t.kind == delimiterToken {
46 if t.delimiter == '[' {
47 return ArrayValue
48 }
49 if t.delimiter == '{' {
50 return ObjectValue
51 }
52 }
53 return valueKindFromTokenKind(t.kind)
54 }
55
56 func (t token) description() string {
57 if t.kind == delimiterToken && t.delimiter != '[' && t.delimiter != '{' {
58 return "'" + string(t.delimiter) + "'"
59 }
60 return t.valueKind().String()
61 }
62
// tokenReader scans JSON tokens from an in-memory byte slice. It supports a
// single token of lookahead through putBack.
type tokenReader struct {
	data        []byte // the input being scanned
	pos         int    // index in data of the next byte readByte will return
	len         int    // len(data), captured by newTokenReader
	hasUnread   bool   // true while unreadToken holds a token that was put back
	unreadToken token  // the token most recently passed to putBack
	lastPos     int    // index of the last non-whitespace byte found by skipWhitespaceAndReadByte, i.e. where the latest token starts
}
71
72 func newTokenReader(data []byte) tokenReader {
73 tr := tokenReader{
74 data: data,
75 pos: 0,
76 len: len(data),
77 }
78 return tr
79 }
80
81
82 func (r *tokenReader) EOF() bool {
83 if r.hasUnread {
84 return false
85 }
86 _, ok := r.skipWhitespaceAndReadByte()
87 if !ok {
88 return true
89 }
90 r.unreadByte()
91 return false
92 }
93
94
95 func (r *tokenReader) LastPos() int {
96 return r.lastPos
97 }
98
99 func (r *tokenReader) getPos() int {
100 if r.hasUnread {
101 return r.lastPos
102 }
103 return r.pos
104 }
105
106
107
108
109
110
111 func (r *tokenReader) Null() (bool, error) {
112 t, err := r.next()
113 if err != nil {
114 return false, err
115 }
116 if t.kind == nullToken {
117 return true, nil
118 }
119 r.putBack(t)
120 if t.kind == delimiterToken && t.delimiter != '[' && t.delimiter != '{' {
121 return false, SyntaxError{Message: errMsgUnexpectedChar, Value: string(t.delimiter), Offset: r.getPos()}
122 }
123 return false, nil
124 }
125
126
127
128
129
130 func (r *tokenReader) Bool() (bool, error) {
131 t, err := r.consumeScalar(boolToken)
132 return t.boolValue, err
133 }
134
135
136
137
138
139 func (r *tokenReader) Number() (float64, error) {
140 t, err := r.consumeScalar(numberToken)
141 return t.numberValue, err
142 }
143
144
145
146
147
148 func (r *tokenReader) String() (string, error) {
149 t, err := r.consumeScalar(stringToken)
150 return string(t.stringValue), err
151 }
152
153
154
155
156
157
158
159
160
161 func (r *tokenReader) PropertyName() ([]byte, error) {
162 t, err := r.consumeScalar(stringToken)
163 if err != nil {
164 return nil, err
165 }
166 b, ok := r.skipWhitespaceAndReadByte()
167 if !ok {
168 return nil, io.EOF
169 }
170 if b != ':' {
171 r.unreadByte()
172 return nil, r.syntaxErrorOnNextToken(errMsgExpectedColon)
173 }
174 return t.stringValue, nil
175 }
176
177
178
179
180
181
182 func (r *tokenReader) Delimiter(delimiter byte) (bool, error) {
183 if r.hasUnread {
184 if r.unreadToken.kind == delimiterToken && r.unreadToken.delimiter == delimiter {
185 r.hasUnread = false
186 return true, nil
187 }
188 return false, nil
189 }
190 b, ok := r.skipWhitespaceAndReadByte()
191 if !ok {
192 return false, nil
193 }
194 if b == delimiter {
195 return true, nil
196 }
197 r.unreadByte()
198 token, err := r.next()
199 if err != nil {
200 return false, err
201 }
202 r.putBack(token)
203 return false, nil
204 }
205
206
207
208
209
210 func (r *tokenReader) EndDelimiterOrComma(delimiter byte) (bool, error) {
211 if r.hasUnread {
212 if r.unreadToken.kind == delimiterToken &&
213 (r.unreadToken.delimiter == delimiter || r.unreadToken.delimiter == ',') {
214 r.hasUnread = false
215 return r.unreadToken.delimiter == delimiter, nil
216 }
217 return false, SyntaxError{Message: badArrayOrObjectItemMessage(delimiter == '}'),
218 Value: r.unreadToken.description(), Offset: r.lastPos}
219 }
220 b, ok := r.skipWhitespaceAndReadByte()
221 if !ok {
222 return false, io.EOF
223 }
224 if b == delimiter || b == ',' {
225 return b == delimiter, nil
226 }
227 r.unreadByte()
228 t, err := r.next()
229 if err != nil {
230 return false, err
231 }
232 return false, SyntaxError{Message: badArrayOrObjectItemMessage(delimiter == '}'),
233 Value: t.description(), Offset: r.lastPos}
234 }
235
236 func badArrayOrObjectItemMessage(isObject bool) string {
237 if isObject {
238 return errMsgBadObjectItem
239 }
240 return errMsgBadArrayItem
241 }
242
243
244
245
246
247 func (r *tokenReader) Any() (AnyValue, error) {
248 t, err := r.next()
249 if err != nil {
250 return AnyValue{}, err
251 }
252 switch t.kind {
253 case boolToken:
254 return AnyValue{Kind: BoolValue, Bool: t.boolValue}, nil
255 case numberToken:
256 return AnyValue{Kind: NumberValue, Number: t.numberValue}, nil
257 case stringToken:
258 return AnyValue{Kind: StringValue, String: string(t.stringValue)}, nil
259 case delimiterToken:
260 if t.delimiter == '[' {
261 return AnyValue{Kind: ArrayValue}, nil
262 }
263 if t.delimiter == '{' {
264 return AnyValue{Kind: ObjectValue}, nil
265 }
266 return AnyValue{},
267 SyntaxError{Message: errMsgUnexpectedChar, Value: string(t.delimiter), Offset: r.lastPos}
268 default:
269 return AnyValue{Kind: NullValue}, nil
270 }
271 }
272
273
274
275
276 func (r *tokenReader) next() (token, error) {
277 if r.hasUnread {
278 r.hasUnread = false
279 return r.unreadToken, nil
280 }
281 b, ok := r.skipWhitespaceAndReadByte()
282 if !ok {
283 return token{}, io.EOF
284 }
285
286 switch {
287
288
289 case b >= 'a' && b <= 'z':
290 n := r.consumeASCIILowercaseAlphabeticChars() + 1
291 id := r.data[r.lastPos : r.lastPos+n]
292 if b == 'f' && bytes.Equal(id, tokenFalse) {
293 return token{kind: boolToken, boolValue: false}, nil
294 }
295 if b == 't' && bytes.Equal(id, tokenTrue) {
296 return token{kind: boolToken, boolValue: true}, nil
297 }
298 if b == 'n' && bytes.Equal(id, tokenNull) {
299 return token{kind: nullToken}, nil
300 }
301 return token{}, SyntaxError{Message: errMsgUnexpectedSymbol, Value: string(id), Offset: r.lastPos}
302 case (b >= '0' && b <= '9') || b == '-':
303 if n, ok := r.readNumber(b); ok {
304 return token{kind: numberToken, numberValue: n}, nil
305 }
306 return token{}, SyntaxError{Message: errMsgInvalidNumber, Offset: r.lastPos}
307 case b == '"':
308 s, err := r.readString()
309 if err != nil {
310 return token{}, err
311 }
312 return token{kind: stringToken, stringValue: s}, nil
313 case b == '[', b == ']', b == '{', b == '}', b == ':', b == ',':
314 return token{kind: delimiterToken, delimiter: b}, nil
315 }
316
317 return token{}, SyntaxError{Message: errMsgUnexpectedChar, Value: string(b), Offset: r.lastPos}
318 }
319
320 func (r *tokenReader) putBack(token token) {
321 r.unreadToken = token
322 r.hasUnread = true
323 }
324
325 func (r *tokenReader) consumeScalar(kind tokenKind) (token, error) {
326 t, err := r.next()
327 if err != nil {
328 return token{}, err
329 }
330 if t.kind == kind {
331 return t, nil
332 }
333 if t.kind == delimiterToken && t.delimiter != '[' && t.delimiter != '{' {
334 return token{}, SyntaxError{Message: errMsgUnexpectedChar, Value: string(t.delimiter), Offset: r.LastPos()}
335 }
336 return token{}, TypeError{Expected: valueKindFromTokenKind(kind),
337 Actual: t.valueKind(), Offset: r.LastPos()}
338 }
339
340 func (r *tokenReader) readByte() (byte, bool) {
341 if r.pos >= r.len {
342 return 0, false
343 }
344 b := r.data[r.pos]
345 r.pos++
346 return b, true
347 }
348
349 func (r *tokenReader) unreadByte() {
350 r.pos--
351 }
352
353 func (r *tokenReader) skipWhitespaceAndReadByte() (byte, bool) {
354 for {
355 ch, ok := r.readByte()
356 if !ok {
357 return 0, false
358 }
359 if !unicode.IsSpace(rune(ch)) {
360 r.lastPos = r.pos - 1
361 return ch, true
362 }
363 }
364 }
365
366 func (r *tokenReader) consumeASCIILowercaseAlphabeticChars() int {
367 n := 0
368 for {
369 ch, ok := r.readByte()
370 if !ok {
371 break
372 }
373 if ch < 'a' || ch > 'z' {
374 r.unreadByte()
375 break
376 }
377 n++
378 }
379 return n
380 }
381
382 func (r *tokenReader) readNumber(first byte) (float64, bool) {
383 startPos := r.lastPos
384 isFloat := false
385 var ch byte
386 var ok bool
387 for {
388 ch, ok = r.readByte()
389 if !ok {
390 break
391 }
392 if (ch < '0' || ch > '9') && !(ch == '.' && !isFloat) {
393 break
394 }
395 if ch == '.' {
396 isFloat = true
397 }
398 }
399 hasExponent := false
400 if ch == 'e' || ch == 'E' {
401
402 ch, ok = r.readByte()
403 if !ok {
404 return 0, false
405 }
406 if ch == '+' || ch == '-' {
407 } else if ch >= '0' && ch <= '9' {
408 r.unreadByte()
409 } else {
410 return 0, false
411 }
412 for {
413 ch, ok = r.readByte()
414 if !ok {
415 break
416 }
417 if ch < '0' || ch > '9' {
418 r.unreadByte()
419 break
420 }
421 hasExponent = true
422 }
423 if !hasExponent {
424 return 0, false
425 }
426 isFloat = true
427 } else {
428 if ok {
429 r.unreadByte()
430 }
431 }
432 chars := r.data[startPos:r.pos]
433 if isFloat {
434
435
436
437 n, err := strconv.ParseFloat(string(chars), 64)
438 return n, err == nil
439 } else {
440 n, ok := parseIntFromBytes(chars)
441 return float64(n), ok
442 }
443 }
444
// readString scans the remainder of a JSON string whose opening quote has
// already been consumed, and returns its unescaped content.
//
// When the string contains no backslash escapes the result is a subslice of
// the reader's input buffer (no copy is made); otherwise it is a separately
// allocated buffer. An empty string yields nil. On success r.pos is left just
// past the closing quote. If the input ends before the closing quote, or an
// escape sequence is not recognized, a SyntaxError with errMsgInvalidString
// is returned and r.pos is not advanced.
func (r *tokenReader) readString() ([]byte, error) {
	startPos := r.pos
	var chars []byte
	haveEscaped := false
	var reader bytes.Reader
	reader.Reset(r.data)
	_, _ = reader.Seek(int64(r.pos), io.SeekStart)

	for {
		ch, _, err := reader.ReadRune()
		if err != nil {
			return nil, r.syntaxErrorOnLastToken(errMsgInvalidString)
		}
		if ch == '"' {
			break
		}
		if ch != '\\' {
			// Ordinary character: it only needs copying once an escape has
			// forced the switch to a separate output buffer.
			if haveEscaped {
				chars = appendRune(chars, ch)
			}
			continue
		}
		if !haveEscaped {
			// First escape in this string: start the output buffer with
			// everything read so far. pos is the offset of the backslash.
			pos := (r.len - reader.Len()) - 1
			chars = make([]byte, pos-startPos, pos-startPos+20)
			if pos > startPos {
				copy(chars, r.data[startPos:pos])
			}
			haveEscaped = true
		}
		// Read the character that follows the backslash.
		ch, _, err = reader.ReadRune()
		if err != nil {
			return nil, r.syntaxErrorOnLastToken(errMsgInvalidString)
		}
		switch ch {
		case '"', '\\', '/':
			chars = appendRune(chars, ch)
		case 'b':
			chars = appendRune(chars, '\b')
		case 'f':
			chars = appendRune(chars, '\f')
		case 'n':
			chars = appendRune(chars, '\n')
		case 'r':
			chars = appendRune(chars, '\r')
		case 't':
			chars = appendRune(chars, '\t')
		case 'u':
			if ch, ok := readHexChar(&reader); ok {
				chars = appendRune(chars, ch)
			} else {
				return nil, r.syntaxErrorOnLastToken(errMsgInvalidString)
			}
		default:
			return nil, r.syntaxErrorOnLastToken(errMsgInvalidString)
		}
	}
	// Resume byte-level scanning just past the closing quote.
	r.pos = r.len - reader.Len()
	if haveEscaped {
		if len(chars) == 0 {
			return nil, nil
		}
		return chars, nil
	} else {
		// No escapes: return the bytes between the quotes without copying.
		pos := r.pos - 1
		if pos <= startPos {
			return nil, nil
		}
		return r.data[startPos:pos], nil
	}
}
516
// readHexChar reads the four hexadecimal digits that follow "\u" in a JSON
// string escape and returns the code point they denote. The second result is
// false if fewer than four bytes remain or any of them is not a hex digit.
//
// JSON writes characters outside the Basic Multilingual Plane as a UTF-16
// surrogate pair, "\uD83D\uDE00" for example. When the digits denote a high
// surrogate and are immediately followed by a "\uXXXX" escape for a low
// surrogate, that second escape is consumed too and the combined code point
// is returned. Otherwise the reader is left just after the first four digits
// and the value is returned as is, so an unpaired surrogate is passed through
// unchanged.
func readHexChar(reader *bytes.Reader) (rune, bool) {
	const (
		highSurrogateMin  = 0xD800
		lowSurrogateMin   = 0xDC00
		lowSurrogateMax   = 0xDFFF
		supplementaryBase = 0x10000
	)

	// readUnit parses exactly four hex digits into a 16-bit value.
	readUnit := func() (rune, bool) {
		var value rune
		for i := 0; i < 4; i++ {
			ch, err := reader.ReadByte()
			if err != nil {
				return 0, false
			}
			switch {
			case ch >= '0' && ch <= '9':
				value = value<<4 | rune(ch-'0')
			case ch >= 'a' && ch <= 'f':
				value = value<<4 | rune(ch-'a'+10)
			case ch >= 'A' && ch <= 'F':
				value = value<<4 | rune(ch-'A'+10)
			default:
				return 0, false
			}
		}
		return value, true
	}

	high, ok := readUnit()
	if !ok {
		return 0, false
	}
	if high < highSurrogateMin || high >= lowSurrogateMin {
		// Not a high surrogate, so there is nothing to pair.
		return high, true
	}

	// Look ahead for the low half of the pair, remembering where to resume if
	// it is not there.
	resume := reader.Size() - int64(reader.Len())
	if b, err := reader.ReadByte(); err == nil && b == '\\' {
		if b, err = reader.ReadByte(); err == nil && b == 'u' {
			if low, ok := readUnit(); ok && low >= lowSurrogateMin && low <= lowSurrogateMax {
				return supplementaryBase + ((high - highSurrogateMin) << 10) + (low - lowSurrogateMin), true
			}
		}
	}
	// No low surrogate follows. Rewind so the caller processes whatever comes
	// next on its own. Seeking to an offset already read cannot fail.
	_, _ = reader.Seek(resume, io.SeekStart)
	return high, true
}
529
530 func (r *tokenReader) syntaxErrorOnLastToken(msg string) error {
531 return SyntaxError{Message: msg, Offset: r.LastPos()}
532 }
533
534 func (r *tokenReader) syntaxErrorOnNextToken(msg string) error {
535 t, err := r.next()
536 if err != nil {
537 return err
538 }
539 return SyntaxError{Message: msg, Value: t.description(), Offset: r.LastPos()}
540 }
541
542
// parseIntFromBytes parses a base-10 integer, optionally preceded by '-',
// without allocating. The second result is false if chars is empty, is only a
// minus sign, contains a byte that is not a decimal digit, or denotes a value
// outside the int64 range.
func parseIntFromBytes(chars []byte) (int64, bool) {
	// Magnitude of the most negative int64; the positive limit is one less.
	const minInt64Magnitude = uint64(1) << 63

	digits := chars
	negate := len(digits) > 0 && digits[0] == '-'
	if negate {
		digits = digits[1:]
	}
	if len(digits) == 0 {
		return 0, false
	}

	limit := minInt64Magnitude - 1
	if negate {
		limit = minInt64Magnitude
	}
	var magnitude uint64
	for _, ch := range digits {
		if ch < '0' || ch > '9' {
			return 0, false
		}
		d := uint64(ch - '0')
		// Equivalent to magnitude*10+d > limit, written so it cannot overflow.
		if magnitude > (limit-d)/10 {
			return 0, false
		}
		magnitude = magnitude*10 + d
	}
	if negate {
		// For magnitude == 1<<63 the conversion wraps to the minimum int64,
		// and negating that yields the same value, which is the right answer.
		return -int64(magnitude), true
	}
	return int64(magnitude), true
}
566
// appendRune appends the UTF-8 encoding of ch to out and returns the extended
// slice. Values that utf8.EncodeRune cannot encode are written as its
// replacement character.
func appendRune(out []byte, ch rune) []byte {
	var scratch [utf8.UTFMax]byte
	size := utf8.EncodeRune(scratch[:], ch)
	return append(out, scratch[:size]...)
}
572
573 func valueKindFromTokenKind(k tokenKind) ValueKind {
574 switch k {
575 case nullToken:
576 return NullValue
577 case boolToken:
578 return BoolValue
579 case numberToken:
580 return NumberValue
581 case stringToken:
582 return StringValue
583 }
584 return -1
585 }
586
View as plain text