1
16
17 package jsonpath
18
19 import (
20 "errors"
21 "fmt"
22 "regexp"
23 "strconv"
24 "strings"
25 "unicode"
26 "unicode/utf8"
27 )
28
29 const eof = -1
30
31 const (
32 leftDelim = "{"
33 rightDelim = "}"
34 )
35
36 type Parser struct {
37 Name string
38 Root *ListNode
39 input string
40 pos int
41 start int
42 width int
43 }
44
45 var (
46 ErrSyntax = errors.New("invalid syntax")
47 dictKeyRex = regexp.MustCompile(`^'([^']*)'$`)
48 sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:-?[\d]*)?$`)
49 )
50
51
52
53
54 func Parse(name, text string) (*Parser, error) {
55 p := NewParser(name)
56 err := p.Parse(text)
57 if err != nil {
58 p = nil
59 }
60 return p, err
61 }
62
63 func NewParser(name string) *Parser {
64 return &Parser{
65 Name: name,
66 }
67 }
68
69
70 func parseAction(name, text string) (*Parser, error) {
71 p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim))
72
73 if err != nil {
74 return p, err
75 }
76 p.Root = p.Root.Nodes[0].(*ListNode)
77 return p, nil
78 }
79
80 func (p *Parser) Parse(text string) error {
81 p.input = text
82 p.Root = newList()
83 p.pos = 0
84 return p.parseText(p.Root)
85 }
86
87
88 func (p *Parser) consumeText() string {
89 value := p.input[p.start:p.pos]
90 p.start = p.pos
91 return value
92 }
93
94
95 func (p *Parser) next() rune {
96 if p.pos >= len(p.input) {
97 p.width = 0
98 return eof
99 }
100 r, w := utf8.DecodeRuneInString(p.input[p.pos:])
101 p.width = w
102 p.pos += p.width
103 return r
104 }
105
106
107 func (p *Parser) peek() rune {
108 r := p.next()
109 p.backup()
110 return r
111 }
112
113
114 func (p *Parser) backup() {
115 p.pos -= p.width
116 }
117
118 func (p *Parser) parseText(cur *ListNode) error {
119 for {
120 if strings.HasPrefix(p.input[p.pos:], leftDelim) {
121 if p.pos > p.start {
122 cur.append(newText(p.consumeText()))
123 }
124 return p.parseLeftDelim(cur)
125 }
126 if p.next() == eof {
127 break
128 }
129 }
130
131 if p.pos > p.start {
132 cur.append(newText(p.consumeText()))
133 }
134 return nil
135 }
136
137
138 func (p *Parser) parseLeftDelim(cur *ListNode) error {
139 p.pos += len(leftDelim)
140 p.consumeText()
141 newNode := newList()
142 cur.append(newNode)
143 cur = newNode
144 return p.parseInsideAction(cur)
145 }
146
147 func (p *Parser) parseInsideAction(cur *ListNode) error {
148 prefixMap := map[string]func(*ListNode) error{
149 rightDelim: p.parseRightDelim,
150 "[?(": p.parseFilter,
151 "..": p.parseRecursive,
152 }
153 for prefix, parseFunc := range prefixMap {
154 if strings.HasPrefix(p.input[p.pos:], prefix) {
155 return parseFunc(cur)
156 }
157 }
158
159 switch r := p.next(); {
160 case r == eof || isEndOfLine(r):
161 return fmt.Errorf("unclosed action")
162 case r == ' ':
163 p.consumeText()
164 case r == '@' || r == '$':
165 p.consumeText()
166 case r == '[':
167 return p.parseArray(cur)
168 case r == '"' || r == '\'':
169 return p.parseQuote(cur, r)
170 case r == '.':
171 return p.parseField(cur)
172 case r == '+' || r == '-' || unicode.IsDigit(r):
173 p.backup()
174 return p.parseNumber(cur)
175 case isAlphaNumeric(r):
176 p.backup()
177 return p.parseIdentifier(cur)
178 default:
179 return fmt.Errorf("unrecognized character in action: %#U", r)
180 }
181 return p.parseInsideAction(cur)
182 }
183
184
185 func (p *Parser) parseRightDelim(cur *ListNode) error {
186 p.pos += len(rightDelim)
187 p.consumeText()
188 return p.parseText(p.Root)
189 }
190
191
192 func (p *Parser) parseIdentifier(cur *ListNode) error {
193 var r rune
194 for {
195 r = p.next()
196 if isTerminator(r) {
197 p.backup()
198 break
199 }
200 }
201 value := p.consumeText()
202
203 if isBool(value) {
204 v, err := strconv.ParseBool(value)
205 if err != nil {
206 return fmt.Errorf("can not parse bool '%s': %s", value, err.Error())
207 }
208
209 cur.append(newBool(v))
210 } else {
211 cur.append(newIdentifier(value))
212 }
213
214 return p.parseInsideAction(cur)
215 }
216
217
218 func (p *Parser) parseRecursive(cur *ListNode) error {
219 if lastIndex := len(cur.Nodes) - 1; lastIndex >= 0 && cur.Nodes[lastIndex].Type() == NodeRecursive {
220 return fmt.Errorf("invalid multiple recursive descent")
221 }
222 p.pos += len("..")
223 p.consumeText()
224 cur.append(newRecursive())
225 if r := p.peek(); isAlphaNumeric(r) {
226 return p.parseField(cur)
227 }
228 return p.parseInsideAction(cur)
229 }
230
231
232 func (p *Parser) parseNumber(cur *ListNode) error {
233 r := p.peek()
234 if r == '+' || r == '-' {
235 p.next()
236 }
237 for {
238 r = p.next()
239 if r != '.' && !unicode.IsDigit(r) {
240 p.backup()
241 break
242 }
243 }
244 value := p.consumeText()
245 i, err := strconv.Atoi(value)
246 if err == nil {
247 cur.append(newInt(i))
248 return p.parseInsideAction(cur)
249 }
250 d, err := strconv.ParseFloat(value, 64)
251 if err == nil {
252 cur.append(newFloat(d))
253 return p.parseInsideAction(cur)
254 }
255 return fmt.Errorf("cannot parse number %s", value)
256 }
257
258
259 func (p *Parser) parseArray(cur *ListNode) error {
260 Loop:
261 for {
262 switch p.next() {
263 case eof, '\n':
264 return fmt.Errorf("unterminated array")
265 case ']':
266 break Loop
267 }
268 }
269 text := p.consumeText()
270 text = text[1 : len(text)-1]
271 if text == "*" {
272 text = ":"
273 }
274
275
276 strs := strings.Split(text, ",")
277 if len(strs) > 1 {
278 union := []*ListNode{}
279 for _, str := range strs {
280 parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " ")))
281 if err != nil {
282 return err
283 }
284 union = append(union, parser.Root)
285 }
286 cur.append(newUnion(union))
287 return p.parseInsideAction(cur)
288 }
289
290
291 value := dictKeyRex.FindStringSubmatch(text)
292 if value != nil {
293 parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1]))
294 if err != nil {
295 return err
296 }
297 for _, node := range parser.Root.Nodes {
298 cur.append(node)
299 }
300 return p.parseInsideAction(cur)
301 }
302
303
304 value = sliceOperatorRex.FindStringSubmatch(text)
305 if value == nil {
306 return fmt.Errorf("invalid array index %s", text)
307 }
308 value = value[1:]
309 params := [3]ParamsEntry{}
310 for i := 0; i < 3; i++ {
311 if value[i] != "" {
312 if i > 0 {
313 value[i] = value[i][1:]
314 }
315 if i > 0 && value[i] == "" {
316 params[i].Known = false
317 } else {
318 var err error
319 params[i].Known = true
320 params[i].Value, err = strconv.Atoi(value[i])
321 if err != nil {
322 return fmt.Errorf("array index %s is not a number", value[i])
323 }
324 }
325 } else {
326 if i == 1 {
327 params[i].Known = true
328 params[i].Value = params[0].Value + 1
329 params[i].Derived = true
330 } else {
331 params[i].Known = false
332 params[i].Value = 0
333 }
334 }
335 }
336 cur.append(newArray(params))
337 return p.parseInsideAction(cur)
338 }
339
340
341 func (p *Parser) parseFilter(cur *ListNode) error {
342 p.pos += len("[?(")
343 p.consumeText()
344 begin := false
345 end := false
346 var pair rune
347
348 Loop:
349 for {
350 r := p.next()
351 switch r {
352 case eof, '\n':
353 return fmt.Errorf("unterminated filter")
354 case '"', '\'':
355 if begin == false {
356
357 begin = true
358 pair = r
359 continue
360 }
361
362 if p.input[p.pos-2] != '\\' && r == pair {
363 end = true
364 }
365 case ')':
366
367
368 if begin == end {
369 break Loop
370 }
371 }
372 }
373 if p.next() != ']' {
374 return fmt.Errorf("unclosed array expect ]")
375 }
376 reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`)
377 text := p.consumeText()
378 text = text[:len(text)-2]
379 value := reg.FindStringSubmatch(text)
380 if value == nil {
381 parser, err := parseAction("text", text)
382 if err != nil {
383 return err
384 }
385 cur.append(newFilter(parser.Root, newList(), "exists"))
386 } else {
387 leftParser, err := parseAction("left", value[1])
388 if err != nil {
389 return err
390 }
391 rightParser, err := parseAction("right", value[3])
392 if err != nil {
393 return err
394 }
395 cur.append(newFilter(leftParser.Root, rightParser.Root, value[2]))
396 }
397 return p.parseInsideAction(cur)
398 }
399
400
401 func (p *Parser) parseQuote(cur *ListNode, end rune) error {
402 Loop:
403 for {
404 switch p.next() {
405 case eof, '\n':
406 return fmt.Errorf("unterminated quoted string")
407 case end:
408
409 if p.input[p.pos-2] != '\\' {
410 break Loop
411 }
412 }
413 }
414 value := p.consumeText()
415 s, err := UnquoteExtend(value)
416 if err != nil {
417 return fmt.Errorf("unquote string %s error %v", value, err)
418 }
419 cur.append(newText(s))
420 return p.parseInsideAction(cur)
421 }
422
423
424 func (p *Parser) parseField(cur *ListNode) error {
425 p.consumeText()
426 for p.advance() {
427 }
428 value := p.consumeText()
429 if value == "*" {
430 cur.append(newWildcard())
431 } else {
432 cur.append(newField(strings.Replace(value, "\\", "", -1)))
433 }
434 return p.parseInsideAction(cur)
435 }
436
437
438 func (p *Parser) advance() bool {
439 r := p.next()
440 if r == '\\' {
441 p.next()
442 } else if isTerminator(r) {
443 p.backup()
444 return false
445 }
446 return true
447 }
448
449
450 func isTerminator(r rune) bool {
451 if isSpace(r) || isEndOfLine(r) {
452 return true
453 }
454 switch r {
455 case eof, '.', ',', '[', ']', '$', '@', '{', '}':
456 return true
457 }
458 return false
459 }
460
461
462 func isSpace(r rune) bool {
463 return r == ' ' || r == '\t'
464 }
465
466
467 func isEndOfLine(r rune) bool {
468 return r == '\r' || r == '\n'
469 }
470
471
472 func isAlphaNumeric(r rune) bool {
473 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
474 }
475
476
477 func isBool(s string) bool {
478 return s == "true" || s == "false"
479 }
480
481
482 func UnquoteExtend(s string) (string, error) {
483 n := len(s)
484 if n < 2 {
485 return "", ErrSyntax
486 }
487 quote := s[0]
488 if quote != s[n-1] {
489 return "", ErrSyntax
490 }
491 s = s[1 : n-1]
492
493 if quote != '"' && quote != '\'' {
494 return "", ErrSyntax
495 }
496
497
498 if !contains(s, '\\') && !contains(s, quote) {
499 return s, nil
500 }
501
502 var runeTmp [utf8.UTFMax]byte
503 buf := make([]byte, 0, 3*len(s)/2)
504 for len(s) > 0 {
505 c, multibyte, ss, err := strconv.UnquoteChar(s, quote)
506 if err != nil {
507 return "", err
508 }
509 s = ss
510 if c < utf8.RuneSelf || !multibyte {
511 buf = append(buf, byte(c))
512 } else {
513 n := utf8.EncodeRune(runeTmp[:], c)
514 buf = append(buf, runeTmp[:n]...)
515 }
516 }
517 return string(buf), nil
518 }
519
520 func contains(s string, c byte) bool {
521 for i := 0; i < len(s); i++ {
522 if s[i] == c {
523 return true
524 }
525 }
526 return false
527 }
528
View as plain text