parser.go

Documentation: k8s.io/client-go/util/jsonpath

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package jsonpath
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"regexp"
    23  	"strconv"
    24  	"strings"
    25  	"unicode"
    26  	"unicode/utf8"
    27  )
    28  
// eof is the sentinel rune that (*Parser).next returns once the input is
// exhausted. It is negative, so it cannot collide with a decoded rune.
const eof = -1
    30  
// Delimiters that enclose an action inside the template text.
const (
	// leftDelim opens an action; parseText hands off to parseLeftDelim when
	// the remaining input starts with it.
	leftDelim  = "{"
	// rightDelim closes an action; parseInsideAction dispatches to
	// parseRightDelim when the remaining input starts with it.
	rightDelim = "}"
)
    35  
// Parser holds the scanner state for one template and the node tree that
// parsing produces.
type Parser struct {
	// Name is the label supplied to NewParser.
	Name  string
	// Root is the top-level list that Parse creates and fills with nodes.
	Root  *ListNode
	// input is the text currently being parsed.
	input string
	// pos is the current byte offset into input.
	pos   int
	// start is the byte offset where the text not yet returned by
	// consumeText begins.
	start int
	// width is the byte width of the rune most recently returned by next
	// (0 at end of input); backup subtracts it from pos.
	width int
}
    44  
var (
	// ErrSyntax is returned by UnquoteExtend when its argument is not a
	// well-formed single- or double-quoted string.
	ErrSyntax        = errors.New("invalid syntax")
	// dictKeyRex matches bracket contents that are one single-quoted key,
	// e.g. 'name'; capture group 1 is the text between the quotes.
	dictKeyRex       = regexp.MustCompile(`^'([^']*)'$`)
	// sliceOperatorRex matches up to three optionally negative integers
	// separated by colons. Groups 2 and 3, when present, include their
	// leading ':' (parseArray strips it).
	sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:-?[\d]*)?$`)
)
    50  
    51  // Parse parsed the given text and return a node Parser.
    52  // If an error is encountered, parsing stops and an empty
    53  // Parser is returned with the error
    54  func Parse(name, text string) (*Parser, error) {
    55  	p := NewParser(name)
    56  	err := p.Parse(text)
    57  	if err != nil {
    58  		p = nil
    59  	}
    60  	return p, err
    61  }
    62  
    63  func NewParser(name string) *Parser {
    64  	return &Parser{
    65  		Name: name,
    66  	}
    67  }
    68  
    69  // parseAction parsed the expression inside delimiter
    70  func parseAction(name, text string) (*Parser, error) {
    71  	p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim))
    72  	// when error happens, p will be nil, so we need to return here
    73  	if err != nil {
    74  		return p, err
    75  	}
    76  	p.Root = p.Root.Nodes[0].(*ListNode)
    77  	return p, nil
    78  }
    79  
    80  func (p *Parser) Parse(text string) error {
    81  	p.input = text
    82  	p.Root = newList()
    83  	p.pos = 0
    84  	return p.parseText(p.Root)
    85  }
    86  
    87  // consumeText return the parsed text since last cosumeText
    88  func (p *Parser) consumeText() string {
    89  	value := p.input[p.start:p.pos]
    90  	p.start = p.pos
    91  	return value
    92  }
    93  
    94  // next returns the next rune in the input.
    95  func (p *Parser) next() rune {
    96  	if p.pos >= len(p.input) {
    97  		p.width = 0
    98  		return eof
    99  	}
   100  	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
   101  	p.width = w
   102  	p.pos += p.width
   103  	return r
   104  }
   105  
   106  // peek returns but does not consume the next rune in the input.
   107  func (p *Parser) peek() rune {
   108  	r := p.next()
   109  	p.backup()
   110  	return r
   111  }
   112  
   113  // backup steps back one rune. Can only be called once per call of next.
   114  func (p *Parser) backup() {
   115  	p.pos -= p.width
   116  }
   117  
   118  func (p *Parser) parseText(cur *ListNode) error {
   119  	for {
   120  		if strings.HasPrefix(p.input[p.pos:], leftDelim) {
   121  			if p.pos > p.start {
   122  				cur.append(newText(p.consumeText()))
   123  			}
   124  			return p.parseLeftDelim(cur)
   125  		}
   126  		if p.next() == eof {
   127  			break
   128  		}
   129  	}
   130  	// Correctly reached EOF.
   131  	if p.pos > p.start {
   132  		cur.append(newText(p.consumeText()))
   133  	}
   134  	return nil
   135  }
   136  
   137  // parseLeftDelim scans the left delimiter, which is known to be present.
   138  func (p *Parser) parseLeftDelim(cur *ListNode) error {
   139  	p.pos += len(leftDelim)
   140  	p.consumeText()
   141  	newNode := newList()
   142  	cur.append(newNode)
   143  	cur = newNode
   144  	return p.parseInsideAction(cur)
   145  }
   146  
   147  func (p *Parser) parseInsideAction(cur *ListNode) error {
   148  	prefixMap := map[string]func(*ListNode) error{
   149  		rightDelim: p.parseRightDelim,
   150  		"[?(":      p.parseFilter,
   151  		"..":       p.parseRecursive,
   152  	}
   153  	for prefix, parseFunc := range prefixMap {
   154  		if strings.HasPrefix(p.input[p.pos:], prefix) {
   155  			return parseFunc(cur)
   156  		}
   157  	}
   158  
   159  	switch r := p.next(); {
   160  	case r == eof || isEndOfLine(r):
   161  		return fmt.Errorf("unclosed action")
   162  	case r == ' ':
   163  		p.consumeText()
   164  	case r == '@' || r == '$': //the current object, just pass it
   165  		p.consumeText()
   166  	case r == '[':
   167  		return p.parseArray(cur)
   168  	case r == '"' || r == '\'':
   169  		return p.parseQuote(cur, r)
   170  	case r == '.':
   171  		return p.parseField(cur)
   172  	case r == '+' || r == '-' || unicode.IsDigit(r):
   173  		p.backup()
   174  		return p.parseNumber(cur)
   175  	case isAlphaNumeric(r):
   176  		p.backup()
   177  		return p.parseIdentifier(cur)
   178  	default:
   179  		return fmt.Errorf("unrecognized character in action: %#U", r)
   180  	}
   181  	return p.parseInsideAction(cur)
   182  }
   183  
   184  // parseRightDelim scans the right delimiter, which is known to be present.
   185  func (p *Parser) parseRightDelim(cur *ListNode) error {
   186  	p.pos += len(rightDelim)
   187  	p.consumeText()
   188  	return p.parseText(p.Root)
   189  }
   190  
   191  // parseIdentifier scans build-in keywords, like "range" "end"
   192  func (p *Parser) parseIdentifier(cur *ListNode) error {
   193  	var r rune
   194  	for {
   195  		r = p.next()
   196  		if isTerminator(r) {
   197  			p.backup()
   198  			break
   199  		}
   200  	}
   201  	value := p.consumeText()
   202  
   203  	if isBool(value) {
   204  		v, err := strconv.ParseBool(value)
   205  		if err != nil {
   206  			return fmt.Errorf("can not parse bool '%s': %s", value, err.Error())
   207  		}
   208  
   209  		cur.append(newBool(v))
   210  	} else {
   211  		cur.append(newIdentifier(value))
   212  	}
   213  
   214  	return p.parseInsideAction(cur)
   215  }
   216  
   217  // parseRecursive scans the recursive descent operator ..
   218  func (p *Parser) parseRecursive(cur *ListNode) error {
   219  	if lastIndex := len(cur.Nodes) - 1; lastIndex >= 0 && cur.Nodes[lastIndex].Type() == NodeRecursive {
   220  		return fmt.Errorf("invalid multiple recursive descent")
   221  	}
   222  	p.pos += len("..")
   223  	p.consumeText()
   224  	cur.append(newRecursive())
   225  	if r := p.peek(); isAlphaNumeric(r) {
   226  		return p.parseField(cur)
   227  	}
   228  	return p.parseInsideAction(cur)
   229  }
   230  
   231  // parseNumber scans number
   232  func (p *Parser) parseNumber(cur *ListNode) error {
   233  	r := p.peek()
   234  	if r == '+' || r == '-' {
   235  		p.next()
   236  	}
   237  	for {
   238  		r = p.next()
   239  		if r != '.' && !unicode.IsDigit(r) {
   240  			p.backup()
   241  			break
   242  		}
   243  	}
   244  	value := p.consumeText()
   245  	i, err := strconv.Atoi(value)
   246  	if err == nil {
   247  		cur.append(newInt(i))
   248  		return p.parseInsideAction(cur)
   249  	}
   250  	d, err := strconv.ParseFloat(value, 64)
   251  	if err == nil {
   252  		cur.append(newFloat(d))
   253  		return p.parseInsideAction(cur)
   254  	}
   255  	return fmt.Errorf("cannot parse number %s", value)
   256  }
   257  
   258  // parseArray scans array index selection
   259  func (p *Parser) parseArray(cur *ListNode) error {
   260  Loop:
   261  	for {
   262  		switch p.next() {
   263  		case eof, '\n':
   264  			return fmt.Errorf("unterminated array")
   265  		case ']':
   266  			break Loop
   267  		}
   268  	}
   269  	text := p.consumeText()
   270  	text = text[1 : len(text)-1]
   271  	if text == "*" {
   272  		text = ":"
   273  	}
   274  
   275  	//union operator
   276  	strs := strings.Split(text, ",")
   277  	if len(strs) > 1 {
   278  		union := []*ListNode{}
   279  		for _, str := range strs {
   280  			parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " ")))
   281  			if err != nil {
   282  				return err
   283  			}
   284  			union = append(union, parser.Root)
   285  		}
   286  		cur.append(newUnion(union))
   287  		return p.parseInsideAction(cur)
   288  	}
   289  
   290  	// dict key
   291  	value := dictKeyRex.FindStringSubmatch(text)
   292  	if value != nil {
   293  		parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1]))
   294  		if err != nil {
   295  			return err
   296  		}
   297  		for _, node := range parser.Root.Nodes {
   298  			cur.append(node)
   299  		}
   300  		return p.parseInsideAction(cur)
   301  	}
   302  
   303  	//slice operator
   304  	value = sliceOperatorRex.FindStringSubmatch(text)
   305  	if value == nil {
   306  		return fmt.Errorf("invalid array index %s", text)
   307  	}
   308  	value = value[1:]
   309  	params := [3]ParamsEntry{}
   310  	for i := 0; i < 3; i++ {
   311  		if value[i] != "" {
   312  			if i > 0 {
   313  				value[i] = value[i][1:]
   314  			}
   315  			if i > 0 && value[i] == "" {
   316  				params[i].Known = false
   317  			} else {
   318  				var err error
   319  				params[i].Known = true
   320  				params[i].Value, err = strconv.Atoi(value[i])
   321  				if err != nil {
   322  					return fmt.Errorf("array index %s is not a number", value[i])
   323  				}
   324  			}
   325  		} else {
   326  			if i == 1 {
   327  				params[i].Known = true
   328  				params[i].Value = params[0].Value + 1
   329  				params[i].Derived = true
   330  			} else {
   331  				params[i].Known = false
   332  				params[i].Value = 0
   333  			}
   334  		}
   335  	}
   336  	cur.append(newArray(params))
   337  	return p.parseInsideAction(cur)
   338  }
   339  
   340  // parseFilter scans filter inside array selection
   341  func (p *Parser) parseFilter(cur *ListNode) error {
   342  	p.pos += len("[?(")
   343  	p.consumeText()
   344  	begin := false
   345  	end := false
   346  	var pair rune
   347  
   348  Loop:
   349  	for {
   350  		r := p.next()
   351  		switch r {
   352  		case eof, '\n':
   353  			return fmt.Errorf("unterminated filter")
   354  		case '"', '\'':
   355  			if begin == false {
   356  				//save the paired rune
   357  				begin = true
   358  				pair = r
   359  				continue
   360  			}
   361  			//only add when met paired rune
   362  			if p.input[p.pos-2] != '\\' && r == pair {
   363  				end = true
   364  			}
   365  		case ')':
   366  			//in rightParser below quotes only appear zero or once
   367  			//and must be paired at the beginning and end
   368  			if begin == end {
   369  				break Loop
   370  			}
   371  		}
   372  	}
   373  	if p.next() != ']' {
   374  		return fmt.Errorf("unclosed array expect ]")
   375  	}
   376  	reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`)
   377  	text := p.consumeText()
   378  	text = text[:len(text)-2]
   379  	value := reg.FindStringSubmatch(text)
   380  	if value == nil {
   381  		parser, err := parseAction("text", text)
   382  		if err != nil {
   383  			return err
   384  		}
   385  		cur.append(newFilter(parser.Root, newList(), "exists"))
   386  	} else {
   387  		leftParser, err := parseAction("left", value[1])
   388  		if err != nil {
   389  			return err
   390  		}
   391  		rightParser, err := parseAction("right", value[3])
   392  		if err != nil {
   393  			return err
   394  		}
   395  		cur.append(newFilter(leftParser.Root, rightParser.Root, value[2]))
   396  	}
   397  	return p.parseInsideAction(cur)
   398  }
   399  
   400  // parseQuote unquotes string inside double or single quote
   401  func (p *Parser) parseQuote(cur *ListNode, end rune) error {
   402  Loop:
   403  	for {
   404  		switch p.next() {
   405  		case eof, '\n':
   406  			return fmt.Errorf("unterminated quoted string")
   407  		case end:
   408  			//if it's not escape break the Loop
   409  			if p.input[p.pos-2] != '\\' {
   410  				break Loop
   411  			}
   412  		}
   413  	}
   414  	value := p.consumeText()
   415  	s, err := UnquoteExtend(value)
   416  	if err != nil {
   417  		return fmt.Errorf("unquote string %s error %v", value, err)
   418  	}
   419  	cur.append(newText(s))
   420  	return p.parseInsideAction(cur)
   421  }
   422  
   423  // parseField scans a field until a terminator
   424  func (p *Parser) parseField(cur *ListNode) error {
   425  	p.consumeText()
   426  	for p.advance() {
   427  	}
   428  	value := p.consumeText()
   429  	if value == "*" {
   430  		cur.append(newWildcard())
   431  	} else {
   432  		cur.append(newField(strings.Replace(value, "\\", "", -1)))
   433  	}
   434  	return p.parseInsideAction(cur)
   435  }
   436  
   437  // advance scans until next non-escaped terminator
   438  func (p *Parser) advance() bool {
   439  	r := p.next()
   440  	if r == '\\' {
   441  		p.next()
   442  	} else if isTerminator(r) {
   443  		p.backup()
   444  		return false
   445  	}
   446  	return true
   447  }
   448  
   449  // isTerminator reports whether the input is at valid termination character to appear after an identifier.
   450  func isTerminator(r rune) bool {
   451  	if isSpace(r) || isEndOfLine(r) {
   452  		return true
   453  	}
   454  	switch r {
   455  	case eof, '.', ',', '[', ']', '$', '@', '{', '}':
   456  		return true
   457  	}
   458  	return false
   459  }
   460  
   461  // isSpace reports whether r is a space character.
   462  func isSpace(r rune) bool {
   463  	return r == ' ' || r == '\t'
   464  }
   465  
   466  // isEndOfLine reports whether r is an end-of-line character.
   467  func isEndOfLine(r rune) bool {
   468  	return r == '\r' || r == '\n'
   469  }
   470  
   471  // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
   472  func isAlphaNumeric(r rune) bool {
   473  	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
   474  }
   475  
   476  // isBool reports whether s is a boolean value.
   477  func isBool(s string) bool {
   478  	return s == "true" || s == "false"
   479  }
   480  
   481  // UnquoteExtend is almost same as strconv.Unquote(), but it support parse single quotes as a string
   482  func UnquoteExtend(s string) (string, error) {
   483  	n := len(s)
   484  	if n < 2 {
   485  		return "", ErrSyntax
   486  	}
   487  	quote := s[0]
   488  	if quote != s[n-1] {
   489  		return "", ErrSyntax
   490  	}
   491  	s = s[1 : n-1]
   492  
   493  	if quote != '"' && quote != '\'' {
   494  		return "", ErrSyntax
   495  	}
   496  
   497  	// Is it trivial?  Avoid allocation.
   498  	if !contains(s, '\\') && !contains(s, quote) {
   499  		return s, nil
   500  	}
   501  
   502  	var runeTmp [utf8.UTFMax]byte
   503  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   504  	for len(s) > 0 {
   505  		c, multibyte, ss, err := strconv.UnquoteChar(s, quote)
   506  		if err != nil {
   507  			return "", err
   508  		}
   509  		s = ss
   510  		if c < utf8.RuneSelf || !multibyte {
   511  			buf = append(buf, byte(c))
   512  		} else {
   513  			n := utf8.EncodeRune(runeTmp[:], c)
   514  			buf = append(buf, runeTmp[:n]...)
   515  		}
   516  	}
   517  	return string(buf), nil
   518  }
   519  
   520  func contains(s string, c byte) bool {
   521  	for i := 0; i < len(s); i++ {
   522  		if s[i] == c {
   523  			return true
   524  		}
   525  	}
   526  	return false
   527  }
   528
View as plain text