...

Source file src/github.com/grpc-ecosystem/grpc-gateway/v2/internal/httprule/parse.go

Documentation: github.com/grpc-ecosystem/grpc-gateway/v2/internal/httprule

     1  package httprule
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"strings"
     7  )
     8  
     9  // InvalidTemplateError indicates that the path template is not valid.
    10  type InvalidTemplateError struct {
    11  	tmpl string
    12  	msg  string
    13  }
    14  
    15  func (e InvalidTemplateError) Error() string {
    16  	return fmt.Sprintf("%s: %s", e.msg, e.tmpl)
    17  }
    18  
    19  // Parse parses the string representation of path template
    20  func Parse(tmpl string) (Compiler, error) {
    21  	if !strings.HasPrefix(tmpl, "/") {
    22  		return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"}
    23  	}
    24  	tokens, verb := tokenize(tmpl[1:])
    25  
    26  	p := parser{tokens: tokens}
    27  	segs, err := p.topLevelSegments()
    28  	if err != nil {
    29  		return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()}
    30  	}
    31  
    32  	return template{
    33  		segments: segs,
    34  		verb:     verb,
    35  		template: tmpl,
    36  	}, nil
    37  }
    38  
    39  func tokenize(path string) (tokens []string, verb string) {
    40  	if path == "" {
    41  		return []string{eof}, ""
    42  	}
    43  
    44  	const (
    45  		init = iota
    46  		field
    47  		nested
    48  	)
    49  	st := init
    50  	for path != "" {
    51  		var idx int
    52  		switch st {
    53  		case init:
    54  			idx = strings.IndexAny(path, "/{")
    55  		case field:
    56  			idx = strings.IndexAny(path, ".=}")
    57  		case nested:
    58  			idx = strings.IndexAny(path, "/}")
    59  		}
    60  		if idx < 0 {
    61  			tokens = append(tokens, path)
    62  			break
    63  		}
    64  		switch r := path[idx]; r {
    65  		case '/', '.':
    66  		case '{':
    67  			st = field
    68  		case '=':
    69  			st = nested
    70  		case '}':
    71  			st = init
    72  		}
    73  		if idx == 0 {
    74  			tokens = append(tokens, path[idx:idx+1])
    75  		} else {
    76  			tokens = append(tokens, path[:idx], path[idx:idx+1])
    77  		}
    78  		path = path[idx+1:]
    79  	}
    80  
    81  	l := len(tokens)
    82  	// See
    83  	// https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ;
    84  	// although normal and backwards-compat logic here is to use the last index
    85  	// of a colon, if the final segment is a variable followed by a colon, the
    86  	// part following the colon must be a verb. Hence if the previous token is
    87  	// an end var marker, we switch the index we're looking for to Index instead
    88  	// of LastIndex, so that we correctly grab the remaining part of the path as
    89  	// the verb.
    90  	var penultimateTokenIsEndVar bool
    91  	switch l {
    92  	case 0, 1:
    93  		// Not enough to be variable so skip this logic and don't result in an
    94  		// invalid index
    95  	default:
    96  		penultimateTokenIsEndVar = tokens[l-2] == "}"
    97  	}
    98  	t := tokens[l-1]
    99  	var idx int
   100  	if penultimateTokenIsEndVar {
   101  		idx = strings.Index(t, ":")
   102  	} else {
   103  		idx = strings.LastIndex(t, ":")
   104  	}
   105  	if idx == 0 {
   106  		tokens, verb = tokens[:l-1], t[1:]
   107  	} else if idx > 0 {
   108  		tokens[l-1], verb = t[:idx], t[idx+1:]
   109  	}
   110  	tokens = append(tokens, eof)
   111  	return tokens, verb
   112  }
   113  
   114  // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto.
   115  type parser struct {
   116  	tokens   []string
   117  	accepted []string
   118  }
   119  
   120  // topLevelSegments is the target of this parser.
   121  func (p *parser) topLevelSegments() ([]segment, error) {
   122  	if _, err := p.accept(typeEOF); err == nil {
   123  		p.tokens = p.tokens[:0]
   124  		return []segment{literal(eof)}, nil
   125  	}
   126  	segs, err := p.segments()
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  	if _, err := p.accept(typeEOF); err != nil {
   131  		return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, ""))
   132  	}
   133  	return segs, nil
   134  }
   135  
   136  func (p *parser) segments() ([]segment, error) {
   137  	s, err := p.segment()
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  
   142  	segs := []segment{s}
   143  	for {
   144  		if _, err := p.accept("/"); err != nil {
   145  			return segs, nil
   146  		}
   147  		s, err := p.segment()
   148  		if err != nil {
   149  			return segs, err
   150  		}
   151  		segs = append(segs, s)
   152  	}
   153  }
   154  
   155  func (p *parser) segment() (segment, error) {
   156  	if _, err := p.accept("*"); err == nil {
   157  		return wildcard{}, nil
   158  	}
   159  	if _, err := p.accept("**"); err == nil {
   160  		return deepWildcard{}, nil
   161  	}
   162  	if l, err := p.literal(); err == nil {
   163  		return l, nil
   164  	}
   165  
   166  	v, err := p.variable()
   167  	if err != nil {
   168  		return nil, fmt.Errorf("segment neither wildcards, literal or variable: %w", err)
   169  	}
   170  	return v, nil
   171  }
   172  
   173  func (p *parser) literal() (segment, error) {
   174  	lit, err := p.accept(typeLiteral)
   175  	if err != nil {
   176  		return nil, err
   177  	}
   178  	return literal(lit), nil
   179  }
   180  
   181  func (p *parser) variable() (segment, error) {
   182  	if _, err := p.accept("{"); err != nil {
   183  		return nil, err
   184  	}
   185  
   186  	path, err := p.fieldPath()
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  
   191  	var segs []segment
   192  	if _, err := p.accept("="); err == nil {
   193  		segs, err = p.segments()
   194  		if err != nil {
   195  			return nil, fmt.Errorf("invalid segment in variable %q: %w", path, err)
   196  		}
   197  	} else {
   198  		segs = []segment{wildcard{}}
   199  	}
   200  
   201  	if _, err := p.accept("}"); err != nil {
   202  		return nil, fmt.Errorf("unterminated variable segment: %s", path)
   203  	}
   204  	return variable{
   205  		path:     path,
   206  		segments: segs,
   207  	}, nil
   208  }
   209  
   210  func (p *parser) fieldPath() (string, error) {
   211  	c, err := p.accept(typeIdent)
   212  	if err != nil {
   213  		return "", err
   214  	}
   215  	components := []string{c}
   216  	for {
   217  		if _, err := p.accept("."); err != nil {
   218  			return strings.Join(components, "."), nil
   219  		}
   220  		c, err := p.accept(typeIdent)
   221  		if err != nil {
   222  			return "", fmt.Errorf("invalid field path component: %w", err)
   223  		}
   224  		components = append(components, c)
   225  	}
   226  }
   227  
   228  // A termType is a type of terminal symbols.
   229  type termType string
   230  
   231  // These constants define some of valid values of termType.
   232  // They improve readability of parse functions.
   233  //
   234  // You can also use "/", "*", "**", "." or "=" as valid values.
   235  const (
   236  	typeIdent   = termType("ident")
   237  	typeLiteral = termType("literal")
   238  	typeEOF     = termType("$")
   239  )
   240  
   241  // eof is the terminal symbol which always appears at the end of token sequence.
   242  const eof = "\u0000"
   243  
   244  // accept tries to accept a token in "p".
   245  // This function consumes a token and returns it if it matches to the specified "term".
   246  // If it doesn't match, the function does not consume any tokens and return an error.
   247  func (p *parser) accept(term termType) (string, error) {
   248  	t := p.tokens[0]
   249  	switch term {
   250  	case "/", "*", "**", ".", "=", "{", "}":
   251  		if t != string(term) && t != "/" {
   252  			return "", fmt.Errorf("expected %q but got %q", term, t)
   253  		}
   254  	case typeEOF:
   255  		if t != eof {
   256  			return "", fmt.Errorf("expected EOF but got %q", t)
   257  		}
   258  	case typeIdent:
   259  		if err := expectIdent(t); err != nil {
   260  			return "", err
   261  		}
   262  	case typeLiteral:
   263  		if err := expectPChars(t); err != nil {
   264  			return "", err
   265  		}
   266  	default:
   267  		return "", fmt.Errorf("unknown termType %q", term)
   268  	}
   269  	p.tokens = p.tokens[1:]
   270  	p.accepted = append(p.accepted, t)
   271  	return t, nil
   272  }
   273  
   274  // expectPChars determines if "t" consists of only pchars defined in RFC3986.
   275  //
   276  // https://www.ietf.org/rfc/rfc3986.txt, P.49
   277  //
   278  //	pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
   279  //	unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
   280  //	sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
   281  //	              / "*" / "+" / "," / ";" / "="
   282  //	pct-encoded   = "%" HEXDIG HEXDIG
   283  func expectPChars(t string) error {
   284  	const (
   285  		init = iota
   286  		pct1
   287  		pct2
   288  	)
   289  	st := init
   290  	for _, r := range t {
   291  		if st != init {
   292  			if !isHexDigit(r) {
   293  				return fmt.Errorf("invalid hexdigit: %c(%U)", r, r)
   294  			}
   295  			switch st {
   296  			case pct1:
   297  				st = pct2
   298  			case pct2:
   299  				st = init
   300  			}
   301  			continue
   302  		}
   303  
   304  		// unreserved
   305  		switch {
   306  		case 'A' <= r && r <= 'Z':
   307  			continue
   308  		case 'a' <= r && r <= 'z':
   309  			continue
   310  		case '0' <= r && r <= '9':
   311  			continue
   312  		}
   313  		switch r {
   314  		case '-', '.', '_', '~':
   315  			// unreserved
   316  		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
   317  			// sub-delims
   318  		case ':', '@':
   319  			// rest of pchar
   320  		case '%':
   321  			// pct-encoded
   322  			st = pct1
   323  		default:
   324  			return fmt.Errorf("invalid character in path segment: %q(%U)", r, r)
   325  		}
   326  	}
   327  	if st != init {
   328  		return fmt.Errorf("invalid percent-encoding in %q", t)
   329  	}
   330  	return nil
   331  }
   332  
   333  // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*).
   334  func expectIdent(ident string) error {
   335  	if ident == "" {
   336  		return errors.New("empty identifier")
   337  	}
   338  	for pos, r := range ident {
   339  		switch {
   340  		case '0' <= r && r <= '9':
   341  			if pos == 0 {
   342  				return fmt.Errorf("identifier starting with digit: %s", ident)
   343  			}
   344  			continue
   345  		case 'A' <= r && r <= 'Z':
   346  			continue
   347  		case 'a' <= r && r <= 'z':
   348  			continue
   349  		case r == '_':
   350  			continue
   351  		default:
   352  			return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident)
   353  		}
   354  	}
   355  	return nil
   356  }
   357  
   358  func isHexDigit(r rune) bool {
   359  	switch {
   360  	case '0' <= r && r <= '9':
   361  		return true
   362  	case 'A' <= r && r <= 'F':
   363  		return true
   364  	case 'a' <= r && r <= 'f':
   365  		return true
   366  	}
   367  	return false
   368  }
   369  

View as plain text