...

Source file src/github.com/gorilla/mux/regexp.go

Documentation: github.com/gorilla/mux

     1  // Copyright 2012 The Gorilla Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mux
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"net/http"
    11  	"net/url"
    12  	"regexp"
    13  	"strconv"
    14  	"strings"
    15  )
    16  
// routeRegexpOptions carries the matching options that influence how a
// routeRegexp is built and applied.
type routeRegexpOptions struct {
	// strictSlash makes a single trailing slash optional when matching.
	// newRouteRegexp honours it only for regexpTypePath and clears it for
	// every other type.
	strictSlash bool
	// useEncodedPath makes Match test the URL's escaped path
	// (URL.EscapedPath) instead of URL.Path.
	useEncodedPath bool
}
    21  
// regexpType identifies which part of a request a routeRegexp is matched
// against; newRouteRegexp uses it to pick the default variable pattern and
// the anchoring of the assembled regexp.
type regexpType int

const (
	// regexpTypePath matches the request path; variables default to "[^/]+".
	regexpTypePath regexpType = iota
	// regexpTypeHost matches the request host; variables default to "[^.]+".
	regexpTypeHost
	// regexpTypePrefix is built like a path regexp but is not anchored with
	// a trailing '$'.
	regexpTypePrefix
	// regexpTypeQuery matches a single "key=value" query pair; variables
	// default to ".*".
	regexpTypeQuery
)
    30  
    31  // newRouteRegexp parses a route template and returns a routeRegexp,
    32  // used to match a host, a path or a query string.
    33  //
    34  // It will extract named variables, assemble a regexp to be matched, create
    35  // a "reverse" template to build URLs and compile regexps to validate variable
    36  // values used in URL building.
    37  //
    38  // Previously we accepted only Python-like identifiers for variable
    39  // names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that
    40  // name and pattern can't be empty, and names can't contain a colon.
    41  func newRouteRegexp(tpl string, typ regexpType, options routeRegexpOptions) (*routeRegexp, error) {
    42  	// Check if it is well-formed.
    43  	idxs, errBraces := braceIndices(tpl)
    44  	if errBraces != nil {
    45  		return nil, errBraces
    46  	}
    47  	// Backup the original.
    48  	template := tpl
    49  	// Now let's parse it.
    50  	defaultPattern := "[^/]+"
    51  	if typ == regexpTypeQuery {
    52  		defaultPattern = ".*"
    53  	} else if typ == regexpTypeHost {
    54  		defaultPattern = "[^.]+"
    55  	}
    56  	// Only match strict slash if not matching
    57  	if typ != regexpTypePath {
    58  		options.strictSlash = false
    59  	}
    60  	// Set a flag for strictSlash.
    61  	endSlash := false
    62  	if options.strictSlash && strings.HasSuffix(tpl, "/") {
    63  		tpl = tpl[:len(tpl)-1]
    64  		endSlash = true
    65  	}
    66  	varsN := make([]string, len(idxs)/2)
    67  	varsR := make([]*regexp.Regexp, len(idxs)/2)
    68  	pattern := bytes.NewBufferString("")
    69  	pattern.WriteByte('^')
    70  	reverse := bytes.NewBufferString("")
    71  	var end int
    72  	var err error
    73  	for i := 0; i < len(idxs); i += 2 {
    74  		// Set all values we are interested in.
    75  		raw := tpl[end:idxs[i]]
    76  		end = idxs[i+1]
    77  		parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2)
    78  		name := parts[0]
    79  		patt := defaultPattern
    80  		if len(parts) == 2 {
    81  			patt = parts[1]
    82  		}
    83  		// Name or pattern can't be empty.
    84  		if name == "" || patt == "" {
    85  			return nil, fmt.Errorf("mux: missing name or pattern in %q",
    86  				tpl[idxs[i]:end])
    87  		}
    88  		// Build the regexp pattern.
    89  		fmt.Fprintf(pattern, "%s(?P<%s>%s)", regexp.QuoteMeta(raw), varGroupName(i/2), patt)
    90  
    91  		// Build the reverse template.
    92  		fmt.Fprintf(reverse, "%s%%s", raw)
    93  
    94  		// Append variable name and compiled pattern.
    95  		varsN[i/2] = name
    96  		varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
    97  		if err != nil {
    98  			return nil, err
    99  		}
   100  	}
   101  	// Add the remaining.
   102  	raw := tpl[end:]
   103  	pattern.WriteString(regexp.QuoteMeta(raw))
   104  	if options.strictSlash {
   105  		pattern.WriteString("[/]?")
   106  	}
   107  	if typ == regexpTypeQuery {
   108  		// Add the default pattern if the query value is empty
   109  		if queryVal := strings.SplitN(template, "=", 2)[1]; queryVal == "" {
   110  			pattern.WriteString(defaultPattern)
   111  		}
   112  	}
   113  	if typ != regexpTypePrefix {
   114  		pattern.WriteByte('$')
   115  	}
   116  
   117  	var wildcardHostPort bool
   118  	if typ == regexpTypeHost {
   119  		if !strings.Contains(pattern.String(), ":") {
   120  			wildcardHostPort = true
   121  		}
   122  	}
   123  	reverse.WriteString(raw)
   124  	if endSlash {
   125  		reverse.WriteByte('/')
   126  	}
   127  	// Compile full regexp.
   128  	reg, errCompile := regexp.Compile(pattern.String())
   129  	if errCompile != nil {
   130  		return nil, errCompile
   131  	}
   132  
   133  	// Check for capturing groups which used to work in older versions
   134  	if reg.NumSubexp() != len(idxs)/2 {
   135  		panic(fmt.Sprintf("route %s contains capture groups in its regexp. ", template) +
   136  			"Only non-capturing groups are accepted: e.g. (?:pattern) instead of (pattern)")
   137  	}
   138  
   139  	// Done!
   140  	return &routeRegexp{
   141  		template:         template,
   142  		regexpType:       typ,
   143  		options:          options,
   144  		regexp:           reg,
   145  		reverse:          reverse.String(),
   146  		varsN:            varsN,
   147  		varsR:            varsR,
   148  		wildcardHostPort: wildcardHostPort,
   149  	}, nil
   150  }
   151  
// routeRegexp stores a regexp to match a host, path or query pair and the
// information needed to collect and validate route variables.
type routeRegexp struct {
	// The unmodified template.
	template string
	// The type of match (path, host, prefix or query).
	regexpType regexpType
	// Options for matching.
	options routeRegexpOptions
	// Expanded regexp, with one named group per template variable.
	regexp *regexp.Regexp
	// Reverse template: a fmt format string with one %s per variable,
	// used to build URLs.
	reverse string
	// Variable names, in template order.
	varsN []string
	// Variable regexps (validators), parallel to varsN.
	varsR []*regexp.Regexp
	// Wildcard host-port (no strict port match in hostname): when set, the
	// port is stripped from the request host before matching.
	wildcardHostPort bool
}
   172  
   173  // Match matches the regexp against the URL host or path.
   174  func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool {
   175  	if r.regexpType == regexpTypeHost {
   176  		host := getHost(req)
   177  		if r.wildcardHostPort {
   178  			// Don't be strict on the port match
   179  			if i := strings.Index(host, ":"); i != -1 {
   180  				host = host[:i]
   181  			}
   182  		}
   183  		return r.regexp.MatchString(host)
   184  	}
   185  
   186  	if r.regexpType == regexpTypeQuery {
   187  		return r.matchQueryString(req)
   188  	}
   189  	path := req.URL.Path
   190  	if r.options.useEncodedPath {
   191  		path = req.URL.EscapedPath()
   192  	}
   193  	return r.regexp.MatchString(path)
   194  }
   195  
   196  // url builds a URL part using the given values.
   197  func (r *routeRegexp) url(values map[string]string) (string, error) {
   198  	urlValues := make([]interface{}, len(r.varsN))
   199  	for k, v := range r.varsN {
   200  		value, ok := values[v]
   201  		if !ok {
   202  			return "", fmt.Errorf("mux: missing route variable %q", v)
   203  		}
   204  		if r.regexpType == regexpTypeQuery {
   205  			value = url.QueryEscape(value)
   206  		}
   207  		urlValues[k] = value
   208  	}
   209  	rv := fmt.Sprintf(r.reverse, urlValues...)
   210  	if !r.regexp.MatchString(rv) {
   211  		// The URL is checked against the full regexp, instead of checking
   212  		// individual variables. This is faster but to provide a good error
   213  		// message, we check individual regexps if the URL doesn't match.
   214  		for k, v := range r.varsN {
   215  			if !r.varsR[k].MatchString(values[v]) {
   216  				return "", fmt.Errorf(
   217  					"mux: variable %q doesn't match, expected %q", values[v],
   218  					r.varsR[k].String())
   219  			}
   220  		}
   221  	}
   222  	return rv, nil
   223  }
   224  
   225  // getURLQuery returns a single query parameter from a request URL.
   226  // For a URL with foo=bar&baz=ding, we return only the relevant key
   227  // value pair for the routeRegexp.
   228  func (r *routeRegexp) getURLQuery(req *http.Request) string {
   229  	if r.regexpType != regexpTypeQuery {
   230  		return ""
   231  	}
   232  	templateKey := strings.SplitN(r.template, "=", 2)[0]
   233  	val, ok := findFirstQueryKey(req.URL.RawQuery, templateKey)
   234  	if ok {
   235  		return templateKey + "=" + val
   236  	}
   237  	return ""
   238  }
   239  
   240  // findFirstQueryKey returns the same result as (*url.URL).Query()[key][0].
   241  // If key was not found, empty string and false is returned.
   242  func findFirstQueryKey(rawQuery, key string) (value string, ok bool) {
   243  	query := []byte(rawQuery)
   244  	for len(query) > 0 {
   245  		foundKey := query
   246  		if i := bytes.IndexAny(foundKey, "&;"); i >= 0 {
   247  			foundKey, query = foundKey[:i], foundKey[i+1:]
   248  		} else {
   249  			query = query[:0]
   250  		}
   251  		if len(foundKey) == 0 {
   252  			continue
   253  		}
   254  		var value []byte
   255  		if i := bytes.IndexByte(foundKey, '='); i >= 0 {
   256  			foundKey, value = foundKey[:i], foundKey[i+1:]
   257  		}
   258  		if len(foundKey) < len(key) {
   259  			// Cannot possibly be key.
   260  			continue
   261  		}
   262  		keyString, err := url.QueryUnescape(string(foundKey))
   263  		if err != nil {
   264  			continue
   265  		}
   266  		if keyString != key {
   267  			continue
   268  		}
   269  		valueString, err := url.QueryUnescape(string(value))
   270  		if err != nil {
   271  			continue
   272  		}
   273  		return valueString, true
   274  	}
   275  	return "", false
   276  }
   277  
   278  func (r *routeRegexp) matchQueryString(req *http.Request) bool {
   279  	return r.regexp.MatchString(r.getURLQuery(req))
   280  }
   281  
   282  // braceIndices returns the first level curly brace indices from a string.
   283  // It returns an error in case of unbalanced braces.
   284  func braceIndices(s string) ([]int, error) {
   285  	var level, idx int
   286  	var idxs []int
   287  	for i := 0; i < len(s); i++ {
   288  		switch s[i] {
   289  		case '{':
   290  			if level++; level == 1 {
   291  				idx = i
   292  			}
   293  		case '}':
   294  			if level--; level == 0 {
   295  				idxs = append(idxs, idx, i+1)
   296  			} else if level < 0 {
   297  				return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
   298  			}
   299  		}
   300  	}
   301  	if level != 0 {
   302  		return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
   303  	}
   304  	return idxs, nil
   305  }
   306  
   307  // varGroupName builds a capturing group name for the indexed variable.
   308  func varGroupName(idx int) string {
   309  	return "v" + strconv.Itoa(idx)
   310  }
   311  
   312  // ----------------------------------------------------------------------------
   313  // routeRegexpGroup
   314  // ----------------------------------------------------------------------------
   315  
// routeRegexpGroup groups the route matchers that carry variables.
type routeRegexpGroup struct {
	// host matches the request host; nil when there is no host matcher.
	host *routeRegexp
	// path matches the request path; nil when there is no path matcher.
	path *routeRegexp
	// queries holds one matcher per query pair.
	queries []*routeRegexp
}
   322  
   323  // setMatch extracts the variables from the URL once a route matches.
   324  func (v routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) {
   325  	// Store host variables.
   326  	if v.host != nil {
   327  		host := getHost(req)
   328  		if v.host.wildcardHostPort {
   329  			// Don't be strict on the port match
   330  			if i := strings.Index(host, ":"); i != -1 {
   331  				host = host[:i]
   332  			}
   333  		}
   334  		matches := v.host.regexp.FindStringSubmatchIndex(host)
   335  		if len(matches) > 0 {
   336  			extractVars(host, matches, v.host.varsN, m.Vars)
   337  		}
   338  	}
   339  	path := req.URL.Path
   340  	if r.useEncodedPath {
   341  		path = req.URL.EscapedPath()
   342  	}
   343  	// Store path variables.
   344  	if v.path != nil {
   345  		matches := v.path.regexp.FindStringSubmatchIndex(path)
   346  		if len(matches) > 0 {
   347  			extractVars(path, matches, v.path.varsN, m.Vars)
   348  			// Check if we should redirect.
   349  			if v.path.options.strictSlash {
   350  				p1 := strings.HasSuffix(path, "/")
   351  				p2 := strings.HasSuffix(v.path.template, "/")
   352  				if p1 != p2 {
   353  					u, _ := url.Parse(req.URL.String())
   354  					if p1 {
   355  						u.Path = u.Path[:len(u.Path)-1]
   356  					} else {
   357  						u.Path += "/"
   358  					}
   359  					m.Handler = http.RedirectHandler(u.String(), http.StatusMovedPermanently)
   360  				}
   361  			}
   362  		}
   363  	}
   364  	// Store query string variables.
   365  	for _, q := range v.queries {
   366  		queryURL := q.getURLQuery(req)
   367  		matches := q.regexp.FindStringSubmatchIndex(queryURL)
   368  		if len(matches) > 0 {
   369  			extractVars(queryURL, matches, q.varsN, m.Vars)
   370  		}
   371  	}
   372  }
   373  
   374  // getHost tries its best to return the request host.
   375  // According to section 14.23 of RFC 2616 the Host header
   376  // can include the port number if the default value of 80 is not used.
   377  func getHost(r *http.Request) string {
   378  	if r.URL.IsAbs() {
   379  		return r.URL.Host
   380  	}
   381  	return r.Host
   382  }
   383  
   384  func extractVars(input string, matches []int, names []string, output map[string]string) {
   385  	for i, name := range names {
   386  		output[name] = input[matches[2*i+2]:matches[2*i+3]]
   387  	}
   388  }
   389  

View as plain text