...

Source file src/github.com/pelletier/go-toml/parser.go

Documentation: github.com/pelletier/go-toml

     1  // TOML Parser.
     2  
     3  package toml
     4  
     5  import (
     6  	"errors"
     7  	"fmt"
     8  	"math"
     9  	"reflect"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  )
    14  
// tomlParser holds the state of one parsing run over a stream of tokens.
type tomlParser struct {
	flowIdx       int      // index in flow of the next token to hand out
	flow          []token  // token stream being parsed
	tree          *Tree    // root tree filled in while parsing
	currentTable  []string // key path of the table that assignments currently target
	seenTableKeys []string // table keys declared so far, used to reject duplicated tables
}
    22  
// tomlParserStateFn is one state of the parser: calling it processes some
// tokens and returns the state to run next, or nil to stop parsing.
type tomlParserStateFn func() tomlParserStateFn
    24  
    25  // Formats and panics an error message based on a token
    26  func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
    27  	panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
    28  }
    29  
    30  func (p *tomlParser) run() {
    31  	for state := p.parseStart; state != nil; {
    32  		state = state()
    33  	}
    34  }
    35  
    36  func (p *tomlParser) peek() *token {
    37  	if p.flowIdx >= len(p.flow) {
    38  		return nil
    39  	}
    40  	return &p.flow[p.flowIdx]
    41  }
    42  
    43  func (p *tomlParser) assume(typ tokenType) {
    44  	tok := p.getToken()
    45  	if tok == nil {
    46  		p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
    47  	}
    48  	if tok.typ != typ {
    49  		p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
    50  	}
    51  }
    52  
    53  func (p *tomlParser) getToken() *token {
    54  	tok := p.peek()
    55  	if tok == nil {
    56  		return nil
    57  	}
    58  	p.flowIdx++
    59  	return tok
    60  }
    61  
    62  func (p *tomlParser) parseStart() tomlParserStateFn {
    63  	tok := p.peek()
    64  
    65  	// end of stream, parsing is finished
    66  	if tok == nil {
    67  		return nil
    68  	}
    69  
    70  	switch tok.typ {
    71  	case tokenDoubleLeftBracket:
    72  		return p.parseGroupArray
    73  	case tokenLeftBracket:
    74  		return p.parseGroup
    75  	case tokenKey:
    76  		return p.parseAssign
    77  	case tokenEOF:
    78  		return nil
    79  	case tokenError:
    80  		p.raiseError(tok, "parsing error: %s", tok.String())
    81  	default:
    82  		p.raiseError(tok, "unexpected token %s", tok.typ)
    83  	}
    84  	return nil
    85  }
    86  
    87  func (p *tomlParser) parseGroupArray() tomlParserStateFn {
    88  	startToken := p.getToken() // discard the [[
    89  	key := p.getToken()
    90  	if key.typ != tokenKeyGroupArray {
    91  		p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
    92  	}
    93  
    94  	// get or create table array element at the indicated part in the path
    95  	keys, err := parseKey(key.val)
    96  	if err != nil {
    97  		p.raiseError(key, "invalid table array key: %s", err)
    98  	}
    99  	p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
   100  	destTree := p.tree.GetPath(keys)
   101  	var array []*Tree
   102  	if destTree == nil {
   103  		array = make([]*Tree, 0)
   104  	} else if target, ok := destTree.([]*Tree); ok && target != nil {
   105  		array = destTree.([]*Tree)
   106  	} else {
   107  		p.raiseError(key, "key %s is already assigned and not of type table array", key)
   108  	}
   109  	p.currentTable = keys
   110  
   111  	// add a new tree to the end of the table array
   112  	newTree := newTree()
   113  	newTree.position = startToken.Position
   114  	array = append(array, newTree)
   115  	p.tree.SetPath(p.currentTable, array)
   116  
   117  	// remove all keys that were children of this table array
   118  	prefix := key.val + "."
   119  	found := false
   120  	for ii := 0; ii < len(p.seenTableKeys); {
   121  		tableKey := p.seenTableKeys[ii]
   122  		if strings.HasPrefix(tableKey, prefix) {
   123  			p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
   124  		} else {
   125  			found = (tableKey == key.val)
   126  			ii++
   127  		}
   128  	}
   129  
   130  	// keep this key name from use by other kinds of assignments
   131  	if !found {
   132  		p.seenTableKeys = append(p.seenTableKeys, key.val)
   133  	}
   134  
   135  	// move to next parser state
   136  	p.assume(tokenDoubleRightBracket)
   137  	return p.parseStart
   138  }
   139  
   140  func (p *tomlParser) parseGroup() tomlParserStateFn {
   141  	startToken := p.getToken() // discard the [
   142  	key := p.getToken()
   143  	if key.typ != tokenKeyGroup {
   144  		p.raiseError(key, "unexpected token %s, was expecting a table key", key)
   145  	}
   146  	for _, item := range p.seenTableKeys {
   147  		if item == key.val {
   148  			p.raiseError(key, "duplicated tables")
   149  		}
   150  	}
   151  
   152  	p.seenTableKeys = append(p.seenTableKeys, key.val)
   153  	keys, err := parseKey(key.val)
   154  	if err != nil {
   155  		p.raiseError(key, "invalid table array key: %s", err)
   156  	}
   157  	if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
   158  		p.raiseError(key, "%s", err)
   159  	}
   160  	destTree := p.tree.GetPath(keys)
   161  	if target, ok := destTree.(*Tree); ok && target != nil && target.inline {
   162  		p.raiseError(key, "could not re-define exist inline table or its sub-table : %s",
   163  			strings.Join(keys, "."))
   164  	}
   165  	p.assume(tokenRightBracket)
   166  	p.currentTable = keys
   167  	return p.parseStart
   168  }
   169  
   170  func (p *tomlParser) parseAssign() tomlParserStateFn {
   171  	key := p.getToken()
   172  	p.assume(tokenEqual)
   173  
   174  	parsedKey, err := parseKey(key.val)
   175  	if err != nil {
   176  		p.raiseError(key, "invalid key: %s", err.Error())
   177  	}
   178  
   179  	value := p.parseRvalue()
   180  	var tableKey []string
   181  	if len(p.currentTable) > 0 {
   182  		tableKey = p.currentTable
   183  	} else {
   184  		tableKey = []string{}
   185  	}
   186  
   187  	prefixKey := parsedKey[0 : len(parsedKey)-1]
   188  	tableKey = append(tableKey, prefixKey...)
   189  
   190  	// find the table to assign, looking out for arrays of tables
   191  	var targetNode *Tree
   192  	switch node := p.tree.GetPath(tableKey).(type) {
   193  	case []*Tree:
   194  		targetNode = node[len(node)-1]
   195  	case *Tree:
   196  		targetNode = node
   197  	case nil:
   198  		// create intermediate
   199  		if err := p.tree.createSubTree(tableKey, key.Position); err != nil {
   200  			p.raiseError(key, "could not create intermediate group: %s", err)
   201  		}
   202  		targetNode = p.tree.GetPath(tableKey).(*Tree)
   203  	default:
   204  		p.raiseError(key, "Unknown table type for path: %s",
   205  			strings.Join(tableKey, "."))
   206  	}
   207  
   208  	if targetNode.inline {
   209  		p.raiseError(key, "could not add key or sub-table to exist inline table or its sub-table : %s",
   210  			strings.Join(tableKey, "."))
   211  	}
   212  
   213  	// assign value to the found table
   214  	keyVal := parsedKey[len(parsedKey)-1]
   215  	localKey := []string{keyVal}
   216  	finalKey := append(tableKey, keyVal)
   217  	if targetNode.GetPath(localKey) != nil {
   218  		p.raiseError(key, "The following key was defined twice: %s",
   219  			strings.Join(finalKey, "."))
   220  	}
   221  	var toInsert interface{}
   222  
   223  	switch value.(type) {
   224  	case *Tree, []*Tree:
   225  		toInsert = value
   226  	default:
   227  		toInsert = &tomlValue{value: value, position: key.Position}
   228  	}
   229  	targetNode.values[keyVal] = toInsert
   230  	return p.parseStart
   231  }
   232  
   233  var errInvalidUnderscore = errors.New("invalid use of _ in number")
   234  
   235  func numberContainsInvalidUnderscore(value string) error {
   236  	// For large numbers, you may use underscores between digits to enhance
   237  	// readability. Each underscore must be surrounded by at least one digit on
   238  	// each side.
   239  
   240  	hasBefore := false
   241  	for idx, r := range value {
   242  		if r == '_' {
   243  			if !hasBefore || idx+1 >= len(value) {
   244  				// can't end with an underscore
   245  				return errInvalidUnderscore
   246  			}
   247  		}
   248  		hasBefore = isDigit(r)
   249  	}
   250  	return nil
   251  }
   252  
   253  var errInvalidUnderscoreHex = errors.New("invalid use of _ in hex number")
   254  
   255  func hexNumberContainsInvalidUnderscore(value string) error {
   256  	hasBefore := false
   257  	for idx, r := range value {
   258  		if r == '_' {
   259  			if !hasBefore || idx+1 >= len(value) {
   260  				// can't end with an underscore
   261  				return errInvalidUnderscoreHex
   262  			}
   263  		}
   264  		hasBefore = isHexDigit(r)
   265  	}
   266  	return nil
   267  }
   268  
   269  func cleanupNumberToken(value string) string {
   270  	cleanedVal := strings.Replace(value, "_", "", -1)
   271  	return cleanedVal
   272  }
   273  
   274  func (p *tomlParser) parseRvalue() interface{} {
   275  	tok := p.getToken()
   276  	if tok == nil || tok.typ == tokenEOF {
   277  		p.raiseError(tok, "expecting a value")
   278  	}
   279  
   280  	switch tok.typ {
   281  	case tokenString:
   282  		return tok.val
   283  	case tokenTrue:
   284  		return true
   285  	case tokenFalse:
   286  		return false
   287  	case tokenInf:
   288  		if tok.val[0] == '-' {
   289  			return math.Inf(-1)
   290  		}
   291  		return math.Inf(1)
   292  	case tokenNan:
   293  		return math.NaN()
   294  	case tokenInteger:
   295  		cleanedVal := cleanupNumberToken(tok.val)
   296  		base := 10
   297  		s := cleanedVal
   298  		checkInvalidUnderscore := numberContainsInvalidUnderscore
   299  		if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
   300  			switch cleanedVal[1] {
   301  			case 'x':
   302  				checkInvalidUnderscore = hexNumberContainsInvalidUnderscore
   303  				base = 16
   304  			case 'o':
   305  				base = 8
   306  			case 'b':
   307  				base = 2
   308  			default:
   309  				panic("invalid base") // the lexer should catch this first
   310  			}
   311  			s = cleanedVal[2:]
   312  		}
   313  
   314  		err := checkInvalidUnderscore(tok.val)
   315  		if err != nil {
   316  			p.raiseError(tok, "%s", err)
   317  		}
   318  
   319  		var val interface{}
   320  		val, err = strconv.ParseInt(s, base, 64)
   321  		if err == nil {
   322  			return val
   323  		}
   324  
   325  		if s[0] != '-' {
   326  			if val, err = strconv.ParseUint(s, base, 64); err == nil {
   327  				return val
   328  			}
   329  		}
   330  		p.raiseError(tok, "%s", err)
   331  	case tokenFloat:
   332  		err := numberContainsInvalidUnderscore(tok.val)
   333  		if err != nil {
   334  			p.raiseError(tok, "%s", err)
   335  		}
   336  		cleanedVal := cleanupNumberToken(tok.val)
   337  		val, err := strconv.ParseFloat(cleanedVal, 64)
   338  		if err != nil {
   339  			p.raiseError(tok, "%s", err)
   340  		}
   341  		return val
   342  	case tokenLocalTime:
   343  		val, err := ParseLocalTime(tok.val)
   344  		if err != nil {
   345  			p.raiseError(tok, "%s", err)
   346  		}
   347  		return val
   348  	case tokenLocalDate:
   349  		// a local date may be followed by:
   350  		// * nothing: this is a local date
   351  		// * a local time: this is a local date-time
   352  
   353  		next := p.peek()
   354  		if next == nil || next.typ != tokenLocalTime {
   355  			val, err := ParseLocalDate(tok.val)
   356  			if err != nil {
   357  				p.raiseError(tok, "%s", err)
   358  			}
   359  			return val
   360  		}
   361  
   362  		localDate := tok
   363  		localTime := p.getToken()
   364  
   365  		next = p.peek()
   366  		if next == nil || next.typ != tokenTimeOffset {
   367  			v := localDate.val + "T" + localTime.val
   368  			val, err := ParseLocalDateTime(v)
   369  			if err != nil {
   370  				p.raiseError(tok, "%s", err)
   371  			}
   372  			return val
   373  		}
   374  
   375  		offset := p.getToken()
   376  
   377  		layout := time.RFC3339Nano
   378  		v := localDate.val + "T" + localTime.val + offset.val
   379  		val, err := time.ParseInLocation(layout, v, time.UTC)
   380  		if err != nil {
   381  			p.raiseError(tok, "%s", err)
   382  		}
   383  		return val
   384  	case tokenLeftBracket:
   385  		return p.parseArray()
   386  	case tokenLeftCurlyBrace:
   387  		return p.parseInlineTable()
   388  	case tokenEqual:
   389  		p.raiseError(tok, "cannot have multiple equals for the same key")
   390  	case tokenError:
   391  		p.raiseError(tok, "%s", tok)
   392  	default:
   393  		panic(fmt.Errorf("unhandled token: %v", tok))
   394  	}
   395  
   396  	return nil
   397  }
   398  
   399  func tokenIsComma(t *token) bool {
   400  	return t != nil && t.typ == tokenComma
   401  }
   402  
   403  func (p *tomlParser) parseInlineTable() *Tree {
   404  	tree := newTree()
   405  	var previous *token
   406  Loop:
   407  	for {
   408  		follow := p.peek()
   409  		if follow == nil || follow.typ == tokenEOF {
   410  			p.raiseError(follow, "unterminated inline table")
   411  		}
   412  		switch follow.typ {
   413  		case tokenRightCurlyBrace:
   414  			p.getToken()
   415  			break Loop
   416  		case tokenKey, tokenInteger, tokenString:
   417  			if !tokenIsComma(previous) && previous != nil {
   418  				p.raiseError(follow, "comma expected between fields in inline table")
   419  			}
   420  			key := p.getToken()
   421  			p.assume(tokenEqual)
   422  
   423  			parsedKey, err := parseKey(key.val)
   424  			if err != nil {
   425  				p.raiseError(key, "invalid key: %s", err)
   426  			}
   427  
   428  			value := p.parseRvalue()
   429  			tree.SetPath(parsedKey, value)
   430  		case tokenComma:
   431  			if tokenIsComma(previous) {
   432  				p.raiseError(follow, "need field between two commas in inline table")
   433  			}
   434  			p.getToken()
   435  		default:
   436  			p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
   437  		}
   438  		previous = follow
   439  	}
   440  	if tokenIsComma(previous) {
   441  		p.raiseError(previous, "trailing comma at the end of inline table")
   442  	}
   443  	tree.inline = true
   444  	return tree
   445  }
   446  
   447  func (p *tomlParser) parseArray() interface{} {
   448  	var array []interface{}
   449  	arrayType := reflect.TypeOf(newTree())
   450  	for {
   451  		follow := p.peek()
   452  		if follow == nil || follow.typ == tokenEOF {
   453  			p.raiseError(follow, "unterminated array")
   454  		}
   455  		if follow.typ == tokenRightBracket {
   456  			p.getToken()
   457  			break
   458  		}
   459  		val := p.parseRvalue()
   460  		if reflect.TypeOf(val) != arrayType {
   461  			arrayType = nil
   462  		}
   463  		array = append(array, val)
   464  		follow = p.peek()
   465  		if follow == nil || follow.typ == tokenEOF {
   466  			p.raiseError(follow, "unterminated array")
   467  		}
   468  		if follow.typ != tokenRightBracket && follow.typ != tokenComma {
   469  			p.raiseError(follow, "missing comma")
   470  		}
   471  		if follow.typ == tokenComma {
   472  			p.getToken()
   473  		}
   474  	}
   475  
   476  	// if the array is a mixed-type array or its length is 0,
   477  	// don't convert it to a table array
   478  	if len(array) <= 0 {
   479  		arrayType = nil
   480  	}
   481  	// An array of Trees is actually an array of inline
   482  	// tables, which is a shorthand for a table array. If the
   483  	// array was not converted from []interface{} to []*Tree,
   484  	// the two notations would not be equivalent.
   485  	if arrayType == reflect.TypeOf(newTree()) {
   486  		tomlArray := make([]*Tree, len(array))
   487  		for i, v := range array {
   488  			tomlArray[i] = v.(*Tree)
   489  		}
   490  		return tomlArray
   491  	}
   492  	return array
   493  }
   494  
   495  func parseToml(flow []token) *Tree {
   496  	result := newTree()
   497  	result.position = Position{1, 1}
   498  	parser := &tomlParser{
   499  		flowIdx:       0,
   500  		flow:          flow,
   501  		tree:          result,
   502  		currentTable:  make([]string, 0),
   503  		seenTableKeys: make([]string, 0),
   504  	}
   505  	parser.run()
   506  	return result
   507  }
   508  

View as plain text