parse.go

Documentation: github.com/BurntSushi/toml

     1  package toml
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"strconv"
     7  	"strings"
     8  	"time"
     9  	"unicode/utf8"
    10  
    11  	"github.com/BurntSushi/toml/internal"
    12  )
    13  
// parser holds the state used while turning the items produced by the lexer
// into a TOML document.
type parser struct {
	lx         *lexer   // Lexer that produces the items consumed by next().
	context    Key      // Full key for the current hash in scope.
	currentKey string   // Base key name for everything except hashes.
	pos        Position // Current position in the TOML file.
	tomlNext   bool     // Set by parse when the BURNTSUSHI_TOML_110 environment variable is present.

	ordered []Key // List of keys in the order that they appear in the TOML data.

	keyInfo   map[string]keyInfo     // Map keyname → info about the TOML key.
	mapping   map[string]interface{} // Map keyname → key value.
	implicits map[string]struct{}    // Record implicit keys (e.g. "key.group.names").
}
    27  
// keyInfo is the per-key metadata that setType stores in parser.keyInfo.
type keyInfo struct {
	pos      Position // Position recorded for the key by setType.
	tomlType tomlType // TOML type recorded for the key by setType.
}
    32  
    33  func parse(data string) (p *parser, err error) {
    34  	_, tomlNext := os.LookupEnv("BURNTSUSHI_TOML_110")
    35  
    36  	defer func() {
    37  		if r := recover(); r != nil {
    38  			if pErr, ok := r.(ParseError); ok {
    39  				pErr.input = data
    40  				err = pErr
    41  				return
    42  			}
    43  			panic(r)
    44  		}
    45  	}()
    46  
    47  	// Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
    48  	// which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add
    49  	// it anyway.
    50  	if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16
    51  		data = data[2:]
    52  	} else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8
    53  		data = data[3:]
    54  	}
    55  
    56  	// Examine first few bytes for NULL bytes; this probably means it's a UTF-16
    57  	// file (second byte in surrogate pair being NULL). Again, do this here to
    58  	// avoid having to deal with UTF-8/16 stuff in the lexer.
    59  	ex := 6
    60  	if len(data) < 6 {
    61  		ex = len(data)
    62  	}
    63  	if i := strings.IndexRune(data[:ex], 0); i > -1 {
    64  		return nil, ParseError{
    65  			Message:  "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8",
    66  			Position: Position{Line: 1, Start: i, Len: 1},
    67  			Line:     1,
    68  			input:    data,
    69  		}
    70  	}
    71  
    72  	p = &parser{
    73  		keyInfo:   make(map[string]keyInfo),
    74  		mapping:   make(map[string]interface{}),
    75  		lx:        lex(data, tomlNext),
    76  		ordered:   make([]Key, 0),
    77  		implicits: make(map[string]struct{}),
    78  		tomlNext:  tomlNext,
    79  	}
    80  	for {
    81  		item := p.next()
    82  		if item.typ == itemEOF {
    83  			break
    84  		}
    85  		p.topLevel(item)
    86  	}
    87  
    88  	return p, nil
    89  }
    90  
    91  func (p *parser) panicErr(it item, err error) {
    92  	panic(ParseError{
    93  		err:      err,
    94  		Position: it.pos,
    95  		Line:     it.pos.Len,
    96  		LastKey:  p.current(),
    97  	})
    98  }
    99  
   100  func (p *parser) panicItemf(it item, format string, v ...interface{}) {
   101  	panic(ParseError{
   102  		Message:  fmt.Sprintf(format, v...),
   103  		Position: it.pos,
   104  		Line:     it.pos.Len,
   105  		LastKey:  p.current(),
   106  	})
   107  }
   108  
   109  func (p *parser) panicf(format string, v ...interface{}) {
   110  	panic(ParseError{
   111  		Message:  fmt.Sprintf(format, v...),
   112  		Position: p.pos,
   113  		Line:     p.pos.Line,
   114  		LastKey:  p.current(),
   115  	})
   116  }
   117  
   118  func (p *parser) next() item {
   119  	it := p.lx.nextItem()
   120  	//fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val)
   121  	if it.typ == itemError {
   122  		if it.err != nil {
   123  			panic(ParseError{
   124  				Position: it.pos,
   125  				Line:     it.pos.Line,
   126  				LastKey:  p.current(),
   127  				err:      it.err,
   128  			})
   129  		}
   130  
   131  		p.panicItemf(it, "%s", it.val)
   132  	}
   133  	return it
   134  }
   135  
// nextPos is like next, but additionally records the returned item's position
// as the parser's current position (p.pos), which panicf uses to locate
// errors.
func (p *parser) nextPos() item {
	it := p.next()
	p.pos = it.pos
	return it
}
   141  
   142  func (p *parser) bug(format string, v ...interface{}) {
   143  	panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
   144  }
   145  
// expect reads the next item and asserts that it has type typ. A mismatch is
// reported through assertEqual, i.e. as an internal bug rather than as a user
// error.
func (p *parser) expect(typ itemType) item {
	it := p.next()
	p.assertEqual(typ, it.typ)
	return it
}
   151  
   152  func (p *parser) assertEqual(expected, got itemType) {
   153  	if expected != got {
   154  		p.bug("Expected '%s' but got '%s'.", expected, got)
   155  	}
   156  }
   157  
// topLevel handles one top-level item: a comment, a table header ([..]), an
// array-of-tables header ([[..]]), or a key/value pair. Any other item type is
// reported as an internal bug.
func (p *parser) topLevel(item item) {
	switch item.typ {
	case itemCommentStart: // # ..
		// The comment text itself is read and discarded.
		p.expect(itemText)
	case itemTableStart: // [ .. ]
		// nextPos also records the position of the first name part in p.pos.
		name := p.nextPos()

		// Collect every part of the (possibly dotted) table name.
		var key Key
		for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
			key = append(key, p.keyString(name))
		}
		p.assertEqual(itemTableEnd, name.typ)

		// Make the table the current context and record its type and order.
		p.addContext(key, false)
		p.setType("", tomlHash, item.pos)
		p.ordered = append(p.ordered, key)
	case itemArrayTableStart: // [[ .. ]]
		// nextPos also records the position of the first name part in p.pos.
		name := p.nextPos()

		// Collect every part of the (possibly dotted) table name.
		var key Key
		for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
			key = append(key, p.keyString(name))
		}
		p.assertEqual(itemArrayTableEnd, name.typ)

		// Append a new table to the array and make it the current context.
		p.addContext(key, true)
		p.setType("", tomlArrayHash, item.pos)
		p.ordered = append(p.ordered, key)
	case itemKeyStart: // key = ..
		// Remember the context so it can be restored after a dotted key
		// temporarily extends it.
		outerContext := p.context
		// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
		k := p.nextPos()
		var key Key
		for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
			key = append(key, p.keyString(k))
		}
		p.assertEqual(itemKeyEnd, k.typ)

		// The current key is the last part.
		p.currentKey = key[len(key)-1]

		// All the other parts (if any) are the context; need to set each part
		// as implicit. addImplicitContext replaces p.context on every
		// iteration, so each part is appended to the context built so far.
		context := key[:len(key)-1]
		for i := range context {
			p.addImplicitContext(append(p.context, context[i:i+1]...))
		}
		p.ordered = append(p.ordered, p.context.add(p.currentKey))

		// Set value; the recorded position is that of the value item.
		vItem := p.next()
		val, typ := p.value(vItem, false)
		p.set(p.currentKey, val, typ, vItem.pos)

		// Remove the context we added (preserving any context from [tbl] lines).
		p.context = outerContext
		p.currentKey = ""
	default:
		p.bug("Unexpected type at top level: %s", item.typ)
	}
}
   219  
   220  // Gets a string for a key (or part of a key in a table name).
   221  func (p *parser) keyString(it item) string {
   222  	switch it.typ {
   223  	case itemText:
   224  		return it.val
   225  	case itemString, itemMultilineString,
   226  		itemRawString, itemRawMultilineString:
   227  		s, _ := p.value(it, false)
   228  		return s.(string)
   229  	default:
   230  		p.bug("Unexpected key type: %s", it.typ)
   231  	}
   232  	panic("unreachable")
   233  }
   234  
// datetimeRepl normalizes a datetime string before it is parsed: lowercase
// "z" and "t" are upper-cased and a space is replaced with "T".
// valueDatetime applies it before trying the layouts in dtTypes.
var datetimeRepl = strings.NewReplacer(
	"z", "Z",
	"t", "T",
	" ", "T")
   239  
   240  // value translates an expected value from the lexer into a Go value wrapped
   241  // as an empty interface.
   242  func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
   243  	switch it.typ {
   244  	case itemString:
   245  		return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
   246  	case itemMultilineString:
   247  		return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
   248  	case itemRawString:
   249  		return it.val, p.typeOfPrimitive(it)
   250  	case itemRawMultilineString:
   251  		return stripFirstNewline(it.val), p.typeOfPrimitive(it)
   252  	case itemInteger:
   253  		return p.valueInteger(it)
   254  	case itemFloat:
   255  		return p.valueFloat(it)
   256  	case itemBool:
   257  		switch it.val {
   258  		case "true":
   259  			return true, p.typeOfPrimitive(it)
   260  		case "false":
   261  			return false, p.typeOfPrimitive(it)
   262  		default:
   263  			p.bug("Expected boolean value, but got '%s'.", it.val)
   264  		}
   265  	case itemDatetime:
   266  		return p.valueDatetime(it)
   267  	case itemArray:
   268  		return p.valueArray(it)
   269  	case itemInlineTableStart:
   270  		return p.valueInlineTable(it, parentIsArray)
   271  	default:
   272  		p.bug("Unexpected value type: %s", it.typ)
   273  	}
   274  	panic("unreachable")
   275  }
   276  
   277  func (p *parser) valueInteger(it item) (interface{}, tomlType) {
   278  	if !numUnderscoresOK(it.val) {
   279  		p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val)
   280  	}
   281  	if numHasLeadingZero(it.val) {
   282  		p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val)
   283  	}
   284  
   285  	num, err := strconv.ParseInt(it.val, 0, 64)
   286  	if err != nil {
   287  		// Distinguish integer values. Normally, it'd be a bug if the lexer
   288  		// provides an invalid integer, but it's possible that the number is
   289  		// out of range of valid values (which the lexer cannot determine).
   290  		// So mark the former as a bug but the latter as a legitimate user
   291  		// error.
   292  		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
   293  			p.panicErr(it, errParseRange{i: it.val, size: "int64"})
   294  		} else {
   295  			p.bug("Expected integer value, but got '%s'.", it.val)
   296  		}
   297  	}
   298  	return num, p.typeOfPrimitive(it)
   299  }
   300  
   301  func (p *parser) valueFloat(it item) (interface{}, tomlType) {
   302  	parts := strings.FieldsFunc(it.val, func(r rune) bool {
   303  		switch r {
   304  		case '.', 'e', 'E':
   305  			return true
   306  		}
   307  		return false
   308  	})
   309  	for _, part := range parts {
   310  		if !numUnderscoresOK(part) {
   311  			p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val)
   312  		}
   313  	}
   314  	if len(parts) > 0 && numHasLeadingZero(parts[0]) {
   315  		p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val)
   316  	}
   317  	if !numPeriodsOK(it.val) {
   318  		// As a special case, numbers like '123.' or '1.e2',
   319  		// which are valid as far as Go/strconv are concerned,
   320  		// must be rejected because TOML says that a fractional
   321  		// part consists of '.' followed by 1+ digits.
   322  		p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val)
   323  	}
   324  	val := strings.Replace(it.val, "_", "", -1)
   325  	if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does.
   326  		val = "nan"
   327  	}
   328  	num, err := strconv.ParseFloat(val, 64)
   329  	if err != nil {
   330  		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
   331  			p.panicErr(it, errParseRange{i: it.val, size: "float64"})
   332  		} else {
   333  			p.panicItemf(it, "Invalid float value: %q", it.val)
   334  		}
   335  	}
   336  	return num, p.typeOfPrimitive(it)
   337  }
   338  
// dtTypes lists the layouts valueDatetime tries, in order, when parsing a
// datetime value; the first layout that parses without error is used.
var dtTypes = []struct {
	fmt  string         // Layout passed to time.ParseInLocation.
	zone *time.Location // Location passed to time.ParseInLocation.
	next bool           // Entry is skipped unless the parser's tomlNext flag is set.
}{
	{time.RFC3339Nano, time.Local, false},
	{"2006-01-02T15:04:05.999999999", internal.LocalDatetime, false},
	{"2006-01-02", internal.LocalDate, false},
	{"15:04:05.999999999", internal.LocalTime, false},

	// tomlNext: the same forms without a seconds component.
	{"2006-01-02T15:04Z07:00", time.Local, true},
	{"2006-01-02T15:04", internal.LocalDatetime, true},
	{"15:04", internal.LocalTime, true},
}
   354  
   355  func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
   356  	it.val = datetimeRepl.Replace(it.val)
   357  	var (
   358  		t   time.Time
   359  		ok  bool
   360  		err error
   361  	)
   362  	for _, dt := range dtTypes {
   363  		if dt.next && !p.tomlNext {
   364  			continue
   365  		}
   366  		t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
   367  		if err == nil {
   368  			ok = true
   369  			break
   370  		}
   371  	}
   372  	if !ok {
   373  		p.panicItemf(it, "Invalid TOML Datetime: %q.", it.val)
   374  	}
   375  	return t, p.typeOfPrimitive(it)
   376  }
   377  
   378  func (p *parser) valueArray(it item) (interface{}, tomlType) {
   379  	p.setType(p.currentKey, tomlArray, it.pos)
   380  
   381  	var (
   382  		types []tomlType
   383  
   384  		// Initialize to a non-nil empty slice. This makes it consistent with
   385  		// how S = [] decodes into a non-nil slice inside something like struct
   386  		// { S []string }. See #338
   387  		array = []interface{}{}
   388  	)
   389  	for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
   390  		if it.typ == itemCommentStart {
   391  			p.expect(itemText)
   392  			continue
   393  		}
   394  
   395  		val, typ := p.value(it, true)
   396  		array = append(array, val)
   397  		types = append(types, typ)
   398  
   399  		// XXX: types isn't used here, we need it to record the accurate type
   400  		// information.
   401  		//
   402  		// Not entirely sure how to best store this; could use "key[0]",
   403  		// "key[1]" notation, or maybe store it on the Array type?
   404  		_ = types
   405  	}
   406  	return array, tomlArray
   407  }
   408  
// valueInlineTable parses an inline table ({ k = v, ... }) up to and including
// the closing itemInlineTableEnd and returns it as a map[string]interface{}.
//
// While the table is being read, the current key becomes part of the context
// so that the table's keys are stored beneath it. The context and current key
// are restored before returning. parentIsArray is passed to addContext as its
// `array` argument.
func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) {
	var (
		hash         = make(map[string]interface{})
		outerContext = p.context
		outerKey     = p.currentKey
	)

	// Descend into the table: the key it is assigned to becomes the last
	// element of the context.
	p.context = append(p.context, p.currentKey)
	prevContext := p.context
	p.currentKey = ""

	p.addImplicit(p.context)
	p.addContext(p.context, parentIsArray)

	// Loop over all table key/value pairs. Note that this `it` shadows the
	// parameter; inside the loop it is the item that starts each iteration.
	for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
		if it.typ == itemCommentStart {
			p.expect(itemText)
			continue
		}

		// Read all key parts.
		k := p.nextPos()
		var key Key
		for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
			key = append(key, p.keyString(k))
		}
		p.assertEqual(itemKeyEnd, k.typ)

		// The current key is the last part.
		p.currentKey = key[len(key)-1]

		// All the other parts (if any) are the context; need to set each part
		// as implicit.
		context := key[:len(key)-1]
		for i := range context {
			p.addImplicitContext(append(p.context, context[i:i+1]...))
		}
		p.ordered = append(p.ordered, p.context.add(p.currentKey))

		// Set the value.
		// NOTE(review): the position recorded here is that of the item that
		// started this iteration, not of the value item as in topLevel —
		// confirm this is intended.
		val, typ := p.value(p.next(), false)
		p.set(p.currentKey, val, typ, it.pos)
		hash[p.currentKey] = val

		// Restore context (undo any dotted-key context added above).
		p.context = prevContext
	}
	p.context = outerContext
	p.currentKey = outerKey
	return hash, tomlHash
}
   461  
   462  // numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
   463  // +/- signs, and base prefixes.
   464  func numHasLeadingZero(s string) bool {
   465  	if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x
   466  		return true
   467  	}
   468  	if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' {
   469  		return true
   470  	}
   471  	return false
   472  }
   473  
   474  // numUnderscoresOK checks whether each underscore in s is surrounded by
   475  // characters that are not underscores.
   476  func numUnderscoresOK(s string) bool {
   477  	switch s {
   478  	case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
   479  		return true
   480  	}
   481  	accept := false
   482  	for _, r := range s {
   483  		if r == '_' {
   484  			if !accept {
   485  				return false
   486  			}
   487  		}
   488  
   489  		// isHexadecimal is a superset of all the permissable characters
   490  		// surrounding an underscore.
   491  		accept = isHexadecimal(r)
   492  	}
   493  	return accept
   494  }
   495  
   496  // numPeriodsOK checks whether every period in s is followed by a digit.
   497  func numPeriodsOK(s string) bool {
   498  	period := false
   499  	for _, r := range s {
   500  		if period && !isDigit(r) {
   501  			return false
   502  		}
   503  		period = r == '.'
   504  	}
   505  	return !period
   506  }
   507  
// addContext sets the current context of the parser, where the context is
// either a hash or an array of hashes, depending on the value of the `array`
// parameter.
//
// Establishing the context also makes sure that the key isn't a duplicate, and
// will create implicit hashes automatically.
//
// key is the full key of the table. All parts but the last are walked (and
// created as implicit hashes where missing); the last part names the table
// itself. On return p.context is a fresh Key holding every part of key.
func (p *parser) addContext(key Key, array bool) {
	var ok bool

	// Always start at the top level and drill down for our context.
	hashContext := p.mapping
	keyContext := make(Key, 0)

	// We only need implicit hashes for key[0:-1]
	for _, k := range key[0 : len(key)-1] {
		_, ok = hashContext[k]
		keyContext = append(keyContext, k)

		// No key? Make an implicit hash and move on.
		if !ok {
			p.addImplicit(keyContext)
			hashContext[k] = make(map[string]interface{})
		}

		// If the hash context is actually an array of tables, then set
		// the hash context to the last element in that array.
		//
		// Otherwise, it better be a table, since this MUST be a key group (by
		// virtue of it not being the last element in a key).
		switch t := hashContext[k].(type) {
		case []map[string]interface{}:
			hashContext = t[len(t)-1]
		case map[string]interface{}:
			hashContext = t
		default:
			// The existing value is neither a table nor an array of tables.
			p.panicf("Key '%s' was already created as a hash.", keyContext)
		}
	}

	// The context now covers the parent parts only, which is what setValue
	// below needs; the last part is appended at the very end.
	p.context = keyContext
	if array {
		// If this is the first element for this array, then allocate a new
		// list of tables for it.
		k := key[len(key)-1]
		if _, ok := hashContext[k]; !ok {
			hashContext[k] = make([]map[string]interface{}, 0, 4)
		}

		// Add a new table. But make sure the key hasn't already been used
		// for something else.
		if hash, ok := hashContext[k].([]map[string]interface{}); ok {
			hashContext[k] = append(hash, make(map[string]interface{}))
		} else {
			p.panicf("Key '%s' was already created and cannot be used as an array.", key)
		}
	} else {
		// setValue performs the duplicate-key check for plain tables.
		p.setValue(key[len(key)-1], make(map[string]interface{}))
	}
	p.context = append(p.context, key[len(key)-1])
}
   567  
// set stores val under key in the current context and then records its type
// and position. It calls setValue first, then setType, so nothing is recorded
// in keyInfo when setValue rejects the key.
func (p *parser) set(key string, val interface{}, typ tomlType, pos Position) {
	p.setValue(key, val)
	p.setType(key, typ, pos)
}
   573  
// setValue sets the given key to the given value in the current context.
// It makes sure that the key hasn't already been defined, while accounting
// for implicit key groups (which may be redefined concretely once).
func (p *parser) setValue(key string, value interface{}) {
	var (
		tmpHash    interface{}
		ok         bool
		hash       = p.mapping
		keyContext Key
	)
	// Walk down from the top-level mapping to the hash for the current context.
	for _, k := range p.context {
		keyContext = append(keyContext, k)
		if tmpHash, ok = hash[k]; !ok {
			p.bug("Context for key '%s' has not been established.", keyContext)
		}
		switch t := tmpHash.(type) {
		case []map[string]interface{}:
			// The context is an array of tables. Pick the most recent table
			// defined as the current hash.
			hash = t[len(t)-1]
		case map[string]interface{}:
			hash = t
		default:
			// A context part holds a non-table value.
			p.panicf("Key '%s' has already been defined.", keyContext)
		}
	}
	keyContext = append(keyContext, key)

	if _, ok := hash[key]; ok {
		// Normally redefining keys isn't allowed, but the key could have been
		// defined implicitly and it's allowed to be redefined concretely. (See
		// the `valid/implicit-and-explicit-after.toml` in toml-test)
		//
		// But we have to make sure to stop marking it as an implicit. (So that
		// another redefinition provokes an error.)
		//
		// Note that since it has already been defined (as a hash), we don't
		// want to overwrite it. So our business is done.
		//
		// Keys whose recorded type is tomlArray are the exception: their
		// value is overwritten.
		if p.isArray(keyContext) {
			p.removeImplicit(keyContext)
			hash[key] = value
			return
		}
		if p.isImplicit(keyContext) {
			p.removeImplicit(keyContext)
			return
		}

		// Otherwise, we have a concrete key trying to override a previous
		// key, which is *always* wrong.
		p.panicf("Key '%s' has already been defined.", keyContext)
	}

	hash[key] = value
}
   629  
   630  // setType sets the type of a particular value at a given key. It should be
   631  // called immediately AFTER setValue.
   632  //
   633  // Note that if `key` is empty, then the type given will be applied to the
   634  // current context (which is either a table or an array of tables).
   635  func (p *parser) setType(key string, typ tomlType, pos Position) {
   636  	keyContext := make(Key, 0, len(p.context)+1)
   637  	keyContext = append(keyContext, p.context...)
   638  	if len(key) > 0 { // allow type setting for hashes
   639  		keyContext = append(keyContext, key)
   640  	}
   641  	// Special case to make empty keys ("" = 1) work.
   642  	// Without it it will set "" rather than `""`.
   643  	// TODO: why is this needed? And why is this only needed here?
   644  	if len(keyContext) == 0 {
   645  		keyContext = Key{""}
   646  	}
   647  	p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos}
   648  }
   649  
   650  // Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
   651  // "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
   652  func (p *parser) addImplicit(key Key)        { p.implicits[key.String()] = struct{}{} }
   653  func (p *parser) removeImplicit(key Key)     { delete(p.implicits, key.String()) }
   654  func (p *parser) isImplicit(key Key) bool    { _, ok := p.implicits[key.String()]; return ok }
   655  func (p *parser) isArray(key Key) bool       { return p.keyInfo[key.String()].tomlType == tomlArray }
   656  func (p *parser) addImplicitContext(key Key) { p.addImplicit(key); p.addContext(key, false) }
   657  
   658  // current returns the full key name of the current context.
   659  func (p *parser) current() string {
   660  	if len(p.currentKey) == 0 {
   661  		return p.context.String()
   662  	}
   663  	if len(p.context) == 0 {
   664  		return p.currentKey
   665  	}
   666  	return fmt.Sprintf("%s.%s", p.context, p.currentKey)
   667  }
   668  
   669  func stripFirstNewline(s string) string {
   670  	if len(s) > 0 && s[0] == '\n' {
   671  		return s[1:]
   672  	}
   673  	if len(s) > 1 && s[0] == '\r' && s[1] == '\n' {
   674  		return s[2:]
   675  	}
   676  	return s
   677  }
   678  
// stripEscapedNewlines removes line-ending backslashes, together with the
// whitespace that follows them, from multiline strings.
//
// A line-ending backslash is an unescaped \ followed only by whitespace until
// the next newline. After a line-ending backslash, all whitespace is removed
// until the next non-whitespace character.
//
// Other escape sequences are left untouched for replaceEscapes to handle.
func (p *parser) stripEscapedNewlines(s string) string {
	var b strings.Builder
	// i is the offset in s from which the search for the next backslash
	// starts. s is re-sliced after every removal, so i is always relative to
	// the part that has not been written to b yet.
	var i int
	for {
		ix := strings.Index(s[i:], `\`)
		if ix < 0 {
			// No more backslashes: flush the remainder.
			b.WriteString(s)
			return b.String()
		}
		i += ix

		if len(s) > i+1 && s[i+1] == '\\' {
			// Escaped backslash.
			i += 2
			continue
		}
		// Scan until the next non-whitespace.
		j := i + 1
	whitespaceLoop:
		for ; j < len(s); j++ {
			switch s[j] {
			case ' ', '\t', '\r', '\n':
			default:
				break whitespaceLoop
			}
		}
		if j == i+1 {
			// Not a whitespace escape.
			i++
			continue
		}
		if !strings.Contains(s[i:j], "\n") {
			// This is not a line-ending backslash.
			// (It's a bad escape sequence, but we can let
			// replaceEscapes catch it.)
			i++
			continue
		}
		// Keep everything before the backslash, drop the backslash and the
		// whitespace run, and continue with the rest.
		b.WriteString(s[:i])
		s = s[j:]
		i = 0
	}
}
   728  
// replaceEscapes decodes the backslash escape sequences in str and returns
// the resulting string. it is the item str came from and is only used to
// locate errors.
//
// A backslash followed by a space or tab is a user error. Any other unknown
// escape, or a backslash at the very end of str, is reported as an internal
// bug.
func (p *parser) replaceEscapes(it item, str string) string {
	replaced := make([]rune, 0, len(str))
	s := []byte(str)
	r := 0 // Byte offset of the next unread byte in s.
	for r < len(s) {
		if s[r] != '\\' {
			// Ordinary character: copy the whole (possibly multi-byte) rune.
			c, size := utf8.DecodeRune(s[r:])
			r += size
			replaced = append(replaced, c)
			continue
		}
		// Skip the backslash; s[r] is now the escape code.
		r += 1
		if r >= len(s) {
			p.bug("Escape sequence at end of string.")
			return ""
		}
		switch s[r] {
		default:
			p.bug("Expected valid escape code after \\, but got %q.", s[r])
		case ' ', '\t':
			p.panicItemf(it, "invalid escape: '\\%c'", s[r])
		case 'b':
			replaced = append(replaced, rune(0x0008))
			r += 1
		case 't':
			replaced = append(replaced, rune(0x0009))
			r += 1
		case 'n':
			replaced = append(replaced, rune(0x000A))
			r += 1
		case 'f':
			replaced = append(replaced, rune(0x000C))
			r += 1
		case 'r':
			replaced = append(replaced, rune(0x000D))
			r += 1
		case 'e':
			// NOTE(review): when tomlNext is off, r is not advanced, so the
			// next iteration copies the 'e' as a literal character.
			// Presumably the lexer rejects \e in that mode — confirm.
			if p.tomlNext {
				replaced = append(replaced, rune(0x001B))
				r += 1
			}
		case '"':
			replaced = append(replaced, rune(0x0022))
			r += 1
		case '\\':
			replaced = append(replaced, rune(0x005C))
			r += 1
		case 'x':
			// Two hex digits follow the 'x'. NOTE(review): as with \e, when
			// tomlNext is off the 'x' is copied as a literal character;
			// presumably the lexer rejects \x in that mode — confirm.
			if p.tomlNext {
				escaped := p.asciiEscapeToUnicode(it, s[r+1:r+3])
				replaced = append(replaced, escaped)
				r += 3
			}
		case 'u':
			// At this point, we know we have a Unicode escape of the form
			// `uXXXX` at [r, r+5). (Because the lexer guarantees this
			// for us.)
			escaped := p.asciiEscapeToUnicode(it, s[r+1:r+5])
			replaced = append(replaced, escaped)
			r += 5
		case 'U':
			// At this point, we know we have a Unicode escape of the form
			// `UXXXXXXXX` at [r, r+9). (Because the lexer guarantees this
			// for us.)
			escaped := p.asciiEscapeToUnicode(it, s[r+1:r+9])
			replaced = append(replaced, escaped)
			r += 9
		}
	}
	return string(replaced)
}
   800  
   801  func (p *parser) asciiEscapeToUnicode(it item, bs []byte) rune {
   802  	s := string(bs)
   803  	hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
   804  	if err != nil {
   805  		p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)
   806  	}
   807  	if !utf8.ValidRune(rune(hex)) {
   808  		p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s)
   809  	}
   810  	return rune(hex)
   811  }
   812
View as plain text