...

Source file src/gopkg.in/ini.v1/parser.go

Documentation: gopkg.in/ini.v1

     1  // Copyright 2015 Unknwon
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License"): you may
     4  // not use this file except in compliance with the License. You may obtain
     5  // a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    11  // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
    12  // License for the specific language governing permissions and limitations
    13  // under the License.
    14  
    15  package ini
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"fmt"
    21  	"io"
    22  	"regexp"
    23  	"strconv"
    24  	"strings"
    25  	"unicode"
    26  )
    27  
// minReaderBufferSize is the smallest buffer size, in bytes, that newParser
// uses for its buffered reader; smaller requested sizes are raised to it.
const minReaderBufferSize = 4096

// pythonMultiline matches a line that starts with at least one tab, form feed
// or space. Group 1 captures that leading whitespace and group 2 the rest of
// the line. readPythonMultilines uses it to recognise indented continuation
// lines.
var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
    31  
// parserOptions holds the settings that control how a parser reads values.
type parserOptions struct {
	// IgnoreContinuation disables treating a trailing backslash as a request
	// to join the following line(s) onto the value.
	IgnoreContinuation          bool
	// IgnoreInlineComment disables stripping of "#" / ";" inline comments
	// from unquoted values.
	IgnoreInlineComment         bool
	// AllowPythonMultilineValues lets a value continue on following lines
	// that start with whitespace (see readPythonMultilines).
	AllowPythonMultilineValues  bool
	// SpaceBeforeInlineComment requires an inline comment marker to be
	// preceded by a space (" #" or " ;") to count as a comment.
	SpaceBeforeInlineComment    bool
	// UnescapeValueDoubleQuotes treats a leading `"` as a value quote and
	// replaces `\"` with `"` inside the quoted value.
	UnescapeValueDoubleQuotes   bool
	// UnescapeValueCommentSymbols replaces `\;` and `\#` with `;` and `#`
	// in unquoted values.
	UnescapeValueCommentSymbols bool
	// PreserveSurroundedQuote keeps a surrounding pair of single or double
	// quotes on the value instead of trimming it.
	PreserveSurroundedQuote     bool
	// DebugFunc, when non-nil, receives every formatted debug message.
	DebugFunc                   DebugFunc
	// ReaderBufferSize is the requested size of the buffered reader; values
	// below minReaderBufferSize are raised to that minimum by newParser.
	ReaderBufferSize            int
}
    43  
// parser carries the reading state used while parsing one input.
type parser struct {
	// buf is the buffered reader the input is consumed from.
	buf     *bufio.Reader
	// options are the settings the parser was created with.
	options parserOptions

	// isEOF is set by readUntil once the end of the input has been reached.
	isEOF   bool
	// count is the next number used for auto-increment ("-") key names; it
	// starts at 1 and is reset to 1 by parse whenever a new section begins.
	count   int
	// comment accumulates comment text until parse attaches it to the next
	// section or key and resets it.
	comment *bytes.Buffer
}
    52  
    53  func (p *parser) debug(format string, args ...interface{}) {
    54  	if p.options.DebugFunc != nil {
    55  		p.options.DebugFunc(fmt.Sprintf(format, args...))
    56  	}
    57  }
    58  
    59  func newParser(r io.Reader, opts parserOptions) *parser {
    60  	size := opts.ReaderBufferSize
    61  	if size < minReaderBufferSize {
    62  		size = minReaderBufferSize
    63  	}
    64  
    65  	return &parser{
    66  		buf:     bufio.NewReaderSize(r, size),
    67  		options: opts,
    68  		count:   1,
    69  		comment: &bytes.Buffer{},
    70  	}
    71  }
    72  
    73  // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
    74  // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
    75  func (p *parser) BOM() error {
    76  	mask, err := p.buf.Peek(2)
    77  	if err != nil && err != io.EOF {
    78  		return err
    79  	} else if len(mask) < 2 {
    80  		return nil
    81  	}
    82  
    83  	switch {
    84  	case mask[0] == 254 && mask[1] == 255:
    85  		fallthrough
    86  	case mask[0] == 255 && mask[1] == 254:
    87  		_, err = p.buf.Read(mask)
    88  		if err != nil {
    89  			return err
    90  		}
    91  	case mask[0] == 239 && mask[1] == 187:
    92  		mask, err := p.buf.Peek(3)
    93  		if err != nil && err != io.EOF {
    94  			return err
    95  		} else if len(mask) < 3 {
    96  			return nil
    97  		}
    98  		if mask[2] == 191 {
    99  			_, err = p.buf.Read(mask)
   100  			if err != nil {
   101  				return err
   102  			}
   103  		}
   104  	}
   105  	return nil
   106  }
   107  
   108  func (p *parser) readUntil(delim byte) ([]byte, error) {
   109  	data, err := p.buf.ReadBytes(delim)
   110  	if err != nil {
   111  		if err == io.EOF {
   112  			p.isEOF = true
   113  		} else {
   114  			return nil, err
   115  		}
   116  	}
   117  	return data, nil
   118  }
   119  
   120  func cleanComment(in []byte) ([]byte, bool) {
   121  	i := bytes.IndexAny(in, "#;")
   122  	if i == -1 {
   123  		return nil, false
   124  	}
   125  	return in[i:], true
   126  }
   127  
   128  func readKeyName(delimiters string, in []byte) (string, int, error) {
   129  	line := string(in)
   130  
   131  	// Check if key name surrounded by quotes.
   132  	var keyQuote string
   133  	if line[0] == '"' {
   134  		if len(line) > 6 && line[0:3] == `"""` {
   135  			keyQuote = `"""`
   136  		} else {
   137  			keyQuote = `"`
   138  		}
   139  	} else if line[0] == '`' {
   140  		keyQuote = "`"
   141  	}
   142  
   143  	// Get out key name
   144  	var endIdx int
   145  	if len(keyQuote) > 0 {
   146  		startIdx := len(keyQuote)
   147  		// FIXME: fail case -> """"""name"""=value
   148  		pos := strings.Index(line[startIdx:], keyQuote)
   149  		if pos == -1 {
   150  			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
   151  		}
   152  		pos += startIdx
   153  
   154  		// Find key-value delimiter
   155  		i := strings.IndexAny(line[pos+startIdx:], delimiters)
   156  		if i < 0 {
   157  			return "", -1, ErrDelimiterNotFound{line}
   158  		}
   159  		endIdx = pos + i
   160  		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
   161  	}
   162  
   163  	endIdx = strings.IndexAny(line, delimiters)
   164  	if endIdx < 0 {
   165  		return "", -1, ErrDelimiterNotFound{line}
   166  	}
   167  	if endIdx == 0 {
   168  		return "", -1, ErrEmptyKeyName{line}
   169  	}
   170  
   171  	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
   172  }
   173  
   174  func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
   175  	for {
   176  		data, err := p.readUntil('\n')
   177  		if err != nil {
   178  			return "", err
   179  		}
   180  		next := string(data)
   181  
   182  		pos := strings.LastIndex(next, valQuote)
   183  		if pos > -1 {
   184  			val += next[:pos]
   185  
   186  			comment, has := cleanComment([]byte(next[pos:]))
   187  			if has {
   188  				p.comment.Write(bytes.TrimSpace(comment))
   189  			}
   190  			break
   191  		}
   192  		val += next
   193  		if p.isEOF {
   194  			return "", fmt.Errorf("missing closing key quote from %q to %q", line, next)
   195  		}
   196  	}
   197  	return val, nil
   198  }
   199  
   200  func (p *parser) readContinuationLines(val string) (string, error) {
   201  	for {
   202  		data, err := p.readUntil('\n')
   203  		if err != nil {
   204  			return "", err
   205  		}
   206  		next := strings.TrimSpace(string(data))
   207  
   208  		if len(next) == 0 {
   209  			break
   210  		}
   211  		val += next
   212  		if val[len(val)-1] != '\\' {
   213  			break
   214  		}
   215  		val = val[:len(val)-1]
   216  	}
   217  	return val, nil
   218  }
   219  
   220  // hasSurroundedQuote check if and only if the first and last characters
   221  // are quotes \" or \'.
   222  // It returns false if any other parts also contain same kind of quotes.
   223  func hasSurroundedQuote(in string, quote byte) bool {
   224  	return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
   225  		strings.IndexByte(in[1:], quote) == len(in)-2
   226  }
   227  
   228  func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
   229  
   230  	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
   231  	if len(line) == 0 {
   232  		if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
   233  			return p.readPythonMultilines(line, bufferSize)
   234  		}
   235  		return "", nil
   236  	}
   237  
   238  	var valQuote string
   239  	if len(line) > 3 && line[0:3] == `"""` {
   240  		valQuote = `"""`
   241  	} else if line[0] == '`' {
   242  		valQuote = "`"
   243  	} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
   244  		valQuote = `"`
   245  	}
   246  
   247  	if len(valQuote) > 0 {
   248  		startIdx := len(valQuote)
   249  		pos := strings.LastIndex(line[startIdx:], valQuote)
   250  		// Check for multi-line value
   251  		if pos == -1 {
   252  			return p.readMultilines(line, line[startIdx:], valQuote)
   253  		}
   254  
   255  		if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
   256  			return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
   257  		}
   258  		return line[startIdx : pos+startIdx], nil
   259  	}
   260  
   261  	lastChar := line[len(line)-1]
   262  	// Won't be able to reach here if value only contains whitespace
   263  	line = strings.TrimSpace(line)
   264  	trimmedLastChar := line[len(line)-1]
   265  
   266  	// Check continuation lines when desired
   267  	if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
   268  		return p.readContinuationLines(line[:len(line)-1])
   269  	}
   270  
   271  	// Check if ignore inline comment
   272  	if !p.options.IgnoreInlineComment {
   273  		var i int
   274  		if p.options.SpaceBeforeInlineComment {
   275  			i = strings.Index(line, " #")
   276  			if i == -1 {
   277  				i = strings.Index(line, " ;")
   278  			}
   279  
   280  		} else {
   281  			i = strings.IndexAny(line, "#;")
   282  		}
   283  
   284  		if i > -1 {
   285  			p.comment.WriteString(line[i:])
   286  			line = strings.TrimSpace(line[:i])
   287  		}
   288  
   289  	}
   290  
   291  	// Trim single and double quotes
   292  	if (hasSurroundedQuote(line, '\'') ||
   293  		hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
   294  		line = line[1 : len(line)-1]
   295  	} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
   296  		line = strings.ReplaceAll(line, `\;`, ";")
   297  		line = strings.ReplaceAll(line, `\#`, "#")
   298  	} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
   299  		return p.readPythonMultilines(line, bufferSize)
   300  	}
   301  
   302  	return line, nil
   303  }
   304  
   305  func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
   306  	parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
   307  	peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
   308  
   309  	for {
   310  		peekData, peekErr := peekBuffer.ReadBytes('\n')
   311  		if peekErr != nil && peekErr != io.EOF {
   312  			p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
   313  			return "", peekErr
   314  		}
   315  
   316  		p.debug("readPythonMultilines: parsing %q", string(peekData))
   317  
   318  		peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
   319  		p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
   320  		for n, v := range peekMatches {
   321  			p.debug("   %d: %q", n, v)
   322  		}
   323  
   324  		// Return if not a Python multiline value.
   325  		if len(peekMatches) != 3 {
   326  			p.debug("readPythonMultilines: end of value, got: %q", line)
   327  			return line, nil
   328  		}
   329  
   330  		// Advance the parser reader (buffer) in-sync with the peek buffer.
   331  		_, err := p.buf.Discard(len(peekData))
   332  		if err != nil {
   333  			p.debug("readPythonMultilines: failed to skip to the end, returning error")
   334  			return "", err
   335  		}
   336  
   337  		line += "\n" + peekMatches[0]
   338  	}
   339  }
   340  
   341  // parse parses data through an io.Reader.
   342  func (f *File) parse(reader io.Reader) (err error) {
   343  	p := newParser(reader, parserOptions{
   344  		IgnoreContinuation:          f.options.IgnoreContinuation,
   345  		IgnoreInlineComment:         f.options.IgnoreInlineComment,
   346  		AllowPythonMultilineValues:  f.options.AllowPythonMultilineValues,
   347  		SpaceBeforeInlineComment:    f.options.SpaceBeforeInlineComment,
   348  		UnescapeValueDoubleQuotes:   f.options.UnescapeValueDoubleQuotes,
   349  		UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
   350  		PreserveSurroundedQuote:     f.options.PreserveSurroundedQuote,
   351  		DebugFunc:                   f.options.DebugFunc,
   352  		ReaderBufferSize:            f.options.ReaderBufferSize,
   353  	})
   354  	if err = p.BOM(); err != nil {
   355  		return fmt.Errorf("BOM: %v", err)
   356  	}
   357  
   358  	// Ignore error because default section name is never empty string.
   359  	name := DefaultSection
   360  	if f.options.Insensitive || f.options.InsensitiveSections {
   361  		name = strings.ToLower(DefaultSection)
   362  	}
   363  	section, _ := f.NewSection(name)
   364  
   365  	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
   366  	var isLastValueEmpty bool
   367  	var lastRegularKey *Key
   368  
   369  	var line []byte
   370  	var inUnparseableSection bool
   371  
   372  	// NOTE: Iterate and increase `currentPeekSize` until
   373  	// the size of the parser buffer is found.
   374  	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
   375  	parserBufferSize := 0
   376  	// NOTE: Peek 4kb at a time.
   377  	currentPeekSize := minReaderBufferSize
   378  
   379  	if f.options.AllowPythonMultilineValues {
   380  		for {
   381  			peekBytes, _ := p.buf.Peek(currentPeekSize)
   382  			peekBytesLength := len(peekBytes)
   383  
   384  			if parserBufferSize >= peekBytesLength {
   385  				break
   386  			}
   387  
   388  			currentPeekSize *= 2
   389  			parserBufferSize = peekBytesLength
   390  		}
   391  	}
   392  
   393  	for !p.isEOF {
   394  		line, err = p.readUntil('\n')
   395  		if err != nil {
   396  			return err
   397  		}
   398  
   399  		if f.options.AllowNestedValues &&
   400  			isLastValueEmpty && len(line) > 0 {
   401  			if line[0] == ' ' || line[0] == '\t' {
   402  				err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
   403  				if err != nil {
   404  					return err
   405  				}
   406  				continue
   407  			}
   408  		}
   409  
   410  		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
   411  		if len(line) == 0 {
   412  			continue
   413  		}
   414  
   415  		// Comments
   416  		if line[0] == '#' || line[0] == ';' {
   417  			// Note: we do not care ending line break,
   418  			// it is needed for adding second line,
   419  			// so just clean it once at the end when set to value.
   420  			p.comment.Write(line)
   421  			continue
   422  		}
   423  
   424  		// Section
   425  		if line[0] == '[' {
   426  			// Read to the next ']' (TODO: support quoted strings)
   427  			closeIdx := bytes.LastIndexByte(line, ']')
   428  			if closeIdx == -1 {
   429  				return fmt.Errorf("unclosed section: %s", line)
   430  			}
   431  
   432  			name := string(line[1:closeIdx])
   433  			section, err = f.NewSection(name)
   434  			if err != nil {
   435  				return err
   436  			}
   437  
   438  			comment, has := cleanComment(line[closeIdx+1:])
   439  			if has {
   440  				p.comment.Write(comment)
   441  			}
   442  
   443  			section.Comment = strings.TrimSpace(p.comment.String())
   444  
   445  			// Reset auto-counter and comments
   446  			p.comment.Reset()
   447  			p.count = 1
   448  			// Nested values can't span sections
   449  			isLastValueEmpty = false
   450  
   451  			inUnparseableSection = false
   452  			for i := range f.options.UnparseableSections {
   453  				if f.options.UnparseableSections[i] == name ||
   454  					((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) {
   455  					inUnparseableSection = true
   456  					continue
   457  				}
   458  			}
   459  			continue
   460  		}
   461  
   462  		if inUnparseableSection {
   463  			section.isRawSection = true
   464  			section.rawBody += string(line)
   465  			continue
   466  		}
   467  
   468  		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
   469  		if err != nil {
   470  			switch {
   471  			// Treat as boolean key when desired, and whole line is key name.
   472  			case IsErrDelimiterNotFound(err):
   473  				switch {
   474  				case f.options.AllowBooleanKeys:
   475  					kname, err := p.readValue(line, parserBufferSize)
   476  					if err != nil {
   477  						return err
   478  					}
   479  					key, err := section.NewBooleanKey(kname)
   480  					if err != nil {
   481  						return err
   482  					}
   483  					key.Comment = strings.TrimSpace(p.comment.String())
   484  					p.comment.Reset()
   485  					continue
   486  
   487  				case f.options.SkipUnrecognizableLines:
   488  					continue
   489  				}
   490  			case IsErrEmptyKeyName(err) && f.options.SkipUnrecognizableLines:
   491  				continue
   492  			}
   493  			return err
   494  		}
   495  
   496  		// Auto increment.
   497  		isAutoIncr := false
   498  		if kname == "-" {
   499  			isAutoIncr = true
   500  			kname = "#" + strconv.Itoa(p.count)
   501  			p.count++
   502  		}
   503  
   504  		value, err := p.readValue(line[offset:], parserBufferSize)
   505  		if err != nil {
   506  			return err
   507  		}
   508  		isLastValueEmpty = len(value) == 0
   509  
   510  		key, err := section.NewKey(kname, value)
   511  		if err != nil {
   512  			return err
   513  		}
   514  		key.isAutoIncrement = isAutoIncr
   515  		key.Comment = strings.TrimSpace(p.comment.String())
   516  		p.comment.Reset()
   517  		lastRegularKey = key
   518  	}
   519  	return nil
   520  }
   521  

View as plain text