...

Source file src/edge-infra.dev/pkg/lib/ini/parser.go

Documentation: edge-infra.dev/pkg/lib/ini

     1  package ini
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"regexp"
     9  	"strconv"
    10  	"strings"
    11  	"unicode"
    12  )
    13  
    14  const minReaderBufferSize = 4096
    15  
    16  var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
    17  
    18  type parserOptions struct {
    19  	IgnoreContinuation          bool
    20  	IgnoreInlineComment         bool
    21  	AllowPythonMultilineValues  bool
    22  	SpaceBeforeInlineComment    bool
    23  	UnescapeValueDoubleQuotes   bool
    24  	UnescapeValueCommentSymbols bool
    25  	PreserveSurroundedQuote     bool
    26  	DebugFunc                   DebugFunc
    27  	ReaderBufferSize            int
    28  }
    29  
    30  type parser struct {
    31  	buf     *bufio.Reader
    32  	options parserOptions
    33  
    34  	isEOF   bool
    35  	count   int
    36  	comment *bytes.Buffer
    37  }
    38  
    39  func (p *parser) debug(format string, args ...interface{}) {
    40  	if p.options.DebugFunc != nil {
    41  		p.options.DebugFunc(fmt.Sprintf(format, args...))
    42  	}
    43  }
    44  
    45  func newParser(r io.Reader, opts parserOptions) *parser {
    46  	size := opts.ReaderBufferSize
    47  	if size < minReaderBufferSize {
    48  		size = minReaderBufferSize
    49  	}
    50  
    51  	return &parser{
    52  		buf:     bufio.NewReaderSize(r, size),
    53  		options: opts,
    54  		count:   1,
    55  		comment: &bytes.Buffer{},
    56  	}
    57  }
    58  
    59  // BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
    60  // http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
    61  func (p *parser) BOM() error {
    62  	mask, err := p.buf.Peek(2)
    63  	if err != nil && err != io.EOF {
    64  		return err
    65  	} else if len(mask) < 2 {
    66  		return nil
    67  	}
    68  
    69  	switch {
    70  	case mask[0] == 254 && mask[1] == 255:
    71  		fallthrough
    72  	case mask[0] == 255 && mask[1] == 254:
    73  		_, err = p.buf.Read(mask)
    74  		if err != nil {
    75  			return err
    76  		}
    77  	case mask[0] == 239 && mask[1] == 187:
    78  		mask, err := p.buf.Peek(3)
    79  		if err != nil && err != io.EOF {
    80  			return err
    81  		} else if len(mask) < 3 {
    82  			return nil
    83  		}
    84  		if mask[2] == 191 {
    85  			_, err = p.buf.Read(mask)
    86  			if err != nil {
    87  				return err
    88  			}
    89  		}
    90  	}
    91  	return nil
    92  }
    93  
    94  func (p *parser) readUntil(delim byte) ([]byte, error) {
    95  	data, err := p.buf.ReadBytes(delim)
    96  	if err != nil {
    97  		if err == io.EOF {
    98  			p.isEOF = true
    99  		} else {
   100  			return nil, err
   101  		}
   102  	}
   103  	return data, nil
   104  }
   105  
   106  func cleanComment(in []byte) ([]byte, bool) {
   107  	i := bytes.IndexAny(in, "#;")
   108  	if i == -1 {
   109  		return nil, false
   110  	}
   111  	return in[i:], true
   112  }
   113  
   114  func readKeyName(delimiters string, in []byte) (string, int, error) {
   115  	line := string(in)
   116  
   117  	// Check if key name surrounded by quotes.
   118  	var keyQuote string
   119  	if line[0] == '"' {
   120  		if len(line) > 6 && line[0:3] == `"""` {
   121  			keyQuote = `"""`
   122  		} else {
   123  			keyQuote = `"`
   124  		}
   125  	} else if line[0] == '`' {
   126  		keyQuote = "`"
   127  	}
   128  
   129  	// Get out key name
   130  	var endIdx int
   131  	if len(keyQuote) > 0 {
   132  		startIdx := len(keyQuote)
   133  		// FIXME: fail case -> """"""name"""=value
   134  		pos := strings.Index(line[startIdx:], keyQuote)
   135  		if pos == -1 {
   136  			return "", -1, fmt.Errorf("missing closing key quote: %s", line)
   137  		}
   138  		pos += startIdx
   139  
   140  		// Find key-value delimiter
   141  		i := strings.IndexAny(line[pos+startIdx:], delimiters)
   142  		if i < 0 {
   143  			return "", -1, ErrDelimiterNotFound{line}
   144  		}
   145  		endIdx = pos + i
   146  		return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
   147  	}
   148  
   149  	endIdx = strings.IndexAny(line, delimiters)
   150  	if endIdx < 0 {
   151  		return "", -1, ErrDelimiterNotFound{line}
   152  	}
   153  	if endIdx == 0 {
   154  		return "", -1, ErrEmptyKeyName{line}
   155  	}
   156  
   157  	return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
   158  }
   159  
   160  func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
   161  	for {
   162  		data, err := p.readUntil('\n')
   163  		if err != nil {
   164  			return "", err
   165  		}
   166  		next := string(data)
   167  
   168  		pos := strings.LastIndex(next, valQuote)
   169  		if pos > -1 {
   170  			val += next[:pos]
   171  
   172  			comment, has := cleanComment([]byte(next[pos:]))
   173  			if has {
   174  				p.comment.Write(bytes.TrimSpace(comment))
   175  			}
   176  			break
   177  		}
   178  		val += next
   179  		if p.isEOF {
   180  			return "", fmt.Errorf("missing closing key quote from %q to %q", line, next)
   181  		}
   182  	}
   183  	return val, nil
   184  }
   185  
   186  func (p *parser) readContinuationLines(val string) (string, error) {
   187  	for {
   188  		data, err := p.readUntil('\n')
   189  		if err != nil {
   190  			return "", err
   191  		}
   192  		next := strings.TrimSpace(string(data))
   193  
   194  		if len(next) == 0 {
   195  			break
   196  		}
   197  		val += next
   198  		if val[len(val)-1] != '\\' {
   199  			break
   200  		}
   201  		val = val[:len(val)-1]
   202  	}
   203  	return val, nil
   204  }
   205  
   206  // hasSurroundedQuote check if and only if the first and last characters
   207  // are quotes \" or \'.
   208  // It returns false if any other parts also contain same kind of quotes.
   209  func hasSurroundedQuote(in string, quote byte) bool {
   210  	return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
   211  		strings.IndexByte(in[1:], quote) == len(in)-2
   212  }
   213  
   214  func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
   215  
   216  	line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
   217  	if len(line) == 0 {
   218  		if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
   219  			return p.readPythonMultilines(line, bufferSize)
   220  		}
   221  		return "", nil
   222  	}
   223  
   224  	var valQuote string
   225  	if len(line) > 3 && line[0:3] == `"""` {
   226  		valQuote = `"""`
   227  	} else if line[0] == '`' {
   228  		valQuote = "`"
   229  	} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
   230  		valQuote = `"`
   231  	}
   232  
   233  	if len(valQuote) > 0 {
   234  		startIdx := len(valQuote)
   235  		pos := strings.LastIndex(line[startIdx:], valQuote)
   236  		// Check for multi-line value
   237  		if pos == -1 {
   238  			return p.readMultilines(line, line[startIdx:], valQuote)
   239  		}
   240  
   241  		if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
   242  			return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
   243  		}
   244  		return line[startIdx : pos+startIdx], nil
   245  	}
   246  
   247  	lastChar := line[len(line)-1]
   248  	// Won't be able to reach here if value only contains whitespace
   249  	line = strings.TrimSpace(line)
   250  	trimmedLastChar := line[len(line)-1]
   251  
   252  	// Check continuation lines when desired
   253  	if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
   254  		return p.readContinuationLines(line[:len(line)-1])
   255  	}
   256  
   257  	// Check if ignore inline comment
   258  	if !p.options.IgnoreInlineComment {
   259  		var i int
   260  		if p.options.SpaceBeforeInlineComment {
   261  			i = strings.Index(line, " #")
   262  			if i == -1 {
   263  				i = strings.Index(line, " ;")
   264  			}
   265  
   266  		} else {
   267  			i = strings.IndexAny(line, "#;")
   268  		}
   269  
   270  		if i > -1 {
   271  			p.comment.WriteString(line[i:])
   272  			line = strings.TrimSpace(line[:i])
   273  		}
   274  
   275  	}
   276  
   277  	// Trim single and double quotes
   278  	if (hasSurroundedQuote(line, '\'') ||
   279  		hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
   280  		line = line[1 : len(line)-1]
   281  	} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
   282  		line = strings.ReplaceAll(line, `\;`, ";")
   283  		line = strings.ReplaceAll(line, `\#`, "#")
   284  	} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
   285  		return p.readPythonMultilines(line, bufferSize)
   286  	}
   287  
   288  	return line, nil
   289  }
   290  
   291  func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
   292  	parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
   293  	peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
   294  
   295  	for {
   296  		peekData, peekErr := peekBuffer.ReadBytes('\n')
   297  		if peekErr != nil && peekErr != io.EOF {
   298  			p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
   299  			return "", peekErr
   300  		}
   301  
   302  		p.debug("readPythonMultilines: parsing %q", string(peekData))
   303  
   304  		peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
   305  		p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
   306  		for n, v := range peekMatches {
   307  			p.debug("   %d: %q", n, v)
   308  		}
   309  
   310  		// Return if not a Python multiline value.
   311  		if len(peekMatches) != 3 {
   312  			p.debug("readPythonMultilines: end of value, got: %q", line)
   313  			return line, nil
   314  		}
   315  
   316  		// Advance the parser reader (buffer) in-sync with the peek buffer.
   317  		_, err := p.buf.Discard(len(peekData))
   318  		if err != nil {
   319  			p.debug("readPythonMultilines: failed to skip to the end, returning error")
   320  			return "", err
   321  		}
   322  
   323  		line += "\n" + peekMatches[0]
   324  	}
   325  }
   326  
   327  // parse parses data through an io.Reader.
   328  func (f *File) parse(reader io.Reader) (err error) {
   329  	p := newParser(reader, parserOptions{
   330  		IgnoreContinuation:          f.options.IgnoreContinuation,
   331  		IgnoreInlineComment:         f.options.IgnoreInlineComment,
   332  		AllowPythonMultilineValues:  f.options.AllowPythonMultilineValues,
   333  		SpaceBeforeInlineComment:    f.options.SpaceBeforeInlineComment,
   334  		UnescapeValueDoubleQuotes:   f.options.UnescapeValueDoubleQuotes,
   335  		UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
   336  		PreserveSurroundedQuote:     f.options.PreserveSurroundedQuote,
   337  		DebugFunc:                   f.options.DebugFunc,
   338  		ReaderBufferSize:            f.options.ReaderBufferSize,
   339  	})
   340  	if err = p.BOM(); err != nil {
   341  		return fmt.Errorf("BOM: %v", err)
   342  	}
   343  
   344  	// Ignore error because default section name is never empty string.
   345  	name := DefaultSection
   346  	if f.options.Insensitive || f.options.InsensitiveSections {
   347  		name = strings.ToLower(DefaultSection)
   348  	}
   349  	section, _ := f.NewSection(name)
   350  
   351  	// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
   352  	var isLastValueEmpty bool
   353  	var lastRegularKey *Key
   354  
   355  	var line []byte
   356  	var inUnparseableSection bool
   357  
   358  	// NOTE: Iterate and increase `currentPeekSize` until
   359  	// the size of the parser buffer is found.
   360  	// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
   361  	parserBufferSize := 0
   362  	// NOTE: Peek 4kb at a time.
   363  	currentPeekSize := minReaderBufferSize
   364  
   365  	if f.options.AllowPythonMultilineValues {
   366  		for {
   367  			peekBytes, _ := p.buf.Peek(currentPeekSize)
   368  			peekBytesLength := len(peekBytes)
   369  
   370  			if parserBufferSize >= peekBytesLength {
   371  				break
   372  			}
   373  
   374  			currentPeekSize *= 2
   375  			parserBufferSize = peekBytesLength
   376  		}
   377  	}
   378  
   379  	for !p.isEOF {
   380  		line, err = p.readUntil('\n')
   381  		if err != nil {
   382  			return err
   383  		}
   384  
   385  		if f.options.AllowNestedValues &&
   386  			isLastValueEmpty && len(line) > 0 {
   387  			if line[0] == ' ' || line[0] == '\t' {
   388  				err = lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
   389  				if err != nil {
   390  					return err
   391  				}
   392  				continue
   393  			}
   394  		}
   395  
   396  		line = bytes.TrimLeftFunc(line, unicode.IsSpace)
   397  		if len(line) == 0 {
   398  			continue
   399  		}
   400  
   401  		// Comments
   402  		if line[0] == '#' || line[0] == ';' {
   403  			// Note: we do not care ending line break,
   404  			// it is needed for adding second line,
   405  			// so just clean it once at the end when set to value.
   406  			p.comment.Write(line)
   407  			continue
   408  		}
   409  
   410  		// Section
   411  		if line[0] == '[' {
   412  			// Read to the next ']' (TODO: support quoted strings)
   413  			closeIdx := bytes.LastIndexByte(line, ']')
   414  			if closeIdx == -1 {
   415  				return fmt.Errorf("unclosed section: %s", line)
   416  			}
   417  
   418  			name := string(line[1:closeIdx])
   419  			section, err = f.NewSection(name)
   420  			if err != nil {
   421  				return err
   422  			}
   423  
   424  			comment, has := cleanComment(line[closeIdx+1:])
   425  			if has {
   426  				p.comment.Write(comment)
   427  			}
   428  
   429  			section.Comment = strings.TrimSpace(p.comment.String())
   430  
   431  			// Reset auto-counter and comments
   432  			p.comment.Reset()
   433  			p.count = 1
   434  			// Nested values can't span sections
   435  			isLastValueEmpty = false
   436  
   437  			inUnparseableSection = false
   438  			for i := range f.options.UnparseableSections {
   439  				if f.options.UnparseableSections[i] == name ||
   440  					((f.options.Insensitive || f.options.InsensitiveSections) && strings.EqualFold(f.options.UnparseableSections[i], name)) {
   441  					inUnparseableSection = true
   442  					continue
   443  				}
   444  			}
   445  			continue
   446  		}
   447  
   448  		if inUnparseableSection {
   449  			section.isRawSection = true
   450  			section.rawBody += string(line)
   451  			continue
   452  		}
   453  
   454  		kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
   455  		if err != nil {
   456  			switch {
   457  			// Treat as boolean key when desired, and whole line is key name.
   458  			case IsErrDelimiterNotFound(err):
   459  				switch {
   460  				case f.options.AllowBooleanKeys:
   461  					kname, err := p.readValue(line, parserBufferSize)
   462  					if err != nil {
   463  						return err
   464  					}
   465  					key, err := section.NewBooleanKey(kname)
   466  					if err != nil {
   467  						return err
   468  					}
   469  					key.Comment = strings.TrimSpace(p.comment.String())
   470  					p.comment.Reset()
   471  					continue
   472  
   473  				case f.options.SkipUnrecognizableLines:
   474  					continue
   475  				}
   476  			case IsErrEmptyKeyName(err) && f.options.SkipUnrecognizableLines:
   477  				continue
   478  			}
   479  			return err
   480  		}
   481  
   482  		// Auto increment.
   483  		isAutoIncr := false
   484  		if kname == "-" {
   485  			isAutoIncr = true
   486  			kname = "#" + strconv.Itoa(p.count)
   487  			p.count++
   488  		}
   489  
   490  		value, err := p.readValue(line[offset:], parserBufferSize)
   491  		if err != nil {
   492  			return err
   493  		}
   494  		isLastValueEmpty = len(value) == 0
   495  
   496  		key, err := section.NewKey(kname, value)
   497  		if err != nil {
   498  			return err
   499  		}
   500  		key.isAutoIncrement = isAutoIncr
   501  		key.Comment = strings.TrimSpace(p.comment.String())
   502  		p.comment.Reset()
   503  		lastRegularKey = key
   504  	}
   505  	return nil
   506  }
   507  

View as plain text