parser.go

Documentation: github.com/protocolbuffers/txtpbfmt/parser

     1  // Package parser edits text proto files, applies standard formatting
     2  // and preserves comments.
     3  // See also: https://github.com/golang/protobuf/blob/master/proto/text_parser.go
     4  //
     5  // To disable a specific file from getting formatted, add '# txtpbfmt: disable'
     6  // at the top of the file.
     7  package parser
     8  
     9  import (
    10  	"bufio"
    11  	"bytes"
    12  	"fmt"
    13  	"math"
    14  	"regexp"
    15  	"strconv"
    16  	"strings"
    17  
    18  	"github.com/mitchellh/go-wordwrap"
    19  	"github.com/protocolbuffers/txtpbfmt/ast"
    20  	"github.com/protocolbuffers/txtpbfmt/unquote"
    21  )
    22  
// Config can be used to pass additional config parameters to the formatter at
// the time of the API call.
//
// Most options can also be switched on from inside the file being formatted,
// through "# txtpbfmt: <option>[, <option> ...]" lines in its leading comment
// block; see addMetaCommentsToConfig and addToConfig for the recognized names.
type Config struct {
	// Do not apply any reformatting to this file.
	Disable bool

	// Expand all children irrespective of the initial state.
	ExpandAllChildren bool

	// Skip colons whenever possible.
	SkipAllColons bool

	// Allow unnamed nodes everywhere.
	// Default is to allow only top-level nodes to be unnamed.
	AllowUnnamedNodesEverywhere bool

	// Sort fields by field name.
	SortFieldsByFieldName bool

	// Sort adjacent scalar fields of the same field name by their contents.
	SortRepeatedFieldsByContent bool

	// Sort adjacent message fields of the given field name by the contents of the given subfield.
	// Format: either "field_name.subfield_name" or just "subfield_name" (applies to all field names).
	SortRepeatedFieldsBySubfield []string

	// Map from Node.Name to the order of all fields within that node. See AddFieldSortOrder().
	// It is allocated lazily by AddFieldSortOrder, so it is nil until the first call.
	fieldSortOrder map[string][]string

	// RequireFieldSortOrderToMatchAllFieldsInNode will cause parsing to fail if a node was added via
	// AddFieldSortOrder() but 1+ fields under that node in the textproto aren't specified in the
	// field order. This won't fail for nodes that don't have a field order specified at all. Use this
	// to strictly enforce that your field order config always orders ALL the fields, and you're
	// willing for new fields in the textproto to break parsing in order to enforce it.
	RequireFieldSortOrderToMatchAllFieldsInNode bool

	// Remove lines that have the same field name and scalar value as another.
	RemoveDuplicateValuesForRepeatedFields bool

	// Permit usage of Python-style """ or ''' delimited strings.
	AllowTripleQuotedStrings bool

	// Max columns for string field values. If zero, no string wrapping will occur.
	// By default, strings that may contain HTML tags are not wrapped; set
	// WrapHTMLStrings to wrap them as well.
	WrapStringsAtColumn int

	// Whether strings that appear to contain HTML tags should be wrapped
	// (requires WrapStringsAtColumn to be set).
	WrapHTMLStrings bool

	// Wrap string field values after each newline.
	// Should not be used with other Wrap* options.
	WrapStringsAfterNewlines bool

	// Whether angle brackets used instead of curly braces should be preserved
	// when outputting a formatted textproto.
	PreserveAngleBrackets bool

	// Use single quotes around strings that contain double but not single quotes.
	SmartQuotes bool

	// Logger enables logging when it is non-nil.
	// If the log messages aren't going to be useful, it's best to leave Logger
	// set to nil, as otherwise log messages will be constructed.
	Logger Logger
}
    89  
    90  func (c *Config) infof(format string, args ...any) {
    91  	if c.Logger != nil {
    92  		c.Logger.Infof(format, args...)
    93  	}
    94  }
    95  func (c *Config) infoLevel() bool {
    96  	return c.Logger != nil
    97  }
    98  
// Logger is a small glog-like interface. A value of this type can be stored in
// Config.Logger to receive the parser's informational messages.
type Logger interface {
	// Infof is used for informative messages, for testing or debugging.
	// It receives a printf-style format string followed by its arguments.
	Infof(format string, args ...any)
}
   104  
// RootName contains a constant that can be used to identify the root of all Nodes.
// Pass it as the nodeName to Config.AddFieldSortOrder to set the order of
// top-level nodes.
const RootName = "__ROOT__"
   107  
   108  // AddFieldSortOrder adds a config rule for the given Node.Name, so that all contained field names
   109  // are output in the provided order. To specify an order for top-level Nodes, use RootName as the
   110  // nodeName.
   111  func (c *Config) AddFieldSortOrder(nodeName string, fieldOrder ...string) {
   112  	if c.fieldSortOrder == nil {
   113  		c.fieldSortOrder = make(map[string][]string)
   114  	}
   115  	c.fieldSortOrder[nodeName] = fieldOrder
   116  }
   117  
   118  // UnsortedFieldsError will be returned by ParseWithConfig if
   119  // Config.RequireFieldSortOrderToMatchAllFieldsInNode is set, and an unrecognized field is found
   120  // while parsing.
   121  type UnsortedFieldsError struct {
   122  	UnsortedFields []UnsortedField
   123  }
   124  
   125  // UnsortedField records details about a single unsorted field.
   126  type UnsortedField struct {
   127  	FieldName       string
   128  	Line            int32
   129  	ParentFieldName string
   130  }
   131  
   132  func (e *UnsortedFieldsError) Error() string {
   133  	var errs []string
   134  	for _, us := range e.UnsortedFields {
   135  		errs = append(errs, fmt.Sprintf("  line: %d, parent field: %q, unsorted field: %q", us.Line, us.ParentFieldName, us.FieldName))
   136  	}
   137  	return fmt.Sprintf("fields parsed that were not specified in the parser.AddFieldSortOrder() call:\n%s", strings.Join(errs, "\n"))
   138  }
   139  
// parser holds the input and the cursor state for parsing one text proto.
// Instances are created by newParser, which sets length to len(in), starts the
// cursor at index 0 / line 1 / column 1, and appends a trailing '\n' to the
// input if it is missing. index is the offset in 'in' of the next byte to be
// consumed; config is the configuration the parser was created with.
type parser struct {
	in     []byte
	index  int
	length int
	// Maps the index of '{' characters on 'in' that have the matching '}' on
	// the same line to 'true'. The same applies to '<' with a matching '>'.
	// The map is left empty when Config.ExpandAllChildren is set.
	bracketSameLine map[int]bool
	config          Config
	line, column    int // current position, 1-based.
}
   150  
// defConfig is the zero-value configuration used by Format and Parse.
var defConfig = Config{}

// tagRegex matches a '<' that is followed, later on the same line, by a '>'.
// NOTE(review): presumably used to detect strings that look like they contain
// HTML tags (see Config.WrapHTMLStrings); its use is outside this part of the file.
var tagRegex = regexp.MustCompile(`<.*>`)

// indentSpaces is two spaces.
// NOTE(review): presumably one level of output indentation; confirm at its use site.
const indentSpaces = "  "
   155  
   156  // Format formats a text proto file preserving comments.
   157  func Format(in []byte) ([]byte, error) {
   158  	return FormatWithConfig(in, defConfig)
   159  }
   160  
   161  // FormatWithConfig functions similar to format, but allows the user to pass in
   162  // additional configuration options.
   163  func FormatWithConfig(in []byte, c Config) ([]byte, error) {
   164  	if err := addMetaCommentsToConfig(in, &c); err != nil {
   165  		return nil, err
   166  	}
   167  	if c.Disable {
   168  		c.infof("Ignored file with 'disable' comment.")
   169  		return in, nil
   170  	}
   171  	nodes, err := parseWithMetaCommentConfig(in, c)
   172  	if err != nil {
   173  		return nil, err
   174  	}
   175  	return PrettyBytes(nodes, 0), nil
   176  }
   177  
   178  // Return the byte-positions of each bracket which has the corresponding close on the
   179  // same line as a set.
   180  func sameLineBrackets(in []byte, allowTripleQuotedStrings bool) (map[int]bool, error) {
   181  	line := 1
   182  	type bracket struct {
   183  		index int
   184  		line  int
   185  	}
   186  	open := []bracket{} // Stack.
   187  	res := map[int]bool{}
   188  	insideComment := false
   189  	insideString := false
   190  	insideTemplate := false
   191  	insideTripleQuotedString := false
   192  	var stringDelimiter string
   193  	isEscapedChar := false
   194  	for i, c := range in {
   195  		switch c {
   196  		case '\n':
   197  			line++
   198  			insideComment = false
   199  		case '{', '<':
   200  			if insideComment || insideString || insideTemplate {
   201  				continue
   202  			}
   203  			open = append(open, bracket{index: i, line: line})
   204  		case '}', '>':
   205  			if insideComment || insideString || insideTemplate {
   206  				continue
   207  			}
   208  			if len(open) == 0 {
   209  				return nil, fmt.Errorf("too many '}' or '>' at index %d", i)
   210  			}
   211  			last := len(open) - 1
   212  			br := open[last]
   213  			open = open[:last]
   214  			if br.line == line {
   215  				res[br.index] = true
   216  			}
   217  		case '#':
   218  			if insideString {
   219  				continue
   220  			}
   221  			insideComment = true
   222  		case '%':
   223  			if insideComment || insideString {
   224  				continue
   225  			}
   226  			if insideTemplate {
   227  				insideTemplate = false
   228  			} else {
   229  				insideTemplate = true
   230  			}
   231  		case '"', '\'':
   232  			if insideComment {
   233  				continue
   234  			}
   235  			delim := string(c)
   236  			tripleQuoted := false
   237  			if allowTripleQuotedStrings && i+3 <= len(in) {
   238  				triple := string(in[i : i+3])
   239  				if triple == `"""` || triple == `'''` {
   240  					delim = triple
   241  					tripleQuoted = true
   242  				}
   243  			}
   244  
   245  			if insideString {
   246  				if stringDelimiter == delim && (insideTripleQuotedString || !isEscapedChar) {
   247  					insideString = false
   248  					insideTripleQuotedString = false
   249  				}
   250  			} else {
   251  				insideString = true
   252  				if tripleQuoted {
   253  					insideTripleQuotedString = true
   254  				}
   255  				stringDelimiter = delim
   256  			}
   257  		}
   258  
   259  		if isEscapedChar {
   260  			isEscapedChar = false
   261  		} else if c == '\\' && insideString && !insideTripleQuotedString {
   262  			isEscapedChar = true
   263  		}
   264  	}
   265  	if insideString {
   266  		return nil, fmt.Errorf("unterminated string literal")
   267  	}
   268  	return res, nil
   269  }
   270  
   271  func removeDeleted(nodes []*ast.Node) []*ast.Node {
   272  	res := []*ast.Node{}
   273  	// When removing a node which has an empty line before it, we should keep
   274  	// the empty line before the next non-removed node to maintain the visual separation.
   275  	// Consider the following:
   276  	// foo: { name: "foo1" }
   277  	// foo: { name: "foo2" }
   278  	//
   279  	// bar: { name: "bar1" }
   280  	// bar: { name: "bar2" }
   281  	//
   282  	// If we decide to remove both foo2 and bar1, the result should still have one empty
   283  	// line between foo1 and bar2.
   284  	addEmptyLine := false
   285  	for _, node := range nodes {
   286  		if node.Deleted {
   287  			if len(node.PreComments) > 0 && node.PreComments[0] == "" {
   288  				addEmptyLine = true
   289  			}
   290  			continue
   291  		}
   292  		if len(node.Children) > 0 {
   293  			node.Children = removeDeleted(node.Children)
   294  		}
   295  		if addEmptyLine && (len(node.PreComments) == 0 || node.PreComments[0] != "") {
   296  			node.PreComments = append([]string{""}, node.PreComments...)
   297  		}
   298  		addEmptyLine = false
   299  		res = append(res, node)
   300  	}
   301  	return res
   302  }
   303  
var (
	// spaceSeparators are the bytes treated as blanks (see isBlankSep); they
	// are also the bytes stripped by removeBlanks.
	spaceSeparators = []byte(" \t\n")
	// valueSeparators are the bytes recognized by isValueSep; readFieldName
	// stops at the first of them.
	valueSeparators = []byte(" \t\n{}:,[]<>;#")
)
   308  
   309  // Parse returns a tree representation of a textproto file.
   310  func Parse(in []byte) ([]*ast.Node, error) {
   311  	return ParseWithConfig(in, defConfig)
   312  }
   313  
   314  // ParseWithConfig functions similar to Parse, but allows the user to pass in
   315  // additional configuration options.
   316  func ParseWithConfig(in []byte, c Config) ([]*ast.Node, error) {
   317  	if err := addMetaCommentsToConfig(in, &c); err != nil {
   318  		return nil, err
   319  	}
   320  	return parseWithMetaCommentConfig(in, c)
   321  }
   322  
   323  // Parses in textproto with MetaComments already added to configuration.
   324  func parseWithMetaCommentConfig(in []byte, c Config) ([]*ast.Node, error) {
   325  	p, err := newParser(in, c)
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	if p.config.infoLevel() {
   330  		p.config.infof("p.in: %q", string(p.in))
   331  		p.config.infof("p.length: %v", p.length)
   332  	}
   333  	// Although unnamed nodes aren't strictly allowed, some formats represent a
   334  	// list of protos as a list of unnamed top-level nodes.
   335  	nodes, _, err := p.parse( /*isRoot=*/ true)
   336  	if err != nil {
   337  		return nil, err
   338  	}
   339  	if p.index < p.length {
   340  		return nil, fmt.Errorf("parser didn't consume all input. Stopped at %s", p.errorContext())
   341  	}
   342  	if err := wrapStrings(nodes, 0, c); err != nil {
   343  		return nil, err
   344  	}
   345  	if err := sortAndFilterNodes( /*parent=*/ nil, nodes, nodeSortFunction(c), nodeFilterFunction(c)); err != nil {
   346  		return nil, err
   347  	}
   348  	return nodes, nil
   349  }
   350  
   351  // There are two types of MetaComment, one in the format of <key>=<val> and the other one doesn't
   352  // have the equal sign. Currently there are only two MetaComments that are in the former format:
   353  //
   354  //	"sort_repeated_fields_by_subfield": If this appears multiple times, then they will all be added
   355  //	to the config and the order is perserved.
   356  //	"wrap_strings_at_column": The <val> is expected to be an integer. If it is not, then it will be
   357  //	ignored. If this appears multiple times, only the last one saved.
   358  func addToConfig(metaComment string, c *Config) error {
   359  	// Test if a MetaComment is in the format of <key>=<val>.
   360  	key, val, hasEqualSign := strings.Cut(metaComment, "=")
   361  	switch key {
   362  	case "allow_triple_quoted_strings":
   363  		c.AllowTripleQuotedStrings = true
   364  	case "allow_unnamed_nodes_everywhere":
   365  		c.AllowUnnamedNodesEverywhere = true
   366  	case "disable":
   367  		c.Disable = true
   368  	case "expand_all_children":
   369  		c.ExpandAllChildren = true
   370  	case "preserve_angle_brackets":
   371  		c.PreserveAngleBrackets = true
   372  	case "remove_duplicate_values_for_repeated_fields":
   373  		c.RemoveDuplicateValuesForRepeatedFields = true
   374  	case "skip_all_colons":
   375  		c.SkipAllColons = true
   376  	case "smartquotes":
   377  		c.SmartQuotes = true
   378  	case "sort_fields_by_field_name":
   379  		c.SortFieldsByFieldName = true
   380  	case "sort_repeated_fields_by_content":
   381  		c.SortRepeatedFieldsByContent = true
   382  	case "sort_repeated_fields_by_subfield":
   383  		// Take all the subfields and the subfields in order as tie breakers.
   384  		if !hasEqualSign {
   385  			return fmt.Errorf("format should be %s=<string>, got: %s", key, metaComment)
   386  		}
   387  		c.SortRepeatedFieldsBySubfield = append(c.SortRepeatedFieldsBySubfield, val)
   388  	case "wrap_strings_at_column":
   389  		// If multiple of this MetaComment exists in the file, take the last one.
   390  		if !hasEqualSign {
   391  			return fmt.Errorf("format should be %s=<int>, got: %s", key, metaComment)
   392  		}
   393  		i, err := strconv.Atoi(strings.TrimSpace(val))
   394  		if err != nil {
   395  			return fmt.Errorf("error parsing %s value %q (skipping): %v", key, val, err)
   396  		}
   397  		c.WrapStringsAtColumn = i
   398  	case "wrap_html_strings":
   399  		c.WrapHTMLStrings = true
   400  	case "wrap_strings_after_newlines":
   401  		c.WrapStringsAfterNewlines = true
   402  	default:
   403  		return fmt.Errorf("unrecognized MetaComment: %s", metaComment)
   404  	}
   405  	return nil
   406  }
   407  
   408  // Parses MetaComments and adds them to the configuration.
   409  func addMetaCommentsToConfig(in []byte, c *Config) error {
   410  	scanner := bufio.NewScanner(bytes.NewReader(in))
   411  	for scanner.Scan() {
   412  		line := scanner.Text()
   413  		if len(line) == 0 {
   414  			continue
   415  		}
   416  		if line[0] != byte('#') {
   417  			break // only process the leading comment block
   418  		}
   419  
   420  		// Look for comment lines in the format of "<key>:<value>", and process the lines with <key>
   421  		// equals to "txtpbfmt". It's assumed that the MetaComments are given in the format of:
   422  		// # txtpbfmt: <MetaComment 1>[, <MetaComment 2> ...]
   423  		key, value, hasColon := strings.Cut(line[1:], ":") // Ignore the first '#'.
   424  		if hasColon && strings.TrimSpace(key) == "txtpbfmt" {
   425  			for _, s := range strings.Split(strings.TrimSpace(value), ",") {
   426  				metaComment := strings.TrimSpace(s)
   427  				if err := addToConfig(metaComment, c); err != nil {
   428  					return err
   429  				}
   430  			}
   431  		}
   432  	}
   433  	return nil
   434  }
   435  
   436  func newParser(in []byte, c Config) (*parser, error) {
   437  	var bracketSameLine map[int]bool
   438  	if c.ExpandAllChildren {
   439  		bracketSameLine = map[int]bool{}
   440  	} else {
   441  		var err error
   442  		if bracketSameLine, err = sameLineBrackets(in, c.AllowTripleQuotedStrings); err != nil {
   443  			return nil, err
   444  		}
   445  	}
   446  	if len(in) > 0 && in[len(in)-1] != '\n' {
   447  		in = append(in, '\n')
   448  	}
   449  	parser := &parser{
   450  		in:              in,
   451  		index:           0,
   452  		length:          len(in),
   453  		bracketSameLine: bracketSameLine,
   454  		config:          c,
   455  		line:            1,
   456  		column:          1,
   457  	}
   458  	return parser, nil
   459  }
   460  
   461  func (p *parser) nextInputIs(b byte) bool {
   462  	return p.index < p.length && p.in[p.index] == b
   463  }
   464  
   465  func (p *parser) consume(b byte) bool {
   466  	if !p.nextInputIs(b) {
   467  		return false
   468  	}
   469  	p.index++
   470  	p.column++
   471  	if b == '\n' {
   472  		p.line++
   473  		p.column = 1
   474  	}
   475  	return true
   476  }
   477  
   478  // consumeString consumes the given string s, which should not have any newlines.
   479  func (p *parser) consumeString(s string) bool {
   480  	if p.index+len(s) > p.length {
   481  		return false
   482  	}
   483  	if string(p.in[p.index:p.index+len(s)]) != s {
   484  		return false
   485  	}
   486  	p.index += len(s)
   487  	p.column += len(s)
   488  	return true
   489  }
   490  
// loopDetector detects if the parser is in an infinite loop (ie failing to
// make progress).
//
// lastIndex is the parser index seen by the most recent check, count is the
// number of consecutive checks (see iter) at which the index had not moved,
// and parser is the parser being watched.
type loopDetector struct {
	lastIndex int
	count     int
	parser    *parser
}
   498  
   499  func (p *parser) getLoopDetector() *loopDetector {
   500  	return &loopDetector{lastIndex: p.index, parser: p}
   501  }
   502  
   503  func (l *loopDetector) iter() error {
   504  	if l.parser.index == l.lastIndex {
   505  		l.count++
   506  		if l.count < 2 {
   507  			return nil
   508  		}
   509  		return fmt.Errorf("parser failed to make progress at %s", l.parser.errorContext())
   510  	}
   511  	l.lastIndex = l.parser.index
   512  	l.count = 0
   513  	return nil
   514  }
   515  
   516  func (p parser) errorContext() string {
   517  	index := p.index
   518  	if index >= p.length {
   519  		index = p.length - 1
   520  	}
   521  	// Provide the surrounding input as context.
   522  	lastContentIndex := index + 20
   523  	if lastContentIndex >= p.length {
   524  		lastContentIndex = p.length - 1
   525  	}
   526  	previousContentIndex := index - 20
   527  	if previousContentIndex < 0 {
   528  		previousContentIndex = 0
   529  	}
   530  	before := string(p.in[previousContentIndex:index])
   531  	after := string(p.in[index:lastContentIndex])
   532  	return fmt.Sprintf("index %v\nposition %+v\nbefore: %q\nafter: %q\nbefore+after: %q", index, p.position(), before, after, before+after)
   533  }
   534  
   535  func (p *parser) position() ast.Position {
   536  	return ast.Position{
   537  		Byte:   uint32(p.index),
   538  		Line:   int32(p.line),
   539  		Column: int32(p.column),
   540  	}
   541  }
   542  
   543  func (p *parser) consumeOptionalSeparator() error {
   544  	if p.index > 0 && !p.isBlankSep(p.index-1) {
   545  		// If an unnamed field immediately follows non-whitespace, we require a separator character first (key_one:,:value_two instead of key_one::value_two)
   546  		if p.consume(':') {
   547  			return fmt.Errorf("parser encountered unexpected : character (should be whitespace, or a ,; separator)")
   548  		}
   549  	}
   550  
   551  	_ = p.consume(';') // Ignore optional ';'.
   552  	_ = p.consume(',') // Ignore optional ','.
   553  
   554  	return nil
   555  }
   556  
// parse parses a text proto.
// It assumes the text to be either conformant with the standard text proto
// (i.e. passes proto.UnmarshalText() without error) or the alternative textproto
// format (sequence of messages, each of which passes proto.UnmarshalText()).
// endPos is the position of the first character on the first line
// after parsed nodes: that's the position to append more children.
//
// parse reads nodes until the input is exhausted or until a closing '}', '>'
// or ']' is consumed, and calls itself recursively for the children of message
// nodes and for lists of messages. When isRoot is false, a node without a
// field name is an error unless Config.AllowUnnamedNodesEverywhere is set.
// On error, the returned nodes are nil and the position is the zero value.
func (p *parser) parse(isRoot bool) (result []*ast.Node, endPos ast.Position, err error) {
	res := []*ast.Node{}
	for ld := p.getLoopDetector(); p.index < p.length; {
		// Bail out instead of spinning if an iteration consumed no input.
		if err := ld.iter(); err != nil {
			return nil, ast.Position{}, err
		}

		startPos := p.position()
		if p.nextInputIs('\n') {
			// p.parse is often invoked with the index pointing at the
			// newline character after the previous item.
			// We should still report that this item starts in the next line.
			startPos.Byte++
			startPos.Line++
			startPos.Column = 1
		}

		// Read PreComments.
		comments, blankLines := p.skipWhiteSpaceAndReadComments(true /* multiLine */)

		// Handle blank lines.
		// More than one newline before the comments is recorded as a single
		// leading empty PreComment.
		if blankLines > 1 {
			if p.config.infoLevel() {
				p.config.infof("blankLines: %v", blankLines)
			}
			comments = append([]string{""}, comments...)
		}

		// Templates ('%'-delimited) in this position are kept as PreComments,
		// together with the comments that follow them on the same line.
		for p.nextInputIs('%') {
			comments = append(comments, p.readTemplate())
			c, _ := p.skipWhiteSpaceAndReadComments(false)
			comments = append(comments, c...)
		}

		// Any of the three closing characters ends this level; it is not
		// checked against the character that opened it.
		if endPos := p.position(); p.consume('}') || p.consume('>') || p.consume(']') {
			// Handle comments after last child.

			if len(comments) > 0 {
				res = append(res, &ast.Node{Start: startPos, PreComments: comments})
			}

			// endPos points at the closing brace, but we should rather return the position
			// of the first character after the previous item. Therefore let's rewind a bit:
			for endPos.Byte > 0 && p.in[endPos.Byte-1] == ' ' {
				endPos.Byte--
				endPos.Column--
			}

			if err = p.consumeOptionalSeparator(); err != nil {
				return nil, ast.Position{}, err
			}

			// Done parsing children.
			return res, endPos, nil
		}

		nd := &ast.Node{
			Start:       startPos,
			PreComments: comments,
		}
		if p.config.infoLevel() {
			p.config.infof("PreComments: %q", strings.Join(nd.PreComments, "\n"))
		}

		// Skip white-space other than '\n', which is handled below.
		for p.consume(' ') || p.consume('\t') {
		}

		// Handle multiple comment blocks.
		// <example>
		// # comment block 1
		// # comment block 1
		//
		// # comment block 2
		// # comment block 2
		// </example>
		// Each block that ends on an empty line (instead of a field) gets its own
		// 'empty' node.
		if p.nextInputIs('\n') {
			res = append(res, nd)
			continue
		}

		// Handle end of file.
		if p.index >= p.length {
			nd.End = p.position()
			if len(nd.PreComments) > 0 {
				res = append(res, nd)
			}
			break
		}

		if p.consume('[') {
			// Read Name (of proto extension).
			nd.Name = fmt.Sprintf("[%s]", p.readExtension())
			_ = p.consume(']') // Ignore the ']'.
		} else {
			// Read Name.
			nd.Name = p.readFieldName()
			if nd.Name == "" && !isRoot && !p.config.AllowUnnamedNodesEverywhere {
				return nil, ast.Position{}, fmt.Errorf("Failed to find a FieldName at %s", p.errorContext())
			}
		}
		if p.config.infoLevel() {
			p.config.infof("name: %q", nd.Name)
		}
		// Skip separator.
		preCommentsBeforeColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)
		nd.SkipColon = !p.consume(':')
		// Remember the position right after the (optional) colon: the scalar
		// value branch below rewinds to it.
		previousPos := p.position()
		preCommentsAfterColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)

		if p.consume('{') || p.consume('<') {
			// Message value: the children are parsed recursively.
			if p.config.SkipAllColons {
				nd.SkipColon = true
			}
			// p.index-1 is the index of the bracket that was just consumed.
			nd.ChildrenSameLine = p.bracketSameLine[p.index-1]
			nd.IsAngleBracket = p.config.PreserveAngleBrackets && p.in[p.index-1] == '<'
			// Recursive call to parse child nodes.
			nodes, lastPos, err := p.parse( /*isRoot=*/ false)
			if err != nil {
				return nil, ast.Position{}, err
			}
			nd.Children = nodes
			nd.End = lastPos

			nd.ClosingBraceComment = p.readInlineComment()
		} else if p.consume('[') {
			openBracketLine := p.line

			// Skip separator.
			preCommentsAfterListStart := p.readContinuousBlocksOfComments()

			// All comments read since the field name are attached to the first
			// element of the list.
			var preComments []string
			preComments = append(preComments, preCommentsBeforeColon...)
			preComments = append(preComments, preCommentsAfterColon...)
			preComments = append(preComments, preCommentsAfterListStart...)

			if p.nextInputIs('{') {
				// Handle list of nodes.
				nd.ChildrenAsList = true

				// The list elements are unnamed, hence isRoot=true.
				nodes, lastPos, err := p.parse( /*isRoot=*/ true)
				if err != nil {
					return nil, ast.Position{}, err
				}
				if len(nodes) > 0 {
					nodes[0].PreComments = preComments
				}

				nd.Children = nodes
				nd.End = lastPos
				nd.ClosingBraceComment = p.readInlineComment()
				nd.ChildrenSameLine = openBracketLine == p.line
			} else {
				// Handle list of values.
				nd.ValuesAsList = true // We found values in list - keep it as list.

				for ld := p.getLoopDetector(); !p.consume(']') && p.index < p.length; {
					if err := ld.iter(); err != nil {
						return nil, ast.Position{}, err
					}

					// Read each value in the list.
					vals, err := p.readValues()
					if err != nil {
						return nil, ast.Position{}, err
					}
					if len(vals) != 1 {
						return nil, ast.Position{}, fmt.Errorf("multiple-string value not supported (%v). Please add comma explicitly, see http://b/162070952", vals)
					}
					if len(preComments) > 0 {
						// If we read preComments before readValues(), they should go first,
						// but avoid copy overhead if there are none.
						vals[0].PreComments = append(preComments, vals[0].PreComments...)
					}

					// Skip separator.
					_, _ = p.skipWhiteSpaceAndReadComments(false /* multiLine */)
					if p.consume(',') {
						vals[0].InlineComment = p.readInlineComment()
					}

					nd.Values = append(nd.Values, vals...)

					// Comments read here belong to the next value, or to the
					// end of the list if there is none.
					preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */)
				}
				nd.ChildrenSameLine = openBracketLine == p.line

				res = append(res, nd)

				// Handle comments after last line (or for empty list)
				nd.PostValuesComments = preComments
				nd.ClosingBraceComment = p.readInlineComment()

				if err = p.consumeOptionalSeparator(); err != nil {
					return nil, ast.Position{}, err
				}

				// nd was already appended above.
				continue
			}
		} else {
			// Rewind comments.
			p.index = int(previousPos.Byte)
			p.line = int(previousPos.Line)
			p.column = int(previousPos.Column)
			// Handle Values.
			nd.Values, err = p.readValues()
			if err != nil {
				return nil, ast.Position{}, err
			}
			if err = p.consumeOptionalSeparator(); err != nil {
				return nil, ast.Position{}, err
			}
		}
		if p.config.infoLevel() && p.index < p.length {
			p.config.infof("p.in[p.index]: %q", string(p.in[p.index]))
		}
		res = append(res, nd)
	}
	return res, p.position(), nil
}
   785  
   786  func (p *parser) readFieldName() string {
   787  	i := p.index
   788  	for ; i < p.length && !p.isValueSep(i); i++ {
   789  	}
   790  	return p.advance(i)
   791  }
   792  
   793  func (p *parser) readExtension() string {
   794  	i := p.index
   795  	for ; i < p.length && (p.isBlankSep(i) || !p.isValueSep(i)); i++ {
   796  	}
   797  	return removeBlanks(p.advance(i))
   798  }
   799  
   800  func removeBlanks(in string) string {
   801  	s := []byte(in)
   802  	for _, b := range spaceSeparators {
   803  		s = bytes.Replace(s, []byte{b}, nil, -1)
   804  	}
   805  	return string(s)
   806  }
   807  
   808  func (p *parser) readContinuousBlocksOfComments() []string {
   809  	var preComments []string
   810  	for {
   811  		comments, blankLines := p.skipWhiteSpaceAndReadComments(true)
   812  		if len(comments) == 0 {
   813  			break
   814  		}
   815  		if blankLines > 0 && len(preComments) > 0 {
   816  			comments = append([]string{""}, comments...)
   817  		}
   818  		preComments = append(preComments, comments...)
   819  	}
   820  
   821  	return preComments
   822  }
   823  
// skipWhiteSpaceAndReadComments has multiple cases:
//   - (1) reading a block of comments followed by a blank line
//   - (2) reading a block of comments followed by non-blank content
//   - (3) reading the inline comments between the current char and the end of the
//     current line
//
// Lines of comments and number of blank lines will be returned.
//
// With multiLine set to false, scanning stops at the first newline (case 3).
// Each returned comment starts at its '#' and excludes the trailing newline.
// The returned count is the number of newlines seen before the first comment.
// The parser is advanced past everything that was scanned.
func (p *parser) skipWhiteSpaceAndReadComments(multiLine bool) ([]string, int) {
	i := p.index
	// foundComment: at least one comment was seen; insideComment: i is
	// currently between a '#' and the end of its line.
	var foundComment, insideComment bool
	commentBegin := 0
	var comments []string
	blankLines := 0
	for ; i < p.length; i++ {
		if p.in[i] == '#' && !insideComment {
			insideComment = true
			foundComment = true
			commentBegin = i
		} else if p.in[i] == '\n' {
			if insideComment {
				comments = append(comments, string(p.in[commentBegin:i])) // Exclude the '\n'.
				insideComment = false
			} else if foundComment {
				i-- // Put back the last '\n' so the caller can detect that we're on case (1).
				break
			} else {
				blankLines++
			}
			if !multiLine {
				break
			}
		}
		// Outside of comments, the first non-blank byte ends the scan.
		if !insideComment && !p.isBlankSep(i) {
			break
		}
	}
	// Consume everything before i; the byte at i itself is left unread.
	sep := p.advance(i)
	if p.config.infoLevel() {
		p.config.infof("sep: %q\np.index: %v", string(sep), p.index)
		if p.index < p.length {
			p.config.infof("p.in[p.index]: %q", string(p.in[p.index]))
		}
	}
	return comments, blankLines
}
   869  
   870  func (p *parser) isBlankSep(i int) bool {
   871  	return bytes.Contains(spaceSeparators, p.in[i:i+1])
   872  }
   873  
   874  func (p *parser) isValueSep(i int) bool {
   875  	return bytes.Contains(valueSeparators, p.in[i:i+1])
   876  }
   877  
   878  func (p *parser) advance(i int) string {
   879  	if i > p.length {
   880  		i = p.length
   881  	}
   882  	res := p.in[p.index:i]
   883  	p.index = i
   884  	strRes := string(res)
   885  	newlines := strings.Count(strRes, "\n")
   886  	if newlines == 0 {
   887  		p.column += len(strRes)
   888  	} else {
   889  		p.column = len(strRes) - strings.LastIndex(strRes, "\n")
   890  		p.line += newlines
   891  	}
   892  	return string(res)
   893  }
   894  
   895  func (p *parser) readValues() ([]*ast.Value, error) {
   896  	var values []*ast.Value
   897  	var previousPos ast.Position
   898  	preComments, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)
   899  	if p.nextInputIs('%') {
   900  		values = append(values, p.populateValue(p.readTemplate(), nil))
   901  		previousPos = p.position()
   902  	}
   903  	if p.config.AllowTripleQuotedStrings {
   904  		v, err := p.readTripleQuotedString()
   905  		if err != nil {
   906  			return nil, err
   907  		}
   908  		if v != nil {
   909  			values = append(values, v)
   910  			previousPos = p.position()
   911  		}
   912  	}
   913  	for p.consume('"') || p.consume('\'') {
   914  		// Handle string value.
   915  		stringBegin := p.index - 1 // Index of the quote.
   916  		i := p.index
   917  		for ; i < p.length; i++ {
   918  			if p.in[i] == '\\' {
   919  				i++ // Skip escaped char.
   920  				continue
   921  			}
   922  			if p.in[i] == '\n' {
   923  				p.index = i
   924  				return nil, fmt.Errorf("found literal (unescaped) new line in string at %s", p.errorContext())
   925  			}
   926  			if p.in[i] == p.in[stringBegin] {
   927  				var vl string
   928  				if p.config.SmartQuotes {
   929  					vl = smartQuotes(p.advance(i))
   930  				} else {
   931  					vl = fixQuotes(p.advance(i))
   932  				}
   933  				_ = p.advance(i + 1) // Skip the quote.
   934  				values = append(values, p.populateValue(vl, preComments))
   935  
   936  				previousPos = p.position()
   937  				preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */)
   938  				break
   939  			}
   940  		}
   941  		if i == p.length {
   942  			p.index = i
   943  			return nil, fmt.Errorf("unfinished string at %s", p.errorContext())
   944  		}
   945  	}
   946  	if previousPos != (ast.Position{}) {
   947  		// Rewind comments.
   948  		p.index = int(previousPos.Byte)
   949  		p.line = int(previousPos.Line)
   950  		p.column = int(previousPos.Column)
   951  	} else {
   952  		i := p.index
   953  		// Handle other values.
   954  		for ; i < p.length; i++ {
   955  			if p.isValueSep(i) {
   956  				break
   957  			}
   958  		}
   959  		vl := p.advance(i)
   960  		values = append(values, p.populateValue(vl, preComments))
   961  	}
   962  	if p.config.infoLevel() {
   963  		p.config.infof("values: %v", values)
   964  	}
   965  	return values, nil
   966  }
   967  
   968  func (p *parser) readTripleQuotedString() (*ast.Value, error) {
   969  	start := p.index
   970  	stringBegin := p.index
   971  	delimiter := `"""`
   972  	if !p.consumeString(delimiter) {
   973  		delimiter = `'''`
   974  		if !p.consumeString(delimiter) {
   975  			return nil, nil
   976  		}
   977  	}
   978  
   979  	for {
   980  		if p.consumeString(delimiter) {
   981  			break
   982  		}
   983  		if p.index == p.length {
   984  			p.index = start
   985  			return nil, fmt.Errorf("unfinished string at %s", p.errorContext())
   986  		}
   987  		p.index++
   988  	}
   989  
   990  	v := p.populateValue(string(p.in[stringBegin:p.index]), nil)
   991  
   992  	return v, nil
   993  }
   994  
   995  func (p *parser) populateValue(vl string, preComments []string) *ast.Value {
   996  	if p.config.infoLevel() {
   997  		p.config.infof("value: %q", vl)
   998  	}
   999  	return &ast.Value{
  1000  		Value:         vl,
  1001  		InlineComment: p.readInlineComment(),
  1002  		PreComments:   preComments,
  1003  	}
  1004  }
  1005  
  1006  func (p *parser) readInlineComment() string {
  1007  	inlineComment, _ := p.skipWhiteSpaceAndReadComments(false /* multiLine */)
  1008  	if p.config.infoLevel() {
  1009  		p.config.infof("inlineComment: %q", strings.Join(inlineComment, "\n"))
  1010  	}
  1011  	if len(inlineComment) > 0 {
  1012  		return inlineComment[0]
  1013  	}
  1014  	return ""
  1015  }
  1016  
// readTemplate reads a '%'-delimited template starting at the current position
// and returns its text, including both '%' delimiters. It returns "" without
// consuming anything when the next input byte is not '%'. Quoted sections
// inside the template are skipped as a whole, so a '%' inside quotes does not
// end the template. If no closing '%' is found, everything up to the end of the
// input is consumed and returned.
//
// NOTE(review): after a quoted section, the byte right behind the closing quote
// is only tested for '%'; if it is anything else the outer loop's i++ steps
// over it without checking whether it opens another quoted section.
func (p *parser) readTemplate() string {
	if !p.nextInputIs('%') {
		return ""
	}
	// Start scanning right after the opening '%'.
	i := p.index + 1
	for ; i < p.length; i++ {
		if p.in[i] == '"' || p.in[i] == '\'' {
			stringBegin := i // Index of quote.
			i++
			// Skip to just past the matching closing quote.
			for ; i < p.length; i++ {
				if p.in[i] == '\\' {
					i++ // Skip escaped char.
					continue
				}
				if p.in[i] == p.in[stringBegin] {
					i++ // Skip end quote.
					break
				}
			}
		}
		if i < p.length && p.in[i] == '%' {
			// Include the closing '%' in the consumed text.
			i++
			break
		}
	}
	// advance clamps i to p.length, so overshooting above is harmless here.
	return p.advance(i)
}
  1044  
// NodeSortFunction sorts the given nodes, using the parent node as context. parent can be nil.
// sortAndFilterNodes calls it once for every non-empty list of sibling nodes,
// after the children of those siblings have been processed, and stops at the
// first non-nil error it returns.
type NodeSortFunction func(parent *ast.Node, nodes []*ast.Node) error
  1047  
// NodeFilterFunction filters the given nodes. It has no return value, so the
// filtering has to happen by mutating the nodes in place; RemoveDuplicates, for
// example, sets Deleted on the nodes it filters out. sortAndFilterNodes calls
// it for every non-empty list of sibling nodes before descending into their
// children.
type NodeFilterFunction func(nodes []*ast.Node)
  1050  
  1051  func sortAndFilterNodes(parent *ast.Node, nodes []*ast.Node, sortFunction NodeSortFunction, filterFunction NodeFilterFunction) error {
  1052  	if len(nodes) == 0 {
  1053  		return nil
  1054  	}
  1055  	if filterFunction != nil {
  1056  		filterFunction(nodes)
  1057  	}
  1058  	for _, nd := range nodes {
  1059  		err := sortAndFilterNodes(nd, nd.Children, sortFunction, filterFunction)
  1060  		if err != nil {
  1061  			return err
  1062  		}
  1063  	}
  1064  	if sortFunction != nil {
  1065  		return sortFunction(parent, nodes)
  1066  	}
  1067  	return nil
  1068  }
  1069  
  1070  // RemoveDuplicates marks duplicate key:value pairs from nodes as Deleted.
  1071  func RemoveDuplicates(nodes []*ast.Node) {
  1072  	type nameAndValue struct {
  1073  		name, value string
  1074  	}
  1075  	seen := make(map[nameAndValue]bool)
  1076  	for _, nd := range nodes {
  1077  		if seen != nil && len(nd.Values) == 1 {
  1078  			key := nameAndValue{nd.Name, nd.Values[0].Value}
  1079  			if _, value := seen[key]; value {
  1080  				// Name-Value pair found in the same nesting level, deleting.
  1081  				nd.Deleted = true
  1082  			} else {
  1083  				seen[key] = true
  1084  			}
  1085  		}
  1086  	}
  1087  }
  1088  
  1089  func wrapStrings(nodes []*ast.Node, depth int, c Config) error {
  1090  	if c.WrapStringsAtColumn == 0 && !c.WrapStringsAfterNewlines {
  1091  		return nil
  1092  	}
  1093  	for _, nd := range nodes {
  1094  		if nd.ChildrenSameLine {
  1095  			continue
  1096  		}
  1097  		if c.WrapStringsAtColumn > 0 && needsWrappingAtColumn(nd, depth, c) {
  1098  			if err := wrapLinesAtColumn(nd, depth, c); err != nil {
  1099  				return err
  1100  			}
  1101  		}
  1102  		if c.WrapStringsAfterNewlines && needsWrappingAfterNewlines(nd, c) {
  1103  			if err := wrapLinesAfterNewlines(nd, c); err != nil {
  1104  				return err
  1105  			}
  1106  		}
  1107  		if err := wrapStrings(nd.Children, depth+1, c); err != nil {
  1108  			return err
  1109  		}
  1110  	}
  1111  	return nil
  1112  }
  1113  
  1114  func needsWrappingAtColumn(nd *ast.Node, depth int, c Config) bool {
  1115  	// Even at depth 0 we have a 2-space indent when the wrapped string is rendered on the line below
  1116  	// the field name.
  1117  	const lengthBuffer = 2
  1118  	maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces))
  1119  
  1120  	if !c.WrapHTMLStrings {
  1121  		for _, v := range nd.Values {
  1122  			if tagRegex.Match([]byte(v.Value)) {
  1123  				return false
  1124  			}
  1125  		}
  1126  	}
  1127  
  1128  	for _, v := range nd.Values {
  1129  		if len(v.Value) >= 3 && (strings.HasPrefix(v.Value, `'''`) || strings.HasPrefix(v.Value, `"""`)) {
  1130  			// Don't wrap triple-quoted strings
  1131  			return false
  1132  		}
  1133  		if len(v.Value) > 0 && v.Value[0] != '\'' && v.Value[0] != '"' {
  1134  			// Only wrap strings
  1135  			return false
  1136  		}
  1137  		if len(v.Value) > maxLength {
  1138  			return true
  1139  		}
  1140  	}
  1141  	return false
  1142  }
  1143  
  1144  // If the Values of this Node constitute a string, and if Config.WrapStringsAtColumn > 0, then wrap
  1145  // the string so each line is within the specified columns. Wraps only the current Node (does not
  1146  // recurse into Children).
  1147  func wrapLinesAtColumn(nd *ast.Node, depth int, c Config) error {
  1148  	// This function looks at the unquoted ast.Value.Value string (i.e., with each Value's wrapping
  1149  	// quote chars removed). We need to remove these quotes, since otherwise they'll be re-flowed into
  1150  	// the body of the text.
  1151  	lengthBuffer := 4 // Even at depth 0 we have a 2-space indent and a pair of quotes
  1152  	maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces))
  1153  
  1154  	str, err := unquote.Raw(nd)
  1155  	if err != nil {
  1156  		return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %v", nd.Name, err)
  1157  	}
  1158  
  1159  	// Remove one from the max length since a trailing space may be added below.
  1160  	wrappedStr := wordwrap.WrapString(str, uint(maxLength)-1)
  1161  	lines := strings.Split(wrappedStr, "\n")
  1162  	newValues := make([]*ast.Value, 0, len(lines))
  1163  	// The Value objects have more than just the string in them. They also have any leading and
  1164  	// trailing comments. To maintain these comments we recycle the existing Value objects if
  1165  	// possible.
  1166  	var i int
  1167  	var line string
  1168  	for i, line = range lines {
  1169  		var v *ast.Value
  1170  		if i < len(nd.Values) {
  1171  			v = nd.Values[i]
  1172  		} else {
  1173  			v = &ast.Value{}
  1174  		}
  1175  		if i < len(lines)-1 {
  1176  			line = line + " "
  1177  		}
  1178  		v.Value = fmt.Sprintf(`"%s"`, line)
  1179  		newValues = append(newValues, v)
  1180  	}
  1181  
  1182  	postWrapCollectComments(nd, i)
  1183  
  1184  	nd.Values = newValues
  1185  	return nil
  1186  }
  1187  
// byteEscapeRegex matches a backslash immediately followed by 'x', i.e. the
// start of a hex byte escape, in a still-escaped string value.
// needsWrappingAfterNewlines counts its matches to decide whether a value looks
// like text or like escaped binary data.
// N.b.: this will incorrectly match `\\\\x`, which hopefully is rare.
var byteEscapeRegex = regexp.MustCompile(`\\x`)
  1190  
  1191  func needsWrappingAfterNewlines(nd *ast.Node, c Config) bool {
  1192  	for _, v := range nd.Values {
  1193  		if len(v.Value) >= 3 && (strings.HasPrefix(v.Value, `'''`) || strings.HasPrefix(v.Value, `"""`)) {
  1194  			// Don't wrap triple-quoted strings
  1195  			return false
  1196  		}
  1197  		if len(v.Value) > 0 && v.Value[0] != '\'' && v.Value[0] != '"' {
  1198  			// Only wrap strings
  1199  			return false
  1200  		}
  1201  		byteEscapeCount := len(byteEscapeRegex.FindAllStringIndex(v.Value, -1))
  1202  		if float64(byteEscapeCount) > float64(len(v.Value))*0.1 {
  1203  			// Only wrap UTF-8 looking strings (where less than ~10% of the characters are escaped).
  1204  			return false
  1205  		}
  1206  		// Check that there is at least one newline, *not* at the end of the string.
  1207  		if i := strings.Index(v.Value, `\n`); i >= 0 && i < len(v.Value)-3 {
  1208  			return true
  1209  		}
  1210  	}
  1211  	return false
  1212  }
  1213  
  1214  // If the Values of this Node constitute a string, and if Config.WrapStringsAfterNewlines,
  1215  // then wrap the string so each line ends with a newline.
  1216  // Wraps only the current Node (does not recurse into Children).
  1217  func wrapLinesAfterNewlines(nd *ast.Node, c Config) error {
  1218  	str, err := unquote.Raw(nd)
  1219  	if err != nil {
  1220  		return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %v", nd.Name, err)
  1221  	}
  1222  
  1223  	wrappedStr := strings.ReplaceAll(str, `\n`, `\n`+"\n")
  1224  	// Avoid empty string at end after splitting in case str ended with an (escaped) newline.
  1225  	wrappedStr = strings.TrimSuffix(wrappedStr, "\n")
  1226  	lines := strings.Split(wrappedStr, "\n")
  1227  	newValues := make([]*ast.Value, 0, len(lines))
  1228  	// The Value objects have more than just the string in them. They also have any leading and
  1229  	// trailing comments. To maintain these comments we recycle the existing Value objects if
  1230  	// possible.
  1231  	var i int
  1232  	var line string
  1233  	for i, line = range lines {
  1234  		var v *ast.Value
  1235  		if i < len(nd.Values) {
  1236  			v = nd.Values[i]
  1237  		} else {
  1238  			v = &ast.Value{}
  1239  		}
  1240  		v.Value = fmt.Sprintf(`"%s"`, line)
  1241  		newValues = append(newValues, v)
  1242  	}
  1243  
  1244  	postWrapCollectComments(nd, i)
  1245  
  1246  	nd.Values = newValues
  1247  	return nil
  1248  }
  1249  
  1250  func postWrapCollectComments(nd *ast.Node, i int) {
  1251  	for i++; i < len(nd.Values); i++ {
  1252  		// If this executes, then the text was wrapped into less lines of text (less Values) than
  1253  		// previously. If any of these had comments on them, we collect them so they are not lost.
  1254  		v := nd.Values[i]
  1255  		nd.PostValuesComments = append(nd.PostValuesComments, v.PreComments...)
  1256  		if len(v.InlineComment) > 0 {
  1257  			nd.PostValuesComments = append(nd.PostValuesComments, v.InlineComment)
  1258  		}
  1259  	}
  1260  }
  1261  
  1262  func fixQuotes(s string) string {
  1263  	res := make([]byte, 0, len(s))
  1264  	res = append(res, '"')
  1265  	for i := 0; i < len(s); i++ {
  1266  		if s[i] == '"' {
  1267  			res = append(res, '\\')
  1268  		} else if s[i] == '\\' {
  1269  			res = append(res, s[i])
  1270  			i++
  1271  		}
  1272  		res = append(res, s[i])
  1273  	}
  1274  	res = append(res, '"')
  1275  	return string(res)
  1276  }
  1277  
  1278  func unescapeQuotes(s string) string {
  1279  	res := make([]byte, 0, len(s))
  1280  	for i := 0; i < len(s); i++ {
  1281  		// If we hit an escape sequence...
  1282  		if s[i] == '\\' {
  1283  			// ... keep the backslash unless it's in front of a quote ...
  1284  			if i == len(s)-1 || (s[i+1] != '"' && s[i+1] != '\'') {
  1285  				res = append(res, '\\')
  1286  			}
  1287  			// ... then point at the escaped character so it is output verbatim below.
  1288  			// Doing this within the loop (without "continue") ensures correct handling
  1289  			// of escaped backslashes.
  1290  			i++
  1291  		}
  1292  		if i < len(s) {
  1293  			res = append(res, s[i])
  1294  		}
  1295  	}
  1296  	return string(res)
  1297  }
  1298  
  1299  func smartQuotes(s string) string {
  1300  	s = unescapeQuotes(s)
  1301  	if strings.Contains(s, "\"") && !strings.Contains(s, "'") {
  1302  		// If we hit this branch, the string doesn't contain any single quotes, and
  1303  		// is being wrapped in single quotes, so no escaping is needed.
  1304  		return "'" + s + "'"
  1305  	}
  1306  	// fixQuotes will wrap the string in double quotes, but will escape any
  1307  	// double quotes that appear within the string.
  1308  	return fixQuotes(s)
  1309  }
  1310  
  1311  // DebugFormat returns a textual representation of the specified nodes for
  1312  // consumption by humans when debugging (e.g. in test failures). No guarantees
  1313  // are made about the specific output.
  1314  func DebugFormat(nodes []*ast.Node, depth int) string {
  1315  	res := []string{""}
  1316  	prefix := strings.Repeat(".", depth)
  1317  	for _, nd := range nodes {
  1318  		var value string
  1319  		if nd.Deleted {
  1320  			res = append(res, "DELETED")
  1321  		}
  1322  		if nd.Children != nil { // Also for 0 children.
  1323  			value = fmt.Sprintf("children:%s", DebugFormat(nd.Children, depth+1))
  1324  		} else {
  1325  			value = fmt.Sprintf("values: %v\n", nd.Values)
  1326  		}
  1327  		res = append(res,
  1328  			fmt.Sprintf("name: %q", nd.Name),
  1329  			fmt.Sprintf("PreComments: %q (len %d)", strings.Join(nd.PreComments, "\n"), len(nd.PreComments)),
  1330  			value)
  1331  	}
  1332  	return strings.Join(res, fmt.Sprintf("\n%s ", prefix))
  1333  }
  1334  
  1335  // Pretty formats the nodes at the given indentation depth (0 = top-level).
  1336  func Pretty(nodes []*ast.Node, depth int) string {
  1337  	var result strings.Builder
  1338  	formatter{&result}.writeNodes(removeDeleted(nodes), depth, false /* isSameLine */, false /* asListItems */)
  1339  	return result.String()
  1340  }
  1341  
  1342  // PrettyBytes returns formatted nodes at the given indentation depth (0 = top-level) as bytes.
  1343  func PrettyBytes(nodes []*ast.Node, depth int) []byte {
  1344  	var result bytes.Buffer
  1345  	formatter{&result}.writeNodes(removeDeleted(nodes), depth, false /* isSameLine */, false /* asListItems */)
  1346  	return result.Bytes()
  1347  }
  1348  
// UnsortedFieldCollector collects UnsortedFields during parsing.
type UnsortedFieldCollector struct {
	// fields holds the collected entries keyed by field name; collecting the
	// same name again replaces the earlier entry.
	fields map[string]UnsortedField
}
  1353  
  1354  func newUnsortedFieldCollector() *UnsortedFieldCollector {
  1355  	return &UnsortedFieldCollector{
  1356  		fields: make(map[string]UnsortedField),
  1357  	}
  1358  }
  1359  
// UnsortedFieldCollectorFunc collects UnsortedFields during parsing.
// getNodePriorityForByFieldOrder calls it for a field that has no entry in the
// configured field order, passing the field's name, the line on which the field
// starts, and the name of the enclosing node.
type UnsortedFieldCollectorFunc func(name string, line int32, parent string)
  1362  
  1363  func (ufc *UnsortedFieldCollector) collect(name string, line int32, parent string) {
  1364  	ufc.fields[name] = UnsortedField{name, line, parent}
  1365  }
  1366  
  1367  func (ufc *UnsortedFieldCollector) asError() error {
  1368  	if len(ufc.fields) == 0 {
  1369  		return nil
  1370  	}
  1371  	var fields []UnsortedField
  1372  	for _, f := range ufc.fields {
  1373  		fields = append(fields, f)
  1374  	}
  1375  	return &UnsortedFieldsError{fields}
  1376  }
  1377  
  1378  func nodeSortFunction(c Config) NodeSortFunction {
  1379  	var sorter ast.NodeLess = nil
  1380  	unsortedFieldCollector := newUnsortedFieldCollector()
  1381  	for name, fieldOrder := range c.fieldSortOrder {
  1382  		sorter = ast.ChainNodeLess(sorter, ByFieldOrder(name, fieldOrder, unsortedFieldCollector.collect))
  1383  	}
  1384  	if c.SortFieldsByFieldName {
  1385  		sorter = ast.ChainNodeLess(sorter, ast.ByFieldName)
  1386  	}
  1387  	if c.SortRepeatedFieldsByContent {
  1388  		sorter = ast.ChainNodeLess(sorter, ast.ByFieldValue)
  1389  	}
  1390  	for _, sf := range c.SortRepeatedFieldsBySubfield {
  1391  		field, subfield := parseSubfieldSpec(sf)
  1392  		if subfield != "" {
  1393  			sorter = ast.ChainNodeLess(sorter, ast.ByFieldSubfield(field, subfield))
  1394  		}
  1395  	}
  1396  	if sorter != nil {
  1397  		return func(parent *ast.Node, ns []*ast.Node) error {
  1398  			ast.SortNodes(parent, ns, sorter)
  1399  			if c.RequireFieldSortOrderToMatchAllFieldsInNode {
  1400  				return unsortedFieldCollector.asError()
  1401  			}
  1402  			return nil
  1403  		}
  1404  	}
  1405  	return nil
  1406  }
  1407  
  1408  // Returns the field and subfield parts of spec "{field}.{subfield}".
  1409  // Spec without a dot is considered to be "{subfield}".
  1410  func parseSubfieldSpec(subfieldSpec string) (field string, subfield string) {
  1411  	parts := strings.SplitN(subfieldSpec, ".", 2)
  1412  	if len(parts) == 1 {
  1413  		return "", parts[0]
  1414  	}
  1415  	return parts[0], parts[1]
  1416  }
  1417  
  1418  func nodeFilterFunction(c Config) NodeFilterFunction {
  1419  	if c.RemoveDuplicateValuesForRepeatedFields {
  1420  		return RemoveDuplicates
  1421  	}
  1422  	return nil
  1423  }
  1424  
  1425  func getNodePriorityForByFieldOrder(parent, node *ast.Node, name string, priorities map[string]int, unsortedCollector UnsortedFieldCollectorFunc) *int {
  1426  	if parent != nil && parent.Name != name {
  1427  		return nil
  1428  	}
  1429  	if parent == nil && name != RootName {
  1430  		return nil
  1431  	}
  1432  	// CommentOnly nodes don't set priority below, and default to MaxInt, which keeps them at the bottom
  1433  	prio := math.MaxInt
  1434  
  1435  	// Unknown fields will get the int nil value of 0 from the order map, and bubble to the top.
  1436  	if !node.IsCommentOnly() {
  1437  		var ok bool
  1438  		prio, ok = priorities[node.Name]
  1439  		if !ok {
  1440  			unsortedCollector(node.Name, node.Start.Line, parent.Name)
  1441  		}
  1442  	}
  1443  	return &prio
  1444  }
  1445  
  1446  // ByFieldOrder returns a NodeLess function that orders fields within a node named name
  1447  // by the order specified in fieldOrder. Nodes sorted but not specified by the field order
  1448  // are bubbled to the top and reported to unsortedCollector.
  1449  func ByFieldOrder(name string, fieldOrder []string, unsortedCollector UnsortedFieldCollectorFunc) ast.NodeLess {
  1450  	priorities := make(map[string]int)
  1451  	for i, fieldName := range fieldOrder {
  1452  		priorities[fieldName] = i + 1
  1453  	}
  1454  	return func(parent, ni, nj *ast.Node, isWholeSlice bool) bool {
  1455  		if !isWholeSlice {
  1456  			return false
  1457  		}
  1458  		vi := getNodePriorityForByFieldOrder(parent, ni, name, priorities, unsortedCollector)
  1459  		vj := getNodePriorityForByFieldOrder(parent, nj, name, priorities, unsortedCollector)
  1460  		if vi == nil {
  1461  			return vj != nil
  1462  		}
  1463  		if vj == nil {
  1464  			return false
  1465  		}
  1466  		return *vi < *vj
  1467  	}
  1468  }
  1469  
// stringWriter abstracts over bytes.Buffer and strings.Builder, the two output
// sinks used by PrettyBytes and Pretty respectively.
type stringWriter interface {
	// WriteString appends s to the output.
	WriteString(s string) (int, error)
}
  1474  
// formatter accumulates pretty-printed textproto contents into a stringWriter.
// The writer is embedded, so its WriteString method is called directly on the
// formatter.
type formatter struct {
	stringWriter
}
  1479  
// writeNodes writes nodes, one after the other, to the underlying writer.
//
// depth is the nesting level and determines the indentation. With isSameLine
// set, every node is prefixed with a single space instead of the indentation
// and no newline is written after it. With asListItems set, a comma is written
// after every node that comes before the last node that is not comment-only.
func (f formatter) writeNodes(nodes []*ast.Node, depth int, isSameLine, asListItems bool) {
	indent := " "
	if !isSameLine {
		indent = strings.Repeat(indentSpaces, depth)
	}

	// Index of the last node that is not comment-only; nodes before it get a
	// trailing comma in list mode.
	lastNonCommentIndex := 0
	if asListItems {
		for i := len(nodes) - 1; i >= 0; i-- {
			if !nodes[i].IsCommentOnly() {
				lastNonCommentIndex = i
				break
			}
		}
	}

	for index, nd := range nodes {
		for _, comment := range nd.PreComments {
			if len(comment) == 0 {
				// An empty comment stands for a blank line; it is dropped for the
				// very first top-level node.
				if !(depth == 0 && index == 0) {
					f.WriteString("\n")
				}
				continue
			}
			f.WriteString(indent)
			f.WriteString(comment)
			f.WriteString("\n")
		}

		if nd.IsCommentOnly() {
			// The comments have been printed already, no more work to do.
			continue
		}
		f.WriteString(indent)
		// Node name may be empty in alternative-style textproto files, because they
		// contain a sequence of proto messages of the same type:
		//   { name: "first_msg" }
		//   { name: "second_msg" }
		// In all other cases, nd.Name is not empty and should be printed.
		if nd.Name != "" {
			f.WriteString(nd.Name)
			if !nd.SkipColon {
				f.WriteString(":")
			}

			// The space after the name is required for one-liners and message fields:
			//   title: "there was a space here"
			//   metadata: { ... }
			// In other cases, there is a newline right after the colon, so no space required.
			if nd.Children != nil || (len(nd.Values) == 1 && len(nd.Values[0].PreComments) == 0) || nd.ValuesAsList {
				f.WriteString(" ")
			}
		}

		if nd.ValuesAsList { // For ValuesAsList option we will preserve even empty list  `field: []`
			f.writeValuesAsList(nd, nd.Values, indent+indentSpaces)
		} else if len(nd.Values) > 0 {
			f.writeValues(nd, nd.Values, indent+indentSpaces)
		}
		if nd.Children != nil { // Also for 0 Children.
			// Children inherit same-line rendering from any ancestor.
			if nd.ChildrenAsList {
				f.writeChildrenAsListItems(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine)
			} else {
				f.writeChildren(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine, nd.IsAngleBracket)
			}
		}

		if asListItems && index < lastNonCommentIndex {
			f.WriteString(",")
		}

		// The closing-brace comment goes after the comma, separated by one indent.
		if (nd.Children != nil || nd.ValuesAsList) && len(nd.ClosingBraceComment) > 0 {
			f.WriteString(indentSpaces)
			f.WriteString(nd.ClosingBraceComment)
		}

		if !isSameLine {
			f.WriteString("\n")
		}
	}
}
  1561  
  1562  func (f formatter) writeValues(nd *ast.Node, vals []*ast.Value, indent string) {
  1563  	if len(vals) == 0 {
  1564  		// This should never happen: formatValues can be called only if there are some values.
  1565  		return
  1566  	}
  1567  	sep := "\n" + indent
  1568  	if len(vals) == 1 && len(vals[0].PreComments) == 0 {
  1569  		sep = ""
  1570  	}
  1571  	for _, v := range vals {
  1572  		f.WriteString(sep)
  1573  		for _, comment := range v.PreComments {
  1574  			f.WriteString(comment)
  1575  			f.WriteString(sep)
  1576  		}
  1577  		f.WriteString(v.Value)
  1578  		if len(v.InlineComment) > 0 {
  1579  			f.WriteString(indentSpaces)
  1580  			f.WriteString(v.InlineComment)
  1581  		}
  1582  	}
  1583  	for _, comment := range nd.PostValuesComments {
  1584  		f.WriteString(sep)
  1585  		f.WriteString(comment)
  1586  	}
  1587  }
  1588  
  1589  func (f formatter) writeValuesAsList(nd *ast.Node, vals []*ast.Value, indent string) {
  1590  	// Checks if it's possible to put whole list in a single line.
  1591  	sameLine := nd.ChildrenSameLine && len(nd.PostValuesComments) == 0
  1592  	if sameLine {
  1593  		// Parser found all children on a same line, but we need to check again.
  1594  		// It's possible that AST was modified after parsing.
  1595  		for _, val := range vals {
  1596  			if len(val.PreComments) > 0 || len(vals[0].InlineComment) > 0 {
  1597  				sameLine = false
  1598  				break
  1599  			}
  1600  		}
  1601  	}
  1602  	sep := ""
  1603  	if !sameLine {
  1604  		sep = "\n" + indent
  1605  	}
  1606  	f.WriteString("[")
  1607  
  1608  	for idx, v := range vals {
  1609  		for _, comment := range v.PreComments {
  1610  			f.WriteString(sep)
  1611  			f.WriteString(comment)
  1612  		}
  1613  		f.WriteString(sep)
  1614  		f.WriteString(v.Value)
  1615  		if idx < len(vals)-1 { // Don't put trailing comma that fails Python parser.
  1616  			f.WriteString(",")
  1617  			if sameLine {
  1618  				f.WriteString(" ")
  1619  			}
  1620  		}
  1621  		if len(v.InlineComment) > 0 {
  1622  			f.WriteString(indentSpaces)
  1623  			f.WriteString(v.InlineComment)
  1624  		}
  1625  	}
  1626  	for _, comment := range nd.PostValuesComments {
  1627  		f.WriteString(sep)
  1628  		f.WriteString(comment)
  1629  	}
  1630  	f.WriteString(strings.Replace(sep, indentSpaces, "", 1))
  1631  	f.WriteString("]")
  1632  }
  1633  
  1634  // writeChildren writes the child nodes. The result always ends with a closing brace.
  1635  func (f formatter) writeChildren(children []*ast.Node, depth int, sameLine, isAngleBracket bool) {
  1636  	openBrace := "{"
  1637  	closeBrace := "}"
  1638  	if isAngleBracket {
  1639  		openBrace = "<"
  1640  		closeBrace = ">"
  1641  	}
  1642  	switch {
  1643  	case sameLine && len(children) == 0:
  1644  		f.WriteString(openBrace + closeBrace)
  1645  	case sameLine:
  1646  		f.WriteString(openBrace)
  1647  		f.writeNodes(children, depth, sameLine, false /* asListItems */)
  1648  		f.WriteString(" " + closeBrace)
  1649  	default:
  1650  		f.WriteString(openBrace + "\n")
  1651  		f.writeNodes(children, depth, sameLine, false /* asListItems */)
  1652  		f.WriteString(strings.Repeat(indentSpaces, depth-1))
  1653  		f.WriteString(closeBrace)
  1654  	}
  1655  }
  1656  
  1657  // writeChildrenAsListItems writes the child nodes as list items.
  1658  func (f formatter) writeChildrenAsListItems(children []*ast.Node, depth int, sameLine bool) {
  1659  	openBrace := "["
  1660  	closeBrace := "]"
  1661  	switch {
  1662  	case sameLine && len(children) == 0:
  1663  		f.WriteString(openBrace + closeBrace)
  1664  	case sameLine:
  1665  		f.WriteString(openBrace)
  1666  		f.writeNodes(children, depth, sameLine, true /* asListItems */)
  1667  		f.WriteString(" " + closeBrace)
  1668  	default:
  1669  		f.WriteString(openBrace + "\n")
  1670  		f.writeNodes(children, depth, sameLine, true /* asListItems */)
  1671  		f.WriteString(strings.Repeat(indentSpaces, depth-1))
  1672  		f.WriteString(closeBrace)
  1673  	}
  1674  }
  1675
View as plain text