1
2
3
4
5
6
7 package parser
8
9 import (
10 "bufio"
11 "bytes"
12 "fmt"
13 "math"
14 "regexp"
15 "strconv"
16 "strings"
17
18 "github.com/mitchellh/go-wordwrap"
19 "github.com/protocolbuffers/txtpbfmt/ast"
20 "github.com/protocolbuffers/txtpbfmt/unquote"
21 )
22
23
24
// Config holds the options that control parsing and formatting. Most options
// can also be switched on from a "# txtpbfmt: ..." meta comment in the input
// (see addToConfig).
type Config struct {
	// Disable makes FormatWithConfig return the input unchanged.
	Disable bool

	// ExpandAllChildren makes the parser skip same-line bracket detection:
	// newParser uses an empty bracketSameLine map instead of scanning the input.
	ExpandAllChildren bool

	// SkipAllColons marks every node opened with '{' or '<' as SkipColon,
	// whether or not the input had a colon after the field name.
	SkipAllColons bool

	// AllowUnnamedNodesEverywhere accepts nodes with an empty field name below
	// the root as well. When false, only calls to parse with isRoot set accept
	// them; others fail with an error.
	AllowUnnamedNodesEverywhere bool

	// SortFieldsByFieldName adds ast.ByFieldName to the sort chain.
	SortFieldsByFieldName bool

	// SortRepeatedFieldsByContent adds ast.ByFieldValue to the sort chain.
	SortRepeatedFieldsByContent bool

	// SortRepeatedFieldsBySubfield lists specs of the form "field.subfield" or
	// just "subfield" (see parseSubfieldSpec); each one adds an
	// ast.ByFieldSubfield sorter to the sort chain.
	SortRepeatedFieldsBySubfield []string

	// fieldSortOrder maps a node name to the field order registered through
	// AddFieldSortOrder.
	fieldSortOrder map[string][]string

	// RequireFieldSortOrderToMatchAllFieldsInNode makes sorting return an
	// UnsortedFieldsError when a field was encountered that the registered
	// field order does not list. It only has an effect when at least one
	// sorter is configured.
	RequireFieldSortOrderToMatchAllFieldsInNode bool

	// RemoveDuplicateValuesForRepeatedFields installs RemoveDuplicates as the
	// node filter.
	RemoveDuplicateValuesForRepeatedFields bool

	// AllowTripleQuotedStrings makes the parser recognize strings delimited by
	// """ or '''.
	AllowTripleQuotedStrings bool

	// WrapStringsAtColumn, when greater than zero, enables re-wrapping of
	// quoted string values that exceed the column budget (see
	// needsWrappingAtColumn and wrapLinesAtColumn).
	WrapStringsAtColumn int

	// WrapHTMLStrings allows column wrapping of values that match tagRegex.
	// When false, a node with any such value is never wrapped at column.
	WrapHTMLStrings bool

	// WrapStringsAfterNewlines splits quoted string values after each escaped
	// newline (the two characters backslash and 'n').
	WrapStringsAfterNewlines bool

	// PreserveAngleBrackets marks nodes that were opened with '<' as
	// IsAngleBracket.
	PreserveAngleBrackets bool

	// SmartQuotes makes readValues re-quote single-line strings with
	// smartQuotes instead of fixQuotes.
	SmartQuotes bool

	// Logger receives debug output from the parser. When nil, nothing is
	// logged.
	Logger Logger
}
89
90 func (c *Config) infof(format string, args ...any) {
91 if c.Logger != nil {
92 c.Logger.Infof(format, args...)
93 }
94 }
95 func (c *Config) infoLevel() bool {
96 return c.Logger != nil
97 }
98
99
// Logger is the logging sink used by the parser for debug output.
type Logger interface {
	// Infof logs a message built from a format string and its arguments.
	Infof(format string, args ...any)
}
104
105
// RootName is the node name that stands for the top level of the document:
// a field order registered under this name is applied to nodes whose parent is
// nil (see getNodePriorityForByFieldOrder).
const RootName = "__ROOT__"
107
108
109
110
111 func (c *Config) AddFieldSortOrder(nodeName string, fieldOrder ...string) {
112 if c.fieldSortOrder == nil {
113 c.fieldSortOrder = make(map[string][]string)
114 }
115 c.fieldSortOrder[nodeName] = fieldOrder
116 }
117
118
119
120
// UnsortedFieldsError is the error built by UnsortedFieldCollector.asError. It
// lists the fields that were parsed but are missing from the field order
// registered with AddFieldSortOrder.
type UnsortedFieldsError struct {
	UnsortedFields []UnsortedField
}
124
125
// UnsortedField describes one field that was parsed but not listed in the
// registered field order: its name, the line recorded as the node's start, and
// the name of the parent it was found under.
type UnsortedField struct {
	FieldName       string
	Line            int32
	ParentFieldName string
}
131
132 func (e *UnsortedFieldsError) Error() string {
133 var errs []string
134 for _, us := range e.UnsortedFields {
135 errs = append(errs, fmt.Sprintf(" line: %d, parent field: %q, unsorted field: %q", us.Line, us.ParentFieldName, us.FieldName))
136 }
137 return fmt.Sprintf("fields parsed that were not specified in the parser.AddFieldSortOrder() call:\n%s", strings.Join(errs, "\n"))
138 }
139
// parser holds the input and the read position for one parsing run.
type parser struct {
	// in is the input being parsed; newParser appends a trailing newline when
	// the input does not end with one.
	in []byte
	// index is the offset in in of the next byte to read.
	index int
	// length is len(in), captured by newParser.
	length int

	// bracketSameLine maps the byte index of an opening bracket to true when
	// its closing bracket is on the same line (see sameLineBrackets). It is
	// empty when Config.ExpandAllChildren is set.
	bracketSameLine map[int]bool
	config          Config
	// line and column track the position of index; newParser starts both at 1.
	line, column int
}
150
// defConfig is the zero-value Config used by Format and Parse.
var defConfig = Config{}

// tagRegex matches text that contains a '<' followed, later on the same line,
// by a '>'. needsWrappingAtColumn uses it to detect tag-like (HTML) values.
var tagRegex = regexp.MustCompile(`<.*>`)

// indentSpaces is the indentation unit; its length is subtracted once per
// depth level when the string wrapping budget is computed.
const indentSpaces = " "
155
156
157 func Format(in []byte) ([]byte, error) {
158 return FormatWithConfig(in, defConfig)
159 }
160
161
162
163 func FormatWithConfig(in []byte, c Config) ([]byte, error) {
164 if err := addMetaCommentsToConfig(in, &c); err != nil {
165 return nil, err
166 }
167 if c.Disable {
168 c.infof("Ignored file with 'disable' comment.")
169 return in, nil
170 }
171 nodes, err := parseWithMetaCommentConfig(in, c)
172 if err != nil {
173 return nil, err
174 }
175 return PrettyBytes(nodes, 0), nil
176 }
177
178
179
// sameLineBrackets scans in and returns a map keyed by the byte index of each
// opening '{' or '<' whose matching closing bracket is on the same line; only
// such pairs are recorded, with the value true. Brackets inside comments,
// string literals and %...% templates are ignored. Triple-quoted strings are
// recognized only when allowTripleQuotedStrings is set.
//
// It returns an error for a closing bracket without a matching opening one and
// when the input ends inside a string literal.
func sameLineBrackets(in []byte, allowTripleQuotedStrings bool) (map[int]bool, error) {
	type bracket struct {
		index int
		line  int
	}
	var (
		open                     []bracket
		stringDelimiter          string
		insideComment            bool
		insideString             bool
		insideTemplate           bool
		insideTripleQuotedString bool
		isEscapedChar            bool
	)
	line := 1
	res := map[int]bool{}

	for i, c := range in {
		// NOTE: the cases below use break, never continue. The escape
		// bookkeeping after the switch must run for every byte; skipping it
		// for bytes such as '{' or '#' inside a string left the escape flag
		// set after `\{`, which made the closing quote look escaped.
		switch c {
		case '\n':
			line++
			insideComment = false
		case '{', '<':
			if insideComment || insideString || insideTemplate {
				break
			}
			open = append(open, bracket{index: i, line: line})
		case '}', '>':
			if insideComment || insideString || insideTemplate {
				break
			}
			if len(open) == 0 {
				return nil, fmt.Errorf("too many '}' or '>' at index %d", i)
			}
			last := len(open) - 1
			br := open[last]
			open = open[:last]
			if br.line == line {
				res[br.index] = true
			}
		case '#':
			if !insideString {
				insideComment = true
			}
		case '%':
			if !insideComment && !insideString {
				insideTemplate = !insideTemplate
			}
		case '"', '\'':
			if insideComment {
				break
			}
			delim := string(c)
			tripleQuoted := false
			if allowTripleQuotedStrings && i+3 <= len(in) {
				triple := string(in[i : i+3])
				if triple == `"""` || triple == `'''` {
					delim = triple
					tripleQuoted = true
				}
			}

			if !insideString {
				insideString = true
				insideTripleQuotedString = tripleQuoted
				stringDelimiter = delim
				break
			}
			// Escapes are not tracked inside triple-quoted strings, so a
			// matching delimiter always closes those.
			if stringDelimiter == delim && (insideTripleQuotedString || !isEscapedChar) {
				insideString = false
				insideTripleQuotedString = false
			}
		}

		// A backslash inside a regular string escapes exactly the next byte.
		if isEscapedChar {
			isEscapedChar = false
		} else if c == '\\' && insideString && !insideTripleQuotedString {
			isEscapedChar = true
		}
	}
	if insideString {
		return nil, fmt.Errorf("unterminated string literal")
	}
	return res, nil
}
270
271 func removeDeleted(nodes []*ast.Node) []*ast.Node {
272 res := []*ast.Node{}
273
274
275
276
277
278
279
280
281
282
283
284 addEmptyLine := false
285 for _, node := range nodes {
286 if node.Deleted {
287 if len(node.PreComments) > 0 && node.PreComments[0] == "" {
288 addEmptyLine = true
289 }
290 continue
291 }
292 if len(node.Children) > 0 {
293 node.Children = removeDeleted(node.Children)
294 }
295 if addEmptyLine && (len(node.PreComments) == 0 || node.PreComments[0] != "") {
296 node.PreComments = append([]string{""}, node.PreComments...)
297 }
298 addEmptyLine = false
299 res = append(res, node)
300 }
301 return res
302 }
303
var (
	// spaceSeparators are the bytes treated as blanks (see isBlankSep).
	spaceSeparators = []byte(" \t\n")
	// valueSeparators are the bytes that end a field name or an unquoted
	// value (see isValueSep); they include the blanks above.
	valueSeparators = []byte(" \t\n{}:,[]<>;#")
)
308
309
310 func Parse(in []byte) ([]*ast.Node, error) {
311 return ParseWithConfig(in, defConfig)
312 }
313
314
315
316 func ParseWithConfig(in []byte, c Config) ([]*ast.Node, error) {
317 if err := addMetaCommentsToConfig(in, &c); err != nil {
318 return nil, err
319 }
320 return parseWithMetaCommentConfig(in, c)
321 }
322
323
324 func parseWithMetaCommentConfig(in []byte, c Config) ([]*ast.Node, error) {
325 p, err := newParser(in, c)
326 if err != nil {
327 return nil, err
328 }
329 if p.config.infoLevel() {
330 p.config.infof("p.in: %q", string(p.in))
331 p.config.infof("p.length: %v", p.length)
332 }
333
334
335 nodes, _, err := p.parse( true)
336 if err != nil {
337 return nil, err
338 }
339 if p.index < p.length {
340 return nil, fmt.Errorf("parser didn't consume all input. Stopped at %s", p.errorContext())
341 }
342 if err := wrapStrings(nodes, 0, c); err != nil {
343 return nil, err
344 }
345 if err := sortAndFilterNodes( nil, nodes, nodeSortFunction(c), nodeFilterFunction(c)); err != nil {
346 return nil, err
347 }
348 return nodes, nil
349 }
350
351
352
353
354
355
356
357
358 func addToConfig(metaComment string, c *Config) error {
359
360 key, val, hasEqualSign := strings.Cut(metaComment, "=")
361 switch key {
362 case "allow_triple_quoted_strings":
363 c.AllowTripleQuotedStrings = true
364 case "allow_unnamed_nodes_everywhere":
365 c.AllowUnnamedNodesEverywhere = true
366 case "disable":
367 c.Disable = true
368 case "expand_all_children":
369 c.ExpandAllChildren = true
370 case "preserve_angle_brackets":
371 c.PreserveAngleBrackets = true
372 case "remove_duplicate_values_for_repeated_fields":
373 c.RemoveDuplicateValuesForRepeatedFields = true
374 case "skip_all_colons":
375 c.SkipAllColons = true
376 case "smartquotes":
377 c.SmartQuotes = true
378 case "sort_fields_by_field_name":
379 c.SortFieldsByFieldName = true
380 case "sort_repeated_fields_by_content":
381 c.SortRepeatedFieldsByContent = true
382 case "sort_repeated_fields_by_subfield":
383
384 if !hasEqualSign {
385 return fmt.Errorf("format should be %s=<string>, got: %s", key, metaComment)
386 }
387 c.SortRepeatedFieldsBySubfield = append(c.SortRepeatedFieldsBySubfield, val)
388 case "wrap_strings_at_column":
389
390 if !hasEqualSign {
391 return fmt.Errorf("format should be %s=<int>, got: %s", key, metaComment)
392 }
393 i, err := strconv.Atoi(strings.TrimSpace(val))
394 if err != nil {
395 return fmt.Errorf("error parsing %s value %q (skipping): %v", key, val, err)
396 }
397 c.WrapStringsAtColumn = i
398 case "wrap_html_strings":
399 c.WrapHTMLStrings = true
400 case "wrap_strings_after_newlines":
401 c.WrapStringsAfterNewlines = true
402 default:
403 return fmt.Errorf("unrecognized MetaComment: %s", metaComment)
404 }
405 return nil
406 }
407
408
409 func addMetaCommentsToConfig(in []byte, c *Config) error {
410 scanner := bufio.NewScanner(bytes.NewReader(in))
411 for scanner.Scan() {
412 line := scanner.Text()
413 if len(line) == 0 {
414 continue
415 }
416 if line[0] != byte('#') {
417 break
418 }
419
420
421
422
423 key, value, hasColon := strings.Cut(line[1:], ":")
424 if hasColon && strings.TrimSpace(key) == "txtpbfmt" {
425 for _, s := range strings.Split(strings.TrimSpace(value), ",") {
426 metaComment := strings.TrimSpace(s)
427 if err := addToConfig(metaComment, c); err != nil {
428 return err
429 }
430 }
431 }
432 }
433 return nil
434 }
435
436 func newParser(in []byte, c Config) (*parser, error) {
437 var bracketSameLine map[int]bool
438 if c.ExpandAllChildren {
439 bracketSameLine = map[int]bool{}
440 } else {
441 var err error
442 if bracketSameLine, err = sameLineBrackets(in, c.AllowTripleQuotedStrings); err != nil {
443 return nil, err
444 }
445 }
446 if len(in) > 0 && in[len(in)-1] != '\n' {
447 in = append(in, '\n')
448 }
449 parser := &parser{
450 in: in,
451 index: 0,
452 length: len(in),
453 bracketSameLine: bracketSameLine,
454 config: c,
455 line: 1,
456 column: 1,
457 }
458 return parser, nil
459 }
460
461 func (p *parser) nextInputIs(b byte) bool {
462 return p.index < p.length && p.in[p.index] == b
463 }
464
465 func (p *parser) consume(b byte) bool {
466 if !p.nextInputIs(b) {
467 return false
468 }
469 p.index++
470 p.column++
471 if b == '\n' {
472 p.line++
473 p.column = 1
474 }
475 return true
476 }
477
478
479 func (p *parser) consumeString(s string) bool {
480 if p.index+len(s) > p.length {
481 return false
482 }
483 if string(p.in[p.index:p.index+len(s)]) != s {
484 return false
485 }
486 p.index += len(s)
487 p.column += len(s)
488 return true
489 }
490
491
492
// loopDetector guards a parsing loop against spinning without consuming
// input: iter returns an error once the parser index has stayed the same for
// two consecutive calls.
type loopDetector struct {
	// lastIndex is the parser index seen when progress was last observed.
	lastIndex int
	// count is the number of consecutive iter calls without progress.
	count  int
	parser *parser
}
498
499 func (p *parser) getLoopDetector() *loopDetector {
500 return &loopDetector{lastIndex: p.index, parser: p}
501 }
502
503 func (l *loopDetector) iter() error {
504 if l.parser.index == l.lastIndex {
505 l.count++
506 if l.count < 2 {
507 return nil
508 }
509 return fmt.Errorf("parser failed to make progress at %s", l.parser.errorContext())
510 }
511 l.lastIndex = l.parser.index
512 l.count = 0
513 return nil
514 }
515
516 func (p parser) errorContext() string {
517 index := p.index
518 if index >= p.length {
519 index = p.length - 1
520 }
521
522 lastContentIndex := index + 20
523 if lastContentIndex >= p.length {
524 lastContentIndex = p.length - 1
525 }
526 previousContentIndex := index - 20
527 if previousContentIndex < 0 {
528 previousContentIndex = 0
529 }
530 before := string(p.in[previousContentIndex:index])
531 after := string(p.in[index:lastContentIndex])
532 return fmt.Sprintf("index %v\nposition %+v\nbefore: %q\nafter: %q\nbefore+after: %q", index, p.position(), before, after, before+after)
533 }
534
535 func (p *parser) position() ast.Position {
536 return ast.Position{
537 Byte: uint32(p.index),
538 Line: int32(p.line),
539 Column: int32(p.column),
540 }
541 }
542
543 func (p *parser) consumeOptionalSeparator() error {
544 if p.index > 0 && !p.isBlankSep(p.index-1) {
545
546 if p.consume(':') {
547 return fmt.Errorf("parser encountered unexpected : character (should be whitespace, or a ,; separator)")
548 }
549 }
550
551 _ = p.consume(';')
552 _ = p.consume(',')
553
554 return nil
555 }
556
557
558
559
560
561
562
// parse reads nodes from the current position until the end of the input or
// until a closing '}', '>' or ']' is consumed. It returns the nodes read and
// the position where reading ended: for a closing bracket that is the position
// just before it, moved back over any spaces preceding it.
//
// isRoot relaxes the field-name check: with isRoot set, nodes with an empty
// name are accepted even when Config.AllowUnnamedNodesEverywhere is false.
// parse calls itself for the contents of "{...}" / "<...>" messages and for
// lists of messages ("[{...}, {...}]").
func (p *parser) parse(isRoot bool) (result []*ast.Node, endPos ast.Position, err error) {
	res := []*ast.Node{}
	for ld := p.getLoopDetector(); p.index < p.length; {
		if err := ld.iter(); err != nil {
			return nil, ast.Position{}, err
		}

		startPos := p.position()
		if p.nextInputIs('\n') {
			// The index is at a newline, so whatever comes next starts on the
			// following line: record the start there.
			startPos.Byte++
			startPos.Line++
			startPos.Column = 1
		}

		// Comments (and blank lines) in front of the node.
		comments, blankLines := p.skipWhiteSpaceAndReadComments(true /* multiLine */)

		// More than one newline before the node is recorded as an empty
		// leading pre-comment.
		if blankLines > 1 {
			if p.config.infoLevel() {
				p.config.infof("blankLines: %v", blankLines)
			}
			comments = append([]string{""}, comments...)
		}

		// Templates in node position are kept as comment lines, together with
		// the comment that follows each of them on the same line.
		for p.nextInputIs('%') {
			comments = append(comments, p.readTemplate())
			c, _ := p.skipWhiteSpaceAndReadComments(false)
			comments = append(comments, c...)
		}

		if endPos := p.position(); p.consume('}') || p.consume('>') || p.consume(']') {
			// End of the enclosing message or list. Comments collected in
			// front of the closing bracket become a node of their own.
			if len(comments) > 0 {
				res = append(res, &ast.Node{Start: startPos, PreComments: comments})
			}

			// Move the end position back over the spaces that precede the
			// closing bracket.
			for endPos.Byte > 0 && p.in[endPos.Byte-1] == ' ' {
				endPos.Byte--
				endPos.Column--
			}

			if err = p.consumeOptionalSeparator(); err != nil {
				return nil, ast.Position{}, err
			}

			// Done parsing children.
			return res, endPos, nil
		}

		nd := &ast.Node{
			Start: startPos,
			PreComments: comments,
		}
		if p.config.infoLevel() {
			p.config.infof("PreComments: %q", strings.Join(nd.PreComments, "\n"))
		}

		// Skip blanks on the current line.
		for p.consume(' ') || p.consume('\t') {
		}

		// A newline here means nothing but comments was read; keep them as a
		// node without name and continue with the next line. Unlike the
		// end-of-input case below, the node is added even without comments.
		if p.nextInputIs('\n') {
			res = append(res, nd)
			continue
		}

		// End of input: keep trailing comments, if any.
		if p.index >= p.length {
			nd.End = p.position()
			if len(nd.PreComments) > 0 {
				res = append(res, nd)
			}
			break
		}

		if p.consume('[') {
			// Bracketed name, e.g. an extension: stored with its blanks
			// removed and the brackets re-added.
			nd.Name = fmt.Sprintf("[%s]", p.readExtension())
			_ = p.consume(']')
		} else {
			// Plain field name.
			nd.Name = p.readFieldName()
			if nd.Name == "" && !isRoot && !p.config.AllowUnnamedNodesEverywhere {
				return nil, ast.Position{}, fmt.Errorf("Failed to find a FieldName at %s", p.errorContext())
			}
		}
		if p.config.infoLevel() {
			p.config.infof("name: %q", nd.Name)
		}

		preCommentsBeforeColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)
		nd.SkipColon = !p.consume(':')
		// Remember the position right after the optional colon; the scalar
		// branch below rewinds to it so that readValues sees the comments.
		previousPos := p.position()
		preCommentsAfterColon, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)

		if p.consume('{') || p.consume('<') {
			// Message value.
			if p.config.SkipAllColons {
				nd.SkipColon = true
			}
			// p.index-1 is the index of the bracket just consumed.
			nd.ChildrenSameLine = p.bracketSameLine[p.index-1]
			nd.IsAngleBracket = p.config.PreserveAngleBrackets && p.in[p.index-1] == '<'

			// Recursive call to parse the child nodes.
			nodes, lastPos, err := p.parse(/*isRoot=*/ false)
			if err != nil {
				return nil, ast.Position{}, err
			}
			nd.Children = nodes
			nd.End = lastPos

			nd.ClosingBraceComment = p.readInlineComment()
		} else if p.consume('[') {
			// List value.
			openBracketLine := p.line

			// Comments between '[' and the first element.
			preCommentsAfterListStart := p.readContinuousBlocksOfComments()

			var preComments []string
			preComments = append(preComments, preCommentsBeforeColon...)
			preComments = append(preComments, preCommentsAfterColon...)
			preComments = append(preComments, preCommentsAfterListStart...)

			if p.nextInputIs('{') {
				// List of messages, e.g. "field: [{...}, {...}]".
				nd.ChildrenAsList = true

				// The elements have no field name, hence isRoot.
				nodes, lastPos, err := p.parse(/*isRoot=*/ true)
				if err != nil {
					return nil, ast.Position{}, err
				}
				if len(nodes) > 0 {
					nodes[0].PreComments = preComments
				}

				nd.Children = nodes
				nd.End = lastPos
				nd.ClosingBraceComment = p.readInlineComment()
				nd.ChildrenSameLine = openBracketLine == p.line
			} else {
				// List of scalar values, e.g. "field: [1, 2, 3]".
				nd.ValuesAsList = true

				for ld := p.getLoopDetector(); !p.consume(']') && p.index < p.length; {
					if err := ld.iter(); err != nil {
						return nil, ast.Position{}, err
					}

					// Each list element must be exactly one value.
					vals, err := p.readValues()
					if err != nil {
						return nil, ast.Position{}, err
					}
					if len(vals) != 1 {
						return nil, ast.Position{}, fmt.Errorf("multiple-string value not supported (%v). Please add comma explicitly, see http://b/162070952", vals)
					}
					if len(preComments) > 0 {
						// Comments read before this element go in front of the
						// comments readValues attached to it.
						vals[0].PreComments = append(preComments, vals[0].PreComments...)
					}

					// Skip blanks up to the separating comma; a comment that
					// follows the comma replaces the element's inline comment.
					_, _ = p.skipWhiteSpaceAndReadComments(false /* multiLine */)
					if p.consume(',') {
						vals[0].InlineComment = p.readInlineComment()
					}

					nd.Values = append(nd.Values, vals...)

					// Comments in front of the next element (or in front of
					// the closing bracket).
					preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */)
				}
				nd.ChildrenSameLine = openBracketLine == p.line

				res = append(res, nd)

				// Comments left over after the last element belong to the
				// closing bracket.
				nd.PostValuesComments = preComments
				nd.ClosingBraceComment = p.readInlineComment()

				if err = p.consumeOptionalSeparator(); err != nil {
					return nil, ast.Position{}, err
				}

				// nd was already appended above.
				continue
			}
		} else {
			// Scalar value(s). Rewind to just after the colon so that
			// readValues collects the comments in front of the value itself.
			p.index = int(previousPos.Byte)
			p.line = int(previousPos.Line)
			p.column = int(previousPos.Column)

			nd.Values, err = p.readValues()
			if err != nil {
				return nil, ast.Position{}, err
			}
			if err = p.consumeOptionalSeparator(); err != nil {
				return nil, ast.Position{}, err
			}
		}
		if p.config.infoLevel() && p.index < p.length {
			p.config.infof("p.in[p.index]: %q", string(p.in[p.index]))
		}
		res = append(res, nd)
	}
	return res, p.position(), nil
}
785
786 func (p *parser) readFieldName() string {
787 i := p.index
788 for ; i < p.length && !p.isValueSep(i); i++ {
789 }
790 return p.advance(i)
791 }
792
793 func (p *parser) readExtension() string {
794 i := p.index
795 for ; i < p.length && (p.isBlankSep(i) || !p.isValueSep(i)); i++ {
796 }
797 return removeBlanks(p.advance(i))
798 }
799
800 func removeBlanks(in string) string {
801 s := []byte(in)
802 for _, b := range spaceSeparators {
803 s = bytes.Replace(s, []byte{b}, nil, -1)
804 }
805 return string(s)
806 }
807
808 func (p *parser) readContinuousBlocksOfComments() []string {
809 var preComments []string
810 for {
811 comments, blankLines := p.skipWhiteSpaceAndReadComments(true)
812 if len(comments) == 0 {
813 break
814 }
815 if blankLines > 0 && len(preComments) > 0 {
816 comments = append([]string{""}, comments...)
817 }
818 preComments = append(preComments, comments...)
819 }
820
821 return preComments
822 }
823
824
825
826
827
828
829
830
// skipWhiteSpaceAndReadComments advances over blanks and '#' comments and
// returns the comments read (each from its '#' up to, but not including, the
// newline) together with the number of newlines seen before the first comment.
//
// With multiLine false, scanning stops at the first newline, which is left
// unconsumed. With multiLine true, scanning continues over following lines
// and stops at the first non-blank byte outside a comment, or at a newline
// that follows a completed comment line (i.e. a blank line after comments).
func (p *parser) skipWhiteSpaceAndReadComments(multiLine bool) ([]string, int) {
	i := p.index
	var foundComment, insideComment bool
	commentBegin := 0
	var comments []string
	blankLines := 0
	for ; i < p.length; i++ {
		if p.in[i] == '#' && !insideComment {
			insideComment = true
			foundComment = true
			commentBegin = i
		} else if p.in[i] == '\n' {
			if insideComment {
				// End of a comment line.
				comments = append(comments, string(p.in[commentBegin:i]))
				insideComment = false
			} else if foundComment {
				// Newline on an otherwise blank line after comments: stop and
				// leave the index at the preceding byte.
				i--
				break
			} else {
				blankLines++
			}
			if !multiLine {
				break
			}
		}
		// Stop at the first non-blank byte that is not part of a comment.
		if !insideComment && !p.isBlankSep(i) {
			break
		}
	}
	sep := p.advance(i)
	if p.config.infoLevel() {
		p.config.infof("sep: %q\np.index: %v", string(sep), p.index)
		if p.index < p.length {
			p.config.infof("p.in[p.index]: %q", string(p.in[p.index]))
		}
	}
	return comments, blankLines
}
869
870 func (p *parser) isBlankSep(i int) bool {
871 return bytes.Contains(spaceSeparators, p.in[i:i+1])
872 }
873
874 func (p *parser) isValueSep(i int) bool {
875 return bytes.Contains(valueSeparators, p.in[i:i+1])
876 }
877
878 func (p *parser) advance(i int) string {
879 if i > p.length {
880 i = p.length
881 }
882 res := p.in[p.index:i]
883 p.index = i
884 strRes := string(res)
885 newlines := strings.Count(strRes, "\n")
886 if newlines == 0 {
887 p.column += len(strRes)
888 } else {
889 p.column = len(strRes) - strings.LastIndex(strRes, "\n")
890 p.line += newlines
891 }
892 return string(res)
893 }
894
// readValues reads the value(s) at the current position: an optional
// template, an optional triple-quoted string (when enabled), and then either
// a sequence of adjacent quoted strings or, if none of those was found, a
// single unquoted value that runs up to the next value separator. Comments in
// front of a string become that value's PreComments.
//
// It returns an error for a string that contains an unescaped newline or is
// not terminated.
func (p *parser) readValues() ([]*ast.Value, error) {
	var values []*ast.Value
	// previousPos is the position right after the last value read (including
	// its inline comment). It stays zero while nothing has been read.
	var previousPos ast.Position
	preComments, _ := p.skipWhiteSpaceAndReadComments(true /* multiLine */)
	if p.nextInputIs('%') {
		values = append(values, p.populateValue(p.readTemplate(), nil))
		previousPos = p.position()
	}
	if p.config.AllowTripleQuotedStrings {
		v, err := p.readTripleQuotedString()
		if err != nil {
			return nil, err
		}
		if v != nil {
			values = append(values, v)
			previousPos = p.position()
		}
	}
	for p.consume('"') || p.consume('\'') {
		// Handle string value. stringBegin is the index of the opening quote.
		stringBegin := p.index - 1
		i := p.index
		for ; i < p.length; i++ {
			if p.in[i] == '\\' {
				// Skip the escaped byte.
				i++
				continue
			}
			if p.in[i] == '\n' {
				p.index = i
				return nil, fmt.Errorf("found literal (unescaped) new line in string at %s", p.errorContext())
			}
			if p.in[i] == p.in[stringBegin] {
				// Closing quote: normalize the quoting of the content.
				var vl string
				if p.config.SmartQuotes {
					vl = smartQuotes(p.advance(i))
				} else {
					vl = fixQuotes(p.advance(i))
				}
				// Step over the closing quote.
				_ = p.advance(i + 1)
				values = append(values, p.populateValue(vl, preComments))

				previousPos = p.position()
				preComments, _ = p.skipWhiteSpaceAndReadComments(true /* multiLine */)
				break
			}
		}
		if i == p.length {
			p.index = i
			return nil, fmt.Errorf("unfinished string at %s", p.errorContext())
		}
	}
	if previousPos != (ast.Position{}) {
		// Rewind over the blanks and comments that were read after the last
		// value, so that the caller sees them.
		p.index = int(previousPos.Byte)
		p.line = int(previousPos.Line)
		p.column = int(previousPos.Column)
	} else {
		i := p.index
		// Handle other (unquoted) values.
		for ; i < p.length; i++ {
			if p.isValueSep(i) {
				break
			}
		}
		vl := p.advance(i)
		values = append(values, p.populateValue(vl, preComments))
	}
	if p.config.infoLevel() {
		p.config.infof("values: %v", values)
	}
	return values, nil
}
967
968 func (p *parser) readTripleQuotedString() (*ast.Value, error) {
969 start := p.index
970 stringBegin := p.index
971 delimiter := `"""`
972 if !p.consumeString(delimiter) {
973 delimiter = `'''`
974 if !p.consumeString(delimiter) {
975 return nil, nil
976 }
977 }
978
979 for {
980 if p.consumeString(delimiter) {
981 break
982 }
983 if p.index == p.length {
984 p.index = start
985 return nil, fmt.Errorf("unfinished string at %s", p.errorContext())
986 }
987 p.index++
988 }
989
990 v := p.populateValue(string(p.in[stringBegin:p.index]), nil)
991
992 return v, nil
993 }
994
995 func (p *parser) populateValue(vl string, preComments []string) *ast.Value {
996 if p.config.infoLevel() {
997 p.config.infof("value: %q", vl)
998 }
999 return &ast.Value{
1000 Value: vl,
1001 InlineComment: p.readInlineComment(),
1002 PreComments: preComments,
1003 }
1004 }
1005
1006 func (p *parser) readInlineComment() string {
1007 inlineComment, _ := p.skipWhiteSpaceAndReadComments(false )
1008 if p.config.infoLevel() {
1009 p.config.infof("inlineComment: %q", strings.Join(inlineComment, "\n"))
1010 }
1011 if len(inlineComment) > 0 {
1012 return inlineComment[0]
1013 }
1014 return ""
1015 }
1016
// readTemplate consumes and returns a template of the form %...% starting at
// the current position, including both '%' bytes. A '%' inside a quoted string
// within the template does not end it. It returns the empty string, without
// consuming anything, when the next byte is not '%'. When no closing '%' is
// found, everything up to the end of the input is consumed.
func (p *parser) readTemplate() string {
	if !p.nextInputIs('%') {
		return ""
	}
	i := p.index + 1
	for ; i < p.length; i++ {
		if p.in[i] == '"' || p.in[i] == '\'' {
			// Skip over a quoted string, honoring backslash escapes.
			stringBegin := i
			i++
			for ; i < p.length; i++ {
				if p.in[i] == '\\' {
					i++
					continue
				}
				if p.in[i] == p.in[stringBegin] {
					i++
					break
				}
			}
		}
		if i < p.length && p.in[i] == '%' {
			// Closing '%': include it in the result.
			i++
			break
		}
	}
	// i may be past the end of the input here; advance clamps it.
	return p.advance(i)
}
1044
1045
// NodeSortFunction sorts the given nodes, which are the children of parent
// (parent is nil for the top-level nodes). It may return an error.
type NodeSortFunction func(parent *ast.Node, nodes []*ast.Node) error
1047
1048
// NodeFilterFunction filters the given nodes in place; sortAndFilterNodes
// calls it on each level before sorting. RemoveDuplicates is one such
// function.
type NodeFilterFunction func(nodes []*ast.Node)
1050
1051 func sortAndFilterNodes(parent *ast.Node, nodes []*ast.Node, sortFunction NodeSortFunction, filterFunction NodeFilterFunction) error {
1052 if len(nodes) == 0 {
1053 return nil
1054 }
1055 if filterFunction != nil {
1056 filterFunction(nodes)
1057 }
1058 for _, nd := range nodes {
1059 err := sortAndFilterNodes(nd, nd.Children, sortFunction, filterFunction)
1060 if err != nil {
1061 return err
1062 }
1063 }
1064 if sortFunction != nil {
1065 return sortFunction(parent, nodes)
1066 }
1067 return nil
1068 }
1069
1070
1071 func RemoveDuplicates(nodes []*ast.Node) {
1072 type nameAndValue struct {
1073 name, value string
1074 }
1075 seen := make(map[nameAndValue]bool)
1076 for _, nd := range nodes {
1077 if seen != nil && len(nd.Values) == 1 {
1078 key := nameAndValue{nd.Name, nd.Values[0].Value}
1079 if _, value := seen[key]; value {
1080
1081 nd.Deleted = true
1082 } else {
1083 seen[key] = true
1084 }
1085 }
1086 }
1087 }
1088
1089 func wrapStrings(nodes []*ast.Node, depth int, c Config) error {
1090 if c.WrapStringsAtColumn == 0 && !c.WrapStringsAfterNewlines {
1091 return nil
1092 }
1093 for _, nd := range nodes {
1094 if nd.ChildrenSameLine {
1095 continue
1096 }
1097 if c.WrapStringsAtColumn > 0 && needsWrappingAtColumn(nd, depth, c) {
1098 if err := wrapLinesAtColumn(nd, depth, c); err != nil {
1099 return err
1100 }
1101 }
1102 if c.WrapStringsAfterNewlines && needsWrappingAfterNewlines(nd, c) {
1103 if err := wrapLinesAfterNewlines(nd, c); err != nil {
1104 return err
1105 }
1106 }
1107 if err := wrapStrings(nd.Children, depth+1, c); err != nil {
1108 return err
1109 }
1110 }
1111 return nil
1112 }
1113
1114 func needsWrappingAtColumn(nd *ast.Node, depth int, c Config) bool {
1115
1116
1117 const lengthBuffer = 2
1118 maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces))
1119
1120 if !c.WrapHTMLStrings {
1121 for _, v := range nd.Values {
1122 if tagRegex.Match([]byte(v.Value)) {
1123 return false
1124 }
1125 }
1126 }
1127
1128 for _, v := range nd.Values {
1129 if len(v.Value) >= 3 && (strings.HasPrefix(v.Value, `'''`) || strings.HasPrefix(v.Value, `"""`)) {
1130
1131 return false
1132 }
1133 if len(v.Value) > 0 && v.Value[0] != '\'' && v.Value[0] != '"' {
1134
1135 return false
1136 }
1137 if len(v.Value) > maxLength {
1138 return true
1139 }
1140 }
1141 return false
1142 }
1143
1144
1145
1146
1147 func wrapLinesAtColumn(nd *ast.Node, depth int, c Config) error {
1148
1149
1150
1151 lengthBuffer := 4
1152 maxLength := c.WrapStringsAtColumn - lengthBuffer - (depth * len(indentSpaces))
1153
1154 str, err := unquote.Raw(nd)
1155 if err != nil {
1156 return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %v", nd.Name, err)
1157 }
1158
1159
1160 wrappedStr := wordwrap.WrapString(str, uint(maxLength)-1)
1161 lines := strings.Split(wrappedStr, "\n")
1162 newValues := make([]*ast.Value, 0, len(lines))
1163
1164
1165
1166 var i int
1167 var line string
1168 for i, line = range lines {
1169 var v *ast.Value
1170 if i < len(nd.Values) {
1171 v = nd.Values[i]
1172 } else {
1173 v = &ast.Value{}
1174 }
1175 if i < len(lines)-1 {
1176 line = line + " "
1177 }
1178 v.Value = fmt.Sprintf(`"%s"`, line)
1179 newValues = append(newValues, v)
1180 }
1181
1182 postWrapCollectComments(nd, i)
1183
1184 nd.Values = newValues
1185 return nil
1186 }
1187
1188
// byteEscapeRegex matches the two characters backslash and 'x', i.e. the start
// of a byte escape inside a string value.
var byteEscapeRegex = regexp.MustCompile(`\\x`)
1190
1191 func needsWrappingAfterNewlines(nd *ast.Node, c Config) bool {
1192 for _, v := range nd.Values {
1193 if len(v.Value) >= 3 && (strings.HasPrefix(v.Value, `'''`) || strings.HasPrefix(v.Value, `"""`)) {
1194
1195 return false
1196 }
1197 if len(v.Value) > 0 && v.Value[0] != '\'' && v.Value[0] != '"' {
1198
1199 return false
1200 }
1201 byteEscapeCount := len(byteEscapeRegex.FindAllStringIndex(v.Value, -1))
1202 if float64(byteEscapeCount) > float64(len(v.Value))*0.1 {
1203
1204 return false
1205 }
1206
1207 if i := strings.Index(v.Value, `\n`); i >= 0 && i < len(v.Value)-3 {
1208 return true
1209 }
1210 }
1211 return false
1212 }
1213
1214
1215
1216
1217 func wrapLinesAfterNewlines(nd *ast.Node, c Config) error {
1218 str, err := unquote.Raw(nd)
1219 if err != nil {
1220 return fmt.Errorf("skipping string wrapping on node %q (error unquoting string): %v", nd.Name, err)
1221 }
1222
1223 wrappedStr := strings.ReplaceAll(str, `\n`, `\n`+"\n")
1224
1225 wrappedStr = strings.TrimSuffix(wrappedStr, "\n")
1226 lines := strings.Split(wrappedStr, "\n")
1227 newValues := make([]*ast.Value, 0, len(lines))
1228
1229
1230
1231 var i int
1232 var line string
1233 for i, line = range lines {
1234 var v *ast.Value
1235 if i < len(nd.Values) {
1236 v = nd.Values[i]
1237 } else {
1238 v = &ast.Value{}
1239 }
1240 v.Value = fmt.Sprintf(`"%s"`, line)
1241 newValues = append(newValues, v)
1242 }
1243
1244 postWrapCollectComments(nd, i)
1245
1246 nd.Values = newValues
1247 return nil
1248 }
1249
1250 func postWrapCollectComments(nd *ast.Node, i int) {
1251 for i++; i < len(nd.Values); i++ {
1252
1253
1254 v := nd.Values[i]
1255 nd.PostValuesComments = append(nd.PostValuesComments, v.PreComments...)
1256 if len(v.InlineComment) > 0 {
1257 nd.PostValuesComments = append(nd.PostValuesComments, v.InlineComment)
1258 }
1259 }
1260 }
1261
// fixQuotes returns s, the content of a string literal without its quotes,
// wrapped in double quotes. Unescaped double quotes in s are escaped; existing
// escape sequences are copied unchanged. A lone backslash at the very end of s
// is copied as is.
func fixQuotes(s string) string {
	res := make([]byte, 0, len(s)+2)
	res = append(res, '"')
	for i := 0; i < len(s); i++ {
		switch {
		case s[i] == '"':
			res = append(res, '\\')
		case s[i] == '\\' && i+1 < len(s):
			// Copy the backslash here and the escaped byte below. The bounds
			// check keeps a trailing lone backslash from indexing past the
			// end of s.
			res = append(res, s[i])
			i++
		}
		res = append(res, s[i])
	}
	res = append(res, '"')
	return string(res)
}
1277
// unescapeQuotes returns s with the backslash removed from every escaped
// single or double quote. All other escape sequences, including an escaped
// backslash, are copied unchanged.
func unescapeQuotes(s string) string {
	out := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		if s[i] != '\\' {
			out = append(out, s[i])
			continue
		}
		// A backslash: keep it unless it escapes a quote.
		next := i + 1
		escapesQuote := next < len(s) && (s[next] == '"' || s[next] == '\'')
		if !escapesQuote {
			out = append(out, '\\')
		}
		// Copy the escaped byte here so that it is never taken for the start
		// of another escape sequence.
		if next < len(s) {
			out = append(out, s[next])
		}
		i = next
	}
	return string(out)
}
1298
1299 func smartQuotes(s string) string {
1300 s = unescapeQuotes(s)
1301 if strings.Contains(s, "\"") && !strings.Contains(s, "'") {
1302
1303
1304 return "'" + s + "'"
1305 }
1306
1307
1308 return fixQuotes(s)
1309 }
1310
1311
1312
1313
1314 func DebugFormat(nodes []*ast.Node, depth int) string {
1315 res := []string{""}
1316 prefix := strings.Repeat(".", depth)
1317 for _, nd := range nodes {
1318 var value string
1319 if nd.Deleted {
1320 res = append(res, "DELETED")
1321 }
1322 if nd.Children != nil {
1323 value = fmt.Sprintf("children:%s", DebugFormat(nd.Children, depth+1))
1324 } else {
1325 value = fmt.Sprintf("values: %v\n", nd.Values)
1326 }
1327 res = append(res,
1328 fmt.Sprintf("name: %q", nd.Name),
1329 fmt.Sprintf("PreComments: %q (len %d)", strings.Join(nd.PreComments, "\n"), len(nd.PreComments)),
1330 value)
1331 }
1332 return strings.Join(res, fmt.Sprintf("\n%s ", prefix))
1333 }
1334
1335
1336 func Pretty(nodes []*ast.Node, depth int) string {
1337 var result strings.Builder
1338 formatter{&result}.writeNodes(removeDeleted(nodes), depth, false , false )
1339 return result.String()
1340 }
1341
1342
1343 func PrettyBytes(nodes []*ast.Node, depth int) []byte {
1344 var result bytes.Buffer
1345 formatter{&result}.writeNodes(removeDeleted(nodes), depth, false , false )
1346 return result.Bytes()
1347 }
1348
1349
// UnsortedFieldCollector gathers the fields that were encountered during
// sorting but are missing from the registered field order.
type UnsortedFieldCollector struct {
	// fields is keyed by field name, so a later report for the same name
	// replaces an earlier one.
	fields map[string]UnsortedField
}
1353
1354 func newUnsortedFieldCollector() *UnsortedFieldCollector {
1355 return &UnsortedFieldCollector{
1356 fields: make(map[string]UnsortedField),
1357 }
1358 }
1359
1360
// UnsortedFieldCollectorFunc reports one field that is missing from the
// registered field order: its name, the line it starts on, and the name of its
// parent. UnsortedFieldCollector.collect has this signature.
type UnsortedFieldCollectorFunc func(name string, line int32, parent string)
1362
1363 func (ufc *UnsortedFieldCollector) collect(name string, line int32, parent string) {
1364 ufc.fields[name] = UnsortedField{name, line, parent}
1365 }
1366
1367 func (ufc *UnsortedFieldCollector) asError() error {
1368 if len(ufc.fields) == 0 {
1369 return nil
1370 }
1371 var fields []UnsortedField
1372 for _, f := range ufc.fields {
1373 fields = append(fields, f)
1374 }
1375 return &UnsortedFieldsError{fields}
1376 }
1377
1378 func nodeSortFunction(c Config) NodeSortFunction {
1379 var sorter ast.NodeLess = nil
1380 unsortedFieldCollector := newUnsortedFieldCollector()
1381 for name, fieldOrder := range c.fieldSortOrder {
1382 sorter = ast.ChainNodeLess(sorter, ByFieldOrder(name, fieldOrder, unsortedFieldCollector.collect))
1383 }
1384 if c.SortFieldsByFieldName {
1385 sorter = ast.ChainNodeLess(sorter, ast.ByFieldName)
1386 }
1387 if c.SortRepeatedFieldsByContent {
1388 sorter = ast.ChainNodeLess(sorter, ast.ByFieldValue)
1389 }
1390 for _, sf := range c.SortRepeatedFieldsBySubfield {
1391 field, subfield := parseSubfieldSpec(sf)
1392 if subfield != "" {
1393 sorter = ast.ChainNodeLess(sorter, ast.ByFieldSubfield(field, subfield))
1394 }
1395 }
1396 if sorter != nil {
1397 return func(parent *ast.Node, ns []*ast.Node) error {
1398 ast.SortNodes(parent, ns, sorter)
1399 if c.RequireFieldSortOrderToMatchAllFieldsInNode {
1400 return unsortedFieldCollector.asError()
1401 }
1402 return nil
1403 }
1404 }
1405 return nil
1406 }
1407
1408
1409
// parseSubfieldSpec splits a "field.subfield" spec at the first dot. A spec
// without a dot is treated as a bare subfield name and yields an empty field.
func parseSubfieldSpec(subfieldSpec string) (field string, subfield string) {
	head, tail, hasDot := strings.Cut(subfieldSpec, ".")
	if !hasDot {
		return "", head
	}
	return head, tail
}
1417
1418 func nodeFilterFunction(c Config) NodeFilterFunction {
1419 if c.RemoveDuplicateValuesForRepeatedFields {
1420 return RemoveDuplicates
1421 }
1422 return nil
1423 }
1424
1425 func getNodePriorityForByFieldOrder(parent, node *ast.Node, name string, priorities map[string]int, unsortedCollector UnsortedFieldCollectorFunc) *int {
1426 if parent != nil && parent.Name != name {
1427 return nil
1428 }
1429 if parent == nil && name != RootName {
1430 return nil
1431 }
1432
1433 prio := math.MaxInt
1434
1435
1436 if !node.IsCommentOnly() {
1437 var ok bool
1438 prio, ok = priorities[node.Name]
1439 if !ok {
1440 unsortedCollector(node.Name, node.Start.Line, parent.Name)
1441 }
1442 }
1443 return &prio
1444 }
1445
1446
1447
1448
1449 func ByFieldOrder(name string, fieldOrder []string, unsortedCollector UnsortedFieldCollectorFunc) ast.NodeLess {
1450 priorities := make(map[string]int)
1451 for i, fieldName := range fieldOrder {
1452 priorities[fieldName] = i + 1
1453 }
1454 return func(parent, ni, nj *ast.Node, isWholeSlice bool) bool {
1455 if !isWholeSlice {
1456 return false
1457 }
1458 vi := getNodePriorityForByFieldOrder(parent, ni, name, priorities, unsortedCollector)
1459 vj := getNodePriorityForByFieldOrder(parent, nj, name, priorities, unsortedCollector)
1460 if vi == nil {
1461 return vj != nil
1462 }
1463 if vj == nil {
1464 return false
1465 }
1466 return *vi < *vj
1467 }
1468 }
1469
1470
// stringWriter is the minimal output sink used by formatter: anything that can
// append a string. In this file it is backed by *strings.Builder (Pretty) and
// *bytes.Buffer (PrettyBytes).
type stringWriter interface {
	WriteString(s string) (int, error)
}
1474
1475
// formatter writes formatted nodes to the embedded stringWriter. Embedding the
// interface lets the write* methods call f.WriteString directly.
type formatter struct {
	stringWriter
}
1479
1480 func (f formatter) writeNodes(nodes []*ast.Node, depth int, isSameLine, asListItems bool) {
1481 indent := " "
1482 if !isSameLine {
1483 indent = strings.Repeat(indentSpaces, depth)
1484 }
1485
1486 lastNonCommentIndex := 0
1487 if asListItems {
1488 for i := len(nodes) - 1; i >= 0; i-- {
1489 if !nodes[i].IsCommentOnly() {
1490 lastNonCommentIndex = i
1491 break
1492 }
1493 }
1494 }
1495
1496 for index, nd := range nodes {
1497 for _, comment := range nd.PreComments {
1498 if len(comment) == 0 {
1499 if !(depth == 0 && index == 0) {
1500 f.WriteString("\n")
1501 }
1502 continue
1503 }
1504 f.WriteString(indent)
1505 f.WriteString(comment)
1506 f.WriteString("\n")
1507 }
1508
1509 if nd.IsCommentOnly() {
1510
1511 continue
1512 }
1513 f.WriteString(indent)
1514
1515
1516
1517
1518
1519 if nd.Name != "" {
1520 f.WriteString(nd.Name)
1521 if !nd.SkipColon {
1522 f.WriteString(":")
1523 }
1524
1525
1526
1527
1528
1529 if nd.Children != nil || (len(nd.Values) == 1 && len(nd.Values[0].PreComments) == 0) || nd.ValuesAsList {
1530 f.WriteString(" ")
1531 }
1532 }
1533
1534 if nd.ValuesAsList {
1535 f.writeValuesAsList(nd, nd.Values, indent+indentSpaces)
1536 } else if len(nd.Values) > 0 {
1537 f.writeValues(nd, nd.Values, indent+indentSpaces)
1538 }
1539 if nd.Children != nil {
1540 if nd.ChildrenAsList {
1541 f.writeChildrenAsListItems(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine)
1542 } else {
1543 f.writeChildren(nd.Children, depth+1, isSameLine || nd.ChildrenSameLine, nd.IsAngleBracket)
1544 }
1545 }
1546
1547 if asListItems && index < lastNonCommentIndex {
1548 f.WriteString(",")
1549 }
1550
1551 if (nd.Children != nil || nd.ValuesAsList) && len(nd.ClosingBraceComment) > 0 {
1552 f.WriteString(indentSpaces)
1553 f.WriteString(nd.ClosingBraceComment)
1554 }
1555
1556 if !isSameLine {
1557 f.WriteString("\n")
1558 }
1559 }
1560 }
1561
1562 func (f formatter) writeValues(nd *ast.Node, vals []*ast.Value, indent string) {
1563 if len(vals) == 0 {
1564
1565 return
1566 }
1567 sep := "\n" + indent
1568 if len(vals) == 1 && len(vals[0].PreComments) == 0 {
1569 sep = ""
1570 }
1571 for _, v := range vals {
1572 f.WriteString(sep)
1573 for _, comment := range v.PreComments {
1574 f.WriteString(comment)
1575 f.WriteString(sep)
1576 }
1577 f.WriteString(v.Value)
1578 if len(v.InlineComment) > 0 {
1579 f.WriteString(indentSpaces)
1580 f.WriteString(v.InlineComment)
1581 }
1582 }
1583 for _, comment := range nd.PostValuesComments {
1584 f.WriteString(sep)
1585 f.WriteString(comment)
1586 }
1587 }
1588
1589 func (f formatter) writeValuesAsList(nd *ast.Node, vals []*ast.Value, indent string) {
1590
1591 sameLine := nd.ChildrenSameLine && len(nd.PostValuesComments) == 0
1592 if sameLine {
1593
1594
1595 for _, val := range vals {
1596 if len(val.PreComments) > 0 || len(vals[0].InlineComment) > 0 {
1597 sameLine = false
1598 break
1599 }
1600 }
1601 }
1602 sep := ""
1603 if !sameLine {
1604 sep = "\n" + indent
1605 }
1606 f.WriteString("[")
1607
1608 for idx, v := range vals {
1609 for _, comment := range v.PreComments {
1610 f.WriteString(sep)
1611 f.WriteString(comment)
1612 }
1613 f.WriteString(sep)
1614 f.WriteString(v.Value)
1615 if idx < len(vals)-1 {
1616 f.WriteString(",")
1617 if sameLine {
1618 f.WriteString(" ")
1619 }
1620 }
1621 if len(v.InlineComment) > 0 {
1622 f.WriteString(indentSpaces)
1623 f.WriteString(v.InlineComment)
1624 }
1625 }
1626 for _, comment := range nd.PostValuesComments {
1627 f.WriteString(sep)
1628 f.WriteString(comment)
1629 }
1630 f.WriteString(strings.Replace(sep, indentSpaces, "", 1))
1631 f.WriteString("]")
1632 }
1633
1634
1635 func (f formatter) writeChildren(children []*ast.Node, depth int, sameLine, isAngleBracket bool) {
1636 openBrace := "{"
1637 closeBrace := "}"
1638 if isAngleBracket {
1639 openBrace = "<"
1640 closeBrace = ">"
1641 }
1642 switch {
1643 case sameLine && len(children) == 0:
1644 f.WriteString(openBrace + closeBrace)
1645 case sameLine:
1646 f.WriteString(openBrace)
1647 f.writeNodes(children, depth, sameLine, false )
1648 f.WriteString(" " + closeBrace)
1649 default:
1650 f.WriteString(openBrace + "\n")
1651 f.writeNodes(children, depth, sameLine, false )
1652 f.WriteString(strings.Repeat(indentSpaces, depth-1))
1653 f.WriteString(closeBrace)
1654 }
1655 }
1656
1657
1658 func (f formatter) writeChildrenAsListItems(children []*ast.Node, depth int, sameLine bool) {
1659 openBrace := "["
1660 closeBrace := "]"
1661 switch {
1662 case sameLine && len(children) == 0:
1663 f.WriteString(openBrace + closeBrace)
1664 case sameLine:
1665 f.WriteString(openBrace)
1666 f.writeNodes(children, depth, sameLine, true )
1667 f.WriteString(" " + closeBrace)
1668 default:
1669 f.WriteString(openBrace + "\n")
1670 f.writeNodes(children, depth, sameLine, true )
1671 f.WriteString(strings.Repeat(indentSpaces, depth-1))
1672 f.WriteString(closeBrace)
1673 }
1674 }
1675
View as plain text