printer.go

Documentation: cuelang.org/go/cue/format

     1  // Copyright 2018 The CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package format
    16  
    17  import (
    18  	"fmt"
    19  	"os"
    20  	"strings"
    21  	"text/tabwriter"
    22  
    23  	"cuelang.org/go/cue/ast"
    24  	"cuelang.org/go/cue/errors"
    25  	"cuelang.org/go/cue/literal"
    26  	"cuelang.org/go/cue/token"
    27  )
    28  
// A printer takes the stream of formatting tokens and spacing directives
// produced by the formatter and adjusts the spacing based on the original
// source code.
type printer struct {
	// cfg is the formatting configuration installed by init. Its Indent
	// value is the base indentation added after every line break.
	cfg *config

	// allowed accumulates the whitespace flags to emit before the next
	// piece of text; Print flushes and clears it on every write.
	// requested is only ever reset to 0 in this file.
	// indentStack holds one entry per indent directive that has not yet
	// been matched by an unindent (see markLineIndent/markUnindentLine).
	allowed     whiteSpace
	requested   whiteSpace
	indentStack []whiteSpace

	// pos is advanced by writeString and writeByte as output is produced.
	// lineout counts the line breaks emitted by writeWhitespace.
	pos     token.Position // current pos in AST
	lineout line

	// NOTE(review): nothing in this file resets lastTok after whitespace;
	// confirm whether the "ILLEGAL if it's whitespace" remark still holds.
	lastTok token.Token // last token printed (syntax.ILLEGAL if it's whitespace)

	// output is the text produced so far, including tabwriter escape bytes.
	// indent is the number of tabs, on top of cfg.Indent, written after a
	// line break.
	// spaceBefore is true when the most recent output was whitespace and
	// false once non-empty text has been written.
	output      []byte
	indent      int
	spaceBefore bool

	// errs collects the errors reported through errf.
	errs errors.Error
}
    50  
// line is a count of output line breaks. Passing a *line to Print stores the
// printer's current lineout value in it.
type line int
    52  
    53  func (p *printer) init(cfg *config) {
    54  	p.cfg = cfg
    55  	p.pos = token.Position{Line: 1, Column: 1}
    56  }
    57  
    58  func (p *printer) errf(n ast.Node, format string, args ...interface{}) {
    59  	p.errs = errors.Append(p.errs, errors.Newf(n.Pos(), format, args...))
    60  }
    61  
// debug, when true, makes internalError print its message and panic; when
// false, internalError does nothing.
const debug = false
    63  
    64  func (p *printer) internalError(msg ...interface{}) {
    65  	if debug {
    66  		fmt.Print(p.pos.String() + ": ")
    67  		fmt.Println(msg...)
    68  		panic("go/printer")
    69  	}
    70  }
    71  
    72  func (p *printer) lineFor(pos token.Pos) int {
    73  	return pos.Line()
    74  }
    75  
    76  func (p *printer) Print(v interface{}) {
    77  	var (
    78  		impliedComma = false
    79  		isLit        bool
    80  		data         string
    81  		nextWS       whiteSpace
    82  	)
    83  	switch x := v.(type) {
    84  	case *line:
    85  		*x = p.lineout
    86  
    87  	case token.Token:
    88  		s := x.String()
    89  		before, after := mayCombine(p.lastTok, x)
    90  		if before && !p.spaceBefore {
    91  			// the previous and the current token must be
    92  			// separated by a blank otherwise they combine
    93  			// into a different incorrect token sequence
    94  			// (except for syntax.INT followed by a '.' this
    95  			// should never happen because it is taken care
    96  			// of via binary expression formatting)
    97  			if p.allowed&blank != 0 {
    98  				p.internalError("whitespace buffer not empty")
    99  			}
   100  			p.allowed |= blank
   101  		}
   102  		if after {
   103  			nextWS = blank
   104  		}
   105  		data = s
   106  		switch x {
   107  		case token.EOF:
   108  			data = ""
   109  			p.allowed = newline
   110  			p.allowed &^= newsection
   111  		case token.LPAREN, token.LBRACK, token.LBRACE:
   112  		case token.RPAREN, token.RBRACK, token.RBRACE:
   113  			impliedComma = true
   114  		}
   115  		p.lastTok = x
   116  
   117  	case *ast.BasicLit:
   118  		data = x.Value
   119  		switch x.Kind {
   120  		case token.STRING:
   121  			// TODO: only do this when simplifying. Right now this does not
   122  			// give the right result, but it should be better if:
   123  			// 1) simplification is done as a separate step
   124  			// 2) simplified structs are explicitly referenced separately
   125  			//    in the AST.
   126  			if p.indent < 6 {
   127  				data = literal.IndentTabs(data, p.cfg.Indent+p.indent+1)
   128  			}
   129  
   130  		case token.INT:
   131  			if len(data) > 1 &&
   132  				data[0] == '0' &&
   133  				data[1] >= '0' && data[1] <= '9' {
   134  				data = "0o" + data[1:]
   135  			}
   136  			// Pad trailing dot before multiplier.
   137  			if p := strings.IndexByte(data, '.'); p >= 0 && data[p+1] > '9' {
   138  				data = data[:p+1] + "0" + data[p+1:]
   139  			}
   140  			// Lowercase E, but only if it is not the last character: in the
   141  			// future we may use E for Exa.
   142  			if p := strings.IndexByte(data, 'E'); p != -1 && p < len(data)-1 {
   143  				data = strings.ToLower(data)
   144  			}
   145  
   146  		case token.FLOAT:
   147  			// Pad leading or trailing dots.
   148  			switch p := strings.IndexByte(data, '.'); {
   149  			case p < 0:
   150  			case p == 0:
   151  				data = "0" + data
   152  			case p == len(data)-1:
   153  				data += "0"
   154  			case data[p+1] > '9':
   155  				data = data[:p+1] + "0" + data[p+1:]
   156  			}
   157  			if strings.IndexByte(data, 'E') != -1 {
   158  				data = strings.ToLower(data)
   159  			}
   160  		}
   161  
   162  		isLit = true
   163  		impliedComma = true
   164  		p.lastTok = x.Kind
   165  
   166  	case *ast.Ident:
   167  		data = x.Name
   168  		if !ast.IsValidIdent(data) {
   169  			p.errf(x, "invalid identifier %q", x.Name)
   170  			data = "*bad identifier*"
   171  		}
   172  		impliedComma = true
   173  		p.lastTok = token.IDENT
   174  
   175  	case string:
   176  		// We can print a Go string as part of a CUE identifier or literal;
   177  		// for example, see the formatter.label method.
   178  		isLit = true
   179  		data = x
   180  		impliedComma = true
   181  		p.lastTok = token.STRING
   182  
   183  	case *ast.CommentGroup:
   184  		rel := x.Pos().RelPos()
   185  		if x.Line { // TODO: we probably don't need this.
   186  			rel = token.Blank
   187  		}
   188  		switch rel {
   189  		case token.NoRelPos:
   190  		case token.Newline, token.NewSection:
   191  		case token.Blank, token.Elided:
   192  			p.allowed |= blank
   193  			fallthrough
   194  		case token.NoSpace:
   195  			p.allowed &^= newline | newsection | formfeed | declcomma
   196  		}
   197  		return
   198  
   199  	case *ast.Attribute:
   200  		isLit = true
   201  		data = x.Text
   202  		impliedComma = true
   203  		p.lastTok = token.ATTRIBUTE
   204  
   205  	case *ast.Comment:
   206  		// TODO: if implied comma, postpone comment
   207  		isLit = true
   208  		data = x.Text
   209  		p.lastTok = token.COMMENT
   210  
   211  	case whiteSpace:
   212  		p.allowed |= x
   213  		return
   214  
   215  	case token.Pos:
   216  		// TODO: should we use a known file position to synchronize? Go does,
   217  		// but we don't really have to.
   218  		// pos := x
   219  		if x.HasRelPos() {
   220  			if p.allowed&nooverride == 0 {
   221  				requested := p.allowed
   222  				switch x.RelPos() {
   223  				case token.NoSpace:
   224  					requested &^= newline | newsection | formfeed
   225  				case token.Blank:
   226  					requested |= blank
   227  					requested &^= newline | newsection | formfeed
   228  				case token.Newline:
   229  					requested |= newline
   230  				case token.NewSection:
   231  					requested |= newsection
   232  				}
   233  				p.writeWhitespace(requested)
   234  				p.allowed = 0
   235  				p.requested = 0
   236  			}
   237  			// p.pos = pos
   238  		}
   239  		return
   240  
   241  	default:
   242  		fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", x, x)
   243  		panic("go/printer type")
   244  	}
   245  
   246  	p.writeWhitespace(p.allowed)
   247  	p.allowed = 0
   248  	p.requested = 0
   249  	p.writeString(data, isLit)
   250  	p.allowed = nextWS
   251  	_ = impliedComma // TODO: delay comment printings
   252  }
   253  
   254  func (p *printer) writeWhitespace(ws whiteSpace) {
   255  	if ws&comma != 0 {
   256  		switch {
   257  		case ws&(newsection|newline|formfeed) != 0,
   258  			ws&trailcomma == 0:
   259  			p.writeByte(',', 1)
   260  		}
   261  	}
   262  	if ws&indent != 0 {
   263  		p.markLineIndent(ws)
   264  	}
   265  	if ws&unindent != 0 {
   266  		p.markUnindentLine()
   267  	}
   268  	switch {
   269  	case ws&newsection != 0:
   270  		p.maybeIndentLine(ws)
   271  		p.writeByte('\f', 2)
   272  		p.lineout += 2
   273  		p.spaceBefore = true
   274  	case ws&formfeed != 0:
   275  		p.maybeIndentLine(ws)
   276  		p.writeByte('\f', 1)
   277  		p.lineout++
   278  		p.spaceBefore = true
   279  	case ws&newline != 0:
   280  		p.maybeIndentLine(ws)
   281  		p.writeByte('\n', 1)
   282  		p.lineout++
   283  		p.spaceBefore = true
   284  	case ws&declcomma != 0:
   285  		p.writeByte(',', 1)
   286  		p.writeByte(' ', 1)
   287  		p.spaceBefore = true
   288  	case ws&noblank != 0:
   289  	case ws&vtab != 0:
   290  		p.writeByte('\v', 1)
   291  		p.spaceBefore = true
   292  	case ws&blank != 0:
   293  		p.writeByte(' ', 1)
   294  		p.spaceBefore = true
   295  	}
   296  }
   297  
   298  func (p *printer) markLineIndent(ws whiteSpace) {
   299  	p.indentStack = append(p.indentStack, ws)
   300  }
   301  
   302  func (p *printer) markUnindentLine() (wasUnindented bool) {
   303  	last := len(p.indentStack) - 1
   304  	if ws := p.indentStack[last]; ws&indented != 0 {
   305  		p.indent--
   306  		wasUnindented = true
   307  	}
   308  	p.indentStack = p.indentStack[:last]
   309  	return wasUnindented
   310  }
   311  
   312  func (p *printer) maybeIndentLine(ws whiteSpace) {
   313  	if ws&unindent == 0 && len(p.indentStack) > 0 {
   314  		last := len(p.indentStack) - 1
   315  		if ws := p.indentStack[last]; ws&indented != 0 || ws&indent == 0 {
   316  			return
   317  		}
   318  		p.indentStack[last] |= indented
   319  		p.indent++
   320  	}
   321  }
   322  
   323  func (f *formatter) matchUnindent() whiteSpace {
   324  	f.allowed |= unindent
   325  	// TODO: make this work. Whitespace from closing bracket should match that
   326  	// of opening if there is no position information.
   327  	// f.allowed &^= nooverride | newline | newsection | formfeed | blank | noblank
   328  	// ws := f.indentStack[len(f.indentStack)-1]
   329  	// mask := blank | noblank | vtab
   330  	// f.allowed |= unindent | blank | noblank
   331  	// if ws&newline != 0 || ws*indented != 0 {
   332  	// 	f.allowed |= newline
   333  	// }
   334  	return 0
   335  }
   336  
   337  // writeString writes the string s to p.output and updates p.pos, p.out,
   338  // and p.last. If isLit is set, s is escaped w/ tabwriter.Escape characters
   339  // to protect s from being interpreted by the tabwriter.
   340  //
   341  // Note: writeString is only used to write Go tokens, literals, and
   342  // comments, all of which must be written literally. Thus, it is correct
   343  // to always set isLit = true. However, setting it explicitly only when
   344  // needed (i.e., when we don't know that s contains no tabs or line breaks)
   345  // avoids processing extra escape characters and reduces run time of the
   346  // printer benchmark by up to 10%.
   347  func (p *printer) writeString(s string, isLit bool) {
   348  	if s != "" {
   349  		p.spaceBefore = false
   350  	}
   351  
   352  	if isLit {
   353  		// Protect s such that is passes through the tabwriter
   354  		// unchanged. Note that valid Go programs cannot contain
   355  		// tabwriter.Escape bytes since they do not appear in legal
   356  		// UTF-8 sequences.
   357  		p.output = append(p.output, tabwriter.Escape)
   358  	}
   359  
   360  	p.output = append(p.output, s...)
   361  
   362  	if isLit {
   363  		p.output = append(p.output, tabwriter.Escape)
   364  	}
   365  	// update positions
   366  	nLines := 0
   367  	var li int // index of last newline; valid if nLines > 0
   368  	for i := 0; i < len(s); i++ {
   369  		// CUE tokens cannot contain '\f' - no need to look for it
   370  		if s[i] == '\n' {
   371  			nLines++
   372  			li = i
   373  		}
   374  	}
   375  	p.pos.Offset += len(s)
   376  	if nLines > 0 {
   377  		p.pos.Line += nLines
   378  		c := len(s) - li
   379  		p.pos.Column = c
   380  	} else {
   381  		p.pos.Column += len(s)
   382  	}
   383  }
   384  
   385  func (p *printer) writeByte(ch byte, n int) {
   386  	for i := 0; i < n; i++ {
   387  		p.output = append(p.output, ch)
   388  	}
   389  
   390  	// update positions
   391  	p.pos.Offset += n
   392  	if ch == '\n' || ch == '\f' {
   393  		p.pos.Line += n
   394  		p.pos.Column = 1
   395  
   396  		n := p.cfg.Indent + p.indent // include base indentation
   397  		for i := 0; i < n; i++ {
   398  			p.output = append(p.output, '\t')
   399  		}
   400  
   401  		// update positions
   402  		p.pos.Offset += n
   403  		p.pos.Column += n
   404  
   405  		return
   406  	}
   407  	p.pos.Column += n
   408  }
   409  
   410  func mayCombine(prev, next token.Token) (before, after bool) {
   411  	s := next.String()
   412  	if 'a' <= s[0] && s[0] < 'z' {
   413  		return true, true
   414  	}
   415  	switch prev {
   416  	case token.IQUO, token.IREM, token.IDIV, token.IMOD:
   417  		return false, false
   418  	case token.INT:
   419  		before = next == token.PERIOD // 1.
   420  	case token.ADD:
   421  		before = s[0] == '+' // ++
   422  	case token.SUB:
   423  		before = s[0] == '-' // --
   424  	case token.QUO:
   425  		before = s[0] == '*' // /*
   426  	}
   427  	return before, false
   428  }
   429
View as plain text