// Copyright 2018 The CUE Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package format import ( "fmt" "os" "strings" "text/tabwriter" "cuelang.org/go/cue/ast" "cuelang.org/go/cue/errors" "cuelang.org/go/cue/literal" "cuelang.org/go/cue/token" ) // A printer takes the stream of formatting tokens and spacing directives // produced by the formatter and adjusts the spacing based on the original // source code. type printer struct { cfg *config allowed whiteSpace requested whiteSpace indentStack []whiteSpace pos token.Position // current pos in AST lineout line lastTok token.Token // last token printed (syntax.ILLEGAL if it's whitespace) output []byte indent int spaceBefore bool errs errors.Error } type line int func (p *printer) init(cfg *config) { p.cfg = cfg p.pos = token.Position{Line: 1, Column: 1} } func (p *printer) errf(n ast.Node, format string, args ...interface{}) { p.errs = errors.Append(p.errs, errors.Newf(n.Pos(), format, args...)) } const debug = false func (p *printer) internalError(msg ...interface{}) { if debug { fmt.Print(p.pos.String() + ": ") fmt.Println(msg...) panic("go/printer") } } func (p *printer) lineFor(pos token.Pos) int { return pos.Line() } func (p *printer) Print(v interface{}) { var ( impliedComma = false isLit bool data string nextWS whiteSpace ) switch x := v.(type) { case *line: *x = p.lineout case token.Token: s := x.String() before, after := mayCombine(p.lastTok, x) if before && !p.spaceBefore { // the previous and the current token must be // separated by a blank otherwise they combine // into a different incorrect token sequence // (except for syntax.INT followed by a '.' this // should never happen because it is taken care // of via binary expression formatting) if p.allowed&blank != 0 { p.internalError("whitespace buffer not empty") } p.allowed |= blank } if after { nextWS = blank } data = s switch x { case token.EOF: data = "" p.allowed = newline p.allowed &^= newsection case token.LPAREN, token.LBRACK, token.LBRACE: case token.RPAREN, token.RBRACK, token.RBRACE: impliedComma = true } p.lastTok = x case *ast.BasicLit: data = x.Value switch x.Kind { case token.STRING: // TODO: only do this when simplifying. Right now this does not // give the right result, but it should be better if: // 1) simplification is done as a separate step // 2) simplified structs are explicitly referenced separately // in the AST. if p.indent < 6 { data = literal.IndentTabs(data, p.cfg.Indent+p.indent+1) } case token.INT: if len(data) > 1 && data[0] == '0' && data[1] >= '0' && data[1] <= '9' { data = "0o" + data[1:] } // Pad trailing dot before multiplier. if p := strings.IndexByte(data, '.'); p >= 0 && data[p+1] > '9' { data = data[:p+1] + "0" + data[p+1:] } // Lowercase E, but only if it is not the last character: in the // future we may use E for Exa. if p := strings.IndexByte(data, 'E'); p != -1 && p < len(data)-1 { data = strings.ToLower(data) } case token.FLOAT: // Pad leading or trailing dots. switch p := strings.IndexByte(data, '.'); { case p < 0: case p == 0: data = "0" + data case p == len(data)-1: data += "0" case data[p+1] > '9': data = data[:p+1] + "0" + data[p+1:] } if strings.IndexByte(data, 'E') != -1 { data = strings.ToLower(data) } } isLit = true impliedComma = true p.lastTok = x.Kind case *ast.Ident: data = x.Name if !ast.IsValidIdent(data) { p.errf(x, "invalid identifier %q", x.Name) data = "*bad identifier*" } impliedComma = true p.lastTok = token.IDENT case string: // We can print a Go string as part of a CUE identifier or literal; // for example, see the formatter.label method. isLit = true data = x impliedComma = true p.lastTok = token.STRING case *ast.CommentGroup: rel := x.Pos().RelPos() if x.Line { // TODO: we probably don't need this. rel = token.Blank } switch rel { case token.NoRelPos: case token.Newline, token.NewSection: case token.Blank, token.Elided: p.allowed |= blank fallthrough case token.NoSpace: p.allowed &^= newline | newsection | formfeed | declcomma } return case *ast.Attribute: isLit = true data = x.Text impliedComma = true p.lastTok = token.ATTRIBUTE case *ast.Comment: // TODO: if implied comma, postpone comment isLit = true data = x.Text p.lastTok = token.COMMENT case whiteSpace: p.allowed |= x return case token.Pos: // TODO: should we use a known file position to synchronize? Go does, // but we don't really have to. // pos := x if x.HasRelPos() { if p.allowed&nooverride == 0 { requested := p.allowed switch x.RelPos() { case token.NoSpace: requested &^= newline | newsection | formfeed case token.Blank: requested |= blank requested &^= newline | newsection | formfeed case token.Newline: requested |= newline case token.NewSection: requested |= newsection } p.writeWhitespace(requested) p.allowed = 0 p.requested = 0 } // p.pos = pos } return default: fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", x, x) panic("go/printer type") } p.writeWhitespace(p.allowed) p.allowed = 0 p.requested = 0 p.writeString(data, isLit) p.allowed = nextWS _ = impliedComma // TODO: delay comment printings } func (p *printer) writeWhitespace(ws whiteSpace) { if ws&comma != 0 { switch { case ws&(newsection|newline|formfeed) != 0, ws&trailcomma == 0: p.writeByte(',', 1) } } if ws&indent != 0 { p.markLineIndent(ws) } if ws&unindent != 0 { p.markUnindentLine() } switch { case ws&newsection != 0: p.maybeIndentLine(ws) p.writeByte('\f', 2) p.lineout += 2 p.spaceBefore = true case ws&formfeed != 0: p.maybeIndentLine(ws) p.writeByte('\f', 1) p.lineout++ p.spaceBefore = true case ws&newline != 0: p.maybeIndentLine(ws) p.writeByte('\n', 1) p.lineout++ p.spaceBefore = true case ws&declcomma != 0: p.writeByte(',', 1) p.writeByte(' ', 1) p.spaceBefore = true case ws&noblank != 0: case ws&vtab != 0: p.writeByte('\v', 1) p.spaceBefore = true case ws&blank != 0: p.writeByte(' ', 1) p.spaceBefore = true } } func (p *printer) markLineIndent(ws whiteSpace) { p.indentStack = append(p.indentStack, ws) } func (p *printer) markUnindentLine() (wasUnindented bool) { last := len(p.indentStack) - 1 if ws := p.indentStack[last]; ws&indented != 0 { p.indent-- wasUnindented = true } p.indentStack = p.indentStack[:last] return wasUnindented } func (p *printer) maybeIndentLine(ws whiteSpace) { if ws&unindent == 0 && len(p.indentStack) > 0 { last := len(p.indentStack) - 1 if ws := p.indentStack[last]; ws&indented != 0 || ws&indent == 0 { return } p.indentStack[last] |= indented p.indent++ } } func (f *formatter) matchUnindent() whiteSpace { f.allowed |= unindent // TODO: make this work. Whitespace from closing bracket should match that // of opening if there is no position information. // f.allowed &^= nooverride | newline | newsection | formfeed | blank | noblank // ws := f.indentStack[len(f.indentStack)-1] // mask := blank | noblank | vtab // f.allowed |= unindent | blank | noblank // if ws&newline != 0 || ws*indented != 0 { // f.allowed |= newline // } return 0 } // writeString writes the string s to p.output and updates p.pos, p.out, // and p.last. If isLit is set, s is escaped w/ tabwriter.Escape characters // to protect s from being interpreted by the tabwriter. // // Note: writeString is only used to write Go tokens, literals, and // comments, all of which must be written literally. Thus, it is correct // to always set isLit = true. However, setting it explicitly only when // needed (i.e., when we don't know that s contains no tabs or line breaks) // avoids processing extra escape characters and reduces run time of the // printer benchmark by up to 10%. func (p *printer) writeString(s string, isLit bool) { if s != "" { p.spaceBefore = false } if isLit { // Protect s such that is passes through the tabwriter // unchanged. Note that valid Go programs cannot contain // tabwriter.Escape bytes since they do not appear in legal // UTF-8 sequences. p.output = append(p.output, tabwriter.Escape) } p.output = append(p.output, s...) if isLit { p.output = append(p.output, tabwriter.Escape) } // update positions nLines := 0 var li int // index of last newline; valid if nLines > 0 for i := 0; i < len(s); i++ { // CUE tokens cannot contain '\f' - no need to look for it if s[i] == '\n' { nLines++ li = i } } p.pos.Offset += len(s) if nLines > 0 { p.pos.Line += nLines c := len(s) - li p.pos.Column = c } else { p.pos.Column += len(s) } } func (p *printer) writeByte(ch byte, n int) { for i := 0; i < n; i++ { p.output = append(p.output, ch) } // update positions p.pos.Offset += n if ch == '\n' || ch == '\f' { p.pos.Line += n p.pos.Column = 1 n := p.cfg.Indent + p.indent // include base indentation for i := 0; i < n; i++ { p.output = append(p.output, '\t') } // update positions p.pos.Offset += n p.pos.Column += n return } p.pos.Column += n } func mayCombine(prev, next token.Token) (before, after bool) { s := next.String() if 'a' <= s[0] && s[0] < 'z' { return true, true } switch prev { case token.IQUO, token.IREM, token.IDIV, token.IMOD: return false, false case token.INT: before = next == token.PERIOD // 1. case token.ADD: before = s[0] == '+' // ++ case token.SUB: before = s[0] == '-' // -- case token.QUO: before = s[0] == '*' // /* } return before, false }