...

Source file src/github.com/bazelbuild/buildtools/build/lex.go

Documentation: github.com/bazelbuild/buildtools/build

     1  /*
     2  Copyright 2016 Google LLC
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      https://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Lexical scanning for BUILD file parser.
    18  
    19  package build
    20  
    21  import (
    22  	"bytes"
    23  	"fmt"
    24  	"path/filepath"
    25  	"sort"
    26  	"strings"
    27  	"unicode/utf8"
    28  )
    29  
// FileType represents a type of a file (default (for .bzl files), BUILD, or WORKSPACE).
// Certain formatting or refactoring rules can be applied to several file types, so they support
// bitwise operations: `type1 | type2` can represent a scope (e.g. BUILD and WORKSPACE files) and
// `scope & fileType` can be used to check whether a file type belongs to a scope.
type FileType int

const (
	// TypeDefault represents general Starlark files
	TypeDefault FileType = 1 << iota
	// TypeBuild represents BUILD files
	TypeBuild
	// TypeWorkspace represents WORKSPACE files
	TypeWorkspace
	// TypeBzl represents .bzl files
	TypeBzl
	// TypeModule represents MODULE.bazel files
	TypeModule
)
    48  
    49  func (t FileType) String() string {
    50  	switch t {
    51  	case TypeDefault:
    52  		return "default"
    53  	case TypeBuild:
    54  		return "BUILD"
    55  	case TypeWorkspace:
    56  		return "WORKSPACE"
    57  	case TypeBzl:
    58  		return ".bzl"
    59  	case TypeModule:
    60  		return "MODULE.bazel"
    61  	}
    62  	return "unknown"
    63  }
    64  
    65  // ParseBuild parses a file, marks it as a BUILD file and returns the corresponding parse tree.
    66  //
    67  // The filename is used only for generating error messages.
    68  func ParseBuild(filename string, data []byte) (*File, error) {
    69  	in := newInput(filename, data)
    70  	f, err := in.parse()
    71  	if f != nil {
    72  		f.Type = TypeBuild
    73  	}
    74  	return f, err
    75  }
    76  
    77  // ParseWorkspace parses a file, marks it as a WORKSPACE file and returns the corresponding parse tree.
    78  //
    79  // The filename is used only for generating error messages.
    80  func ParseWorkspace(filename string, data []byte) (*File, error) {
    81  	in := newInput(filename, data)
    82  	f, err := in.parse()
    83  	if f != nil {
    84  		f.Type = TypeWorkspace
    85  	}
    86  	return f, err
    87  }
    88  
    89  // ParseModule parses a file, marks it as a MODULE.bazel file and returns the corresponding parse tree.
    90  //
    91  // The filename is used only for generating error messages.
    92  func ParseModule(filename string, data []byte) (*File, error) {
    93  	in := newInput(filename, data)
    94  	f, err := in.parse()
    95  	if f != nil {
    96  		f.Type = TypeModule
    97  	}
    98  	return f, err
    99  }
   100  
   101  // ParseBzl parses a file, marks it as a .bzl file and returns the corresponding parse tree.
   102  //
   103  // The filename is used only for generating error messages.
   104  func ParseBzl(filename string, data []byte) (*File, error) {
   105  	in := newInput(filename, data)
   106  	f, err := in.parse()
   107  	if f != nil {
   108  		f.Type = TypeBzl
   109  	}
   110  	return f, err
   111  }
   112  
   113  // ParseDefault parses a file, marks it as a generic Starlark file and returns the corresponding parse tree.
   114  //
   115  // The filename is used only for generating error messages.
   116  func ParseDefault(filename string, data []byte) (*File, error) {
   117  	in := newInput(filename, data)
   118  	f, err := in.parse()
   119  	if f != nil {
   120  		f.Type = TypeDefault
   121  	}
   122  	return f, err
   123  }
   124  
   125  func getFileType(filename string) FileType {
   126  	if filename == "" { // stdin
   127  		return TypeDefault
   128  	}
   129  	basename := strings.ToLower(filepath.Base(filename))
   130  	if strings.HasSuffix(basename, ".oss") {
   131  		basename = basename[:len(basename)-4]
   132  	}
   133  	if basename == "module.bazel" {
   134  		return TypeModule
   135  	}
   136  	ext := filepath.Ext(basename)
   137  	switch ext {
   138  	case ".bzl":
   139  		return TypeBzl
   140  	case ".sky":
   141  		return TypeDefault
   142  	}
   143  	base := basename[:len(basename)-len(ext)]
   144  	switch {
   145  	case ext == ".build" || base == "build" || strings.HasPrefix(base, "build."):
   146  		return TypeBuild
   147  	case ext == ".workspace" || base == "workspace" || strings.HasPrefix(base, "workspace."):
   148  		return TypeWorkspace
   149  	}
   150  	return TypeDefault
   151  }
   152  
   153  // Parse parses the input data and returns the corresponding parse tree.
   154  //
   155  // Uses the filename to detect the formatting type (build, workspace, or default) and calls
   156  // ParseBuild, ParseWorkspace, or ParseDefault correspondingly.
   157  func Parse(filename string, data []byte) (*File, error) {
   158  	switch getFileType(filename) {
   159  	case TypeBuild:
   160  		return ParseBuild(filename, data)
   161  	case TypeWorkspace:
   162  		return ParseWorkspace(filename, data)
   163  	case TypeModule:
   164  		return ParseModule(filename, data)
   165  	case TypeBzl:
   166  		return ParseBzl(filename, data)
   167  	}
   168  	return ParseDefault(filename, data)
   169  }
   170  
// ParseError contains information about the error encountered during parsing.
type ParseError struct {
	Message  string   // description of the problem
	Filename string   // file being parsed; empty means stdin
	Pos      Position // position in the input where the error was detected
}
   177  
   178  // Error returns a string representation of the parse error.
   179  func (e ParseError) Error() string {
   180  	filename := e.Filename
   181  	if filename == "" {
   182  		filename = "<stdin>"
   183  	}
   184  	return fmt.Sprintf("%s:%d:%d: %v", filename, e.Pos.Line, e.Pos.LineRune, e.Message)
   185  }
   186  
// An input represents a single input file being parsed.
type input struct {
	// Lexing state.
	filename       string    // name of input file, for errors
	complete       []byte    // entire input
	remaining      []byte    // remaining input
	token          []byte    // remaining input at the time the current token started (see startToken)
	lastToken      string    // most recently returned token, for error messages
	pos            Position  // current input position
	lineComments   []Comment // accumulated line comments
	suffixComments []Comment // accumulated suffix (end-of-line) comments
	depth          int       // nesting of [ ] { } ( )
	cleanLine      bool      // true if the current line only contains whitespace before the current position
	indent         int       // current line indentation in spaces
	indents        []int     // stack of indentation levels in spaces

	// Parser state.
	file       *File // returned top-level syntax tree
	parseError error // error encountered during parsing

	// Comment assignment state.
	pre  []Expr // all expressions, in preorder traversal
	post []Expr // all expressions, in postorder traversal
}
   211  
   212  func newInput(filename string, data []byte) *input {
   213  	// The syntax requires that each simple statement ends with '\n', however it's optional at EOF.
   214  	// If `data` doesn't end with '\n' we add it here to keep parser simple.
   215  	// It shouldn't affect neither the parsed tree nor its formatting.
   216  	data = append(data, '\n')
   217  
   218  	return &input{
   219  		filename:  filename,
   220  		complete:  data,
   221  		remaining: data,
   222  		pos:       Position{Line: 1, LineRune: 1, Byte: 0},
   223  		cleanLine: true,
   224  		indents:   []int{0},
   225  	}
   226  }
   227  
   228  func (in *input) currentIndent() int {
   229  	return in.indents[len(in.indents)-1]
   230  }
   231  
// parse parses the input file and returns the syntax tree with comments
// attached, or the first error encountered.
func (in *input) parse() (f *File, err error) {
	// The parser panics for both routine errors like syntax errors
	// and for programmer bugs like array index errors.
	// Turn both into error returns. Catching bug panics is
	// especially important when processing many files.
	defer func() {
		if e := recover(); e != nil {
			if e == in.parseError {
				// Deliberate abort from in.Error: surface the recorded ParseError.
				err = in.parseError
			} else {
				// Anything else is a bug; wrap it with the current position.
				err = ParseError{Message: fmt.Sprintf("internal error: %v", e), Filename: in.filename, Pos: in.pos}
			}
		}
	}()

	// Invoke the parser generated from parse.y.
	yyParse(in)
	if in.parseError != nil {
		return nil, in.parseError
	}
	in.file.Path = in.filename

	// Assign comments to nearby syntax.
	in.assignComments()

	return in.file, nil
}
   260  
   261  // Error is called to report an error.
   262  // When called by the generated code s is always "syntax error".
   263  // Error does not return: it panics.
   264  func (in *input) Error(s string) {
   265  	if s == "syntax error" && in.lastToken != "" {
   266  		s += " near " + in.lastToken
   267  	}
   268  	in.parseError = ParseError{Message: s, Filename: in.filename, Pos: in.pos}
   269  	panic(in.parseError)
   270  }
   271  
// eof reports whether the input has reached end of file
// (all bytes have been consumed).
func (in *input) eof() bool {
	return len(in.remaining) == 0
}
   276  
   277  // peekRune returns the next rune in the input without consuming it.
   278  func (in *input) peekRune() int {
   279  	if len(in.remaining) == 0 {
   280  		return 0
   281  	}
   282  	r, _ := utf8.DecodeRune(in.remaining)
   283  	return int(r)
   284  }
   285  
   286  // readRune consumes and returns the next rune in the input.
   287  func (in *input) readRune() int {
   288  	if len(in.remaining) == 0 {
   289  		in.Error("internal lexer error: readRune at EOF")
   290  	}
   291  	r, size := utf8.DecodeRune(in.remaining)
   292  	in.remaining = in.remaining[size:]
   293  	if r == '\n' {
   294  		in.pos.Line++
   295  		in.pos.LineRune = 1
   296  	} else {
   297  		in.pos.LineRune++
   298  	}
   299  	in.pos.Byte += size
   300  	return int(r)
   301  }
   302  
   303  // startToken marks the beginning of the next input token.
   304  // It must be followed by a call to endToken, once the token has
   305  // been consumed using readRune.
   306  func (in *input) startToken(val *yySymType) {
   307  	in.token = in.remaining
   308  	val.tok = ""
   309  	val.pos = in.pos
   310  }
   311  
   312  // yySymType (used in the next few functions) is defined by the
   313  // generated parser. It is a struct containing all the fields listed
   314  // in parse.y's %union [sic] section.
   315  
   316  // endToken marks the end of an input token.
   317  // It records the actual token string in val.tok if the caller
   318  // has not done that already.
   319  func (in *input) endToken(val *yySymType) {
   320  	if val.tok == "" {
   321  		tok := string(in.token[:len(in.token)-len(in.remaining)])
   322  		val.tok = tok
   323  		in.lastToken = val.tok
   324  	}
   325  }
   326  
// Lex is called from the generated parser to obtain the next input token.
// It returns the token value (either a rune like '+' or a symbolic token _FOR)
// and sets val to the data associated with the token.
//
// For all our input tokens, the associated data is
// val.Pos (the position where the token begins)
// and val.Token (the input string corresponding to the token).
func (in *input) Lex(val *yySymType) int {
	// Skip past spaces, stopping at non-space or EOF.
	countNL := 0 // number of newlines we've skipped past
	for !in.eof() {
		// Skip over spaces. Count newlines so we can give the parser
		// information about where top-level blank lines are,
		// for top-level comment assignment.
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
			if c == '\n' {
				in.indent = 0
				in.cleanLine = true
				if in.depth == 0 {
					// Not in a statement. Tell parser about top-level blank line.
					in.startToken(val)
					in.readRune()
					in.endToken(val)
					return '\n'
				}
				countNL++
			} else if c == ' ' && in.cleanLine {
				// Leading spaces contribute to the line's indentation level.
				in.indent++
			}
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if c == '#' {
			// If a line contains just a comment its indentation level doesn't matter.
			// Reset it to zero.
			in.indent = 0
			isLineComment := in.cleanLine
			in.cleanLine = true

			// Is this comment the only thing on its line?
			// Find the last \n before this # and see if it's all
			// spaces from there to here.
			// If it's a suffix comment but the last non-space symbol before
			// it is one of (, [, or {, or it's a suffix comment to "):"
			// (e.g. trailing closing bracket or a function definition),
			// treat it as a line comment that should be
			// put inside the corresponding block.
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			prefix := bytes.TrimSpace(in.complete[i+1 : in.pos.Byte])
			prefix = bytes.Replace(prefix, []byte{' '}, []byte{}, -1)
			isSuffix := true
			if len(prefix) == 0 ||
				(len(prefix) == 2 && prefix[0] == ')' && prefix[1] == ':') ||
				prefix[len(prefix)-1] == '[' ||
				prefix[len(prefix)-1] == '(' ||
				prefix[len(prefix)-1] == '{' {
				isSuffix = false
			}

			// Consume comment without the \n it ends with.
			in.startToken(val)
			for len(in.remaining) > 0 && in.peekRune() != '\n' {
				in.readRune()
			}

			in.endToken(val)

			val.tok = strings.TrimRight(val.tok, "\n")
			in.lastToken = "comment"

			// If we are at top level (not in a rule), hand the comment to
			// the parser as a _COMMENT token. The grammar is written
			// to handle top-level comments itself.
			if in.depth == 0 && isLineComment {
				// Not in a statement. Tell parser about top-level comment.
				return _COMMENT
			}

			// Otherwise, save comment for later attachment to syntax tree.
			if countNL > 1 {
				// A blank line separated this comment from the previous one;
				// record an empty comment so the gap can be preserved.
				in.lineComments = append(in.lineComments, Comment{val.pos, ""})
			}
			if isSuffix {
				in.suffixComments = append(in.suffixComments, Comment{val.pos, val.tok})
			} else {
				in.lineComments = append(in.lineComments, Comment{val.pos, val.tok})
			}
			countNL = 0
			continue
		}

		if c == '\\' && len(in.remaining) >= 2 && in.remaining[1] == '\n' {
			// We can ignore a trailing \ at end of line together with the \n.
			in.readRune()
			in.readRune()
			continue
		}

		// Found non-space non-comment.
		break
	}

	// Check for changes in indentation.
	// Skip if we're inside a statement, or if there were non-space
	// characters before in the current line.
	if in.depth == 0 && in.cleanLine {
		if in.indent > in.currentIndent() {
			// A new indentation block starts
			in.indents = append(in.indents, in.indent)
			in.lastToken = "indent"
			in.cleanLine = false
			return _INDENT
		} else if in.indent < in.currentIndent() {
			// An indentation block ends
			in.indents = in.indents[:len(in.indents)-1]

			// It's a syntax error if the current line indentation level is now greater than
			// currentIndent(), should be either equal (a parent block continues) or still less
			// (need to unindent more).
			if in.indent > in.currentIndent() {
				in.pos = val.pos
				in.Error("unexpected indentation")
			}
			in.lastToken = "unindent"
			return _UNINDENT
		}
	}

	in.cleanLine = false

	// If the file ends with an indented block, return the corresponding amount of unindents.
	// (One per Lex call; the parser keeps calling until the stack is drained.)
	if in.eof() && in.currentIndent() > 0 {
		in.indents = in.indents[:len(in.indents)-1]
		in.lastToken = "unindent"
		return _UNINDENT
	}

	// Found the beginning of the next token.
	in.startToken(val)
	defer in.endToken(val)

	// End of file.
	if in.eof() {
		in.lastToken = "EOF"
		return _EOF
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '[', '(', '{':
		in.depth++
		in.readRune()
		return c

	case ']', ')', '}':
		in.depth--
		in.readRune()
		return c

	case '.', ':', ';', ',': // single-char tokens
		in.readRune()
		return c

	case '<', '>', '=', '!', '+', '-', '*', '/', '%', '|', '&', '~', '^': // possibly followed by =
		in.readRune()

		if c == '~' {
			// unary bitwise not, shouldn't be followed by anything
			return c
		}

		if c == '*' && in.peekRune() == '*' {
			// double asterisk
			in.readRune()
			return _STAR_STAR
		}

		if c == '-' && in.peekRune() == '>' {
			// functions type annotation
			in.readRune()
			return _ARROW
		}

		if c == in.peekRune() {
			switch c {
			case '/':
				// integer division
				in.readRune()
				c = _INT_DIV
			case '<':
				// left shift
				in.readRune()
				c = _BIT_LSH
			case '>':
				// right shift
				in.readRune()
				c = _BIT_RSH
			}
		}

		if in.peekRune() == '=' {
			in.readRune()
			switch c {
			case '<':
				return _LE
			case '>':
				return _GE
			case '=':
				return _EQ
			case '!':
				return _NE
			default:
				// Augmented assignment: +=, -=, //=, <<=, etc.
				return _AUGM
			}
		}
		return c

	case 'r': // possible beginning of raw quoted string
		if len(in.remaining) < 2 || in.remaining[1] != '"' && in.remaining[1] != '\'' {
			break
		}
		// Consume the 'r' and fall through with c set to the quote rune.
		in.readRune()
		c = in.peekRune()
		fallthrough

	case '"', '\'': // quoted string
		quote := c
		if len(in.remaining) >= 3 && in.remaining[0] == byte(quote) && in.remaining[1] == byte(quote) && in.remaining[2] == byte(quote) {
			// Triple-quoted string.
			in.readRune()
			in.readRune()
			in.readRune()
			var c1, c2, c3 int
			for {
				if in.eof() {
					in.pos = val.pos
					in.Error("unexpected EOF in string")
				}
				// Slide a 3-rune window; the string ends on three quotes in a row.
				c1, c2, c3 = c2, c3, in.readRune()
				if c1 == quote && c2 == quote && c3 == quote {
					break
				}
				if c3 == '\\' {
					if in.eof() {
						in.pos = val.pos
						in.Error("unexpected EOF in string")
					}
					// Skip the escaped rune so it cannot terminate the string.
					in.readRune()
				}
			}
		} else {
			in.readRune()
			for {
				if in.eof() {
					in.pos = val.pos
					in.Error("unexpected EOF in string")
				}
				if in.peekRune() == '\n' {
					in.Error("unexpected newline in string")
				}
				c := in.readRune()
				if c == quote {
					break
				}
				if c == '\\' {
					if in.eof() {
						in.pos = val.pos
						in.Error("unexpected EOF in string")
					}
					// Skip the escaped rune so it cannot terminate the string.
					in.readRune()
				}
			}
		}
		in.endToken(val)
		s, triple, err := Unquote(val.tok)
		if err != nil {
			in.Error(fmt.Sprint(err))
		}
		val.str = s
		val.triple = triple
		return _STRING
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	// Scan over alphanumeric identifier.
	for {
		c := in.peekRune()
		if !isIdent(c) {
			break
		}
		in.readRune()
	}

	// Call endToken to set val.tok to identifier we just scanned,
	// so we can look to see if val.tok is a keyword.
	in.endToken(val)
	if k := keywordToken[val.tok]; k != 0 {
		return k
	}
	switch val.tok {
	case "pass":
		return _PASS
	case "break":
		return _BREAK
	case "continue":
		return _CONTINUE
	}
	if len(val.tok) > 0 && val.tok[0] >= '0' && val.tok[0] <= '9' {
		return _INT
	}
	return _IDENT
}
   646  
   647  // isIdent reports whether c is an identifier rune.
   648  // We treat all non-ASCII runes as identifier runes.
   649  func isIdent(c int) bool {
   650  	return '0' <= c && c <= '9' ||
   651  		'A' <= c && c <= 'Z' ||
   652  		'a' <= c && c <= 'z' ||
   653  		c == '_' ||
   654  		c >= 0x80
   655  }
   656  
   657  // keywordToken records the special tokens for
   658  // strings that should not be treated as ordinary identifiers.
   659  var keywordToken = map[string]int{
   660  	"and":    _AND,
   661  	"for":    _FOR,
   662  	"if":     _IF,
   663  	"else":   _ELSE,
   664  	"elif":   _ELIF,
   665  	"in":     _IN,
   666  	"is":     _IS,
   667  	"lambda": _LAMBDA,
   668  	"load":   _LOAD,
   669  	"not":    _NOT,
   670  	"or":     _OR,
   671  	"def":    _DEF,
   672  	"return": _RETURN,
   673  }
   674  
   675  // Comment assignment.
   676  // We build two lists of all subexpressions, preorder and postorder.
   677  // The preorder list is ordered by start location, with outer expressions first.
   678  // The postorder list is ordered by end location, with outer expressions last.
   679  // We use the preorder list to assign each whole-line comment to the syntax
   680  // immediately following it, and we use the postorder list to assign each
   681  // end-of-line comment to the syntax immediately preceding it.
   682  
// order walks the expression adding it and its subexpressions to the
// preorder and postorder lists. Since each node is appended to in.pre
// before its children and to in.post after them, in.pre ends up ordered
// by start location (outer expressions first) and in.post by end location
// (outer expressions last).
//
// Panics on node types it does not know, so new AST nodes must be added
// to the switch below.
func (in *input) order(v Expr) {
	if v != nil {
		in.pre = append(in.pre, v)
	}
	switch v := v.(type) {
	default:
		panic(fmt.Errorf("order: unexpected type %T", v))
	case nil:
		// nothing
	case *End:
		// nothing
	case *File:
		for _, stmt := range v.Stmt {
			in.order(stmt)
		}
	case *CommentBlock:
		// nothing
	case *CallExpr:
		in.order(v.X)
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *LoadStmt:
		in.order(v.Module)
		for i := range v.From {
			// Visit "to=from" pairs in source order: the local name first.
			in.order(v.To[i])
			in.order(v.From[i])
		}
		in.order(&v.Rparen)
	case *LiteralExpr:
		// nothing
	case *StringExpr:
		// nothing
	case *Ident:
		// nothing
	case *TypedIdent:
		in.order(v.Type)
	case *BranchStmt:
		// nothing
	case *DotExpr:
		in.order(v.X)
	case *ListExpr:
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *Comprehension:
		in.order(v.Body)
		for _, c := range v.Clauses {
			in.order(c)
		}
		in.order(&v.End)
	case *SetExpr:
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *ForClause:
		in.order(v.Vars)
		in.order(v.X)
	case *IfClause:
		in.order(v.Cond)
	case *KeyValueExpr:
		in.order(v.Key)
		in.order(v.Value)
	case *DictExpr:
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *TupleExpr:
		for _, x := range v.List {
			in.order(x)
		}
		// A tuple without brackets has no closing-position node to visit.
		if !v.NoBrackets {
			in.order(&v.End)
		}
	case *UnaryExpr:
		in.order(v.X)
	case *BinaryExpr:
		in.order(v.X)
		in.order(v.Y)
	case *AssignExpr:
		in.order(v.LHS)
		in.order(v.RHS)
	case *ConditionalExpr:
		// Source order is "then if test else else", hence Then before Test.
		in.order(v.Then)
		in.order(v.Test)
		in.order(v.Else)
	case *ParenExpr:
		in.order(v.X)
		in.order(&v.End)
	case *SliceExpr:
		in.order(v.X)
		in.order(v.From)
		in.order(v.To)
		in.order(v.Step)
	case *IndexExpr:
		in.order(v.X)
		in.order(v.Y)
	case *LambdaExpr:
		for _, param := range v.Params {
			in.order(param)
		}
		for _, expr := range v.Body {
			in.order(expr)
		}
	case *ReturnStmt:
		if v.Result != nil {
			in.order(v.Result)
		}
	case *DefStmt:
		for _, x := range v.Params {
			in.order(x)
		}
		for _, x := range v.Body {
			in.order(x)
		}
	case *ForStmt:
		in.order(v.Vars)
		in.order(v.X)
		for _, x := range v.Body {
			in.order(x)
		}
	case *IfStmt:
		in.order(v.Cond)
		for _, s := range v.True {
			in.order(s)
		}
		// Visit the "else" position only when an else branch exists.
		if len(v.False) > 0 {
			in.order(&v.ElsePos)
		}
		for _, s := range v.False {
			in.order(s)
		}
	}
	if v != nil {
		in.post = append(in.post, v)
	}
}
   826  
// assignComments attaches comments to nearby syntax.
// Suffix comments are assigned before line comments; leftovers of the
// former are prepended to the file, leftovers of the latter appended.
func (in *input) assignComments() {
	// Generate preorder and postorder lists.
	in.order(in.file)
	in.assignSuffixComments()
	in.assignLineComments()
}
   834  
// assignSuffixComments attaches each suffix (end-of-line) comment to the
// expression that ends immediately before it, walking the postorder list
// backwards so inner/later expressions claim their comments first.
// Comments left unclaimed go at the beginning of the file.
func (in *input) assignSuffixComments() {
	// Assign suffix comments to syntax immediately before.
	suffix := in.suffixComments
	for i := len(in.post) - 1; i >= 0; i-- {
		x := in.post[i]

		// Do not assign suffix comments to file or to block statements
		switch x.(type) {
		case *File, *DefStmt, *IfStmt, *ForStmt, *CommentBlock:
			continue
		}

		_, end := x.Span()
		xcom := x.Comment()
		// Claim every remaining comment that starts at or after this
		// expression's end (comparing byte offsets), last comment first.
		for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
			xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
			suffix = suffix[:len(suffix)-1]
		}
	}

	// We assigned suffix comments in reverse.
	// If multiple suffix comments were appended to the same
	// expression node, they are now in reverse. Fix that.
	for _, x := range in.post {
		reverseComments(x.Comment().Suffix)
	}

	// Remaining suffix comments go at beginning of file.
	in.file.Before = append(in.file.Before, suffix...)
}
   865  
// assignLineComments attaches each whole-line comment to the expression
// that starts immediately after it, walking the preorder list so outer
// expressions claim comments before their children.
// Comments left unclaimed go at the end of the file.
func (in *input) assignLineComments() {
	// Assign line comments to syntax immediately following.
	line := in.lineComments
	for _, x := range in.pre {
		start, _ := x.Span()
		xcom := x.Comment()
		// Claim every remaining comment that starts at or before this
		// expression's start (comparing byte offsets).
		for len(line) > 0 && start.Byte >= line[0].Start.Byte {
			xcom.Before = append(xcom.Before, line[0])
			line = line[1:]
		}
		// Line comments can be sorted in a wrong order because they get assigned from different
		// parts of the lexer and the parser. Restore the original order.
		sort.SliceStable(xcom.Before, func(i, j int) bool {
			return xcom.Before[i].Start.Byte < xcom.Before[j].Start.Byte
		})
	}

	// Remaining line comments go at end of file.
	in.file.After = append(in.file.After, line...)
}
   886  
   887  // reverseComments reverses the []Comment list.
   888  func reverseComments(list []Comment) {
   889  	for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
   890  		list[i], list[j] = list[j], list[i]
   891  	}
   892  }
   893  

View as plain text