read.go

Documentation: github.com/bazelbuild/rules_go/go/tools/builders

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file was adapted from Go src/go/build/read.go at commit 8634a234df2a
     6  // on 2021-01-26. It's used to extract metadata from .go files without requiring
     7  // them to be in the same directory.
     8  
     9  package main
    10  
    11  import (
    12  	"bufio"
    13  	"errors"
    14  	"fmt"
    15  	"go/ast"
    16  	"go/parser"
    17  	"go/token"
    18  	"io"
    19  	"strconv"
    20  	"strings"
    21  	"unicode"
    22  	"unicode/utf8"
    23  )
    24  
    25  type importReader struct {
    26  	b    *bufio.Reader
    27  	buf  []byte
    28  	peek byte
    29  	err  error
    30  	eof  bool
    31  	nerr int
    32  	pos  token.Position
    33  }
    34  
    35  func newImportReader(name string, r io.Reader) *importReader {
    36  	return &importReader{
    37  		b: bufio.NewReader(r),
    38  		pos: token.Position{
    39  			Filename: name,
    40  			Line:     1,
    41  			Column:   1,
    42  		},
    43  	}
    44  }
    45  
    46  func isIdent(c byte) bool {
    47  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
    48  }
    49  
    50  var (
    51  	errSyntax = errors.New("syntax error")
    52  	errNUL    = errors.New("unexpected NUL in input")
    53  )
    54  
    55  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
    56  func (r *importReader) syntaxError() {
    57  	if r.err == nil {
    58  		r.err = errSyntax
    59  	}
    60  }
    61  
    62  // readByte reads the next byte from the input, saves it in buf, and returns it.
    63  // If an error occurs, readByte records the error in r.err and returns 0.
    64  func (r *importReader) readByte() byte {
    65  	c, err := r.b.ReadByte()
    66  	if err == nil {
    67  		r.buf = append(r.buf, c)
    68  		if c == 0 {
    69  			err = errNUL
    70  		}
    71  	}
    72  	if err != nil {
    73  		if err == io.EOF {
    74  			r.eof = true
    75  		} else if r.err == nil {
    76  			r.err = err
    77  		}
    78  		c = 0
    79  	}
    80  	return c
    81  }
    82  
    83  // readByteNoBuf is like readByte but doesn't buffer the byte.
    84  // It exhausts r.buf before reading from r.b.
    85  func (r *importReader) readByteNoBuf() byte {
    86  	var c byte
    87  	var err error
    88  	if len(r.buf) > 0 {
    89  		c = r.buf[0]
    90  		r.buf = r.buf[1:]
    91  	} else {
    92  		c, err = r.b.ReadByte()
    93  		if err == nil && c == 0 {
    94  			err = errNUL
    95  		}
    96  	}
    97  
    98  	if err != nil {
    99  		if err == io.EOF {
   100  			r.eof = true
   101  		} else if r.err == nil {
   102  			r.err = err
   103  		}
   104  		return 0
   105  	}
   106  	r.pos.Offset++
   107  	if c == '\n' {
   108  		r.pos.Line++
   109  		r.pos.Column = 1
   110  	} else {
   111  		r.pos.Column++
   112  	}
   113  	return c
   114  }
   115  
   116  // peekByte returns the next byte from the input reader but does not advance beyond it.
   117  // If skipSpace is set, peekByte skips leading spaces and comments.
   118  func (r *importReader) peekByte(skipSpace bool) byte {
   119  	if r.err != nil {
   120  		if r.nerr++; r.nerr > 10000 {
   121  			panic("go/build: import reader looping")
   122  		}
   123  		return 0
   124  	}
   125  
   126  	// Use r.peek as first input byte.
   127  	// Don't just return r.peek here: it might have been left by peekByte(false)
   128  	// and this might be peekByte(true).
   129  	c := r.peek
   130  	if c == 0 {
   131  		c = r.readByte()
   132  	}
   133  	for r.err == nil && !r.eof {
   134  		if skipSpace {
   135  			// For the purposes of this reader, semicolons are never necessary to
   136  			// understand the input and are treated as spaces.
   137  			switch c {
   138  			case ' ', '\f', '\t', '\r', '\n', ';':
   139  				c = r.readByte()
   140  				continue
   141  
   142  			case '/':
   143  				c = r.readByte()
   144  				if c == '/' {
   145  					for c != '\n' && r.err == nil && !r.eof {
   146  						c = r.readByte()
   147  					}
   148  				} else if c == '*' {
   149  					var c1 byte
   150  					for (c != '*' || c1 != '/') && r.err == nil {
   151  						if r.eof {
   152  							r.syntaxError()
   153  						}
   154  						c, c1 = c1, r.readByte()
   155  					}
   156  				} else {
   157  					r.syntaxError()
   158  				}
   159  				c = r.readByte()
   160  				continue
   161  			}
   162  		}
   163  		break
   164  	}
   165  	r.peek = c
   166  	return r.peek
   167  }
   168  
   169  // nextByte is like peekByte but advances beyond the returned byte.
   170  func (r *importReader) nextByte(skipSpace bool) byte {
   171  	c := r.peekByte(skipSpace)
   172  	r.peek = 0
   173  	return c
   174  }
   175  
   176  var goEmbed = []byte("go:embed")
   177  
   178  // findEmbed advances the input reader to the next //go:embed comment.
   179  // It reports whether it found a comment.
   180  // (Otherwise it found an error or EOF.)
   181  func (r *importReader) findEmbed(first bool) bool {
   182  	// The import block scan stopped after a non-space character,
   183  	// so the reader is not at the start of a line on the first call.
   184  	// After that, each //go:embed extraction leaves the reader
   185  	// at the end of a line.
   186  	startLine := !first
   187  	var c byte
   188  	for r.err == nil && !r.eof {
   189  		c = r.readByteNoBuf()
   190  	Reswitch:
   191  		switch c {
   192  		default:
   193  			startLine = false
   194  
   195  		case '\n':
   196  			startLine = true
   197  
   198  		case ' ', '\t':
   199  			// leave startLine alone
   200  
   201  		case '\'':
   202  			startLine = false
   203  			for r.err == nil {
   204  				if r.eof {
   205  					r.syntaxError()
   206  				}
   207  				c = r.readByteNoBuf()
   208  				if c == '\\' {
   209  					_ = r.readByteNoBuf()
   210  					if r.err != nil {
   211  						r.syntaxError()
   212  						return false
   213  					}
   214  					continue
   215  				}
   216  				if c == '\'' {
   217  					c = r.readByteNoBuf()
   218  					goto Reswitch
   219  				}
   220  			}
   221  			goto Reswitch
   222  
   223  		case '"':
   224  			startLine = false
   225  			for r.err == nil {
   226  				if r.eof {
   227  					r.syntaxError()
   228  				}
   229  				c = r.readByteNoBuf()
   230  				if c == '\\' {
   231  					_ = r.readByteNoBuf()
   232  					if r.err != nil {
   233  						r.syntaxError()
   234  						return false
   235  					}
   236  					continue
   237  				}
   238  				if c == '"' {
   239  					c = r.readByteNoBuf()
   240  					goto Reswitch
   241  				}
   242  			}
   243  			goto Reswitch
   244  
   245  		case '`':
   246  			startLine = false
   247  			for r.err == nil {
   248  				if r.eof {
   249  					r.syntaxError()
   250  				}
   251  				c = r.readByteNoBuf()
   252  				if c == '`' {
   253  					c = r.readByteNoBuf()
   254  					goto Reswitch
   255  				}
   256  			}
   257  
   258  		case '/':
   259  			c = r.readByteNoBuf()
   260  			switch c {
   261  			default:
   262  				startLine = false
   263  				goto Reswitch
   264  
   265  			case '*':
   266  				var c1 byte
   267  				for (c != '*' || c1 != '/') && r.err == nil {
   268  					if r.eof {
   269  						r.syntaxError()
   270  					}
   271  					c, c1 = c1, r.readByteNoBuf()
   272  				}
   273  				startLine = false
   274  
   275  			case '/':
   276  				if startLine {
   277  					// Try to read this as a //go:embed comment.
   278  					for i := range goEmbed {
   279  						c = r.readByteNoBuf()
   280  						if c != goEmbed[i] {
   281  							goto SkipSlashSlash
   282  						}
   283  					}
   284  					c = r.readByteNoBuf()
   285  					if c == ' ' || c == '\t' {
   286  						// Found one!
   287  						return true
   288  					}
   289  				}
   290  			SkipSlashSlash:
   291  				for c != '\n' && r.err == nil && !r.eof {
   292  					c = r.readByteNoBuf()
   293  				}
   294  				startLine = true
   295  			}
   296  		}
   297  	}
   298  	return false
   299  }
   300  
   301  // readKeyword reads the given keyword from the input.
   302  // If the keyword is not present, readKeyword records a syntax error.
   303  func (r *importReader) readKeyword(kw string) {
   304  	r.peekByte(true)
   305  	for i := 0; i < len(kw); i++ {
   306  		if r.nextByte(false) != kw[i] {
   307  			r.syntaxError()
   308  			return
   309  		}
   310  	}
   311  	if isIdent(r.peekByte(false)) {
   312  		r.syntaxError()
   313  	}
   314  }
   315  
   316  // readIdent reads an identifier from the input.
   317  // If an identifier is not present, readIdent records a syntax error.
   318  func (r *importReader) readIdent() {
   319  	c := r.peekByte(true)
   320  	if !isIdent(c) {
   321  		r.syntaxError()
   322  		return
   323  	}
   324  	for isIdent(r.peekByte(false)) {
   325  		r.peek = 0
   326  	}
   327  }
   328  
   329  // readString reads a quoted string literal from the input.
   330  // If an identifier is not present, readString records a syntax error.
   331  func (r *importReader) readString() {
   332  	switch r.nextByte(true) {
   333  	case '`':
   334  		for r.err == nil {
   335  			if r.nextByte(false) == '`' {
   336  				break
   337  			}
   338  			if r.eof {
   339  				r.syntaxError()
   340  			}
   341  		}
   342  	case '"':
   343  		for r.err == nil {
   344  			c := r.nextByte(false)
   345  			if c == '"' {
   346  				break
   347  			}
   348  			if r.eof || c == '\n' {
   349  				r.syntaxError()
   350  			}
   351  			if c == '\\' {
   352  				r.nextByte(false)
   353  			}
   354  		}
   355  	default:
   356  		r.syntaxError()
   357  	}
   358  }
   359  
   360  // readImport reads an import clause - optional identifier followed by quoted string -
   361  // from the input.
   362  func (r *importReader) readImport() {
   363  	c := r.peekByte(true)
   364  	if c == '.' {
   365  		r.peek = 0
   366  	} else if isIdent(c) {
   367  		r.readIdent()
   368  	}
   369  	r.readString()
   370  }
   371  
   372  // readComments is like io.ReadAll, except that it only reads the leading
   373  // block of comments in the file.
   374  func readComments(f io.Reader) ([]byte, error) {
   375  	r := newImportReader("", f)
   376  	r.peekByte(true)
   377  	if r.err == nil && !r.eof {
   378  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
   379  		r.buf = r.buf[:len(r.buf)-1]
   380  	}
   381  	return r.buf, r.err
   382  }
   383  
   384  // readGoInfo expects a Go file as input and reads the file up to and including the import section.
   385  // It records what it learned in *info.
   386  // If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
   387  // info.imports, info.embeds, and info.embedErr.
   388  //
   389  // It only returns an error if there are problems reading the file,
   390  // not for syntax errors in the file itself.
   391  func readGoInfo(f io.Reader, info *fileInfo) error {
   392  	r := newImportReader(info.filename, f)
   393  
   394  	r.readKeyword("package")
   395  	r.readIdent()
   396  	for r.peekByte(true) == 'i' {
   397  		r.readKeyword("import")
   398  		if r.peekByte(true) == '(' {
   399  			r.nextByte(false)
   400  			for r.peekByte(true) != ')' && r.err == nil {
   401  				r.readImport()
   402  			}
   403  			r.nextByte(false)
   404  		} else {
   405  			r.readImport()
   406  		}
   407  	}
   408  
   409  	info.header = r.buf
   410  
   411  	// If we stopped successfully before EOF, we read a byte that told us we were done.
   412  	// Return all but that last byte, which would cause a syntax error if we let it through.
   413  	if r.err == nil && !r.eof {
   414  		info.header = r.buf[:len(r.buf)-1]
   415  	}
   416  
   417  	// If we stopped for a syntax error, consume the whole file so that
   418  	// we are sure we don't change the errors that go/parser returns.
   419  	if r.err == errSyntax {
   420  		r.err = nil
   421  		for r.err == nil && !r.eof {
   422  			r.readByte()
   423  		}
   424  		info.header = r.buf
   425  	}
   426  	if r.err != nil {
   427  		return r.err
   428  	}
   429  
   430  	if info.fset == nil {
   431  		return nil
   432  	}
   433  
   434  	// Parse file header & record imports.
   435  	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.filename, info.header, parser.ImportsOnly|parser.ParseComments)
   436  	if info.parseErr != nil {
   437  		return nil
   438  	}
   439  	info.pkg = info.parsed.Name.Name
   440  
   441  	hasEmbed := false
   442  	for _, decl := range info.parsed.Decls {
   443  		d, ok := decl.(*ast.GenDecl)
   444  		if !ok {
   445  			continue
   446  		}
   447  		for _, dspec := range d.Specs {
   448  			spec, ok := dspec.(*ast.ImportSpec)
   449  			if !ok {
   450  				continue
   451  			}
   452  			quoted := spec.Path.Value
   453  			path, err := strconv.Unquote(quoted)
   454  			if err != nil {
   455  				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
   456  			}
   457  			if path == "embed" {
   458  				hasEmbed = true
   459  			}
   460  
   461  			doc := spec.Doc
   462  			if doc == nil && len(d.Specs) == 1 {
   463  				doc = d.Doc
   464  			}
   465  			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
   466  		}
   467  	}
   468  
   469  	// If the file imports "embed",
   470  	// we have to look for //go:embed comments
   471  	// in the remainder of the file.
   472  	// The compiler will enforce the mapping of comments to
   473  	// declared variables. We just need to know the patterns.
   474  	// If there were //go:embed comments earlier in the file
   475  	// (near the package statement or imports), the compiler
   476  	// will reject them. They can be (and have already been) ignored.
   477  	if hasEmbed {
   478  		var line []byte
   479  		for first := true; r.findEmbed(first); first = false {
   480  			line = line[:0]
   481  			pos := r.pos
   482  			for {
   483  				c := r.readByteNoBuf()
   484  				if c == '\n' || r.err != nil || r.eof {
   485  					break
   486  				}
   487  				line = append(line, c)
   488  			}
   489  			// Add args if line is well-formed.
   490  			// Ignore badly-formed lines - the compiler will report them when it finds them,
   491  			// and we can pretend they are not there to help go list succeed with what it knows.
   492  			embs, err := parseGoEmbed(string(line), pos)
   493  			if err == nil {
   494  				info.embeds = append(info.embeds, embs...)
   495  			}
   496  		}
   497  	}
   498  
   499  	return nil
   500  }
   501  
   502  // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
   503  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
   504  // This is based on a similar function in cmd/compile/internal/gc/noder.go;
   505  // this version calculates position information as well.
   506  func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
   507  	trimBytes := func(n int) {
   508  		pos.Offset += n
   509  		pos.Column += utf8.RuneCountInString(args[:n])
   510  		args = args[n:]
   511  	}
   512  	trimSpace := func() {
   513  		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
   514  		trimBytes(len(args) - len(trim))
   515  	}
   516  
   517  	var list []fileEmbed
   518  	for trimSpace(); args != ""; trimSpace() {
   519  		var path string
   520  		pathPos := pos
   521  	Switch:
   522  		switch args[0] {
   523  		default:
   524  			i := len(args)
   525  			for j, c := range args {
   526  				if unicode.IsSpace(c) {
   527  					i = j
   528  					break
   529  				}
   530  			}
   531  			path = args[:i]
   532  			trimBytes(i)
   533  
   534  		case '`':
   535  			i := strings.Index(args[1:], "`")
   536  			if i < 0 {
   537  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   538  			}
   539  			path = args[1 : 1+i]
   540  			trimBytes(1 + i + 1)
   541  
   542  		case '"':
   543  			i := 1
   544  			for ; i < len(args); i++ {
   545  				if args[i] == '\\' {
   546  					i++
   547  					continue
   548  				}
   549  				if args[i] == '"' {
   550  					q, err := strconv.Unquote(args[:i+1])
   551  					if err != nil {
   552  						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
   553  					}
   554  					path = q
   555  					trimBytes(i + 1)
   556  					break Switch
   557  				}
   558  			}
   559  			if i >= len(args) {
   560  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   561  			}
   562  		}
   563  
   564  		if args != "" {
   565  			r, _ := utf8.DecodeRuneInString(args)
   566  			if !unicode.IsSpace(r) {
   567  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   568  			}
   569  		}
   570  		list = append(list, fileEmbed{path, pathPos})
   571  	}
   572  	return list, nil
   573  }
   574
View as plain text