parser.go

Documentation: github.com/emicklei/proto

     1  // Copyright (c) 2017 Ernest Micklei
     2  //
     3  // MIT License
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining
     6  // a copy of this software and associated documentation files (the
     7  // "Software"), to deal in the Software without restriction, including
     8  // without limitation the rights to use, copy, modify, merge, publish,
     9  // distribute, sublicense, and/or sell copies of the Software, and to
    10  // permit persons to whom the Software is furnished to do so, subject to
    11  // the following conditions:
    12  //
    13  // The above copyright notice and this permission notice shall be
    14  // included in all copies or substantial portions of the Software.
    15  //
    16  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    17  // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    18  // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    19  // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
    20  // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
    21  // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    22  // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    23  
    24  package proto
    25  
    26  import (
    27  	"bytes"
    28  	"errors"
    29  	"fmt"
    30  	"io"
    31  	"runtime"
    32  	"strconv"
    33  	"strings"
    34  	"text/scanner"
    35  )
    36  
// Parser represents a parser.
// It reads tokens from a text/scanner.Scanner and can hold one pushed-back
// token that is handed out again by the following call to next.
type Parser struct {
	// debug, when true, makes unexpected add the caller's file and line plus
	// a dump of the offending object to the error it builds.
	debug bool
	// scanner is the underlying Go text scanner that produces the tokens.
	scanner *scanner.Scanner
	// buf is the single pushed-back token set by nextPut; next drains it
	// before scanning again. nil means nothing is buffered.
	buf *nextValues
	// scannerErrors collects the errors recorded by handleScanError; Parse
	// reports them instead of the parse error when there are any.
	scannerErrors []error
}
    44  
// nextValues is to capture the result of next()
// so that nextPut can store it and a later next() can return it unchanged.
type nextValues struct {
	// pos is the scanner position that was returned with the token.
	pos scanner.Position
	// tok is the token kind.
	tok token
	// lit is the literal text of the token.
	lit string
}
    51  
    52  // NewParser returns a new instance of Parser.
    53  func NewParser(r io.Reader) *Parser {
    54  	s := new(scanner.Scanner)
    55  	s.Init(r)
    56  	s.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments
    57  	p := &Parser{scanner: s}
    58  	s.Error = p.handleScanError
    59  	return p
    60  }
    61  
    62  // handleScanError is called from the underlying Scanner
    63  func (p *Parser) handleScanError(s *scanner.Scanner, msg string) {
    64  	p.scannerErrors = append(p.scannerErrors,
    65  		fmt.Errorf("go scanner error at %v = %v", s.Position, msg))
    66  }
    67  
    68  // ignoreIllegalEscapesWhile is called for scanning constants of an option.
    69  // Such content can have a syntax that is not acceptable by the Go scanner.
    70  // This temporary installs a handler that ignores only one type of error: illegal char escape
    71  func (p *Parser) ignoreIllegalEscapesWhile(block func()) {
    72  	// during block call change error handler
    73  	p.scanner.Error = func(s *scanner.Scanner, msg string) {
    74  		// this catches both "illegal char escape" <= go1.12 and "invalid char escape" go1.13
    75  		if strings.Contains(msg, "char escape") { // too bad there is no constant for this in scanner pkg
    76  			return
    77  		}
    78  		p.handleScanError(s, msg)
    79  	}
    80  	block()
    81  	// restore
    82  	p.scanner.Error = p.handleScanError
    83  }
    84  
    85  // Parse parses a proto definition. May return a parse or scanner error.
    86  func (p *Parser) Parse() (*Proto, error) {
    87  	proto := new(Proto)
    88  	if p.scanner.Filename != "" {
    89  		proto.Filename = p.scanner.Filename
    90  	}
    91  	parseError := proto.parse(p)
    92  	// see if it was a scanner error
    93  	if len(p.scannerErrors) > 0 {
    94  		buf := new(bytes.Buffer)
    95  		for _, each := range p.scannerErrors {
    96  			fmt.Fprintln(buf, each)
    97  		}
    98  		return proto, errors.New(buf.String())
    99  	}
   100  	return proto, parseError
   101  }
   102  
   103  // Filename is for reporting. Optional.
   104  func (p *Parser) Filename(f string) {
   105  	p.scanner.Filename = f
   106  }
   107  
   108  const stringWithSingleQuote = "'"
   109  
   110  // next returns the next token using the scanner or drain the buffer.
   111  func (p *Parser) next() (pos scanner.Position, tok token, lit string) {
   112  	if p.buf != nil {
   113  		// consume buf
   114  		vals := *p.buf
   115  		p.buf = nil
   116  		return vals.pos, vals.tok, vals.lit
   117  	}
   118  	ch := p.scanner.Scan()
   119  	if ch == scanner.EOF {
   120  		return p.scanner.Position, tEOF, ""
   121  	}
   122  	lit = p.scanner.TokenText()
   123  	// single quote needs additional scanning
   124  	if stringWithSingleQuote == lit {
   125  		return p.nextSingleQuotedString()
   126  	}
   127  	return p.scanner.Position, asToken(lit), lit
   128  }
   129  
   130  // pre: first single quote has been read
   131  func (p *Parser) nextSingleQuotedString() (pos scanner.Position, tok token, lit string) {
   132  	var ch rune
   133  	p.ignoreErrorsWhile(func() { ch = p.scanner.Scan() })
   134  	if ch == scanner.EOF {
   135  		return p.scanner.Position, tEOF, ""
   136  	}
   137  	// string inside single quote
   138  	lit = p.scanner.TokenText()
   139  	if stringWithSingleQuote == lit {
   140  		// empty single quoted string
   141  		return p.scanner.Position, tIDENT, "''"
   142  	}
   143  
   144  	// scan for partial tokens until actual closing single-quote(') token
   145  	for {
   146  		p.ignoreErrorsWhile(func() { ch = p.scanner.Scan() })
   147  
   148  		if ch == scanner.EOF {
   149  			return p.scanner.Position, tEOF, ""
   150  		}
   151  
   152  		partial := p.scanner.TokenText()
   153  		if partial == "'" {
   154  			break
   155  		}
   156  		lit += partial
   157  	}
   158  	// end quote expected
   159  	if stringWithSingleQuote != p.scanner.TokenText() {
   160  		p.unexpected(lit, "'", p)
   161  	}
   162  	return p.scanner.Position, tIDENT, fmt.Sprintf("'%s'", lit)
   163  }
   164  
   165  func (p *Parser) ignoreErrorsWhile(block func()) {
   166  	// during block call change error handler which ignores it all
   167  	p.scanner.Error = func(s *scanner.Scanner, msg string) { return }
   168  	block()
   169  	// restore
   170  	p.scanner.Error = p.handleScanError
   171  }
   172  
   173  // nextPut sets the buffer
   174  func (p *Parser) nextPut(pos scanner.Position, tok token, lit string) {
   175  	p.buf = &nextValues{pos, tok, lit}
   176  }
   177  
   178  func (p *Parser) unexpected(found, expected string, obj interface{}) error {
   179  	debug := ""
   180  	if p.debug {
   181  		_, file, line, _ := runtime.Caller(1)
   182  		debug = fmt.Sprintf(" at %s:%d (with %#v)", file, line, obj)
   183  	}
   184  	return fmt.Errorf("%v: found %q but expected [%s]%s", p.scanner.Position, found, expected, debug)
   185  }
   186  
   187  func (p *Parser) nextInteger() (i int, err error) {
   188  	_, tok, lit := p.next()
   189  	if "-" == lit {
   190  		i, err = p.nextInteger()
   191  		return i * -1, err
   192  	}
   193  	if tok != tNUMBER {
   194  		return 0, errors.New("non integer")
   195  	}
   196  	if strings.HasPrefix(lit, "0x") || strings.HasPrefix(lit, "0X") {
   197  		// hex decode
   198  		i64, err := strconv.ParseInt(lit, 0, 64)
   199  		return int(i64), err
   200  	}
   201  	i, err = strconv.Atoi(lit)
   202  	return
   203  }
   204  
   205  // nextIdentifier consumes tokens which may have one or more dot separators (namespaced idents).
   206  func (p *Parser) nextIdentifier() (pos scanner.Position, tok token, lit string) {
   207  	pos, tok, lit = p.nextIdent(false)
   208  	if tDOT == tok {
   209  		// leading dot allowed
   210  		pos, tok, lit = p.nextIdent(false)
   211  		lit = "." + lit
   212  	}
   213  	return
   214  }
   215  
   216  // nextTypeName implements the Packages and Name Resolution for finding the name of the type.
   217  // Valid examples:
   218  // .google.protobuf.Empty
   219  // stream T must return tSTREAM
   220  // optional int32 must return tOPTIONAL
   221  // Bogus must return Bogus
   222  func (p *Parser) nextTypeName() (pos scanner.Position, tok token, lit string) {
   223  	pos, tok, lit = p.next()
   224  	startPos := pos
   225  	fullLit := lit
   226  	// leading dot allowed
   227  	if tDOT == tok {
   228  		pos, tok, lit = p.next()
   229  		fullLit = fmt.Sprintf(".%s", lit)
   230  	}
   231  	// type can be namespaced more
   232  	for {
   233  		r := p.peekNonWhitespace()
   234  		if '.' != r {
   235  			break
   236  		}
   237  		p.next() // consume dot
   238  		pos, tok, lit = p.next()
   239  		fullLit = fmt.Sprintf("%s.%s", fullLit, lit)
   240  		tok = tIDENT
   241  	}
   242  	return startPos, tok, fullLit
   243  }
   244  
   245  func (p *Parser) nextIdent(keywordStartAllowed bool) (pos scanner.Position, tok token, lit string) {
   246  	pos, tok, lit = p.next()
   247  	if tIDENT != tok {
   248  		// can be keyword
   249  		if !(isKeyword(tok) && keywordStartAllowed) {
   250  			return
   251  		}
   252  		// proceed with keyword as first literal
   253  	}
   254  	startPos := pos
   255  	fullLit := lit
   256  	// see if identifier is namespaced
   257  	for {
   258  		r := p.peekNonWhitespace()
   259  		if '.' != r {
   260  			break
   261  		}
   262  		p.next() // consume dot
   263  		pos, tok, lit := p.next()
   264  		if tIDENT != tok && !isKeyword(tok) {
   265  			p.nextPut(pos, tok, lit)
   266  			break
   267  		}
   268  		fullLit = fmt.Sprintf("%s.%s", fullLit, lit)
   269  	}
   270  	return startPos, tIDENT, fullLit
   271  }
   272  
   273  func (p *Parser) peekNonWhitespace() rune {
   274  	r := p.scanner.Peek()
   275  	if r == scanner.EOF {
   276  		return r
   277  	}
   278  	if isWhitespace(r) {
   279  		// consume it
   280  		p.scanner.Next()
   281  		return p.peekNonWhitespace()
   282  	}
   283  	return r
   284  }
   285
View as plain text