...

Source file src/github.com/emicklei/proto/token.go

Documentation: github.com/emicklei/proto

     1  // Copyright (c) 2017 Ernest Micklei
     2  //
     3  // MIT License
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining
     6  // a copy of this software and associated documentation files (the
     7  // "Software"), to deal in the Software without restriction, including
     8  // without limitation the rights to use, copy, modify, merge, publish,
     9  // distribute, sublicense, and/or sell copies of the Software, and to
    10  // permit persons to whom the Software is furnished to do so, subject to
    11  // the following conditions:
    12  //
    13  // The above copyright notice and this permission notice shall be
    14  // included in all copies or substantial portions of the Software.
    15  //
    16  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    17  // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    18  // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    19  // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
    20  // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
    21  // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    22  // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    23  
    24  package proto
    25  
    26  import (
    27  	"strconv"
    28  	"strings"
    29  )
    30  
    31  // token is the integer type of the lexical token kinds enumerated below; asToken maps a literal to one of them.
    32  type token int
    33  
    34  const (
    35  	// Special tokens; tILLEGAL is the zero value of token because it is declared first with iota.
    36  	tILLEGAL token = iota
    37  	tEOF
    38  	tWS
    39  
    40  	// Literals; asToken falls back to tIDENT when a literal is no delimiter, keyword, number or comment.
    41  	tIDENT
    42  
    43  	// Misc characters; each trailing comment shows the literal that asToken maps to the token.
    44  	tSEMICOLON   // ;
    45  	tCOLON       // :
    46  	tEQUALS      // =
    47  	tQUOTE       // "
    48  	tSINGLEQUOTE // '
    49  	tLEFTPAREN   // (
    50  	tRIGHTPAREN  // )
    51  	tLEFTCURLY   // {
    52  	tRIGHTCURLY  // }
    53  	tLEFTSQUARE  // [
    54  	tRIGHTSQUARE // ]
    55  	tCOMMENT     // a literal starting with // or /* (see isComment); asToken never returns it for a lone /
    56  	tLESS        // <
    57  	tGREATER     // >
    58  	tCOMMA       // ,
    59  	tDOT         // .
    60  
    61  	// Keywords; keywordsStart and keywordsEnd are the exclusive bounds tested by isKeyword, so declaration order matters.
    62  	keywordsStart
    63  	tSYNTAX
    64  	tSERVICE
    65  	tRPC
    66  	tRETURNS
    67  	tMESSAGE
    68  	tIMPORT
    69  	tPACKAGE
    70  	tOPTION
    71  	tREPEATED
    72  	tWEAK
    73  	tPUBLIC
    74  
    75  	// special fields
    76  	tONEOF
    77  	tMAP
    78  	tRESERVED
    79  	tENUM
    80  	tSTREAM
    81  
    82  	// numbers (pos or neg, float, 0x-prefixed hex); NOTE(review): tNUMBER lies between keywordsStart and keywordsEnd, so isKeyword(tNUMBER) is true - confirm this is intended.
    83  	tNUMBER
    84  
    85  	// BEGIN proto2
    86  	tOPTIONAL
    87  	tGROUP
    88  	tEXTENSIONS
    89  	tEXTEND
    90  	tREQUIRED
    91  	// END proto2
    92  	keywordsEnd
    93  )
    94  
    95  // typeTokens exists for future validation
    96  const typeTokens = "double float int32 int64 uint32 uint64 sint32 sint64 fixed32 sfixed32 sfixed64 bool string bytes"
    97  
    98  // isKeyword returns if tok is in the keywords range
    99  func isKeyword(tok token) bool {
   100  	return keywordsStart < tok && tok < keywordsEnd
   101  }
   102  
   103  // isWhitespace checks for space,tab and newline
   104  func isWhitespace(r rune) bool {
   105  	return r == ' ' || r == '\t' || r == '\n'
   106  }
   107  
   108  // isDigit returns true if the rune is a digit.
   109  func isDigit(ch rune) bool { return (ch >= '0' && ch <= '9') }
   110  
   111  // isString checks if the literal is quoted (single or double).
   112  func isString(lit string) bool {
   113  	if lit == "'" {
   114  		return false
   115  	}
   116  	return (strings.HasPrefix(lit, "\"") &&
   117  		strings.HasSuffix(lit, "\"")) ||
   118  		(strings.HasPrefix(lit, "'") &&
   119  			strings.HasSuffix(lit, "'"))
   120  }
   121  
   122  func isComment(lit string) bool {
   123  	return strings.HasPrefix(lit, "//") || strings.HasPrefix(lit, "/*")
   124  }
   125  
   126  func isNumber(lit string) bool {
   127  	if strings.HasPrefix(lit, "0x") || strings.HasPrefix(lit, "0X") {
   128  		_, err := strconv.ParseInt(lit, 0, 64)
   129  		return err == nil
   130  	}
   131  	_, err := strconv.ParseFloat(lit, 64)
   132  	return err == nil
   133  }
   134  
   135  const doubleQuoteRune = rune('"')
   136  
   137  // unQuote removes one matching leading and trailing single or double quote.
   138  //
   139  // https://github.com/emicklei/proto/issues/103
   140  // cannot use strconv.Unquote as this unescapes quotes.
   141  func unQuote(lit string) (string, rune) {
   142  	if len(lit) < 2 {
   143  		return lit, doubleQuoteRune
   144  	}
   145  	chars := []rune(lit)
   146  	first, last := chars[0], chars[len(chars)-1]
   147  	if first != last {
   148  		return lit, doubleQuoteRune
   149  	}
   150  	if s := string(chars[0]); s == "\"" || s == stringWithSingleQuote {
   151  		return string(chars[1 : len(chars)-1]), chars[0]
   152  	}
   153  	return lit, doubleQuoteRune
   154  }
   155  
   156  func asToken(literal string) token {
   157  	switch literal {
   158  	// delimiters
   159  	case ";":
   160  		return tSEMICOLON
   161  	case ":":
   162  		return tCOLON
   163  	case "=":
   164  		return tEQUALS
   165  	case "\"":
   166  		return tQUOTE
   167  	case "'":
   168  		return tSINGLEQUOTE
   169  	case "(":
   170  		return tLEFTPAREN
   171  	case ")":
   172  		return tRIGHTPAREN
   173  	case "{":
   174  		return tLEFTCURLY
   175  	case "}":
   176  		return tRIGHTCURLY
   177  	case "[":
   178  		return tLEFTSQUARE
   179  	case "]":
   180  		return tRIGHTSQUARE
   181  	case "<":
   182  		return tLESS
   183  	case ">":
   184  		return tGREATER
   185  	case ",":
   186  		return tCOMMA
   187  	case ".":
   188  		return tDOT
   189  	// words
   190  	case "syntax":
   191  		return tSYNTAX
   192  	case "service":
   193  		return tSERVICE
   194  	case "rpc":
   195  		return tRPC
   196  	case "returns":
   197  		return tRETURNS
   198  	case "option":
   199  		return tOPTION
   200  	case "message":
   201  		return tMESSAGE
   202  	case "import":
   203  		return tIMPORT
   204  	case "package":
   205  		return tPACKAGE
   206  	case "oneof":
   207  		return tONEOF
   208  	// special fields
   209  	case "map":
   210  		return tMAP
   211  	case "reserved":
   212  		return tRESERVED
   213  	case "enum":
   214  		return tENUM
   215  	case "repeated":
   216  		return tREPEATED
   217  	case "weak":
   218  		return tWEAK
   219  	case "public":
   220  		return tPUBLIC
   221  	case "stream":
   222  		return tSTREAM
   223  	// proto2
   224  	case "optional":
   225  		return tOPTIONAL
   226  	case "group":
   227  		return tGROUP
   228  	case "extensions":
   229  		return tEXTENSIONS
   230  	case "extend":
   231  		return tEXTEND
   232  	case "required":
   233  		return tREQUIRED
   234  	default:
   235  		// special cases
   236  		if isNumber(literal) {
   237  			return tNUMBER
   238  		}
   239  		if isComment(literal) {
   240  			return tCOMMENT
   241  		}
   242  		return tIDENT
   243  	}
   244  }
   245  

View as plain text