...

Source file src/github.com/golang/protobuf/proto/text_decode.go

Documentation: github.com/golang/protobuf/proto

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package proto
     6  
     7  import (
     8  	"encoding"
     9  	"errors"
    10  	"fmt"
    11  	"reflect"
    12  	"strconv"
    13  	"strings"
    14  	"unicode/utf8"
    15  
    16  	"google.golang.org/protobuf/encoding/prototext"
    17  	protoV2 "google.golang.org/protobuf/proto"
    18  	"google.golang.org/protobuf/reflect/protoreflect"
    19  	"google.golang.org/protobuf/reflect/protoregistry"
    20  )
    21  
    22  const wrapTextUnmarshalV2 = false
    23  
    24  // ParseError is returned by UnmarshalText.
    25  type ParseError struct {
    26  	Message string
    27  
    28  	// Deprecated: Do not use.
    29  	Line, Offset int
    30  }
    31  
    32  func (e *ParseError) Error() string {
    33  	if wrapTextUnmarshalV2 {
    34  		return e.Message
    35  	}
    36  	if e.Line == 1 {
    37  		return fmt.Sprintf("line 1.%d: %v", e.Offset, e.Message)
    38  	}
    39  	return fmt.Sprintf("line %d: %v", e.Line, e.Message)
    40  }
    41  
    42  // UnmarshalText parses a proto text formatted string into m.
    43  func UnmarshalText(s string, m Message) error {
    44  	if u, ok := m.(encoding.TextUnmarshaler); ok {
    45  		return u.UnmarshalText([]byte(s))
    46  	}
    47  
    48  	m.Reset()
    49  	mi := MessageV2(m)
    50  
    51  	if wrapTextUnmarshalV2 {
    52  		err := prototext.UnmarshalOptions{
    53  			AllowPartial: true,
    54  		}.Unmarshal([]byte(s), mi)
    55  		if err != nil {
    56  			return &ParseError{Message: err.Error()}
    57  		}
    58  		return checkRequiredNotSet(mi)
    59  	} else {
    60  		if err := newTextParser(s).unmarshalMessage(mi.ProtoReflect(), ""); err != nil {
    61  			return err
    62  		}
    63  		return checkRequiredNotSet(mi)
    64  	}
    65  }
    66  
    67  type textParser struct {
    68  	s            string // remaining input
    69  	done         bool   // whether the parsing is finished (success or error)
    70  	backed       bool   // whether back() was called
    71  	offset, line int
    72  	cur          token
    73  }
    74  
    75  type token struct {
    76  	value    string
    77  	err      *ParseError
    78  	line     int    // line number
    79  	offset   int    // byte number from start of input, not start of line
    80  	unquoted string // the unquoted version of value, if it was a quoted string
    81  }
    82  
    83  func newTextParser(s string) *textParser {
    84  	p := new(textParser)
    85  	p.s = s
    86  	p.line = 1
    87  	p.cur.line = 1
    88  	return p
    89  }
    90  
    91  func (p *textParser) unmarshalMessage(m protoreflect.Message, terminator string) (err error) {
    92  	md := m.Descriptor()
    93  	fds := md.Fields()
    94  
    95  	// A struct is a sequence of "name: value", terminated by one of
    96  	// '>' or '}', or the end of the input.  A name may also be
    97  	// "[extension]" or "[type/url]".
    98  	//
    99  	// The whole struct can also be an expanded Any message, like:
   100  	// [type/url] < ... struct contents ... >
   101  	seen := make(map[protoreflect.FieldNumber]bool)
   102  	for {
   103  		tok := p.next()
   104  		if tok.err != nil {
   105  			return tok.err
   106  		}
   107  		if tok.value == terminator {
   108  			break
   109  		}
   110  		if tok.value == "[" {
   111  			if err := p.unmarshalExtensionOrAny(m, seen); err != nil {
   112  				return err
   113  			}
   114  			continue
   115  		}
   116  
   117  		// This is a normal, non-extension field.
   118  		name := protoreflect.Name(tok.value)
   119  		fd := fds.ByName(name)
   120  		switch {
   121  		case fd == nil:
   122  			gd := fds.ByName(protoreflect.Name(strings.ToLower(string(name))))
   123  			if gd != nil && gd.Kind() == protoreflect.GroupKind && gd.Message().Name() == name {
   124  				fd = gd
   125  			}
   126  		case fd.Kind() == protoreflect.GroupKind && fd.Message().Name() != name:
   127  			fd = nil
   128  		case fd.IsWeak() && fd.Message().IsPlaceholder():
   129  			fd = nil
   130  		}
   131  		if fd == nil {
   132  			typeName := string(md.FullName())
   133  			if m, ok := m.Interface().(Message); ok {
   134  				t := reflect.TypeOf(m)
   135  				if t.Kind() == reflect.Ptr {
   136  					typeName = t.Elem().String()
   137  				}
   138  			}
   139  			return p.errorf("unknown field name %q in %v", name, typeName)
   140  		}
   141  		if od := fd.ContainingOneof(); od != nil && m.WhichOneof(od) != nil {
   142  			return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, od.Name())
   143  		}
   144  		if fd.Cardinality() != protoreflect.Repeated && seen[fd.Number()] {
   145  			return p.errorf("non-repeated field %q was repeated", fd.Name())
   146  		}
   147  		seen[fd.Number()] = true
   148  
   149  		// Consume any colon.
   150  		if err := p.checkForColon(fd); err != nil {
   151  			return err
   152  		}
   153  
   154  		// Parse into the field.
   155  		v := m.Get(fd)
   156  		if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) {
   157  			v = m.Mutable(fd)
   158  		}
   159  		if v, err = p.unmarshalValue(v, fd); err != nil {
   160  			return err
   161  		}
   162  		m.Set(fd, v)
   163  
   164  		if err := p.consumeOptionalSeparator(); err != nil {
   165  			return err
   166  		}
   167  	}
   168  	return nil
   169  }
   170  
   171  func (p *textParser) unmarshalExtensionOrAny(m protoreflect.Message, seen map[protoreflect.FieldNumber]bool) error {
   172  	name, err := p.consumeExtensionOrAnyName()
   173  	if err != nil {
   174  		return err
   175  	}
   176  
   177  	// If it contains a slash, it's an Any type URL.
   178  	if slashIdx := strings.LastIndex(name, "/"); slashIdx >= 0 {
   179  		tok := p.next()
   180  		if tok.err != nil {
   181  			return tok.err
   182  		}
   183  		// consume an optional colon
   184  		if tok.value == ":" {
   185  			tok = p.next()
   186  			if tok.err != nil {
   187  				return tok.err
   188  			}
   189  		}
   190  
   191  		var terminator string
   192  		switch tok.value {
   193  		case "<":
   194  			terminator = ">"
   195  		case "{":
   196  			terminator = "}"
   197  		default:
   198  			return p.errorf("expected '{' or '<', found %q", tok.value)
   199  		}
   200  
   201  		mt, err := protoregistry.GlobalTypes.FindMessageByURL(name)
   202  		if err != nil {
   203  			return p.errorf("unrecognized message %q in google.protobuf.Any", name[slashIdx+len("/"):])
   204  		}
   205  		m2 := mt.New()
   206  		if err := p.unmarshalMessage(m2, terminator); err != nil {
   207  			return err
   208  		}
   209  		b, err := protoV2.Marshal(m2.Interface())
   210  		if err != nil {
   211  			return p.errorf("failed to marshal message of type %q: %v", name[slashIdx+len("/"):], err)
   212  		}
   213  
   214  		urlFD := m.Descriptor().Fields().ByName("type_url")
   215  		valFD := m.Descriptor().Fields().ByName("value")
   216  		if seen[urlFD.Number()] {
   217  			return p.errorf("Any message unpacked multiple times, or %q already set", urlFD.Name())
   218  		}
   219  		if seen[valFD.Number()] {
   220  			return p.errorf("Any message unpacked multiple times, or %q already set", valFD.Name())
   221  		}
   222  		m.Set(urlFD, protoreflect.ValueOfString(name))
   223  		m.Set(valFD, protoreflect.ValueOfBytes(b))
   224  		seen[urlFD.Number()] = true
   225  		seen[valFD.Number()] = true
   226  		return nil
   227  	}
   228  
   229  	xname := protoreflect.FullName(name)
   230  	xt, _ := protoregistry.GlobalTypes.FindExtensionByName(xname)
   231  	if xt == nil && isMessageSet(m.Descriptor()) {
   232  		xt, _ = protoregistry.GlobalTypes.FindExtensionByName(xname.Append("message_set_extension"))
   233  	}
   234  	if xt == nil {
   235  		return p.errorf("unrecognized extension %q", name)
   236  	}
   237  	fd := xt.TypeDescriptor()
   238  	if fd.ContainingMessage().FullName() != m.Descriptor().FullName() {
   239  		return p.errorf("extension field %q does not extend message %q", name, m.Descriptor().FullName())
   240  	}
   241  
   242  	if err := p.checkForColon(fd); err != nil {
   243  		return err
   244  	}
   245  
   246  	v := m.Get(fd)
   247  	if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) {
   248  		v = m.Mutable(fd)
   249  	}
   250  	v, err = p.unmarshalValue(v, fd)
   251  	if err != nil {
   252  		return err
   253  	}
   254  	m.Set(fd, v)
   255  	return p.consumeOptionalSeparator()
   256  }
   257  
   258  func (p *textParser) unmarshalValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
   259  	tok := p.next()
   260  	if tok.err != nil {
   261  		return v, tok.err
   262  	}
   263  	if tok.value == "" {
   264  		return v, p.errorf("unexpected EOF")
   265  	}
   266  
   267  	switch {
   268  	case fd.IsList():
   269  		lv := v.List()
   270  		var err error
   271  		if tok.value == "[" {
   272  			// Repeated field with list notation, like [1,2,3].
   273  			for {
   274  				vv := lv.NewElement()
   275  				vv, err = p.unmarshalSingularValue(vv, fd)
   276  				if err != nil {
   277  					return v, err
   278  				}
   279  				lv.Append(vv)
   280  
   281  				tok := p.next()
   282  				if tok.err != nil {
   283  					return v, tok.err
   284  				}
   285  				if tok.value == "]" {
   286  					break
   287  				}
   288  				if tok.value != "," {
   289  					return v, p.errorf("Expected ']' or ',' found %q", tok.value)
   290  				}
   291  			}
   292  			return v, nil
   293  		}
   294  
   295  		// One value of the repeated field.
   296  		p.back()
   297  		vv := lv.NewElement()
   298  		vv, err = p.unmarshalSingularValue(vv, fd)
   299  		if err != nil {
   300  			return v, err
   301  		}
   302  		lv.Append(vv)
   303  		return v, nil
   304  	case fd.IsMap():
   305  		// The map entry should be this sequence of tokens:
   306  		//	< key : KEY value : VALUE >
   307  		// However, implementations may omit key or value, and technically
   308  		// we should support them in any order.
   309  		var terminator string
   310  		switch tok.value {
   311  		case "<":
   312  			terminator = ">"
   313  		case "{":
   314  			terminator = "}"
   315  		default:
   316  			return v, p.errorf("expected '{' or '<', found %q", tok.value)
   317  		}
   318  
   319  		keyFD := fd.MapKey()
   320  		valFD := fd.MapValue()
   321  
   322  		mv := v.Map()
   323  		kv := keyFD.Default()
   324  		vv := mv.NewValue()
   325  		for {
   326  			tok := p.next()
   327  			if tok.err != nil {
   328  				return v, tok.err
   329  			}
   330  			if tok.value == terminator {
   331  				break
   332  			}
   333  			var err error
   334  			switch tok.value {
   335  			case "key":
   336  				if err := p.consumeToken(":"); err != nil {
   337  					return v, err
   338  				}
   339  				if kv, err = p.unmarshalSingularValue(kv, keyFD); err != nil {
   340  					return v, err
   341  				}
   342  				if err := p.consumeOptionalSeparator(); err != nil {
   343  					return v, err
   344  				}
   345  			case "value":
   346  				if err := p.checkForColon(valFD); err != nil {
   347  					return v, err
   348  				}
   349  				if vv, err = p.unmarshalSingularValue(vv, valFD); err != nil {
   350  					return v, err
   351  				}
   352  				if err := p.consumeOptionalSeparator(); err != nil {
   353  					return v, err
   354  				}
   355  			default:
   356  				p.back()
   357  				return v, p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
   358  			}
   359  		}
   360  		mv.Set(kv.MapKey(), vv)
   361  		return v, nil
   362  	default:
   363  		p.back()
   364  		return p.unmarshalSingularValue(v, fd)
   365  	}
   366  }
   367  
   368  func (p *textParser) unmarshalSingularValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
   369  	tok := p.next()
   370  	if tok.err != nil {
   371  		return v, tok.err
   372  	}
   373  	if tok.value == "" {
   374  		return v, p.errorf("unexpected EOF")
   375  	}
   376  
   377  	switch fd.Kind() {
   378  	case protoreflect.BoolKind:
   379  		switch tok.value {
   380  		case "true", "1", "t", "True":
   381  			return protoreflect.ValueOfBool(true), nil
   382  		case "false", "0", "f", "False":
   383  			return protoreflect.ValueOfBool(false), nil
   384  		}
   385  	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
   386  		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
   387  			return protoreflect.ValueOfInt32(int32(x)), nil
   388  		}
   389  
   390  		// The C++ parser accepts large positive hex numbers that uses
   391  		// two's complement arithmetic to represent negative numbers.
   392  		// This feature is here for backwards compatibility with C++.
   393  		if strings.HasPrefix(tok.value, "0x") {
   394  			if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
   395  				return protoreflect.ValueOfInt32(int32(-(int64(^x) + 1))), nil
   396  			}
   397  		}
   398  	case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
   399  		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
   400  			return protoreflect.ValueOfInt64(int64(x)), nil
   401  		}
   402  
   403  		// The C++ parser accepts large positive hex numbers that uses
   404  		// two's complement arithmetic to represent negative numbers.
   405  		// This feature is here for backwards compatibility with C++.
   406  		if strings.HasPrefix(tok.value, "0x") {
   407  			if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
   408  				return protoreflect.ValueOfInt64(int64(-(int64(^x) + 1))), nil
   409  			}
   410  		}
   411  	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
   412  		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
   413  			return protoreflect.ValueOfUint32(uint32(x)), nil
   414  		}
   415  	case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
   416  		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
   417  			return protoreflect.ValueOfUint64(uint64(x)), nil
   418  		}
   419  	case protoreflect.FloatKind:
   420  		// Ignore 'f' for compatibility with output generated by C++,
   421  		// but don't remove 'f' when the value is "-inf" or "inf".
   422  		v := tok.value
   423  		if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" {
   424  			v = v[:len(v)-len("f")]
   425  		}
   426  		if x, err := strconv.ParseFloat(v, 32); err == nil {
   427  			return protoreflect.ValueOfFloat32(float32(x)), nil
   428  		}
   429  	case protoreflect.DoubleKind:
   430  		// Ignore 'f' for compatibility with output generated by C++,
   431  		// but don't remove 'f' when the value is "-inf" or "inf".
   432  		v := tok.value
   433  		if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" {
   434  			v = v[:len(v)-len("f")]
   435  		}
   436  		if x, err := strconv.ParseFloat(v, 64); err == nil {
   437  			return protoreflect.ValueOfFloat64(float64(x)), nil
   438  		}
   439  	case protoreflect.StringKind:
   440  		if isQuote(tok.value[0]) {
   441  			return protoreflect.ValueOfString(tok.unquoted), nil
   442  		}
   443  	case protoreflect.BytesKind:
   444  		if isQuote(tok.value[0]) {
   445  			return protoreflect.ValueOfBytes([]byte(tok.unquoted)), nil
   446  		}
   447  	case protoreflect.EnumKind:
   448  		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
   449  			return protoreflect.ValueOfEnum(protoreflect.EnumNumber(x)), nil
   450  		}
   451  		vd := fd.Enum().Values().ByName(protoreflect.Name(tok.value))
   452  		if vd != nil {
   453  			return protoreflect.ValueOfEnum(vd.Number()), nil
   454  		}
   455  	case protoreflect.MessageKind, protoreflect.GroupKind:
   456  		var terminator string
   457  		switch tok.value {
   458  		case "{":
   459  			terminator = "}"
   460  		case "<":
   461  			terminator = ">"
   462  		default:
   463  			return v, p.errorf("expected '{' or '<', found %q", tok.value)
   464  		}
   465  		err := p.unmarshalMessage(v.Message(), terminator)
   466  		return v, err
   467  	default:
   468  		panic(fmt.Sprintf("invalid kind %v", fd.Kind()))
   469  	}
   470  	return v, p.errorf("invalid %v: %v", fd.Kind(), tok.value)
   471  }
   472  
   473  // Consume a ':' from the input stream (if the next token is a colon),
   474  // returning an error if a colon is needed but not present.
   475  func (p *textParser) checkForColon(fd protoreflect.FieldDescriptor) *ParseError {
   476  	tok := p.next()
   477  	if tok.err != nil {
   478  		return tok.err
   479  	}
   480  	if tok.value != ":" {
   481  		if fd.Message() == nil {
   482  			return p.errorf("expected ':', found %q", tok.value)
   483  		}
   484  		p.back()
   485  	}
   486  	return nil
   487  }
   488  
   489  // consumeExtensionOrAnyName consumes an extension name or an Any type URL and
   490  // the following ']'. It returns the name or URL consumed.
   491  func (p *textParser) consumeExtensionOrAnyName() (string, error) {
   492  	tok := p.next()
   493  	if tok.err != nil {
   494  		return "", tok.err
   495  	}
   496  
   497  	// If extension name or type url is quoted, it's a single token.
   498  	if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
   499  		name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
   500  		if err != nil {
   501  			return "", err
   502  		}
   503  		return name, p.consumeToken("]")
   504  	}
   505  
   506  	// Consume everything up to "]"
   507  	var parts []string
   508  	for tok.value != "]" {
   509  		parts = append(parts, tok.value)
   510  		tok = p.next()
   511  		if tok.err != nil {
   512  			return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
   513  		}
   514  		if p.done && tok.value != "]" {
   515  			return "", p.errorf("unclosed type_url or extension name")
   516  		}
   517  	}
   518  	return strings.Join(parts, ""), nil
   519  }
   520  
   521  // consumeOptionalSeparator consumes an optional semicolon or comma.
   522  // It is used in unmarshalMessage to provide backward compatibility.
   523  func (p *textParser) consumeOptionalSeparator() error {
   524  	tok := p.next()
   525  	if tok.err != nil {
   526  		return tok.err
   527  	}
   528  	if tok.value != ";" && tok.value != "," {
   529  		p.back()
   530  	}
   531  	return nil
   532  }
   533  
   534  func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
   535  	pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
   536  	p.cur.err = pe
   537  	p.done = true
   538  	return pe
   539  }
   540  
   541  func (p *textParser) skipWhitespace() {
   542  	i := 0
   543  	for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
   544  		if p.s[i] == '#' {
   545  			// comment; skip to end of line or input
   546  			for i < len(p.s) && p.s[i] != '\n' {
   547  				i++
   548  			}
   549  			if i == len(p.s) {
   550  				break
   551  			}
   552  		}
   553  		if p.s[i] == '\n' {
   554  			p.line++
   555  		}
   556  		i++
   557  	}
   558  	p.offset += i
   559  	p.s = p.s[i:len(p.s)]
   560  	if len(p.s) == 0 {
   561  		p.done = true
   562  	}
   563  }
   564  
   565  func (p *textParser) advance() {
   566  	// Skip whitespace
   567  	p.skipWhitespace()
   568  	if p.done {
   569  		return
   570  	}
   571  
   572  	// Start of non-whitespace
   573  	p.cur.err = nil
   574  	p.cur.offset, p.cur.line = p.offset, p.line
   575  	p.cur.unquoted = ""
   576  	switch p.s[0] {
   577  	case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
   578  		// Single symbol
   579  		p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
   580  	case '"', '\'':
   581  		// Quoted string
   582  		i := 1
   583  		for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
   584  			if p.s[i] == '\\' && i+1 < len(p.s) {
   585  				// skip escaped char
   586  				i++
   587  			}
   588  			i++
   589  		}
   590  		if i >= len(p.s) || p.s[i] != p.s[0] {
   591  			p.errorf("unmatched quote")
   592  			return
   593  		}
   594  		unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
   595  		if err != nil {
   596  			p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
   597  			return
   598  		}
   599  		p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
   600  		p.cur.unquoted = unq
   601  	default:
   602  		i := 0
   603  		for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
   604  			i++
   605  		}
   606  		if i == 0 {
   607  			p.errorf("unexpected byte %#x", p.s[0])
   608  			return
   609  		}
   610  		p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
   611  	}
   612  	p.offset += len(p.cur.value)
   613  }
   614  
   615  // Back off the parser by one token. Can only be done between calls to next().
   616  // It makes the next advance() a no-op.
   617  func (p *textParser) back() { p.backed = true }
   618  
   619  // Advances the parser and returns the new current token.
   620  func (p *textParser) next() *token {
   621  	if p.backed || p.done {
   622  		p.backed = false
   623  		return &p.cur
   624  	}
   625  	p.advance()
   626  	if p.done {
   627  		p.cur.value = ""
   628  	} else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
   629  		// Look for multiple quoted strings separated by whitespace,
   630  		// and concatenate them.
   631  		cat := p.cur
   632  		for {
   633  			p.skipWhitespace()
   634  			if p.done || !isQuote(p.s[0]) {
   635  				break
   636  			}
   637  			p.advance()
   638  			if p.cur.err != nil {
   639  				return &p.cur
   640  			}
   641  			cat.value += " " + p.cur.value
   642  			cat.unquoted += p.cur.unquoted
   643  		}
   644  		p.done = false // parser may have seen EOF, but we want to return cat
   645  		p.cur = cat
   646  	}
   647  	return &p.cur
   648  }
   649  
   650  func (p *textParser) consumeToken(s string) error {
   651  	tok := p.next()
   652  	if tok.err != nil {
   653  		return tok.err
   654  	}
   655  	if tok.value != s {
   656  		p.back()
   657  		return p.errorf("expected %q, found %q", s, tok.value)
   658  	}
   659  	return nil
   660  }
   661  
   662  var errBadUTF8 = errors.New("proto: bad UTF-8")
   663  
   664  func unquoteC(s string, quote rune) (string, error) {
   665  	// This is based on C++'s tokenizer.cc.
   666  	// Despite its name, this is *not* parsing C syntax.
   667  	// For instance, "\0" is an invalid quoted string.
   668  
   669  	// Avoid allocation in trivial cases.
   670  	simple := true
   671  	for _, r := range s {
   672  		if r == '\\' || r == quote {
   673  			simple = false
   674  			break
   675  		}
   676  	}
   677  	if simple {
   678  		return s, nil
   679  	}
   680  
   681  	buf := make([]byte, 0, 3*len(s)/2)
   682  	for len(s) > 0 {
   683  		r, n := utf8.DecodeRuneInString(s)
   684  		if r == utf8.RuneError && n == 1 {
   685  			return "", errBadUTF8
   686  		}
   687  		s = s[n:]
   688  		if r != '\\' {
   689  			if r < utf8.RuneSelf {
   690  				buf = append(buf, byte(r))
   691  			} else {
   692  				buf = append(buf, string(r)...)
   693  			}
   694  			continue
   695  		}
   696  
   697  		ch, tail, err := unescape(s)
   698  		if err != nil {
   699  			return "", err
   700  		}
   701  		buf = append(buf, ch...)
   702  		s = tail
   703  	}
   704  	return string(buf), nil
   705  }
   706  
   707  func unescape(s string) (ch string, tail string, err error) {
   708  	r, n := utf8.DecodeRuneInString(s)
   709  	if r == utf8.RuneError && n == 1 {
   710  		return "", "", errBadUTF8
   711  	}
   712  	s = s[n:]
   713  	switch r {
   714  	case 'a':
   715  		return "\a", s, nil
   716  	case 'b':
   717  		return "\b", s, nil
   718  	case 'f':
   719  		return "\f", s, nil
   720  	case 'n':
   721  		return "\n", s, nil
   722  	case 'r':
   723  		return "\r", s, nil
   724  	case 't':
   725  		return "\t", s, nil
   726  	case 'v':
   727  		return "\v", s, nil
   728  	case '?':
   729  		return "?", s, nil // trigraph workaround
   730  	case '\'', '"', '\\':
   731  		return string(r), s, nil
   732  	case '0', '1', '2', '3', '4', '5', '6', '7':
   733  		if len(s) < 2 {
   734  			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
   735  		}
   736  		ss := string(r) + s[:2]
   737  		s = s[2:]
   738  		i, err := strconv.ParseUint(ss, 8, 8)
   739  		if err != nil {
   740  			return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
   741  		}
   742  		return string([]byte{byte(i)}), s, nil
   743  	case 'x', 'X', 'u', 'U':
   744  		var n int
   745  		switch r {
   746  		case 'x', 'X':
   747  			n = 2
   748  		case 'u':
   749  			n = 4
   750  		case 'U':
   751  			n = 8
   752  		}
   753  		if len(s) < n {
   754  			return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
   755  		}
   756  		ss := s[:n]
   757  		s = s[n:]
   758  		i, err := strconv.ParseUint(ss, 16, 64)
   759  		if err != nil {
   760  			return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
   761  		}
   762  		if r == 'x' || r == 'X' {
   763  			return string([]byte{byte(i)}), s, nil
   764  		}
   765  		if i > utf8.MaxRune {
   766  			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
   767  		}
   768  		return string(rune(i)), s, nil
   769  	}
   770  	return "", "", fmt.Errorf(`unknown escape \%c`, r)
   771  }
   772  
   773  func isIdentOrNumberChar(c byte) bool {
   774  	switch {
   775  	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
   776  		return true
   777  	case '0' <= c && c <= '9':
   778  		return true
   779  	}
   780  	switch c {
   781  	case '-', '+', '.', '_':
   782  		return true
   783  	}
   784  	return false
   785  }
   786  
   787  func isWhitespace(c byte) bool {
   788  	switch c {
   789  	case ' ', '\t', '\n', '\r':
   790  		return true
   791  	}
   792  	return false
   793  }
   794  
   795  func isQuote(c byte) bool {
   796  	switch c {
   797  	case '"', '\'':
   798  		return true
   799  	}
   800  	return false
   801  }
   802  

View as plain text