unquote.go

Documentation: github.com/protocolbuffers/txtpbfmt/unquote

     1  // Package unquote provides a function to unquote txtpb-formatted quoted string literals.
     2  package unquote
     3  
     4  import (
     5  	"errors"
     6  	"fmt"
     7  	"strconv"
     8  	"strings"
     9  	"unicode/utf8"
    10  
    11  	"github.com/protocolbuffers/txtpbfmt/ast"
    12  )
    13  
    14  // Unquote returns the value of the string node.
    15  // Calling Unquote on non-string node doesn't panic, but is otherwise undefined.
    16  func Unquote(n *ast.Node) (string, error) {
    17  	return unquoteValues(n.Values, unquote)
    18  }
    19  
    20  // Raw returns the raw value of the string node, with string escapes left in place.
    21  // Calling UnquoteRaw on non-string node doesn't panic, but is otherwise undefined.
    22  func Raw(n *ast.Node) (string, error) {
    23  	return unquoteValues(n.Values, unquoteRaw)
    24  }
    25  
    26  func unquoteValues(values []*ast.Value, unquoter func(string) (string, error)) (string, error) {
    27  	var ret strings.Builder
    28  	for _, v := range values {
    29  		uq, err := unquoter(v.Value)
    30  		if err != nil {
    31  			return "", err
    32  		}
    33  		ret.WriteString(uq)
    34  	}
    35  	return ret.String(), nil
    36  }
    37  
    38  // Returns the quote rune used in the given string (' or "). Returns an error if the string doesn't
    39  // start and end with a matching pair of valid quotes.
    40  func quoteRune(s string) (rune, error) {
    41  	if len(s) < 2 {
    42  		return 0, errors.New("not a quoted string")
    43  	}
    44  	quote := s[0]
    45  	if quote != '"' && quote != '\'' {
    46  		return 0, fmt.Errorf("invalid quote character %s", string(quote))
    47  	}
    48  	if s[len(s)-1] != quote {
    49  		return 0, errors.New("unmatched quote")
    50  	}
    51  	return rune(quote), nil
    52  }
    53  
    54  func unquote(s string) (string, error) {
    55  	quote, err := quoteRune(s)
    56  	if err != nil {
    57  		return "", err
    58  	}
    59  	return unquoteC(s[1:len(s)-1], quote)
    60  }
    61  
    62  func unquoteRaw(s string) (string, error) {
    63  	_, err := quoteRune(s) // Trigger validation, which guarantees this is a quote-wrapped string.
    64  	if err != nil {
    65  		return "", err
    66  	}
    67  	return s[1 : len(s)-1], nil
    68  }
    69  
    70  var (
    71  	errBadUTF8 = errors.New("bad UTF-8")
    72  )
    73  
    74  func unquoteC(s string, quote rune) (string, error) {
    75  	// Copied from third_party/golang/protobuf/proto/text_parser.go
    76  
    77  	// This is based on C++'s tokenizer.cc.
    78  	// Despite its name, this is *not* parsing C syntax.
    79  	// For instance, "\0" is an invalid quoted string.
    80  
    81  	// Avoid allocation in trivial cases.
    82  	simple := true
    83  	for _, r := range s {
    84  		if r == '\\' || r == quote {
    85  			simple = false
    86  			break
    87  		}
    88  	}
    89  	if simple {
    90  		return s, nil
    91  	}
    92  
    93  	buf := make([]byte, 0, 3*len(s)/2)
    94  	for len(s) > 0 {
    95  		r, n := utf8.DecodeRuneInString(s)
    96  		if r == utf8.RuneError && n == 1 {
    97  			return "", errBadUTF8
    98  		}
    99  		s = s[n:]
   100  		if r != '\\' {
   101  			if r < utf8.RuneSelf {
   102  				buf = append(buf, byte(r))
   103  			} else {
   104  				buf = append(buf, string(r)...)
   105  			}
   106  			continue
   107  		}
   108  
   109  		ch, tail, err := unescape(s)
   110  		if err != nil {
   111  			return "", err
   112  		}
   113  		buf = append(buf, ch...)
   114  		s = tail
   115  	}
   116  	return string(buf), nil
   117  }
   118  
   119  func unescape(s string) (ch string, tail string, err error) {
   120  	// Copied from third_party/golang/protobuf/proto/text_parser.go
   121  
   122  	r, n := utf8.DecodeRuneInString(s)
   123  	if r == utf8.RuneError && n == 1 {
   124  		return "", "", errBadUTF8
   125  	}
   126  	s = s[n:]
   127  	switch r {
   128  	case 'a':
   129  		return "\a", s, nil
   130  	case 'b':
   131  		return "\b", s, nil
   132  	case 'f':
   133  		return "\f", s, nil
   134  	case 'n':
   135  		return "\n", s, nil
   136  	case 'r':
   137  		return "\r", s, nil
   138  	case 't':
   139  		return "\t", s, nil
   140  	case 'v':
   141  		return "\v", s, nil
   142  	case '?':
   143  		return "?", s, nil // trigraph workaround
   144  	case '\'', '"', '\\':
   145  		return string(r), s, nil
   146  	case '0', '1', '2', '3', '4', '5', '6', '7':
   147  		if len(s) < 2 {
   148  			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
   149  		}
   150  		ss := string(r) + s[:2]
   151  		s = s[2:]
   152  		i, err := strconv.ParseUint(ss, 8, 8)
   153  		if err != nil {
   154  			return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
   155  		}
   156  		return string([]byte{byte(i)}), s, nil
   157  	case 'x', 'X', 'u', 'U':
   158  		var n int
   159  		switch r {
   160  		case 'x', 'X':
   161  			n = 2
   162  		case 'u':
   163  			n = 4
   164  		case 'U':
   165  			n = 8
   166  		}
   167  		if len(s) < n {
   168  			return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
   169  		}
   170  		ss := s[:n]
   171  		s = s[n:]
   172  		i, err := strconv.ParseUint(ss, 16, 64)
   173  		if err != nil {
   174  			return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
   175  		}
   176  		if r == 'x' || r == 'X' {
   177  			return string([]byte{byte(i)}), s, nil
   178  		}
   179  		if i > utf8.MaxRune {
   180  			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
   181  		}
   182  		return strconv.FormatUint(i, 10), s, nil
   183  	}
   184  	return "", "", fmt.Errorf(`unknown escape \%c`, r)
   185  }
   186
View as plain text