1
2 package unquote
3
4 import (
5 "errors"
6 "fmt"
7 "strconv"
8 "strings"
9 "unicode/utf8"
10
11 "github.com/protocolbuffers/txtpbfmt/ast"
12 )
13
14
15
16 func Unquote(n *ast.Node) (string, error) {
17 return unquoteValues(n.Values, unquote)
18 }
19
20
21
22 func Raw(n *ast.Node) (string, error) {
23 return unquoteValues(n.Values, unquoteRaw)
24 }
25
26 func unquoteValues(values []*ast.Value, unquoter func(string) (string, error)) (string, error) {
27 var ret strings.Builder
28 for _, v := range values {
29 uq, err := unquoter(v.Value)
30 if err != nil {
31 return "", err
32 }
33 ret.WriteString(uq)
34 }
35 return ret.String(), nil
36 }
37
38
39
// quoteRune validates that s is enclosed in a matching pair of quotes and
// returns the quote character used (either '"' or '\'').
//
// It fails when s is shorter than two bytes, when the first byte is not a
// recognized quote character, or when the last byte differs from the first.
func quoteRune(s string) (rune, error) {
	if len(s) < 2 {
		return 0, errors.New("not a quoted string")
	}
	opening, closing := s[0], s[len(s)-1]
	switch opening {
	case '"', '\'':
		// Recognized delimiter; fall through to the closing-quote check.
	default:
		return 0, fmt.Errorf("invalid quote character %s", string(opening))
	}
	if closing != opening {
		return 0, errors.New("unmatched quote")
	}
	return rune(opening), nil
}
53
54 func unquote(s string) (string, error) {
55 quote, err := quoteRune(s)
56 if err != nil {
57 return "", err
58 }
59 return unquoteC(s[1:len(s)-1], quote)
60 }
61
62 func unquoteRaw(s string) (string, error) {
63 _, err := quoteRune(s)
64 if err != nil {
65 return "", err
66 }
67 return s[1 : len(s)-1], nil
68 }
69
// errBadUTF8 is returned when a quoted string contains a byte sequence that
// is not valid UTF-8 at a position the decoder has to interpret.
var errBadUTF8 = errors.New("bad UTF-8")
73
74 func unquoteC(s string, quote rune) (string, error) {
75
76
77
78
79
80
81
82 simple := true
83 for _, r := range s {
84 if r == '\\' || r == quote {
85 simple = false
86 break
87 }
88 }
89 if simple {
90 return s, nil
91 }
92
93 buf := make([]byte, 0, 3*len(s)/2)
94 for len(s) > 0 {
95 r, n := utf8.DecodeRuneInString(s)
96 if r == utf8.RuneError && n == 1 {
97 return "", errBadUTF8
98 }
99 s = s[n:]
100 if r != '\\' {
101 if r < utf8.RuneSelf {
102 buf = append(buf, byte(r))
103 } else {
104 buf = append(buf, string(r)...)
105 }
106 continue
107 }
108
109 ch, tail, err := unescape(s)
110 if err != nil {
111 return "", err
112 }
113 buf = append(buf, ch...)
114 s = tail
115 }
116 return string(buf), nil
117 }
118
119 func unescape(s string) (ch string, tail string, err error) {
120
121
122 r, n := utf8.DecodeRuneInString(s)
123 if r == utf8.RuneError && n == 1 {
124 return "", "", errBadUTF8
125 }
126 s = s[n:]
127 switch r {
128 case 'a':
129 return "\a", s, nil
130 case 'b':
131 return "\b", s, nil
132 case 'f':
133 return "\f", s, nil
134 case 'n':
135 return "\n", s, nil
136 case 'r':
137 return "\r", s, nil
138 case 't':
139 return "\t", s, nil
140 case 'v':
141 return "\v", s, nil
142 case '?':
143 return "?", s, nil
144 case '\'', '"', '\\':
145 return string(r), s, nil
146 case '0', '1', '2', '3', '4', '5', '6', '7':
147 if len(s) < 2 {
148 return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
149 }
150 ss := string(r) + s[:2]
151 s = s[2:]
152 i, err := strconv.ParseUint(ss, 8, 8)
153 if err != nil {
154 return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
155 }
156 return string([]byte{byte(i)}), s, nil
157 case 'x', 'X', 'u', 'U':
158 var n int
159 switch r {
160 case 'x', 'X':
161 n = 2
162 case 'u':
163 n = 4
164 case 'U':
165 n = 8
166 }
167 if len(s) < n {
168 return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
169 }
170 ss := s[:n]
171 s = s[n:]
172 i, err := strconv.ParseUint(ss, 16, 64)
173 if err != nil {
174 return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
175 }
176 if r == 'x' || r == 'X' {
177 return string([]byte{byte(i)}), s, nil
178 }
179 if i > utf8.MaxRune {
180 return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
181 }
182 return strconv.FormatUint(i, 10), s, nil
183 }
184 return "", "", fmt.Errorf(`unknown escape \%c`, r)
185 }
186
View as plain text