1
2
3
4
5 package syntax
6
7
8
9 import (
10 "fmt"
11 "strconv"
12 "strings"
13 "unicode"
14 "unicode/utf8"
15 )
16
17
// unesc maps the character that follows a backslash in a single-character
// escape sequence to the byte that the escape denotes.
// Bytes that do not name a single-character escape map to zero.
var unesc = [256]byte{
	'\\': '\\',
	'\'': '\'',
	'"':  '"',
	'a':  '\a',
	'b':  '\b',
	'f':  '\f',
	'n':  '\n',
	'r':  '\r',
	't':  '\t',
	'v':  '\v',
}
30
31
// esc is the inverse of the single-character escape table: it maps a byte
// to the character that follows the backslash in that byte's escape sequence.
// Bytes that have no single-character escape map to zero.
var esc = [256]byte{
	'\\': '\\',
	'\'': '\'',
	'"':  '"',
	'\a': 'a',
	'\b': 'b',
	'\f': 'f',
	'\n': 'n',
	'\r': 'r',
	'\t': 't',
	'\v': 'v',
}
44
45
46
47
48 func unquote(quoted string) (s string, triple, isByte bool, err error) {
49
50 raw := false
51 if strings.HasPrefix(quoted, "r") {
52 raw = true
53 quoted = quoted[1:]
54 }
55
56 if strings.HasPrefix(quoted, "b") {
57 isByte = true
58 quoted = quoted[1:]
59 }
60
61 if len(quoted) < 2 {
62 err = fmt.Errorf("string literal too short")
63 return
64 }
65
66 if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] {
67 err = fmt.Errorf("string literal has invalid quotes")
68 return
69 }
70
71
72 quote := quoted[0]
73 if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
74 triple = true
75 quoted = quoted[3 : len(quoted)-3]
76 } else {
77 quoted = quoted[1 : len(quoted)-1]
78 }
79
80
81
82
83 var unquoteChars string
84 if raw {
85 unquoteChars = "\r"
86 } else {
87 unquoteChars = "\\\r"
88 }
89 if !strings.ContainsAny(quoted, unquoteChars) {
90 s = quoted
91 return
92 }
93
94
95
96
97 buf := new(strings.Builder)
98 for {
99
100 i := strings.IndexAny(quoted, unquoteChars)
101 if i < 0 {
102 i = len(quoted)
103 }
104 buf.WriteString(quoted[:i])
105 quoted = quoted[i:]
106
107 if len(quoted) == 0 {
108 break
109 }
110
111
112 if quoted[0] == '\r' {
113 buf.WriteByte('\n')
114 if len(quoted) > 1 && quoted[1] == '\n' {
115 quoted = quoted[2:]
116 } else {
117 quoted = quoted[1:]
118 }
119 continue
120 }
121
122
123 if len(quoted) == 1 {
124 err = fmt.Errorf(`truncated escape sequence \`)
125 return
126 }
127
128 switch quoted[1] {
129 default:
130
131
132
133 err = fmt.Errorf("invalid escape sequence \\%c", quoted[1])
134 return
135
136 case '\n':
137
138 quoted = quoted[2:]
139
140 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
141
142
143
144 buf.WriteByte(unesc[quoted[1]])
145 quoted = quoted[2:]
146
147 case '0', '1', '2', '3', '4', '5', '6', '7':
148
149 n := int(quoted[1] - '0')
150 quoted = quoted[2:]
151 for i := 1; i < 3; i++ {
152 if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] {
153 break
154 }
155 n = n*8 + int(quoted[0]-'0')
156 quoted = quoted[1:]
157 }
158 if !isByte && n > 127 {
159 err = fmt.Errorf(`non-ASCII octal escape \%o (use \u%04X for the UTF-8 encoding of U+%04X)`, n, n, n)
160 return
161 }
162 if n >= 256 {
163
164
165
166 err = fmt.Errorf(`invalid escape sequence \%03o`, n)
167 return
168 }
169 buf.WriteByte(byte(n))
170
171 case 'x':
172
173 if len(quoted) < 4 {
174 err = fmt.Errorf(`truncated escape sequence %s`, quoted)
175 return
176 }
177 n, err1 := strconv.ParseUint(quoted[2:4], 16, 0)
178 if err1 != nil {
179 err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
180 return
181 }
182 if !isByte && n > 127 {
183 err = fmt.Errorf(`non-ASCII hex escape %s (use \u%04X for the UTF-8 encoding of U+%04X)`,
184 quoted[:4], n, n)
185 return
186 }
187 buf.WriteByte(byte(n))
188 quoted = quoted[4:]
189
190 case 'u', 'U':
191
192 sz := 6
193 if quoted[1] == 'U' {
194 sz = 10
195 }
196 if len(quoted) < sz {
197 err = fmt.Errorf(`truncated escape sequence %s`, quoted)
198 return
199 }
200 n, err1 := strconv.ParseUint(quoted[2:sz], 16, 0)
201 if err1 != nil {
202 err = fmt.Errorf(`invalid escape sequence %s`, quoted[:sz])
203 return
204 }
205 if n > unicode.MaxRune {
206 err = fmt.Errorf(`code point out of range: %s (max \U%08x)`,
207 quoted[:sz], n)
208 return
209 }
210
211 if 0xD800 <= n && n < 0xE000 {
212 err = fmt.Errorf(`invalid Unicode code point U+%04X`, n)
213 return
214 }
215 buf.WriteRune(rune(n))
216 quoted = quoted[sz:]
217 }
218 }
219
220 s = buf.String()
221 return
222 }
223
224
// indexByte returns the index of the first instance of b in s, or -1 if b
// does not occur in s. It delegates to the standard library rather than
// scanning by hand.
func indexByte(s string, b byte) int {
	return strings.IndexByte(s, b)
}
233
234
235
// Quote returns a double-quoted literal denoting s.
// If b is true the literal is given a "b" prefix.
//
// Double quotes and backslashes are always backslash-escaped, printable
// runes are copied through unchanged, and everything else is written as a
// named escape (\a \b \f \n \r \t \v) or as a \x, \u or \U escape.
func Quote(s string, b bool) string {
	const hexDigits = "0123456789abcdef"

	out := make([]byte, 0, 3*len(s)/2)
	if b {
		out = append(out, 'b')
	}
	out = append(out, '"')

	for len(s) > 0 {
		r, size := utf8.DecodeRuneInString(s)
		switch {
		case r == utf8.RuneError && size == 1:
			// An invalid UTF-8 byte: the only way to represent it is a
			// \xXX escape of the raw byte.
			out = append(out, '\\', 'x', hexDigits[s[0]>>4], hexDigits[s[0]&0xF])

		case r == '"' || r == '\\':
			// Always backslash-escaped.
			out = append(out, '\\', byte(r))

		case strconv.IsPrint(r):
			// A validly encoded printable rune: copy its bytes verbatim.
			out = append(out, s[:size]...)

		case r == '\a':
			out = append(out, '\\', 'a')
		case r == '\b':
			out = append(out, '\\', 'b')
		case r == '\f':
			out = append(out, '\\', 'f')
		case r == '\n':
			out = append(out, '\\', 'n')
		case r == '\r':
			out = append(out, '\\', 'r')
		case r == '\t':
			out = append(out, '\\', 't')
		case r == '\v':
			out = append(out, '\\', 'v')

		case r < ' ' || r == 0x7f:
			// Remaining ASCII control characters.
			out = append(out, '\\', 'x', hexDigits[byte(r)>>4], hexDigits[byte(r)&0xF])

		default:
			// Non-printable, non-ASCII rune: four hex digits after \u when
			// it fits, otherwise eight after \U.
			if r > utf8.MaxRune {
				r = 0xFFFD
			}
			letter, shift := byte('U'), 28
			if r < 0x10000 {
				letter, shift = 'u', 12
			}
			out = append(out, '\\', letter)
			for ; shift >= 0; shift -= 4 {
				out = append(out, hexDigits[r>>uint(shift)&0xF])
			}
		}
		s = s[size:]
	}

	out = append(out, '"')
	return string(out)
}
310
View as plain text