1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package literal
16
17 import (
18 "errors"
19 "strings"
20 "unicode"
21 "unicode/utf8"
22 )
23
// Errors reported while parsing quote delimiters and unquoting literals.
var (
	// errSyntax is the generic error for a malformed literal or escape
	// sequence.
	errSyntax = errors.New("invalid syntax")
	// errInvalidWhitespace is reported when a line of a multiline string
	// does not start with the whitespace that precedes the closing quote.
	errInvalidWhitespace = errors.New("invalid string: invalid whitespace")
	// errMissingNewline is reported when a triple quote is not directly
	// followed by "\n" or "\r\n".
	errMissingNewline = errors.New(
		"invalid string: opening quote of multiline string must be followed by newline")
	// errUnmatchedQuote is reported when the closing delimiter is missing or
	// does not mirror the opening one.
	errUnmatchedQuote = errors.New("invalid string: unmatched quote")

	// errSurrogate is reported for a surrogate escape that is not part of a
	// valid high/low pair.
	errSurrogate = errors.New("unmatched surrogate pair")
	// errEscapedLastNewline is reported when an escaped newline is directly
	// followed by the closing quote.
	errEscapedLastNewline = errors.New("last newline of multiline string cannot be escaped")
)
35
36
37
38
39 func Unquote(s string) (string, error) {
40 info, nStart, _, err := ParseQuotes(s, s)
41 if err != nil {
42 return "", err
43 }
44 s = s[nStart:]
45 return info.Unquote(s)
46 }
47
48
// QuoteInfo describes the delimiters of a string literal, as computed by
// ParseQuotes.
type QuoteInfo struct {
	quote      string // opening delimiter as it appears in the source, hashes included
	whitespace string // whitespace in front of the closing quote of a multiline literal
	numHash    int    // number of '#' characters in front of the opening quote
	multiline  bool   // true for a triple-quoted literal
	char       byte   // the quote character, '"' or '\''
	numChar    byte   // number of quote characters per delimiter: 1 or 3
}

// IsDouble reports whether the literal is delimited by double quotes.
func (q QuoteInfo) IsDouble() bool {
	const doubleQuote = '"'
	return q.char == doubleQuote
}

// IsMulti reports whether the literal is a multiline (triple-quoted) literal.
func (q QuoteInfo) IsMulti() bool {
	multi := q.multiline
	return multi
}

// Whitespace returns the whitespace that precedes the closing quote of a
// multiline literal. It is empty for literals that are not multiline.
func (q QuoteInfo) Whitespace() string {
	indent := q.whitespace
	return indent
}
72
73
74
75
76 func ParseQuotes(start, end string) (q QuoteInfo, nStart, nEnd int, err error) {
77 for i, c := range start {
78 if c != '#' {
79 break
80 }
81 q.numHash = i + 1
82 }
83 s := start[q.numHash:]
84 switch s[0] {
85 case '"', '\'':
86 q.char = s[0]
87 if len(s) > 3 && s[1] == s[0] && s[2] == s[0] {
88 switch s[3] {
89 case '\n':
90 q.quote = start[:3+q.numHash]
91 case '\r':
92 if len(s) > 4 && s[4] == '\n' {
93 q.quote = start[:4+q.numHash]
94 break
95 }
96 fallthrough
97 default:
98 return q, 0, 0, errMissingNewline
99 }
100 q.multiline = true
101 q.numChar = 3
102 nStart = len(q.quote) + 1
103 } else {
104 q.quote = start[:1+q.numHash]
105 q.numChar = 1
106 nStart = len(q.quote)
107 }
108 default:
109 return q, 0, 0, errSyntax
110 }
111 quote := start[:int(q.numChar)+q.numHash]
112 for i := 0; i < len(quote); i++ {
113 if j := len(end) - i - 1; j < 0 || quote[i] != end[j] {
114 return q, 0, 0, errUnmatchedQuote
115 }
116 }
117 if q.multiline {
118 i := len(end) - len(quote)
119 for i > 0 {
120 r, size := utf8.DecodeLastRuneInString(end[:i])
121 if r == '\n' || !unicode.IsSpace(r) {
122 break
123 }
124 i -= size
125 }
126 q.whitespace = end[i : len(end)-len(quote)]
127
128 if len(start) > nStart && start[nStart] != '\n' {
129 if !strings.HasPrefix(start[nStart:], q.whitespace) {
130 return q, 0, 0, errInvalidWhitespace
131 }
132 nStart += len(q.whitespace)
133 }
134 }
135
136 return q, nStart, int(q.numChar) + q.numHash, nil
137 }
138
139
140
141
142
143 func (q QuoteInfo) Unquote(s string) (string, error) {
144 if len(s) > 0 && !q.multiline {
145 if contains(s, '\n') || contains(s, '\r') {
146 return "", errSyntax
147 }
148
149
150 if s[len(s)-1] == q.char && q.numHash == 0 {
151 if s := s[:len(s)-1]; isSimple(s, rune(q.char)) {
152 return s, nil
153 }
154 }
155 }
156
157 buf := make([]byte, 0, 3*len(s)/2)
158 stripNL := false
159 wasEscapedNewline := false
160 for len(s) > 0 {
161 switch s[0] {
162 case '\r':
163 s = s[1:]
164 wasEscapedNewline = false
165 continue
166 case '\n':
167 var err error
168 s, err = skipWhitespaceAfterNewline(s[1:], q)
169 if err != nil {
170 return "", err
171 }
172 stripNL = true
173 wasEscapedNewline = false
174 buf = append(buf, '\n')
175 continue
176 }
177 c, multibyte, ss, err := unquoteChar(s, q)
178 if surHigh <= c && c < surEnd {
179 if c >= surLow {
180 return "", errSurrogate
181 }
182 var cl rune
183 cl, _, ss, err = unquoteChar(ss, q)
184 if cl < surLow || surEnd <= cl {
185 return "", errSurrogate
186 }
187 c = 0x10000 + (c-surHigh)*0x400 + (cl - surLow)
188 }
189
190 if err != nil {
191 return "", err
192 }
193
194 s = ss
195 if c < 0 {
196 switch c {
197 case escapedNewline:
198 var err error
199 s, err = skipWhitespaceAfterNewline(s, q)
200 if err != nil {
201 return "", err
202 }
203 wasEscapedNewline = true
204 continue
205 case terminatedByQuote:
206 if wasEscapedNewline {
207 return "", errEscapedLastNewline
208 }
209 if stripNL {
210
211
212 buf = buf[:len(buf)-1]
213 }
214 case terminatedByExpr:
215 default:
216 panic("unreachable")
217 }
218 return string(buf), nil
219 }
220 stripNL = false
221 wasEscapedNewline = false
222 if !multibyte {
223 buf = append(buf, byte(c))
224 } else {
225 buf = utf8.AppendRune(buf, c)
226 }
227 }
228
229 return "", errUnmatchedQuote
230 }
231
232 func skipWhitespaceAfterNewline(s string, q QuoteInfo) (string, error) {
233 switch {
234 case !q.multiline:
235
236
237 fallthrough
238 default:
239 return "", errInvalidWhitespace
240 case strings.HasPrefix(s, q.whitespace):
241 s = s[len(q.whitespace):]
242 case strings.HasPrefix(s, "\n"):
243 case strings.HasPrefix(s, "\r\n"):
244 }
245 return s, nil
246 }
247
// Boundaries of the surrogate code point range. A first escape in
// [surHigh, surLow) must be followed by a second one in [surLow, surEnd);
// the two are then combined into a single code point.
const (
	surHigh = 0xD800 // first high (leading) surrogate
	surLow  = 0xDC00 // first low (trailing) surrogate
	surEnd  = 0xE000 // first code point past the surrogate range
)
253
254 func isSimple(s string, quote rune) bool {
255
256
257
258 for _, r := range s {
259 if r == quote || r == '\\' {
260 return false
261 }
262 if surHigh <= r && r < surEnd {
263 return false
264 }
265 }
266 return true
267 }
268
269
// contains reports whether the byte c occurs anywhere in s.
func contains(s string, c byte) bool {
	return strings.IndexByte(s, c) >= 0
}
278
// Negative pseudo-rune values that unquoteChar returns in place of a decoded
// character to signal a structural event to its caller.
const (
	terminatedByQuote = rune(-1) // the input is exactly the closing delimiter
	terminatedByExpr  = rune(-2) // the input ends with `\(`; presumably the start of an interpolation
	escapedNewline    = rune(-3) // a backslash directly followed by "\n" or "\r\n"
)
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304 func unquoteChar(s string, info QuoteInfo) (value rune, multibyte bool, tail string, err error) {
305
306 switch c := s[0]; {
307 case c == info.char && info.char != 0:
308 for i := 1; byte(i) < info.numChar; i++ {
309 if i >= len(s) || s[i] != info.char {
310 return rune(info.char), false, s[1:], nil
311 }
312 }
313 for i := 0; i < info.numHash; i++ {
314 if i+int(info.numChar) >= len(s) || s[i+int(info.numChar)] != '#' {
315 return rune(info.char), false, s[1:], nil
316 }
317 }
318 if ln := int(info.numChar) + info.numHash; len(s) != ln {
319
320 return 0, false, s[ln:], errSyntax
321 }
322 return terminatedByQuote, false, "", nil
323 case c >= utf8.RuneSelf:
324
325
326
327 r, size := utf8.DecodeRuneInString(s)
328 return r, true, s[size:], nil
329 case c != '\\':
330 return rune(s[0]), false, s[1:], nil
331 }
332
333 if len(s) <= 1+info.numHash {
334 return '\\', false, s[1:], nil
335 }
336 for i := 1; i <= info.numHash && i < len(s); i++ {
337 if s[i] != '#' {
338 return '\\', false, s[1:], nil
339 }
340 }
341
342 c := s[1+info.numHash]
343 s = s[2+info.numHash:]
344
345 switch c {
346 case 'a':
347 value = '\a'
348 case 'b':
349 value = '\b'
350 case 'f':
351 value = '\f'
352 case 'n':
353 value = '\n'
354 case 'r':
355 value = '\r'
356 case 't':
357 value = '\t'
358 case 'v':
359 value = '\v'
360 case '/':
361 value = '/'
362 case 'x', 'u', 'U':
363 n := 0
364 switch c {
365 case 'x':
366 n = 2
367 case 'u':
368 n = 4
369 case 'U':
370 n = 8
371 }
372 var v rune
373 if len(s) < n {
374 err = errSyntax
375 return
376 }
377 for j := 0; j < n; j++ {
378 x, ok := unhex(s[j])
379 if !ok {
380 err = errSyntax
381 return
382 }
383 v = v<<4 | x
384 }
385 s = s[n:]
386 if c == 'x' {
387 if info.char == '"' {
388 err = errSyntax
389 return
390 }
391
392 value = v
393 break
394 }
395 if v > utf8.MaxRune {
396 err = errSyntax
397 return
398 }
399 value = v
400 multibyte = true
401 case '0', '1', '2', '3', '4', '5', '6', '7':
402 if info.char == '"' {
403 err = errSyntax
404 return
405 }
406 v := rune(c) - '0'
407 if len(s) < 2 {
408 err = errSyntax
409 return
410 }
411 for j := 0; j < 2; j++ {
412 x := rune(s[j]) - '0'
413 if x < 0 || x > 7 {
414 err = errSyntax
415 return
416 }
417 v = (v << 3) | x
418 }
419 s = s[2:]
420 if v > 255 {
421 err = errSyntax
422 return
423 }
424 value = v
425 case '\\':
426 value = '\\'
427 case '\'', '"':
428
429 if c != info.char {
430 err = errSyntax
431 return
432 }
433 value = rune(c)
434 case '(':
435 if s != "" {
436
437 return 0, false, s, errSyntax
438 }
439 value = terminatedByExpr
440 case '\r':
441 if len(s) == 0 || s[0] != '\n' {
442 err = errSyntax
443 return
444 }
445 s = s[1:]
446 value = escapedNewline
447 case '\n':
448 value = escapedNewline
449 default:
450 err = errSyntax
451 return
452 }
453 tail = s
454 return
455 }
456
// unhex converts a single hexadecimal digit, in either case, to its numeric
// value. ok is false, and v is 0, if b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		return rune(b - '0'), true
	case b >= 'a' && b <= 'f':
		return rune(b-'a') + 10, true
	case b >= 'A' && b <= 'F':
		return rune(b-'A') + 10, true
	default:
		return 0, false
	}
}
469
View as plain text