...
1 package scanner
2
3 import (
4 "bytes"
5 "fmt"
6 "os"
7 "unicode"
8 "unicode/utf8"
9
10 "github.com/hashicorp/hcl/json/token"
11 )
12
13
14 const eof = rune(0)
15
16
17 type Scanner struct {
18 buf *bytes.Buffer
19 src []byte
20
21
22 srcPos token.Pos
23 prevPos token.Pos
24
25 lastCharLen int
26 lastLineLen int
27
28 tokStart int
29 tokEnd int
30
31
32
33 Error func(pos token.Pos, msg string)
34
35
36 ErrorCount int
37
38
39
40
41
42 tokPos token.Pos
43 }
44
45
46
47 func New(src []byte) *Scanner {
48
49
50
51 b := bytes.NewBuffer(src)
52 s := &Scanner{
53 buf: b,
54 src: src,
55 }
56
57
58 s.srcPos.Line = 1
59 return s
60 }
61
62
63
64 func (s *Scanner) next() rune {
65 ch, size, err := s.buf.ReadRune()
66 if err != nil {
67
68 s.srcPos.Column++
69 s.srcPos.Offset += size
70 s.lastCharLen = size
71 return eof
72 }
73
74 if ch == utf8.RuneError && size == 1 {
75 s.srcPos.Column++
76 s.srcPos.Offset += size
77 s.lastCharLen = size
78 s.err("illegal UTF-8 encoding")
79 return ch
80 }
81
82
83 s.prevPos = s.srcPos
84
85 s.srcPos.Column++
86 s.lastCharLen = size
87 s.srcPos.Offset += size
88
89 if ch == '\n' {
90 s.srcPos.Line++
91 s.lastLineLen = s.srcPos.Column
92 s.srcPos.Column = 0
93 }
94
95
96
97 return ch
98 }
99
100
101 func (s *Scanner) unread() {
102 if err := s.buf.UnreadRune(); err != nil {
103 panic(err)
104 }
105 s.srcPos = s.prevPos
106 }
107
108
109 func (s *Scanner) peek() rune {
110 peek, _, err := s.buf.ReadRune()
111 if err != nil {
112 return eof
113 }
114
115 s.buf.UnreadRune()
116 return peek
117 }
118
119
120 func (s *Scanner) Scan() token.Token {
121 ch := s.next()
122
123
124 for isWhitespace(ch) {
125 ch = s.next()
126 }
127
128 var tok token.Type
129
130
131 s.tokStart = s.srcPos.Offset - s.lastCharLen
132
133
134
135 s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
136 if s.srcPos.Column > 0 {
137
138 s.tokPos.Line = s.srcPos.Line
139 s.tokPos.Column = s.srcPos.Column
140 } else {
141
142
143
144 s.tokPos.Line = s.srcPos.Line - 1
145 s.tokPos.Column = s.lastLineLen
146 }
147
148 switch {
149 case isLetter(ch):
150 lit := s.scanIdentifier()
151 if lit == "true" || lit == "false" {
152 tok = token.BOOL
153 } else if lit == "null" {
154 tok = token.NULL
155 } else {
156 s.err("illegal char")
157 }
158 case isDecimal(ch):
159 tok = s.scanNumber(ch)
160 default:
161 switch ch {
162 case eof:
163 tok = token.EOF
164 case '"':
165 tok = token.STRING
166 s.scanString()
167 case '.':
168 tok = token.PERIOD
169 ch = s.peek()
170 if isDecimal(ch) {
171 tok = token.FLOAT
172 ch = s.scanMantissa(ch)
173 ch = s.scanExponent(ch)
174 }
175 case '[':
176 tok = token.LBRACK
177 case ']':
178 tok = token.RBRACK
179 case '{':
180 tok = token.LBRACE
181 case '}':
182 tok = token.RBRACE
183 case ',':
184 tok = token.COMMA
185 case ':':
186 tok = token.COLON
187 case '-':
188 if isDecimal(s.peek()) {
189 ch := s.next()
190 tok = s.scanNumber(ch)
191 } else {
192 s.err("illegal char")
193 }
194 default:
195 s.err("illegal char: " + string(ch))
196 }
197 }
198
199
200 s.tokEnd = s.srcPos.Offset
201
202
203 var tokenText string
204 if s.tokStart >= 0 {
205 tokenText = string(s.src[s.tokStart:s.tokEnd])
206 }
207 s.tokStart = s.tokEnd
208
209 return token.Token{
210 Type: tok,
211 Pos: s.tokPos,
212 Text: tokenText,
213 }
214 }
215
216
217 func (s *Scanner) scanNumber(ch rune) token.Type {
218 zero := ch == '0'
219 pos := s.srcPos
220
221 s.scanMantissa(ch)
222 ch = s.next()
223 if ch == 'e' || ch == 'E' {
224 ch = s.scanExponent(ch)
225 return token.FLOAT
226 }
227
228 if ch == '.' {
229 ch = s.scanFraction(ch)
230 if ch == 'e' || ch == 'E' {
231 ch = s.next()
232 ch = s.scanExponent(ch)
233 }
234 return token.FLOAT
235 }
236
237 if ch != eof {
238 s.unread()
239 }
240
241
242 if zero && pos != s.srcPos {
243 s.err("numbers cannot start with 0")
244 }
245
246 return token.NUMBER
247 }
248
249
250
251 func (s *Scanner) scanMantissa(ch rune) rune {
252 scanned := false
253 for isDecimal(ch) {
254 ch = s.next()
255 scanned = true
256 }
257
258 if scanned && ch != eof {
259 s.unread()
260 }
261 return ch
262 }
263
264
265 func (s *Scanner) scanFraction(ch rune) rune {
266 if ch == '.' {
267 ch = s.peek()
268 ch = s.scanMantissa(ch)
269 }
270 return ch
271 }
272
273
274
275 func (s *Scanner) scanExponent(ch rune) rune {
276 if ch == 'e' || ch == 'E' {
277 ch = s.next()
278 if ch == '-' || ch == '+' {
279 ch = s.next()
280 }
281 ch = s.scanMantissa(ch)
282 }
283 return ch
284 }
285
286
287 func (s *Scanner) scanString() {
288 braces := 0
289 for {
290
291
292 ch := s.next()
293
294 if ch == '\n' || ch < 0 || ch == eof {
295 s.err("literal not terminated")
296 return
297 }
298
299 if ch == '"' {
300 break
301 }
302
303
304 if braces == 0 && ch == '$' && s.peek() == '{' {
305 braces++
306 s.next()
307 } else if braces > 0 && ch == '{' {
308 braces++
309 }
310 if braces > 0 && ch == '}' {
311 braces--
312 }
313
314 if ch == '\\' {
315 s.scanEscape()
316 }
317 }
318
319 return
320 }
321
322
323 func (s *Scanner) scanEscape() rune {
324
325 ch := s.next()
326 switch ch {
327 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
328
329 case '0', '1', '2', '3', '4', '5', '6', '7':
330
331 ch = s.scanDigits(ch, 8, 3)
332 case 'x':
333
334 ch = s.scanDigits(s.next(), 16, 2)
335 case 'u':
336
337 ch = s.scanDigits(s.next(), 16, 4)
338 case 'U':
339
340 ch = s.scanDigits(s.next(), 16, 8)
341 default:
342 s.err("illegal char escape")
343 }
344 return ch
345 }
346
347
348
349 func (s *Scanner) scanDigits(ch rune, base, n int) rune {
350 for n > 0 && digitVal(ch) < base {
351 ch = s.next()
352 n--
353 }
354 if n > 0 {
355 s.err("illegal char escape")
356 }
357
358
359 s.unread()
360 return ch
361 }
362
363
364 func (s *Scanner) scanIdentifier() string {
365 offs := s.srcPos.Offset - s.lastCharLen
366 ch := s.next()
367 for isLetter(ch) || isDigit(ch) || ch == '-' {
368 ch = s.next()
369 }
370
371 if ch != eof {
372 s.unread()
373 }
374
375 return string(s.src[offs:s.srcPos.Offset])
376 }
377
378
379
380 func (s *Scanner) recentPosition() (pos token.Pos) {
381 pos.Offset = s.srcPos.Offset - s.lastCharLen
382 switch {
383 case s.srcPos.Column > 0:
384
385 pos.Line = s.srcPos.Line
386 pos.Column = s.srcPos.Column
387 case s.lastLineLen > 0:
388
389
390
391 pos.Line = s.srcPos.Line - 1
392 pos.Column = s.lastLineLen
393 default:
394
395 pos.Line = 1
396 pos.Column = 1
397 }
398 return
399 }
400
401
402
403 func (s *Scanner) err(msg string) {
404 s.ErrorCount++
405 pos := s.recentPosition()
406
407 if s.Error != nil {
408 s.Error(pos, msg)
409 return
410 }
411
412 fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
413 }
414
415
416 func isLetter(ch rune) bool {
417 return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
418 }
419
420
421 func isDigit(ch rune) bool {
422 return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
423 }
424
425
426 func isDecimal(ch rune) bool {
427 return '0' <= ch && ch <= '9'
428 }
429
430
431 func isHexadecimal(ch rune) bool {
432 return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
433 }
434
435
436 func isWhitespace(ch rune) bool {
437 return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
438 }
439
440
441 func digitVal(ch rune) int {
442 switch {
443 case '0' <= ch && ch <= '9':
444 return int(ch - '0')
445 case 'a' <= ch && ch <= 'f':
446 return int(ch - 'a' + 10)
447 case 'A' <= ch && ch <= 'F':
448 return int(ch - 'A' + 10)
449 }
450 return 16
451 }
452
View as plain text