1 package lexer
2
3 import (
4 "bytes"
5 "unicode/utf8"
6
7 "github.com/vektah/gqlparser/ast"
8 "github.com/vektah/gqlparser/gqlerror"
9 )
10
11
12 type Lexer struct {
13 *ast.Source
14
15 start int
16
17 startRunes int
18
19 end int
20
21 endRunes int
22
23 line int
24
25 lineStartRunes int
26 }
27
28 func New(src *ast.Source) Lexer {
29 return Lexer{
30 Source: src,
31 line: 1,
32 }
33 }
34
35
36 func (s *Lexer) peek() (rune, int) {
37 return utf8.DecodeRuneInString(s.Input[s.end:])
38 }
39
40 func (s *Lexer) makeToken(kind Type) (Token, *gqlerror.Error) {
41 return s.makeValueToken(kind, s.Input[s.start:s.end])
42 }
43
44 func (s *Lexer) makeValueToken(kind Type, value string) (Token, *gqlerror.Error) {
45 return Token{
46 Kind: kind,
47 Value: value,
48 Pos: ast.Position{
49 Start: s.startRunes,
50 End: s.endRunes,
51 Line: s.line,
52 Column: s.startRunes - s.lineStartRunes + 1,
53 Src: s.Source,
54 },
55 }, nil
56 }
57
58 func (s *Lexer) makeError(format string, args ...interface{}) (Token, *gqlerror.Error) {
59 column := s.endRunes - s.lineStartRunes + 1
60 return Token{
61 Kind: Invalid,
62 Pos: ast.Position{
63 Start: s.startRunes,
64 End: s.endRunes,
65 Line: s.line,
66 Column: column,
67 Src: s.Source,
68 },
69 }, gqlerror.ErrorLocf(s.Source.Name, s.line, column, format, args...)
70 }
71
72
73
74
75
76
77 func (s *Lexer) ReadToken() (token Token, err *gqlerror.Error) {
78
79 s.ws()
80 s.start = s.end
81 s.startRunes = s.endRunes
82
83 if s.end >= len(s.Input) {
84 return s.makeToken(EOF)
85 }
86 r := s.Input[s.start]
87 s.end++
88 s.endRunes++
89 switch r {
90 case '!':
91 return s.makeValueToken(Bang, "")
92
93 case '$':
94 return s.makeValueToken(Dollar, "")
95 case '&':
96 return s.makeValueToken(Amp, "")
97 case '(':
98 return s.makeValueToken(ParenL, "")
99 case ')':
100 return s.makeValueToken(ParenR, "")
101 case '.':
102 if len(s.Input) > s.start+2 && s.Input[s.start:s.start+3] == "..." {
103 s.end += 2
104 s.endRunes += 2
105 return s.makeValueToken(Spread, "")
106 }
107 case ':':
108 return s.makeValueToken(Colon, "")
109 case '=':
110 return s.makeValueToken(Equals, "")
111 case '@':
112 return s.makeValueToken(At, "")
113 case '[':
114 return s.makeValueToken(BracketL, "")
115 case ']':
116 return s.makeValueToken(BracketR, "")
117 case '{':
118 return s.makeValueToken(BraceL, "")
119 case '}':
120 return s.makeValueToken(BraceR, "")
121 case '|':
122 return s.makeValueToken(Pipe, "")
123 case '#':
124 s.readComment()
125 return s.ReadToken()
126
127 case '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
128 return s.readName()
129
130 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
131 return s.readNumber()
132
133 case '"':
134 if len(s.Input) > s.start+2 && s.Input[s.start:s.start+3] == `"""` {
135 return s.readBlockString()
136 }
137
138 return s.readString()
139 }
140
141 s.end--
142 s.endRunes--
143
144 if r < 0x0020 && r != 0x0009 && r != 0x000a && r != 0x000d {
145 return s.makeError(`Cannot contain the invalid character "\u%04d"`, r)
146 }
147
148 if r == '\'' {
149 return s.makeError(`Unexpected single quote character ('), did you mean to use a double quote (")?`)
150 }
151
152 return s.makeError(`Cannot parse the unexpected character "%s".`, string(r))
153 }
154
155
156
157 func (s *Lexer) ws() {
158 for s.end < len(s.Input) {
159 switch s.Input[s.end] {
160 case '\t', ' ', ',':
161 s.end++
162 s.endRunes++
163 case '\n':
164 s.end++
165 s.endRunes++
166 s.line++
167 s.lineStartRunes = s.endRunes
168 case '\r':
169 s.end++
170 s.endRunes++
171 s.line++
172 s.lineStartRunes = s.endRunes
173
174 if s.end < len(s.Input) && s.Input[s.end] == '\n' {
175 s.end++
176 s.endRunes++
177 }
178
179 case 0xef:
180 if s.end+2 < len(s.Input) && s.Input[s.end+1] == 0xBB && s.Input[s.end+2] == 0xBF {
181 s.end += 3
182 s.endRunes++
183 } else {
184 return
185 }
186 default:
187 return
188 }
189 }
190 }
191
192
193
194
195 func (s *Lexer) readComment() (Token, *gqlerror.Error) {
196 for s.end < len(s.Input) {
197 r, w := s.peek()
198
199
200 if r > 0x001f || r == '\t' {
201 s.end += w
202 s.endRunes++
203 } else {
204 break
205 }
206 }
207
208 return s.makeToken(Comment)
209 }
210
211
212
213
214
215
216 func (s *Lexer) readNumber() (Token, *gqlerror.Error) {
217 float := false
218
219
220 s.end--
221 s.endRunes--
222
223 s.acceptByte('-')
224
225 if s.acceptByte('0') {
226 if consumed := s.acceptDigits(); consumed != 0 {
227 s.end -= consumed
228 s.endRunes -= consumed
229 return s.makeError("Invalid number, unexpected digit after 0: %s.", s.describeNext())
230 }
231 } else {
232 if consumed := s.acceptDigits(); consumed == 0 {
233 return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
234 }
235 }
236
237 if s.acceptByte('.') {
238 float = true
239
240 if consumed := s.acceptDigits(); consumed == 0 {
241 return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
242 }
243 }
244
245 if s.acceptByte('e', 'E') {
246 float = true
247
248 s.acceptByte('-', '+')
249
250 if consumed := s.acceptDigits(); consumed == 0 {
251 return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
252 }
253 }
254
255 if float {
256 return s.makeToken(Float)
257 } else {
258 return s.makeToken(Int)
259 }
260 }
261
262
263 func (s *Lexer) acceptByte(bytes ...uint8) bool {
264 if s.end >= len(s.Input) {
265 return false
266 }
267
268 for _, accepted := range bytes {
269 if s.Input[s.end] == accepted {
270 s.end++
271 s.endRunes++
272 return true
273 }
274 }
275 return false
276 }
277
278
279 func (s *Lexer) acceptDigits() int {
280 consumed := 0
281 for s.end < len(s.Input) && s.Input[s.end] >= '0' && s.Input[s.end] <= '9' {
282 s.end++
283 s.endRunes++
284 consumed++
285 }
286
287 return consumed
288 }
289
290
291
292 func (s *Lexer) describeNext() string {
293 if s.end < len(s.Input) {
294 return `"` + string(s.Input[s.end]) + `"`
295 }
296 return "<EOF>"
297 }
298
299
300
301
302 func (s *Lexer) readString() (Token, *gqlerror.Error) {
303 inputLen := len(s.Input)
304
305
306 var buf *bytes.Buffer
307
308
309 s.start++
310 s.startRunes++
311
312 for s.end < inputLen {
313 r := s.Input[s.end]
314 if r == '\n' || r == '\r' {
315 break
316 }
317 if r < 0x0020 && r != '\t' {
318 return s.makeError(`Invalid character within String: "\u%04d".`, r)
319 }
320 switch r {
321 default:
322 var char = rune(r)
323 var w = 1
324
325
326 if r >= 127 {
327 char, w = utf8.DecodeRuneInString(s.Input[s.end:])
328 }
329 s.end += w
330 s.endRunes++
331
332 if buf != nil {
333 buf.WriteRune(char)
334 }
335
336 case '"':
337 t, err := s.makeToken(String)
338
339 t.Pos.Start--
340 t.Pos.End++
341
342 if buf != nil {
343 t.Value = buf.String()
344 }
345
346
347 s.end++
348 s.endRunes++
349
350 return t, err
351
352 case '\\':
353 if s.end+1 >= inputLen {
354 s.end++
355 s.endRunes++
356 return s.makeError(`Invalid character escape sequence.`)
357 }
358
359 if buf == nil {
360 buf = bytes.NewBufferString(s.Input[s.start:s.end])
361 }
362
363 escape := s.Input[s.end+1]
364
365 if escape == 'u' {
366 if s.end+6 >= inputLen {
367 s.end++
368 s.endRunes++
369 return s.makeError("Invalid character escape sequence: \\%s.", s.Input[s.end:])
370 }
371
372 r, ok := unhex(s.Input[s.end+2 : s.end+6])
373 if !ok {
374 s.end++
375 s.endRunes++
376 return s.makeError("Invalid character escape sequence: \\%s.", s.Input[s.end:s.end+5])
377 }
378 buf.WriteRune(r)
379 s.end += 6
380 s.endRunes += 6
381 } else {
382 switch escape {
383 case '"', '/', '\\':
384 buf.WriteByte(escape)
385 case 'b':
386 buf.WriteByte('\b')
387 case 'f':
388 buf.WriteByte('\f')
389 case 'n':
390 buf.WriteByte('\n')
391 case 'r':
392 buf.WriteByte('\r')
393 case 't':
394 buf.WriteByte('\t')
395 default:
396 s.end += 1
397 s.endRunes += 1
398 return s.makeError("Invalid character escape sequence: \\%s.", string(escape))
399 }
400 s.end += 2
401 s.endRunes += 2
402 }
403 }
404 }
405
406 return s.makeError("Unterminated string.")
407 }
408
409
410
411
412 func (s *Lexer) readBlockString() (Token, *gqlerror.Error) {
413 inputLen := len(s.Input)
414
415 var buf bytes.Buffer
416
417
418 s.start += 3
419 s.startRunes += 3
420 s.end += 2
421 s.endRunes += 2
422
423 for s.end < inputLen {
424 r := s.Input[s.end]
425
426
427 if r == '"' && s.end+3 <= inputLen && s.Input[s.end:s.end+3] == `"""` {
428 t, err := s.makeValueToken(BlockString, blockStringValue(buf.String()))
429
430
431 t.Pos.Start -= 3
432 t.Pos.End += 3
433
434
435 s.end += 3
436 s.endRunes += 3
437
438 return t, err
439 }
440
441
442 if r < 0x0020 && r != '\t' && r != '\n' && r != '\r' {
443 return s.makeError(`Invalid character within String: "\u%04d".`, r)
444 }
445
446 if r == '\\' && s.end+4 <= inputLen && s.Input[s.end:s.end+4] == `\"""` {
447 buf.WriteString(`"""`)
448 s.end += 4
449 s.endRunes += 4
450 } else if r == '\r' {
451 if s.end+1 < inputLen && s.Input[s.end+1] == '\n' {
452 s.end++
453 s.endRunes++
454 }
455
456 buf.WriteByte('\n')
457 s.end++
458 s.endRunes++
459 } else {
460 var char = rune(r)
461 var w = 1
462
463
464 if r >= 127 {
465 char, w = utf8.DecodeRuneInString(s.Input[s.end:])
466 }
467 s.end += w
468 s.endRunes++
469 buf.WriteRune(char)
470 }
471 }
472
473 return s.makeError("Unterminated string.")
474 }
475
// unhex parses b as a hexadecimal number, accepting both upper and lower case
// digits. It returns the value and true, or 0 and false as soon as a
// character that is not a hex digit is met. An empty string yields 0 and true.
func unhex(b string) (v rune, ok bool) {
	for _, c := range b {
		var digit rune
		switch {
		case c >= '0' && c <= '9':
			digit = c - '0'
		case c >= 'a' && c <= 'f':
			digit = c - 'a' + 10
		case c >= 'A' && c <= 'F':
			digit = c - 'A' + 10
		default:
			return 0, false
		}
		// Shift the accumulated value one hex digit left and append the new one.
		v = v<<4 | digit
	}

	return v, true
}
493
494
495
496
497 func (s *Lexer) readName() (Token, *gqlerror.Error) {
498 for s.end < len(s.Input) {
499 r, w := s.peek()
500
501 if (r >= '0' && r <= '9') || (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || r == '_' {
502 s.end += w
503 s.endRunes++
504 } else {
505 break
506 }
507 }
508
509 return s.makeToken(Name)
510 }
511
View as plain text