1 package lexer
2
3 import (
4 "bytes"
5 "unicode/utf8"
6
7 "github.com/vektah/gqlparser/v2/ast"
8 "github.com/vektah/gqlparser/v2/gqlerror"
9 )
10
11
12 type Lexer struct {
13 *ast.Source
14
15 start int
16
17 startRunes int
18
19 end int
20
21 endRunes int
22
23 line int
24
25 lineStartRunes int
26 }
27
28 func New(src *ast.Source) Lexer {
29 return Lexer{
30 Source: src,
31 line: 1,
32 }
33 }
34
35
36 func (s *Lexer) peek() (rune, int) {
37 return utf8.DecodeRuneInString(s.Input[s.end:])
38 }
39
40 func (s *Lexer) makeToken(kind Type) (Token, error) {
41 return s.makeValueToken(kind, s.Input[s.start:s.end])
42 }
43
44 func (s *Lexer) makeValueToken(kind Type, value string) (Token, error) {
45 return Token{
46 Kind: kind,
47 Value: value,
48 Pos: ast.Position{
49 Start: s.startRunes,
50 End: s.endRunes,
51 Line: s.line,
52 Column: s.startRunes - s.lineStartRunes + 1,
53 Src: s.Source,
54 },
55 }, nil
56 }
57
58 func (s *Lexer) makeError(format string, args ...interface{}) (Token, *gqlerror.Error) {
59 column := s.endRunes - s.lineStartRunes + 1
60 return Token{
61 Kind: Invalid,
62 Pos: ast.Position{
63 Start: s.startRunes,
64 End: s.endRunes,
65 Line: s.line,
66 Column: column,
67 Src: s.Source,
68 },
69 }, gqlerror.ErrorLocf(s.Source.Name, s.line, column, format, args...)
70 }
71
72
73
74
75
76
77 func (s *Lexer) ReadToken() (Token, error) {
78 s.ws()
79 s.start = s.end
80 s.startRunes = s.endRunes
81
82 if s.end >= len(s.Input) {
83 return s.makeToken(EOF)
84 }
85 r := s.Input[s.start]
86 s.end++
87 s.endRunes++
88 switch r {
89 case '!':
90 return s.makeValueToken(Bang, "")
91
92 case '$':
93 return s.makeValueToken(Dollar, "")
94 case '&':
95 return s.makeValueToken(Amp, "")
96 case '(':
97 return s.makeValueToken(ParenL, "")
98 case ')':
99 return s.makeValueToken(ParenR, "")
100 case '.':
101 if len(s.Input) > s.start+2 && s.Input[s.start:s.start+3] == "..." {
102 s.end += 2
103 s.endRunes += 2
104 return s.makeValueToken(Spread, "")
105 }
106 case ':':
107 return s.makeValueToken(Colon, "")
108 case '=':
109 return s.makeValueToken(Equals, "")
110 case '@':
111 return s.makeValueToken(At, "")
112 case '[':
113 return s.makeValueToken(BracketL, "")
114 case ']':
115 return s.makeValueToken(BracketR, "")
116 case '{':
117 return s.makeValueToken(BraceL, "")
118 case '}':
119 return s.makeValueToken(BraceR, "")
120 case '|':
121 return s.makeValueToken(Pipe, "")
122 case '#':
123 return s.readComment()
124
125 case '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
126 return s.readName()
127
128 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
129 return s.readNumber()
130
131 case '"':
132 if len(s.Input) > s.start+2 && s.Input[s.start:s.start+3] == `"""` {
133 return s.readBlockString()
134 }
135
136 return s.readString()
137 }
138
139 s.end--
140 s.endRunes--
141
142 if r < 0x0020 && r != 0x0009 && r != 0x000a && r != 0x000d {
143 return s.makeError(`Cannot contain the invalid character "\u%04d"`, r)
144 }
145
146 if r == '\'' {
147 return s.makeError(`Unexpected single quote character ('), did you mean to use a double quote (")?`)
148 }
149
150 return s.makeError(`Cannot parse the unexpected character "%s".`, string(r))
151 }
152
153
154
155 func (s *Lexer) ws() {
156 for s.end < len(s.Input) {
157 switch s.Input[s.end] {
158 case '\t', ' ', ',':
159 s.end++
160 s.endRunes++
161 case '\n':
162 s.end++
163 s.endRunes++
164 s.line++
165 s.lineStartRunes = s.endRunes
166 case '\r':
167 s.end++
168 s.endRunes++
169 s.line++
170 s.lineStartRunes = s.endRunes
171
172 if s.end < len(s.Input) && s.Input[s.end] == '\n' {
173 s.end++
174 s.endRunes++
175 }
176
177 case 0xef:
178 if s.end+2 < len(s.Input) && s.Input[s.end+1] == 0xBB && s.Input[s.end+2] == 0xBF {
179 s.end += 3
180 s.endRunes++
181 } else {
182 return
183 }
184 default:
185 return
186 }
187 }
188 }
189
190
191
192
193 func (s *Lexer) readComment() (Token, error) {
194 for s.end < len(s.Input) {
195 r, w := s.peek()
196
197
198 if r > 0x001f || r == '\t' {
199 s.end += w
200 s.endRunes++
201 } else {
202 break
203 }
204 }
205
206 return s.makeToken(Comment)
207 }
208
209
210
211
212
213
214 func (s *Lexer) readNumber() (Token, error) {
215 float := false
216
217
218 s.end--
219 s.endRunes--
220
221 s.acceptByte('-')
222
223 if s.acceptByte('0') {
224 if consumed := s.acceptDigits(); consumed != 0 {
225 s.end -= consumed
226 s.endRunes -= consumed
227 return s.makeError("Invalid number, unexpected digit after 0: %s.", s.describeNext())
228 }
229 } else {
230 if consumed := s.acceptDigits(); consumed == 0 {
231 return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
232 }
233 }
234
235 if s.acceptByte('.') {
236 float = true
237
238 if consumed := s.acceptDigits(); consumed == 0 {
239 return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
240 }
241 }
242
243 if s.acceptByte('e', 'E') {
244 float = true
245
246 s.acceptByte('-', '+')
247
248 if consumed := s.acceptDigits(); consumed == 0 {
249 return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
250 }
251 }
252
253 if float {
254 return s.makeToken(Float)
255 }
256 return s.makeToken(Int)
257
258 }
259
260
261 func (s *Lexer) acceptByte(bytes ...uint8) bool {
262 if s.end >= len(s.Input) {
263 return false
264 }
265
266 for _, accepted := range bytes {
267 if s.Input[s.end] == accepted {
268 s.end++
269 s.endRunes++
270 return true
271 }
272 }
273 return false
274 }
275
276
277 func (s *Lexer) acceptDigits() int {
278 consumed := 0
279 for s.end < len(s.Input) && s.Input[s.end] >= '0' && s.Input[s.end] <= '9' {
280 s.end++
281 s.endRunes++
282 consumed++
283 }
284
285 return consumed
286 }
287
288
289
290 func (s *Lexer) describeNext() string {
291 if s.end < len(s.Input) {
292 return `"` + string(s.Input[s.end]) + `"`
293 }
294 return "<EOF>"
295 }
296
297
298
299
300 func (s *Lexer) readString() (Token, error) {
301 inputLen := len(s.Input)
302
303
304 var buf *bytes.Buffer
305
306
307 s.start++
308 s.startRunes++
309
310 for s.end < inputLen {
311 r := s.Input[s.end]
312 if r == '\n' || r == '\r' {
313 break
314 }
315 if r < 0x0020 && r != '\t' {
316 return s.makeError(`Invalid character within String: "\u%04d".`, r)
317 }
318 switch r {
319 default:
320 var char = rune(r)
321 var w = 1
322
323
324 if r >= 127 {
325 char, w = utf8.DecodeRuneInString(s.Input[s.end:])
326 }
327 s.end += w
328 s.endRunes++
329
330 if buf != nil {
331 buf.WriteRune(char)
332 }
333
334 case '"':
335 t, err := s.makeToken(String)
336
337 t.Pos.Start--
338 t.Pos.End++
339
340 if buf != nil {
341 t.Value = buf.String()
342 }
343
344
345 s.end++
346 s.endRunes++
347
348 return t, err
349
350 case '\\':
351 if s.end+1 >= inputLen {
352 s.end++
353 s.endRunes++
354 return s.makeError(`Invalid character escape sequence.`)
355 }
356
357 if buf == nil {
358 buf = bytes.NewBufferString(s.Input[s.start:s.end])
359 }
360
361 escape := s.Input[s.end+1]
362
363 if escape == 'u' {
364 if s.end+6 >= inputLen {
365 s.end++
366 s.endRunes++
367 return s.makeError("Invalid character escape sequence: \\%s.", s.Input[s.end:])
368 }
369
370 r, ok := unhex(s.Input[s.end+2 : s.end+6])
371 if !ok {
372 s.end++
373 s.endRunes++
374 return s.makeError("Invalid character escape sequence: \\%s.", s.Input[s.end:s.end+5])
375 }
376 buf.WriteRune(r)
377 s.end += 6
378 s.endRunes += 6
379 } else {
380 switch escape {
381 case '"', '/', '\\':
382 buf.WriteByte(escape)
383 case 'b':
384 buf.WriteByte('\b')
385 case 'f':
386 buf.WriteByte('\f')
387 case 'n':
388 buf.WriteByte('\n')
389 case 'r':
390 buf.WriteByte('\r')
391 case 't':
392 buf.WriteByte('\t')
393 default:
394 s.end++
395 s.endRunes++
396 return s.makeError("Invalid character escape sequence: \\%s.", string(escape))
397 }
398 s.end += 2
399 s.endRunes += 2
400 }
401 }
402 }
403
404 return s.makeError("Unterminated string.")
405 }
406
407
408
409
410 func (s *Lexer) readBlockString() (Token, error) {
411 inputLen := len(s.Input)
412
413 var buf bytes.Buffer
414
415
416 s.start += 3
417 s.startRunes += 3
418 s.end += 2
419 s.endRunes += 2
420
421 for s.end < inputLen {
422 r := s.Input[s.end]
423
424
425 if r == '"' && s.end+3 <= inputLen && s.Input[s.end:s.end+3] == `"""` {
426 t, err := s.makeValueToken(BlockString, blockStringValue(buf.String()))
427
428
429 t.Pos.Start -= 3
430 t.Pos.End += 3
431
432
433 s.end += 3
434 s.endRunes += 3
435 return t, err
436 }
437
438
439 if r < 0x0020 && r != '\t' && r != '\n' && r != '\r' {
440 return s.makeError(`Invalid character within String: "\u%04d".`, r)
441 }
442
443 if r == '\\' && s.end+4 <= inputLen && s.Input[s.end:s.end+4] == `\"""` {
444 buf.WriteString(`"""`)
445 s.end += 4
446 s.endRunes += 4
447 } else if r == '\r' {
448 if s.end+1 < inputLen && s.Input[s.end+1] == '\n' {
449 s.end++
450 s.endRunes++
451 }
452
453 buf.WriteByte('\n')
454 s.end++
455 s.endRunes++
456 s.line++
457 s.lineStartRunes = s.endRunes
458 } else {
459 var char = rune(r)
460 var w = 1
461
462
463 if r >= 127 {
464 char, w = utf8.DecodeRuneInString(s.Input[s.end:])
465 }
466 s.end += w
467 s.endRunes++
468 buf.WriteRune(char)
469 if r == '\n' {
470 s.line++
471 s.lineStartRunes = s.endRunes
472 }
473 }
474 }
475
476 return s.makeError("Unterminated string.")
477 }
478
// unhex interprets b as a sequence of hexadecimal digits (upper or lower case)
// and returns the value they spell out, most significant digit first. ok is
// false, and v is 0, as soon as a character that is not a hex digit is found.
// An empty string yields (0, true).
func unhex(b string) (v rune, ok bool) {
	for _, c := range b {
		var nibble rune
		switch {
		case c >= '0' && c <= '9':
			nibble = c - '0'
		case c >= 'a' && c <= 'f':
			nibble = c - 'a' + 10
		case c >= 'A' && c <= 'F':
			nibble = c - 'A' + 10
		default:
			return 0, false
		}
		v = v<<4 | nibble
	}

	return v, true
}
496
497
498
499
500 func (s *Lexer) readName() (Token, error) {
501 for s.end < len(s.Input) {
502 r, w := s.peek()
503
504 if (r >= '0' && r <= '9') || (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || r == '_' {
505 s.end += w
506 s.endRunes++
507 } else {
508 break
509 }
510 }
511
512 return s.makeToken(Name)
513 }
514
View as plain text