package query

import (
	"fmt"
	"strconv"
	"strings"
	"unicode/utf8"

	"github.com/pelletier/go-toml"
)

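// queryLexStateFn is a single state of the lexer: it consumes input and
// returns the next state to run, or nil to stop the state machine.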
type queryLexStateFn func() queryLexStateFn

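// queryLexer walks the query string rune by rune, tracking the current
// token boundaries (start/pos), the position reported in emitted tokens
// (line/col), and the quote character that opened the current string.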
type queryLexer struct {
	input      string
	start      int
	pos        int
	width      int
	tokens     chan token
	depth      int
	line       int
	col        int
	stringTerm string
}

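// run drives the state machine starting in lexVoid and closes the token
// channel once the final state returns nil.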
func (l *queryLexer) run() {
	for state := l.lexVoid; state != nil; {
		state = state()
	}
	close(l.tokens)
}

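// nextStart moves the token start marker up to the current position,
// updating the line and column counters for every rune skipped over.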
func (l *queryLexer) nextStart() {
	for i := l.start; i < l.pos; {
		r, width := utf8.DecodeRuneInString(l.input[i:])
		if r == '\n' {
			l.line++
			l.col = 1
		} else {
			l.col++
		}
		i += width
	}

	l.start = l.pos
}

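// emit sends the text between start and pos as a token of type t and then
// advances the start marker; emitWithValue does the same but with an
// explicit value (used for strings whose escapes have been decoded).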
func (l *queryLexer) emit(t tokenType) {
	l.tokens <- token{
		Position: toml.Position{Line: l.line, Col: l.col},
		typ:      t,
		val:      l.input[l.start:l.pos],
	}
	l.nextStart()
}

func (l *queryLexer) emitWithValue(t tokenType, value string) {
	l.tokens <- token{
		Position: toml.Position{Line: l.line, Col: l.col},
		typ:      t,
		val:      value,
	}
	l.nextStart()
}

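// next consumes and returns the next rune, recording its byte width so
// that backup can step back exactly one rune; it returns eof at the end
// of the input.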
func (l *queryLexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

func (l *queryLexer) ignore() {
	l.nextStart()
}

func (l *queryLexer) backup() {
	l.pos -= l.width
}

func (l *queryLexer) errorf(format string, args ...interface{}) queryLexStateFn {
	l.tokens <- token{
		Position: toml.Position{Line: l.line, Col: l.col},
		typ:      tokenError,
		val:      fmt.Sprintf(format, args...),
	}
	return nil
}

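// peek returns the next rune without consuming it; accept consumes the
// next rune only if it is in valid; follow reports whether the upcoming
// input starts with next.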
func (l *queryLexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

func (l *queryLexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}

func (l *queryLexer) follow(next string) bool {
	return strings.HasPrefix(l.input[l.pos:], next)
}

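// lexVoid is the top-level state: it emits punctuation tokens ($, ., ..,
// brackets, parens, and so on), skips whitespace, and dispatches to the
// key, string, or number states.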
func (l *queryLexer) lexVoid() queryLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '$':
			l.pos++
			l.emit(tokenDollar)
			continue
		case '.':
			if l.follow("..") {
				l.pos += 2
				l.emit(tokenDotDot)
			} else {
				l.pos++
				l.emit(tokenDot)
			}
			continue
		case '[':
			l.pos++
			l.emit(tokenLeftBracket)
			continue
		case ']':
			l.pos++
			l.emit(tokenRightBracket)
			continue
		case ',':
			l.pos++
			l.emit(tokenComma)
			continue
		case '*':
			l.pos++
			l.emit(tokenStar)
			continue
		case '(':
			l.pos++
			l.emit(tokenLeftParen)
			continue
		case ')':
			l.pos++
			l.emit(tokenRightParen)
			continue
		case '?':
			l.pos++
			l.emit(tokenQuestion)
			continue
		case ':':
			l.pos++
			l.emit(tokenColon)
			continue
		case '\'':
			l.ignore()
			l.stringTerm = string(next)
			return l.lexString
		case '"':
			l.ignore()
			l.stringTerm = string(next)
			return l.lexString
		}

		if isSpace(next) {
			l.next()
			l.ignore()
			continue
		}

		if isAlphanumeric(next) {
			return l.lexKey
		}

		if next == '+' || next == '-' || isDigit(next) {
			return l.lexNumber
		}

		if l.next() == eof {
			break
		}

		return l.errorf("unexpected char: '%v'", next)
	}
	l.emit(tokenEOF)
	return nil
}

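// lexKey consumes a run of alphanumeric characters and emits it as a
// single key token.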
func (l *queryLexer) lexKey() queryLexStateFn {
	for {
		next := l.peek()
		if !isAlphanumeric(next) {
			l.emit(tokenKey)
			return l.lexVoid
		}

		if l.next() == eof {
			break
		}
	}
	l.emit(tokenEOF)
	return nil
}

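// lexString consumes a single- or double-quoted string (terminated by
// stringTerm), decoding backslash escapes and \u/\U unicode escapes, and
// emits the decoded value.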
func (l *queryLexer) lexString() queryLexStateFn {
	l.pos++
	l.ignore()
	growingString := ""

	for {
		if l.follow(l.stringTerm) {
			l.emitWithValue(tokenString, growingString)
			l.pos++
			l.ignore()
			return l.lexVoid
		}

		if l.follow("\\\"") {
			l.pos++
			growingString += "\""
		} else if l.follow("\\'") {
			l.pos++
			growingString += "'"
		} else if l.follow("\\n") {
			l.pos++
			growingString += "\n"
		} else if l.follow("\\b") {
			l.pos++
			growingString += "\b"
		} else if l.follow("\\f") {
			l.pos++
			growingString += "\f"
		} else if l.follow("\\/") {
			l.pos++
			growingString += "/"
		} else if l.follow("\\t") {
			l.pos++
			growingString += "\t"
		} else if l.follow("\\r") {
			l.pos++
			growingString += "\r"
		} else if l.follow("\\\\") {
			l.pos++
			growingString += "\\"
		} else if l.follow("\\u") {
			l.pos += 2
			code := ""
			for i := 0; i < 4; i++ {
				c := l.peek()
				l.pos++
				if !isHexDigit(c) {
					return l.errorf("unfinished unicode escape")
				}
				code = code + string(c)
			}
			l.pos--
			intcode, err := strconv.ParseInt(code, 16, 32)
			if err != nil {
				return l.errorf("invalid unicode escape: \\u" + code)
			}
			growingString += string(rune(intcode))
		} else if l.follow("\\U") {
			l.pos += 2
			code := ""
			for i := 0; i < 8; i++ {
				c := l.peek()
				l.pos++
				if !isHexDigit(c) {
					return l.errorf("unfinished unicode escape")
				}
				code = code + string(c)
			}
			l.pos--
			intcode, err := strconv.ParseInt(code, 16, 32)
			if err != nil {
				return l.errorf("invalid unicode escape: \\U" + code)
			}
			growingString += string(rune(intcode))
		} else if l.follow("\\") {
			l.pos++
			return l.errorf("invalid escape sequence: \\" + string(l.peek()))
		} else {
			growingString += string(l.peek())
		}

		if l.next() == eof {
			break
		}
	}

	return l.errorf("unclosed string")
}

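// lexNumber consumes an optionally signed integer or float and emits the
// corresponding token; a float may contain at most one dot and must both
// start and end with a digit.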
func (l *queryLexer) lexNumber() queryLexStateFn {
	l.ignore()
	if !l.accept("+") {
		l.accept("-")
	}
	pointSeen := false
	digitSeen := false
	for {
		next := l.next()
		if next == '.' {
			if pointSeen {
				return l.errorf("cannot have two dots in one float")
			}
			if !isDigit(l.peek()) {
				return l.errorf("float cannot end with a dot")
			}
			pointSeen = true
		} else if isDigit(next) {
			digitSeen = true
		} else {
			l.backup()
			break
		}
		if pointSeen && !digitSeen {
			return l.errorf("cannot start float with a dot")
		}
	}

	if !digitSeen {
		return l.errorf("no digit in that number")
	}
	if pointSeen {
		l.emit(tokenFloat)
	} else {
		l.emit(tokenInteger)
	}
	return l.lexVoid
}

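// lexQuery starts lexing input on a background goroutine and returns the
// channel on which tokens are delivered; the channel is closed after the
// EOF or error token.
//
// A minimal consumption sketch (using the token names defined in this package):
//
//	for tok := range lexQuery("$.foo[1]") {
//		// tok.typ is tokenError on lexing failure
//	}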
func lexQuery(input string) chan token {
	l := &queryLexer{
		input:  input,
		tokens: make(chan token),
		line:   1,
		col:    1,
	}
	go l.run()
	return l.tokens
}