...
1 package lexer
2
3 import (
4 "bytes"
5 "fmt"
6 "github.com/gobwas/glob/util/runes"
7 "unicode/utf8"
8 )
9
// Glob metacharacters recognized by the lexer.
const (
	char_any           = '*'
	char_comma         = ','
	char_single        = '?'
	char_escape        = '\\'
	char_range_open    = '['
	char_range_close   = ']'
	char_terms_open    = '{'
	char_terms_close   = '}'
	char_range_not     = '!'
	char_range_between = '-'
)

// specials lists every byte that is special anywhere in a pattern.
// Comma is deliberately absent: it only has meaning inside a "{...}"
// terms group and is handled contextually by the lexer.
var specials = []byte{
	char_any,
	char_single,
	char_escape,
	char_range_open,
	char_range_close,
	char_terms_open,
	char_terms_close,
}

// Special reports whether c is a glob metacharacter, i.e. a byte that
// must be escaped to be matched literally.
func Special(c byte) bool {
	return bytes.IndexByte(specials, c) >= 0
}
36
37 type tokens []Token
38
39 func (i *tokens) shift() (ret Token) {
40 ret = (*i)[0]
41 copy(*i, (*i)[1:])
42 *i = (*i)[:len(*i)-1]
43 return
44 }
45
46 func (i *tokens) push(v Token) {
47 *i = append(*i, v)
48 }
49
50 func (i *tokens) empty() bool {
51 return len(*i) == 0
52 }
53
// eof is the sentinel rune handed out once the input is exhausted (and
// after a decode error).
var eof rune = 0

// lexer incrementally turns a glob pattern string into a stream of
// Tokens. It decodes the pattern rune by rune, supports pushing back a
// single rune (unread), and buffers tokens that are produced ahead of
// the caller.
type lexer struct {
	data string // glob pattern being lexed
	pos  int    // byte offset into data of the next rune to decode
	err  error  // sticky: once set, Next keeps returning an Error token

	tokens     tokens // tokens lexed but not yet returned by Next
	termsLevel int    // nesting depth of currently open "{...}" groups

	lastRune     rune // most recently read rune, for unread
	lastRuneSize int  // byte width of lastRune
	hasRune      bool // true while an unread rune is pending re-delivery
}
68
69 func NewLexer(source string) *lexer {
70 l := &lexer{
71 data: source,
72 tokens: tokens(make([]Token, 0, 4)),
73 }
74 return l
75 }
76
77 func (l *lexer) Next() Token {
78 if l.err != nil {
79 return Token{Error, l.err.Error()}
80 }
81 if !l.tokens.empty() {
82 return l.tokens.shift()
83 }
84
85 l.fetchItem()
86 return l.Next()
87 }
88
89 func (l *lexer) peek() (r rune, w int) {
90 if l.pos == len(l.data) {
91 return eof, 0
92 }
93
94 r, w = utf8.DecodeRuneInString(l.data[l.pos:])
95 if r == utf8.RuneError {
96 l.errorf("could not read rune")
97 r = eof
98 w = 0
99 }
100
101 return
102 }
103
104 func (l *lexer) read() rune {
105 if l.hasRune {
106 l.hasRune = false
107 l.seek(l.lastRuneSize)
108 return l.lastRune
109 }
110
111 r, s := l.peek()
112 l.seek(s)
113
114 l.lastRune = r
115 l.lastRuneSize = s
116
117 return r
118 }
119
// seek moves the read position by w bytes; negative w rewinds.
func (l *lexer) seek(w int) {
	l.pos += w
}
123
124 func (l *lexer) unread() {
125 if l.hasRune {
126 l.errorf("could not unread rune")
127 return
128 }
129 l.seek(-l.lastRuneSize)
130 l.hasRune = true
131 }
132
// errorf records a formatted lexing error on the lexer; each call
// overwrites any previously recorded error.
func (l *lexer) errorf(f string, v ...interface{}) {
	l.err = fmt.Errorf(f, v...)
}
136
// inTerms reports whether the lexer is inside at least one "{...}"
// terms group.
func (l *lexer) inTerms() bool {
	return l.termsLevel > 0
}
140
// termsEnter records that a "{" opened a new terms group.
func (l *lexer) termsEnter() {
	l.termsLevel++
}
144
// termsLeave records that a "}" closed the innermost terms group.
func (l *lexer) termsLeave() {
	l.termsLevel--
}
148
// inTextBreakers are the runes at which fetchText stops consuming
// ordinary literal text.
var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}

// Inside a "{...}" group, the closing brace and comma also end a text
// run. NOTE(review): this append does not alias inTextBreakers only
// because the literal above has no spare capacity, forcing a fresh
// backing array — fragile if the literal ever changes.
var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)
151
// fetchItem lexes one item at the current position and pushes the
// resulting token(s) onto l.tokens, dispatching on the first rune:
// EOF, terms-group delimiters, a "[...]" range, wildcards, or literal
// text.
func (l *lexer) fetchItem() {
	r := l.read()
	switch {
	case r == eof:
		l.tokens.push(Token{EOF, ""})

	case r == char_terms_open:
		// Track nesting so "," and "}" are only special inside "{...}".
		l.termsEnter()
		l.tokens.push(Token{TermsOpen, string(r)})

	case r == char_comma && l.inTerms():
		// Comma separates alternatives only inside a terms group.
		l.tokens.push(Token{Separator, string(r)})

	case r == char_terms_close && l.inTerms():
		l.tokens.push(Token{TermsClose, string(r)})
		l.termsLeave()

	case r == char_range_open:
		// The rest of the "[...]" body is lexed by fetchRange.
		l.tokens.push(Token{RangeOpen, string(r)})
		l.fetchRange()

	case r == char_single:
		l.tokens.push(Token{Single, string(r)})

	case r == char_any:
		// "**" lexes as a single Super token, a lone "*" as Any.
		if l.read() == char_any {
			l.tokens.push(Token{Super, string(r) + string(r)})
		} else {
			l.unread()
			l.tokens.push(Token{Any, string(r)})
		}

	default:
		// Anything else starts literal text: put the rune back and let
		// fetchText consume up to the next significant rune.
		l.unread()

		var breakers []rune
		if l.inTerms() {
			breakers = inTermsBreakers
		} else {
			breakers = inTextBreakers
		}
		l.fetchText(breakers)
	}
}
196
// fetchRange lexes the interior of a "[...]" range after RangeOpen has
// already been pushed: an optional leading "!" (Not), then either a
// "lo-hi" pair (RangeLo, RangeBetween, RangeHi) or a run of literal
// characters (Text), followed by the closing "]" (RangeClose). An
// error is recorded on unexpected end of input or a missing "]".
func (l *lexer) fetchRange() {
	var wantHi bool    // next rune must be the upper bound of "lo-hi"
	var wantClose bool // next rune must be the closing "]"
	var seenNot bool   // a leading "!" has already been consumed
	for {
		r := l.read()
		if r == eof {
			l.errorf("unexpected end of input")
			return
		}

		if wantClose {
			// Only "]" may legally follow a completed range body.
			if r != char_range_close {
				l.errorf("expected close range character")
			} else {
				l.tokens.push(Token{RangeClose, string(r)})
			}
			return
		}

		if wantHi {
			// The rune after "-" is the upper bound, whatever it is.
			l.tokens.push(Token{RangeHi, string(r)})
			wantClose = true
			continue
		}

		if !seenNot && r == char_range_not {
			l.tokens.push(Token{Not, string(r)})
			seenNot = true
			continue
		}

		if n, w := l.peek(); n == char_range_between {
			// r is the low bound of "lo-hi"; consume the "-" directly
			// (seek, not read) so unread state is untouched.
			l.seek(w)
			l.tokens.push(Token{RangeLo, string(r)})
			l.tokens.push(Token{RangeBetween, string(n)})
			wantHi = true
			continue
		}

		// Not a lo-hi pair: treat everything up to "]" as literal text.
		l.unread()
		l.fetchText([]rune{char_range_close})
		wantClose = true
	}
}
242
243 func (l *lexer) fetchText(breakers []rune) {
244 var data []rune
245 var escaped bool
246
247 reading:
248 for {
249 r := l.read()
250 if r == eof {
251 break
252 }
253
254 if !escaped {
255 if r == char_escape {
256 escaped = true
257 continue
258 }
259
260 if runes.IndexRune(breakers, r) != -1 {
261 l.unread()
262 break reading
263 }
264 }
265
266 escaped = false
267 data = append(data, r)
268 }
269
270 if len(data) > 0 {
271 l.tokens.push(Token{Text, string(data)})
272 }
273 }
274
View as plain text