// Package shlex implements a simple lexer which splits input into tokens
// using shell-style rules for quoting and commenting.
package shlex

import (
	"bufio"
	"fmt"
	"io"
	"strings"
)

// TokenType is a top-level token classification: a word, space, comment, or unknown.
type TokenType int

// runeTokenClass classifies a UTF-8 character: a quote, space, escape, and so on.
type runeTokenClass int

// lexerState is the internal state used by the lexer state machine.
type lexerState int

// Token is a (type, value) pair representing a lexical token.
type Token struct {
	tokenType TokenType
	value     string
}

// Equal reports whether tokens a and b are equal.
// Two tokens are equal if both their types and values are equal. A nil token
// can never be equal to another token.
func (a *Token) Equal(b *Token) bool {
	if a == nil || b == nil {
		return false
	}
	if a.tokenType != b.tokenType {
		return false
	}
	return a.value == b.value
}

// Named classes of UTF-8 runes.
const (
	spaceRunes            = " \t\r\n"
	escapingQuoteRunes    = `"`
	nonEscapingQuoteRunes = "'"
	escapeRunes           = `\`
	commentRunes          = "#"
)

// Classes of rune tokens.
const (
	unknownRuneClass runeTokenClass = iota
	spaceRuneClass
	escapingQuoteRuneClass
	nonEscapingQuoteRuneClass
	escapeRuneClass
	commentRuneClass
	eofRuneClass
)

// Classes of lexical token.
const (
	UnknownToken TokenType = iota
	WordToken
	SpaceToken
	CommentToken
)

// Lexer state machine states.
const (
	startState           lexerState = iota // no runes have been seen yet
	inWordState                            // processing regular runes in a word
	escapingState                          // we have just consumed an escape rune; the next rune is literal
	escapingQuotedState                    // we have just consumed an escape rune within a quoted string
	quotingEscapingState                   // we are within a quoted string that supports escaping
	quotingState                           // we are within a string that does not support escaping
	commentState                           // we are within a comment (everything after an unquoted, unescaped #)
)

// tokenClassifier is used for classifying rune characters.
type tokenClassifier map[rune]runeTokenClass

// addRuneClass registers each rune in the given string under the given class.
func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) {
	for _, runeChar := range runes {
		typeMap[runeChar] = tokenType
	}
}

// newDefaultClassifier creates a new classifier for ASCII characters.
func newDefaultClassifier() tokenClassifier {
	t := tokenClassifier{}
	t.addRuneClass(spaceRunes, spaceRuneClass)
	t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
	t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
	t.addRuneClass(escapeRunes, escapeRuneClass)
	t.addRuneClass(commentRunes, commentRuneClass)
	return t
}
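
// As an illustrative sketch (not part of the package API): a classifier for a
// hypothetical dialect in which ';' also starts a comment could be built the
// same way, since tokenClassifier is just a rune-to-class map:
//
//	c := newDefaultClassifier()
//	c.addRuneClass(";", commentRuneClass)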

// ClassifyRune classifies a rune, returning unknownRuneClass for runes that
// have not been registered.
func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
	return t[runeVal]
}

// Lexer turns an input stream into a sequence of tokens. Whitespace and
// comments are skipped.
type Lexer Tokenizer

// NewLexer creates a new lexer from an input stream.
func NewLexer(r io.Reader) *Lexer {
	return (*Lexer)(NewTokenizer(r))
}
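
// A typical stream-processing loop looks like the following sketch (os.Stdin
// is just an example source):
//
//	l := shlex.NewLexer(os.Stdin)
//	for {
//		word, err := l.Next()
//		if err != nil {
//			break // err is io.EOF once the input is exhausted
//		}
//		// process word
//	}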

// Next returns the next word, or an error. If there are no more words,
// the error will be io.EOF.
func (l *Lexer) Next() (string, error) {
	for {
		token, err := (*Tokenizer)(l).Next()
		if err != nil {
			return "", err
		}
		switch token.tokenType {
		case WordToken:
			return token.value, nil
		case CommentToken:
			// comments are skipped; keep scanning for the next word
		default:
			return "", fmt.Errorf("unknown token type: %v", token.tokenType)
		}
	}
}

// Tokenizer turns an input stream into a sequence of typed tokens.
type Tokenizer struct {
	input      bufio.Reader
	classifier tokenClassifier
}

// NewTokenizer creates a new tokenizer from an input stream.
func NewTokenizer(r io.Reader) *Tokenizer {
	input := bufio.NewReader(r)
	classifier := newDefaultClassifier()
	return &Tokenizer{
		input:      *input,
		classifier: classifier,
	}
}
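
// To access the raw token stream, including comment tokens, drive the
// tokenizer directly (illustrative sketch):
//
//	t := shlex.NewTokenizer(strings.NewReader("ls -l # list files"))
//	for {
//		tok, err := t.Next()
//		if err != nil {
//			break // io.EOF at end of input
//		}
//		// each tok is a *Token; the comment arrives as its own token
//	}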

// scanStream scans the stream for the next token using the internal state
// machine. It returns io.EOF if the stream is exhausted before any token has
// been started.
func (t *Tokenizer) scanStream() (*Token, error) {
	state := startState
	var tokenType TokenType
	var value []rune
	var nextRune rune
	var nextRuneType runeTokenClass
	var err error

	for {
		nextRune, _, err = t.input.ReadRune()
		nextRuneType = t.classifier.ClassifyRune(nextRune)

		if err == io.EOF {
			nextRuneType = eofRuneClass
			err = nil
		} else if err != nil {
			return nil, err
		}

		switch state {
		case startState: // no runes have been seen yet
			switch nextRuneType {
			case eofRuneClass:
				return nil, io.EOF
			case spaceRuneClass:
				// skip leading whitespace
			case escapingQuoteRuneClass:
				tokenType = WordToken
				state = quotingEscapingState
			case nonEscapingQuoteRuneClass:
				tokenType = WordToken
				state = quotingState
			case escapeRuneClass:
				tokenType = WordToken
				state = escapingState
			case commentRuneClass:
				tokenType = CommentToken
				state = commentState
			default:
				tokenType = WordToken
				value = append(value, nextRune)
				state = inWordState
			}
		case inWordState: // in a regular word
			switch nextRuneType {
			case eofRuneClass, spaceRuneClass:
				// the word is complete
				return &Token{tokenType: tokenType, value: string(value)}, err
			case escapingQuoteRuneClass:
				state = quotingEscapingState
			case nonEscapingQuoteRuneClass:
				state = quotingState
			case escapeRuneClass:
				state = escapingState
			default:
				value = append(value, nextRune)
			}
		case escapingState: // the rune after an unquoted escape character is taken literally
			switch nextRuneType {
			case eofRuneClass:
				err = fmt.Errorf("EOF found after escape character")
				return &Token{tokenType: tokenType, value: string(value)}, err
			default:
				state = inWordState
				value = append(value, nextRune)
			}
		case escapingQuotedState: // the rune after an escape character inside double quotes
			switch nextRuneType {
			case eofRuneClass:
				err = fmt.Errorf("EOF found after escape character")
				return &Token{tokenType: tokenType, value: string(value)}, err
			default:
				state = quotingEscapingState
				value = append(value, nextRune)
			}
		case quotingEscapingState: // inside double quotes, where escaping is allowed
			switch nextRuneType {
			case eofRuneClass:
				err = fmt.Errorf("EOF found when expecting closing quote")
				return &Token{tokenType: tokenType, value: string(value)}, err
			case escapingQuoteRuneClass:
				state = inWordState
			case escapeRuneClass:
				state = escapingQuotedState
			default:
				value = append(value, nextRune)
			}
		case quotingState: // inside single quotes, where no escaping is possible
			switch nextRuneType {
			case eofRuneClass:
				err = fmt.Errorf("EOF found when expecting closing quote")
				return &Token{tokenType: tokenType, value: string(value)}, err
			case nonEscapingQuoteRuneClass:
				state = inWordState
			default:
				value = append(value, nextRune)
			}
		case commentState: // inside a comment, which runs to the end of the line
			switch nextRuneType {
			case eofRuneClass:
				return &Token{tokenType: tokenType, value: string(value)}, err
			case spaceRuneClass:
				if nextRune == '\n' {
					// a newline terminates the comment
					return &Token{tokenType: tokenType, value: string(value)}, err
				}
				value = append(value, nextRune)
			default:
				value = append(value, nextRune)
			}
		default:
			return nil, fmt.Errorf("unexpected state: %v", state)
		}
	}
}
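
// Worked example of the state machine above: the input `a"b c"\ d` lexes as a
// single word. The machine moves startState -> inWordState on 'a', enters
// quotingEscapingState for "b c", returns to inWordState at the closing quote,
// passes through escapingState for the escaped space, and finally emits the
// word `ab c d` when unquoted whitespace or EOF is seen.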

// Next returns the next token in the stream.
func (t *Tokenizer) Next() (*Token, error) {
	return t.scanStream()
}

// Split partitions a string into a slice of strings using shell-style rules
// for quoting and commenting.
func Split(s string) ([]string, error) {
	l := NewLexer(strings.NewReader(s))
	subStrings := make([]string, 0)
	for {
		word, err := l.Next()
		if err != nil {
			if err == io.EOF {
				return subStrings, nil
			}
			return subStrings, err
		}
		subStrings = append(subStrings, word)
	}
}
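
// Basic use of Split (illustrative sketch; the trailing comment is dropped
// because the lexer skips comment tokens):
//
//	words, err := shlex.Split(`one "two three" four # comment`)
//	// err == nil; words == []string{"one", "two three", "four"}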