1
2 package cascadia
3
4 import (
5 "errors"
6 "fmt"
7 "regexp"
8 "strconv"
9 "strings"
10 )
11
12
// parser holds the state of one pass over a selector string. Its methods read
// from s starting at offset i and advance i as they consume input.
type parser struct {
	s string // the text being parsed
	i int    // byte offset in s of the next unread byte

	// acceptPseudoElements controls whether pseudo-elements are allowed. When
	// it is false, parseSimpleSelectorSequence returns an error as soon as it
	// encounters one.
	acceptPseudoElements bool
}
21
22
23 func (p *parser) parseEscape() (result string, err error) {
24 if len(p.s) < p.i+2 || p.s[p.i] != '\\' {
25 return "", errors.New("invalid escape sequence")
26 }
27
28 start := p.i + 1
29 c := p.s[start]
30 switch {
31 case c == '\r' || c == '\n' || c == '\f':
32 return "", errors.New("escaped line ending outside string")
33 case hexDigit(c):
34
35 var i int
36 for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
37
38 }
39 v, _ := strconv.ParseUint(p.s[start:i], 16, 64)
40 if len(p.s) > i {
41 switch p.s[i] {
42 case '\r':
43 i++
44 if len(p.s) > i && p.s[i] == '\n' {
45 i++
46 }
47 case ' ', '\t', '\n', '\f':
48 i++
49 }
50 }
51 p.i = i
52 return string(rune(v)), nil
53 }
54
55
56 result = p.s[start : start+1]
57 p.i += 2
58 return result, nil
59 }
60
61
// toLowerASCII returns s with the ASCII letters 'A'-'Z' replaced by their
// lowercase counterparts. All other bytes, including those of multi-byte
// UTF-8 sequences, are left untouched. When s contains no uppercase ASCII
// letter it is returned as is, without allocating.
func toLowerASCII(s string) string {
	// Locate the first byte that needs changing, if any.
	first := -1
	for i := 0; i < len(s); i++ {
		if s[i] >= 'A' && s[i] <= 'Z' {
			first = i
			break
		}
	}
	if first < 0 {
		return s
	}

	lowered := []byte(s)
	for i := first; i < len(lowered); i++ {
		if c := lowered[i]; c >= 'A' && c <= 'Z' {
			lowered[i] = c + ('a' - 'A')
		}
	}
	return string(lowered)
}
80
// hexDigit reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func hexDigit(c byte) bool {
	switch {
	case c >= '0' && c <= '9':
		return true
	case c >= 'a' && c <= 'f':
		return true
	case c >= 'A' && c <= 'F':
		return true
	}
	return false
}
84
85
86
// nameStart reports whether c may begin a name: an ASCII letter, an
// underscore, or any byte above 127 (i.e. part of a non-ASCII character).
func nameStart(c byte) bool {
	if c == '_' || c >= 0x80 {
		return true
	}
	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
}
90
91
92
// nameChar reports whether c may appear inside a name: an ASCII letter or
// digit, an underscore, a hyphen, or any byte above 127 (i.e. part of a
// non-ASCII character).
func nameChar(c byte) bool {
	switch {
	case c >= 0x80, c == '_', c == '-':
		return true
	case c >= '0' && c <= '9':
		return true
	case c >= 'a' && c <= 'z':
		return true
	case c >= 'A' && c <= 'Z':
		return true
	}
	return false
}
97
98
99 func (p *parser) parseIdentifier() (result string, err error) {
100 const prefix = '-'
101 var numPrefix int
102
103 for len(p.s) > p.i && p.s[p.i] == prefix {
104 p.i++
105 numPrefix++
106 }
107
108 if len(p.s) <= p.i {
109 return "", errors.New("expected identifier, found EOF instead")
110 }
111
112 if c := p.s[p.i]; !(nameStart(c) || c == '\\') {
113 return "", fmt.Errorf("expected identifier, found %c instead", c)
114 }
115
116 result, err = p.parseName()
117 if numPrefix > 0 && err == nil {
118 result = strings.Repeat(string(prefix), numPrefix) + result
119 }
120 return
121 }
122
123
124
125 func (p *parser) parseName() (result string, err error) {
126 i := p.i
127 loop:
128 for i < len(p.s) {
129 c := p.s[i]
130 switch {
131 case nameChar(c):
132 start := i
133 for i < len(p.s) && nameChar(p.s[i]) {
134 i++
135 }
136 result += p.s[start:i]
137 case c == '\\':
138 p.i = i
139 val, err := p.parseEscape()
140 if err != nil {
141 return "", err
142 }
143 i = p.i
144 result += val
145 default:
146 break loop
147 }
148 }
149
150 if result == "" {
151 return "", errors.New("expected name, found EOF instead")
152 }
153
154 p.i = i
155 return result, nil
156 }
157
158
159 func (p *parser) parseString() (result string, err error) {
160 i := p.i
161 if len(p.s) < i+2 {
162 return "", errors.New("expected string, found EOF instead")
163 }
164
165 quote := p.s[i]
166 i++
167
168 loop:
169 for i < len(p.s) {
170 switch p.s[i] {
171 case '\\':
172 if len(p.s) > i+1 {
173 switch c := p.s[i+1]; c {
174 case '\r':
175 if len(p.s) > i+2 && p.s[i+2] == '\n' {
176 i += 3
177 continue loop
178 }
179 fallthrough
180 case '\n', '\f':
181 i += 2
182 continue loop
183 }
184 }
185 p.i = i
186 val, err := p.parseEscape()
187 if err != nil {
188 return "", err
189 }
190 i = p.i
191 result += val
192 case quote:
193 break loop
194 case '\r', '\n', '\f':
195 return "", errors.New("unexpected end of line in string")
196 default:
197 start := i
198 for i < len(p.s) {
199 if c := p.s[i]; c == quote || c == '\\' || c == '\r' || c == '\n' || c == '\f' {
200 break
201 }
202 i++
203 }
204 result += p.s[start:i]
205 }
206 }
207
208 if i >= len(p.s) {
209 return "", errors.New("EOF in string")
210 }
211
212
213 i++
214
215 p.i = i
216 return result, nil
217 }
218
219
220
221 func (p *parser) parseRegex() (rx *regexp.Regexp, err error) {
222 i := p.i
223 if len(p.s) < i+2 {
224 return nil, errors.New("expected regular expression, found EOF instead")
225 }
226
227
228
229 open := 0
230
231 loop:
232 for i < len(p.s) {
233 switch p.s[i] {
234 case '(', '[':
235 open++
236 case ')', ']':
237 open--
238 if open < 0 {
239 break loop
240 }
241 }
242 i++
243 }
244
245 if i >= len(p.s) {
246 return nil, errors.New("EOF in regular expression")
247 }
248 rx, err = regexp.Compile(p.s[p.i:i])
249 p.i = i
250 return rx, err
251 }
252
253
254
255 func (p *parser) skipWhitespace() bool {
256 i := p.i
257 for i < len(p.s) {
258 switch p.s[i] {
259 case ' ', '\t', '\r', '\n', '\f':
260 i++
261 continue
262 case '/':
263 if strings.HasPrefix(p.s[i:], "/*") {
264 end := strings.Index(p.s[i+len("/*"):], "*/")
265 if end != -1 {
266 i += end + len("/**/")
267 continue
268 }
269 }
270 }
271 break
272 }
273
274 if i > p.i {
275 p.i = i
276 return true
277 }
278
279 return false
280 }
281
282
283
284 func (p *parser) consumeParenthesis() bool {
285 if p.i < len(p.s) && p.s[p.i] == '(' {
286 p.i++
287 p.skipWhitespace()
288 return true
289 }
290 return false
291 }
292
293
294
295 func (p *parser) consumeClosingParenthesis() bool {
296 i := p.i
297 p.skipWhitespace()
298 if p.i < len(p.s) && p.s[p.i] == ')' {
299 p.i++
300 return true
301 }
302 p.i = i
303 return false
304 }
305
306
307 func (p *parser) parseTypeSelector() (result tagSelector, err error) {
308 tag, err := p.parseIdentifier()
309 if err != nil {
310 return
311 }
312 return tagSelector{tag: toLowerASCII(tag)}, nil
313 }
314
315
316 func (p *parser) parseIDSelector() (idSelector, error) {
317 if p.i >= len(p.s) {
318 return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead")
319 }
320 if p.s[p.i] != '#' {
321 return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
322 }
323
324 p.i++
325 id, err := p.parseName()
326 if err != nil {
327 return idSelector{}, err
328 }
329
330 return idSelector{id: id}, nil
331 }
332
333
334 func (p *parser) parseClassSelector() (classSelector, error) {
335 if p.i >= len(p.s) {
336 return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead")
337 }
338 if p.s[p.i] != '.' {
339 return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
340 }
341
342 p.i++
343 class, err := p.parseIdentifier()
344 if err != nil {
345 return classSelector{}, err
346 }
347
348 return classSelector{class: class}, nil
349 }
350
351
352 func (p *parser) parseAttributeSelector() (attrSelector, error) {
353 if p.i >= len(p.s) {
354 return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
355 }
356 if p.s[p.i] != '[' {
357 return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
358 }
359
360 p.i++
361 p.skipWhitespace()
362 key, err := p.parseIdentifier()
363 if err != nil {
364 return attrSelector{}, err
365 }
366 key = toLowerASCII(key)
367
368 p.skipWhitespace()
369 if p.i >= len(p.s) {
370 return attrSelector{}, errors.New("unexpected EOF in attribute selector")
371 }
372
373 if p.s[p.i] == ']' {
374 p.i++
375 return attrSelector{key: key, operation: ""}, nil
376 }
377
378 if p.i+2 >= len(p.s) {
379 return attrSelector{}, errors.New("unexpected EOF in attribute selector")
380 }
381
382 op := p.s[p.i : p.i+2]
383 if op[0] == '=' {
384 op = "="
385 } else if op[1] != '=' {
386 return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
387 }
388 p.i += len(op)
389
390 p.skipWhitespace()
391 if p.i >= len(p.s) {
392 return attrSelector{}, errors.New("unexpected EOF in attribute selector")
393 }
394 var val string
395 var rx *regexp.Regexp
396 if op == "#=" {
397 rx, err = p.parseRegex()
398 } else {
399 switch p.s[p.i] {
400 case '\'', '"':
401 val, err = p.parseString()
402 default:
403 val, err = p.parseIdentifier()
404 }
405 }
406 if err != nil {
407 return attrSelector{}, err
408 }
409
410 p.skipWhitespace()
411 if p.i >= len(p.s) {
412 return attrSelector{}, errors.New("unexpected EOF in attribute selector")
413 }
414
415
416 ignoreCase := false
417 if p.s[p.i] == 'i' || p.s[p.i] == 'I' {
418 ignoreCase = true
419 p.i++
420 }
421
422 p.skipWhitespace()
423 if p.i >= len(p.s) {
424 return attrSelector{}, errors.New("unexpected EOF in attribute selector")
425 }
426
427 if p.s[p.i] != ']' {
428 return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
429 }
430 p.i++
431
432 switch op {
433 case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=":
434 return attrSelector{key: key, val: val, operation: op, regexp: rx, insensitive: ignoreCase}, nil
435 default:
436 return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op)
437 }
438 }
439
// Errors returned by parsePseudoclassSelector when the parentheses around a
// pseudoclass argument are missing or incomplete.
var (
	// errExpectedParenthesis: no '(' follows a pseudoclass that takes an argument.
	errExpectedParenthesis = errors.New("expected '(' but didn't find it")
	// errExpectedClosingParenthesis: no ')' follows the pseudoclass argument.
	errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
	// errUnmatchedParenthesis: the input ends right after the opening '('.
	errUnmatchedParenthesis = errors.New("unmatched '('")
)
445
446
447
448
449
450 func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) {
451 if p.i >= len(p.s) {
452 return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
453 }
454 if p.s[p.i] != ':' {
455 return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
456 }
457
458 p.i++
459 var mustBePseudoElement bool
460 if p.i >= len(p.s) {
461 return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)")
462 }
463 if p.s[p.i] == ':' {
464 mustBePseudoElement = true
465 p.i++
466 }
467
468 name, err := p.parseIdentifier()
469 if err != nil {
470 return
471 }
472 name = toLowerASCII(name)
473 if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" &&
474 name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" &&
475 name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") {
476 return out, "", fmt.Errorf("unknown pseudoelement :%s", name)
477 }
478
479 switch name {
480 case "not", "has", "haschild":
481 if !p.consumeParenthesis() {
482 return out, "", errExpectedParenthesis
483 }
484 sel, parseErr := p.parseSelectorGroup()
485 if parseErr != nil {
486 return out, "", parseErr
487 }
488 if !p.consumeClosingParenthesis() {
489 return out, "", errExpectedClosingParenthesis
490 }
491
492 out = relativePseudoClassSelector{name: name, match: sel}
493
494 case "contains", "containsown":
495 if !p.consumeParenthesis() {
496 return out, "", errExpectedParenthesis
497 }
498 if p.i == len(p.s) {
499 return out, "", errUnmatchedParenthesis
500 }
501 var val string
502 switch p.s[p.i] {
503 case '\'', '"':
504 val, err = p.parseString()
505 default:
506 val, err = p.parseIdentifier()
507 }
508 if err != nil {
509 return out, "", err
510 }
511 val = strings.ToLower(val)
512 p.skipWhitespace()
513 if p.i >= len(p.s) {
514 return out, "", errors.New("unexpected EOF in pseudo selector")
515 }
516 if !p.consumeClosingParenthesis() {
517 return out, "", errExpectedClosingParenthesis
518 }
519
520 out = containsPseudoClassSelector{own: name == "containsown", value: val}
521
522 case "matches", "matchesown":
523 if !p.consumeParenthesis() {
524 return out, "", errExpectedParenthesis
525 }
526 rx, err := p.parseRegex()
527 if err != nil {
528 return out, "", err
529 }
530 if p.i >= len(p.s) {
531 return out, "", errors.New("unexpected EOF in pseudo selector")
532 }
533 if !p.consumeClosingParenthesis() {
534 return out, "", errExpectedClosingParenthesis
535 }
536
537 out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx}
538
539 case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
540 if !p.consumeParenthesis() {
541 return out, "", errExpectedParenthesis
542 }
543 a, b, err := p.parseNth()
544 if err != nil {
545 return out, "", err
546 }
547 if !p.consumeClosingParenthesis() {
548 return out, "", errExpectedClosingParenthesis
549 }
550 last := name == "nth-last-child" || name == "nth-last-of-type"
551 ofType := name == "nth-of-type" || name == "nth-last-of-type"
552 out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType}
553
554 case "first-child":
555 out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false}
556 case "last-child":
557 out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true}
558 case "first-of-type":
559 out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false}
560 case "last-of-type":
561 out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true}
562 case "only-child":
563 out = onlyChildPseudoClassSelector{ofType: false}
564 case "only-of-type":
565 out = onlyChildPseudoClassSelector{ofType: true}
566 case "input":
567 out = inputPseudoClassSelector{}
568 case "empty":
569 out = emptyElementPseudoClassSelector{}
570 case "root":
571 out = rootPseudoClassSelector{}
572 case "link":
573 out = linkPseudoClassSelector{}
574 case "lang":
575 if !p.consumeParenthesis() {
576 return out, "", errExpectedParenthesis
577 }
578 if p.i == len(p.s) {
579 return out, "", errUnmatchedParenthesis
580 }
581 val, err := p.parseIdentifier()
582 if err != nil {
583 return out, "", err
584 }
585 val = strings.ToLower(val)
586 p.skipWhitespace()
587 if p.i >= len(p.s) {
588 return out, "", errors.New("unexpected EOF in pseudo selector")
589 }
590 if !p.consumeClosingParenthesis() {
591 return out, "", errExpectedClosingParenthesis
592 }
593 out = langPseudoClassSelector{lang: val}
594 case "enabled":
595 out = enabledPseudoClassSelector{}
596 case "disabled":
597 out = disabledPseudoClassSelector{}
598 case "checked":
599 out = checkedPseudoClassSelector{}
600 case "visited", "hover", "active", "focus", "target":
601
602 out = neverMatchSelector{value: ":" + name}
603 case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error":
604 return nil, name, nil
605 default:
606 return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name)
607 }
608 return
609 }
610
611
612 func (p *parser) parseInteger() (int, error) {
613 i := p.i
614 start := i
615 for i < len(p.s) && '0' <= p.s[i] && p.s[i] <= '9' {
616 i++
617 }
618 if i == start {
619 return 0, errors.New("expected integer, but didn't find it")
620 }
621 p.i = i
622
623 val, err := strconv.Atoi(p.s[start:i])
624 if err != nil {
625 return 0, err
626 }
627
628 return val, nil
629 }
630
631
// parseNth parses the argument of an nth-style pseudoclass: the keyword "odd"
// or "even" (matched ASCII-case-insensitively), or an expression of the form
// an+b. It returns the coefficients a and b:
//
//	odd   -> a=2, b=1
//	even  -> a=2, b=0
//	5     -> a=0, b=5   (a bare integer is the constant term)
//	n     -> a=1, b=0   ("-n" gives a=-1)
//	3n-2  -> a=3, b=-2
//
// Whitespace is accepted after "n" and after the sign that follows it. The
// function is written as a small state machine whose states are the labels
// below. Every path must be followed by more input (the caller expects a
// closing parenthesis), so running out of input is always an error.
func (p *parser) parseNth() (a, b int, err error) {
	// Initial state: decide what the expression starts with.
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case '-':
		p.i++
		goto negativeA
	case '+':
		p.i++
		goto positiveA
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		// Leave the digit in place; positiveA reads the whole integer.
		goto positiveA
	case 'n', 'N':
		// A bare "n" means a coefficient of 1.
		a = 1
		p.i++
		goto readN
	case 'o', 'O', 'e', 'E':
		// Possibly the keyword "odd" or "even".
		id, nameErr := p.parseName()
		if nameErr != nil {
			return 0, 0, nameErr
		}
		id = toLowerASCII(id)
		if id == "odd" {
			return 2, 1, nil
		}
		if id == "even" {
			return 2, 0, nil
		}
		return 0, 0, fmt.Errorf("expected 'odd' or 'even', but found '%s' instead", id)
	default:
		goto invalid
	}

	// positiveA: the leading number is non-negative (optional '+' already consumed).
positiveA:
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		a, err = p.parseInteger()
		if err != nil {
			return 0, 0, err
		}
		goto readA
	case 'n', 'N':
		a = 1
		p.i++
		goto readN
	default:
		goto invalid
	}

	// negativeA: a leading '-' has been consumed.
negativeA:
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		a, err = p.parseInteger()
		if err != nil {
			return 0, 0, err
		}
		a = -a
		goto readA
	case 'n', 'N':
		a = -1
		p.i++
		goto readN
	default:
		goto invalid
	}

	// readA: a signed integer has been read; it is either the coefficient of
	// n or, if no "n" follows, the constant term.
readA:
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case 'n', 'N':
		p.i++
		goto readN
	default:
		// No "n" follows, so the integer just read is b and a is zero.
		return 0, a, nil
	}

	// readN: "n" has been consumed; an optional "+b" or "-b" may follow.
readN:
	p.skipWhitespace()
	if p.i >= len(p.s) {
		goto eof
	}
	switch p.s[p.i] {
	case '+':
		p.i++
		p.skipWhitespace()
		b, err = p.parseInteger()
		if err != nil {
			return 0, 0, err
		}
		return a, b, nil
	case '-':
		p.i++
		p.skipWhitespace()
		b, err = p.parseInteger()
		if err != nil {
			return 0, 0, err
		}
		return a, -b, nil
	default:
		// No constant term.
		return a, 0, nil
	}

eof:
	return 0, 0, errors.New("unexpected EOF while attempting to parse expression of form an+b")

invalid:
	return 0, 0, errors.New("unexpected character while attempting to parse expression of form an+b")
}
751
752
753
754 func (p *parser) parseSimpleSelectorSequence() (Sel, error) {
755 var selectors []Sel
756
757 if p.i >= len(p.s) {
758 return nil, errors.New("expected selector, found EOF instead")
759 }
760
761 switch p.s[p.i] {
762 case '*':
763
764 p.i++
765 if p.i+2 < len(p.s) && p.s[p.i:p.i+2] == "|*" {
766 p.i += 2
767 }
768 case '#', '.', '[', ':':
769
770 default:
771 r, err := p.parseTypeSelector()
772 if err != nil {
773 return nil, err
774 }
775 selectors = append(selectors, r)
776 }
777
778 var pseudoElement string
779 loop:
780 for p.i < len(p.s) {
781 var (
782 ns Sel
783 newPseudoElement string
784 err error
785 )
786 switch p.s[p.i] {
787 case '#':
788 ns, err = p.parseIDSelector()
789 case '.':
790 ns, err = p.parseClassSelector()
791 case '[':
792 ns, err = p.parseAttributeSelector()
793 case ':':
794 ns, newPseudoElement, err = p.parsePseudoclassSelector()
795 default:
796 break loop
797 }
798 if err != nil {
799 return nil, err
800 }
801
802
803
804
805 if ns == nil {
806 if pseudoElement != "" {
807 return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement)
808 }
809 if !p.acceptPseudoElements {
810 return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement)
811 }
812 pseudoElement = newPseudoElement
813 } else {
814 if pseudoElement != "" {
815 return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement)
816 }
817 selectors = append(selectors, ns)
818 }
819
820 }
821 if len(selectors) == 1 && pseudoElement == "" {
822 return selectors[0], nil
823 }
824 return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil
825 }
826
827
828 func (p *parser) parseSelector() (Sel, error) {
829 p.skipWhitespace()
830 result, err := p.parseSimpleSelectorSequence()
831 if err != nil {
832 return nil, err
833 }
834
835 for {
836 var (
837 combinator byte
838 c Sel
839 )
840 if p.skipWhitespace() {
841 combinator = ' '
842 }
843 if p.i >= len(p.s) {
844 return result, nil
845 }
846
847 switch p.s[p.i] {
848 case '+', '>', '~':
849 combinator = p.s[p.i]
850 p.i++
851 p.skipWhitespace()
852 case ',', ')':
853
854 return result, nil
855 }
856
857 if combinator == 0 {
858 return result, nil
859 }
860
861 c, err = p.parseSimpleSelectorSequence()
862 if err != nil {
863 return nil, err
864 }
865 result = combinedSelector{first: result, combinator: combinator, second: c}
866 }
867 }
868
869
870 func (p *parser) parseSelectorGroup() (SelectorGroup, error) {
871 current, err := p.parseSelector()
872 if err != nil {
873 return nil, err
874 }
875 result := SelectorGroup{current}
876
877 for p.i < len(p.s) {
878 if p.s[p.i] != ',' {
879 break
880 }
881 p.i++
882 c, err := p.parseSelector()
883 if err != nil {
884 return nil, err
885 }
886 result = append(result, c)
887 }
888 return result, nil
889 }
890
View as plain text