1 package cascadia
2
3 import (
4 "bytes"
5 "encoding/json"
6 "fmt"
7 "io/ioutil"
8 "log"
9 "reflect"
10 "strings"
11 "testing"
12
13 "golang.org/x/net/html"
14 )
15
16 var validSelectors []validSelector
17
18 func init() {
19 c, err := ioutil.ReadFile("test_resources/valid_selectors.json")
20 if err != nil {
21 log.Fatal(err)
22 }
23 if err = json.Unmarshal(c, &validSelectors); err != nil {
24 log.Fatal(err)
25 }
26 }
27
// selectorTest is one table entry: a piece of markup, a selector to run
// against it, and the expected rendering of every matched node.
//
// The field order matters: the test table may be written with positional
// composite literals.
type selectorTest struct {
	HTML     string   // markup that gets parsed into the document under test
	selector string   // selector source text
	results  []string // expected rendered output of each match, in match order
}
32
33 func nodeString(n *html.Node) string {
34 buf := bytes.NewBufferString("")
35 if err := html.Render(buf, n); err != nil {
36 log.Fatal(err)
37 }
38 return buf.String()
39 }
40
41 var selectorTests = []selectorTest{
42 {
43 `<body><address>This address...</address></body>`,
44 "address",
45 []string{
46 "<address>This address...</address>",
47 },
48 },
49 {
50 `<!-- comment --><html><head></head><body>text</body></html>`,
51 "*",
52 []string{
53 "<html><head></head><body>text</body></html>",
54 "<head></head>",
55 "<body>text</body>",
56 },
57 },
58 {
59 `<html><head></head><body></body></html>`,
60 "*",
61 []string{
62 "<html><head></head><body></body></html>",
63 "<head></head>",
64 "<body></body>",
65 },
66 },
67 {
68 `<p id="foo"><p id="bar">`,
69 "#foo",
70 []string{
71 `<p id="foo"></p>`,
72 },
73 },
74 {
75 `<ul><li id="t1"><p id="t1">`,
76 "li#t1",
77 []string{
78 `<li id="t1"><p id="t1"></p></li>`,
79 },
80 },
81 {
82 `<ol><li id="t4"><li id="t44">`,
83 "*#t4",
84 []string{
85 `<li id="t4"></li>`,
86 },
87 },
88 {
89 `<ul><li class="t1"><li class="t2">`,
90 ".t1",
91 []string{
92 `<li class="t1"></li>`,
93 },
94 },
95 {
96 `<p class="t1 t2">`,
97 "p.t1",
98 []string{
99 `<p class="t1 t2"></p>`,
100 },
101 },
102 {
103 `<div class="test">`,
104 "div.teST",
105 []string{},
106 },
107 {
108 `<p class="t1 t2">`,
109 ".t1.fail",
110 []string{},
111 },
112 {
113 `<p class="t1 t2">`,
114 "p.t1.t2",
115 []string{
116 `<p class="t1 t2"></p>`,
117 },
118 },
119 {
120 `<p class="--t1 --t2">`,
121 "p.--t1",
122 []string{
123 `<p class="--t1 --t2"></p>`,
124 },
125 },
126 {
127 `<p class="--t1 --t2">`,
128 "p.--t1.--t2",
129 []string{
130 `<p class="--t1 --t2"></p>`,
131 },
132 },
133 {
134 `<p><p title="title">`,
135 "p[title]",
136 []string{
137 `<p title="title"></p>`,
138 },
139 },
140 {
141 `<div><div class="Red">`,
142 `div[class="red" i]`,
143 []string{
144 `<div class="Red"></div>`,
145 },
146 },
147 {
148 `<address><address title="foo"><address title="bar">`,
149 `address[title="foo"]`,
150 []string{
151 `<address title="foo"><address title="bar"></address></address>`,
152 },
153 },
154 {
155 `<address><address title="fooIgnoreCase"><address title="bar">`,
156 `address[title="FoOIgnoRECaSe" i]`,
157 []string{
158 `<address title="fooIgnoreCase"><address title="bar"></address></address>`,
159 },
160 },
161 {
162 `<address><address title="foo"><address title="bar">`,
163 `address[title!="foo"]`,
164 []string{
165 `<address><address title="foo"><address title="bar"></address></address></address>`,
166 `<address title="bar"></address>`,
167 },
168 },
169 {
170 `<address><address title="FOO"><address title="bar">`,
171 `address[title!="foo" i]`,
172 []string{
173 `<address><address title="FOO"><address title="bar"></address></address></address>`,
174 `<address title="bar"></address>`,
175 },
176 },
177 {
178 `<p title="fooBARuFOO"><p title="varfoo">`,
179 `p[title!="FooBarUFoo" i]`,
180 []string{
181 `<p title="varfoo"></p>`,
182 },
183 },
184 {
185 `<p title="tot foo bar">`,
186 `[ title ~= foo ]`,
187 []string{
188 `<p title="tot foo bar"></p>`,
189 },
190 },
191 {
192 `<p title="tot foo bar">`,
193 `p[title~="FOO" i]`,
194 []string{
195 `<p title="tot foo bar"></p>`,
196 },
197 },
198 {
199 `<p title="tot foo bar">`,
200 `p[title~=toofoo i]`,
201 []string{},
202 },
203 {
204 `<p title="hello world">`,
205 `[title~="hello world"]`,
206 []string{},
207 },
208 {
209 `<p title="HELLO world">`,
210 `[title~="hello" i]`,
211 []string{
212 `<p title="HELLO world"></p>`,
213 },
214 },
215 {
216 `<p title="HELLO world">`,
217 `[title~="hello" I]`,
218 []string{
219 `<p title="HELLO world"></p>`,
220 },
221 },
222 {
223 `<p lang="en"><p lang="en-gb"><p lang="enough"><p lang="fr-en">`,
224 `[lang|="en"]`,
225 []string{
226 `<p lang="en"></p>`,
227 `<p lang="en-gb"></p>`,
228 },
229 },
230 {
231 `<p lang="en"><p lang="En-gb"><p lang="enough"><p lang="fr-en">`,
232 `[lang|="EN" i]`,
233 []string{
234 `<p lang="en"></p>`,
235 `<p lang="En-gb"></p>`,
236 },
237 },
238 {
239 `<p lang="en"><p lang="En-gb"><p lang="enough"><p lang="fr-en">`,
240 `[lang|="EN" i]`,
241 []string{
242 `<p lang="en"></p>`,
243 `<p lang="En-gb"></p>`,
244 },
245 },
246 {
247 `<p title="foobar"><p title="barfoo">`,
248 `[title^="foo"]`,
249 []string{
250 `<p title="foobar"></p>`,
251 },
252 },
253 {
254 `<p title="FooBAR"><p title="barfoo">`,
255 `[title^="foo" i]`,
256 []string{
257 `<p title="FooBAR"></p>`,
258 },
259 },
260 {
261 `<p title="foobar"><p title="barfoo">`,
262 `[title$="bar"]`,
263 []string{
264 `<p title="foobar"></p>`,
265 },
266 },
267 {
268 `<p title="foobar"><p title="barfoo">`,
269 `[title$="BAR" i]`,
270 []string{
271 `<p title="foobar"></p>`,
272 },
273 },
274 {
275 `<p title="foobarufoo">`,
276 `[title*="bar"]`,
277 []string{
278 `<p title="foobarufoo"></p>`,
279 },
280 },
281 {
282 `<p title="foobarufoo">`,
283 `[title*="BaRu" i]`,
284 []string{
285 `<p title="foobarufoo"></p>`,
286 },
287 },
288 {
289 `<p title="foobarufoo">`,
290 `[title*="BaRu" I]`,
291 []string{
292 `<p title="foobarufoo"></p>`,
293 },
294 },
295 {
296 `<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
297 `p[class$=" "]`,
298 []string{},
299 },
300 {
301 `<p class="">This text should be green.</p><p>This text should be green.</p>`,
302 `p[class$=""]`,
303 []string{},
304 },
305 {
306 `<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
307 `p[class^=" "]`,
308 []string{},
309 },
310 {
311 `<p class="">This text should be green.</p><p>This text should be green.</p>`,
312 `p[class^=""]`,
313 []string{},
314 },
315 {
316 `<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
317 `p[class*=" "]`,
318 []string{},
319 },
320 {
321 `<p class="">This text should be green.</p><p>This text should be green.</p>`,
322 `p[class*=""]`,
323 []string{},
324 },
325 {
326 `<input type="radio" name="Sex" value="F"/>`,
327 `input[name=Sex][value=F]`,
328 []string{
329 `<input type="radio" name="Sex" value="F"/>`,
330 },
331 },
332 {
333 `<table border="0" cellpadding="0" cellspacing="0" style="table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF"><tr style="height:64px">aaa</tr></table>`,
334 `table[border="0"][cellpadding="0"][cellspacing="0"]`,
335 []string{
336 `<table border="0" cellpadding="0" cellspacing="0" style="table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF"><tbody><tr style="height:64px"></tr></tbody></table>`,
337 },
338 },
339 {
340 `<p class="t1 t2">`,
341 ".t1:not(.t2)",
342 []string{},
343 },
344 {
345 `<div class="t3">`,
346 `div:not(.t1)`,
347 []string{
348 `<div class="t3"></div>`,
349 },
350 },
351 {
352 `<div><div class="t2"><div class="t3">`,
353 `div:not([class="t2"])`,
354 []string{
355 `<div><div class="t2"><div class="t3"></div></div></div>`,
356 `<div class="t3"></div>`,
357 },
358 },
359 {
360 `<ol><li id=1><li id=2><li id=3></ol>`,
361 `li:nth-child(odd)`,
362 []string{
363 `<li id="1"></li>`,
364 `<li id="3"></li>`,
365 },
366 },
367 {
368 `<ol><li id=1><li id=2><li id=3></ol>`,
369 `li:nth-child(even)`,
370 []string{
371 `<li id="2"></li>`,
372 },
373 },
374 {
375 `<ol><li id=1><li id=2><li id=3></ol>`,
376 `li:nth-child(-n+2)`,
377 []string{
378 `<li id="1"></li>`,
379 `<li id="2"></li>`,
380 },
381 },
382 {
383 `<ol><li id=1><li id=2><li id=3></ol>`,
384 `li:nth-child(3n+1)`,
385 []string{
386 `<li id="1"></li>`,
387 },
388 },
389 {
390 `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
391 `li:nth-last-child(odd)`,
392 []string{
393 `<li id="2"></li>`,
394 `<li id="4"></li>`,
395 },
396 },
397 {
398 `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
399 `li:nth-last-child(even)`,
400 []string{
401 `<li id="1"></li>`,
402 `<li id="3"></li>`,
403 },
404 },
405 {
406 `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
407 `li:nth-last-child(-n+2)`,
408 []string{
409 `<li id="3"></li>`,
410 `<li id="4"></li>`,
411 },
412 },
413 {
414 `<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
415 `li:nth-last-child(3n+1)`,
416 []string{
417 `<li id="1"></li>`,
418 `<li id="4"></li>`,
419 },
420 },
421 {
422 `<p>some text <span id="1">and a span</span><span id="2"> and another</span></p>`,
423 `span:first-child`,
424 []string{
425 `<span id="1">and a span</span>`,
426 },
427 },
428 {
429 `<span>a span</span> and some text`,
430 `span:last-child`,
431 []string{
432 `<span>a span</span>`,
433 },
434 },
435 {
436 `<address></address><p id=1><p id=2>`,
437 `p:nth-of-type(2)`,
438 []string{
439 `<p id="2"></p>`,
440 },
441 },
442 {
443 `<address></address><p id=1><p id=2></p><a>`,
444 `p:nth-last-of-type(2)`,
445 []string{
446 `<p id="1"></p>`,
447 },
448 },
449 {
450 `<address></address><p id=1><p id=2></p><a>`,
451 `p:last-of-type`,
452 []string{
453 `<p id="2"></p>`,
454 },
455 },
456 {
457 `<address></address><p id=1><p id=2></p><a>`,
458 `p:first-of-type`,
459 []string{
460 `<p id="1"></p>`,
461 },
462 },
463 {
464 `<div><p id="1"></p><a></a></div><div><p id="2"></p></div>`,
465 `p:only-child`,
466 []string{
467 `<p id="2"></p>`,
468 },
469 },
470 {
471 `<div><p id="1"></p><a></a></div><div><p id="2"></p><p id="3"></p></div>`,
472 `p:only-of-type`,
473 []string{
474 `<p id="1"></p>`,
475 },
476 },
477 {
478 `<p id="1"><!-- --><p id="2">Hello<p id="3"><span>`,
479 `:empty`,
480 []string{
481 `<head></head>`,
482 `<p id="1"><!-- --></p>`,
483 `<span></span>`,
484 },
485 },
486 {
487 `<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`,
488 `div p`,
489 []string{
490 `<p id="1"><table><tbody><tr><td><p id="2"></p></td></tr></tbody></table></p>`,
491 `<p id="2"></p>`,
492 },
493 },
494 {
495 `<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`,
496 `div table p`,
497 []string{
498 `<p id="2"></p>`,
499 },
500 },
501 {
502 `<div><p id="1"><div><p id="2"></div><table><tr><td><p id="3"></table></div>`,
503 `div > p`,
504 []string{
505 `<p id="1"></p>`,
506 `<p id="2"></p>`,
507 },
508 },
509 {
510 `<p id="1"><p id="2"></p><address></address><p id="3">`,
511 `p ~ p`,
512 []string{
513 `<p id="2"></p>`,
514 `<p id="3"></p>`,
515 },
516 },
517 {
518 `<p id="1"></p>
519 <!--comment-->
520 <p id="2"></p><address></address><p id="3">`,
521 `p + p`,
522 []string{
523 `<p id="2"></p>`,
524 },
525 },
526 {
527 `<ul><li></li><li></li></ul><p>`,
528 `li, p`,
529 []string{
530 "<li></li>",
531 "<li></li>",
532 "<p></p>",
533 },
534 },
535 {
536 `<p id="1"><p id="2"></p><address></address><p id="3">`,
537 `p +/*This is a comment*/ p`,
538 []string{
539 `<p id="2"></p>`,
540 },
541 },
542 {
543 `<p>Text block that <span>wraps inner text</span> and continues</p>`,
544 `p:contains("that wraps")`,
545 []string{
546 `<p>Text block that <span>wraps inner text</span> and continues</p>`,
547 },
548 },
549 {
550 `<p>Text block that <span>wraps inner text</span> and continues</p>`,
551 `p:containsOwn("that wraps")`,
552 []string{},
553 },
554 {
555 `<p>Text block that <span>wraps inner text</span> and continues</p>`,
556 `:containsOwn("inner")`,
557 []string{
558 `<span>wraps inner text</span>`,
559 },
560 },
561 {
562 `<p>Text block that <span>wraps inner text</span> and continues</p>`,
563 `p:containsOwn("block")`,
564 []string{
565 `<p>Text block that <span>wraps inner text</span> and continues</p>`,
566 },
567 },
568 {
569 `<div id="d1"><p id="p1"><span>text content</span></p></div><div id="d2"/>`,
570 `div:has(#p1)`,
571 []string{
572 `<div id="d1"><p id="p1"><span>text content</span></p></div>`,
573 },
574 },
575 {
576 `<div id="d1"><p id="p1"><span>contents 1</span></p></div>
577 <div id="d2"><p>contents <em>2</em></p></div>`,
578 `div:has(:containsOwn("2"))`,
579 []string{
580 `<div id="d2"><p>contents <em>2</em></p></div>`,
581 },
582 },
583 {
584 `<body><div id="d1"><p id="p1"><span>contents 1</span></p></div>
585 <div id="d2"><p id="p2">contents <em>2</em></p></div></body>`,
586 `body :has(:containsOwn("2"))`,
587 []string{
588 `<div id="d2"><p id="p2">contents <em>2</em></p></div>`,
589 `<p id="p2">contents <em>2</em></p>`,
590 },
591 },
592 {
593 `<body><div id="d1"><p id="p1"><span>contents 1</span></p></div>
594 <div id="d2"><p id="p2">contents <em>2</em></p></div></body>`,
595 `body :haschild(:containsOwn("2"))`,
596 []string{
597 `<p id="p2">contents <em>2</em></p>`,
598 },
599 },
600 {
601 `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
602 `p:matches([\d])`,
603 []string{
604 `<p id="p1">0123456789</p>`,
605 `<p id="p3">0123ABCD</p>`,
606 },
607 },
608 {
609 `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
610 `p:matches([a-z])`,
611 []string{
612 `<p id="p2">abcdef</p>`,
613 },
614 },
615 {
616 `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
617 `p:matches([a-zA-Z])`,
618 []string{
619 `<p id="p2">abcdef</p>`,
620 `<p id="p3">0123ABCD</p>`,
621 },
622 },
623 {
624 `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
625 `p:matches([^\d])`,
626 []string{
627 `<p id="p2">abcdef</p>`,
628 `<p id="p3">0123ABCD</p>`,
629 },
630 },
631 {
632 `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
633 `p:matches(^(0|a))`,
634 []string{
635 `<p id="p1">0123456789</p>`,
636 `<p id="p2">abcdef</p>`,
637 `<p id="p3">0123ABCD</p>`,
638 },
639 },
640 {
641 `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
642 `p:matches(^\d+$)`,
643 []string{
644 `<p id="p1">0123456789</p>`,
645 },
646 },
647 {
648 `<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
649 `p:not(:matches(^\d+$))`,
650 []string{
651 `<p id="p2">abcdef</p>`,
652 `<p id="p3">0123ABCD</p>`,
653 },
654 },
655 {
656 `<div><p id="p1">01234<em>567</em>89</p><div>`,
657 `div :matchesOwn(^\d+$)`,
658 []string{
659 `<p id="p1">01234<em>567</em>89</p>`,
660 `<em>567</em>`,
661 },
662 },
663 {
664 `<ul>
665 <li><a id="a1" href="http://www.google.com/finance"></a>
666 <li><a id="a2" href="http://finance.yahoo.com/"></a>
667 <li><a id="a2" href="http://finance.untrusted.com/"/>
668 <li><a id="a3" href="https://www.google.com/news"/>
669 <li><a id="a4" href="http://news.yahoo.com"/>
670 </ul>`,
671 `[href#=(fina)]:not([href#=(\/\/[^\/]+untrusted)])`,
672 []string{
673 `<a id="a1" href="http://www.google.com/finance"></a>`,
674 `<a id="a2" href="http://finance.yahoo.com/"></a>`,
675 },
676 },
677 {
678 `<ul>
679 <li><a id="a1" href="http://www.google.com/finance"/>
680 <li><a id="a2" href="http://finance.yahoo.com/"/>
681 <li><a id="a3" href="https://www.google.com/news"></a>
682 <li><a id="a4" href="http://news.yahoo.com"/>
683 </ul>`,
684 `[href#=(^https:\/\/[^\/]*\/?news)]`,
685 []string{
686 `<a id="a3" href="https://www.google.com/news"></a>`,
687 },
688 },
689 {
690 `<form>
691 <label>Username <input type="text" name="username" /></label>
692 <label>Password <input type="password" name="password" /></label>
693 <label>Country
694 <select name="country">
695 <option value="ca">Canada</option>
696 <option value="us">United States</option>
697 </select>
698 </label>
699 <label>Bio <textarea name="bio"></textarea></label>
700 <button>Sign up</button>
701 </form>`,
702 `:input`,
703 []string{
704 `<input type="text" name="username"/>`,
705 `<input type="password" name="password"/>`,
706 `<select name="country">
707 <option value="ca">Canada</option>
708 <option value="us">United States</option>
709 </select>`,
710 `<textarea name="bio"></textarea>`,
711 `<button>Sign up</button>`,
712 },
713 },
714 {
715 `<html><head></head><body></body></html>`,
716 ":root",
717 []string{
718 "<html><head></head><body></body></html>",
719 },
720 },
721 {
722 `<html><head></head><body></body></html>`,
723 "*:root",
724 []string{
725 "<html><head></head><body></body></html>",
726 },
727 },
728 {
729 `<html><head></head><body></body></html>`,
730 "html:nth-child(1)",
731 []string{
732 "<html><head></head><body></body></html>",
733 },
734 },
735 {
736 `<html><head></head><body></body></html>`,
737 "*:root:first-child",
738 []string{
739 `<html><head></head><body></body></html>`,
740 },
741 },
742 {
743 `<html><head></head><body></body></html>`,
744 "*:root:nth-child(1)",
745 []string{
746 `<html><head></head><body></body></html>`,
747 },
748 },
749 {
750 `<html><head></head><body><a href="http://www.foo.com"></a></body></html>`,
751 "a:not(:root)",
752 []string{
753 `<a href="http://www.foo.com"></a>`,
754 },
755 },
756 {
757 `<html><head></head><body><p></p><div></div><span></span><a></a><form></form></body></html>`,
758 "body > *:nth-child(3n+2)",
759 []string{
760 "<div></div>",
761 "<form></form>",
762 },
763 },
764 {
765 `<html><head></head><body><fieldset disabled><legend id="1"><input id="i1"/></legend><legend id="2"><input id="i2"/></legend></fieldset></body></html>`,
766 "input:disabled",
767 []string{
768 `<input id="i2"/>`,
769 },
770 },
771 {
772 `<html><head></head><body><fieldset disabled></fieldset></body></html>`,
773 ":disabled",
774 []string{
775 `<fieldset disabled=""></fieldset>`,
776 },
777 },
778 {
779 `<html><head></head><body><fieldset></fieldset></body></html>`,
780 ":enabled",
781 []string{
782 `<fieldset></fieldset>`,
783 },
784 },
785 {
786 `<div class=class1></div><div class=class2></div><div class=class3></div>`,
787 "div.class1, div.class2",
788 []string{
789 `<div class="class1"></div>`,
790 `<div class="class2"></div>`,
791 },
792 },
793 }
794
795 func setup(selector, testHTML string) (Selector, *html.Node, error) {
796 s, err := Compile(selector)
797 if err != nil {
798 return nil, nil, fmt.Errorf("error compiling %q: %s", selector, err)
799 }
800
801 doc, err := html.Parse(strings.NewReader(testHTML))
802 if err != nil {
803 return nil, nil, fmt.Errorf("error parsing %q: %s", testHTML, err)
804 }
805 return s, doc, nil
806 }
807
808 func TestSelectors(t *testing.T) {
809 for _, test := range selectorTests {
810 s, doc, err := setup(test.selector, test.HTML)
811 if err != nil {
812 t.Error(err)
813 continue
814 }
815
816 matches := s.MatchAll(doc)
817 if len(matches) != len(test.results) {
818 t.Errorf("selector %s wanted %d elements, got %d instead", test.selector, len(test.results), len(matches))
819 continue
820 }
821
822 for i, m := range matches {
823 got := nodeString(m)
824 if got != test.results[i] {
825 t.Errorf("selector %s wanted %s, got %s instead", test.selector, test.results[i], got)
826 }
827 }
828
829 firstMatch := s.MatchFirst(doc)
830 if len(test.results) == 0 {
831 if firstMatch != nil {
832 t.Errorf("MatchFirst: selector %s want nil, got %s", test.selector, nodeString(firstMatch))
833 }
834 } else {
835 got := nodeString(firstMatch)
836 if got != test.results[0] {
837 t.Errorf("MatchFirst: selector %s want %s, got %s", test.selector, test.results[0], got)
838 }
839 }
840 }
841 }
842
843 func setupMatcher(selector, testHTML string) (Matcher, *html.Node, error) {
844 s, err := ParseGroup(selector)
845 if err != nil {
846 return nil, nil, fmt.Errorf("error compiling %q: %s", selector, err)
847 }
848
849 doc, err := html.Parse(strings.NewReader(testHTML))
850 if err != nil {
851 return nil, nil, fmt.Errorf("error parsing %q: %s", testHTML, err)
852 }
853 return s, doc, nil
854 }
855
856 func TestMatchers(t *testing.T) {
857 for _, test := range selectorTests {
858 s, doc, err := setupMatcher(test.selector, test.HTML)
859 if err != nil {
860 t.Error(err)
861 continue
862 }
863
864 matches := QueryAll(doc, s)
865 if len(matches) != len(test.results) {
866 t.Errorf("selector %s wanted %d elements, got %d instead", test.selector, len(test.results), len(matches))
867 continue
868 }
869
870 for i, m := range matches {
871 got := nodeString(m)
872 if got != test.results[i] {
873 t.Errorf("selector %s wanted %s, got %s instead", test.selector, test.results[i], got)
874 }
875 }
876
877 firstMatch := Query(doc, s)
878 if len(test.results) == 0 {
879 if firstMatch != nil {
880 t.Errorf("Query: selector %s want nil, got %s", test.selector, nodeString(firstMatch))
881 }
882 } else {
883 got := nodeString(firstMatch)
884 if got != test.results[0] {
885 t.Errorf("Query: selector %s want %s, got %s", test.selector, test.results[0], got)
886 }
887 }
888
889 if !reflect.DeepEqual(matches, Selector(s.Match).Filter(matches)) {
890 t.Fatalf("inconsistent Filter result")
891 }
892 }
893 }
894
895 type testPseudo struct {
896 HTML, selector string
897 spec Specificity
898 pseudo string
899 }
900
901 var testsPseudo = []testPseudo{
902 {
903 HTML: `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
904 selector: "#s12:not(FOO)::before",
905 spec: Specificity{1, 0, 2},
906 pseudo: "before",
907 },
908 {
909 HTML: `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
910 selector: "#s12::first-line",
911 spec: Specificity{1, 0, 1},
912 pseudo: "first-line",
913 },
914 {
915 HTML: `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
916 selector: "ol > #s12:first-line",
917 spec: Specificity{1, 0, 2},
918 pseudo: "first-line",
919 },
920 {
921 HTML: `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
922 selector: "#s12:not(FOO)::after",
923 spec: Specificity{1, 0, 2},
924 pseudo: "after",
925 },
926 {
927 HTML: `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
928 selector: "LI.red.level:before",
929 spec: Specificity{0, 2, 2},
930 pseudo: "before",
931 },
932 }
933
934 func TestPseudoElement(t *testing.T) {
935 for _, test := range testsPseudo {
936 s, err := ParseWithPseudoElement(test.selector)
937 if err != nil {
938 t.Fatalf("error compiling %q: %s", test.selector, err)
939 }
940
941 if _, err = Parse(test.selector); err == nil {
942 t.Fatalf("selector %s with pseudo-element should not compile", test.selector)
943 }
944
945 doc, err := html.Parse(strings.NewReader(test.HTML))
946 if err != nil {
947 t.Fatalf("error parsing %q: %s", test.HTML, err)
948 }
949
950 body := doc.FirstChild.LastChild
951 testNode := body.FirstChild.FirstChild.LastChild
952 if !s.Match(testNode) {
953 t.Errorf("%s didn't match (html tree : \n %s) \n", test.selector, nodeString(doc))
954 continue
955 }
956 if s.Specificity() != test.spec {
957 t.Errorf("wrong specificity : expected %v got %v", test.spec, s.Specificity())
958 }
959 if s.PseudoElement() != test.pseudo {
960 t.Errorf("wrong pseudo-element : expected %s got %s", test.pseudo, s.PseudoElement())
961 }
962 }
963 }
964
// invalidSelector is the JSON shape shared by selector fixtures: a name and
// the selector text. Both fields are omitted from encoded output when empty.
type invalidSelector struct {
	// Name is decoded from the "name" key.
	Name string `json:"name,omitempty"`
	// Selector is decoded from the "selector" key.
	Selector string `json:"selector,omitempty"`
}
969
970 type validSelector struct {
971 invalidSelector
972 Expect []string `json:"expect,omitempty"`
973 Exclude []string `json:"exclude,omitempty"`
974 Level int `json:"level,omitempty"`
975 Xfail bool `json:"xfail,omitempty"`
976 }
977
978 func TestShakespeare(t *testing.T) {
979 doc := parseReference("test_resources/shakespeare.html")
980 body := doc.FirstChild.NextSibling.LastChild
981 assertCount := func(selector string, expected int) {
982 sel, err := ParseGroup(selector)
983 if err != nil {
984 t.Errorf("invalid selector %s", selector)
985 }
986 if l := len(Selector(sel.Match).MatchAll(body)); l != expected {
987 t.Errorf("%s -> expected %d, got %d", selector, expected, l)
988 }
989 }
990
991
992 assertCount("*", 246)
993 assertCount("div:only-child", 22)
994 assertCount("div:nth-child(even)", 106)
995 assertCount("div:nth-child(2n)", 106)
996 assertCount("div:nth-child(odd)", 137)
997 assertCount("div:nth-child(2n+1)", 137)
998 assertCount("div:nth-child(n)", 243)
999 assertCount("div:last-child", 53)
1000 assertCount("div:first-child", 51)
1001 assertCount("div > div", 242)
1002 assertCount("div + div", 190)
1003 assertCount("div ~ div", 190)
1004 assertCount("body", 1)
1005 assertCount("body div", 243)
1006 assertCount("div", 243)
1007 assertCount("div div", 242)
1008 assertCount("div div div", 241)
1009 assertCount("div, div, div", 243)
1010 assertCount("div, a, span", 243)
1011 assertCount(".dialog", 51)
1012 assertCount("div.dialog", 51)
1013 assertCount("div .dialog", 51)
1014 assertCount("div.character, div.dialog", 99)
1015 assertCount("div.direction.dialog", 0)
1016 assertCount("div.dialog.direction", 0)
1017 assertCount("div.dialog.scene", 1)
1018 assertCount("div.scene.scene", 1)
1019 assertCount("div.scene .scene", 0)
1020 assertCount("div.direction .dialog ", 0)
1021 assertCount("div .dialog .direction", 4)
1022 assertCount("div.dialog .dialog .direction", 4)
1023 assertCount("#speech5", 1)
1024 assertCount("div#speech5", 1)
1025 assertCount("div #speech5", 1)
1026 assertCount("div.scene div.dialog", 49)
1027 assertCount("div#scene1 div.dialog div", 142)
1028 assertCount("#scene1 #speech1", 1)
1029 assertCount("div[class]", 103)
1030 assertCount("div[class=dialog]", 50)
1031 assertCount("div[class^=dia]", 51)
1032 assertCount("div[class$=log]", 50)
1033 assertCount("div[class*=sce]", 1)
1034 assertCount("div[class|=dialog]", 50)
1035 assertCount("div[class~=dialog]", 51)
1036 }
1037
View as plain text