...

Source file src/github.com/andybalholm/cascadia/selector_test.go

Documentation: github.com/andybalholm/cascadia

     1  package cascadia
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"log"
     9  	"reflect"
    10  	"strings"
    11  	"testing"
    12  
    13  	"golang.org/x/net/html"
    14  )
    15  
// validSelectors holds the fixture entries decoded by init from
// test_resources/valid_selectors.json.
var validSelectors []validSelector
    17  
    18  func init() {
    19  	c, err := ioutil.ReadFile("test_resources/valid_selectors.json")
    20  	if err != nil {
    21  		log.Fatal(err)
    22  	}
    23  	if err = json.Unmarshal(c, &validSelectors); err != nil {
    24  		log.Fatal(err)
    25  	}
    26  }
    27  
    28  type selectorTest struct {
    29  	HTML, selector string
    30  	results        []string
    31  }
    32  
    33  func nodeString(n *html.Node) string {
    34  	buf := bytes.NewBufferString("")
    35  	if err := html.Render(buf, n); err != nil {
    36  		log.Fatal(err)
    37  	}
    38  	return buf.String()
    39  }
    40  
// selectorTests is the shared table driving TestSelectors and TestMatchers.
// Each entry gives, positionally, the HTML to parse, the selector to run on
// the parsed document, and the rendered markup of every expected match in the
// order the matches must be returned. An empty result list means the selector
// must match nothing.
var selectorTests = []selectorTest{
	// Type and universal selectors.
	{
		`<body><address>This address...</address></body>`,
		"address",
		[]string{
			"<address>This address...</address>",
		},
	},
	{
		`<!-- comment --><html><head></head><body>text</body></html>`,
		"*",
		[]string{
			"<html><head></head><body>text</body></html>",
			"<head></head>",
			"<body>text</body>",
		},
	},
	{
		`<html><head></head><body></body></html>`,
		"*",
		[]string{
			"<html><head></head><body></body></html>",
			"<head></head>",
			"<body></body>",
		},
	},
	// ID selectors.
	{
		`<p id="foo"><p id="bar">`,
		"#foo",
		[]string{
			`<p id="foo"></p>`,
		},
	},
	{
		`<ul><li id="t1"><p id="t1">`,
		"li#t1",
		[]string{
			`<li id="t1"><p id="t1"></p></li>`,
		},
	},
	{
		`<ol><li id="t4"><li id="t44">`,
		"*#t4",
		[]string{
			`<li id="t4"></li>`,
		},
	},
	// Class selectors.
	{
		`<ul><li class="t1"><li class="t2">`,
		".t1",
		[]string{
			`<li class="t1"></li>`,
		},
	},
	{
		`<p class="t1 t2">`,
		"p.t1",
		[]string{
			`<p class="t1 t2"></p>`,
		},
	},
	// A class selector differing only in letter case is expected not to match.
	{
		`<div class="test">`,
		"div.teST",
		[]string{},
	},
	{
		`<p class="t1 t2">`,
		".t1.fail",
		[]string{},
	},
	{
		`<p class="t1 t2">`,
		"p.t1.t2",
		[]string{
			`<p class="t1 t2"></p>`,
		},
	},
	{
		`<p class="--t1 --t2">`,
		"p.--t1",
		[]string{
			`<p class="--t1 --t2"></p>`,
		},
	},
	{
		`<p class="--t1 --t2">`,
		"p.--t1.--t2",
		[]string{
			`<p class="--t1 --t2"></p>`,
		},
	},
	// Attribute selectors, including the "i" / "I" flag and the != operator.
	{
		`<p><p title="title">`,
		"p[title]",
		[]string{
			`<p title="title"></p>`,
		},
	},
	{
		`<div><div class="Red">`,
		`div[class="red" i]`,
		[]string{
			`<div class="Red"></div>`,
		},
	},
	{
		`<address><address title="foo"><address title="bar">`,
		`address[title="foo"]`,
		[]string{
			`<address title="foo"><address title="bar"></address></address>`,
		},
	},
	{
		`<address><address title="fooIgnoreCase"><address title="bar">`,
		`address[title="FoOIgnoRECaSe" i]`,
		[]string{
			`<address title="fooIgnoreCase"><address title="bar"></address></address>`,
		},
	},
	{
		`<address><address title="foo"><address title="bar">`,
		`address[title!="foo"]`,
		[]string{
			`<address><address title="foo"><address title="bar"></address></address></address>`,
			`<address title="bar"></address>`,
		},
	},
	{
		`<address><address title="FOO"><address title="bar">`,
		`address[title!="foo" i]`,
		[]string{
			`<address><address title="FOO"><address title="bar"></address></address></address>`,
			`<address title="bar"></address>`,
		},
	},
	{
		`<p title="fooBARuFOO"><p title="varfoo">`,
		`p[title!="FooBarUFoo" i]`,
		[]string{
			`<p title="varfoo"></p>`,
		},
	},
	{
		`<p title="tot foo bar">`,
		`[    	title        ~=       foo    ]`,
		[]string{
			`<p title="tot foo bar"></p>`,
		},
	},
	{
		`<p title="tot foo bar">`,
		`p[title~="FOO" i]`,
		[]string{
			`<p title="tot foo bar"></p>`,
		},
	},
	{
		`<p title="tot foo bar">`,
		`p[title~=toofoo i]`,
		[]string{},
	},
	{
		`<p title="hello world">`,
		`[title~="hello world"]`,
		[]string{},
	},
	{
		`<p title="HELLO world">`,
		`[title~="hello" i]`,
		[]string{
			`<p title="HELLO world"></p>`,
		},
	},
	{
		`<p title="HELLO world">`,
		`[title~="hello"          I]`,
		[]string{
			`<p title="HELLO world"></p>`,
		},
	},
	{
		`<p lang="en"><p lang="en-gb"><p lang="enough"><p lang="fr-en">`,
		`[lang|="en"]`,
		[]string{
			`<p lang="en"></p>`,
			`<p lang="en-gb"></p>`,
		},
	},
	{
		`<p lang="en"><p lang="En-gb"><p lang="enough"><p lang="fr-en">`,
		`[lang|="EN" i]`,
		[]string{
			`<p lang="en"></p>`,
			`<p lang="En-gb"></p>`,
		},
	},
	{
		`<p lang="en"><p lang="En-gb"><p lang="enough"><p lang="fr-en">`,
		`[lang|="EN"     i]`,
		[]string{
			`<p lang="en"></p>`,
			`<p lang="En-gb"></p>`,
		},
	},
	{
		`<p title="foobar"><p title="barfoo">`,
		`[title^="foo"]`,
		[]string{
			`<p title="foobar"></p>`,
		},
	},
	{
		`<p title="FooBAR"><p title="barfoo">`,
		`[title^="foo" i]`,
		[]string{
			`<p title="FooBAR"></p>`,
		},
	},
	{
		`<p title="foobar"><p title="barfoo">`,
		`[title$="bar"]`,
		[]string{
			`<p title="foobar"></p>`,
		},
	},
	{
		`<p title="foobar"><p title="barfoo">`,
		`[title$="BAR" i]`,
		[]string{
			`<p title="foobar"></p>`,
		},
	},
	{
		`<p title="foobarufoo">`,
		`[title*="bar"]`,
		[]string{
			`<p title="foobarufoo"></p>`,
		},
	},
	{
		`<p title="foobarufoo">`,
		`[title*="BaRu" i]`,
		[]string{
			`<p title="foobarufoo"></p>`,
		},
	},
	{
		`<p title="foobarufoo">`,
		`[title*="BaRu" I]`,
		[]string{
			`<p title="foobarufoo"></p>`,
		},
	},
	// Prefix, suffix and substring operators with a blank or empty operand
	// are expected to match nothing.
	{
		`<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
		`p[class$=" "]`,
		[]string{},
	},
	{
		`<p class="">This text should be green.</p><p>This text should be green.</p>`,
		`p[class$=""]`,
		[]string{},
	},
	{
		`<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
		`p[class^=" "]`,
		[]string{},
	},
	{
		`<p class="">This text should be green.</p><p>This text should be green.</p>`,
		`p[class^=""]`,
		[]string{},
	},
	{
		`<p class=" ">This text should be green.</p><p>This text should be green.</p>`,
		`p[class*=" "]`,
		[]string{},
	},
	{
		`<p class="">This text should be green.</p><p>This text should be green.</p>`,
		`p[class*=""]`,
		[]string{},
	},
	{
		`<input type="radio" name="Sex" value="F"/>`,
		`input[name=Sex][value=F]`,
		[]string{
			`<input type="radio" name="Sex" value="F"/>`,
		},
	},
	// The expected markup differs from the input here: the rendered tree has
	// a <tbody> around the row and no "aaa" text inside it.
	{
		`<table border="0" cellpadding="0" cellspacing="0" style="table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF"><tr style="height:64px">aaa</tr></table>`,
		`table[border="0"][cellpadding="0"][cellspacing="0"]`,
		[]string{
			`<table border="0" cellpadding="0" cellspacing="0" style="table-layout: fixed; width: 100%; border: 0 dashed; border-color: #FFFFFF"><tbody><tr style="height:64px"></tr></tbody></table>`,
		},
	},
	// Negation.
	{
		`<p class="t1 t2">`,
		".t1:not(.t2)",
		[]string{},
	},
	{
		`<div class="t3">`,
		`div:not(.t1)`,
		[]string{
			`<div class="t3"></div>`,
		},
	},
	{
		`<div><div class="t2"><div class="t3">`,
		`div:not([class="t2"])`,
		[]string{
			`<div><div class="t2"><div class="t3"></div></div></div>`,
			`<div class="t3"></div>`,
		},
	},
	// Structural pseudo-classes.
	{
		`<ol><li id=1><li id=2><li id=3></ol>`,
		`li:nth-child(odd)`,
		[]string{
			`<li id="1"></li>`,
			`<li id="3"></li>`,
		},
	},
	{
		`<ol><li id=1><li id=2><li id=3></ol>`,
		`li:nth-child(even)`,
		[]string{
			`<li id="2"></li>`,
		},
	},
	{
		`<ol><li id=1><li id=2><li id=3></ol>`,
		`li:nth-child(-n+2)`,
		[]string{
			`<li id="1"></li>`,
			`<li id="2"></li>`,
		},
	},
	{
		`<ol><li id=1><li id=2><li id=3></ol>`,
		`li:nth-child(3n+1)`,
		[]string{
			`<li id="1"></li>`,
		},
	},
	{
		`<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
		`li:nth-last-child(odd)`,
		[]string{
			`<li id="2"></li>`,
			`<li id="4"></li>`,
		},
	},
	{
		`<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
		`li:nth-last-child(even)`,
		[]string{
			`<li id="1"></li>`,
			`<li id="3"></li>`,
		},
	},
	{
		`<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
		`li:nth-last-child(-n+2)`,
		[]string{
			`<li id="3"></li>`,
			`<li id="4"></li>`,
		},
	},
	{
		`<ol><li id=1><li id=2><li id=3><li id=4></ol>`,
		`li:nth-last-child(3n+1)`,
		[]string{
			`<li id="1"></li>`,
			`<li id="4"></li>`,
		},
	},
	{
		`<p>some text <span id="1">and a span</span><span id="2"> and another</span></p>`,
		`span:first-child`,
		[]string{
			`<span id="1">and a span</span>`,
		},
	},
	{
		`<span>a span</span> and some text`,
		`span:last-child`,
		[]string{
			`<span>a span</span>`,
		},
	},
	{
		`<address></address><p id=1><p id=2>`,
		`p:nth-of-type(2)`,
		[]string{
			`<p id="2"></p>`,
		},
	},
	{
		`<address></address><p id=1><p id=2></p><a>`,
		`p:nth-last-of-type(2)`,
		[]string{
			`<p id="1"></p>`,
		},
	},
	{
		`<address></address><p id=1><p id=2></p><a>`,
		`p:last-of-type`,
		[]string{
			`<p id="2"></p>`,
		},
	},
	{
		`<address></address><p id=1><p id=2></p><a>`,
		`p:first-of-type`,
		[]string{
			`<p id="1"></p>`,
		},
	},
	{
		`<div><p id="1"></p><a></a></div><div><p id="2"></p></div>`,
		`p:only-child`,
		[]string{
			`<p id="2"></p>`,
		},
	},
	{
		`<div><p id="1"></p><a></a></div><div><p id="2"></p><p id="3"></p></div>`,
		`p:only-of-type`,
		[]string{
			`<p id="1"></p>`,
		},
	},
	// An element containing only a comment is expected to count as empty.
	{
		`<p id="1"><!-- --><p id="2">Hello<p id="3"><span>`,
		`:empty`,
		[]string{
			`<head></head>`,
			`<p id="1"><!-- --></p>`,
			`<span></span>`,
		},
	},
	// Combinators and selector groups.
	{
		`<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`,
		`div p`,
		[]string{
			`<p id="1"><table><tbody><tr><td><p id="2"></p></td></tr></tbody></table></p>`,
			`<p id="2"></p>`,
		},
	},
	{
		`<div><p id="1"><table><tr><td><p id="2"></table></div><p id="3">`,
		`div table p`,
		[]string{
			`<p id="2"></p>`,
		},
	},
	{
		`<div><p id="1"><div><p id="2"></div><table><tr><td><p id="3"></table></div>`,
		`div > p`,
		[]string{
			`<p id="1"></p>`,
			`<p id="2"></p>`,
		},
	},
	{
		`<p id="1"><p id="2"></p><address></address><p id="3">`,
		`p ~ p`,
		[]string{
			`<p id="2"></p>`,
			`<p id="3"></p>`,
		},
	},
	{
		`<p id="1"></p>
		 <!--comment-->
		 <p id="2"></p><address></address><p id="3">`,
		`p + p`,
		[]string{
			`<p id="2"></p>`,
		},
	},
	{
		`<ul><li></li><li></li></ul><p>`,
		`li, p`,
		[]string{
			"<li></li>",
			"<li></li>",
			"<p></p>",
		},
	},
	{
		`<p id="1"><p id="2"></p><address></address><p id="3">`,
		`p +/*This is a comment*/ p`,
		[]string{
			`<p id="2"></p>`,
		},
	},
	// Text-content pseudo-classes: :contains, :containsOwn, :has, :haschild,
	// :matches and :matchesOwn.
	{
		`<p>Text block that <span>wraps inner text</span> and continues</p>`,
		`p:contains("that wraps")`,
		[]string{
			`<p>Text block that <span>wraps inner text</span> and continues</p>`,
		},
	},
	{
		`<p>Text block that <span>wraps inner text</span> and continues</p>`,
		`p:containsOwn("that wraps")`,
		[]string{},
	},
	{
		`<p>Text block that <span>wraps inner text</span> and continues</p>`,
		`:containsOwn("inner")`,
		[]string{
			`<span>wraps inner text</span>`,
		},
	},
	{
		`<p>Text block that <span>wraps inner text</span> and continues</p>`,
		`p:containsOwn("block")`,
		[]string{
			`<p>Text block that <span>wraps inner text</span> and continues</p>`,
		},
	},
	{
		`<div id="d1"><p id="p1"><span>text content</span></p></div><div id="d2"/>`,
		`div:has(#p1)`,
		[]string{
			`<div id="d1"><p id="p1"><span>text content</span></p></div>`,
		},
	},
	{
		`<div id="d1"><p id="p1"><span>contents 1</span></p></div>
		<div id="d2"><p>contents <em>2</em></p></div>`,
		`div:has(:containsOwn("2"))`,
		[]string{
			`<div id="d2"><p>contents <em>2</em></p></div>`,
		},
	},
	{
		`<body><div id="d1"><p id="p1"><span>contents 1</span></p></div>
		<div id="d2"><p id="p2">contents <em>2</em></p></div></body>`,
		`body :has(:containsOwn("2"))`,
		[]string{
			`<div id="d2"><p id="p2">contents <em>2</em></p></div>`,
			`<p id="p2">contents <em>2</em></p>`,
		},
	},
	{
		`<body><div id="d1"><p id="p1"><span>contents 1</span></p></div>
		<div id="d2"><p id="p2">contents <em>2</em></p></div></body>`,
		`body :haschild(:containsOwn("2"))`,
		[]string{
			`<p id="p2">contents <em>2</em></p>`,
		},
	},
	{
		`<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
		`p:matches([\d])`,
		[]string{
			`<p id="p1">0123456789</p>`,
			`<p id="p3">0123ABCD</p>`,
		},
	},
	{
		`<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
		`p:matches([a-z])`,
		[]string{
			`<p id="p2">abcdef</p>`,
		},
	},
	{
		`<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
		`p:matches([a-zA-Z])`,
		[]string{
			`<p id="p2">abcdef</p>`,
			`<p id="p3">0123ABCD</p>`,
		},
	},
	{
		`<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
		`p:matches([^\d])`,
		[]string{
			`<p id="p2">abcdef</p>`,
			`<p id="p3">0123ABCD</p>`,
		},
	},
	{
		`<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
		`p:matches(^(0|a))`,
		[]string{
			`<p id="p1">0123456789</p>`,
			`<p id="p2">abcdef</p>`,
			`<p id="p3">0123ABCD</p>`,
		},
	},
	{
		`<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
		`p:matches(^\d+$)`,
		[]string{
			`<p id="p1">0123456789</p>`,
		},
	},
	{
		`<p id="p1">0123456789</p><p id="p2">abcdef</p><p id="p3">0123ABCD</p>`,
		`p:not(:matches(^\d+$))`,
		[]string{
			`<p id="p2">abcdef</p>`,
			`<p id="p3">0123ABCD</p>`,
		},
	},
	{
		`<div><p id="p1">01234<em>567</em>89</p><div>`,
		`div :matchesOwn(^\d+$)`,
		[]string{
			`<p id="p1">01234<em>567</em>89</p>`,
			`<em>567</em>`,
		},
	},
	// Regular-expression attribute operator (#=).
	{
		`<ul>
			<li><a id="a1" href="http://www.google.com/finance"></a>
			<li><a id="a2" href="http://finance.yahoo.com/"></a>
			<li><a id="a2" href="http://finance.untrusted.com/"/>
			<li><a id="a3" href="https://www.google.com/news"/>
			<li><a id="a4" href="http://news.yahoo.com"/>
		</ul>`,
		`[href#=(fina)]:not([href#=(\/\/[^\/]+untrusted)])`,
		[]string{
			`<a id="a1" href="http://www.google.com/finance"></a>`,
			`<a id="a2" href="http://finance.yahoo.com/"></a>`,
		},
	},
	{
		`<ul>
			<li><a id="a1" href="http://www.google.com/finance"/>
			<li><a id="a2" href="http://finance.yahoo.com/"/>
			<li><a id="a3" href="https://www.google.com/news"></a>
			<li><a id="a4" href="http://news.yahoo.com"/>
		</ul>`,
		`[href#=(^https:\/\/[^\/]*\/?news)]`,
		[]string{
			`<a id="a3" href="https://www.google.com/news"></a>`,
		},
	},
	// :input is expected to match input, select, textarea and button. The
	// whitespace inside the expected <select> must mirror the fixture markup.
	{
		`<form>
			<label>Username <input type="text" name="username" /></label>
			<label>Password <input type="password" name="password" /></label>
			<label>Country
				<select name="country">
					<option value="ca">Canada</option>
					<option value="us">United States</option>
				</select>
			</label>
			<label>Bio <textarea name="bio"></textarea></label>
			<button>Sign up</button>
		</form>`,
		`:input`,
		[]string{
			`<input type="text" name="username"/>`,
			`<input type="password" name="password"/>`,
			`<select name="country">
					<option value="ca">Canada</option>
					<option value="us">United States</option>
				</select>`,
			`<textarea name="bio"></textarea>`,
			`<button>Sign up</button>`,
		},
	},
	// :root, alone and combined with other selectors.
	{
		`<html><head></head><body></body></html>`,
		":root",
		[]string{
			"<html><head></head><body></body></html>",
		},
	},
	{
		`<html><head></head><body></body></html>`,
		"*:root",
		[]string{
			"<html><head></head><body></body></html>",
		},
	},
	{
		`<html><head></head><body></body></html>`,
		"html:nth-child(1)",
		[]string{
			"<html><head></head><body></body></html>",
		},
	},
	{
		`<html><head></head><body></body></html>`,
		"*:root:first-child",
		[]string{
			`<html><head></head><body></body></html>`,
		},
	},
	{
		`<html><head></head><body></body></html>`,
		"*:root:nth-child(1)",
		[]string{
			`<html><head></head><body></body></html>`,
		},
	},
	{
		`<html><head></head><body><a href="http://www.foo.com"></a></body></html>`,
		"a:not(:root)",
		[]string{
			`<a href="http://www.foo.com"></a>`,
		},
	},
	{
		`<html><head></head><body><p></p><div></div><span></span><a></a><form></form></body></html>`,
		"body > *:nth-child(3n+2)",
		[]string{
			"<div></div>",
			"<form></form>",
		},
	},
	// :disabled / :enabled. In the first case only the input inside the
	// second <legend> of the disabled fieldset is expected to match.
	{
		`<html><head></head><body><fieldset disabled><legend id="1"><input id="i1"/></legend><legend id="2"><input id="i2"/></legend></fieldset></body></html>`,
		"input:disabled",
		[]string{
			`<input id="i2"/>`,
		},
	},
	{
		`<html><head></head><body><fieldset disabled></fieldset></body></html>`,
		":disabled",
		[]string{
			`<fieldset disabled=""></fieldset>`,
		},
	},
	{
		`<html><head></head><body><fieldset></fieldset></body></html>`,
		":enabled",
		[]string{
			`<fieldset></fieldset>`,
		},
	},
	{
		`<div class=class1></div><div class=class2></div><div class=class3></div>`,
		"div.class1, div.class2",
		[]string{
			`<div class="class1"></div>`,
			`<div class="class2"></div>`,
		},
	},
}
   794  
   795  func setup(selector, testHTML string) (Selector, *html.Node, error) {
   796  	s, err := Compile(selector)
   797  	if err != nil {
   798  		return nil, nil, fmt.Errorf("error compiling %q: %s", selector, err)
   799  	}
   800  
   801  	doc, err := html.Parse(strings.NewReader(testHTML))
   802  	if err != nil {
   803  		return nil, nil, fmt.Errorf("error parsing %q: %s", testHTML, err)
   804  	}
   805  	return s, doc, nil
   806  }
   807  
   808  func TestSelectors(t *testing.T) {
   809  	for _, test := range selectorTests {
   810  		s, doc, err := setup(test.selector, test.HTML)
   811  		if err != nil {
   812  			t.Error(err)
   813  			continue
   814  		}
   815  
   816  		matches := s.MatchAll(doc)
   817  		if len(matches) != len(test.results) {
   818  			t.Errorf("selector %s wanted %d elements, got %d instead", test.selector, len(test.results), len(matches))
   819  			continue
   820  		}
   821  
   822  		for i, m := range matches {
   823  			got := nodeString(m)
   824  			if got != test.results[i] {
   825  				t.Errorf("selector %s wanted %s, got %s instead", test.selector, test.results[i], got)
   826  			}
   827  		}
   828  
   829  		firstMatch := s.MatchFirst(doc)
   830  		if len(test.results) == 0 {
   831  			if firstMatch != nil {
   832  				t.Errorf("MatchFirst: selector %s want nil, got %s", test.selector, nodeString(firstMatch))
   833  			}
   834  		} else {
   835  			got := nodeString(firstMatch)
   836  			if got != test.results[0] {
   837  				t.Errorf("MatchFirst: selector %s want %s, got %s", test.selector, test.results[0], got)
   838  			}
   839  		}
   840  	}
   841  }
   842  
   843  func setupMatcher(selector, testHTML string) (Matcher, *html.Node, error) {
   844  	s, err := ParseGroup(selector)
   845  	if err != nil {
   846  		return nil, nil, fmt.Errorf("error compiling %q: %s", selector, err)
   847  	}
   848  
   849  	doc, err := html.Parse(strings.NewReader(testHTML))
   850  	if err != nil {
   851  		return nil, nil, fmt.Errorf("error parsing %q: %s", testHTML, err)
   852  	}
   853  	return s, doc, nil
   854  }
   855  
   856  func TestMatchers(t *testing.T) {
   857  	for _, test := range selectorTests {
   858  		s, doc, err := setupMatcher(test.selector, test.HTML)
   859  		if err != nil {
   860  			t.Error(err)
   861  			continue
   862  		}
   863  
   864  		matches := QueryAll(doc, s)
   865  		if len(matches) != len(test.results) {
   866  			t.Errorf("selector %s wanted %d elements, got %d instead", test.selector, len(test.results), len(matches))
   867  			continue
   868  		}
   869  
   870  		for i, m := range matches {
   871  			got := nodeString(m)
   872  			if got != test.results[i] {
   873  				t.Errorf("selector %s wanted %s, got %s instead", test.selector, test.results[i], got)
   874  			}
   875  		}
   876  
   877  		firstMatch := Query(doc, s)
   878  		if len(test.results) == 0 {
   879  			if firstMatch != nil {
   880  				t.Errorf("Query: selector %s want nil, got %s", test.selector, nodeString(firstMatch))
   881  			}
   882  		} else {
   883  			got := nodeString(firstMatch)
   884  			if got != test.results[0] {
   885  				t.Errorf("Query: selector %s want %s, got %s", test.selector, test.results[0], got)
   886  			}
   887  		}
   888  
   889  		if !reflect.DeepEqual(matches, Selector(s.Match).Filter(matches)) {
   890  			t.Fatalf("inconsistent Filter result")
   891  		}
   892  	}
   893  }
   894  
   895  type testPseudo struct {
   896  	HTML, selector string
   897  	spec           Specificity
   898  	pseudo         string
   899  }
   900  
// testsPseudo lists the cases for TestPseudoElement. Every case uses the same
// document; the selectors cover both the double-colon (::before) and
// single-colon (:before, :first-line) spellings of a pseudo-element.
var testsPseudo = []testPseudo{
	{
		HTML:     `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
		selector: "#s12:not(FOO)::before",
		spec:     Specificity{1, 0, 2},
		pseudo:   "before",
	},
	{
		HTML:     `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
		selector: "#s12::first-line",
		spec:     Specificity{1, 0, 1},
		pseudo:   "first-line",
	},
	{
		HTML:     `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
		selector: "ol > #s12:first-line",
		spec:     Specificity{1, 0, 2},
		pseudo:   "first-line",
	},
	{
		HTML:     `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
		selector: "#s12:not(FOO)::after",
		spec:     Specificity{1, 0, 2},
		pseudo:   "after",
	},
	{
		HTML:     `<html><body><ul><ol><li id="s12" class="red level"></li></ol></ul></body></html>`,
		selector: "LI.red.level:before",
		spec:     Specificity{0, 2, 2},
		pseudo:   "before",
	},
}
   933  
   934  func TestPseudoElement(t *testing.T) {
   935  	for _, test := range testsPseudo {
   936  		s, err := ParseWithPseudoElement(test.selector)
   937  		if err != nil {
   938  			t.Fatalf("error compiling %q: %s", test.selector, err)
   939  		}
   940  
   941  		if _, err = Parse(test.selector); err == nil {
   942  			t.Fatalf("selector %s with pseudo-element should not compile", test.selector)
   943  		}
   944  
   945  		doc, err := html.Parse(strings.NewReader(test.HTML))
   946  		if err != nil {
   947  			t.Fatalf("error parsing %q: %s", test.HTML, err)
   948  		}
   949  
   950  		body := doc.FirstChild.LastChild
   951  		testNode := body.FirstChild.FirstChild.LastChild
   952  		if !s.Match(testNode) {
   953  			t.Errorf("%s didn't match (html tree : \n %s) \n", test.selector, nodeString(doc))
   954  			continue
   955  		}
   956  		if s.Specificity() != test.spec {
   957  			t.Errorf("wrong specificity : expected %v got %v", test.spec, s.Specificity())
   958  		}
   959  		if s.PseudoElement() != test.pseudo {
   960  			t.Errorf("wrong pseudo-element : expected %s got %s", test.pseudo, s.PseudoElement())
   961  		}
   962  	}
   963  }
   964  
   965  type invalidSelector struct {
   966  	Name     string `json:"name,omitempty"`
   967  	Selector string `json:"selector,omitempty"`
   968  }
   969  
   970  type validSelector struct {
   971  	invalidSelector
   972  	Expect  []string `json:"expect,omitempty"`
   973  	Exclude []string `json:"exclude,omitempty"`
   974  	Level   int      `json:"level,omitempty"`
   975  	Xfail   bool     `json:"xfail,omitempty"`
   976  }
   977  
   978  func TestShakespeare(t *testing.T) {
   979  	doc := parseReference("test_resources/shakespeare.html")
   980  	body := doc.FirstChild.NextSibling.LastChild
   981  	assertCount := func(selector string, expected int) {
   982  		sel, err := ParseGroup(selector)
   983  		if err != nil {
   984  			t.Errorf("invalid selector %s", selector)
   985  		}
   986  		if l := len(Selector(sel.Match).MatchAll(body)); l != expected {
   987  			t.Errorf("%s -> expected %d, got %d", selector, expected, l)
   988  		}
   989  	}
   990  
   991  	// Data borrowed from https://github.com/Kozea/cssselect2
   992  	assertCount("*", 246)
   993  	assertCount("div:only-child", 22) // ?
   994  	assertCount("div:nth-child(even)", 106)
   995  	assertCount("div:nth-child(2n)", 106)
   996  	assertCount("div:nth-child(odd)", 137)
   997  	assertCount("div:nth-child(2n+1)", 137)
   998  	assertCount("div:nth-child(n)", 243)
   999  	assertCount("div:last-child", 53)
  1000  	assertCount("div:first-child", 51)
  1001  	assertCount("div > div", 242)
  1002  	assertCount("div + div", 190)
  1003  	assertCount("div ~ div", 190)
  1004  	assertCount("body", 1)
  1005  	assertCount("body div", 243)
  1006  	assertCount("div", 243)
  1007  	assertCount("div div", 242)
  1008  	assertCount("div div div", 241)
  1009  	assertCount("div, div, div", 243)
  1010  	assertCount("div, a, span", 243)
  1011  	assertCount(".dialog", 51)
  1012  	assertCount("div.dialog", 51)
  1013  	assertCount("div .dialog", 51)
  1014  	assertCount("div.character, div.dialog", 99)
  1015  	assertCount("div.direction.dialog", 0)
  1016  	assertCount("div.dialog.direction", 0)
  1017  	assertCount("div.dialog.scene", 1)
  1018  	assertCount("div.scene.scene", 1)
  1019  	assertCount("div.scene .scene", 0)
  1020  	assertCount("div.direction .dialog ", 0)
  1021  	assertCount("div .dialog .direction", 4)
  1022  	assertCount("div.dialog .dialog .direction", 4)
  1023  	assertCount("#speech5", 1)
  1024  	assertCount("div#speech5", 1)
  1025  	assertCount("div #speech5", 1)
  1026  	assertCount("div.scene div.dialog", 49)
  1027  	assertCount("div#scene1 div.dialog div", 142)
  1028  	assertCount("#scene1 #speech1", 1)
  1029  	assertCount("div[class]", 103)
  1030  	assertCount("div[class=dialog]", 50)
  1031  	assertCount("div[class^=dia]", 51)
  1032  	assertCount("div[class$=log]", 50)
  1033  	assertCount("div[class*=sce]", 1)
  1034  	assertCount("div[class|=dialog]", 50)
  1035  	assertCount("div[class~=dialog]", 51)
  1036  }
  1037  

View as plain text