1 package charset
2
3 import (
4 "testing"
5 )
6
// xmlDoc is a small XML document whose declaration specifies
// encoding="UTF-8". It is the input for TestFromXML.
const xmlDoc = `<?xml version="1.0" encoding="UTF-8"?>
<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
</note>`
// htmlDoc is an HTML page that declares its charset with a
// <meta charset="UTF-8"> tag. The tag is preceded by a conditional comment
// wrapping a <script> element and followed by a <style> element holding an
// unterminated CSS comment. It is the input for TestFromHTML.
const htmlDoc = `<!DOCTYPE html>
<html>
<head><!--[if lt IE 9]><script language="javascript" type="text/javascript" src="//html5shim.googlecode.com/svn/trunk/html5.js"></script><![endif]-->
<meta charset="UTF-8"><style>/*
</style>
<link rel="stylesheet" href="css/animation.css"><!--[if IE 7]><link rel="stylesheet" href="css/" + font.fontname + "-ie7.css"><![endif]-->
<script>
</script>
</head>
<body>
<div class="container footer">さ</div>
</body>
</html>`
// htmlDocWithIncorrectCharset is an HTML page whose <meta charset> tag
// declares ISO-8859-16. TestFromHTMLWithBOM prefixes it with the bytes
// EF BB BF and expects "utf-8" to be reported instead of the declared value.
const htmlDocWithIncorrectCharset = `<!DOCTYPE html>
<!--
Some comment

-->
<html dir="ltr" mozdisallowselectionprint>
<head>
<meta charset="ISO-8859-16">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="some name" content="notranslate">
<title>test</title>


<link rel="stylesheet" href="html.utf8bom.css">



</head>

<body tabindex="1">
<div id="printContainer"></div>
</body>
</html>`
50
51 func TestFromXML(t *testing.T) {
52 charset := FromXML([]byte(xmlDoc))
53 if charset != "utf-8" {
54 t.Errorf("expected: utf-8; got: %s", charset)
55 }
56 }
57
58 func TestFromHTML(t *testing.T) {
59 charset := FromHTML([]byte(htmlDoc))
60 if charset != "utf-8" {
61 t.Errorf("expected: utf-8; got: %s", charset)
62 }
63 }
64
65 func TestFromHTMLWithBOM(t *testing.T) {
66 charset := FromHTML(append([]byte{0xEF, 0xBB, 0xBF}, []byte(htmlDocWithIncorrectCharset)...))
67 if charset != "utf-8" {
68 t.Errorf("expected: utf-8; got: %s", charset)
69 }
70 }
71
72 func TestFromPlain(t *testing.T) {
73 tcases := []struct {
74 raw []byte
75 charset string
76 }{
77 {[]byte{0xe6, 0xf8, 0xe5, 0x85, 0x85}, "windows-1252"},
78 {[]byte{0xe6, 0xf8, 0xe5}, "iso-8859-1"},
79 {[]byte("æøå"), "utf-8"},
80 {[]byte{}, ""},
81 }
82 for _, tc := range tcases {
83 if cs := FromPlain(tc.raw); cs != tc.charset {
84 t.Errorf("in: %v; expected: %s; got: %s", tc.raw, tc.charset, cs)
85 }
86 }
87 }
88
89 func FuzzFromPlain(f *testing.F) {
90 samples := [][]byte{
91 []byte{0xe6, 0xf8, 0xe5, 0x85, 0x85},
92 []byte{0xe6, 0xf8, 0xe5},
93 []byte("æøå"),
94 }
95
96 for _, s := range samples {
97 f.Add(s)
98 }
99
100 f.Fuzz(func(t *testing.T, d []byte) {
101 if charset := FromPlain(d); charset == "" {
102 t.Skip()
103 }
104 })
105 }
106 func FuzzFromHTML(f *testing.F) {
107 samples := []string{
108 `<meta charset="c">`,
109 `<meta charset="щ">`,
110 `<meta http-equiv="content-type" content="a/b; charset=c">`,
111 `<meta http-equiv="content-type" content="a/b; charset=щ">`,
112 `<f 1=2 /><meta charset="c">`,
113 `<f a=2><meta http-equiv="content-type" content="a/b; charset=c">`,
114 `<f 1=2 /><meta b="b" charset="c">`,
115 `<f a=2><meta b="b" http-equiv="content-type" content="a/b; charset=c">`,
116 }
117
118 for _, s := range samples {
119 f.Add([]byte(s))
120 }
121
122 f.Fuzz(func(t *testing.T, d []byte) {
123 if charset := FromHTML(d); charset == "" {
124 t.Skip()
125 }
126 })
127 }
128 func FuzzFromXML(f *testing.F) {
129 samples := []string{
130 `<?xml version="1.0" encoding="c"?>`,
131 }
132
133 for _, s := range samples {
134 f.Add([]byte(s))
135 }
136
137 f.Fuzz(func(t *testing.T, d []byte) {
138 if charset := FromXML(d); charset == "" {
139 t.Skip()
140 }
141 })
142 }
143
View as plain text