1 package utfbom
2
3 import (
4 "io"
5 "io/ioutil"
6 "reflect"
7 "testing"
8 "testing/iotest"
9 "time"
10 )
11
12 var testCases = []struct {
13 name string
14 input []byte
15 inputError error
16 encoding Encoding
17 output []byte
18 }{
19 {"1", []byte{}, nil, Unknown, []byte{}},
20 {"2", []byte("hello"), nil, Unknown, []byte("hello")},
21 {"3", []byte("\xEF\xBB\xBF"), nil, UTF8, []byte{}},
22 {"4", []byte("\xEF\xBB\xBFhello"), nil, UTF8, []byte("hello")},
23 {"5", []byte("\xFE\xFF"), nil, UTF16BigEndian, []byte{}},
24 {"6", []byte("\xFF\xFE"), nil, UTF16LittleEndian, []byte{}},
25 {"7", []byte("\x00\x00\xFE\xFF"), nil, UTF32BigEndian, []byte{}},
26 {"8", []byte("\xFF\xFE\x00\x00"), nil, UTF32LittleEndian, []byte{}},
27 {"5", []byte("\xFE\xFF\x00\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F"), nil,
28 UTF16BigEndian, []byte{0x00, 0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F}},
29 {"6", []byte("\xFF\xFE\x68\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00"), nil,
30 UTF16LittleEndian, []byte{0x68, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00}},
31 {"7", []byte("\x00\x00\xFE\xFF\x00\x00\x00\x68\x00\x00\x00\x65\x00\x00\x00\x6C\x00\x00\x00\x6C\x00\x00\x00\x6F"), nil,
32 UTF32BigEndian,
33 []byte{0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F}},
34 {"8", []byte("\xFF\xFE\x00\x00\x68\x00\x00\x00\x65\x00\x00\x00\x6C\x00\x00\x00\x6C\x00\x00\x00\x6F\x00\x00\x00"), nil,
35 UTF32LittleEndian,
36 []byte{0x68, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00}},
37 {"9", []byte("\xEF"), nil, Unknown, []byte("\xEF")},
38 {"10", []byte("\xEF\xBB"), nil, Unknown, []byte("\xEF\xBB")},
39 {"11", []byte("\xEF\xBB\xBF"), io.ErrClosedPipe, UTF8, []byte{}},
40 {"12", []byte("\xFE\xFF"), io.ErrClosedPipe, Unknown, []byte("\xFE\xFF")},
41 {"13", []byte("\xFE"), io.ErrClosedPipe, Unknown, []byte("\xFE")},
42 {"14", []byte("\xFF\xFE"), io.ErrClosedPipe, Unknown, []byte("\xFF\xFE")},
43 {"15", []byte("\x00\x00\xFE\xFF"), io.ErrClosedPipe, UTF32BigEndian, []byte{}},
44 {"16", []byte("\x00\x00\xFE"), io.ErrClosedPipe, Unknown, []byte{0x00, 0x00, 0xFE}},
45 {"17", []byte("\x00\x00"), io.ErrClosedPipe, Unknown, []byte{0x00, 0x00}},
46 {"18", []byte("\x00"), io.ErrClosedPipe, Unknown, []byte{0x00}},
47 {"19", []byte("\xFF\xFE\x00\x00"), io.ErrClosedPipe, UTF32LittleEndian, []byte{}},
48 {"20", []byte("\xFF\xFE\x00"), io.ErrClosedPipe, Unknown, []byte{0xFF, 0xFE, 0x00}},
49 {"21", []byte("\xFF\xFE"), io.ErrClosedPipe, Unknown, []byte{0xFF, 0xFE}},
50 {"22", []byte("\xFF"), io.ErrClosedPipe, Unknown, []byte{0xFF}},
51 {"23", []byte("\x68\x65"), nil, Unknown, []byte{0x68, 0x65}},
52 }
53
// sliceReader is a test io.Reader backed by an in-memory byte slice. Once the
// slice is drained it reports inputError, or io.EOF when inputError is nil.
type sliceReader struct {
	input      []byte // bytes not yet handed out
	inputError error  // terminal error to report; nil selects io.EOF
}

// Read copies as much of the remaining input as fits into p.
//
// A zero-length p yields (0, nil) without touching the reader. If the input is
// already drained, the terminal error is returned with no bytes. When a call
// hands out the last of the input, the terminal error is returned together
// with those final bytes.
func (r *sliceReader) Read(p []byte) (int, error) {
	if len(p) == 0 {
		return 0, nil
	}

	if pending := r.getError(); pending != nil {
		return 0, pending
	}

	copied := copy(p, r.input)
	r.input = r.input[copied:]

	// Re-evaluate after consuming: this is what attaches the terminal error
	// to the read that empties the slice.
	return copied, r.getError()
}

// getError returns nil while input remains; once it is drained it returns
// inputError if set and io.EOF otherwise.
func (r *sliceReader) getError() error {
	switch {
	case len(r.input) != 0:
		return nil
	case r.inputError != nil:
		return r.inputError
	default:
		return io.EOF
	}
}
84
// readMakers lists the reader wrappers each table test is run with: the source
// reader as-is, and the same source limited by iotest.OneByteReader.
var readMakers = []struct {
	name string
	fn   func(io.Reader) io.Reader
}{
	{
		name: "full",
		fn:   func(src io.Reader) io.Reader { return src },
	},
	{
		name: "byte",
		fn:   iotest.OneByteReader,
	},
}
92
93 func TestSkip(t *testing.T) {
94 for _, tc := range testCases {
95 for _, readMaker := range readMakers {
96 r := readMaker.fn(&sliceReader{tc.input, tc.inputError})
97
98 sr, enc := Skip(r)
99 if enc != tc.encoding {
100 t.Fatalf("test %v reader=%s: expected encoding %v, but got %v", tc.name, readMaker.name, tc.encoding, enc)
101 }
102
103 output, err := ioutil.ReadAll(sr)
104 if !reflect.DeepEqual(output, tc.output) {
105 t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output)
106 }
107 if err != tc.inputError {
108 t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err)
109 }
110 }
111 }
112 }
113
114 func TestSkipSkip(t *testing.T) {
115 for _, tc := range testCases {
116 for _, readMaker := range readMakers {
117 r := readMaker.fn(&sliceReader{tc.input, tc.inputError})
118
119 sr0, _ := Skip(r)
120 sr, enc := Skip(sr0)
121 if enc != Unknown {
122 t.Fatalf("test %v reader=%s: expected encoding %v, but got %v", tc.name, readMaker.name, Unknown, enc)
123 }
124
125 output, err := ioutil.ReadAll(sr)
126 if !reflect.DeepEqual(output, tc.output) {
127 t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output)
128 }
129 if err != tc.inputError {
130 t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err)
131 }
132 }
133 }
134 }
135
136 func TestSkipOnly(t *testing.T) {
137 for _, tc := range testCases {
138 for _, readMaker := range readMakers {
139 r := readMaker.fn(&sliceReader{tc.input, tc.inputError})
140
141 sr := SkipOnly(r)
142
143 output, err := ioutil.ReadAll(sr)
144 if !reflect.DeepEqual(output, tc.output) {
145 t.Fatalf("test %v reader=%s: expected to read %+#v, but got %+#v", tc.name, readMaker.name, tc.output, output)
146 }
147 if err != tc.inputError {
148 t.Fatalf("test %v reader=%s: expected to get %+#v error, but got %+#v", tc.name, readMaker.name, tc.inputError, err)
149 }
150 }
151 }
152 }
153
// zeroReader is a reader that never produces data and never reports an error.
// The zero-reader tests use it as a source that makes no progress.
type zeroReader struct{}

// Read ignores its buffer and always returns (0, nil).
func (zeroReader) Read(_ []byte) (int, error) {
	return 0, nil
}
159
160 type readerEncoding struct {
161 Rd *Reader
162 Enc Encoding
163 }
164
165 func TestSkipZeroReader(t *testing.T) {
166 var z zeroReader
167
168 c := make(chan readerEncoding)
169 go func() {
170 r, enc := Skip(z)
171 c <- readerEncoding{r, enc}
172 }()
173
174 select {
175 case re := <-c:
176 if re.Enc != Unknown {
177 t.Error("Unknown encoding expected")
178 } else {
179 var b [1]byte
180 n, err := re.Rd.Read(b[:])
181 if n != 0 {
182 t.Error("unexpected bytes count:", n)
183 }
184 if err != io.ErrNoProgress {
185 t.Error("unexpected error:", err)
186 }
187 }
188 case <-time.After(time.Second):
189 t.Error("test timed out (endless loop in Skip?)")
190 }
191 }
192
193 func TestSkipOnlyZeroReader(t *testing.T) {
194 var z zeroReader
195
196 c := make(chan *Reader)
197 go func() {
198 r := SkipOnly(z)
199 c <- r
200 }()
201
202 select {
203 case r := <-c:
204 var b [1]byte
205 n, err := r.Read(b[:])
206 if n != 0 {
207 t.Error("unexpected bytes count:", n)
208 }
209 if err != io.ErrNoProgress {
210 t.Error("unexpected error:", err)
211 }
212 case <-time.After(time.Second):
213 t.Error("test timed out (endless loop in Skip?)")
214 }
215 }
216
217 func TestReader_ReadEmpty(t *testing.T) {
218 for _, tc := range testCases {
219 for _, readMaker := range readMakers {
220 r := readMaker.fn(&sliceReader{tc.input, tc.inputError})
221
222 sr := SkipOnly(r)
223
224 n, err := sr.Read(nil)
225 if n != 0 {
226 t.Fatalf("test %v reader=%s: expected to read zero bytes, but got %v", tc.name, readMaker.name, n)
227 }
228 if err != nil {
229 t.Fatalf("test %v reader=%s: expected to get <nil> error, but got %+#v", tc.name, readMaker.name, err)
230 }
231 }
232 }
233 }
234
235 func TestEncoding_String(t *testing.T) {
236 for e := Unknown; e <= UTF32LittleEndian; e++ {
237 s := e.String()
238 if s == "" {
239 t.Errorf("no string for %#v", e)
240 }
241 }
242 s := Encoding(999).String()
243 if s != "Unknown" {
244 t.Errorf("wrong string '%s' for invalid encoding", s)
245 }
246 }
247
View as plain text