1 package mimetype
2
3 import (
4 "bytes"
5 "fmt"
6 "io"
7 "math"
8 "math/rand"
9 "mime"
10 "os"
11 "path/filepath"
12 "strings"
13 "sync"
14 "testing"
15 )
16
// testDataDir is the directory name that the tests join with a fixture file
// name (see the files table) to build the path of a sample file.
const testDataDir = "testdata"
18
19
// files is the fixture table shared by the detection tests. Each key is a file
// name inside testDataDir and each value is the exact string the detected MIME
// is expected to render as (including the charset parameter for text types).
var files = map[string]string{
	"3g2.3g2": "video/3gpp2",
	"3gp.3gp": "video/3gpp",
	"3mf.3mf": "application/vnd.ms-package.3dmanufacturing-3dmodel+xml",
	"7z.7z": "application/x-7z-compressed",
	"a.a": "application/x-archive",
	"aac.aac": "audio/aac",
	"aaf.aaf": "application/octet-stream",
	"accdb.accdb": "application/x-msaccess",
	"aiff.aiff": "audio/aiff",
	"amf.amf": "application/x-amf",
	"amr.amr": "audio/amr",
	"ape.ape": "audio/ape",
	"apng.png": "image/vnd.mozilla.apng",
	"asf.asf": "video/x-ms-asf",
	"atom.atom": "application/atom+xml",
	"au.au": "audio/basic",
	"avi.avi": "video/x-msvideo",
	"avif.avif": "image/avif",
	"avifsequence.avif": "image/avif",
	"bmp.bmp": "image/bmp",
	"bpg.bpg": "image/bpg",
	"bz2.bz2": "application/x-bzip2",
	"cab.cab": "application/vnd.ms-cab-compressed",
	"cab.is.cab": "application/x-installshield",
	"class.class": "application/x-java-applet",
	"crx.crx": "application/x-chrome-extension",
	"csv.csv": "text/csv",
	"cpio.cpio": "application/x-cpio",
	"dae.dae": "model/vnd.collada+xml",
	"dbf.dbf": "application/x-dbf",
	"dcm.dcm": "application/dicom",
	"deb.deb": "application/vnd.debian.binary-package",
	"djvu.djvu": "image/vnd.djvu",
	"doc.doc": "application/msword",
	"docx.1.docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	"docx.docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	"drpm.rpm": "application/x-rpm",
	"dwg.1.dwg": "image/vnd.dwg",
	"dwg.dwg": "image/vnd.dwg",
	"eot.eot": "application/vnd.ms-fontobject",
	"epub.epub": "application/epub+zip",
	"exe.exe": "application/vnd.microsoft.portable-executable",
	"fdf.fdf": "application/vnd.fdf",
	"fits.fits": "application/fits",
	"flac.flac": "audio/flac",
	"flv.flv": "video/x-flv",
	"gbr.gbr": "image/x-gimp-gbr",
	"geojson.1.geojson": "application/geo+json",
	"geojson.geojson": "application/geo+json",
	"gif.gif": "image/gif",
	"glb.glb": "model/gltf-binary",
	"gml.gml": "application/gml+xml",
	"gpx.gpx": "application/gpx+xml",
	"gz.gz": "application/gzip",
	"har.har": "application/json",
	"hdr.hdr": "image/vnd.radiance",
	"heic.single.heic": "image/heic",
	"heif.heif": "image/heif",
	"html.html": "text/html; charset=utf-8",
	"html.iso88591.html": "text/html; charset=iso-8859-1",
	"html.svg.html": "text/html; charset=utf-8",
	"html.usascii.html": "text/html; charset=us-ascii",
	"html.utf8.html": "text/html; charset=utf-8",
	"html.withbr.html": "text/html; charset=utf-8",
	"ico.ico": "image/x-icon",
	"ics.dos.ics": "text/calendar",
	"ics.ics": "text/calendar",
	"iso88591.txt": "text/plain; charset=iso-8859-1",
	"jar.jar": "application/jar",
	"jp2.jp2": "image/jp2",
	"jpf.jpf": "image/jpx",
	"jpg.jpg": "image/jpeg",
	"jpm.jpm": "image/jpm",
	"jxl.jxl": "image/jxl",
	"jxr.jxr": "image/jxr",
	"xpm.xpm": "image/x-xpixmap",
	"js.js": "application/javascript",
	"json.json": "application/json",
	"json.lowascii.json": "application/json",

	// The json.*.txt fixtures are expected to be reported as plain text, not
	// as application/json.
	"json.int.txt": "text/plain; charset=utf-8",
	"json.float.txt": "text/plain; charset=utf-8",
	"json.string.txt": "text/plain; charset=utf-8",
	"kml.kml": "application/vnd.google-earth.kml+xml",
	"lit.lit": "application/x-ms-reader",
	"ln": "application/x-executable",
	"lua.lua": "text/x-lua",
	"lz.lz": "application/lzip",
	"m3u.m3u": "application/vnd.apple.mpegurl",
	"m4a.m4a": "audio/x-m4a",
	"audio.mp4": "audio/mp4",
	"lnk.lnk": "application/x-ms-shortcut",
	"macho.macho": "application/x-mach-binary",
	"mdb.mdb": "application/x-msaccess",
	"midi.midi": "audio/midi",
	"mkv.mkv": "video/x-matroska",
	"mobi.mobi": "application/x-mobipocket-ebook",
	"mov.mov": "video/quicktime",
	"mp3.mp3": "audio/mpeg",
	"mp3.v1.notag.mp3": "audio/mpeg",
	"mp3.v2.5.notag.mp3": "audio/mpeg",
	"mp3.v2.notag.mp3": "audio/mpeg",
	"mp4.1.mp4": "video/mp4",
	"mp4.mp4": "video/mp4",
	"mpc.mpc": "audio/musepack",
	"mpeg.mpeg": "video/mpeg",
	"mqv.mqv": "video/quicktime",
	"mrc.mrc": "application/marc",
	"msi.msi": "application/x-ms-installer",
	"msg.msg": "application/vnd.ms-outlook",
	"ndjson.xl.ndjson": "application/x-ndjson",
	"ndjson.ndjson": "application/x-ndjson",
	"nes.nes": "application/vnd.nintendo.snes.rom",
	"elfobject": "application/x-object",
	"odf.odf": "application/vnd.oasis.opendocument.formula",
	"sxc.sxc": "application/vnd.sun.xml.calc",
	"odg.odg": "application/vnd.oasis.opendocument.graphics",
	"odp.odp": "application/vnd.oasis.opendocument.presentation",
	"ods.ods": "application/vnd.oasis.opendocument.spreadsheet",
	"odt.odt": "application/vnd.oasis.opendocument.text",
	"ogg.oga": "audio/ogg",
	"ogg.ogv": "video/ogg",
	"ogg.spx.oga": "audio/ogg",
	"otf.otf": "font/otf",
	"otg.otg": "application/vnd.oasis.opendocument.graphics-template",
	"otp.otp": "application/vnd.oasis.opendocument.presentation-template",
	"ots.ots": "application/vnd.oasis.opendocument.spreadsheet-template",
	"ott.ott": "application/vnd.oasis.opendocument.text-template",
	"odc.odc": "application/vnd.oasis.opendocument.chart",
	"owl2.owl": "application/owl+xml",
	"pat.pat": "image/x-gimp-pat",
	"pdf.pdf": "application/pdf",
	"php.php": "text/x-php",
	"pl.pl": "text/x-perl",
	"png.png": "image/png",
	"ppt.ppt": "application/vnd.ms-powerpoint",
	"pptx.pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
	"ps.ps": "application/postscript",
	"psd.psd": "image/vnd.adobe.photoshop",
	"p7s_pem.p7s": "application/pkcs7-signature",
	"p7s_der.p7s": "application/pkcs7-signature",
	"pub.pub": "application/vnd.ms-publisher",
	"py.py": "text/x-python",
	"qcp.qcp": "audio/qcelp",
	"rar.rar": "application/x-rar-compressed",
	"rmvb.rmvb": "application/vnd.rn-realmedia-vbr",
	"rpm.rpm": "application/x-rpm",
	"rss.rss": "application/rss+xml",
	"rtf.rtf": "text/rtf",
	"sample32.macho": "application/x-mach-binary",
	"sample64.macho": "application/x-mach-binary",
	"shp.shp": "application/vnd.shp",
	"shx.shx": "application/vnd.shx",
	"so.so": "application/x-sharedlib",
	"sqlite.sqlite": "application/vnd.sqlite3",
	"srt.srt": "application/x-subrip",

	// The not.srt*.txt fixtures are expected to be reported as plain text,
	// not as SubRip subtitles.
	"not.srt.txt": "text/plain; charset=utf-8",
	"not.srt.2.txt": "text/plain; charset=utf-8",
	"svg.1.svg": "image/svg+xml",
	"svg.svg": "image/svg+xml",
	"swf.swf": "application/x-shockwave-flash",

	// Several tar fixtures; all of them are expected to resolve to the same
	// MIME type.
	"tar.tar": "application/x-tar",
	"tar.gnu.tar": "application/x-tar",
	"tar.oldgnu.tar": "application/x-tar",
	"tar.posix.tar": "application/x-tar",
	"tar.star.tar": "application/x-tar",
	"tar.ustar.tar": "application/x-tar",
	"tar.v7.tar": "application/x-tar",
	"tar.v7-gnu.tar": "application/x-tar",
	"tcl.tcl": "text/x-tcl",
	"tcx.tcx": "application/vnd.garmin.tcx+xml",
	"tiff.tiff": "image/tiff",
	"torrent.torrent": "application/x-bittorrent",
	"tsv.tsv": "text/tab-separated-values",
	"ttc.ttc": "font/collection",
	"ttf.ttf": "font/ttf",
	"tzfile": "application/tzif",
	"utf16bebom.txt": "text/plain; charset=utf-16be",
	"utf16lebom.txt": "text/plain; charset=utf-16le",
	"utf32bebom.txt": "text/plain; charset=utf-32be",
	"utf32lebom.txt": "text/plain; charset=utf-32le",
	"utf8.txt": "text/plain; charset=utf-8",
	"utf8ctrlchars": "application/octet-stream",
	"vcf.dos.vcf": "text/vcard",
	"vcf.vcf": "text/vcard",
	"voc.voc": "audio/x-unknown",
	"vtt.vtt": "text/vtt",
	"vtt.space.vtt": "text/vtt",
	"vtt.tab.vtt": "text/vtt",
	"vtt.eof.vtt": "text/vtt",
	"warc.warc": "application/warc",
	"wasm.wasm": "application/wasm",
	"wav.wav": "audio/wav",
	"webm.webm": "video/webm",
	"webp.webp": "image/webp",
	"woff.woff": "font/woff",
	"woff2.woff2": "font/woff2",
	"x3d.x3d": "model/x3d+xml",
	"xar.xar": "application/x-xar",
	"xcf.xcf": "image/x-xcf",
	"xfdf.xfdf": "application/vnd.adobe.xfdf",
	"xlf.xlf": "application/x-xliff+xml",
	"xls.xls": "application/vnd.ms-excel",
	"xlsx.1.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	"xlsx.2.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	"xlsx.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	"xml.xml": "text/xml; charset=utf-8",
	"xml.withbr.xml": "text/xml; charset=utf-8",
	"xz.xz": "application/x-xz",
	"zip.zip": "application/zip",
	"zst.zst": "application/zstd",
}
240
241 func TestDetect(t *testing.T) {
242 errStr := "File: %s; Expected: %s != Detected: %s; err: %v"
243 extStr := "File: %s; ExpectedExt: %s != DetectedExt: %s"
244 for fName, expected := range files {
245 fileName := filepath.Join(testDataDir, fName)
246 f, err := os.Open(fileName)
247 if err != nil {
248 t.Fatal(err)
249 }
250 data, err := io.ReadAll(f)
251 if err != nil {
252 t.Fatal(err)
253 }
254
255 if mtype := Detect(data); mtype.String() != expected {
256 t.Errorf(errStr, fName, expected, mtype.String(), nil)
257 }
258
259 if _, err := f.Seek(0, io.SeekStart); err != nil {
260 t.Fatal(err)
261 }
262
263 if mtype, err := DetectReader(f); mtype.String() != expected {
264 t.Errorf(errStr, fName, expected, mtype.String(), err)
265 }
266 f.Close()
267
268 if mtype, err := DetectFile(fileName); mtype.String() != expected {
269 t.Errorf(errStr, fName, expected, mtype.String(), err)
270 } else if mtype.Extension() != filepath.Ext(fName) {
271 t.Errorf(extStr, fName, filepath.Ext(fName), mtype.Extension())
272 }
273 }
274 }
275
276
277 func TestGenerateSupportedFormats(t *testing.T) {
278 f, err := os.OpenFile("supported_mimes.md", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
279 if err != nil {
280 t.Fatal(err)
281 }
282 defer f.Close()
283
284 nodes := root.flatten()
285 header := fmt.Sprintf(`## %d Supported MIME types
286 This file is automatically generated when running tests. Do not edit manually.
287
288 Extension | MIME type | Aliases
289 --------- | --------- | -------
290 `, len(nodes))
291
292 if _, err := f.WriteString(header); err != nil {
293 t.Fatal(err)
294 }
295 for _, n := range nodes {
296 ext := n.extension
297 if ext == "" {
298 ext = "n/a"
299 }
300
301 aliases := strings.Join(n.aliases, ", ")
302 if aliases == "" {
303 aliases = "-"
304 }
305 str := fmt.Sprintf("**%s** | %s | %s\n", ext, n.mime, aliases)
306 if _, err := f.WriteString(str); err != nil {
307 t.Fatal(err)
308 }
309 }
310 }
311
312 func TestEqualsAny(t *testing.T) {
313 type ss []string
314 testCases := []struct {
315 m1 string
316 m2 ss
317 res bool
318 }{
319 {"foo/bar", ss{"foo/bar"}, true},
320 {" foo/bar", ss{"foo/bar "}, true},
321 {" foo/bar", ss{"foo/BAR "}, true},
322 {" foo/bar", ss{"foo/baz"}, false},
323 {";charset=utf-8", ss{""}, true},
324 {"", ss{"", "foo/bar"}, true},
325 {"foo/bar", ss{""}, false},
326 {"foo/bar", nil, false},
327 }
328 for _, tc := range testCases {
329 if EqualsAny(tc.m1, tc.m2...) != tc.res {
330 t.Errorf("Equality test failed for %+v", tc)
331 }
332 }
333 }
334
335 func TestDetectReader(t *testing.T) {
336 errStr := "File: %s; Mime: %s != DetectedMime: %s; err: %v"
337 for fName, expected := range files {
338 fileName := filepath.Join(testDataDir, fName)
339 f, err := os.Open(fileName)
340 if err != nil {
341 t.Fatal(err)
342 }
343 r := breakReader{
344 r: f,
345 breakSize: 3,
346 }
347 if mtype, err := DetectReader(&r); mtype.String() != expected {
348 t.Errorf(errStr, fName, expected, mtype.String(), err)
349 }
350 f.Close()
351 }
352 }
353
354
355
356
357
358
359
360
// breakReader is an io.Reader wrapper that serves the underlying stream in
// pieces of at most breakSize bytes, regardless of how large the caller's
// buffer is.
type breakReader struct {
	r         io.Reader
	breakSize int
}

// Read fills at most breakSize bytes of p from the wrapped reader. It relies on
// io.ReadFull, so a call either fills the whole (possibly shortened) buffer or
// reports why it could not. A short final piece, which io.ReadFull signals as
// io.ErrUnexpectedEOF, is reported to the caller as a plain io.EOF together
// with the bytes that were read.
func (b *breakReader) Read(p []byte) (int, error) {
	chunk := p
	if b.breakSize < len(chunk) {
		chunk = chunk[:b.breakSize]
	}

	switch n, err := io.ReadFull(b.r, chunk); err {
	case io.ErrUnexpectedEOF:
		return n, io.EOF
	default:
		return n, err
	}
}
376
377 func TestFaultyInput(t *testing.T) {
378 inexistent := "inexistent.file"
379 if mtype, err := DetectFile(inexistent); err == nil {
380 t.Errorf("%s should not match successfully", inexistent)
381 } else if mtype.String() != "application/octet-stream" {
382 t.Errorf("inexistent.file expected application/octet-stream, got %s", mtype)
383 }
384
385 f, _ := os.Open(inexistent)
386 if mtype, err := DetectReader(f); err == nil {
387 t.Errorf("%s reader should not match successfully", inexistent)
388 } else if mtype.String() != "application/octet-stream" {
389 t.Errorf("inexistent.file reader expected application/octet-stream, got %s", mtype)
390 }
391 }
392
393 func TestHierarchy(t *testing.T) {
394 detectedMIME, err := DetectFile("testdata/html.html")
395 if err != nil {
396 t.Fatal(err)
397 }
398 expected := []string{
399 "text/html; charset=utf-8",
400 "text/plain",
401 "application/octet-stream",
402 }
403
404 got := []string{}
405 for mtype := detectedMIME; mtype != nil; mtype = mtype.Parent() {
406 got = append(got, mtype.String())
407 }
408 if le, lg := len(expected), len(got); le != lg {
409 t.Fatalf("hierarchy len error; expected: %d, got: %d", le, lg)
410 }
411
412 for i := range expected {
413 if expected[i] != got[i] {
414 t.Fatalf("hierarchy error; expected: %s, got: %s", expected, got)
415 }
416 }
417 }
418
419 func TestConcurrent(t *testing.T) {
420 wg := sync.WaitGroup{}
421 wg.Add(4)
422
423 go func() {
424 for i := 0; i < 1000; i++ {
425 Detect([]byte("text content"))
426 }
427 wg.Done()
428 }()
429 go func() {
430 for i := 0; i < 1000; i++ {
431 SetLimit(5000 + uint32(i))
432 }
433 wg.Done()
434 }()
435 go func() {
436 for i := 0; i < 1000; i++ {
437 Lookup("text/plain")
438 }
439 wg.Done()
440 }()
441 go func() {
442 for i := 0; i < 1000; i++ {
443 Extend(func([]byte, uint32) bool { return false }, "e", ".e")
444 Lookup("text/plain").Extend(func([]byte, uint32) bool { return false }, "e", ".e")
445 }
446 wg.Done()
447 }()
448
449 wg.Wait()
450
451 SetLimit(3072)
452 }
453
454
455 func TestEmptyInput(t *testing.T) {
456 mtype, err := DetectReader(bytes.NewReader(nil))
457 if err != nil {
458 t.Fatalf("empty reader err; expected: nil, got: %s", err)
459 }
460 plain := "text/plain"
461 if !mtype.Is(plain) {
462 t.Fatalf("empty reader detection; expected: %s, got: %s", plain, mtype)
463 }
464 mtype = Detect(nil)
465 if !mtype.Is(plain) {
466 t.Fatalf("empty bytes slice detection; expected: %s, got: %s", plain, mtype)
467 }
468 SetLimit(0)
469 mtype, err = DetectReader(bytes.NewReader(nil))
470 if err != nil {
471 t.Fatalf("0 limіt, empty reader err; expected: nil, got: %s", err)
472 }
473 if !mtype.Is(plain) {
474 t.Fatalf("0 limit, empty reader detection; expected: %s, got: %s", plain, mtype)
475 }
476 SetLimit(3072)
477 }
478
479
480
481
482
483
484
485 func BenchmarkSliceRand(b *testing.B) {
486 r := rand.New(rand.NewSource(0))
487 data := make([]byte, 3072)
488 if _, err := io.ReadFull(r, data); err != io.ErrUnexpectedEOF && err != nil {
489 b.Fatal(err)
490 }
491
492 b.ResetTimer()
493
494 b.RunParallel(func(pb *testing.PB) {
495 for pb.Next() {
496 Detect(data)
497 }
498 })
499 }
500
501 func BenchmarkCommon(b *testing.B) {
502 commonFiles := map[string]string{
503 "tar": "testdata/tar.tar",
504 "zip": "testdata/zip.zip",
505 "pdf": "testdata/pdf.pdf",
506 "jpg": "testdata/jpg.jpg",
507 "png": "testdata/png.png",
508 "gif": "testdata/gif.gif",
509 "xls": "testdata/xls.xls",
510 "webm": "testdata/webm.webm",
511 "xlsx": "testdata/xlsx.xlsx",
512 "pptx": "testdata/pptx.pptx",
513 "docx": "testdata/docx.docx",
514 }
515 for k, v := range commonFiles {
516 b.Run(k, func(b *testing.B) {
517 f, err := os.ReadFile(v)
518 if err != nil {
519 b.Fatal(err)
520 }
521 b.ResetTimer()
522 for n := 0; n < b.N; n++ {
523 Detect(f)
524 }
525 })
526 }
527 }
528
529
530 func TestIndexOutOfRangePanic(t *testing.T) {
531 for _, n := range root.flatten() {
532 n.detector(nil, 1<<10)
533 }
534 }
535
536
537
538
539 func TestMIMEFormat(t *testing.T) {
540 for _, n := range root.flatten() {
541
542
543 if n.Extension() != "" && !strings.HasPrefix(n.Extension(), ".") {
544 t.Fatalf("extension %s should be dot prefixed", n.Extension())
545 }
546
547 _, _, err := mime.ParseMediaType(n.String())
548 if err != nil {
549 t.Fatalf("error parsing node MIME: %s", err)
550 }
551
552 for _, a := range n.aliases {
553 parsed, params, err := mime.ParseMediaType(a)
554 if err != nil {
555 t.Fatalf("error parsing node alias MIME: %s", err)
556 }
557 if parsed != a || len(params) > 0 {
558 t.Fatalf("node alias MIME should have no optional params; alias: %s, params: %v", a, params)
559 }
560 }
561 }
562 }
563
564 func TestLookup(t *testing.T) {
565 data := []struct {
566 mime string
567 m *MIME
568 }{
569 {root.mime, root},
570 {zip.mime, zip},
571 {zip.aliases[0], zip},
572 {xlsx.mime, xlsx},
573 }
574
575 for _, tt := range data {
576 t.Run(fmt.Sprintf("lookup %s", tt.mime), func(t *testing.T) {
577 if m := Lookup(tt.mime); m != tt.m {
578 t.Fatalf("failed to lookup: %s", tt.mime)
579 }
580 })
581 }
582 }
583
584 func TestExtend(t *testing.T) {
585 data := []struct {
586 mime string
587 ext string
588 parent *MIME
589 }{
590 {"foo", ".foo", nil},
591 {"bar", ".bar", root},
592 {"baz", ".baz", zip},
593 }
594
595 for _, tt := range data {
596 t.Run(fmt.Sprintf("extending to %s", tt.mime), func(t *testing.T) {
597 extend := Extend
598 if tt.parent != nil {
599 extend = tt.parent.Extend
600 } else {
601 tt.parent = root
602 }
603
604 extend(func(raw []byte, limit uint32) bool { return false }, tt.mime, tt.ext)
605 m := Lookup(tt.mime)
606 if m == nil {
607 t.Fatalf("mime %s not found", tt.mime)
608 }
609 if m.parent != tt.parent {
610 t.Fatalf("mime %s has wrong parent: want %s, got %s", tt.mime, tt.parent.mime, m.parent.mime)
611 }
612 })
613 }
614 }
615
616
617
618
619 func FuzzMimetype(f *testing.F) {
620
621
622
623 corpus := []string{
624 "testdata/mkv.mkv",
625 "testdata/webm.webm",
626 "testdata/docx.docx",
627 "testdata/pptx.pptx",
628 "testdata/xlsx.xlsx",
629 "testdata/3gp.3gp",
630 "testdata/class.class",
631 }
632 for _, c := range corpus {
633 data, err := os.ReadFile(c)
634 if err != nil {
635 f.Fatal(err)
636 }
637 f.Add(data[:100])
638 }
639
640 detectors := root.flatten()[1:]
641 f.Fuzz(func(t *testing.T, data []byte) {
642 matched := false
643 for _, d := range detectors {
644 if d.detector(data, math.MaxUint32) {
645 matched = true
646 }
647 }
648 if !matched {
649 t.Skip()
650 }
651 })
652 }
653
View as plain text