...

Source file src/github.com/gabriel-vasile/mimetype/internal/charset/charset_test.go

Documentation: github.com/gabriel-vasile/mimetype/internal/charset

     1  package charset
     2  
     3  import (
     4  	"testing"
     5  )
     6  
     7  const xmlDoc = `<?xml version="1.0" encoding="UTF-8"?>
     8  <note>
     9    <to>Tove</to>
    10    <from>Jani</from>
    11    <heading>Reminder</heading>
    12    <body>Don't forget me this weekend!</body>
    13  </note>`
    14  const htmlDoc = `<!DOCTYPE html>
    15  <html>
    16    <head><!--[if lt IE 9]><script language="javascript" type="text/javascript" src="//html5shim.googlecode.com/svn/trunk/html5.js"></script><![endif]-->
    17      <meta charset="UTF-8"><style>/*
    18       </style>
    19      <link rel="stylesheet" href="css/animation.css"><!--[if IE 7]><link rel="stylesheet" href="css/" + font.fontname + "-ie7.css"><![endif]-->
    20      <script>
    21      </script>
    22    </head>
    23    <body>
    24      <div class="container footer">さ</div>
    25    </body>
    26  </html>`
    27  const htmlDocWithIncorrectCharset = `<!DOCTYPE html>
    28  <!--
    29  Some comment
    30  
    31  -->
    32  <html dir="ltr" mozdisallowselectionprint>
    33    <head>
    34      <meta charset="ISO-8859-16">
    35      <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
    36      <meta name="some name" content="notranslate">
    37      <title>test</title>
    38  
    39  
    40      <link rel="stylesheet" href="html.utf8bom.css">
    41  
    42  
    43  
    44    </head>
    45  
    46    <body tabindex="1">
    47      <div id="printContainer"></div>
    48    </body>
    49  </html>`
    50  
    51  func TestFromXML(t *testing.T) {
    52  	charset := FromXML([]byte(xmlDoc))
    53  	if charset != "utf-8" {
    54  		t.Errorf("expected: utf-8; got: %s", charset)
    55  	}
    56  }
    57  
    58  func TestFromHTML(t *testing.T) {
    59  	charset := FromHTML([]byte(htmlDoc))
    60  	if charset != "utf-8" {
    61  		t.Errorf("expected: utf-8; got: %s", charset)
    62  	}
    63  }
    64  
    65  func TestFromHTMLWithBOM(t *testing.T) {
    66  	charset := FromHTML(append([]byte{0xEF, 0xBB, 0xBF}, []byte(htmlDocWithIncorrectCharset)...))
    67  	if charset != "utf-8" {
    68  		t.Errorf("expected: utf-8; got: %s", charset)
    69  	}
    70  }
    71  
    72  func TestFromPlain(t *testing.T) {
    73  	tcases := []struct {
    74  		raw     []byte
    75  		charset string
    76  	}{
    77  		{[]byte{0xe6, 0xf8, 0xe5, 0x85, 0x85}, "windows-1252"},
    78  		{[]byte{0xe6, 0xf8, 0xe5}, "iso-8859-1"},
    79  		{[]byte("æøå"), "utf-8"},
    80  		{[]byte{}, ""},
    81  	}
    82  	for _, tc := range tcases {
    83  		if cs := FromPlain(tc.raw); cs != tc.charset {
    84  			t.Errorf("in: %v; expected: %s; got: %s", tc.raw, tc.charset, cs)
    85  		}
    86  	}
    87  }
    88  
    89  func FuzzFromPlain(f *testing.F) {
    90  	samples := [][]byte{
    91  		[]byte{0xe6, 0xf8, 0xe5, 0x85, 0x85},
    92  		[]byte{0xe6, 0xf8, 0xe5},
    93  		[]byte("æøå"),
    94  	}
    95  
    96  	for _, s := range samples {
    97  		f.Add(s)
    98  	}
    99  
   100  	f.Fuzz(func(t *testing.T, d []byte) {
   101  		if charset := FromPlain(d); charset == "" {
   102  			t.Skip()
   103  		}
   104  	})
   105  }
   106  func FuzzFromHTML(f *testing.F) {
   107  	samples := []string{
   108  		`<meta charset="c">`,
   109  		`<meta charset="щ">`,
   110  		`<meta http-equiv="content-type" content="a/b; charset=c">`,
   111  		`<meta http-equiv="content-type" content="a/b; charset=щ">`,
   112  		`<f 1=2 /><meta charset="c">`,
   113  		`<f a=2><meta http-equiv="content-type" content="a/b; charset=c">`,
   114  		`<f 1=2 /><meta b="b" charset="c">`,
   115  		`<f a=2><meta b="b" http-equiv="content-type" content="a/b; charset=c">`,
   116  	}
   117  
   118  	for _, s := range samples {
   119  		f.Add([]byte(s))
   120  	}
   121  
   122  	f.Fuzz(func(t *testing.T, d []byte) {
   123  		if charset := FromHTML(d); charset == "" {
   124  			t.Skip()
   125  		}
   126  	})
   127  }
   128  func FuzzFromXML(f *testing.F) {
   129  	samples := []string{
   130  		`<?xml version="1.0" encoding="c"?>`,
   131  	}
   132  
   133  	for _, s := range samples {
   134  		f.Add([]byte(s))
   135  	}
   136  
   137  	f.Fuzz(func(t *testing.T, d []byte) {
   138  		if charset := FromXML(d); charset == "" {
   139  			t.Skip()
   140  		}
   141  	})
   142  }
   143  

View as plain text