...

Source file src/github.com/rivo/uniseg/width_test.go

Documentation: github.com/rivo/uniseg

     1  package uniseg
     2  
     3  import (
     4  	"testing"
     5  )
     6  
// widthTestCases is a list of test cases for the calculation of string widths.
// Each entry pairs an input string with the total width expected for it.
//
// The table consists of three parts:
//   - the empty string and single code points,
//   - every ordered pair of 17 code points (17 x 17 = 289 entries), where the
//     expected width is the sum of the widths of the two code points taken
//     individually (U+061C, which has no single-code-point entry, contributes
//     0 in every pair); the only exception is a pair of Regional Indicators,
//     which is expected to have a width of 2 rather than 4,
//   - longer strings in various scripts as well as emoji sequences.
//
// The tests in this file report the index of a failing entry, so inserting or
// reordering entries changes the indices shown in failure messages.
var widthTestCases = []struct {
	original string // The string whose width is calculated.
	expected int    // The expected total width of the string.
}{
	{"", 0}, // Empty string
	// Control characters.
	{"\b", 0},
	{"\x00", 0},
	{"\x05", 0},
	{"\a", 0},
	{"\u000a", 0}, // LF
	{"\u000d", 0}, // CR
	{"\n", 0},
	{"\v", 0},
	{"\f", 0},
	{"\r", 0},
	{"\x0e", 0},
	{"\x0f", 0},
	{"\u0300", 0}, // Extend
	{"\u200d", 0}, // ZERO WIDTH JOINER
	{"a", 1},
	{"\u1b05", 1},     // N
	{"\u2985", 1},     // Na
	{"\U0001F100", 1}, // A
	{"\uff61", 1},     // H
	{"\ufe6a", 2},     // W
	{"\uff01", 2},     // F
	{"\u2e3a", 3},     // TWO-EM DASH
	{"\u2e3b", 4},     // THREE-EM DASH
	{"\u00a9", 1},     // Extended Pictographic (Emoji Presentation = No)
	{"\U0001F60A", 2}, // Extended Pictographic (Emoji Presentation = Yes)
	{"\U0001F1E6", 2}, // Regional Indicator
	// All ordered pairs of 17 code points, grouped by the first code point.
	{"\u061c\u061c", 0},
	{"\u061c\u000a", 0},
	{"\u061c\u000d", 0},
	{"\u061c\u0300", 0},
	{"\u061c\u200d", 0},
	{"\u061ca", 1},
	{"\u061c\u1b05", 1},
	{"\u061c\u2985", 1},
	{"\u061c\U0001F100", 1},
	{"\u061c\uff61", 1},
	{"\u061c\ufe6a", 2},
	{"\u061c\uff01", 2},
	{"\u061c\u2e3a", 3},
	{"\u061c\u2e3b", 4},
	{"\u061c\u00a9", 1},
	{"\u061c\U0001F60A", 2},
	{"\u061c\U0001F1E6", 2},
	{"\u000a\u061c", 0},
	{"\u000a\u000a", 0},
	{"\u000a\u000d", 0},
	{"\u000a\u0300", 0},
	{"\u000a\u200d", 0},
	{"\u000aa", 1},
	{"\u000a\u1b05", 1},
	{"\u000a\u2985", 1},
	{"\u000a\U0001F100", 1},
	{"\u000a\uff61", 1},
	{"\u000a\ufe6a", 2},
	{"\u000a\uff01", 2},
	{"\u000a\u2e3a", 3},
	{"\u000a\u2e3b", 4},
	{"\u000a\u00a9", 1},
	{"\u000a\U0001F60A", 2},
	{"\u000a\U0001F1E6", 2},
	{"\u000d\u061c", 0},
	{"\u000d\u000a", 0},
	{"\u000d\u000d", 0},
	{"\u000d\u0300", 0},
	{"\u000d\u200d", 0},
	{"\u000da", 1},
	{"\u000d\u1b05", 1},
	{"\u000d\u2985", 1},
	{"\u000d\U0001F100", 1},
	{"\u000d\uff61", 1},
	{"\u000d\ufe6a", 2},
	{"\u000d\uff01", 2},
	{"\u000d\u2e3a", 3},
	{"\u000d\u2e3b", 4},
	{"\u000d\u00a9", 1},
	{"\u000d\U0001F60A", 2},
	{"\u000d\U0001F1E6", 2},
	{"\u0300\u061c", 0},
	{"\u0300\u000a", 0},
	{"\u0300\u000d", 0},
	{"\u0300\u0300", 0},
	{"\u0300\u200d", 0},
	{"\u0300a", 1},
	{"\u0300\u1b05", 1},
	{"\u0300\u2985", 1},
	{"\u0300\U0001F100", 1},
	{"\u0300\uff61", 1},
	{"\u0300\ufe6a", 2},
	{"\u0300\uff01", 2},
	{"\u0300\u2e3a", 3},
	{"\u0300\u2e3b", 4},
	{"\u0300\u00a9", 1},
	{"\u0300\U0001F60A", 2},
	{"\u0300\U0001F1E6", 2},
	{"\u200d\u061c", 0},
	{"\u200d\u000a", 0},
	{"\u200d\u000d", 0},
	{"\u200d\u0300", 0},
	{"\u200d\u200d", 0},
	{"\u200da", 1},
	{"\u200d\u1b05", 1},
	{"\u200d\u2985", 1},
	{"\u200d\U0001F100", 1},
	{"\u200d\uff61", 1},
	{"\u200d\ufe6a", 2},
	{"\u200d\uff01", 2},
	{"\u200d\u2e3a", 3},
	{"\u200d\u2e3b", 4},
	{"\u200d\u00a9", 1},
	{"\u200d\U0001F60A", 2},
	{"\u200d\U0001F1E6", 2},
	{"a\u061c", 1},
	{"a\u000a", 1},
	{"a\u000d", 1},
	{"a\u0300", 1},
	{"a\u200d", 1},
	{"aa", 2},
	{"a\u1b05", 2},
	{"a\u2985", 2},
	{"a\U0001F100", 2},
	{"a\uff61", 2},
	{"a\ufe6a", 3},
	{"a\uff01", 3},
	{"a\u2e3a", 4},
	{"a\u2e3b", 5},
	{"a\u00a9", 2},
	{"a\U0001F60A", 3},
	{"a\U0001F1E6", 3},
	{"\u1b05\u061c", 1},
	{"\u1b05\u000a", 1},
	{"\u1b05\u000d", 1},
	{"\u1b05\u0300", 1},
	{"\u1b05\u200d", 1},
	{"\u1b05a", 2},
	{"\u1b05\u1b05", 2},
	{"\u1b05\u2985", 2},
	{"\u1b05\U0001F100", 2},
	{"\u1b05\uff61", 2},
	{"\u1b05\ufe6a", 3},
	{"\u1b05\uff01", 3},
	{"\u1b05\u2e3a", 4},
	{"\u1b05\u2e3b", 5},
	{"\u1b05\u00a9", 2},
	{"\u1b05\U0001F60A", 3},
	{"\u1b05\U0001F1E6", 3},
	{"\u2985\u061c", 1},
	{"\u2985\u000a", 1},
	{"\u2985\u000d", 1},
	{"\u2985\u0300", 1},
	{"\u2985\u200d", 1},
	{"\u2985a", 2},
	{"\u2985\u1b05", 2},
	{"\u2985\u2985", 2},
	{"\u2985\U0001F100", 2},
	{"\u2985\uff61", 2},
	{"\u2985\ufe6a", 3},
	{"\u2985\uff01", 3},
	{"\u2985\u2e3a", 4},
	{"\u2985\u2e3b", 5},
	{"\u2985\u00a9", 2},
	{"\u2985\U0001F60A", 3},
	{"\u2985\U0001F1E6", 3},
	{"\U0001F100\u061c", 1},
	{"\U0001F100\u000a", 1},
	{"\U0001F100\u000d", 1},
	{"\U0001F100\u0300", 1},
	{"\U0001F100\u200d", 1},
	{"\U0001F100a", 2},
	{"\U0001F100\u1b05", 2},
	{"\U0001F100\u2985", 2},
	{"\U0001F100\U0001F100", 2},
	{"\U0001F100\uff61", 2},
	{"\U0001F100\ufe6a", 3},
	{"\U0001F100\uff01", 3},
	{"\U0001F100\u2e3a", 4},
	{"\U0001F100\u2e3b", 5},
	{"\U0001F100\u00a9", 2},
	{"\U0001F100\U0001F60A", 3},
	{"\U0001F100\U0001F1E6", 3},
	{"\uff61\u061c", 1},
	{"\uff61\u000a", 1},
	{"\uff61\u000d", 1},
	{"\uff61\u0300", 1},
	{"\uff61\u200d", 1},
	{"\uff61a", 2},
	{"\uff61\u1b05", 2},
	{"\uff61\u2985", 2},
	{"\uff61\U0001F100", 2},
	{"\uff61\uff61", 2},
	{"\uff61\ufe6a", 3},
	{"\uff61\uff01", 3},
	{"\uff61\u2e3a", 4},
	{"\uff61\u2e3b", 5},
	{"\uff61\u00a9", 2},
	{"\uff61\U0001F60A", 3},
	{"\uff61\U0001F1E6", 3},
	{"\ufe6a\u061c", 2},
	{"\ufe6a\u000a", 2},
	{"\ufe6a\u000d", 2},
	{"\ufe6a\u0300", 2},
	{"\ufe6a\u200d", 2},
	{"\ufe6aa", 3},
	{"\ufe6a\u1b05", 3},
	{"\ufe6a\u2985", 3},
	{"\ufe6a\U0001F100", 3},
	{"\ufe6a\uff61", 3},
	{"\ufe6a\ufe6a", 4},
	{"\ufe6a\uff01", 4},
	{"\ufe6a\u2e3a", 5},
	{"\ufe6a\u2e3b", 6},
	{"\ufe6a\u00a9", 3},
	{"\ufe6a\U0001F60A", 4},
	{"\ufe6a\U0001F1E6", 4},
	{"\uff01\u061c", 2},
	{"\uff01\u000a", 2},
	{"\uff01\u000d", 2},
	{"\uff01\u0300", 2},
	{"\uff01\u200d", 2},
	{"\uff01a", 3},
	{"\uff01\u1b05", 3},
	{"\uff01\u2985", 3},
	{"\uff01\U0001F100", 3},
	{"\uff01\uff61", 3},
	{"\uff01\ufe6a", 4},
	{"\uff01\uff01", 4},
	{"\uff01\u2e3a", 5},
	{"\uff01\u2e3b", 6},
	{"\uff01\u00a9", 3},
	{"\uff01\U0001F60A", 4},
	{"\uff01\U0001F1E6", 4},
	{"\u2e3a\u061c", 3},
	{"\u2e3a\u000a", 3},
	{"\u2e3a\u000d", 3},
	{"\u2e3a\u0300", 3},
	{"\u2e3a\u200d", 3},
	{"\u2e3aa", 4},
	{"\u2e3a\u1b05", 4},
	{"\u2e3a\u2985", 4},
	{"\u2e3a\U0001F100", 4},
	{"\u2e3a\uff61", 4},
	{"\u2e3a\ufe6a", 5},
	{"\u2e3a\uff01", 5},
	{"\u2e3a\u2e3a", 6},
	{"\u2e3a\u2e3b", 7},
	{"\u2e3a\u00a9", 4},
	{"\u2e3a\U0001F60A", 5},
	{"\u2e3a\U0001F1E6", 5},
	{"\u2e3b\u061c", 4},
	{"\u2e3b\u000a", 4},
	{"\u2e3b\u000d", 4},
	{"\u2e3b\u0300", 4},
	{"\u2e3b\u200d", 4},
	{"\u2e3ba", 5},
	{"\u2e3b\u1b05", 5},
	{"\u2e3b\u2985", 5},
	{"\u2e3b\U0001F100", 5},
	{"\u2e3b\uff61", 5},
	{"\u2e3b\ufe6a", 6},
	{"\u2e3b\uff01", 6},
	{"\u2e3b\u2e3a", 7},
	{"\u2e3b\u2e3b", 8},
	{"\u2e3b\u00a9", 5},
	{"\u2e3b\U0001F60A", 6},
	{"\u2e3b\U0001F1E6", 6},
	{"\u00a9\u061c", 1},
	{"\u00a9\u000a", 1},
	{"\u00a9\u000d", 1},
	{"\u00a9\u0300", 1},
	{"\u00a9\u200d", 1},
	{"\u00a9a", 2},
	{"\u00a9\u1b05", 2},
	{"\u00a9\u2985", 2},
	{"\u00a9\U0001F100", 2},
	{"\u00a9\uff61", 2},
	{"\u00a9\ufe6a", 3},
	{"\u00a9\uff01", 3},
	{"\u00a9\u2e3a", 4},
	{"\u00a9\u2e3b", 5},
	{"\u00a9\u00a9", 2},
	{"\u00a9\U0001F60A", 3},
	{"\u00a9\U0001F1E6", 3},
	{"\U0001F60A\u061c", 2},
	{"\U0001F60A\u000a", 2},
	{"\U0001F60A\u000d", 2},
	{"\U0001F60A\u0300", 2},
	{"\U0001F60A\u200d", 2},
	{"\U0001F60Aa", 3},
	{"\U0001F60A\u1b05", 3},
	{"\U0001F60A\u2985", 3},
	{"\U0001F60A\U0001F100", 3},
	{"\U0001F60A\uff61", 3},
	{"\U0001F60A\ufe6a", 4},
	{"\U0001F60A\uff01", 4},
	{"\U0001F60A\u2e3a", 5},
	{"\U0001F60A\u2e3b", 6},
	{"\U0001F60A\u00a9", 3},
	{"\U0001F60A\U0001F60A", 4},
	{"\U0001F60A\U0001F1E6", 4},
	{"\U0001F1E6\u061c", 2},
	{"\U0001F1E6\u000a", 2},
	{"\U0001F1E6\u000d", 2},
	{"\U0001F1E6\u0300", 2},
	{"\U0001F1E6\u200d", 2},
	{"\U0001F1E6a", 3},
	{"\U0001F1E6\u1b05", 3},
	{"\U0001F1E6\u2985", 3},
	{"\U0001F1E6\U0001F100", 3},
	{"\U0001F1E6\uff61", 3},
	{"\U0001F1E6\ufe6a", 4},
	{"\U0001F1E6\uff01", 4},
	{"\U0001F1E6\u2e3a", 5},
	{"\U0001F1E6\u2e3b", 6},
	{"\U0001F1E6\u00a9", 3},
	{"\U0001F1E6\U0001F60A", 4},
	{"\U0001F1E6\U0001F1E6", 2},
	// Longer strings in various scripts and emoji sequences.
	{"Ka\u0308se", 4},                       // Käse (German, "cheese")
	{"\U0001f3f3\ufe0f\u200d\U0001f308", 2}, // Rainbow flag
	{"\U0001f1e9\U0001f1ea", 2},             // German flag
	{"\u0916\u093e", 2},                     // खा (Hindi, "eat")
	{"\u0915\u0948\u0938\u0947", 2},         // कैसे (Hindi, "how")
	{"\U0001f468\u200d\U0001f469\u200d\U0001f467\u200d\U0001f466", 2}, // Family: Man, Woman, Girl, Boy
	{"\u1112\u116f\u11b6", 2},                   // 훯 (Hangul, conjoining Jamo, "h+weo+lh")
	{"\ud6ef", 2},                               // 훯 (Hangul, precomposed, "h+weo+lh")
	{"\u79f0\u8c13", 4},                         // 称谓 (Chinese, "title")
	{"\u0e1c\u0e39\u0e49", 1},                   // ผู้ (Thai, "person")
	{"\u0623\u0643\u062a\u0648\u0628\u0631", 6}, // أكتوبر (Arabic, "October")
	{"\ua992\ua997\ua983", 3},                   // ꦒꦗꦃ (Javanese, "elephant")
	{"\u263a", 1},                               // White smiling face
	{"\u263a\ufe0f", 2},                         // White smiling face (with variation selector 16 = emoji presentation)
	{"\u231b", 2},                               // Hourglass
	{"\u231b\ufe0e", 1},                         // Hourglass (with variation selector 15 = text presentation)
	{"1\ufe0f", 1},                              // Emoji presentation of digit one.
}
   346  
   347  // String width tests using the StringWidth function.
   348  func TestWidthStringWidth(t *testing.T) {
   349  	for index, testCase := range widthTestCases {
   350  		actual := StringWidth(testCase.original)
   351  		if actual != testCase.expected {
   352  			t.Errorf("StringWidth(%q) is %d, expected %d (test case %d)", testCase.original, actual, testCase.expected, index)
   353  		}
   354  	}
   355  }
   356  
   357  // String width tests using the Graphemes class.
   358  func TestWidthGraphemes(t *testing.T) {
   359  	for index, testCase := range widthTestCases {
   360  		var actual int
   361  		graphemes := NewGraphemes(testCase.original)
   362  		if w := graphemes.Width(); w != 0 {
   363  			t.Errorf("Expected initial Width to be 0, got %d", w)
   364  		}
   365  		for graphemes.Next() {
   366  			actual += graphemes.Width()
   367  		}
   368  		if actual != testCase.expected {
   369  			t.Errorf("Width of %q is %d, expected %d (test case %d)", testCase.original, actual, testCase.expected, index)
   370  		}
   371  	}
   372  }
   373  
   374  // String width tests using the FirstGraphemeCluster function.
   375  func TestWidthGraphemesFunctionBytes(t *testing.T) {
   376  	for index, testCase := range widthTestCases {
   377  		var actual, width int
   378  		state := -1
   379  		text := []byte(testCase.original)
   380  		for len(text) > 0 {
   381  			_, text, width, state = FirstGraphemeCluster(text, state)
   382  			actual += width
   383  		}
   384  		if actual != testCase.expected {
   385  			t.Errorf("Width of %q is %d, expected %d (test case %d)", testCase.original, actual, testCase.expected, index)
   386  		}
   387  	}
   388  }
   389  
   390  // String width tests using the FirstGraphemeClusterString function.
   391  func TestWidthGraphemesFunctionString(t *testing.T) {
   392  	for index, testCase := range widthTestCases {
   393  		var actual, width int
   394  		state := -1
   395  		text := testCase.original
   396  		for len(text) > 0 {
   397  			_, text, width, state = FirstGraphemeClusterInString(text, state)
   398  			actual += width
   399  		}
   400  		if actual != testCase.expected {
   401  			t.Errorf("Width of %q is %d, expected %d (test case %d)", testCase.original, actual, testCase.expected, index)
   402  		}
   403  		cluster, rest, width, newState := FirstGraphemeClusterInString(text, -1)
   404  		if len(cluster) > 0 {
   405  			t.Errorf(`Expected cluster to be empty string, got %q`, cluster)
   406  		}
   407  		if len(rest) > 0 {
   408  			t.Errorf(`Expected rest to be empty string, got %q`, rest)
   409  		}
   410  		if width != 0 {
   411  			t.Errorf(`Expected width to be 0, got %d`, width)
   412  		}
   413  		if newState != 0 {
   414  			t.Errorf(`Expected newState to be 0, got %d`, newState)
   415  		}
   416  	}
   417  }
   418  
   419  // String width tests using the Step function.
   420  func TestWidthStepBytes(t *testing.T) {
   421  	for index, testCase := range widthTestCases {
   422  		var actual, boundaries int
   423  		state := -1
   424  		text := []byte(testCase.original)
   425  		for len(text) > 0 {
   426  			_, text, boundaries, state = Step(text, state)
   427  			actual += boundaries >> ShiftWidth
   428  		}
   429  		if actual != testCase.expected {
   430  			t.Errorf("Width of %q is %d, expected %d (test case %d)", testCase.original, actual, testCase.expected, index)
   431  		}
   432  	}
   433  }
   434  
   435  // String width tests using the StepString function.
   436  func TestWidthStepString(t *testing.T) {
   437  	for index, testCase := range widthTestCases {
   438  		var actual, boundaries int
   439  		state := -1
   440  		text := testCase.original
   441  		for len(text) > 0 {
   442  			_, text, boundaries, state = StepString(text, state)
   443  			actual += boundaries >> ShiftWidth
   444  		}
   445  		if actual != testCase.expected {
   446  			t.Errorf("Width of %q is %d, expected %d (test case %d)", testCase.original, actual, testCase.expected, index)
   447  		}
   448  	}
   449  }
   450  
   451  func TestRunesWidth(t *testing.T) {
   452  	tc := []struct {
   453  		name  string
   454  		raw   string
   455  		width int
   456  	}{
   457  		{"latin    ", "long", 4},
   458  		{"chinese  ", "中国", 4},
   459  		{"combining", "shangha\u0308\u0308i", 8},
   460  		{
   461  			"emoji 1", "🏝",
   462  			1,
   463  		},
   464  		{
   465  			"emoji 2", "🗻",
   466  			2,
   467  		},
   468  		{
   469  			"emoji 3", "🏖",
   470  			1,
   471  		},
   472  		{
   473  			"flags", "🇳🇱🇧🇷i",
   474  			5,
   475  		},
   476  		{
   477  			"flag 2", "🇨🇳",
   478  			2,
   479  		},
   480  	}
   481  
   482  	for _, v := range tc {
   483  		graphemes := NewGraphemes(v.raw)
   484  		width := 0
   485  		var rs []rune
   486  		for graphemes.Next() {
   487  			rs = graphemes.Runes()
   488  			width += StringWidth(string(rs))
   489  		}
   490  
   491  		if v.width != width {
   492  			t.Logf("%s :\t %q %U\n", v.name, v.raw, rs)
   493  			t.Errorf("%s:\t %q  expect width %d, got %d\n", v.name, v.raw, v.width, width)
   494  		}
   495  	}
   496  }
   497  

View as plain text