...

Source file src/github.com/rivo/uniseg/grapheme_test.go

Documentation: github.com/rivo/uniseg

     1  package uniseg
     2  
     3  import (
     4  	"testing"
     5  )
     6  
// benchmarkStr is the text segmented by the benchmarks in this file. It mixes
// plain ASCII with emoji sequences.
const benchmarkStr = "This is 🏳️‍🌈, a test string ツ for grapheme cluster testing. 🏋🏽‍♀️🙂🙂 It's only relevant for benchmark tests."

// benchmarkBytes is the byte-slice form of benchmarkStr. It is converted once
// here and used as the input of the byte-slice benchmark.
var benchmarkBytes = []byte(benchmarkStr)

// Variables to avoid compiler optimizations.
// resultRunes is assigned the result of every benchmark iteration.
var resultRunes []rune
    13  
// testCase is one segmentation test: an input string together with the
// segments it is expected to be split into.
type testCase = struct {
	// original is the input string handed to the code under test.
	original string
	// expected holds the code points of each expected segment, in order.
	expected [][]rune
}
    18  
// The test cases for the simple test function.
//
// Each entry lists the code points of every expected grapheme cluster of
// original, in order. The table is also reused by TestReverseString, which
// derives the expected reversed string from the expected clusters.
var testCases = []testCase{
	{original: "", expected: [][]rune{}},
	{original: "x", expected: [][]rune{{0x78}}},
	{original: "basic", expected: [][]rune{{0x62}, {0x61}, {0x73}, {0x69}, {0x63}}},
	{original: "möp", expected: [][]rune{{0x6d}, {0x6f, 0x308}, {0x70}}},
	// CR LF forms a single cluster, two LFs do not.
	{original: "\r\n", expected: [][]rune{{0xd, 0xa}}},
	{original: "\n\n", expected: [][]rune{{0xa}, {0xa}}},
	{original: "\t*", expected: [][]rune{{0x9}, {0x2a}}},
	{original: "뢴", expected: [][]rune{{0x1105, 0x116c, 0x11ab}}},
	{original: "ܐ܏ܒܓܕ", expected: [][]rune{{0x710}, {0x70f, 0x712}, {0x713}, {0x715}}},
	{original: "ำ", expected: [][]rune{{0xe33}}},
	{original: "ำำ", expected: [][]rune{{0xe33, 0xe33}}},
	{original: "สระอำ", expected: [][]rune{{0xe2a}, {0xe23}, {0xe30}, {0xe2d, 0xe33}}},
	{original: "*뢴*", expected: [][]rune{{0x2a}, {0x1105, 0x116c, 0x11ab}, {0x2a}}},
	// Emoji sequences joined by U+200D are expected to stay in one cluster.
	{original: "*👩‍❤️‍💋‍👩*", expected: [][]rune{{0x2a}, {0x1f469, 0x200d, 0x2764, 0xfe0f, 0x200d, 0x1f48b, 0x200d, 0x1f469}, {0x2a}}},
	{original: "👩‍❤️‍💋‍👩", expected: [][]rune{{0x1f469, 0x200d, 0x2764, 0xfe0f, 0x200d, 0x1f48b, 0x200d, 0x1f469}}},
	{original: "🏋🏽‍♀️", expected: [][]rune{{0x1f3cb, 0x1f3fd, 0x200d, 0x2640, 0xfe0f}}},
	{original: "🙂", expected: [][]rune{{0x1f642}}},
	{original: "🙂🙂", expected: [][]rune{{0x1f642}, {0x1f642}}},
	{original: "🇩🇪", expected: [][]rune{{0x1f1e9, 0x1f1ea}}},
	{original: "🏳️‍🌈", expected: [][]rune{{0x1f3f3, 0xfe0f, 0x200d, 0x1f308}}},
	{original: "\t🏳️‍🌈", expected: [][]rune{{0x9}, {0x1f3f3, 0xfe0f, 0x200d, 0x1f308}}},
	{original: "\t🏳️‍🌈\t", expected: [][]rune{{0x9}, {0x1f3f3, 0xfe0f, 0x200d, 0x1f308}, {0x9}}},
	// U+FE0E directly after CR LF is expected to form its own cluster.
	{original: "\r\n\uFE0E", expected: [][]rune{{13, 10}, {0xfe0e}}},
}
    45  
    46  // decomposed returns a grapheme cluster decomposition.
    47  func decomposed(s string) (runes [][]rune) {
    48  	gr := NewGraphemes(s)
    49  	for gr.Next() {
    50  		runes = append(runes, gr.Runes())
    51  	}
    52  	return
    53  }
    54  
    55  // Run all lists of test cases using the Graphemes class.
    56  func TestGraphemesClass(t *testing.T) {
    57  	allCases := append(testCases, graphemeBreakTestCases...)
    58  	for testNum, testCase := range allCases {
    59  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
    60  		testNum,
    61  		strings.TrimSpace(testCase.original),
    62  		testCase.expected,
    63  		decomposed(testCase.original),
    64  		[]rune(testCase.original))*/
    65  		gr := NewGraphemes(testCase.original)
    66  		var index int
    67  	GraphemeLoop:
    68  		for index = 0; gr.Next(); index++ {
    69  			if index >= len(testCase.expected) {
    70  				t.Errorf(`Test case %d %q failed: More grapheme clusters returned than expected %d`,
    71  					testNum,
    72  					testCase.original,
    73  					len(testCase.expected))
    74  				break
    75  			}
    76  			cluster := gr.Runes()
    77  			if len(cluster) != len(testCase.expected[index]) {
    78  				t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d has %d codepoints %x, %d expected %x`,
    79  					testNum,
    80  					testCase.original,
    81  					index,
    82  					len(cluster),
    83  					cluster,
    84  					len(testCase.expected[index]),
    85  					testCase.expected[index])
    86  				break
    87  			}
    88  			for i, r := range cluster {
    89  				if r != testCase.expected[index][i] {
    90  					t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d is %x, expected %x`,
    91  						testNum,
    92  						testCase.original,
    93  						index,
    94  						cluster,
    95  						testCase.expected[index])
    96  					break GraphemeLoop
    97  				}
    98  			}
    99  		}
   100  		if index < len(testCase.expected) {
   101  			t.Errorf(`Test case %d %q failed: Fewer grapheme clusters returned (%d) than expected (%d)`,
   102  				testNum,
   103  				testCase.original,
   104  				index,
   105  				len(testCase.expected))
   106  		}
   107  	}
   108  }
   109  
   110  // Run the standard Unicode test cases for word boundaries using the Graphemes
   111  // class.
   112  func TestGraphemesClassWord(t *testing.T) {
   113  	for testNum, testCase := range wordBreakTestCases {
   114  		if testNum == 1700 {
   115  			// This test case reveals an inconsistency in the Unicode rule set,
   116  			// namely the handling of ZWJ within two RI graphemes. (Grapheme
   117  			// rules will restart the RI count, word rules will ignore the ZWJ.)
   118  			// An error has been reported.
   119  			continue
   120  		}
   121  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
   122  		testNum,
   123  		strings.TrimSpace(testCase.original),
   124  		testCase.expected,
   125  		decomposed(testCase.original),
   126  		[]rune(testCase.original))*/
   127  		gr := NewGraphemes(testCase.original)
   128  		var (
   129  			index   int
   130  			cluster []rune
   131  		)
   132  		if !gr.IsWordBoundary() {
   133  			t.Error("Expected initial IsWordBoundary to be true, got false")
   134  		}
   135  	GraphemeLoop:
   136  		for gr.Next() {
   137  			if index >= len(testCase.expected) {
   138  				t.Errorf(`Test case %d %q failed: More words returned than expected %d`,
   139  					testNum,
   140  					testCase.original,
   141  					len(testCase.expected))
   142  				break
   143  			}
   144  			cluster = append(cluster, gr.Runes()...)
   145  			if gr.IsWordBoundary() {
   146  				if len(cluster) != len(testCase.expected[index]) {
   147  					t.Errorf(`Test case %d %q failed: Word at index %d has %d codepoints %x, %d expected %x`,
   148  						testNum,
   149  						testCase.original,
   150  						index,
   151  						len(cluster),
   152  						cluster,
   153  						len(testCase.expected[index]),
   154  						testCase.expected[index])
   155  					break
   156  				}
   157  				for i, r := range cluster {
   158  					if r != testCase.expected[index][i] {
   159  						t.Errorf(`Test case %d %q failed: Word at index %d is %x, expected %x`,
   160  							testNum,
   161  							testCase.original,
   162  							index,
   163  							cluster,
   164  							testCase.expected[index])
   165  						break GraphemeLoop
   166  					}
   167  				}
   168  				cluster = nil
   169  				index++
   170  			}
   171  		}
   172  		if index < len(testCase.expected) {
   173  			t.Errorf(`Test case %d %q failed: Fewer words returned (%d) than expected (%d)`,
   174  				testNum,
   175  				testCase.original,
   176  				index,
   177  				len(testCase.expected))
   178  		}
   179  	}
   180  }
   181  
   182  // Run the standard Unicode test cases for sentence boundaries using the
   183  // Graphemes class.
   184  func TestGraphemesClassSentence(t *testing.T) {
   185  	for testNum, testCase := range sentenceBreakTestCases {
   186  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
   187  		testNum,
   188  		strings.TrimSpace(testCase.original),
   189  		testCase.expected,
   190  		decomposed(testCase.original),
   191  		[]rune(testCase.original))*/
   192  		gr := NewGraphemes(testCase.original)
   193  		var (
   194  			index   int
   195  			cluster []rune
   196  		)
   197  		if !gr.IsSentenceBoundary() {
   198  			t.Error("Expected initial IsSentenceBoundary to be true, got false")
   199  		}
   200  	GraphemeLoop:
   201  		for gr.Next() {
   202  			if index >= len(testCase.expected) {
   203  				t.Errorf(`Test case %d %q failed: More sentences returned than expected %d`,
   204  					testNum,
   205  					testCase.original,
   206  					len(testCase.expected))
   207  				break
   208  			}
   209  			cluster = append(cluster, gr.Runes()...)
   210  			if gr.IsSentenceBoundary() {
   211  				if len(cluster) != len(testCase.expected[index]) {
   212  					t.Errorf(`Test case %d %q failed: Sentence at index %d has %d codepoints %x, %d expected %x`,
   213  						testNum,
   214  						testCase.original,
   215  						index,
   216  						len(cluster),
   217  						cluster,
   218  						len(testCase.expected[index]),
   219  						testCase.expected[index])
   220  					break
   221  				}
   222  				for i, r := range cluster {
   223  					if r != testCase.expected[index][i] {
   224  						t.Errorf(`Test case %d %q failed: Sentence at index %d is %x, expected %x`,
   225  							testNum,
   226  							testCase.original,
   227  							index,
   228  							cluster,
   229  							testCase.expected[index])
   230  						break GraphemeLoop
   231  					}
   232  				}
   233  				cluster = nil
   234  				index++
   235  			}
   236  		}
   237  		if index < len(testCase.expected) {
   238  			t.Errorf(`Test case %d %q failed: Fewer sentences returned (%d) than expected (%d)`,
   239  				testNum,
   240  				testCase.original,
   241  				index,
   242  				len(testCase.expected))
   243  		}
   244  	}
   245  }
   246  
   247  // Test the Str() function.
   248  func TestGraphemesStr(t *testing.T) {
   249  	gr := NewGraphemes("möp")
   250  	gr.Next()
   251  	gr.Next()
   252  	gr.Next()
   253  	if str := gr.Str(); str != "p" {
   254  		t.Errorf(`Expected "p", got %q`, str)
   255  	}
   256  }
   257  
   258  // Test the Bytes() function.
   259  func TestGraphemesBytes(t *testing.T) {
   260  	gr := NewGraphemes("A👩‍❤️‍💋‍👩B")
   261  	gr.Next()
   262  	gr.Next()
   263  	gr.Next()
   264  	b := gr.Bytes()
   265  	if len(b) != 1 {
   266  		t.Fatalf(`Expected len("B") == 1, got %d`, len(b))
   267  	}
   268  	if b[0] != 'B' {
   269  		t.Errorf(`Expected "B", got %q`, string(b[0]))
   270  	}
   271  }
   272  
   273  // Test the Positions() function.
   274  func TestGraphemesPositions(t *testing.T) {
   275  	gr := NewGraphemes("A👩‍❤️‍💋‍👩B")
   276  	gr.Next()
   277  	gr.Next()
   278  	from, to := gr.Positions()
   279  	if from != 1 || to != 28 {
   280  		t.Errorf(`Expected from=%d to=%d, got from=%d to=%d`, 1, 28, from, to)
   281  	}
   282  }
   283  
   284  // Test the Reset() function.
   285  func TestGraphemesReset(t *testing.T) {
   286  	gr := NewGraphemes("möp")
   287  	gr.Next()
   288  	gr.Next()
   289  	gr.Next()
   290  	gr.Reset()
   291  	gr.Next()
   292  	if str := gr.Str(); str != "m" {
   293  		t.Errorf(`Expected "m", got %q`, str)
   294  	}
   295  }
   296  
   297  // Test retrieving clusters before calling Next().
   298  func TestGraphemesEarly(t *testing.T) {
   299  	gr := NewGraphemes("test")
   300  	r := gr.Runes()
   301  	if r != nil {
   302  		t.Errorf(`Expected nil rune slice, got %x`, r)
   303  	}
   304  	str := gr.Str()
   305  	if str != "" {
   306  		t.Errorf(`Expected empty string, got %q`, str)
   307  	}
   308  	b := gr.Bytes()
   309  	if b != nil {
   310  		t.Errorf(`Expected byte rune slice, got %x`, b)
   311  	}
   312  	from, to := gr.Positions()
   313  	if from != 0 || to != 0 {
   314  		t.Errorf(`Expected from=%d to=%d, got from=%d to=%d`, 0, 0, from, to)
   315  	}
   316  }
   317  
   318  // Test retrieving more clusters after retrieving the last cluster.
   319  func TestGraphemesLate(t *testing.T) {
   320  	gr := NewGraphemes("x")
   321  	gr.Next()
   322  	gr.Next()
   323  	r := gr.Runes()
   324  	if r != nil {
   325  		t.Errorf(`Expected nil rune slice, got %x`, r)
   326  	}
   327  	str := gr.Str()
   328  	if str != "" {
   329  		t.Errorf(`Expected empty string, got %q`, str)
   330  	}
   331  	b := gr.Bytes()
   332  	if b != nil {
   333  		t.Errorf(`Expected byte rune slice, got %x`, b)
   334  	}
   335  	from, to := gr.Positions()
   336  	if from != 1 || to != 1 {
   337  		t.Errorf(`Expected from=%d to=%d, got from=%d to=%d`, 1, 1, from, to)
   338  	}
   339  }
   340  
   341  // Test the GraphemeClusterCount function.
   342  func TestGraphemesCount(t *testing.T) {
   343  	if n := GraphemeClusterCount("🇩🇪🏳️‍🌈"); n != 2 {
   344  		t.Errorf(`Expected 2 grapheme clusters, got %d`, n)
   345  	}
   346  }
   347  
   348  // Test the ReverseString function.
   349  func TestReverseString(t *testing.T) {
   350  	for _, testCase := range testCases {
   351  		var r []rune
   352  		for index := len(testCase.expected) - 1; index >= 0; index-- {
   353  			r = append(r, testCase.expected[index]...)
   354  		}
   355  		if string(r) != ReverseString(testCase.original) {
   356  			t.Errorf(`Exepected reverse of %q to be %q, got %q`, testCase.original, string(r), ReverseString(testCase.original))
   357  		}
   358  	}
   359  
   360  	// Three additional ones, for good measure.
   361  	if ReverseString("🇩🇪🏳️‍🌈") != "🏳️‍🌈🇩🇪" {
   362  		t.Error("Flags weren't reversed correctly")
   363  	}
   364  	if ReverseString("🏳️‍🌈") != "🏳️‍🌈" {
   365  		t.Error("Flag wasn't reversed correctly")
   366  	}
   367  	if ReverseString("") != "" {
   368  		t.Error("Empty string wasn't reversed correctly")
   369  	}
   370  }
   371  
   372  // Run all lists of test cases using the Graphemes function for byte slices.
   373  func TestGraphemesFunctionBytes(t *testing.T) {
   374  	allCases := append(testCases, graphemeBreakTestCases...)
   375  	for testNum, testCase := range allCases {
   376  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
   377  		testNum,
   378  		strings.TrimSpace(testCase.original),
   379  		testCase.expected,
   380  		decomposed(testCase.original),
   381  		[]rune(testCase.original))*/
   382  		b := []byte(testCase.original)
   383  		state := -1
   384  		var (
   385  			index int
   386  			c     []byte
   387  		)
   388  	GraphemeLoop:
   389  		for len(b) > 0 {
   390  			c, b, _, state = FirstGraphemeCluster(b, state)
   391  
   392  			if index >= len(testCase.expected) {
   393  				t.Errorf(`Test case %d %q failed: More grapheme clusters returned than expected %d`,
   394  					testNum,
   395  					testCase.original,
   396  					len(testCase.expected))
   397  				break
   398  			}
   399  
   400  			cluster := []rune(string(c))
   401  			if len(cluster) != len(testCase.expected[index]) {
   402  				t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d has %d codepoints %x, %d expected %x`,
   403  					testNum,
   404  					testCase.original,
   405  					index,
   406  					len(cluster),
   407  					cluster,
   408  					len(testCase.expected[index]),
   409  					testCase.expected[index])
   410  				break
   411  			}
   412  			for i, r := range cluster {
   413  				if r != testCase.expected[index][i] {
   414  					t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d is %x, expected %x`,
   415  						testNum,
   416  						testCase.original,
   417  						index,
   418  						cluster,
   419  						testCase.expected[index])
   420  					break GraphemeLoop
   421  				}
   422  			}
   423  
   424  			index++
   425  		}
   426  		if index < len(testCase.expected) {
   427  			t.Errorf(`Test case %d %q failed: Fewer grapheme clusters returned (%d) than expected (%d)`,
   428  				testNum,
   429  				testCase.original,
   430  				index,
   431  				len(testCase.expected))
   432  		}
   433  	}
   434  	cluster, rest, width, newState := FirstGraphemeCluster([]byte{}, 0)
   435  	if len(cluster) > 0 {
   436  		t.Errorf(`Expected cluster to be empty byte slice, got %q`, cluster)
   437  	}
   438  	if len(rest) > 0 {
   439  		t.Errorf(`Expected rest to be empty byte slice, got %q`, rest)
   440  	}
   441  	if width != 0 {
   442  		t.Errorf(`Expected width to be 0, got %d`, width)
   443  	}
   444  	if newState != 0 {
   445  		t.Errorf(`Expected newState to be 0, got %d`, newState)
   446  	}
   447  }
   448  
   449  // Run all lists of test cases using the Graphemes function for strings.
   450  func TestGraphemesFunctionString(t *testing.T) {
   451  	allCases := append(testCases, graphemeBreakTestCases...)
   452  	for testNum, testCase := range allCases {
   453  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
   454  		testNum,
   455  		strings.TrimSpace(testCase.original),
   456  		testCase.expected,
   457  		decomposed(testCase.original),
   458  		[]rune(testCase.original))*/
   459  		str := testCase.original
   460  		state := -1
   461  		var (
   462  			index int
   463  			c     string
   464  		)
   465  	GraphemeLoop:
   466  		for len(str) > 0 {
   467  			c, str, _, state = FirstGraphemeClusterInString(str, state)
   468  
   469  			if index >= len(testCase.expected) {
   470  				t.Errorf(`Test case %d %q failed: More grapheme clusters returned than expected %d`,
   471  					testNum,
   472  					testCase.original,
   473  					len(testCase.expected))
   474  				break
   475  			}
   476  
   477  			cluster := []rune(c)
   478  			if len(cluster) != len(testCase.expected[index]) {
   479  				t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d has %d codepoints %x, %d expected %x`,
   480  					testNum,
   481  					testCase.original,
   482  					index,
   483  					len(cluster),
   484  					cluster,
   485  					len(testCase.expected[index]),
   486  					testCase.expected[index])
   487  				break
   488  			}
   489  			for i, r := range cluster {
   490  				if r != testCase.expected[index][i] {
   491  					t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d is %x, expected %x`,
   492  						testNum,
   493  						testCase.original,
   494  						index,
   495  						cluster,
   496  						testCase.expected[index])
   497  					break GraphemeLoop
   498  				}
   499  			}
   500  
   501  			index++
   502  		}
   503  		if index < len(testCase.expected) {
   504  			t.Errorf(`Test case %d %q failed: Fewer grapheme clusters returned (%d) than expected (%d)`,
   505  				testNum,
   506  				testCase.original,
   507  				index,
   508  				len(testCase.expected))
   509  		}
   510  	}
   511  }
   512  
   513  // Benchmark the use of the Graphemes class.
   514  func BenchmarkGraphemesClass(b *testing.B) {
   515  	for i := 0; i < b.N; i++ {
   516  		g := NewGraphemes(benchmarkStr)
   517  		for g.Next() {
   518  			resultRunes = g.Runes()
   519  		}
   520  	}
   521  }
   522  
   523  // Benchmark the use of the Graphemes function for byte slices.
   524  func BenchmarkGraphemesFunctionBytes(b *testing.B) {
   525  	for i := 0; i < b.N; i++ {
   526  		var c []byte
   527  		state := -1
   528  		str := benchmarkBytes
   529  		for len(str) > 0 {
   530  			c, str, _, state = FirstGraphemeCluster(str, state)
   531  			resultRunes = []rune(string(c))
   532  		}
   533  	}
   534  }
   535  
   536  // Benchmark the use of the Graphemes function for strings.
   537  func BenchmarkGraphemesFunctionString(b *testing.B) {
   538  	for i := 0; i < b.N; i++ {
   539  		var c string
   540  		state := -1
   541  		str := benchmarkStr
   542  		for len(str) > 0 {
   543  			c, str, _, state = FirstGraphemeClusterInString(str, state)
   544  			resultRunes = []rune(c)
   545  		}
   546  	}
   547  }
   548  

View as plain text