word_test.go

Documentation: github.com/rivo/uniseg

     1  package uniseg
     2  
     3  import "testing"
     4  
     5  // Test all official Unicode test cases for word boundaries using the byte slice
     6  // function.
     7  func TestWordCasesBytes(t *testing.T) {
     8  	for testNum, testCase := range wordBreakTestCases {
     9  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
    10  		testNum,
    11  		strings.TrimSpace(testCase.original),
    12  		testCase.expected,
    13  		decomposed(testCase.original),
    14  		[]rune(testCase.original))*/
    15  		var (
    16  			word  []byte
    17  			index int
    18  		)
    19  		state := -1
    20  		b := []byte(testCase.original)
    21  	WordLoop:
    22  		for index = 0; len(b) > 0; index++ {
    23  			if index >= len(testCase.expected) {
    24  				t.Errorf(`Test case %d %q failed: More words %d returned than expected %d`,
    25  					testNum,
    26  					testCase.original,
    27  					index,
    28  					len(testCase.expected))
    29  				break
    30  			}
    31  			word, b, state = FirstWord(b, state)
    32  			cluster := []rune(string(word))
    33  			if len(cluster) != len(testCase.expected[index]) {
    34  				t.Errorf(`Test case %d %q failed: Word at index %d has %d codepoints %x, %d expected %x`,
    35  					testNum,
    36  					testCase.original,
    37  					index,
    38  					len(cluster),
    39  					cluster,
    40  					len(testCase.expected[index]),
    41  					testCase.expected[index])
    42  				break
    43  			}
    44  			for i, r := range cluster {
    45  				if r != testCase.expected[index][i] {
    46  					t.Errorf(`Test case %d %q failed: Word at index %d is %x, expected %x`,
    47  						testNum,
    48  						testCase.original,
    49  						index,
    50  						cluster,
    51  						testCase.expected[index])
    52  					break WordLoop
    53  				}
    54  			}
    55  		}
    56  		if index < len(testCase.expected) {
    57  			t.Errorf(`Test case %d %q failed: Fewer words returned (%d) than expected (%d)`,
    58  				testNum,
    59  				testCase.original,
    60  				index,
    61  				len(testCase.expected))
    62  		}
    63  	}
    64  	word, rest, newState := FirstWord([]byte{}, -1)
    65  	if len(word) > 0 {
    66  		t.Errorf(`Expected word to be empty byte slice, got %q`, word)
    67  	}
    68  	if len(rest) > 0 {
    69  		t.Errorf(`Expected rest to be empty byte slice, got %q`, rest)
    70  	}
    71  	if newState != 0 {
    72  		t.Errorf(`Expected newState to be 0, got %d`, newState)
    73  	}
    74  }
    75  
    76  // Test all official Unicode test cases for word boundaries using the string
    77  // function.
    78  func TestWordCasesString(t *testing.T) {
    79  	for testNum, testCase := range wordBreakTestCases {
    80  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
    81  		testNum,
    82  		strings.TrimSpace(testCase.original),
    83  		testCase.expected,
    84  		decomposed(testCase.original),
    85  		[]rune(testCase.original))*/
    86  		var (
    87  			word  string
    88  			index int
    89  		)
    90  		state := -1
    91  		str := testCase.original
    92  	WordLoop:
    93  		for index = 0; len(str) > 0; index++ {
    94  			if index >= len(testCase.expected) {
    95  				t.Errorf(`Test case %d %q failed: More words %d returned than expected %d`,
    96  					testNum,
    97  					testCase.original,
    98  					index,
    99  					len(testCase.expected))
   100  				break
   101  			}
   102  			word, str, state = FirstWordInString(str, state)
   103  			cluster := []rune(string(word))
   104  			if len(cluster) != len(testCase.expected[index]) {
   105  				t.Errorf(`Test case %d %q failed: Word at index %d has %d codepoints %x, %d expected %x`,
   106  					testNum,
   107  					testCase.original,
   108  					index,
   109  					len(cluster),
   110  					cluster,
   111  					len(testCase.expected[index]),
   112  					testCase.expected[index])
   113  				break
   114  			}
   115  			for i, r := range cluster {
   116  				if r != testCase.expected[index][i] {
   117  					t.Errorf(`Test case %d %q failed: Word at index %d is %x, expected %x`,
   118  						testNum,
   119  						testCase.original,
   120  						index,
   121  						cluster,
   122  						testCase.expected[index])
   123  					break WordLoop
   124  				}
   125  			}
   126  		}
   127  		if index < len(testCase.expected) {
   128  			t.Errorf(`Test case %d %q failed: Fewer words returned (%d) than expected (%d)`,
   129  				testNum,
   130  				testCase.original,
   131  				index,
   132  				len(testCase.expected))
   133  		}
   134  	}
   135  	word, rest, newState := FirstWordInString("", -1)
   136  	if len(word) > 0 {
   137  		t.Errorf(`Expected word to be empty string, got %q`, word)
   138  	}
   139  	if len(rest) > 0 {
   140  		t.Errorf(`Expected rest to be empty string, got %q`, rest)
   141  	}
   142  	if newState != 0 {
   143  		t.Errorf(`Expected newState to be 0, got %d`, newState)
   144  	}
   145  }
   146  
   147  // Benchmark the use of the word break function for byte slices.
   148  func BenchmarkWordFunctionBytes(b *testing.B) {
   149  	for i := 0; i < b.N; i++ {
   150  		var c []byte
   151  		state := -1
   152  		str := benchmarkBytes
   153  		for len(str) > 0 {
   154  			c, str, state = FirstWord(str, state)
   155  			resultRunes = []rune(string(c))
   156  		}
   157  	}
   158  }
   159  
   160  // Benchmark the use of the word break function for strings.
   161  func BenchmarkWordFunctionString(b *testing.B) {
   162  	for i := 0; i < b.N; i++ {
   163  		var c string
   164  		state := -1
   165  		str := benchmarkStr
   166  		for len(str) > 0 {
   167  			c, str, state = FirstWordInString(str, state)
   168  			resultRunes = []rune(c)
   169  		}
   170  	}
   171  }
   172
View as plain text