package uniseg import "testing" // Test all official Unicode test cases for word boundaries using the byte slice // function. func TestWordCasesBytes(t *testing.T) { for testNum, testCase := range wordBreakTestCases { /*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`, testNum, strings.TrimSpace(testCase.original), testCase.expected, decomposed(testCase.original), []rune(testCase.original))*/ var ( word []byte index int ) state := -1 b := []byte(testCase.original) WordLoop: for index = 0; len(b) > 0; index++ { if index >= len(testCase.expected) { t.Errorf(`Test case %d %q failed: More words %d returned than expected %d`, testNum, testCase.original, index, len(testCase.expected)) break } word, b, state = FirstWord(b, state) cluster := []rune(string(word)) if len(cluster) != len(testCase.expected[index]) { t.Errorf(`Test case %d %q failed: Word at index %d has %d codepoints %x, %d expected %x`, testNum, testCase.original, index, len(cluster), cluster, len(testCase.expected[index]), testCase.expected[index]) break } for i, r := range cluster { if r != testCase.expected[index][i] { t.Errorf(`Test case %d %q failed: Word at index %d is %x, expected %x`, testNum, testCase.original, index, cluster, testCase.expected[index]) break WordLoop } } } if index < len(testCase.expected) { t.Errorf(`Test case %d %q failed: Fewer words returned (%d) than expected (%d)`, testNum, testCase.original, index, len(testCase.expected)) } } word, rest, newState := FirstWord([]byte{}, -1) if len(word) > 0 { t.Errorf(`Expected word to be empty byte slice, got %q`, word) } if len(rest) > 0 { t.Errorf(`Expected rest to be empty byte slice, got %q`, rest) } if newState != 0 { t.Errorf(`Expected newState to be 0, got %d`, newState) } } // Test all official Unicode test cases for word boundaries using the string // function. func TestWordCasesString(t *testing.T) { for testNum, testCase := range wordBreakTestCases { /*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`, testNum, strings.TrimSpace(testCase.original), testCase.expected, decomposed(testCase.original), []rune(testCase.original))*/ var ( word string index int ) state := -1 str := testCase.original WordLoop: for index = 0; len(str) > 0; index++ { if index >= len(testCase.expected) { t.Errorf(`Test case %d %q failed: More words %d returned than expected %d`, testNum, testCase.original, index, len(testCase.expected)) break } word, str, state = FirstWordInString(str, state) cluster := []rune(string(word)) if len(cluster) != len(testCase.expected[index]) { t.Errorf(`Test case %d %q failed: Word at index %d has %d codepoints %x, %d expected %x`, testNum, testCase.original, index, len(cluster), cluster, len(testCase.expected[index]), testCase.expected[index]) break } for i, r := range cluster { if r != testCase.expected[index][i] { t.Errorf(`Test case %d %q failed: Word at index %d is %x, expected %x`, testNum, testCase.original, index, cluster, testCase.expected[index]) break WordLoop } } } if index < len(testCase.expected) { t.Errorf(`Test case %d %q failed: Fewer words returned (%d) than expected (%d)`, testNum, testCase.original, index, len(testCase.expected)) } } word, rest, newState := FirstWordInString("", -1) if len(word) > 0 { t.Errorf(`Expected word to be empty string, got %q`, word) } if len(rest) > 0 { t.Errorf(`Expected rest to be empty string, got %q`, rest) } if newState != 0 { t.Errorf(`Expected newState to be 0, got %d`, newState) } } // Benchmark the use of the word break function for byte slices. func BenchmarkWordFunctionBytes(b *testing.B) { for i := 0; i < b.N; i++ { var c []byte state := -1 str := benchmarkBytes for len(str) > 0 { c, str, state = FirstWord(str, state) resultRunes = []rune(string(c)) } } } // Benchmark the use of the word break function for strings. func BenchmarkWordFunctionString(b *testing.B) { for i := 0; i < b.N; i++ { var c string state := -1 str := benchmarkStr for len(str) > 0 { c, str, state = FirstWordInString(str, state) resultRunes = []rune(c) } } }