sentence_test.go

Documentation: github.com/rivo/uniseg

     1  package uniseg
     2  
     3  import (
     4  	"testing"
     5  )
     6  
     7  // Test all official Unicode test cases for sentence boundaries using the byte
     8  // slice function.
     9  func TestSentenceCasesBytes(t *testing.T) {
    10  	for testNum, testCase := range sentenceBreakTestCases {
    11  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
    12  		testNum,
    13  		strings.TrimSpace(testCase.original),
    14  		testCase.expected,
    15  		decomposed(testCase.original),
    16  		[]rune(testCase.original))*/
    17  		var (
    18  			sentence []byte
    19  			index    int
    20  		)
    21  		state := -1
    22  		b := []byte(testCase.original)
    23  	WordLoop:
    24  		for index = 0; len(b) > 0; index++ {
    25  			if index >= len(testCase.expected) {
    26  				t.Errorf(`Test case %d %q failed: More sentences %d returned than expected %d`,
    27  					testNum,
    28  					testCase.original,
    29  					index,
    30  					len(testCase.expected))
    31  				break
    32  			}
    33  			sentence, b, state = FirstSentence(b, state)
    34  			cluster := []rune(string(sentence))
    35  			if len(cluster) != len(testCase.expected[index]) {
    36  				t.Errorf(`Test case %d %q failed: Sentence at index %d has %d codepoints %x, %d expected %x`,
    37  					testNum,
    38  					testCase.original,
    39  					index,
    40  					len(cluster),
    41  					cluster,
    42  					len(testCase.expected[index]),
    43  					testCase.expected[index])
    44  				break
    45  			}
    46  			for i, r := range cluster {
    47  				if r != testCase.expected[index][i] {
    48  					t.Errorf(`Test case %d %q failed: Sentence at index %d is %x, expected %x`,
    49  						testNum,
    50  						testCase.original,
    51  						index,
    52  						cluster,
    53  						testCase.expected[index])
    54  					break WordLoop
    55  				}
    56  			}
    57  		}
    58  		if index < len(testCase.expected) {
    59  			t.Errorf(`Test case %d %q failed: Fewer sentences returned (%d) than expected (%d)`,
    60  				testNum,
    61  				testCase.original,
    62  				index,
    63  				len(testCase.expected))
    64  		}
    65  	}
    66  	sentence, rest, newState := FirstSentence([]byte{}, -1)
    67  	if len(sentence) > 0 {
    68  		t.Errorf(`Expected sentence to be empty byte slice, got %q`, sentence)
    69  	}
    70  	if len(rest) > 0 {
    71  		t.Errorf(`Expected rest to be empty byte slice, got %q`, rest)
    72  	}
    73  	if newState != 0 {
    74  		t.Errorf(`Expected newState to be 0, got %d`, newState)
    75  	}
    76  }
    77  
    78  // Test all official Unicode test cases for sentence boundaries using the string
    79  // function.
    80  func TestSentenceCasesString(t *testing.T) {
    81  	for testNum, testCase := range sentenceBreakTestCases {
    82  		/*t.Logf(`Test case %d %q: Expecting %x, getting %x, code points %x"`,
    83  		testNum,
    84  		strings.TrimSpace(testCase.original),
    85  		testCase.expected,
    86  		decomposed(testCase.original),
    87  		[]rune(testCase.original))*/
    88  		var (
    89  			sentence string
    90  			index    int
    91  		)
    92  		state := -1
    93  		str := testCase.original
    94  	WordLoop:
    95  		for index = 0; len(str) > 0; index++ {
    96  			if index >= len(testCase.expected) {
    97  				t.Errorf(`Test case %d %q failed: More sentences %d returned than expected %d`,
    98  					testNum,
    99  					testCase.original,
   100  					index,
   101  					len(testCase.expected))
   102  				break
   103  			}
   104  			sentence, str, state = FirstSentenceInString(str, state)
   105  			cluster := []rune(string(sentence))
   106  			if len(cluster) != len(testCase.expected[index]) {
   107  				t.Errorf(`Test case %d %q failed: Sentence at index %d has %d codepoints %x, %d expected %x`,
   108  					testNum,
   109  					testCase.original,
   110  					index,
   111  					len(cluster),
   112  					cluster,
   113  					len(testCase.expected[index]),
   114  					testCase.expected[index])
   115  				break
   116  			}
   117  			for i, r := range cluster {
   118  				if r != testCase.expected[index][i] {
   119  					t.Errorf(`Test case %d %q failed: Sentence at index %d is %x, expected %x`,
   120  						testNum,
   121  						testCase.original,
   122  						index,
   123  						cluster,
   124  						testCase.expected[index])
   125  					break WordLoop
   126  				}
   127  			}
   128  		}
   129  		if index < len(testCase.expected) {
   130  			t.Errorf(`Test case %d %q failed: Fewer sentences returned (%d) than expected (%d)`,
   131  				testNum,
   132  				testCase.original,
   133  				index,
   134  				len(testCase.expected))
   135  		}
   136  	}
   137  	sentence, rest, newState := FirstSentenceInString("", -1)
   138  	if len(sentence) > 0 {
   139  		t.Errorf(`Expected sentence to be empty string, got %q`, sentence)
   140  	}
   141  	if len(rest) > 0 {
   142  		t.Errorf(`Expected rest to be empty string, got %q`, rest)
   143  	}
   144  	if newState != 0 {
   145  		t.Errorf(`Expected newState to be 0, got %d`, newState)
   146  	}
   147  }
   148  
   149  // Benchmark the use of the sentence break function for byte slices.
   150  func BenchmarkSentenceFunctionBytes(b *testing.B) {
   151  	for i := 0; i < b.N; i++ {
   152  		var c []byte
   153  		state := -1
   154  		str := benchmarkBytes
   155  		for len(str) > 0 {
   156  			c, str, state = FirstSentence(str, state)
   157  			resultRunes = []rune(string(c))
   158  		}
   159  	}
   160  }
   161  
   162  // Benchmark the use of the sentence break function for strings.
   163  func BenchmarkSentenceFunctionString(b *testing.B) {
   164  	for i := 0; i < b.N; i++ {
   165  		var c string
   166  		state := -1
   167  		str := benchmarkStr
   168  		for len(str) > 0 {
   169  			c, str, state = FirstSentenceInString(str, state)
   170  			resultRunes = []rune(c)
   171  		}
   172  	}
   173  }
   174
View as plain text