1 package uniseg
2
3 import (
4 "testing"
5 )
6
7
8
9 func TestStepBytesGrapheme(t *testing.T) {
10 for testNum, testCase := range graphemeBreakTestCases {
11
17 b := []byte(testCase.original)
18 state := -1
19 var (
20 index int
21 c []byte
22 )
23 GraphemeLoop:
24 for len(b) > 0 {
25 c, b, _, state = Step(b, state)
26
27 if index >= len(testCase.expected) {
28 t.Errorf(`Test case %d %q failed: More grapheme clusters returned than expected %d`,
29 testNum,
30 testCase.original,
31 len(testCase.expected))
32 break
33 }
34
35 cluster := []rune(string(c))
36 if len(cluster) != len(testCase.expected[index]) {
37 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d has %d codepoints %x, %d expected %x`,
38 testNum,
39 testCase.original,
40 index,
41 len(cluster),
42 cluster,
43 len(testCase.expected[index]),
44 testCase.expected[index])
45 break
46 }
47 for i, r := range cluster {
48 if r != testCase.expected[index][i] {
49 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d is %x, expected %x`,
50 testNum,
51 testCase.original,
52 index,
53 cluster,
54 testCase.expected[index])
55 break GraphemeLoop
56 }
57 }
58
59 index++
60 }
61 if index < len(testCase.expected) {
62 t.Errorf(`Test case %d %q failed: Fewer grapheme clusters returned (%d) than expected (%d)`,
63 testNum,
64 testCase.original,
65 index,
66 len(testCase.expected))
67 }
68 }
69 cluster, rest, boundaries, newState := Step([]byte{}, -1)
70 if len(cluster) > 0 {
71 t.Errorf(`Expected cluster to be empty byte slice, got %q`, cluster)
72 }
73 if len(rest) > 0 {
74 t.Errorf(`Expected rest to be empty byte slice, got %q`, rest)
75 }
76 if boundaries != 0 {
77 t.Errorf(`Expected width to be 0, got %d`, boundaries)
78 }
79 if newState != 0 {
80 t.Errorf(`Expected newState to be 0, got %d`, newState)
81 }
82 }
83
84
85
86 func TestStepBytesWord(t *testing.T) {
87 for testNum, testCase := range wordBreakTestCases {
88 if testNum == 1700 {
89
90
91
92
93 continue
94 }
95
101 b := []byte(testCase.original)
102 state := -1
103 var (
104 index, boundaries int
105 c []byte
106 growingCluster []rune
107 )
108 GraphemeLoop:
109 for len(b) > 0 {
110 c, b, boundaries, state = Step(b, state)
111
112 if index >= len(testCase.expected) {
113 t.Errorf(`Test case %d %q failed: More words returned than expected %d`,
114 testNum,
115 testCase.original,
116 len(testCase.expected))
117 break
118 }
119
120 growingCluster = append(growingCluster, []rune(string(c))...)
121 if boundaries&MaskWord == 0 {
122 continue
123 }
124 cluster := growingCluster
125 growingCluster = nil
126 if len(cluster) != len(testCase.expected[index]) {
127 t.Errorf(`Test case %d %q failed: Word at index %d has %d codepoints %x, %d expected %x`,
128 testNum,
129 testCase.original,
130 index,
131 len(cluster),
132 cluster,
133 len(testCase.expected[index]),
134 testCase.expected[index])
135 break
136 }
137 for i, r := range cluster {
138 if r != testCase.expected[index][i] {
139 t.Errorf(`Test case %d %q failed: Word at index %d is %x, expected %x`,
140 testNum,
141 testCase.original,
142 index,
143 cluster,
144 testCase.expected[index])
145 break GraphemeLoop
146 }
147 }
148
149 index++
150 }
151 if index < len(testCase.expected) {
152 t.Errorf(`Test case %d %q failed: Fewer words returned (%d) than expected (%d)`,
153 testNum,
154 testCase.original,
155 index,
156 len(testCase.expected))
157 }
158 }
159 }
160
161
162
163 func TestStepBytesSentence(t *testing.T) {
164 for testNum, testCase := range sentenceBreakTestCases {
165
171 b := []byte(testCase.original)
172 state := -1
173 var (
174 index, boundaries int
175 c []byte
176 growingCluster []rune
177 )
178 GraphemeLoop:
179 for len(b) > 0 {
180 c, b, boundaries, state = Step(b, state)
181
182 if index >= len(testCase.expected) {
183 t.Errorf(`Test case %d %q failed: More sentences returned than expected %d`,
184 testNum,
185 testCase.original,
186 len(testCase.expected))
187 break
188 }
189
190 growingCluster = append(growingCluster, []rune(string(c))...)
191 if boundaries&MaskSentence == 0 {
192 continue
193 }
194 cluster := growingCluster
195 growingCluster = nil
196 if len(cluster) != len(testCase.expected[index]) {
197 t.Errorf(`Test case %d %q failed: Sentence at index %d has %d codepoints %x, %d expected %x`,
198 testNum,
199 testCase.original,
200 index,
201 len(cluster),
202 cluster,
203 len(testCase.expected[index]),
204 testCase.expected[index])
205 break
206 }
207 for i, r := range cluster {
208 if r != testCase.expected[index][i] {
209 t.Errorf(`Test case %d %q failed: Sentence at index %d is %x, expected %x`,
210 testNum,
211 testCase.original,
212 index,
213 cluster,
214 testCase.expected[index])
215 break GraphemeLoop
216 }
217 }
218
219 index++
220 }
221 if index < len(testCase.expected) {
222 t.Errorf(`Test case %d %q failed: Fewer sentences returned (%d) than expected (%d)`,
223 testNum,
224 testCase.original,
225 index,
226 len(testCase.expected))
227 }
228 }
229 }
230
231
232
233
234
235
236
237
238
239 func TestStepStringGrapheme(t *testing.T) {
240 for testNum, testCase := range graphemeBreakTestCases {
241
247 str := testCase.original
248 state := -1
249 var (
250 index int
251 c string
252 )
253 GraphemeLoop:
254 for len(str) > 0 {
255 c, str, _, state = StepString(str, state)
256
257 if index >= len(testCase.expected) {
258 t.Errorf(`Test case %d %q failed: More grapheme clusters returned than expected %d`,
259 testNum,
260 testCase.original,
261 len(testCase.expected))
262 break
263 }
264
265 cluster := []rune(c)
266 if len(cluster) != len(testCase.expected[index]) {
267 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d has %d codepoints %x, %d expected %x`,
268 testNum,
269 testCase.original,
270 index,
271 len(cluster),
272 cluster,
273 len(testCase.expected[index]),
274 testCase.expected[index])
275 break
276 }
277 for i, r := range cluster {
278 if r != testCase.expected[index][i] {
279 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d is %x, expected %x`,
280 testNum,
281 testCase.original,
282 index,
283 cluster,
284 testCase.expected[index])
285 break GraphemeLoop
286 }
287 }
288
289 index++
290 }
291 if index < len(testCase.expected) {
292 t.Errorf(`Test case %d %q failed: Fewer grapheme clusters returned (%d) than expected (%d)`,
293 testNum,
294 testCase.original,
295 index,
296 len(testCase.expected))
297 }
298 }
299 cluster, rest, boundaries, newState := StepString("", -1)
300 if len(cluster) > 0 {
301 t.Errorf(`Expected cluster to be empty string, got %q`, cluster)
302 }
303 if len(rest) > 0 {
304 t.Errorf(`Expected rest to be empty string, got %q`, rest)
305 }
306 if boundaries != 0 {
307 t.Errorf(`Expected width to be 0, got %d`, boundaries)
308 }
309 if newState != 0 {
310 t.Errorf(`Expected newState to be 0, got %d`, newState)
311 }
312 }
313
314
315
316 func TestStepStringWord(t *testing.T) {
317 for testNum, testCase := range wordBreakTestCases {
318 if testNum == 1700 {
319
320
321
322
323 continue
324 }
325
331 str := testCase.original
332 state := -1
333 var (
334 index, boundaries int
335 c string
336 growingCluster []rune
337 )
338 GraphemeLoop:
339 for len(str) > 0 {
340 c, str, boundaries, state = StepString(str, state)
341
342 if index >= len(testCase.expected) {
343 t.Errorf(`Test case %d %q failed: More words returned than expected %d`,
344 testNum,
345 testCase.original,
346 len(testCase.expected))
347 break
348 }
349
350 growingCluster = append(growingCluster, []rune(c)...)
351 if boundaries&MaskWord == 0 {
352 continue
353 }
354 cluster := growingCluster
355 growingCluster = nil
356 if len(cluster) != len(testCase.expected[index]) {
357 t.Errorf(`Test case %d %q failed: Word at index %d has %d codepoints %x, %d expected %x`,
358 testNum,
359 testCase.original,
360 index,
361 len(cluster),
362 cluster,
363 len(testCase.expected[index]),
364 testCase.expected[index])
365 break
366 }
367 for i, r := range cluster {
368 if r != testCase.expected[index][i] {
369 t.Errorf(`Test case %d %q failed: Word at index %d is %x, expected %x`,
370 testNum,
371 testCase.original,
372 index,
373 cluster,
374 testCase.expected[index])
375 break GraphemeLoop
376 }
377 }
378
379 index++
380 }
381 if index < len(testCase.expected) {
382 t.Errorf(`Test case %d %q failed: Fewer words returned (%d) than expected (%d)`,
383 testNum,
384 testCase.original,
385 index,
386 len(testCase.expected))
387 }
388 }
389 }
390
391
392
393 func TestStepStringSentence(t *testing.T) {
394 for testNum, testCase := range sentenceBreakTestCases {
395
401 str := testCase.original
402 state := -1
403 var (
404 index, boundaries int
405 c string
406 growingCluster []rune
407 )
408 GraphemeLoop:
409 for len(str) > 0 {
410 c, str, boundaries, state = StepString(str, state)
411
412 if index >= len(testCase.expected) {
413 t.Errorf(`Test case %d %q failed: More sentences returned than expected %d`,
414 testNum,
415 testCase.original,
416 len(testCase.expected))
417 break
418 }
419
420 growingCluster = append(growingCluster, []rune(c)...)
421 if boundaries&MaskSentence == 0 {
422 continue
423 }
424 cluster := growingCluster
425 growingCluster = nil
426 if len(cluster) != len(testCase.expected[index]) {
427 t.Errorf(`Test case %d %q failed: Sentence at index %d has %d codepoints %x, %d expected %x`,
428 testNum,
429 testCase.original,
430 index,
431 len(cluster),
432 cluster,
433 len(testCase.expected[index]),
434 testCase.expected[index])
435 break
436 }
437 for i, r := range cluster {
438 if r != testCase.expected[index][i] {
439 t.Errorf(`Test case %d %q failed: Sentence at index %d is %x, expected %x`,
440 testNum,
441 testCase.original,
442 index,
443 cluster,
444 testCase.expected[index])
445 break GraphemeLoop
446 }
447 }
448
449 index++
450 }
451 if index < len(testCase.expected) {
452 t.Errorf(`Test case %d %q failed: Fewer sentences returned (%d) than expected (%d)`,
453 testNum,
454 testCase.original,
455 index,
456 len(testCase.expected))
457 }
458 }
459 }
460
461
462 func BenchmarkStepBytes(b *testing.B) {
463 for i := 0; i < b.N; i++ {
464 var c []byte
465 state := -1
466 str := benchmarkBytes
467 for len(str) > 0 {
468 c, str, _, state = Step(str, state)
469 resultRunes = []rune(string(c))
470 }
471 }
472 }
473
474
475 func BenchmarkStepString(b *testing.B) {
476 for i := 0; i < b.N; i++ {
477 var c string
478 state := -1
479 str := benchmarkStr
480 for len(str) > 0 {
481 c, str, _, state = StepString(str, state)
482 resultRunes = []rune(c)
483 }
484 }
485 }
486
487
488 func FuzzStepString(f *testing.F) {
489 for _, tc := range graphemeBreakTestCases {
490 f.Add(tc.original)
491 }
492 f.Fuzz(func(t *testing.T, orig string) {
493 var (
494 c string
495 b []byte
496 boundaries int
497 )
498 str := orig
499 state := -1
500 for len(str) > 0 {
501 c, str, boundaries, state = StepString(str, state)
502 b = append(b, []byte(c)...)
503 }
504
505
506 if string(b) != orig {
507 t.Errorf("Fuzzing failed: %q != %q", string(b), orig)
508 }
509
510
511 if orig == "" {
512 return
513 }
514
515
516 if boundaries&MaskWord == 0 {
517 t.Errorf("String %q does not end on a word boundary (final boundary = %x)", orig, state)
518 }
519 if boundaries&MaskSentence == 0 {
520 t.Errorf("String %q does not end on a sentence boundary (final boundary = %x)", orig, state)
521 }
522 if boundaries&MaskLine != LineMustBreak {
523 t.Errorf("String %q does not end with a mandatory line break (final boundary = %x)", orig, state)
524 }
525
526
527
528
529 })
530 }
531
View as plain text