1 package uniseg
2
3 import (
4 "testing"
5 )
6
// benchmarkStr is the text every benchmark in this file segments.
const benchmarkStr = "This is 🏳️🌈, a test string ツ for grapheme cluster testing. 🏋🏽♀️🙂🙂 It's only relevant for benchmark tests."

var (
	// benchmarkBytes is benchmarkStr as a byte slice, for the byte-based
	// benchmark.
	benchmarkBytes = []byte(benchmarkStr)

	// resultRunes is the package-level sink the benchmarks assign their
	// results to (presumably so the measured work cannot be optimized away).
	resultRunes []rune
)
13
// testCase pairs an input string with the grapheme clusters it is expected to
// break into. Each cluster is listed as its sequence of code points.
type testCase = struct {
	original string
	expected [][]rune
}

// testCases are the hand-written grapheme cluster cases of this file.
//
// Every input that depends on code points which are invisible or which text
// tools may silently normalize (combining marks, conjoining jamo, zero-width
// joiners, variation selectors) is spelled with explicit escapes, so that
// concatenating a case's expected clusters always yields its original string.
var testCases = []testCase{
	{original: "", expected: [][]rune{}},
	{original: "x", expected: [][]rune{{0x78}}},
	{original: "basic", expected: [][]rune{{0x62}, {0x61}, {0x73}, {0x69}, {0x63}}},
	// "m", "o" + combining diaeresis, "p".
	{original: "mo\u0308p", expected: [][]rune{{0x6d}, {0x6f, 0x308}, {0x70}}},
	{original: "\r\n", expected: [][]rune{{0xd, 0xa}}},
	{original: "\n\n", expected: [][]rune{{0xa}, {0xa}}},
	{original: "\t*", expected: [][]rune{{0x9}, {0x2a}}},
	// One Hangul syllable written as three conjoining jamo.
	{original: "\u1105\u116c\u11ab", expected: [][]rune{{0x1105, 0x116c, 0x11ab}}},
	// Syriac letters with U+070F in front of the second one.
	{original: "\u0710\u070f\u0712\u0713\u0715", expected: [][]rune{{0x710}, {0x70f, 0x712}, {0x713}, {0x715}}},
	// Thai U+0E33 on its own, doubled, and at the end of a word.
	{original: "\u0e33", expected: [][]rune{{0xe33}}},
	{original: "\u0e33\u0e33", expected: [][]rune{{0xe33, 0xe33}}},
	{original: "\u0e2a\u0e23\u0e30\u0e2d\u0e33", expected: [][]rune{{0xe2a}, {0xe23}, {0xe30}, {0xe2d, 0xe33}}},
	{original: "*\u1105\u116c\u11ab*", expected: [][]rune{{0x2a}, {0x1105, 0x116c, 0x11ab}, {0x2a}}},
	// Emoji joined by U+200D into a single cluster.
	{original: "*\U0001f469\u200d\u2764\ufe0f\u200d\U0001f48b\u200d\U0001f469*", expected: [][]rune{{0x2a}, {0x1f469, 0x200d, 0x2764, 0xfe0f, 0x200d, 0x1f48b, 0x200d, 0x1f469}, {0x2a}}},
	{original: "\U0001f469\u200d\u2764\ufe0f\u200d\U0001f48b\u200d\U0001f469", expected: [][]rune{{0x1f469, 0x200d, 0x2764, 0xfe0f, 0x200d, 0x1f48b, 0x200d, 0x1f469}}},
	{original: "\U0001f3cb\U0001f3fd\u200d\u2640\ufe0f", expected: [][]rune{{0x1f3cb, 0x1f3fd, 0x200d, 0x2640, 0xfe0f}}},
	{original: "\U0001f642", expected: [][]rune{{0x1f642}}},
	{original: "\U0001f642\U0001f642", expected: [][]rune{{0x1f642}, {0x1f642}}},
	// Two regional indicators forming one flag.
	{original: "\U0001f1e9\U0001f1ea", expected: [][]rune{{0x1f1e9, 0x1f1ea}}},
	// Flag + variation selector + U+200D + rainbow.
	{original: "\U0001f3f3\ufe0f\u200d\U0001f308", expected: [][]rune{{0x1f3f3, 0xfe0f, 0x200d, 0x1f308}}},
	{original: "\t\U0001f3f3\ufe0f\u200d\U0001f308", expected: [][]rune{{0x9}, {0x1f3f3, 0xfe0f, 0x200d, 0x1f308}}},
	{original: "\t\U0001f3f3\ufe0f\u200d\U0001f308\t", expected: [][]rune{{0x9}, {0x1f3f3, 0xfe0f, 0x200d, 0x1f308}, {0x9}}},
	{original: "\r\n\uFE0E", expected: [][]rune{{13, 10}, {0xfe0e}}},
}
45
46
47 func decomposed(s string) (runes [][]rune) {
48 gr := NewGraphemes(s)
49 for gr.Next() {
50 runes = append(runes, gr.Runes())
51 }
52 return
53 }
54
55
56 func TestGraphemesClass(t *testing.T) {
57 allCases := append(testCases, graphemeBreakTestCases...)
58 for testNum, testCase := range allCases {
59
65 gr := NewGraphemes(testCase.original)
66 var index int
67 GraphemeLoop:
68 for index = 0; gr.Next(); index++ {
69 if index >= len(testCase.expected) {
70 t.Errorf(`Test case %d %q failed: More grapheme clusters returned than expected %d`,
71 testNum,
72 testCase.original,
73 len(testCase.expected))
74 break
75 }
76 cluster := gr.Runes()
77 if len(cluster) != len(testCase.expected[index]) {
78 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d has %d codepoints %x, %d expected %x`,
79 testNum,
80 testCase.original,
81 index,
82 len(cluster),
83 cluster,
84 len(testCase.expected[index]),
85 testCase.expected[index])
86 break
87 }
88 for i, r := range cluster {
89 if r != testCase.expected[index][i] {
90 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d is %x, expected %x`,
91 testNum,
92 testCase.original,
93 index,
94 cluster,
95 testCase.expected[index])
96 break GraphemeLoop
97 }
98 }
99 }
100 if index < len(testCase.expected) {
101 t.Errorf(`Test case %d %q failed: Fewer grapheme clusters returned (%d) than expected (%d)`,
102 testNum,
103 testCase.original,
104 index,
105 len(testCase.expected))
106 }
107 }
108 }
109
110
111
112 func TestGraphemesClassWord(t *testing.T) {
113 for testNum, testCase := range wordBreakTestCases {
114 if testNum == 1700 {
115
116
117
118
119 continue
120 }
121
127 gr := NewGraphemes(testCase.original)
128 var (
129 index int
130 cluster []rune
131 )
132 if !gr.IsWordBoundary() {
133 t.Error("Expected initial IsWordBoundary to be true, got false")
134 }
135 GraphemeLoop:
136 for gr.Next() {
137 if index >= len(testCase.expected) {
138 t.Errorf(`Test case %d %q failed: More words returned than expected %d`,
139 testNum,
140 testCase.original,
141 len(testCase.expected))
142 break
143 }
144 cluster = append(cluster, gr.Runes()...)
145 if gr.IsWordBoundary() {
146 if len(cluster) != len(testCase.expected[index]) {
147 t.Errorf(`Test case %d %q failed: Word at index %d has %d codepoints %x, %d expected %x`,
148 testNum,
149 testCase.original,
150 index,
151 len(cluster),
152 cluster,
153 len(testCase.expected[index]),
154 testCase.expected[index])
155 break
156 }
157 for i, r := range cluster {
158 if r != testCase.expected[index][i] {
159 t.Errorf(`Test case %d %q failed: Word at index %d is %x, expected %x`,
160 testNum,
161 testCase.original,
162 index,
163 cluster,
164 testCase.expected[index])
165 break GraphemeLoop
166 }
167 }
168 cluster = nil
169 index++
170 }
171 }
172 if index < len(testCase.expected) {
173 t.Errorf(`Test case %d %q failed: Fewer words returned (%d) than expected (%d)`,
174 testNum,
175 testCase.original,
176 index,
177 len(testCase.expected))
178 }
179 }
180 }
181
182
183
184 func TestGraphemesClassSentence(t *testing.T) {
185 for testNum, testCase := range sentenceBreakTestCases {
186
192 gr := NewGraphemes(testCase.original)
193 var (
194 index int
195 cluster []rune
196 )
197 if !gr.IsSentenceBoundary() {
198 t.Error("Expected initial IsSentenceBoundary to be true, got false")
199 }
200 GraphemeLoop:
201 for gr.Next() {
202 if index >= len(testCase.expected) {
203 t.Errorf(`Test case %d %q failed: More sentences returned than expected %d`,
204 testNum,
205 testCase.original,
206 len(testCase.expected))
207 break
208 }
209 cluster = append(cluster, gr.Runes()...)
210 if gr.IsSentenceBoundary() {
211 if len(cluster) != len(testCase.expected[index]) {
212 t.Errorf(`Test case %d %q failed: Sentence at index %d has %d codepoints %x, %d expected %x`,
213 testNum,
214 testCase.original,
215 index,
216 len(cluster),
217 cluster,
218 len(testCase.expected[index]),
219 testCase.expected[index])
220 break
221 }
222 for i, r := range cluster {
223 if r != testCase.expected[index][i] {
224 t.Errorf(`Test case %d %q failed: Sentence at index %d is %x, expected %x`,
225 testNum,
226 testCase.original,
227 index,
228 cluster,
229 testCase.expected[index])
230 break GraphemeLoop
231 }
232 }
233 cluster = nil
234 index++
235 }
236 }
237 if index < len(testCase.expected) {
238 t.Errorf(`Test case %d %q failed: Fewer sentences returned (%d) than expected (%d)`,
239 testNum,
240 testCase.original,
241 index,
242 len(testCase.expected))
243 }
244 }
245 }
246
247
248 func TestGraphemesStr(t *testing.T) {
249 gr := NewGraphemes("möp")
250 gr.Next()
251 gr.Next()
252 gr.Next()
253 if str := gr.Str(); str != "p" {
254 t.Errorf(`Expected "p", got %q`, str)
255 }
256 }
257
258
259 func TestGraphemesBytes(t *testing.T) {
260 gr := NewGraphemes("A👩❤️💋👩B")
261 gr.Next()
262 gr.Next()
263 gr.Next()
264 b := gr.Bytes()
265 if len(b) != 1 {
266 t.Fatalf(`Expected len("B") == 1, got %d`, len(b))
267 }
268 if b[0] != 'B' {
269 t.Errorf(`Expected "B", got %q`, string(b[0]))
270 }
271 }
272
273
274 func TestGraphemesPositions(t *testing.T) {
275 gr := NewGraphemes("A👩❤️💋👩B")
276 gr.Next()
277 gr.Next()
278 from, to := gr.Positions()
279 if from != 1 || to != 28 {
280 t.Errorf(`Expected from=%d to=%d, got from=%d to=%d`, 1, 28, from, to)
281 }
282 }
283
284
285 func TestGraphemesReset(t *testing.T) {
286 gr := NewGraphemes("möp")
287 gr.Next()
288 gr.Next()
289 gr.Next()
290 gr.Reset()
291 gr.Next()
292 if str := gr.Str(); str != "m" {
293 t.Errorf(`Expected "m", got %q`, str)
294 }
295 }
296
297
298 func TestGraphemesEarly(t *testing.T) {
299 gr := NewGraphemes("test")
300 r := gr.Runes()
301 if r != nil {
302 t.Errorf(`Expected nil rune slice, got %x`, r)
303 }
304 str := gr.Str()
305 if str != "" {
306 t.Errorf(`Expected empty string, got %q`, str)
307 }
308 b := gr.Bytes()
309 if b != nil {
310 t.Errorf(`Expected byte rune slice, got %x`, b)
311 }
312 from, to := gr.Positions()
313 if from != 0 || to != 0 {
314 t.Errorf(`Expected from=%d to=%d, got from=%d to=%d`, 0, 0, from, to)
315 }
316 }
317
318
319 func TestGraphemesLate(t *testing.T) {
320 gr := NewGraphemes("x")
321 gr.Next()
322 gr.Next()
323 r := gr.Runes()
324 if r != nil {
325 t.Errorf(`Expected nil rune slice, got %x`, r)
326 }
327 str := gr.Str()
328 if str != "" {
329 t.Errorf(`Expected empty string, got %q`, str)
330 }
331 b := gr.Bytes()
332 if b != nil {
333 t.Errorf(`Expected byte rune slice, got %x`, b)
334 }
335 from, to := gr.Positions()
336 if from != 1 || to != 1 {
337 t.Errorf(`Expected from=%d to=%d, got from=%d to=%d`, 1, 1, from, to)
338 }
339 }
340
341
342 func TestGraphemesCount(t *testing.T) {
343 if n := GraphemeClusterCount("🇩🇪🏳️🌈"); n != 2 {
344 t.Errorf(`Expected 2 grapheme clusters, got %d`, n)
345 }
346 }
347
348
349 func TestReverseString(t *testing.T) {
350 for _, testCase := range testCases {
351 var r []rune
352 for index := len(testCase.expected) - 1; index >= 0; index-- {
353 r = append(r, testCase.expected[index]...)
354 }
355 if string(r) != ReverseString(testCase.original) {
356 t.Errorf(`Exepected reverse of %q to be %q, got %q`, testCase.original, string(r), ReverseString(testCase.original))
357 }
358 }
359
360
361 if ReverseString("🇩🇪🏳️🌈") != "🏳️🌈🇩🇪" {
362 t.Error("Flags weren't reversed correctly")
363 }
364 if ReverseString("🏳️🌈") != "🏳️🌈" {
365 t.Error("Flag wasn't reversed correctly")
366 }
367 if ReverseString("") != "" {
368 t.Error("Empty string wasn't reversed correctly")
369 }
370 }
371
372
373 func TestGraphemesFunctionBytes(t *testing.T) {
374 allCases := append(testCases, graphemeBreakTestCases...)
375 for testNum, testCase := range allCases {
376
382 b := []byte(testCase.original)
383 state := -1
384 var (
385 index int
386 c []byte
387 )
388 GraphemeLoop:
389 for len(b) > 0 {
390 c, b, _, state = FirstGraphemeCluster(b, state)
391
392 if index >= len(testCase.expected) {
393 t.Errorf(`Test case %d %q failed: More grapheme clusters returned than expected %d`,
394 testNum,
395 testCase.original,
396 len(testCase.expected))
397 break
398 }
399
400 cluster := []rune(string(c))
401 if len(cluster) != len(testCase.expected[index]) {
402 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d has %d codepoints %x, %d expected %x`,
403 testNum,
404 testCase.original,
405 index,
406 len(cluster),
407 cluster,
408 len(testCase.expected[index]),
409 testCase.expected[index])
410 break
411 }
412 for i, r := range cluster {
413 if r != testCase.expected[index][i] {
414 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d is %x, expected %x`,
415 testNum,
416 testCase.original,
417 index,
418 cluster,
419 testCase.expected[index])
420 break GraphemeLoop
421 }
422 }
423
424 index++
425 }
426 if index < len(testCase.expected) {
427 t.Errorf(`Test case %d %q failed: Fewer grapheme clusters returned (%d) than expected (%d)`,
428 testNum,
429 testCase.original,
430 index,
431 len(testCase.expected))
432 }
433 }
434 cluster, rest, width, newState := FirstGraphemeCluster([]byte{}, 0)
435 if len(cluster) > 0 {
436 t.Errorf(`Expected cluster to be empty byte slice, got %q`, cluster)
437 }
438 if len(rest) > 0 {
439 t.Errorf(`Expected rest to be empty byte slice, got %q`, rest)
440 }
441 if width != 0 {
442 t.Errorf(`Expected width to be 0, got %d`, width)
443 }
444 if newState != 0 {
445 t.Errorf(`Expected newState to be 0, got %d`, newState)
446 }
447 }
448
449
450 func TestGraphemesFunctionString(t *testing.T) {
451 allCases := append(testCases, graphemeBreakTestCases...)
452 for testNum, testCase := range allCases {
453
459 str := testCase.original
460 state := -1
461 var (
462 index int
463 c string
464 )
465 GraphemeLoop:
466 for len(str) > 0 {
467 c, str, _, state = FirstGraphemeClusterInString(str, state)
468
469 if index >= len(testCase.expected) {
470 t.Errorf(`Test case %d %q failed: More grapheme clusters returned than expected %d`,
471 testNum,
472 testCase.original,
473 len(testCase.expected))
474 break
475 }
476
477 cluster := []rune(c)
478 if len(cluster) != len(testCase.expected[index]) {
479 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d has %d codepoints %x, %d expected %x`,
480 testNum,
481 testCase.original,
482 index,
483 len(cluster),
484 cluster,
485 len(testCase.expected[index]),
486 testCase.expected[index])
487 break
488 }
489 for i, r := range cluster {
490 if r != testCase.expected[index][i] {
491 t.Errorf(`Test case %d %q failed: Grapheme cluster at index %d is %x, expected %x`,
492 testNum,
493 testCase.original,
494 index,
495 cluster,
496 testCase.expected[index])
497 break GraphemeLoop
498 }
499 }
500
501 index++
502 }
503 if index < len(testCase.expected) {
504 t.Errorf(`Test case %d %q failed: Fewer grapheme clusters returned (%d) than expected (%d)`,
505 testNum,
506 testCase.original,
507 index,
508 len(testCase.expected))
509 }
510 }
511 }
512
513
514 func BenchmarkGraphemesClass(b *testing.B) {
515 for i := 0; i < b.N; i++ {
516 g := NewGraphemes(benchmarkStr)
517 for g.Next() {
518 resultRunes = g.Runes()
519 }
520 }
521 }
522
523
524 func BenchmarkGraphemesFunctionBytes(b *testing.B) {
525 for i := 0; i < b.N; i++ {
526 var c []byte
527 state := -1
528 str := benchmarkBytes
529 for len(str) > 0 {
530 c, str, _, state = FirstGraphemeCluster(str, state)
531 resultRunes = []rune(string(c))
532 }
533 }
534 }
535
536
537 func BenchmarkGraphemesFunctionString(b *testing.B) {
538 for i := 0; i < b.N; i++ {
539 var c string
540 state := -1
541 str := benchmarkStr
542 for len(str) > 0 {
543 c, str, _, state = FirstGraphemeClusterInString(str, state)
544 resultRunes = []rune(c)
545 }
546 }
547 }
548
View as plain text