1 package uniseg
2
3 import "unicode/utf8"
4
5
// The states of the line break parser, as passed to and returned from
// lbTransitions and transitionLineBreakState.
//
// Most states are named after the property that leads into them (for example,
// lbAny combined with prSP transitions to lbSP). A few are composite states
// produced by specific sequences in lbTransitions, such as lbQUSP (lbQU
// followed by prSP), lbCLCPSP, lbB2SP, lbIDEM (entered from both prID and
// prEM), and the lbNU* family (lbNUNU, lbNUSY, lbNUIS, lbNUCL, lbNUCP).
//
// The enumerated states all have values below 64, so the two flag bits at the
// end of the block can be OR'ed into a state without colliding with it.
const (
	lbAny = iota
	lbBK
	lbCR
	lbLF
	lbNL
	lbSP
	lbZW
	lbWJ
	lbGL
	lbBA
	lbHY
	lbCL
	lbCP
	lbEX
	lbIS
	lbSY
	lbOP
	lbQU
	lbQUSP
	lbNS
	lbCLCPSP
	lbB2
	lbB2SP
	lbCB
	lbBB
	lbLB21a
	lbHL
	lbAL
	lbNU
	lbPR
	lbEB
	lbIDEM
	lbNUNU
	lbNUSY
	lbNUIS
	lbNUCL
	lbNUCP
	lbPO
	lbJL
	lbJV
	lbJT
	lbH2
	lbH3
	lbOddRI
	lbEvenRI
	lbExtPicCn
	// lbZWJBit is a flag bit OR'ed into the state by transitionLineBreakState
	// when the code point just processed had the prZWJ property. When it is
	// set on the incoming state, the next result is forced to LineDontBreak.
	lbZWJBit = 64
	// lbCPeaFWHBit is a flag bit OR'ed into the state by
	// transitionLineBreakState when the new state is lbCP or lbNUCP and the
	// East Asian width of the code point is none of prF, prW, or prH.
	lbCPeaFWHBit = 128
)
56
57
58
59
60
// These constants are the possible line break results returned by the line
// break parser (the lineBreak value of lbTransitions and
// transitionLineBreakState). They state whether the text may be broken into
// the next line at the examined position.
const (
	// LineDontBreak indicates that the line must not be broken here.
	LineDontBreak = iota
	// LineCanBreak indicates an optional break; the caller decides.
	LineCanBreak
	// LineMustBreak indicates a mandatory line break.
	LineMustBreak
)
66
67
68
69
70
// lbTransitions is the transition table of the line break parser. Given the
// current parser state (an lb* constant without flag bits) and the line break
// property of the next code point (a pr* constant), it returns the state to
// move to, the line break result (LineDontBreak, LineCanBreak, or
// LineMustBreak), and the number of the rule that produced the entry.
//
// State and property are packed into one 64-bit key (state in the low 32 bits,
// property in the high 32 bits) so a single switch can match the pair. Entries
// using lbAny or prAny are wildcard rows; this function only matches them
// literally, the caller (transitionLineBreakState) queries them explicitly
// when no exact entry exists and, if both wildcard rows match, takes the line
// break result from the one with the lower rule number.
//
// If no entry matches, all three results are -1.
//
// NOTE(review): the rule numbers look like UAX #14 rule names scaled by ten
// (40 for LB4, 211 for LB21a, 310 for LB31), and the section labels below are
// derived from that reading — verify against the specification.
func lbTransitions(state, prop int) (newState, lineBreak, rule int) {
	switch uint64(state) | uint64(prop)<<32 {
	// Rule 40 (LB4): always break after BK.
	case lbBK | prAny<<32:
		return lbAny, LineMustBreak, 40

	// Rule 50 (LB5): CR LF stays together; otherwise break after CR, LF, NL.
	case lbCR | prLF<<32:
		return lbLF, LineDontBreak, 50
	case lbCR | prAny<<32:
		return lbAny, LineMustBreak, 50
	case lbLF | prAny<<32:
		return lbAny, LineMustBreak, 50
	case lbNL | prAny<<32:
		return lbAny, LineMustBreak, 50

	// Rule 60 (LB6): no break before BK, CR, LF, NL.
	case lbAny | prBK<<32:
		return lbBK, LineDontBreak, 60
	case lbAny | prCR<<32:
		return lbCR, LineDontBreak, 60
	case lbAny | prLF<<32:
		return lbLF, LineDontBreak, 60
	case lbAny | prNL<<32:
		return lbNL, LineDontBreak, 60

	// Rule 70 (LB7): no break before SP or ZW.
	case lbAny | prSP<<32:
		return lbSP, LineDontBreak, 70
	case lbAny | prZW<<32:
		return lbZW, LineDontBreak, 70

	// Rule 80 (LB8): break after ZW, even with spaces in between.
	case lbZW | prSP<<32:
		return lbZW, LineDontBreak, 70
	case lbZW | prAny<<32:
		return lbAny, LineCanBreak, 80

	// Rule 110 (LB11): no break before or after WJ.
	case lbAny | prWJ<<32:
		return lbWJ, LineDontBreak, 110
	case lbWJ | prAny<<32:
		return lbAny, LineDontBreak, 110

	// Rule 120 (LB12): no break after GL. Entering GL is a default (310) row;
	// the caller refines it.
	case lbAny | prGL<<32:
		return lbGL, LineCanBreak, 310
	case lbGL | prAny<<32:
		return lbAny, LineDontBreak, 120

	// Rule 130 (LB13), simple transitions. Only EX is decided here; CL, CP,
	// IS, and SY are default (310) rows that the caller refines.
	case lbAny | prCL<<32:
		return lbCL, LineCanBreak, 310
	case lbAny | prCP<<32:
		return lbCP, LineCanBreak, 310
	case lbAny | prEX<<32:
		return lbEX, LineDontBreak, 130
	case lbAny | prIS<<32:
		return lbIS, LineCanBreak, 310
	case lbAny | prSY<<32:
		return lbSY, LineCanBreak, 310

	// Rule 140 (LB14): no break after OP, even with spaces in between.
	case lbAny | prOP<<32:
		return lbOP, LineCanBreak, 310
	case lbOP | prSP<<32:
		return lbOP, LineDontBreak, 70
	case lbOP | prAny<<32:
		return lbAny, LineDontBreak, 140

	// Rule 150 (LB15): no break between QU and OP, even with spaces.
	case lbQU | prSP<<32:
		return lbQUSP, LineDontBreak, 70
	case lbQU | prOP<<32:
		return lbOP, LineDontBreak, 150
	case lbQUSP | prOP<<32:
		return lbOP, LineDontBreak, 150

	// Rule 160 (LB16): no break between CL/CP and NS, even with spaces.
	case lbCL | prSP<<32:
		return lbCLCPSP, LineDontBreak, 70
	case lbNUCL | prSP<<32:
		return lbCLCPSP, LineDontBreak, 70
	case lbCP | prSP<<32:
		return lbCLCPSP, LineDontBreak, 70
	case lbNUCP | prSP<<32:
		return lbCLCPSP, LineDontBreak, 70
	case lbCL | prNS<<32:
		return lbNS, LineDontBreak, 160
	case lbNUCL | prNS<<32:
		return lbNS, LineDontBreak, 160
	case lbCP | prNS<<32:
		return lbNS, LineDontBreak, 160
	case lbNUCP | prNS<<32:
		return lbNS, LineDontBreak, 160
	case lbCLCPSP | prNS<<32:
		return lbNS, LineDontBreak, 160

	// Rule 170 (LB17): no break between B2 and B2, even with spaces.
	case lbAny | prB2<<32:
		return lbB2, LineCanBreak, 310
	case lbB2 | prSP<<32:
		return lbB2SP, LineDontBreak, 70
	case lbB2 | prB2<<32:
		return lbB2, LineDontBreak, 170
	case lbB2SP | prB2<<32:
		return lbB2, LineDontBreak, 170

	// Rule 180 (LB18): break after spaces (including the composite states
	// that end in a space).
	case lbSP | prAny<<32:
		return lbAny, LineCanBreak, 180
	case lbQUSP | prAny<<32:
		return lbAny, LineCanBreak, 180
	case lbCLCPSP | prAny<<32:
		return lbAny, LineCanBreak, 180
	case lbB2SP | prAny<<32:
		return lbAny, LineCanBreak, 180

	// Rule 190 (LB19): no break before or after QU.
	case lbAny | prQU<<32:
		return lbQU, LineDontBreak, 190
	case lbQU | prAny<<32:
		return lbAny, LineDontBreak, 190

	// Rule 200 (LB20): break before and after CB.
	case lbAny | prCB<<32:
		return lbCB, LineCanBreak, 200
	case lbCB | prAny<<32:
		return lbAny, LineCanBreak, 200

	// Rule 210 (LB21): no break before BA, HY, NS, and none after BB.
	case lbAny | prBA<<32:
		return lbBA, LineDontBreak, 210
	case lbAny | prHY<<32:
		return lbHY, LineDontBreak, 210
	case lbAny | prNS<<32:
		return lbNS, LineDontBreak, 210
	case lbAny | prBB<<32:
		return lbBB, LineCanBreak, 310
	case lbBB | prAny<<32:
		return lbAny, LineDontBreak, 210

	// Rule 211 (LB21a): no break after HL followed by HY or BA.
	case lbAny | prHL<<32:
		return lbHL, LineCanBreak, 310
	case lbHL | prHY<<32:
		return lbLB21a, LineDontBreak, 210
	case lbHL | prBA<<32:
		return lbLB21a, LineDontBreak, 210
	case lbLB21a | prAny<<32:
		return lbAny, LineDontBreak, 211

	// Rule 212 (LB21b): no break between SY and HL.
	case lbSY | prHL<<32:
		return lbHL, LineDontBreak, 212
	case lbNUSY | prHL<<32:
		return lbHL, LineDontBreak, 212

	// Rule 220 (LB22): no break before IN.
	case lbAny | prIN<<32:
		return lbAny, LineDontBreak, 220

	// Rule 230 (LB23): no break between letters and digits.
	case lbAny | prAL<<32:
		return lbAL, LineCanBreak, 310
	case lbAny | prNU<<32:
		return lbNU, LineCanBreak, 310
	case lbAL | prNU<<32:
		return lbNU, LineDontBreak, 230
	case lbHL | prNU<<32:
		return lbNU, LineDontBreak, 230
	case lbNU | prAL<<32:
		return lbAL, LineDontBreak, 230
	case lbNU | prHL<<32:
		return lbHL, LineDontBreak, 230
	case lbNUNU | prAL<<32:
		return lbAL, LineDontBreak, 230
	case lbNUNU | prHL<<32:
		return lbHL, LineDontBreak, 230

	// Rule 231 (LB23a): no break between PR and ID/EB/EM, nor between
	// ID/EB/EM and PO.
	case lbAny | prPR<<32:
		return lbPR, LineCanBreak, 310
	case lbAny | prID<<32:
		return lbIDEM, LineCanBreak, 310
	case lbAny | prEB<<32:
		return lbEB, LineCanBreak, 310
	case lbAny | prEM<<32:
		return lbIDEM, LineCanBreak, 310
	case lbPR | prID<<32:
		return lbIDEM, LineDontBreak, 231
	case lbPR | prEB<<32:
		return lbEB, LineDontBreak, 231
	case lbPR | prEM<<32:
		return lbIDEM, LineDontBreak, 231
	case lbIDEM | prPO<<32:
		return lbPO, LineDontBreak, 231
	case lbEB | prPO<<32:
		return lbPO, LineDontBreak, 231

	// Rule 240 (LB24): no break between PR/PO and letters, in either order.
	case lbAny | prPO<<32:
		return lbPO, LineCanBreak, 310
	case lbPR | prAL<<32:
		return lbAL, LineDontBreak, 240
	case lbPR | prHL<<32:
		return lbHL, LineDontBreak, 240
	case lbPO | prAL<<32:
		return lbAL, LineDontBreak, 240
	case lbPO | prHL<<32:
		return lbHL, LineDontBreak, 240
	case lbAL | prPR<<32:
		return lbPR, LineDontBreak, 240
	case lbAL | prPO<<32:
		return lbPO, LineDontBreak, 240
	case lbHL | prPR<<32:
		return lbPR, LineDontBreak, 240
	case lbHL | prPO<<32:
		return lbPO, LineDontBreak, 240

	// Rule 250 (LB25), simple transitions: keep numeric expressions together.
	// The lbNU* states track the position inside a number (digits, then SY/IS
	// separators, then an optional closing CL/CP, then an optional PO/PR).
	case lbPR | prNU<<32:
		return lbNU, LineDontBreak, 250
	case lbPO | prNU<<32:
		return lbNU, LineDontBreak, 250
	case lbOP | prNU<<32:
		return lbNU, LineDontBreak, 250
	case lbHY | prNU<<32:
		return lbNU, LineDontBreak, 250
	case lbNU | prNU<<32:
		return lbNUNU, LineDontBreak, 250
	case lbNU | prSY<<32:
		return lbNUSY, LineDontBreak, 250
	case lbNU | prIS<<32:
		return lbNUIS, LineDontBreak, 250
	case lbNUNU | prNU<<32:
		return lbNUNU, LineDontBreak, 250
	case lbNUNU | prSY<<32:
		return lbNUSY, LineDontBreak, 250
	case lbNUNU | prIS<<32:
		return lbNUIS, LineDontBreak, 250
	case lbNUSY | prNU<<32:
		return lbNUNU, LineDontBreak, 250
	case lbNUSY | prSY<<32:
		return lbNUSY, LineDontBreak, 250
	case lbNUSY | prIS<<32:
		return lbNUIS, LineDontBreak, 250
	case lbNUIS | prNU<<32:
		return lbNUNU, LineDontBreak, 250
	case lbNUIS | prSY<<32:
		return lbNUSY, LineDontBreak, 250
	case lbNUIS | prIS<<32:
		return lbNUIS, LineDontBreak, 250
	case lbNU | prCL<<32:
		return lbNUCL, LineDontBreak, 250
	case lbNU | prCP<<32:
		return lbNUCP, LineDontBreak, 250
	case lbNUNU | prCL<<32:
		return lbNUCL, LineDontBreak, 250
	case lbNUNU | prCP<<32:
		return lbNUCP, LineDontBreak, 250
	case lbNUSY | prCL<<32:
		return lbNUCL, LineDontBreak, 250
	case lbNUSY | prCP<<32:
		return lbNUCP, LineDontBreak, 250
	case lbNUIS | prCL<<32:
		return lbNUCL, LineDontBreak, 250
	case lbNUIS | prCP<<32:
		return lbNUCP, LineDontBreak, 250
	case lbNU | prPO<<32:
		return lbPO, LineDontBreak, 250
	case lbNUNU | prPO<<32:
		return lbPO, LineDontBreak, 250
	case lbNUSY | prPO<<32:
		return lbPO, LineDontBreak, 250
	case lbNUIS | prPO<<32:
		return lbPO, LineDontBreak, 250
	case lbNUCL | prPO<<32:
		return lbPO, LineDontBreak, 250
	case lbNUCP | prPO<<32:
		return lbPO, LineDontBreak, 250
	case lbNU | prPR<<32:
		return lbPR, LineDontBreak, 250
	case lbNUNU | prPR<<32:
		return lbPR, LineDontBreak, 250
	case lbNUSY | prPR<<32:
		return lbPR, LineDontBreak, 250
	case lbNUIS | prPR<<32:
		return lbPR, LineDontBreak, 250
	case lbNUCL | prPR<<32:
		return lbPR, LineDontBreak, 250
	case lbNUCP | prPR<<32:
		return lbPR, LineDontBreak, 250

	// Rule 260 (LB26): no break inside a Korean syllable block.
	case lbAny | prJL<<32:
		return lbJL, LineCanBreak, 310
	case lbAny | prJV<<32:
		return lbJV, LineCanBreak, 310
	case lbAny | prJT<<32:
		return lbJT, LineCanBreak, 310
	case lbAny | prH2<<32:
		return lbH2, LineCanBreak, 310
	case lbAny | prH3<<32:
		return lbH3, LineCanBreak, 310
	case lbJL | prJL<<32:
		return lbJL, LineDontBreak, 260
	case lbJL | prJV<<32:
		return lbJV, LineDontBreak, 260
	case lbJL | prH2<<32:
		return lbH2, LineDontBreak, 260
	case lbJL | prH3<<32:
		return lbH3, LineDontBreak, 260
	case lbJV | prJV<<32:
		return lbJV, LineDontBreak, 260
	case lbJV | prJT<<32:
		return lbJT, LineDontBreak, 260
	case lbH2 | prJV<<32:
		return lbJV, LineDontBreak, 260
	case lbH2 | prJT<<32:
		return lbJT, LineDontBreak, 260
	case lbJT | prJT<<32:
		return lbJT, LineDontBreak, 260
	case lbH3 | prJT<<32:
		return lbJT, LineDontBreak, 260

	// Rule 270 (LB27): no break between a Korean syllable block and PO, nor
	// between PR and a Korean syllable block.
	case lbJL | prPO<<32:
		return lbPO, LineDontBreak, 270
	case lbJV | prPO<<32:
		return lbPO, LineDontBreak, 270
	case lbJT | prPO<<32:
		return lbPO, LineDontBreak, 270
	case lbH2 | prPO<<32:
		return lbPO, LineDontBreak, 270
	case lbH3 | prPO<<32:
		return lbPO, LineDontBreak, 270
	case lbPR | prJL<<32:
		return lbJL, LineDontBreak, 270
	case lbPR | prJV<<32:
		return lbJV, LineDontBreak, 270
	case lbPR | prJT<<32:
		return lbJT, LineDontBreak, 270
	case lbPR | prH2<<32:
		return lbH2, LineDontBreak, 270
	case lbPR | prH3<<32:
		return lbH3, LineDontBreak, 270

	// Rule 280 (LB28): no break between letters.
	case lbAL | prAL<<32:
		return lbAL, LineDontBreak, 280
	case lbAL | prHL<<32:
		return lbHL, LineDontBreak, 280
	case lbHL | prAL<<32:
		return lbAL, LineDontBreak, 280
	case lbHL | prHL<<32:
		return lbHL, LineDontBreak, 280

	// Rule 290 (LB29): no break between IS and letters.
	case lbIS | prAL<<32:
		return lbAL, LineDontBreak, 290
	case lbIS | prHL<<32:
		return lbHL, LineDontBreak, 290
	case lbNUIS | prAL<<32:
		return lbAL, LineDontBreak, 290
	case lbNUIS | prHL<<32:
		return lbHL, LineDontBreak, 290

	default:
		// No entry for this exact (state, property) pair.
		return -1, -1, -1
	}
}
443
444
445
446
447
448
449
450 func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) {
451
452 nextProperty, generalCategory := propertyLineBreak(r)
453
454
455 var forceNoBreak, isCPeaFWH bool
456 if state >= 0 && state&lbCPeaFWHBit != 0 {
457 isCPeaFWH = true
458 state = state &^ lbCPeaFWHBit
459 }
460 if state >= 0 && state&lbZWJBit != 0 {
461 state = state &^ lbZWJBit
462 forceNoBreak = true
463 }
464
465 defer func() {
466
467 if newState == lbCP || newState == lbNUCP {
468 ea := propertyEastAsianWidth(r)
469 if ea != prF && ea != prW && ea != prH {
470 newState |= lbCPeaFWHBit
471 }
472 }
473
474
475 if forceNoBreak {
476 lineBreak = LineDontBreak
477 }
478 }()
479
480
481 if nextProperty == prAI || nextProperty == prSG || nextProperty == prXX {
482 nextProperty = prAL
483 } else if nextProperty == prSA {
484 if generalCategory == gcMn || generalCategory == gcMc {
485 nextProperty = prCM
486 } else {
487 nextProperty = prAL
488 }
489 } else if nextProperty == prCJ {
490 nextProperty = prNS
491 }
492
493
494 if nextProperty == prZWJ || nextProperty == prCM {
495 var bit int
496 if nextProperty == prZWJ {
497 bit = lbZWJBit
498 }
499 mustBreakState := state < 0 || state == lbBK || state == lbCR || state == lbLF || state == lbNL
500 if !mustBreakState && state != lbSP && state != lbZW && state != lbQUSP && state != lbCLCPSP && state != lbB2SP {
501
502 return state | bit, LineDontBreak
503 } else {
504
505 if mustBreakState {
506 return lbAL | bit, LineMustBreak
507 }
508 return lbAL | bit, LineCanBreak
509 }
510 }
511
512
513 var rule int
514 newState, lineBreak, rule = lbTransitions(state, nextProperty)
515 if newState < 0 {
516
517 anyPropProp, anyPropLineBreak, anyPropRule := lbTransitions(state, prAny)
518 anyStateProp, anyStateLineBreak, anyStateRule := lbTransitions(lbAny, nextProperty)
519 if anyPropProp >= 0 && anyStateProp >= 0 {
520
521 newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
522 if anyPropRule < anyStateRule {
523 lineBreak, rule = anyPropLineBreak, anyPropRule
524 }
525 } else if anyPropProp >= 0 {
526
527 newState, lineBreak, rule = anyPropProp, anyPropLineBreak, anyPropRule
528
529
530
531
532 } else if anyStateProp >= 0 {
533
534 newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
535 } else {
536
537 newState, lineBreak, rule = lbAny, LineCanBreak, 310
538 }
539 }
540
541
542 if rule > 121 &&
543 nextProperty == prGL &&
544 (state != lbSP && state != lbBA && state != lbHY && state != lbLB21a && state != lbQUSP && state != lbCLCPSP && state != lbB2SP) {
545 return lbGL, LineDontBreak
546 }
547
548
549 if rule > 130 && state != lbNU && state != lbNUNU {
550 switch nextProperty {
551 case prCL:
552 return lbCL, LineDontBreak
553 case prCP:
554 return lbCP, LineDontBreak
555 case prIS:
556 return lbIS, LineDontBreak
557 case prSY:
558 return lbSY, LineDontBreak
559 }
560 }
561
562
563 if rule > 250 &&
564 (state == lbPR || state == lbPO) &&
565 nextProperty == prOP || nextProperty == prHY {
566 var r rune
567 if b != nil {
568 r, _ = utf8.DecodeRune(b)
569 } else {
570 r, _ = utf8.DecodeRuneInString(str)
571 }
572 if r != utf8.RuneError {
573 pr, _ := propertyLineBreak(r)
574 if pr == prNU {
575 return lbNU, LineDontBreak
576 }
577 }
578 }
579
580
581 if rule > 300 {
582 if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP {
583 ea := propertyEastAsianWidth(r)
584 if ea != prF && ea != prW && ea != prH {
585 return lbOP, LineDontBreak
586 }
587 } else if isCPeaFWH {
588 switch nextProperty {
589 case prAL:
590 return lbAL, LineDontBreak
591 case prHL:
592 return lbHL, LineDontBreak
593 case prNU:
594 return lbNU, LineDontBreak
595 }
596 }
597 }
598
599
600 if newState == lbAny && nextProperty == prRI {
601 if state != lbOddRI && state != lbEvenRI {
602
603 return lbOddRI, lineBreak
604 }
605 if state == lbOddRI {
606
607 return lbEvenRI, LineDontBreak
608 }
609 return lbOddRI, lineBreak
610 }
611
612
613 if rule > 302 {
614 if nextProperty == prEM {
615 if state == lbEB || state == lbExtPicCn {
616 return prAny, LineDontBreak
617 }
618 }
619 graphemeProperty := propertyGraphemes(r)
620 if graphemeProperty == prExtendedPictographic && generalCategory == gcCn {
621 return lbExtPicCn, LineCanBreak
622 }
623 }
624
625 return
626 }
627
View as plain text