1 package uniseg
2
3 import "unicode/utf8"
4
5
// Bit masks for pulling the individual boundary flags out of the "boundaries"
// value returned by [Step] and [StepString].
const (
	// MaskLine selects the two lowest bits, which hold the line break value
	// (the value reported by the line break state machine, or LineMustBreak
	// when the end of the input was reached).
	MaskLine = 3

	// MaskWord selects the bit that is set when a word boundary follows the
	// returned grapheme cluster.
	MaskWord = 4

	// MaskSentence selects the bit that is set when a sentence boundary
	// follows the returned grapheme cluster.
	MaskSentence = 8
)
11
12
13
// ShiftWidth is the number of bits by which the "boundaries" value returned by
// [Step] and [StepString] must be shifted to the right to obtain the width
// that was accumulated for the returned grapheme cluster. The width occupies
// all bits from this position upwards, so no mask is needed.
const ShiftWidth = 4
15
16
17
// Bit positions of the word and sentence boundary flags inside the
// "boundaries" value built by [Step] and [StepString]. A flag is stored as
// 1 << shift, so these must stay in sync with the exported mask values
// (1 << 2 == 4 for words, 1 << 3 == 8 for sentences).
const (
	shiftWord     = 2
	shiftSentence = 3
	// The width is shifted by the exported ShiftWidth constant; it needs no
	// mask because it takes up all remaining bits.
)
23
24
25
26
27
// Bit positions of the individual state machine states inside the single
// "state" integer passed to and returned by [Step] and [StepString]. The
// grapheme state sits in the lowest bits (shift 0) and is followed by the
// word, sentence and line break states. The shifts must leave enough room for
// every value the preceding state can take so that the fields never overlap.
const (
	shiftWordState     = 4
	shiftSentenceState = 9
	shiftLineState     = 13
	shiftPropState     = 21 // Property of the next rune; no mask, it uses all remaining bits.
)
34
35
36
// Bit masks applied after shifting the packed "state" integer to the right in
// order to isolate one state machine's state. Each mask is as wide as the gap
// between that state's shift and the next one: 4 bits for the grapheme state,
// 5 bits for the word state, 4 bits for the sentence state and 8 bits for the
// line break state.
const (
	maskGraphemeState = 0xf
	maskWordState     = 0x1f
	maskSentenceState = 0xf
	maskLineState     = 0xff
)
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92 func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState int) {
93
94 if len(b) == 0 {
95 return
96 }
97
98
99 r, length := utf8.DecodeRune(b)
100 if len(b) <= length {
101 var prop int
102 if state < 0 {
103 prop = propertyGraphemes(r)
104 } else {
105 prop = state >> shiftPropState
106 }
107 return b, nil, LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState)
108 }
109
110
111 var graphemeState, wordState, sentenceState, lineState, firstProp int
112 remainder := b[length:]
113 if state < 0 {
114 graphemeState, firstProp, _ = transitionGraphemeState(state, r)
115 wordState, _ = transitionWordBreakState(state, r, remainder, "")
116 sentenceState, _ = transitionSentenceBreakState(state, r, remainder, "")
117 lineState, _ = transitionLineBreakState(state, r, remainder, "")
118 } else {
119 graphemeState = state & maskGraphemeState
120 wordState = (state >> shiftWordState) & maskWordState
121 sentenceState = (state >> shiftSentenceState) & maskSentenceState
122 lineState = (state >> shiftLineState) & maskLineState
123 firstProp = state >> shiftPropState
124 }
125
126
127 width := runeWidth(r, firstProp)
128 for {
129 var (
130 graphemeBoundary, wordBoundary, sentenceBoundary bool
131 lineBreak, prop int
132 )
133
134 r, l := utf8.DecodeRune(remainder)
135 remainder = b[length+l:]
136
137 graphemeState, prop, graphemeBoundary = transitionGraphemeState(graphemeState, r)
138 wordState, wordBoundary = transitionWordBreakState(wordState, r, remainder, "")
139 sentenceState, sentenceBoundary = transitionSentenceBreakState(sentenceState, r, remainder, "")
140 lineState, lineBreak = transitionLineBreakState(lineState, r, remainder, "")
141
142 if graphemeBoundary {
143 boundary := lineBreak | (width << ShiftWidth)
144 if wordBoundary {
145 boundary |= 1 << shiftWord
146 }
147 if sentenceBoundary {
148 boundary |= 1 << shiftSentence
149 }
150 return b[:length], b[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) | (prop << shiftPropState)
151 }
152
153 if firstProp == prExtendedPictographic {
154 if r == vs15 {
155 width = 1
156 } else if r == vs16 {
157 width = 2
158 }
159 } else if firstProp != prRegionalIndicator && firstProp != prL {
160 width += runeWidth(r, prop)
161 }
162
163 length += l
164 if len(b) <= length {
165 return b, nil, LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (width << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState)
166 }
167 }
168 }
169
170
171 func StepString(str string, state int) (cluster, rest string, boundaries int, newState int) {
172
173 if len(str) == 0 {
174 return
175 }
176
177
178 r, length := utf8.DecodeRuneInString(str)
179 if len(str) <= length {
180 prop := propertyGraphemes(r)
181 return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState)
182 }
183
184
185 var graphemeState, wordState, sentenceState, lineState, firstProp int
186 remainder := str[length:]
187 if state < 0 {
188 graphemeState, firstProp, _ = transitionGraphemeState(state, r)
189 wordState, _ = transitionWordBreakState(state, r, nil, remainder)
190 sentenceState, _ = transitionSentenceBreakState(state, r, nil, remainder)
191 lineState, _ = transitionLineBreakState(state, r, nil, remainder)
192 } else {
193 graphemeState = state & maskGraphemeState
194 wordState = (state >> shiftWordState) & maskWordState
195 sentenceState = (state >> shiftSentenceState) & maskSentenceState
196 lineState = (state >> shiftLineState) & maskLineState
197 firstProp = state >> shiftPropState
198 }
199
200
201 width := runeWidth(r, firstProp)
202 for {
203 var (
204 graphemeBoundary, wordBoundary, sentenceBoundary bool
205 lineBreak, prop int
206 )
207
208 r, l := utf8.DecodeRuneInString(remainder)
209 remainder = str[length+l:]
210
211 graphemeState, prop, graphemeBoundary = transitionGraphemeState(graphemeState, r)
212 wordState, wordBoundary = transitionWordBreakState(wordState, r, nil, remainder)
213 sentenceState, sentenceBoundary = transitionSentenceBreakState(sentenceState, r, nil, remainder)
214 lineState, lineBreak = transitionLineBreakState(lineState, r, nil, remainder)
215
216 if graphemeBoundary {
217 boundary := lineBreak | (width << ShiftWidth)
218 if wordBoundary {
219 boundary |= 1 << shiftWord
220 }
221 if sentenceBoundary {
222 boundary |= 1 << shiftSentence
223 }
224 return str[:length], str[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) | (prop << shiftPropState)
225 }
226
227 if firstProp == prExtendedPictographic {
228 if r == vs15 {
229 width = 1
230 } else if r == vs16 {
231 width = 2
232 }
233 } else if firstProp != prRegionalIndicator && firstProp != prL {
234 width += runeWidth(r, prop)
235 }
236
237 length += l
238 if len(str) <= length {
239 return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (width << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) | (prop << shiftPropState)
240 }
241 }
242 }
243
View as plain text