1 package uniseg
2
3 import "unicode/utf8"
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Graphemes is an iterator over the grapheme clusters of a string. Create one
// with NewGraphemes and advance it with Next; the other methods report on the
// cluster the iterator is currently positioned at.
type Graphemes struct {
	// original is the complete string handed to NewGraphemes. Reset restarts
	// the iteration from it.
	original string

	// remaining is the part of the string that Next has not consumed yet.
	remaining string

	// cluster is the current grapheme cluster. It is empty before the first
	// call to Next and after the end of the string has been reached.
	cluster string

	// offset is the byte offset at which cluster starts within original.
	offset int

	// boundaries is the third value returned by the most recent StepString
	// call. The accessor methods read it through MaskWord, MaskSentence,
	// MaskLine, and ShiftWidth.
	boundaries int

	// state is the parser state. Two negative sentinels are used: -1 means
	// Next has not been called yet, -2 means the iterator has moved past the
	// end. Otherwise it holds the state returned by the last StepString call.
	state int
}
40
41
42 func NewGraphemes(str string) *Graphemes {
43 return &Graphemes{
44 original: str,
45 remaining: str,
46 state: -1,
47 }
48 }
49
50
51
52
53 func (g *Graphemes) Next() bool {
54 if len(g.remaining) == 0 {
55
56 g.state = -2
57 g.cluster = ""
58 return false
59 }
60 g.offset += len(g.cluster)
61 g.cluster, g.remaining, g.boundaries, g.state = StepString(g.remaining, g.state)
62 return true
63 }
64
65
66
67
68 func (g *Graphemes) Runes() []rune {
69 if g.state < 0 {
70 return nil
71 }
72 return []rune(g.cluster)
73 }
74
75
76
77
// Str returns the current grapheme cluster as a string. The cluster is empty
// before the first call to Next and after Next has returned false, so an empty
// string is returned in both of those cases.
func (g *Graphemes) Str() string {
	return g.cluster
}
81
82
83
84
85 func (g *Graphemes) Bytes() []byte {
86 if g.state < 0 {
87 return nil
88 }
89 return []byte(g.cluster)
90 }
91
92
93
94
95
96
97
98 func (g *Graphemes) Positions() (int, int) {
99 if g.state == -1 {
100 return 0, 0
101 } else if g.state == -2 {
102 return 1, 1
103 }
104 return g.offset, g.offset + len(g.cluster)
105 }
106
107
108
109 func (g *Graphemes) IsWordBoundary() bool {
110 if g.state < 0 {
111 return true
112 }
113 return g.boundaries&MaskWord != 0
114 }
115
116
117
118 func (g *Graphemes) IsSentenceBoundary() bool {
119 if g.state < 0 {
120 return true
121 }
122 return g.boundaries&MaskSentence != 0
123 }
124
125
126
127
128
129 func (g *Graphemes) LineBreak() int {
130 if g.state == -1 {
131 return LineDontBreak
132 }
133 if g.state == -2 {
134 return LineMustBreak
135 }
136 return g.boundaries & MaskLine
137 }
138
139
140 func (g *Graphemes) Width() int {
141 if g.state < 0 {
142 return 0
143 }
144 return g.boundaries >> ShiftWidth
145 }
146
147
148
149 func (g *Graphemes) Reset() {
150 g.state = -1
151 g.offset = 0
152 g.cluster = ""
153 g.remaining = g.original
154 }
155
156
157
158 func GraphemeClusterCount(s string) (n int) {
159 state := -1
160 for len(s) > 0 {
161 _, s, _, state = FirstGraphemeClusterInString(s, state)
162 n++
163 }
164 return
165 }
166
167
168
169 func ReverseString(s string) string {
170 str := []byte(s)
171 reversed := make([]byte, len(str))
172 state := -1
173 index := len(str)
174 for len(str) > 0 {
175 var cluster []byte
176 cluster, str, _, state = FirstGraphemeCluster(str, state)
177 index -= len(cluster)
178 copy(reversed[index:], cluster)
179 if index <= len(str)/2 {
180 break
181 }
182 }
183 return string(reversed)
184 }
185
186
187
// shiftGraphemePropState is the number of bits by which a grapheme property is
// shifted left before it is OR-ed into the state value returned by
// FirstGraphemeCluster and FirstGraphemeClusterInString. Shifting such a state
// right by the same amount recovers the property.
const shiftGraphemePropState = 4
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
// FirstGraphemeCluster splits the first grapheme cluster off the byte slice b.
//
// Parameters:
//   - b: the text to parse; runes are decoded with utf8.DecodeRune.
//   - state: the newState value returned by the previous call when walking
//     through a text cluster by cluster, or a negative value when there is no
//     previous call (callers in this file start with -1).
//
// Returns:
//   - cluster: the leading bytes of b that form the first grapheme cluster.
//   - rest: the bytes of b after the cluster, or nil if the cluster extends
//     to the end of b.
//   - width: the width of the cluster, accumulated from runeWidth.
//   - newState: the value to pass as state when calling again with rest.
//
// If b is empty, all results are their zero values.
func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, newState int) {
	// An empty slice has no cluster; every named result keeps its zero value.
	if len(b) == 0 {
		return
	}

	// Decode the first rune. If it spans all of b, it is the whole cluster and
	// no state transition is needed.
	r, length := utf8.DecodeRune(b)
	if len(b) <= length {
		var prop int
		if state < 0 {
			// No previous call to take the property from; look it up.
			prop = propertyGraphemes(r)
		} else {
			// The previous call packed this rune's property into the state.
			prop = state >> shiftGraphemePropState
		}
		return b, nil, runeWidth(r, prop), grAny | (prop << shiftGraphemePropState)
	}

	// Determine the property of the first rune. Without a previous state, the
	// state machine is run on the first rune, which also yields the state to
	// continue from. Otherwise the property is unpacked from the given state.
	var firstProp int
	if state < 0 {
		state, firstProp, _ = transitionGraphemeState(state, r)
	} else {
		firstProp = state >> shiftGraphemePropState
	}
	width += runeWidth(r, firstProp)

	// Feed the following runes to the state machine until it reports a
	// boundary or b is exhausted.
	for {
		var (
			prop     int
			boundary bool
		)

		// Note that r and l are local to the loop body; r shadows the first
		// rune decoded above.
		r, l := utf8.DecodeRune(b[length:])
		// NOTE(review): maskGraphemeState presumably strips the packed
		// property bits so only the parser state is passed on — confirm.
		state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r)

		if boundary {
			// r starts the next cluster. Its property is packed into the
			// returned state so that the next call can unpack it instead of
			// determining it again.
			return b[:length], b[length:], width, state | (prop << shiftGraphemePropState)
		}

		// r belongs to the current cluster; account for it in the width.
		if firstProp == prExtendedPictographic {
			// NOTE(review): vs15 and vs16 are presumably the text and emoji
			// presentation variation selectors — confirm. They override the
			// width; other runes leave it unchanged.
			if r == vs15 {
				width = 1
			} else if r == vs16 {
				width = 2
			}
		} else if firstProp != prRegionalIndicator && firstProp != prL {
			// Clusters that start with prRegionalIndicator or prL keep the
			// width of their first rune; all others add up the rune widths.
			width += runeWidth(r, prop)
		}

		length += l
		if len(b) <= length {
			// The input ended inside the cluster, so all of b is returned.
			return b, nil, width, grAny | (prop << shiftGraphemePropState)
		}
	}
}
272
273
274
// FirstGraphemeClusterInString splits the first grapheme cluster off the
// string str. It follows the same steps as FirstGraphemeCluster but operates
// on a string instead of a byte slice.
//
// Parameters:
//   - str: the text to parse; runes are decoded with utf8.DecodeRuneInString.
//   - state: the newState value returned by the previous call when walking
//     through a text cluster by cluster, or a negative value when there is no
//     previous call (callers in this file start with -1).
//
// Returns:
//   - cluster: the leading part of str that forms the first grapheme cluster.
//   - rest: the part of str after the cluster, or "" if the cluster extends
//     to the end of str.
//   - width: the width of the cluster, accumulated from runeWidth.
//   - newState: the value to pass as state when calling again with rest.
//
// If str is empty, all results are their zero values.
func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, width, newState int) {
	// An empty string has no cluster; every named result keeps its zero value.
	if len(str) == 0 {
		return
	}

	// Decode the first rune. If it spans all of str, it is the whole cluster
	// and no state transition is needed.
	r, length := utf8.DecodeRuneInString(str)
	if len(str) <= length {
		var prop int
		if state < 0 {
			// No previous call to take the property from; look it up.
			prop = propertyGraphemes(r)
		} else {
			// The previous call packed this rune's property into the state.
			prop = state >> shiftGraphemePropState
		}
		return str, "", runeWidth(r, prop), grAny | (prop << shiftGraphemePropState)
	}

	// Determine the property of the first rune. Without a previous state, the
	// state machine is run on the first rune, which also yields the state to
	// continue from. Otherwise the property is unpacked from the given state.
	var firstProp int
	if state < 0 {
		state, firstProp, _ = transitionGraphemeState(state, r)
	} else {
		firstProp = state >> shiftGraphemePropState
	}
	width += runeWidth(r, firstProp)

	// Feed the following runes to the state machine until it reports a
	// boundary or str is exhausted.
	for {
		var (
			prop     int
			boundary bool
		)

		// Note that r and l are local to the loop body; r shadows the first
		// rune decoded above.
		r, l := utf8.DecodeRuneInString(str[length:])
		// NOTE(review): maskGraphemeState presumably strips the packed
		// property bits so only the parser state is passed on — confirm.
		state, prop, boundary = transitionGraphemeState(state&maskGraphemeState, r)

		if boundary {
			// r starts the next cluster. Its property is packed into the
			// returned state so that the next call can unpack it instead of
			// determining it again.
			return str[:length], str[length:], width, state | (prop << shiftGraphemePropState)
		}

		// r belongs to the current cluster; account for it in the width.
		if firstProp == prExtendedPictographic {
			// NOTE(review): vs15 and vs16 are presumably the text and emoji
			// presentation variation selectors — confirm. They override the
			// width; other runes leave it unchanged.
			if r == vs15 {
				width = 1
			} else if r == vs16 {
				width = 2
			}
		} else if firstProp != prRegionalIndicator && firstProp != prL {
			// Clusters that start with prRegionalIndicator or prL keep the
			// width of their first rune; all others add up the rune widths.
			width += runeWidth(r, prop)
		}

		length += l
		if len(str) <= length {
			// The input ended inside the cluster, so all of str is returned.
			return str, "", width, grAny | (prop << shiftGraphemePropState)
		}
	}
}
332
View as plain text