1 package util
2
3 import "unicode"
4
5 var cjkRadicalsSupplement = &unicode.RangeTable{
6 R16: []unicode.Range16{
7 {0x2E80, 0x2EFF, 1},
8 },
9 }
10
11 var kangxiRadicals = &unicode.RangeTable{
12 R16: []unicode.Range16{
13 {0x2F00, 0x2FDF, 1},
14 },
15 }
16
17 var ideographicDescriptionCharacters = &unicode.RangeTable{
18 R16: []unicode.Range16{
19 {0x2FF0, 0x2FFF, 1},
20 },
21 }
22
23 var cjkSymbolsAndPunctuation = &unicode.RangeTable{
24 R16: []unicode.Range16{
25 {0x3000, 0x303F, 1},
26 },
27 }
28
29 var hiragana = &unicode.RangeTable{
30 R16: []unicode.Range16{
31 {0x3040, 0x309F, 1},
32 },
33 }
34
35 var katakana = &unicode.RangeTable{
36 R16: []unicode.Range16{
37 {0x30A0, 0x30FF, 1},
38 },
39 }
40
41 var kanbun = &unicode.RangeTable{
42 R16: []unicode.Range16{
43 {0x3130, 0x318F, 1},
44 {0x3190, 0x319F, 1},
45 },
46 }
47
48 var cjkStrokes = &unicode.RangeTable{
49 R16: []unicode.Range16{
50 {0x31C0, 0x31EF, 1},
51 },
52 }
53
54 var katakanaPhoneticExtensions = &unicode.RangeTable{
55 R16: []unicode.Range16{
56 {0x31F0, 0x31FF, 1},
57 },
58 }
59
60 var cjkCompatibility = &unicode.RangeTable{
61 R16: []unicode.Range16{
62 {0x3300, 0x33FF, 1},
63 },
64 }
65
66 var cjkUnifiedIdeographsExtensionA = &unicode.RangeTable{
67 R16: []unicode.Range16{
68 {0x3400, 0x4DBF, 1},
69 },
70 }
71
72 var cjkUnifiedIdeographs = &unicode.RangeTable{
73 R16: []unicode.Range16{
74 {0x4E00, 0x9FFF, 1},
75 },
76 }
77
78 var yiSyllables = &unicode.RangeTable{
79 R16: []unicode.Range16{
80 {0xA000, 0xA48F, 1},
81 },
82 }
83
84 var yiRadicals = &unicode.RangeTable{
85 R16: []unicode.Range16{
86 {0xA490, 0xA4CF, 1},
87 },
88 }
89
90 var cjkCompatibilityIdeographs = &unicode.RangeTable{
91 R16: []unicode.Range16{
92 {0xF900, 0xFAFF, 1},
93 },
94 }
95
96 var verticalForms = &unicode.RangeTable{
97 R16: []unicode.Range16{
98 {0xFE10, 0xFE1F, 1},
99 },
100 }
101
102 var cjkCompatibilityForms = &unicode.RangeTable{
103 R16: []unicode.Range16{
104 {0xFE30, 0xFE4F, 1},
105 },
106 }
107
108 var smallFormVariants = &unicode.RangeTable{
109 R16: []unicode.Range16{
110 {0xFE50, 0xFE6F, 1},
111 },
112 }
113
114 var halfwidthAndFullwidthForms = &unicode.RangeTable{
115 R16: []unicode.Range16{
116 {0xFF00, 0xFFEF, 1},
117 },
118 }
119
120 var kanaSupplement = &unicode.RangeTable{
121 R32: []unicode.Range32{
122 {0x1B000, 0x1B0FF, 1},
123 },
124 }
125
126 var kanaExtendedA = &unicode.RangeTable{
127 R32: []unicode.Range32{
128 {0x1B100, 0x1B12F, 1},
129 },
130 }
131
132 var smallKanaExtension = &unicode.RangeTable{
133 R32: []unicode.Range32{
134 {0x1B130, 0x1B16F, 1},
135 },
136 }
137
138 var cjkUnifiedIdeographsExtensionB = &unicode.RangeTable{
139 R32: []unicode.Range32{
140 {0x20000, 0x2A6DF, 1},
141 },
142 }
143
144 var cjkUnifiedIdeographsExtensionC = &unicode.RangeTable{
145 R32: []unicode.Range32{
146 {0x2A700, 0x2B73F, 1},
147 },
148 }
149
150 var cjkUnifiedIdeographsExtensionD = &unicode.RangeTable{
151 R32: []unicode.Range32{
152 {0x2B740, 0x2B81F, 1},
153 },
154 }
155
156 var cjkUnifiedIdeographsExtensionE = &unicode.RangeTable{
157 R32: []unicode.Range32{
158 {0x2B820, 0x2CEAF, 1},
159 },
160 }
161
162 var cjkUnifiedIdeographsExtensionF = &unicode.RangeTable{
163 R32: []unicode.Range32{
164 {0x2CEB0, 0x2EBEF, 1},
165 },
166 }
167
168 var cjkCompatibilityIdeographsSupplement = &unicode.RangeTable{
169 R32: []unicode.Range32{
170 {0x2F800, 0x2FA1F, 1},
171 },
172 }
173
174 var cjkUnifiedIdeographsExtensionG = &unicode.RangeTable{
175 R32: []unicode.Range32{
176 {0x30000, 0x3134F, 1},
177 },
178 }
179
180
181 func IsEastAsianWideRune(r rune) bool {
182 return unicode.Is(unicode.Hiragana, r) ||
183 unicode.Is(unicode.Katakana, r) ||
184 unicode.Is(unicode.Han, r) ||
185 unicode.Is(unicode.Lm, r) ||
186 unicode.Is(unicode.Hangul, r) ||
187 unicode.Is(cjkSymbolsAndPunctuation, r)
188 }
189
190
191
192 func IsSpaceDiscardingUnicodeRune(r rune) bool {
193 return unicode.Is(cjkRadicalsSupplement, r) ||
194 unicode.Is(kangxiRadicals, r) ||
195 unicode.Is(ideographicDescriptionCharacters, r) ||
196 unicode.Is(cjkSymbolsAndPunctuation, r) ||
197 unicode.Is(hiragana, r) ||
198 unicode.Is(katakana, r) ||
199 unicode.Is(kanbun, r) ||
200 unicode.Is(cjkStrokes, r) ||
201 unicode.Is(katakanaPhoneticExtensions, r) ||
202 unicode.Is(cjkCompatibility, r) ||
203 unicode.Is(cjkUnifiedIdeographsExtensionA, r) ||
204 unicode.Is(cjkUnifiedIdeographs, r) ||
205 unicode.Is(yiSyllables, r) ||
206 unicode.Is(yiRadicals, r) ||
207 unicode.Is(cjkCompatibilityIdeographs, r) ||
208 unicode.Is(verticalForms, r) ||
209 unicode.Is(cjkCompatibilityForms, r) ||
210 unicode.Is(smallFormVariants, r) ||
211 unicode.Is(halfwidthAndFullwidthForms, r) ||
212 unicode.Is(kanaSupplement, r) ||
213 unicode.Is(kanaExtendedA, r) ||
214 unicode.Is(smallKanaExtension, r) ||
215 unicode.Is(cjkUnifiedIdeographsExtensionB, r) ||
216 unicode.Is(cjkUnifiedIdeographsExtensionC, r) ||
217 unicode.Is(cjkUnifiedIdeographsExtensionD, r) ||
218 unicode.Is(cjkUnifiedIdeographsExtensionE, r) ||
219 unicode.Is(cjkUnifiedIdeographsExtensionF, r) ||
220 unicode.Is(cjkCompatibilityIdeographsSupplement, r) ||
221 unicode.Is(cjkUnifiedIdeographsExtensionG, r)
222 }
223
224
225
// EastAsianWidth classifies r by its code point and returns one of the
// strings "F", "H", "W", "Na", "A" or "N". The strings follow the
// abbreviations of the East_Asian_Width property from Unicode Standard
// Annex #11 (Fullwidth, Halfwidth, Wide, Narrow, Ambiguous, Neutral).
//
// The categories are tested in the order F, H, W, Na, A; any rune matching
// none of the hard-coded ranges yields "N".
func EastAsianWidth(r rune) string {
	// between reports whether r lies in the inclusive interval [lo, hi].
	between := func(lo, hi rune) bool { return lo <= r && r <= hi }

	switch {
	// "F"
	case r == 0x3000,
		between(0xFF01, 0xFF60),
		between(0xFFE0, 0xFFE6):
		return "F"

	// "H"
	case r == 0x20A9,
		between(0xFF61, 0xFFBE),
		between(0xFFC2, 0xFFC7),
		between(0xFFCA, 0xFFCF),
		between(0xFFD2, 0xFFD7),
		between(0xFFDA, 0xFFDC),
		between(0xFFE8, 0xFFEE):
		return "H"

	// "W"
	case between(0x1100, 0x115F),
		between(0x11A3, 0x11A7),
		between(0x11FA, 0x11FF),
		between(0x2329, 0x232A),
		between(0x2E80, 0x2E99),
		between(0x2E9B, 0x2EF3),
		between(0x2F00, 0x2FD5),
		between(0x2FF0, 0x2FFB),
		between(0x3001, 0x303E),
		between(0x3041, 0x3096),
		between(0x3099, 0x30FF),
		between(0x3105, 0x312D),
		between(0x3131, 0x318E),
		between(0x3190, 0x31BA),
		between(0x31C0, 0x31E3),
		between(0x31F0, 0x321E),
		between(0x3220, 0x3247),
		between(0x3250, 0x32FE),
		between(0x3300, 0x4DBF),
		between(0x4E00, 0xA48C),
		between(0xA490, 0xA4C6),
		between(0xA960, 0xA97C),
		between(0xAC00, 0xD7A3),
		between(0xD7B0, 0xD7C6),
		between(0xD7CB, 0xD7FB),
		between(0xF900, 0xFAFF),
		between(0xFE10, 0xFE19),
		between(0xFE30, 0xFE52),
		between(0xFE54, 0xFE66),
		between(0xFE68, 0xFE6B),
		between(0x1B000, 0x1B001),
		between(0x1F200, 0x1F202),
		between(0x1F210, 0x1F23A),
		between(0x1F240, 0x1F248),
		between(0x1F250, 0x1F251),
		// The next two intervals overlap; together they cover
		// 0x20000 through 0x2FFFD.
		between(0x20000, 0x2F73F),
		between(0x2B740, 0x2FFFD),
		between(0x30000, 0x3FFFD):
		return "W"

	// "Na"
	case between(0x0020, 0x007E),
		between(0x00A2, 0x00A3),
		between(0x00A5, 0x00A6),
		r == 0x00AC,
		r == 0x00AF,
		between(0x27E6, 0x27ED),
		between(0x2985, 0x2986):
		return "Na"

	// "A"
	case r == 0x00A1,
		r == 0x00A4,
		between(0x00A7, 0x00A8),
		r == 0x00AA,
		between(0x00AD, 0x00AE),
		between(0x00B0, 0x00B4),
		between(0x00B6, 0x00BA),
		between(0x00BC, 0x00BF),
		r == 0x00C6,
		r == 0x00D0,
		between(0x00D7, 0x00D8),
		between(0x00DE, 0x00E1),
		r == 0x00E6,
		between(0x00E8, 0x00EA),
		between(0x00EC, 0x00ED),
		r == 0x00F0,
		between(0x00F2, 0x00F3),
		between(0x00F7, 0x00FA),
		r == 0x00FC,
		r == 0x00FE,
		r == 0x0101,
		r == 0x0111,
		r == 0x0113,
		r == 0x011B,
		between(0x0126, 0x0127),
		r == 0x012B,
		between(0x0131, 0x0133),
		r == 0x0138,
		between(0x013F, 0x0142),
		r == 0x0144,
		between(0x0148, 0x014B),
		r == 0x014D,
		between(0x0152, 0x0153),
		between(0x0166, 0x0167),
		r == 0x016B,
		r == 0x01CE,
		r == 0x01D0,
		r == 0x01D2,
		r == 0x01D4,
		r == 0x01D6,
		r == 0x01D8,
		r == 0x01DA,
		r == 0x01DC,
		r == 0x0251,
		r == 0x0261,
		r == 0x02C4,
		r == 0x02C7,
		between(0x02C9, 0x02CB),
		r == 0x02CD,
		r == 0x02D0,
		between(0x02D8, 0x02DB),
		r == 0x02DD,
		r == 0x02DF,
		between(0x0300, 0x036F),
		between(0x0391, 0x03A1),
		between(0x03A3, 0x03A9),
		between(0x03B1, 0x03C1),
		between(0x03C3, 0x03C9),
		r == 0x0401,
		between(0x0410, 0x044F),
		r == 0x0451,
		r == 0x2010,
		between(0x2013, 0x2016),
		between(0x2018, 0x2019),
		between(0x201C, 0x201D),
		between(0x2020, 0x2022),
		between(0x2024, 0x2027),
		r == 0x2030,
		between(0x2032, 0x2033),
		r == 0x2035,
		r == 0x203B,
		r == 0x203E,
		r == 0x2074,
		r == 0x207F,
		between(0x2081, 0x2084),
		r == 0x20AC,
		r == 0x2103,
		r == 0x2105,
		r == 0x2109,
		r == 0x2113,
		r == 0x2116,
		between(0x2121, 0x2122),
		r == 0x2126,
		r == 0x212B,
		between(0x2153, 0x2154),
		between(0x215B, 0x215E),
		between(0x2160, 0x216B),
		between(0x2170, 0x2179),
		r == 0x2189,
		between(0x2190, 0x2199),
		between(0x21B8, 0x21B9),
		r == 0x21D2,
		r == 0x21D4,
		r == 0x21E7,
		r == 0x2200,
		between(0x2202, 0x2203),
		between(0x2207, 0x2208),
		r == 0x220B,
		r == 0x220F,
		r == 0x2211,
		r == 0x2215,
		r == 0x221A,
		between(0x221D, 0x2220),
		r == 0x2223,
		r == 0x2225,
		between(0x2227, 0x222C),
		r == 0x222E,
		between(0x2234, 0x2237),
		between(0x223C, 0x223D),
		r == 0x2248,
		r == 0x224C,
		r == 0x2252,
		between(0x2260, 0x2261),
		between(0x2264, 0x2267),
		between(0x226A, 0x226B),
		between(0x226E, 0x226F),
		between(0x2282, 0x2283),
		between(0x2286, 0x2287),
		r == 0x2295,
		r == 0x2299,
		r == 0x22A5,
		r == 0x22BF,
		r == 0x2312,
		between(0x2460, 0x24E9),
		between(0x24EB, 0x254B),
		between(0x2550, 0x2573),
		between(0x2580, 0x258F),
		between(0x2592, 0x2595),
		between(0x25A0, 0x25A1),
		between(0x25A3, 0x25A9),
		between(0x25B2, 0x25B3),
		between(0x25B6, 0x25B7),
		between(0x25BC, 0x25BD),
		between(0x25C0, 0x25C1),
		between(0x25C6, 0x25C8),
		r == 0x25CB,
		between(0x25CE, 0x25D1),
		between(0x25E2, 0x25E5),
		r == 0x25EF,
		between(0x2605, 0x2606),
		r == 0x2609,
		between(0x260E, 0x260F),
		between(0x2614, 0x2615),
		r == 0x261C,
		r == 0x261E,
		r == 0x2640,
		r == 0x2642,
		between(0x2660, 0x2661),
		between(0x2663, 0x2665),
		between(0x2667, 0x266A),
		between(0x266C, 0x266D),
		r == 0x266F,
		between(0x269E, 0x269F),
		between(0x26BE, 0x26BF),
		between(0x26C4, 0x26CD),
		between(0x26CF, 0x26E1),
		r == 0x26E3,
		between(0x26E8, 0x26FF),
		r == 0x273D,
		r == 0x2757,
		between(0x2776, 0x277F),
		between(0x2B55, 0x2B59),
		between(0x3248, 0x324F),
		between(0xE000, 0xF8FF),
		between(0xFE00, 0xFE0F),
		r == 0xFFFD,
		between(0x1F100, 0x1F10A),
		between(0x1F110, 0x1F12D),
		between(0x1F130, 0x1F169),
		between(0x1F170, 0x1F19A),
		between(0xE0100, 0xE01EF),
		between(0xF0000, 0xFFFFD),
		between(0x100000, 0x10FFFD):
		return "A"
	}

	// Anything not matched by the tables above.
	return "N"
}
470
View as plain text