...
1
2
3
4 package xstrings
5
6 import (
7 "unicode"
8 "unicode/utf8"
9 )
10
11
// Len returns the number of runes in str, as opposed to the built-in len,
// which reports the number of bytes. Each byte that is not part of a valid
// UTF-8 sequence counts as one rune.
func Len(str string) int {
	count := 0

	// Ranging over a string advances one rune per iteration.
	for range str {
		count++
	}

	return count
}
15
16
17
18
19
20 func WordCount(str string) int {
21 var r rune
22 var size, n int
23
24 inWord := false
25
26 for len(str) > 0 {
27 r, size = utf8.DecodeRuneInString(str)
28
29 switch {
30 case isAlphabet(r):
31 if !inWord {
32 inWord = true
33 n++
34 }
35
36 case inWord && (r == '\'' || r == '-'):
37
38
39 default:
40 inWord = false
41 }
42
43 str = str[size:]
44 }
45
46 return n
47 }
48
// minCJKCharacter is the lowest code point that isAlphabet treats as a CJK
// ideograph: the start of the CJK Unified Ideographs Extension A block.
const minCJKCharacter = '\u3400'

// isAlphabet reports whether r is a letter that is not a CJK ideograph.
//
// The excluded ranges are whole Unicode blocks/planes rather than the last
// code point assigned at some particular Unicode version, so ideographs added
// by newer versions are excluded as well. Widening the bounds is safe because
// unassigned code points are rejected by unicode.IsLetter before any range
// check is reached.
func isAlphabet(r rune) bool {
	if !unicode.IsLetter(r) {
		return false
	}

	switch {
	// Fast path: every letter below the first ideograph block.
	case r < minCJKCharacter:
		return true

	// CJK Unified Ideographs Extension A (U+3400..U+4DBF).
	case r >= minCJKCharacter && r <= '\u4DBF':
		return false

	// CJK Unified Ideographs (U+4E00..U+9FFF).
	case r >= '\u4E00' && r <= '\u9FFF':
		return false

	// Supplementary and Tertiary Ideographic Planes (planes 2 and 3), which
	// hold CJK Unified Ideographs Extension B and all later extensions.
	case r >= '\U00020000' && r <= '\U0003FFFF':
		return false
	}

	return true
}
77
78
79
80
81
82
83 func Width(str string) int {
84 var r rune
85 var size, n int
86
87 for len(str) > 0 {
88 r, size = utf8.DecodeRuneInString(str)
89 n += RuneWidth(r)
90 str = str[size:]
91 }
92
93 return n
94 }
95
96
97
98
99
100
// RuneWidth returns the display width assigned to r: 0, 1 or 2.
//
//	utf8.RuneError, or below U+0020  -> 0
//	U+0020 .. U+1FFF                 -> 1
//	U+2000 .. U+FF60                 -> 2
//	U+FF61 .. U+FF9F                 -> 1
//	U+FFA0 and above                 -> 2
func RuneWidth(r rune) int {
	// Decoding errors and control characters (including any negative value)
	// take up no space.
	if r == utf8.RuneError || r < '\x20' {
		return 0
	}

	// The remaining checks are ordered by ascending upper bound, so each one
	// only needs to test the top of its range.
	if r < '\u2000' {
		return 1
	}
	if r < '\uFF61' {
		return 2
	}
	if r < '\uFFA0' {
		return 1
	}

	return 2
}
121
View as plain text