1
2
3
4
5 package simplifiedchinese
6
7 import (
8 "unicode/utf8"
9
10 "golang.org/x/text/encoding"
11 "golang.org/x/text/encoding/internal"
12 "golang.org/x/text/encoding/internal/identifier"
13 "golang.org/x/text/transform"
14 )
15
var (
	// GB18030 is the GB18030 encoding. Its decoder and encoder are configured
	// with the four-byte GB18030 forms enabled.
	GB18030 encoding.Encoding = &gbk18030

	// GBK is the GBK encoding. Its decoder and encoder are configured without
	// the four-byte GB18030 forms, and its encoder writes the euro sign as the
	// single byte 0x80.
	GBK encoding.Encoding = &gbk
)
23
// gbk is the value behind the exported GBK encoding: a decoder/encoder pair
// with the gb18030 flag cleared, labelled "GBK" and tagged with
// identifier.GBK.
var gbk = internal.Encoding{
	&internal.SimpleEncoding{
		gbkDecoder{gb18030: false},
		gbkEncoder{gb18030: false},
	},
	"GBK",
	identifier.GBK,
}
32
// gbk18030 is the value behind the exported GB18030 encoding: the same
// decoder/encoder types used for GBK, but with the gb18030 flag set, labelled
// "GB18030" and tagged with identifier.GB18030.
var gbk18030 = internal.Encoding{
	&internal.SimpleEncoding{
		gbkDecoder{gb18030: true},
		gbkEncoder{gb18030: true},
	},
	"GB18030",
	identifier.GB18030,
}
41
// gbkDecoder converts GBK or GB18030 bytes to UTF-8.
type gbkDecoder struct {
	// NOTE(review): presumably supplies a no-op Reset, since Transform keeps
	// no state between calls — confirm against the transform package.
	transform.NopResetter
	// gb18030 enables decoding of four-byte sequences in Transform.
	gb18030 bool
}
46
47 func (d gbkDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
48 r, size := rune(0), 0
49 loop:
50 for ; nSrc < len(src); nSrc += size {
51 switch c0 := src[nSrc]; {
52 case c0 < utf8.RuneSelf:
53 r, size = rune(c0), 1
54
55
56
57
58
59
60 case c0 == 0x80:
61 r, size = '€', 1
62
63 case c0 < 0xff:
64 if nSrc+1 >= len(src) {
65 if !atEOF {
66 err = transform.ErrShortSrc
67 break loop
68 }
69 r, size = utf8.RuneError, 1
70 goto write
71 }
72 c1 := src[nSrc+1]
73 switch {
74 case 0x40 <= c1 && c1 < 0x7f:
75 c1 -= 0x40
76 case 0x80 <= c1 && c1 < 0xff:
77 c1 -= 0x41
78 case d.gb18030 && 0x30 <= c1 && c1 < 0x40:
79 if nSrc+3 >= len(src) {
80 if !atEOF {
81 err = transform.ErrShortSrc
82 break loop
83 }
84
85
86 r, size = utf8.RuneError, 1
87 goto write
88 }
89 c2 := src[nSrc+2]
90 if c2 < 0x81 || 0xff <= c2 {
91 r, size = utf8.RuneError, 1
92 goto write
93 }
94 c3 := src[nSrc+3]
95 if c3 < 0x30 || 0x3a <= c3 {
96 r, size = utf8.RuneError, 1
97 goto write
98 }
99 size = 4
100 r = ((rune(c0-0x81)*10+rune(c1-0x30))*126+rune(c2-0x81))*10 + rune(c3-0x30)
101 if r < 39420 {
102 i, j := 0, len(gb18030)
103 for i < j {
104 h := i + (j-i)/2
105 if r >= rune(gb18030[h][0]) {
106 i = h + 1
107 } else {
108 j = h
109 }
110 }
111 dec := &gb18030[i-1]
112 r += rune(dec[1]) - rune(dec[0])
113 goto write
114 }
115 r -= 189000
116 if 0 <= r && r < 0x100000 {
117 r += 0x10000
118 } else {
119 r, size = utf8.RuneError, 1
120 }
121 goto write
122 default:
123 r, size = utf8.RuneError, 1
124 goto write
125 }
126 r, size = '\ufffd', 2
127 if i := int(c0-0x81)*190 + int(c1); i < len(decode) {
128 r = rune(decode[i])
129 if r == 0 {
130 r = '\ufffd'
131 }
132 }
133
134 default:
135 r, size = utf8.RuneError, 1
136 }
137
138 write:
139 if nDst+utf8.RuneLen(r) > len(dst) {
140 err = transform.ErrShortDst
141 break loop
142 }
143 nDst += utf8.EncodeRune(dst[nDst:], r)
144 }
145 return nDst, nSrc, err
146 }
147
// gbkEncoder converts UTF-8 to GBK or GB18030 bytes.
type gbkEncoder struct {
	// NOTE(review): presumably supplies a no-op Reset, since Transform keeps
	// no state between calls — confirm against the transform package.
	transform.NopResetter
	// gb18030 enables the four-byte fallback for runes that have no two-byte
	// code, and disables the single-byte 0x80 form of the euro sign.
	gb18030 bool
}
152
153 func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
154 r, r2, size := rune(0), rune(0), 0
155 for ; nSrc < len(src); nSrc += size {
156 r = rune(src[nSrc])
157
158
159 if r < utf8.RuneSelf {
160 size = 1
161
162 } else {
163
164 r, size = utf8.DecodeRune(src[nSrc:])
165 if size == 1 {
166
167
168
169 if !atEOF && !utf8.FullRune(src[nSrc:]) {
170 err = transform.ErrShortSrc
171 break
172 }
173 }
174
175
176 switch {
177 case encode0Low <= r && r < encode0High:
178 if r2 = rune(encode0[r-encode0Low]); r2 != 0 {
179 goto write2
180 }
181 case encode1Low <= r && r < encode1High:
182
183
184
185
186
187 if !e.gb18030 && r == '€' {
188 r = 0x80
189 goto write1
190 }
191 if r2 = rune(encode1[r-encode1Low]); r2 != 0 {
192 goto write2
193 }
194 case encode2Low <= r && r < encode2High:
195 if r2 = rune(encode2[r-encode2Low]); r2 != 0 {
196 goto write2
197 }
198 case encode3Low <= r && r < encode3High:
199 if r2 = rune(encode3[r-encode3Low]); r2 != 0 {
200 goto write2
201 }
202 case encode4Low <= r && r < encode4High:
203 if r2 = rune(encode4[r-encode4Low]); r2 != 0 {
204 goto write2
205 }
206 }
207
208 if e.gb18030 {
209 if r < 0x10000 {
210 i, j := 0, len(gb18030)
211 for i < j {
212 h := i + (j-i)/2
213 if r >= rune(gb18030[h][1]) {
214 i = h + 1
215 } else {
216 j = h
217 }
218 }
219 dec := &gb18030[i-1]
220 r += rune(dec[0]) - rune(dec[1])
221 goto write4
222 } else if r < 0x110000 {
223 r += 189000 - 0x10000
224 goto write4
225 }
226 }
227 err = internal.ErrASCIIReplacement
228 break
229 }
230
231 write1:
232 if nDst >= len(dst) {
233 err = transform.ErrShortDst
234 break
235 }
236 dst[nDst] = uint8(r)
237 nDst++
238 continue
239
240 write2:
241 if nDst+2 > len(dst) {
242 err = transform.ErrShortDst
243 break
244 }
245 dst[nDst+0] = uint8(r2 >> 8)
246 dst[nDst+1] = uint8(r2)
247 nDst += 2
248 continue
249
250 write4:
251 if nDst+4 > len(dst) {
252 err = transform.ErrShortDst
253 break
254 }
255 dst[nDst+3] = uint8(r%10 + 0x30)
256 r /= 10
257 dst[nDst+2] = uint8(r%126 + 0x81)
258 r /= 126
259 dst[nDst+1] = uint8(r%10 + 0x30)
260 r /= 10
261 dst[nDst+0] = uint8(r + 0x81)
262 nDst += 4
263 continue
264 }
265 return nDst, nSrc, err
266 }
267
268 func init() {
269
270 if numEncodeTables != 5 {
271 panic("bad numEncodeTables")
272 }
273 }
274
View as plain text