1
2
3
4
5 package asciisanitizer
6
7 import (
8 "bytes"
9 "errors"
10 "strings"
11 "unicode"
12 "unicode/utf8"
13
14 "golang.org/x/text/transform"
15 )
16
17
// Sanitizer rewrites C0 and C1 control characters into caret notation (for
// example ESC becomes `^[`) as bytes pass through its Transform method.
type Sanitizer struct {
	// JSON makes Transform also look for six-byte JSON escape sequences of
	// the form `\u00XX` that name a control character, and replace them with
	// caret notation as well.
	JSON bool

	// addEscape is Transform's carry-over state in JSON mode. It is toggled
	// each time a backslash is copied through and cleared by any other
	// copied rune. While it is set, the next replaced JSON escape sequence
	// is prefixed with an extra backslash.
	addEscape bool
}
25
26
27
28 func (t *Sanitizer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
29 transfer := func(write, read []byte) error {
30 readLength := len(read)
31 writeLength := len(write)
32 if writeLength > len(dst) {
33 return transform.ErrShortDst
34 }
35 copy(dst, write)
36 nDst += writeLength
37 dst = dst[writeLength:]
38 nSrc += readLength
39 src = src[readLength:]
40 return nil
41 }
42
43 for len(src) > 0 {
44
45 if t.JSON && len(src) < 6 && !atEOF {
46 err = transform.ErrShortSrc
47 return
48 }
49 r, size := utf8.DecodeRune(src)
50 if r == utf8.RuneError && size < 2 {
51 if !atEOF {
52 err = transform.ErrShortSrc
53 return
54 } else {
55 err = errors.New("invalid UTF-8 string")
56 return
57 }
58 }
59
60 if unicode.IsControl(r) {
61 if repl, found := mapControlToCaret(r); found {
62 err = transfer(repl, src[:size])
63 if err != nil {
64 return
65 }
66 continue
67 }
68 }
69
70 if t.JSON && len(src) >= 6 {
71 if repl, found := mapJSONControlToCaret(src[:6]); found {
72 if t.addEscape {
73
74
75 repl = append([]byte{'\\'}, repl...)
76 t.addEscape = false
77 }
78 err = transfer(repl, src[:6])
79 if err != nil {
80 return
81 }
82 continue
83 }
84 }
85 err = transfer(src[:size], src[:size])
86 if err != nil {
87 return
88 }
89 if t.JSON {
90 if r == '\\' {
91 t.addEscape = !t.addEscape
92 } else {
93 t.addEscape = false
94 }
95 }
96 }
97 return
98 }
99
100
101 func (t *Sanitizer) Reset() {
102 t.addEscape = false
103 }
104
105
// mapControlToCaret returns the caret notation for the C0 (0x00-0x1f) or C1
// (0x80-0x9f) control character r, and reports whether r has a mapping.
//
// Tab, line feed, vertical tab and carriage return (9, 10, 11, 13) have no
// mapping, nor does any rune outside the two ranges. A C1 character maps to
// the same notation as the C0 character 0x80 below it. The backslash produced
// by 0x1c and 0x9c is doubled, so those two yield the three bytes `^\\`.
//
// The result is computed arithmetically rather than looked up in a map
// literal, which would be rebuilt on every call.
func mapControlToCaret(r rune) ([]byte, bool) {
	var offset rune
	switch {
	case r == '\t', r == '\n', r == '\v', r == '\r':
		// Whitespace controls are passed through untouched.
		return nil, false
	case r >= 0x00 && r <= 0x1f:
		offset = r
	case r >= 0x80 && r <= 0x9f:
		offset = r - 0x80
	default:
		return nil, false
	}

	// Caret notation pairs control code N with the character '@'+N.
	letter := byte('@' + offset)
	if letter == '\\' {
		return []byte(`^\\`), true
	}
	return []byte{'^', letter}, true
}
175
176
177
178
// mapJSONControlToCaret returns the caret notation for a JSON escape sequence
// that names a C0 or C1 control character, and reports whether b is such a
// sequence.
//
// b must be exactly six bytes: the literal prefix `\u00` followed by two
// hexadecimal digits in either case. The sequences for tab, line feed,
// vertical tab and carriage return (`\u0009`, `\u000a`, `\u000b`, `\u000d`)
// have no mapping, nor does any code outside 0x00-0x1f and 0x80-0x9f. The
// backslash produced by `\u001c` and `\u009c` is doubled, so those two yield
// the three bytes `^\\`.
//
// The result is computed arithmetically rather than looked up in a map
// literal, which would be rebuilt on every call.
func mapJSONControlToCaret(b []byte) ([]byte, bool) {
	if len(b) != 6 || !bytes.HasPrefix(b, []byte(`\u00`)) {
		return nil, false
	}

	// Fold the digits to lower case so `\u001B` and `\u001b` match alike.
	digits := strings.ToLower(string(b[4:]))
	if len(digits) != 2 {
		// Case folding changed the length, so the tail was not two ASCII
		// hex digits to begin with.
		return nil, false
	}

	// Decode the two hexadecimal digits into the code point they name.
	var code byte
	for i := 0; i < len(digits); i++ {
		c := digits[i]
		switch {
		case c >= '0' && c <= '9':
			code = code<<4 | (c - '0')
		case c >= 'a' && c <= 'f':
			code = code<<4 | (c - 'a' + 10)
		default:
			return nil, false
		}
	}

	switch {
	case code == '\t', code == '\n', code == '\v', code == '\r':
		// Whitespace controls are passed through untouched.
		return nil, false
	case code <= 0x1f:
		// C0 control: code is already the offset from '@'.
	case code >= 0x80 && code <= 0x9f:
		// C1 control: shares the notation of the C0 control 0x80 below it.
		code -= 0x80
	default:
		return nil, false
	}

	// Caret notation pairs control code N with the character '@'+N.
	letter := '@' + code
	if letter == '\\' {
		return []byte(`^\\`), true
	}
	return []byte{'^', letter}, true
}
254
View as plain text