1
2
3
4 package xstrings
5
6 import (
7 "unicode"
8 "unicode/utf8"
9 )
10
// runeRangeMap maps every rune of one inclusive source range onto a target
// range. addRuneRange always stores it with FromLo <= FromHi, swapping the
// target bounds along with the source bounds when needed.
type runeRangeMap struct {
	// FromLo and FromHi are the inclusive bounds of the source range.
	FromLo, FromHi rune

	// ToLo and ToHi are the targets of FromLo and FromHi respectively.
	// ToLo < ToHi is an ascending mapping, ToLo > ToHi a descending one, and
	// ToLo == ToHi sends the whole source range to that single rune.
	ToLo, ToHi rune
}
17
// runeDict is a direct lookup table for source runes in the ASCII range,
// indexed by the source rune itself. An entry of 0 means "no mapping", which
// is how TranslateRune and addRuneRange treat it.
type runeDict struct {
	Dict [unicode.MaxASCII + 1]rune
}
21
// runeMap maps a single source rune to its replacement. addRune uses it for
// source runes above unicode.MaxASCII.
type runeMap map[rune]rune
23
24
25
26
27 type Translator struct {
28 quickDict *runeDict
29 runeMap runeMap
30 ranges []*runeRangeMap
31 mappedRune rune
32 reverted bool
33 hasPattern bool
34 }
35
36
37 func NewTranslator(from, to string) *Translator {
38 tr := &Translator{}
39
40 if from == "" {
41 return tr
42 }
43
44 reverted := from[0] == '^'
45 deletion := len(to) == 0
46
47 if reverted {
48 from = from[1:]
49 }
50
51 var fromStart, fromEnd, fromRangeStep rune
52 var toStart, toEnd, toRangeStep rune
53 var fromRangeSize, toRangeSize rune
54 var singleRunes []rune
55
56
57 updateRange := func() {
58
59 if toEnd == utf8.RuneError {
60 return
61 }
62
63 if toRangeStep == 0 {
64 to, toStart, toEnd, toRangeStep = nextRuneRange(to, toEnd)
65 return
66 }
67
68
69 if toStart != toEnd {
70 toStart += toRangeStep
71 return
72 }
73
74
75 if to == "" {
76 toEnd = utf8.RuneError
77 return
78 }
79
80
81 to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError)
82 }
83
84 if deletion {
85 toStart = utf8.RuneError
86 toEnd = utf8.RuneError
87 } else {
88
89 if reverted {
90 var size int
91
92 for len(to) > 0 {
93 toStart, size = utf8.DecodeRuneInString(to)
94 to = to[size:]
95 }
96
97 toEnd = utf8.RuneError
98 } else {
99 to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError)
100 }
101 }
102
103 fromEnd = utf8.RuneError
104
105 for len(from) > 0 {
106 from, fromStart, fromEnd, fromRangeStep = nextRuneRange(from, fromEnd)
107
108
109 if fromRangeStep == 0 {
110 singleRunes = tr.addRune(fromStart, toStart, singleRunes)
111 updateRange()
112 continue
113 }
114
115 for toEnd != utf8.RuneError && fromStart != fromEnd {
116
117
118 if toRangeStep == 0 {
119 singleRunes = tr.addRune(fromStart, toStart, singleRunes)
120 updateRange()
121 fromStart += fromRangeStep
122 continue
123 }
124
125 fromRangeSize = (fromEnd - fromStart) * fromRangeStep
126 toRangeSize = (toEnd - toStart) * toRangeStep
127
128
129 if fromRangeSize > toRangeSize {
130 fromStart, toStart = tr.addRuneRange(fromStart, fromStart+toRangeSize*fromRangeStep, toStart, toEnd, singleRunes)
131 fromStart += fromRangeStep
132 updateRange()
133
134
135
136 if fromStart == fromEnd {
137 singleRunes = tr.addRune(fromStart, toStart, singleRunes)
138 updateRange()
139 }
140
141 continue
142 }
143
144 fromStart, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart+fromRangeSize*toRangeStep, singleRunes)
145 updateRange()
146 break
147 }
148
149 if fromStart == fromEnd {
150 fromEnd = utf8.RuneError
151 continue
152 }
153
154 _, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart, singleRunes)
155 fromEnd = utf8.RuneError
156 }
157
158 if fromEnd != utf8.RuneError {
159 tr.addRune(fromEnd, toStart, singleRunes)
160 }
161
162 tr.reverted = reverted
163 tr.mappedRune = -1
164 tr.hasPattern = true
165
166
167 if deletion || reverted {
168 tr.mappedRune = toStart
169 }
170
171 return tr
172 }
173
174 func (tr *Translator) addRune(from, to rune, singleRunes []rune) []rune {
175 if from <= unicode.MaxASCII {
176 if tr.quickDict == nil {
177 tr.quickDict = &runeDict{}
178 }
179
180 tr.quickDict.Dict[from] = to
181 } else {
182 if tr.runeMap == nil {
183 tr.runeMap = make(runeMap)
184 }
185
186 tr.runeMap[from] = to
187 }
188
189 singleRunes = append(singleRunes, from)
190 return singleRunes
191 }
192
193 func (tr *Translator) addRuneRange(fromLo, fromHi, toLo, toHi rune, singleRunes []rune) (rune, rune) {
194 var r rune
195 var rrm *runeRangeMap
196
197 if fromLo < fromHi {
198 rrm = &runeRangeMap{
199 FromLo: fromLo,
200 FromHi: fromHi,
201 ToLo: toLo,
202 ToHi: toHi,
203 }
204 } else {
205 rrm = &runeRangeMap{
206 FromLo: fromHi,
207 FromHi: fromLo,
208 ToLo: toHi,
209 ToHi: toLo,
210 }
211 }
212
213
214 for _, r = range singleRunes {
215 if rrm.FromLo <= r && r <= rrm.FromHi {
216 if r <= unicode.MaxASCII {
217 tr.quickDict.Dict[r] = 0
218 } else {
219 delete(tr.runeMap, r)
220 }
221 }
222 }
223
224 tr.ranges = append(tr.ranges, rrm)
225 return fromHi, toHi
226 }
227
// nextRuneRange reads the next item of the pattern str.
//
// last is a rune carried over from the previous call, or utf8.RuneError when
// there is none. A '\\' makes the following rune literal and an unescaped '-'
// that follows a rune starts a range; a '-' with no rune before it is
// skipped.
//
// It returns the unread rest of the pattern and:
//   - start and end with rangeStep 0 when they are two independent runes; end
//     is only a lookahead and should be passed as last to the next call;
//   - start and end with rangeStep 1 or -1 when they delimit an ascending or
//     a descending range;
//   - start == last and end == utf8.RuneError when the pattern is exhausted.
//
// A range whose two ends are equal (such as "a-a") is treated as the single
// rune.
func nextRuneRange(str string, last rune) (remaining string, start, end rune, rangeStep rune) {
	var (
		escaped  bool
		dashSeen bool
	)

	remaining = str

	for remaining != "" {
		cur, width := utf8.DecodeRuneInString(remaining)
		remaining = remaining[width:]

		if !escaped {
			switch cur {
			case '\\':
				escaped = true
				continue
			case '-':
				// A dash only forms a range when a rune precedes it.
				if last != utf8.RuneError {
					dashSeen = true
				}

				continue
			}
		}

		escaped = false

		// First rune of the item: remember it and look at what follows.
		if last == utf8.RuneError {
			last = cur
			continue
		}

		// "x-x" collapses to the single rune x.
		if dashSeen && cur == last {
			dashSeen = false
			continue
		}

		switch {
		case !dashSeen:
			rangeStep = 0
		case last < cur:
			rangeStep = 1
		default:
			rangeStep = -1
		}

		return remaining, last, cur, rangeStep
	}

	return remaining, last, utf8.RuneError, 0
}
290
291
292
293
294 func (tr *Translator) Translate(str string) string {
295 if !tr.hasPattern || str == "" {
296 return str
297 }
298
299 var r rune
300 var size int
301 var needTr bool
302
303 orig := str
304
305 var output *stringBuilder
306
307 for len(str) > 0 {
308 r, size = utf8.DecodeRuneInString(str)
309 r, needTr = tr.TranslateRune(r)
310
311 if needTr && output == nil {
312 output = allocBuffer(orig, str)
313 }
314
315 if r != utf8.RuneError && output != nil {
316 output.WriteRune(r)
317 }
318
319 str = str[size:]
320 }
321
322
323 if output == nil {
324 return orig
325 }
326
327 return output.String()
328 }
329
330
331
332 func (tr *Translator) TranslateRune(r rune) (result rune, translated bool) {
333 switch {
334 case tr.quickDict != nil:
335 if r <= unicode.MaxASCII {
336 result = tr.quickDict.Dict[r]
337
338 if result != 0 {
339 translated = true
340
341 if tr.mappedRune >= 0 {
342 result = tr.mappedRune
343 }
344
345 break
346 }
347 }
348
349 fallthrough
350
351 case tr.runeMap != nil:
352 var ok bool
353
354 if result, ok = tr.runeMap[r]; ok {
355 translated = true
356
357 if tr.mappedRune >= 0 {
358 result = tr.mappedRune
359 }
360
361 break
362 }
363
364 fallthrough
365
366 default:
367 var rrm *runeRangeMap
368 ranges := tr.ranges
369
370 for i := len(ranges) - 1; i >= 0; i-- {
371 rrm = ranges[i]
372
373 if rrm.FromLo <= r && r <= rrm.FromHi {
374 translated = true
375
376 if tr.mappedRune >= 0 {
377 result = tr.mappedRune
378 break
379 }
380
381 if rrm.ToLo < rrm.ToHi {
382 result = rrm.ToLo + r - rrm.FromLo
383 } else if rrm.ToLo > rrm.ToHi {
384
385 result = rrm.ToLo - r + rrm.FromLo
386 } else {
387 result = rrm.ToLo
388 }
389
390 break
391 }
392 }
393 }
394
395 if tr.reverted {
396 if !translated {
397 result = tr.mappedRune
398 }
399
400 translated = !translated
401 }
402
403 if !translated {
404 result = r
405 }
406
407 return
408 }
409
410
// HasPattern reports whether tr was built from a non-empty from pattern.
func (tr *Translator) HasPattern() bool {
	return tr.hasPattern
}
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445 func Translate(str, from, to string) string {
446 tr := NewTranslator(from, to)
447 return tr.Translate(str)
448 }
449
450
451
452
453
454
455
456
457
458 func Delete(str, pattern string) string {
459 tr := NewTranslator(pattern, "")
460 return tr.Translate(str)
461 }
462
463
464
465
466
467
468
469
470
471 func Count(str, pattern string) int {
472 if pattern == "" || str == "" {
473 return 0
474 }
475
476 var r rune
477 var size int
478 var matched bool
479
480 tr := NewTranslator(pattern, "")
481 cnt := 0
482
483 for len(str) > 0 {
484 r, size = utf8.DecodeRuneInString(str)
485 str = str[size:]
486
487 if _, matched = tr.TranslateRune(r); matched {
488 cnt++
489 }
490 }
491
492 return cnt
493 }
494
495
496
497
498
499
500
501
502
503 func Squeeze(str, pattern string) string {
504 var last, r rune
505 var size int
506 var skipSqueeze, matched bool
507 var tr *Translator
508 var output *stringBuilder
509
510 orig := str
511 last = -1
512
513 if len(pattern) > 0 {
514 tr = NewTranslator(pattern, "")
515 }
516
517 for len(str) > 0 {
518 r, size = utf8.DecodeRuneInString(str)
519
520
521 if last == r && !skipSqueeze {
522 if tr != nil {
523 if _, matched = tr.TranslateRune(r); !matched {
524 skipSqueeze = true
525 }
526 }
527
528 if output == nil {
529 output = allocBuffer(orig, str)
530 }
531
532 if skipSqueeze {
533 output.WriteRune(r)
534 }
535 } else {
536 if output != nil {
537 output.WriteRune(r)
538 }
539
540 last = r
541 skipSqueeze = false
542 }
543
544 str = str[size:]
545 }
546
547 if output == nil {
548 return orig
549 }
550
551 return output.String()
552 }
553
View as plain text