1 package syntax
2
3 import (
4 "bytes"
5 "encoding/binary"
6 "fmt"
7 "sort"
8 "unicode"
9 "unicode/utf8"
10 )
11
12
13 type CharSet struct {
14 ranges []singleRange
15 categories []category
16 sub *CharSet
17 negate bool
18 anything bool
19 }
20
21 type category struct {
22 negate bool
23 cat string
24 }
25
26 type singleRange struct {
27 first rune
28 last rune
29 }
30
31 const (
32 spaceCategoryText = " "
33 wordCategoryText = "W"
34 )
35
36 var (
37 ecmaSpace = []rune{0x0009, 0x000e, 0x0020, 0x0021, 0x00a0, 0x00a1, 0x1680, 0x1681, 0x2000, 0x200b, 0x2028, 0x202a, 0x202f, 0x2030, 0x205f, 0x2060, 0x3000, 0x3001, 0xfeff, 0xff00}
38 ecmaWord = []rune{0x0030, 0x003a, 0x0041, 0x005b, 0x005f, 0x0060, 0x0061, 0x007b}
39 ecmaDigit = []rune{0x0030, 0x003a}
40
41 re2Space = []rune{0x0009, 0x000b, 0x000c, 0x000e, 0x0020, 0x0021}
42 )
43
44 var (
45 AnyClass = getCharSetFromOldString([]rune{0}, false)
46 ECMAAnyClass = getCharSetFromOldString([]rune{0, 0x000a, 0x000b, 0x000d, 0x000e}, false)
47 NoneClass = getCharSetFromOldString(nil, false)
48 ECMAWordClass = getCharSetFromOldString(ecmaWord, false)
49 NotECMAWordClass = getCharSetFromOldString(ecmaWord, true)
50 ECMASpaceClass = getCharSetFromOldString(ecmaSpace, false)
51 NotECMASpaceClass = getCharSetFromOldString(ecmaSpace, true)
52 ECMADigitClass = getCharSetFromOldString(ecmaDigit, false)
53 NotECMADigitClass = getCharSetFromOldString(ecmaDigit, true)
54
55 WordClass = getCharSetFromCategoryString(false, false, wordCategoryText)
56 NotWordClass = getCharSetFromCategoryString(true, false, wordCategoryText)
57 SpaceClass = getCharSetFromCategoryString(false, false, spaceCategoryText)
58 NotSpaceClass = getCharSetFromCategoryString(true, false, spaceCategoryText)
59 DigitClass = getCharSetFromCategoryString(false, false, "Nd")
60 NotDigitClass = getCharSetFromCategoryString(false, true, "Nd")
61
62 RE2SpaceClass = getCharSetFromOldString(re2Space, false)
63 NotRE2SpaceClass = getCharSetFromOldString(re2Space, true)
64 )
65
66 var unicodeCategories = func() map[string]*unicode.RangeTable {
67 retVal := make(map[string]*unicode.RangeTable)
68 for k, v := range unicode.Scripts {
69 retVal[k] = v
70 }
71 for k, v := range unicode.Categories {
72 retVal[k] = v
73 }
74 for k, v := range unicode.Properties {
75 retVal[k] = v
76 }
77 return retVal
78 }()
79
80 func getCharSetFromCategoryString(negateSet bool, negateCat bool, cats ...string) func() *CharSet {
81 if negateCat && negateSet {
82 panic("BUG! You should only negate the set OR the category in a constant setup, but not both")
83 }
84
85 c := CharSet{negate: negateSet}
86
87 c.categories = make([]category, len(cats))
88 for i, cat := range cats {
89 c.categories[i] = category{cat: cat, negate: negateCat}
90 }
91 return func() *CharSet {
92
93 local := c
94
95 return &local
96 }
97 }
98
99 func getCharSetFromOldString(setText []rune, negate bool) func() *CharSet {
100 c := CharSet{}
101 if len(setText) > 0 {
102 fillFirst := false
103 l := len(setText)
104 if negate {
105 if setText[0] == 0 {
106 setText = setText[1:]
107 } else {
108 l++
109 fillFirst = true
110 }
111 }
112
113 if l%2 == 0 {
114 c.ranges = make([]singleRange, l/2)
115 } else {
116 c.ranges = make([]singleRange, l/2+1)
117 }
118
119 first := true
120 if fillFirst {
121 c.ranges[0] = singleRange{first: 0}
122 first = false
123 }
124
125 i := 0
126 for _, r := range setText {
127 if first {
128
129 c.ranges[i] = singleRange{first: r}
130 first = false
131 } else {
132 c.ranges[i].last = r - 1
133 i++
134 first = true
135 }
136 }
137 if !first {
138 c.ranges[i].last = utf8.MaxRune
139 }
140 }
141
142 return func() *CharSet {
143 local := c
144 return &local
145 }
146 }
147
148
149 func (c CharSet) Copy() CharSet {
150 ret := CharSet{
151 anything: c.anything,
152 negate: c.negate,
153 }
154
155 ret.ranges = append(ret.ranges, c.ranges...)
156 ret.categories = append(ret.categories, c.categories...)
157
158 if c.sub != nil {
159 sub := c.sub.Copy()
160 ret.sub = &sub
161 }
162
163 return ret
164 }
165
166
167 func (c CharSet) String() string {
168 buf := &bytes.Buffer{}
169 buf.WriteRune('[')
170
171 if c.IsNegated() {
172 buf.WriteRune('^')
173 }
174
175 for _, r := range c.ranges {
176
177 buf.WriteString(CharDescription(r.first))
178 if r.first != r.last {
179 if r.last-r.first != 1 {
180
181 buf.WriteRune('-')
182 }
183 buf.WriteString(CharDescription(r.last))
184 }
185 }
186
187 for _, c := range c.categories {
188 buf.WriteString(c.String())
189 }
190
191 if c.sub != nil {
192 buf.WriteRune('-')
193 buf.WriteString(c.sub.String())
194 }
195
196 buf.WriteRune(']')
197
198 return buf.String()
199 }
200
201
202 func (c CharSet) mapHashFill(buf *bytes.Buffer) {
203 if c.negate {
204 buf.WriteByte(0)
205 } else {
206 buf.WriteByte(1)
207 }
208
209 binary.Write(buf, binary.LittleEndian, len(c.ranges))
210 binary.Write(buf, binary.LittleEndian, len(c.categories))
211 for _, r := range c.ranges {
212 buf.WriteRune(r.first)
213 buf.WriteRune(r.last)
214 }
215 for _, ct := range c.categories {
216 buf.WriteString(ct.cat)
217 if ct.negate {
218 buf.WriteByte(1)
219 } else {
220 buf.WriteByte(0)
221 }
222 }
223
224 if c.sub != nil {
225 c.sub.mapHashFill(buf)
226 }
227 }
228
229
230
231 func (c CharSet) CharIn(ch rune) bool {
232 val := false
233
234
235
236 for _, r := range c.ranges {
237 if ch < r.first {
238 continue
239 }
240 if ch <= r.last {
241 val = true
242 break
243 }
244 }
245
246
247 if !val && len(c.categories) > 0 {
248 for _, ct := range c.categories {
249
250 if ct.cat == spaceCategoryText {
251 if unicode.IsSpace(ch) {
252
253
254 val = !ct.negate
255 break
256 } else if ct.negate {
257 val = true
258 break
259 }
260 } else if ct.cat == wordCategoryText {
261 if IsWordChar(ch) {
262 val = !ct.negate
263 break
264 } else if ct.negate {
265 val = true
266 break
267 }
268 } else if unicode.Is(unicodeCategories[ct.cat], ch) {
269
270
271
272 val = !ct.negate
273 break
274 } else if ct.negate {
275 val = true
276 break
277 }
278 }
279 }
280
281
282 if c.negate {
283 val = !val
284 }
285
286
287 if val && c.sub != nil {
288 val = !c.sub.CharIn(ch)
289 }
290
291
292 return val
293 }
294
295 func (c category) String() string {
296 switch c.cat {
297 case spaceCategoryText:
298 if c.negate {
299 return "\\S"
300 }
301 return "\\s"
302 case wordCategoryText:
303 if c.negate {
304 return "\\W"
305 }
306 return "\\w"
307 }
308 if _, ok := unicodeCategories[c.cat]; ok {
309
310 if c.negate {
311 return "\\P{" + c.cat + "}"
312 }
313 return "\\p{" + c.cat + "}"
314 }
315 return "Unknown category: " + c.cat
316 }
317
318
319 func CharDescription(ch rune) string {
320
331
332 b := &bytes.Buffer{}
333 escape(b, ch, false)
334 return b.String()
335 }
336
337
338
339
340
341 func IsWordChar(r rune) bool {
342
343 return unicode.In(r,
344 unicode.Categories["L"], unicode.Categories["Mn"],
345 unicode.Categories["Nd"], unicode.Categories["Pc"]) || r == '\u200D' || r == '\u200C'
346
347 }
348
349 func IsECMAWordChar(r rune) bool {
350 return unicode.In(r,
351 unicode.Categories["L"], unicode.Categories["Mn"],
352 unicode.Categories["Nd"], unicode.Categories["Pc"])
353
354
355 }
356
357
358
359 func (c CharSet) SingletonChar() rune {
360 return c.ranges[0].first
361 }
362
363 func (c CharSet) IsSingleton() bool {
364 return !c.negate &&
365 len(c.categories) == 0 && len(c.ranges) == 1 &&
366 c.sub == nil &&
367 c.ranges[0].first == c.ranges[0].last
368 }
369
370 func (c CharSet) IsSingletonInverse() bool {
371 return c.negate &&
372 len(c.categories) == 0 && len(c.ranges) == 1 &&
373 c.sub == nil &&
374 c.ranges[0].first == c.ranges[0].last
375 }
376
377 func (c CharSet) IsMergeable() bool {
378 return !c.IsNegated() && !c.HasSubtraction()
379 }
380
381 func (c CharSet) IsNegated() bool {
382 return c.negate
383 }
384
385 func (c CharSet) HasSubtraction() bool {
386 return c.sub != nil
387 }
388
389 func (c CharSet) IsEmpty() bool {
390 return len(c.ranges) == 0 && len(c.categories) == 0 && c.sub == nil
391 }
392
393 func (c *CharSet) addDigit(ecma, negate bool, pattern string) {
394 if ecma {
395 if negate {
396 c.addRanges(NotECMADigitClass().ranges)
397 } else {
398 c.addRanges(ECMADigitClass().ranges)
399 }
400 } else {
401 c.addCategories(category{cat: "Nd", negate: negate})
402 }
403 }
404
405 func (c *CharSet) addChar(ch rune) {
406 c.addRange(ch, ch)
407 }
408
409 func (c *CharSet) addSpace(ecma, re2, negate bool) {
410 if ecma {
411 if negate {
412 c.addRanges(NotECMASpaceClass().ranges)
413 } else {
414 c.addRanges(ECMASpaceClass().ranges)
415 }
416 } else if re2 {
417 if negate {
418 c.addRanges(NotRE2SpaceClass().ranges)
419 } else {
420 c.addRanges(RE2SpaceClass().ranges)
421 }
422 } else {
423 c.addCategories(category{cat: spaceCategoryText, negate: negate})
424 }
425 }
426
427 func (c *CharSet) addWord(ecma, negate bool) {
428 if ecma {
429 if negate {
430 c.addRanges(NotECMAWordClass().ranges)
431 } else {
432 c.addRanges(ECMAWordClass().ranges)
433 }
434 } else {
435 c.addCategories(category{cat: wordCategoryText, negate: negate})
436 }
437 }
438
439
440 func (c *CharSet) addSet(set CharSet) {
441 if c.anything {
442 return
443 }
444 if set.anything {
445 c.makeAnything()
446 return
447 }
448
449 c.ranges = append(c.ranges, set.ranges...)
450 c.addCategories(set.categories...)
451 c.canonicalize()
452 }
453
454 func (c *CharSet) makeAnything() {
455 c.anything = true
456 c.categories = []category{}
457 c.ranges = AnyClass().ranges
458 }
459
460 func (c *CharSet) addCategories(cats ...category) {
461
462 if c.anything {
463
464
465 return
466 }
467
468 for _, ct := range cats {
469 found := false
470 for _, ct2 := range c.categories {
471 if ct.cat == ct2.cat {
472 if ct.negate != ct2.negate {
473
474
475 c.makeAnything()
476 return
477 }
478 found = true
479 break
480 }
481 }
482
483 if !found {
484 c.categories = append(c.categories, ct)
485 }
486 }
487 }
488
489
490 func (c *CharSet) addRanges(ranges []singleRange) {
491 if c.anything {
492 return
493 }
494 c.ranges = append(c.ranges, ranges...)
495 c.canonicalize()
496 }
497
498
499 func (c *CharSet) addNegativeRanges(ranges []singleRange) {
500 if c.anything {
501 return
502 }
503
504 var hi rune
505
506
507 for _, r := range ranges {
508 if hi < r.first {
509 c.ranges = append(c.ranges, singleRange{hi, r.first - 1})
510 }
511 hi = r.last + 1
512 }
513
514 if hi < utf8.MaxRune {
515 c.ranges = append(c.ranges, singleRange{hi, utf8.MaxRune})
516 }
517
518 c.canonicalize()
519 }
520
521 func isValidUnicodeCat(catName string) bool {
522 _, ok := unicodeCategories[catName]
523 return ok
524 }
525
526 func (c *CharSet) addCategory(categoryName string, negate, caseInsensitive bool, pattern string) {
527 if !isValidUnicodeCat(categoryName) {
528
529 panic(fmt.Errorf("Unknown unicode category, script, or property '%v'", categoryName))
530
531 }
532
533 if caseInsensitive && (categoryName == "Ll" || categoryName == "Lu" || categoryName == "Lt") {
534
535 c.addCategories(
536 category{cat: "Ll", negate: negate},
537 category{cat: "Lu", negate: negate},
538 category{cat: "Lt", negate: negate})
539 }
540 c.addCategories(category{cat: categoryName, negate: negate})
541 }
542
543 func (c *CharSet) addSubtraction(sub *CharSet) {
544 c.sub = sub
545 }
546
547 func (c *CharSet) addRange(chMin, chMax rune) {
548 c.ranges = append(c.ranges, singleRange{first: chMin, last: chMax})
549 c.canonicalize()
550 }
551
552 func (c *CharSet) addNamedASCII(name string, negate bool) bool {
553 var rs []singleRange
554
555 switch name {
556 case "alnum":
557 rs = []singleRange{singleRange{'0', '9'}, singleRange{'A', 'Z'}, singleRange{'a', 'z'}}
558 case "alpha":
559 rs = []singleRange{singleRange{'A', 'Z'}, singleRange{'a', 'z'}}
560 case "ascii":
561 rs = []singleRange{singleRange{0, 0x7f}}
562 case "blank":
563 rs = []singleRange{singleRange{'\t', '\t'}, singleRange{' ', ' '}}
564 case "cntrl":
565 rs = []singleRange{singleRange{0, 0x1f}, singleRange{0x7f, 0x7f}}
566 case "digit":
567 c.addDigit(false, negate, "")
568 case "graph":
569 rs = []singleRange{singleRange{'!', '~'}}
570 case "lower":
571 rs = []singleRange{singleRange{'a', 'z'}}
572 case "print":
573 rs = []singleRange{singleRange{' ', '~'}}
574 case "punct":
575 rs = []singleRange{singleRange{'!', '/'}, singleRange{':', '@'}, singleRange{'[', '`'}, singleRange{'{', '~'}}
576 case "space":
577 c.addSpace(true, false, negate)
578 case "upper":
579 rs = []singleRange{singleRange{'A', 'Z'}}
580 case "word":
581 c.addWord(true, negate)
582 case "xdigit":
583 rs = []singleRange{singleRange{'0', '9'}, singleRange{'A', 'F'}, singleRange{'a', 'f'}}
584 default:
585 return false
586 }
587
588 if len(rs) > 0 {
589 if negate {
590 c.addNegativeRanges(rs)
591 } else {
592 c.addRanges(rs)
593 }
594 }
595
596 return true
597 }
598
599 type singleRangeSorter []singleRange
600
601 func (p singleRangeSorter) Len() int { return len(p) }
602 func (p singleRangeSorter) Less(i, j int) bool { return p[i].first < p[j].first }
603 func (p singleRangeSorter) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
604
605
606 func (c *CharSet) canonicalize() {
607 var i, j int
608 var last rune
609
610
611
612
613
614 if len(c.ranges) > 1 {
615 sort.Sort(singleRangeSorter(c.ranges))
616
617 done := false
618
619 for i, j = 1, 0; ; i++ {
620 for last = c.ranges[j].last; ; i++ {
621 if i == len(c.ranges) || last == utf8.MaxRune {
622 done = true
623 break
624 }
625
626 CurrentRange := c.ranges[i]
627 if CurrentRange.first > last+1 {
628 break
629 }
630
631 if last < CurrentRange.last {
632 last = CurrentRange.last
633 }
634 }
635
636 c.ranges[j] = singleRange{first: c.ranges[j].first, last: last}
637
638 j++
639
640 if done {
641 break
642 }
643
644 if j < i {
645 c.ranges[j] = c.ranges[i]
646 }
647 }
648
649 c.ranges = append(c.ranges[:j], c.ranges[len(c.ranges):]...)
650 }
651 }
652
653
654
655 func (c *CharSet) addLowercase() {
656 if c.anything {
657 return
658 }
659 toAdd := []singleRange{}
660 for i := 0; i < len(c.ranges); i++ {
661 r := c.ranges[i]
662 if r.first == r.last {
663 lower := unicode.ToLower(r.first)
664 c.ranges[i] = singleRange{first: lower, last: lower}
665 } else {
666 toAdd = append(toAdd, r)
667 }
668 }
669
670 for _, r := range toAdd {
671 c.addLowercaseRange(r.first, r.last)
672 }
673 c.canonicalize()
674 }
675
676
703
704 const (
705 LowercaseSet = 0
706 LowercaseAdd = 1
707 LowercaseBor = 2
708 LowercaseBad = 3
709 )
710
711 type lcMap struct {
712 chMin, chMax rune
713 op, data int32
714 }
715
716 var lcTable = []lcMap{
717 lcMap{'\u0041', '\u005A', LowercaseAdd, 32},
718 lcMap{'\u00C0', '\u00DE', LowercaseAdd, 32},
719 lcMap{'\u0100', '\u012E', LowercaseBor, 0},
720 lcMap{'\u0130', '\u0130', LowercaseSet, 0x0069},
721 lcMap{'\u0132', '\u0136', LowercaseBor, 0},
722 lcMap{'\u0139', '\u0147', LowercaseBad, 0},
723 lcMap{'\u014A', '\u0176', LowercaseBor, 0},
724 lcMap{'\u0178', '\u0178', LowercaseSet, 0x00FF},
725 lcMap{'\u0179', '\u017D', LowercaseBad, 0},
726 lcMap{'\u0181', '\u0181', LowercaseSet, 0x0253},
727 lcMap{'\u0182', '\u0184', LowercaseBor, 0},
728 lcMap{'\u0186', '\u0186', LowercaseSet, 0x0254},
729 lcMap{'\u0187', '\u0187', LowercaseSet, 0x0188},
730 lcMap{'\u0189', '\u018A', LowercaseAdd, 205},
731 lcMap{'\u018B', '\u018B', LowercaseSet, 0x018C},
732 lcMap{'\u018E', '\u018E', LowercaseSet, 0x01DD},
733 lcMap{'\u018F', '\u018F', LowercaseSet, 0x0259},
734 lcMap{'\u0190', '\u0190', LowercaseSet, 0x025B},
735 lcMap{'\u0191', '\u0191', LowercaseSet, 0x0192},
736 lcMap{'\u0193', '\u0193', LowercaseSet, 0x0260},
737 lcMap{'\u0194', '\u0194', LowercaseSet, 0x0263},
738 lcMap{'\u0196', '\u0196', LowercaseSet, 0x0269},
739 lcMap{'\u0197', '\u0197', LowercaseSet, 0x0268},
740 lcMap{'\u0198', '\u0198', LowercaseSet, 0x0199},
741 lcMap{'\u019C', '\u019C', LowercaseSet, 0x026F},
742 lcMap{'\u019D', '\u019D', LowercaseSet, 0x0272},
743 lcMap{'\u019F', '\u019F', LowercaseSet, 0x0275},
744 lcMap{'\u01A0', '\u01A4', LowercaseBor, 0},
745 lcMap{'\u01A7', '\u01A7', LowercaseSet, 0x01A8},
746 lcMap{'\u01A9', '\u01A9', LowercaseSet, 0x0283},
747 lcMap{'\u01AC', '\u01AC', LowercaseSet, 0x01AD},
748 lcMap{'\u01AE', '\u01AE', LowercaseSet, 0x0288},
749 lcMap{'\u01AF', '\u01AF', LowercaseSet, 0x01B0},
750 lcMap{'\u01B1', '\u01B2', LowercaseAdd, 217},
751 lcMap{'\u01B3', '\u01B5', LowercaseBad, 0},
752 lcMap{'\u01B7', '\u01B7', LowercaseSet, 0x0292},
753 lcMap{'\u01B8', '\u01B8', LowercaseSet, 0x01B9},
754 lcMap{'\u01BC', '\u01BC', LowercaseSet, 0x01BD},
755 lcMap{'\u01C4', '\u01C5', LowercaseSet, 0x01C6},
756 lcMap{'\u01C7', '\u01C8', LowercaseSet, 0x01C9},
757 lcMap{'\u01CA', '\u01CB', LowercaseSet, 0x01CC},
758 lcMap{'\u01CD', '\u01DB', LowercaseBad, 0},
759 lcMap{'\u01DE', '\u01EE', LowercaseBor, 0},
760 lcMap{'\u01F1', '\u01F2', LowercaseSet, 0x01F3},
761 lcMap{'\u01F4', '\u01F4', LowercaseSet, 0x01F5},
762 lcMap{'\u01FA', '\u0216', LowercaseBor, 0},
763 lcMap{'\u0386', '\u0386', LowercaseSet, 0x03AC},
764 lcMap{'\u0388', '\u038A', LowercaseAdd, 37},
765 lcMap{'\u038C', '\u038C', LowercaseSet, 0x03CC},
766 lcMap{'\u038E', '\u038F', LowercaseAdd, 63},
767 lcMap{'\u0391', '\u03AB', LowercaseAdd, 32},
768 lcMap{'\u03E2', '\u03EE', LowercaseBor, 0},
769 lcMap{'\u0401', '\u040F', LowercaseAdd, 80},
770 lcMap{'\u0410', '\u042F', LowercaseAdd, 32},
771 lcMap{'\u0460', '\u0480', LowercaseBor, 0},
772 lcMap{'\u0490', '\u04BE', LowercaseBor, 0},
773 lcMap{'\u04C1', '\u04C3', LowercaseBad, 0},
774 lcMap{'\u04C7', '\u04C7', LowercaseSet, 0x04C8},
775 lcMap{'\u04CB', '\u04CB', LowercaseSet, 0x04CC},
776 lcMap{'\u04D0', '\u04EA', LowercaseBor, 0},
777 lcMap{'\u04EE', '\u04F4', LowercaseBor, 0},
778 lcMap{'\u04F8', '\u04F8', LowercaseSet, 0x04F9},
779 lcMap{'\u0531', '\u0556', LowercaseAdd, 48},
780 lcMap{'\u10A0', '\u10C5', LowercaseAdd, 48},
781 lcMap{'\u1E00', '\u1EF8', LowercaseBor, 0},
782 lcMap{'\u1F08', '\u1F0F', LowercaseAdd, -8},
783 lcMap{'\u1F18', '\u1F1F', LowercaseAdd, -8},
784 lcMap{'\u1F28', '\u1F2F', LowercaseAdd, -8},
785 lcMap{'\u1F38', '\u1F3F', LowercaseAdd, -8},
786 lcMap{'\u1F48', '\u1F4D', LowercaseAdd, -8},
787 lcMap{'\u1F59', '\u1F59', LowercaseSet, 0x1F51},
788 lcMap{'\u1F5B', '\u1F5B', LowercaseSet, 0x1F53},
789 lcMap{'\u1F5D', '\u1F5D', LowercaseSet, 0x1F55},
790 lcMap{'\u1F5F', '\u1F5F', LowercaseSet, 0x1F57},
791 lcMap{'\u1F68', '\u1F6F', LowercaseAdd, -8},
792 lcMap{'\u1F88', '\u1F8F', LowercaseAdd, -8},
793 lcMap{'\u1F98', '\u1F9F', LowercaseAdd, -8},
794 lcMap{'\u1FA8', '\u1FAF', LowercaseAdd, -8},
795 lcMap{'\u1FB8', '\u1FB9', LowercaseAdd, -8},
796 lcMap{'\u1FBA', '\u1FBB', LowercaseAdd, -74},
797 lcMap{'\u1FBC', '\u1FBC', LowercaseSet, 0x1FB3},
798 lcMap{'\u1FC8', '\u1FCB', LowercaseAdd, -86},
799 lcMap{'\u1FCC', '\u1FCC', LowercaseSet, 0x1FC3},
800 lcMap{'\u1FD8', '\u1FD9', LowercaseAdd, -8},
801 lcMap{'\u1FDA', '\u1FDB', LowercaseAdd, -100},
802 lcMap{'\u1FE8', '\u1FE9', LowercaseAdd, -8},
803 lcMap{'\u1FEA', '\u1FEB', LowercaseAdd, -112},
804 lcMap{'\u1FEC', '\u1FEC', LowercaseSet, 0x1FE5},
805 lcMap{'\u1FF8', '\u1FF9', LowercaseAdd, -128},
806 lcMap{'\u1FFA', '\u1FFB', LowercaseAdd, -126},
807 lcMap{'\u1FFC', '\u1FFC', LowercaseSet, 0x1FF3},
808 lcMap{'\u2160', '\u216F', LowercaseAdd, 16},
809 lcMap{'\u24B6', '\u24D0', LowercaseAdd, 26},
810 lcMap{'\uFF21', '\uFF3A', LowercaseAdd, 32},
811 }
812
813 func (c *CharSet) addLowercaseRange(chMin, chMax rune) {
814 var i, iMax, iMid int
815 var chMinT, chMaxT rune
816 var lc lcMap
817
818 for i, iMax = 0, len(lcTable); i < iMax; {
819 iMid = (i + iMax) / 2
820 if lcTable[iMid].chMax < chMin {
821 i = iMid + 1
822 } else {
823 iMax = iMid
824 }
825 }
826
827 for ; i < len(lcTable); i++ {
828 lc = lcTable[i]
829 if lc.chMin > chMax {
830 return
831 }
832 chMinT = lc.chMin
833 if chMinT < chMin {
834 chMinT = chMin
835 }
836
837 chMaxT = lc.chMax
838 if chMaxT > chMax {
839 chMaxT = chMax
840 }
841
842 switch lc.op {
843 case LowercaseSet:
844 chMinT = rune(lc.data)
845 chMaxT = rune(lc.data)
846 break
847 case LowercaseAdd:
848 chMinT += lc.data
849 chMaxT += lc.data
850 break
851 case LowercaseBor:
852 chMinT |= 1
853 chMaxT |= 1
854 break
855 case LowercaseBad:
856 chMinT += (chMinT & 1)
857 chMaxT += (chMaxT & 1)
858 break
859 }
860
861 if chMinT < chMin || chMaxT > chMax {
862 c.addRange(chMinT, chMaxT)
863 }
864 }
865 }
866
View as plain text