// Copyright 2016 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fields

// This file was copied from https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/fold.go.
// Only the license and package were changed.

import (
	"bytes"
	"unicode/utf8"
)

const (
	caseMask     = ^byte(0x20) // Mask to ignore case in ASCII: clears bit 0x20, which separates 'a'-'z' from 'A'-'Z'.
	kelvin       = '\u212a'    // U+212A Kelvin sign; treated as a case-fold match for 'k' and 'K'.
	smallLongEss = '\u017f'    // U+017F Latin small letter long s; treated as a case-fold match for 's' and 'S'.
)

// foldFunc returns one of four different case folding equivalence
// functions, from most general (and slow) to fastest:
//
// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
// 3) asciiEqualFold, no special, but includes non-letters (including _)
// 4) simpleLetterEqualFold, no specials, no non-letters.
//
// The letters S and K are special because they map to 3 runes, not just 2:
//   - S maps to s and to U+017F 'ſ' Latin small letter long s
//   - k maps to K and to U+212A 'K' Kelvin sign
//
// See https://play.golang.org/p/tTxjOc0OGo
//
// The returned function is specialized for matching against s and
// should only be given s. It's not curried for performance reasons.
47 func foldFunc(s []byte) func(s, t []byte) bool { 48 nonLetter := false 49 special := false // special letter 50 for _, b := range s { 51 if b >= utf8.RuneSelf { 52 return bytes.EqualFold 53 } 54 upper := b & caseMask 55 if upper < 'A' || upper > 'Z' { 56 nonLetter = true 57 } else if upper == 'K' || upper == 'S' { 58 // See above for why these letters are special. 59 special = true 60 } 61 } 62 if special { 63 return equalFoldRight 64 } 65 if nonLetter { 66 return asciiEqualFold 67 } 68 return simpleLetterEqualFold 69 } 70 71 // equalFoldRight is a specialization of bytes.EqualFold when s is 72 // known to be all ASCII (including punctuation), but contains an 's', 73 // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t. 74 // See comments on foldFunc. 75 func equalFoldRight(s, t []byte) bool { 76 for _, sb := range s { 77 if len(t) == 0 { 78 return false 79 } 80 tb := t[0] 81 if tb < utf8.RuneSelf { 82 if sb != tb { 83 sbUpper := sb & caseMask 84 if 'A' <= sbUpper && sbUpper <= 'Z' { 85 if sbUpper != tb&caseMask { 86 return false 87 } 88 } else { 89 return false 90 } 91 } 92 t = t[1:] 93 continue 94 } 95 // sb is ASCII and t is not. t must be either kelvin 96 // sign or long s; sb must be s, S, k, or K. 97 tr, size := utf8.DecodeRune(t) 98 switch sb { 99 case 's', 'S': 100 if tr != smallLongEss { 101 return false 102 } 103 case 'k', 'K': 104 if tr != kelvin { 105 return false 106 } 107 default: 108 return false 109 } 110 t = t[size:] 111 112 } 113 return len(t) <= 0 114 } 115 116 // asciiEqualFold is a specialization of bytes.EqualFold for use when 117 // s is all ASCII (but may contain non-letters) and contains no 118 // special-folding letters. 119 // See comments on foldFunc. 
120 func asciiEqualFold(s, t []byte) bool { 121 if len(s) != len(t) { 122 return false 123 } 124 for i, sb := range s { 125 tb := t[i] 126 if sb == tb { 127 continue 128 } 129 if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') { 130 if sb&caseMask != tb&caseMask { 131 return false 132 } 133 } else { 134 return false 135 } 136 } 137 return true 138 } 139 140 // simpleLetterEqualFold is a specialization of bytes.EqualFold for 141 // use when s is all ASCII letters (no underscores, etc) and also 142 // doesn't contain 'k', 'K', 's', or 'S'. 143 // See comments on foldFunc. 144 func simpleLetterEqualFold(s, t []byte) bool { 145 if len(s) != len(t) { 146 return false 147 } 148 for i, b := range s { 149 if b&caseMask != t[i]&caseMask { 150 return false 151 } 152 } 153 return true 154 } 155