1 // Copyright 2022 The TCell Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use file except in compliance with the License. 5 // You may obtain a copy of the license at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tcell 16 17 import ( 18 "strings" 19 "sync" 20 21 "golang.org/x/text/encoding" 22 23 gencoding "github.com/gdamore/encoding" 24 ) 25 26 var encodings map[string]encoding.Encoding 27 var encodingLk sync.Mutex 28 var encodingFallback EncodingFallback = EncodingFallbackFail 29 30 // RegisterEncoding may be called by the application to register an encoding. 31 // The presence of additional encodings will facilitate application usage with 32 // terminal environments where the I/O subsystem does not support Unicode. 33 // 34 // Windows systems use Unicode natively, and do not need any of the encoding 35 // subsystem when using Windows Console screens. 36 // 37 // Please see the Go documentation for golang.org/x/text/encoding -- most of 38 // the common ones exist already as stock variables. For example, ISO8859-15 39 // can be registered using the following code: 40 // 41 // import "golang.org/x/text/encoding/charmap" 42 // 43 // ... 44 // RegisterEncoding("ISO8859-15", charmap.ISO8859_15) 45 // 46 // Aliases can be registered as well, for example "8859-15" could be an alias 47 // for "ISO8859-15". 48 // 49 // For POSIX systems, this package will check the environment variables 50 // LC_ALL, LC_CTYPE, and LANG (in that order) to determine the character set. 51 // These are expected to have the following pattern: 52 // 53 // $language[.$codeset[@$variant] 54 // 55 // We extract only the $codeset part, which will usually be something like 56 // UTF-8 or ISO8859-15 or KOI8-R. Note that if the locale is either "POSIX" 57 // or "C", then we assume US-ASCII (the POSIX 'portable character set' 58 // and assume all other characters are somehow invalid.) 59 // 60 // Modern POSIX systems and terminal emulators may use UTF-8, and for those 61 // systems, this API is also unnecessary. For example, Darwin (MacOS X) and 62 // modern Linux running modern xterm generally will out of the box without 63 // any of this. Use of UTF-8 is recommended when possible, as it saves 64 // quite a lot processing overhead. 65 // 66 // Note that some encodings are quite large (for example GB18030 which is a 67 // superset of Unicode) and so the application size can be expected to 68 // increase quite a bit as each encoding is added. 69 70 // The East Asian encodings have been seen to add 100-200K per encoding to the 71 // size of the resulting binary. 72 // 73 func RegisterEncoding(charset string, enc encoding.Encoding) { 74 encodingLk.Lock() 75 charset = strings.ToLower(charset) 76 encodings[charset] = enc 77 encodingLk.Unlock() 78 } 79 80 // EncodingFallback describes how the system behaves when the locale 81 // requires a character set that we do not support. The system always 82 // supports UTF-8 and US-ASCII. On Windows consoles, UTF-16LE is also 83 // supported automatically. Other character sets must be added using the 84 // RegisterEncoding API. (A large group of nearly all of them can be 85 // added using the RegisterAll function in the encoding sub package.) 86 type EncodingFallback int 87 88 const ( 89 // EncodingFallbackFail behavior causes GetEncoding to fail 90 // when it cannot find an encoding. 91 EncodingFallbackFail = iota 92 93 // EncodingFallbackASCII behavior causes GetEncoding to fall back 94 // to a 7-bit ASCII encoding, if no other encoding can be found. 95 EncodingFallbackASCII 96 97 // EncodingFallbackUTF8 behavior causes GetEncoding to assume 98 // UTF8 can pass unmodified upon failure. Note that this behavior 99 // is not recommended, unless you are sure your terminal can cope 100 // with real UTF8 sequences. 101 EncodingFallbackUTF8 102 ) 103 104 // SetEncodingFallback changes the behavior of GetEncoding when a suitable 105 // encoding is not found. The default is EncodingFallbackFail, which 106 // causes GetEncoding to simply return nil. 107 func SetEncodingFallback(fb EncodingFallback) { 108 encodingLk.Lock() 109 encodingFallback = fb 110 encodingLk.Unlock() 111 } 112 113 // GetEncoding is used by Screen implementors who want to locate an encoding 114 // for the given character set name. Note that this will return nil for 115 // either the Unicode (UTF-8) or ASCII encodings, since we don't use 116 // encodings for them but instead have our own native methods. 117 func GetEncoding(charset string) encoding.Encoding { 118 charset = strings.ToLower(charset) 119 encodingLk.Lock() 120 defer encodingLk.Unlock() 121 if enc, ok := encodings[charset]; ok { 122 return enc 123 } 124 switch encodingFallback { 125 case EncodingFallbackASCII: 126 return gencoding.ASCII 127 case EncodingFallbackUTF8: 128 return encoding.Nop 129 } 130 return nil 131 } 132 133 func init() { 134 // We always support UTF-8 and ASCII. 135 encodings = make(map[string]encoding.Encoding) 136 encodings["utf-8"] = gencoding.UTF8 137 encodings["utf8"] = gencoding.UTF8 138 encodings["us-ascii"] = gencoding.ASCII 139 encodings["ascii"] = gencoding.ASCII 140 encodings["iso646"] = gencoding.ASCII 141 } 142