...

Source file src/github.com/gdamore/tcell/v2/encoding.go

Documentation: github.com/gdamore/tcell/v2

     1  // Copyright 2022 The TCell Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use file except in compliance with the License.
     5  // You may obtain a copy of the license at
     6  //
     7  //    http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tcell
    16  
    17  import (
    18  	"strings"
    19  	"sync"
    20  
    21  	"golang.org/x/text/encoding"
    22  
    23  	gencoding "github.com/gdamore/encoding"
    24  )
    25  
    26  var encodings map[string]encoding.Encoding
    27  var encodingLk sync.Mutex
    28  var encodingFallback EncodingFallback = EncodingFallbackFail
    29  
    30  // RegisterEncoding may be called by the application to register an encoding.
    31  // The presence of additional encodings will facilitate application usage with
    32  // terminal environments where the I/O subsystem does not support Unicode.
    33  //
    34  // Windows systems use Unicode natively, and do not need any of the encoding
    35  // subsystem when using Windows Console screens.
    36  //
    37  // Please see the Go documentation for golang.org/x/text/encoding -- most of
    38  // the common ones exist already as stock variables.  For example, ISO8859-15
    39  // can be registered using the following code:
    40  //
    41  //	import "golang.org/x/text/encoding/charmap"
    42  //
    43  //	  ...
    44  //	  RegisterEncoding("ISO8859-15", charmap.ISO8859_15)
    45  //
    46  // Aliases can be registered as well, for example "8859-15" could be an alias
    47  // for "ISO8859-15".
    48  //
    49  // For POSIX systems, this package will check the environment variables
    50  // LC_ALL, LC_CTYPE,  and LANG (in that order) to determine the character set.
    51  // These are expected to have the following pattern:
    52  //
    53  //	$language[.$codeset[@$variant]
    54  //
    55  // We extract only the $codeset part, which will usually be something like
    56  // UTF-8 or ISO8859-15 or KOI8-R.  Note that if the locale is either "POSIX"
    57  // or "C", then we assume US-ASCII (the POSIX 'portable character set'
    58  // and assume all other characters are somehow invalid.)
    59  //
    60  // Modern POSIX systems and terminal emulators may use UTF-8, and for those
    61  // systems, this API is also unnecessary.  For example, Darwin (MacOS X) and
    62  // modern Linux running modern xterm generally will out of the box without
    63  // any of this.  Use of UTF-8 is recommended when possible, as it saves
    64  // quite a lot processing overhead.
    65  //
    66  // Note that some encodings are quite large (for example GB18030 which is a
    67  // superset of Unicode) and so the application size can be expected to
    68  // increase quite a bit as each encoding is added.
    69  
    70  // The East Asian encodings have been seen to add 100-200K per encoding to the
    71  // size of the resulting binary.
    72  //
    73  func RegisterEncoding(charset string, enc encoding.Encoding) {
    74  	encodingLk.Lock()
    75  	charset = strings.ToLower(charset)
    76  	encodings[charset] = enc
    77  	encodingLk.Unlock()
    78  }
    79  
// EncodingFallback describes how the system behaves when the locale
// requires a character set that we do not support.  The system always
// supports UTF-8 and US-ASCII.  On Windows consoles, UTF-16LE is also
// supported automatically.  Other character sets must be added using the
// RegisterEncoding API.  (A large group of nearly all of them can be
// added using the RegisterAll function in the encoding sub package.)
//
// The zero value corresponds to EncodingFallbackFail.
type EncodingFallback int
    87  
// Fallback behaviors accepted by SetEncodingFallback and consulted by
// GetEncoding when a character set has no registered encoding.
const (
	// EncodingFallbackFail behavior causes GetEncoding to fail
	// (return nil) when it cannot find an encoding.  This is the
	// default behavior.
	EncodingFallbackFail = iota

	// EncodingFallbackASCII behavior causes GetEncoding to fall back
	// to a 7-bit ASCII encoding, if no other encoding can be found.
	EncodingFallbackASCII

	// EncodingFallbackUTF8 behavior causes GetEncoding to assume
	// UTF8 can pass unmodified upon failure.  Note that this behavior
	// is not recommended, unless you are sure your terminal can cope
	// with real UTF8 sequences.
	EncodingFallbackUTF8
)
   103  
   104  // SetEncodingFallback changes the behavior of GetEncoding when a suitable
   105  // encoding is not found.  The default is EncodingFallbackFail, which
   106  // causes GetEncoding to simply return nil.
   107  func SetEncodingFallback(fb EncodingFallback) {
   108  	encodingLk.Lock()
   109  	encodingFallback = fb
   110  	encodingLk.Unlock()
   111  }
   112  
   113  // GetEncoding is used by Screen implementors who want to locate an encoding
   114  // for the given character set name.  Note that this will return nil for
   115  // either the Unicode (UTF-8) or ASCII encodings, since we don't use
   116  // encodings for them but instead have our own native methods.
   117  func GetEncoding(charset string) encoding.Encoding {
   118  	charset = strings.ToLower(charset)
   119  	encodingLk.Lock()
   120  	defer encodingLk.Unlock()
   121  	if enc, ok := encodings[charset]; ok {
   122  		return enc
   123  	}
   124  	switch encodingFallback {
   125  	case EncodingFallbackASCII:
   126  		return gencoding.ASCII
   127  	case EncodingFallbackUTF8:
   128  		return encoding.Nop
   129  	}
   130  	return nil
   131  }
   132  
   133  func init() {
   134  	// We always support UTF-8 and ASCII.
   135  	encodings = make(map[string]encoding.Encoding)
   136  	encodings["utf-8"] = gencoding.UTF8
   137  	encodings["utf8"] = gencoding.UTF8
   138  	encodings["us-ascii"] = gencoding.ASCII
   139  	encodings["ascii"] = gencoding.ASCII
   140  	encodings["iso646"] = gencoding.ASCII
   141  }
   142  

View as plain text