...

Text file src/github.com/alecthomas/chroma/v2/lexers/testdata/python2/test_complex_file1.actual

Documentation: github.com/alecthomas/chroma/v2/lexers/testdata/python2

     1#! /usr/bin/env python
     2# From CPython (Lib/base64.py)
     3
     4"""RFC 3548: Base16, Base32, Base64 Data Encodings"""
     5
     6# Modified 04-Oct-1995 by Jack Jansen to use binascii module
     7# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
     8
     9import re
    10import struct
    11import string
    12import binascii
    13
    14
# Public names exported by ``from <module> import *``, grouped by interface.
__all__ = [
    # Legacy interface exports traditional RFC 1521 Base64 encodings
    'encode', 'decode', 'encodestring', 'decodestring',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
    29
# Identity translation table: entry i is the one-character string chr(i).
# _translate() copies this list and overrides individual entries.
_translation = [chr(_x) for _x in range(256)]
# Separator used when joining the encoded/decoded pieces into one string.
EMPTYSTRING = ''
    32
    33
    34def _translate(s, altchars):
    35    translation = _translation[:]
    36    for k, v in altchars.items():
    37        translation[ord(k)] = v
    38    return s.translate(''.join(translation))
    39
    40
    41
    42# Base64 encoding/decoding uses binascii
    43
    44def b64encode(s, altchars=None):
    45    """Encode a string using Base64.
    46
    47    s is the string to encode.  Optional altchars must be a string of at least
    48    length 2 (additional characters are ignored) which specifies an
    49    alternative alphabet for the '+' and '/' characters.  This allows an
    50    application to e.g. generate url or filesystem safe Base64 strings.
    51
    52    The encoded string is returned.
    53    """
    54    # Strip off the trailing newline
    55    encoded = binascii.b2a_base64(s)[:-1]
    56    if altchars is not None:
    57        return encoded.translate(string.maketrans(b'+/', altchars[:2]))
    58    return encoded
    59
    60
    61def b64decode(s, altchars=None):
    62    """Decode a Base64 encoded string.
    63
    64    s is the string to decode.  Optional altchars must be a string of at least
    65    length 2 (additional characters are ignored) which specifies the
    66    alternative alphabet used instead of the '+' and '/' characters.
    67
    68    The decoded string is returned.  A TypeError is raised if s is
    69    incorrectly padded.  Characters that are neither in the normal base-64
    70    alphabet nor the alternative alphabet are discarded prior to the padding
    71    check.
    72    """
    73    if altchars is not None:
    74        s = s.translate(string.maketrans(altchars[:2], '+/'))
    75    try:
    76        return binascii.a2b_base64(s)
    77    except binascii.Error, msg:
    78        # Transform this exception for consistency
    79        raise TypeError(msg)
    80
    81
    82def standard_b64encode(s):
    83    """Encode a string using the standard Base64 alphabet.
    84
    85    s is the string to encode.  The encoded string is returned.
    86    """
    87    return b64encode(s)
    88
    89def standard_b64decode(s):
    90    """Decode a string encoded with the standard Base64 alphabet.
    91
    92    Argument s is the string to decode.  The decoded string is returned.  A
    93    TypeError is raised if the string is incorrectly padded.  Characters that
    94    are not in the standard alphabet are discarded prior to the padding
    95    check.
    96    """
    97    return b64decode(s)
    98
# Precomputed translate() tables for the URL-safe alphabet: encoding swaps
# '+' -> '-' and '/' -> '_'; decoding applies the reverse mapping.
_urlsafe_encode_translation = string.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = string.maketrans(b'-_', b'+/')
   101
   102def urlsafe_b64encode(s):
   103    """Encode a string using the URL- and filesystem-safe Base64 alphabet.
   104
   105    Argument s is the string to encode.  The encoded string is returned.  The
   106    alphabet uses '-' instead of '+' and '_' instead of '/'.
   107    """
   108    return b64encode(s).translate(_urlsafe_encode_translation)
   109
   110def urlsafe_b64decode(s):
   111    """Decode a string using the URL- and filesystem-safe Base64 alphabet.
   112
   113    Argument s is the string to decode.  The decoded string is returned.  A
   114    TypeError is raised if the string is incorrectly padded.  Characters that
   115    are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash
   116    '/', are discarded prior to the padding check.
   117
   118    The alphabet uses '-' instead of '+' and '_' instead of '/'.
   119    """
   120    return b64decode(s.translate(_urlsafe_decode_translation))
   121
   122
   123
   124# Base32 encoding/decoding must be done in Python
   125_b32alphabet = {
   126    0: 'A',  9: 'J', 18: 'S', 27: '3',
   127    1: 'B', 10: 'K', 19: 'T', 28: '4',
   128    2: 'C', 11: 'L', 20: 'U', 29: '5',
   129    3: 'D', 12: 'M', 21: 'V', 30: '6',
   130    4: 'E', 13: 'N', 22: 'W', 31: '7',
   131    5: 'F', 14: 'O', 23: 'X',
   132    6: 'G', 15: 'P', 24: 'Y',
   133    7: 'H', 16: 'Q', 25: 'Z',
   134    8: 'I', 17: 'R', 26: '2',
   135    }
   136
   137_b32tab = _b32alphabet.items()
   138_b32tab.sort()
   139_b32tab = [v for k, v in _b32tab]
   140_b32rev = dict([(v, long(k)) for k, v in _b32alphabet.items()])
   141
   142
   143def b32encode(s):
   144    """Encode a string using Base32.
   145
   146    s is the string to encode.  The encoded string is returned.
   147    """
   148    parts = []
   149    quanta, leftover = divmod(len(s), 5)
   150    # Pad the last quantum with zero bits if necessary
   151    if leftover:
   152        s += ('\0' * (5 - leftover))
   153        quanta += 1
   154    for i in range(quanta):
   155        # c1 and c2 are 16 bits wide, c3 is 8 bits wide.  The intent of this
   156        # code is to process the 40 bits in units of 5 bits.  So we take the 1
   157        # leftover bit of c1 and tack it onto c2.  Then we take the 2 leftover
   158        # bits of c2 and tack them onto c3.  The shifts and masks are intended
   159        # to give us values of exactly 5 bits in width.
   160        c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
   161        c2 += (c1 & 1) << 16 # 17 bits wide
   162        c3 += (c2 & 3) << 8  # 10 bits wide
   163        parts.extend([_b32tab[c1 >> 11],         # bits 1 - 5
   164                      _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
   165                      _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
   166                      _b32tab[c2 >> 12],         # bits 16 - 20 (1 - 5)
   167                      _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
   168                      _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
   169                      _b32tab[c3 >> 5],          # bits 31 - 35 (1 - 5)
   170                      _b32tab[c3 & 0x1f],        # bits 36 - 40 (1 - 5)
   171                      ])
   172    encoded = EMPTYSTRING.join(parts)
   173    # Adjust for any leftover partial quanta
   174    if leftover == 1:
   175        return encoded[:-6] + '======'
   176    elif leftover == 2:
   177        return encoded[:-4] + '===='
   178    elif leftover == 3:
   179        return encoded[:-3] + '==='
   180    elif leftover == 4:
   181        return encoded[:-1] + '='
   182    return encoded
   183
   184
   185def b32decode(s, casefold=False, map01=None):
   186    """Decode a Base32 encoded string.
   187
   188    s is the string to decode.  Optional casefold is a flag specifying whether
   189    a lowercase alphabet is acceptable as input.  For security purposes, the
   190    default is False.
   191
   192    RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O
   193    (oh), and for optional mapping of the digit 1 (one) to either the letter I
   194    (eye) or letter L (el).  The optional argument map01 when not None,
   195    specifies which letter the digit 1 should be mapped to (when map01 is not
   196    None, the digit 0 is always mapped to the letter O).  For security
   197    purposes the default is None, so that 0 and 1 are not allowed in the
   198    input.
   199
   200    The decoded string is returned.  A TypeError is raised if s were
   201    incorrectly padded or if there are non-alphabet characters present in the
   202    string.
   203    """
   204    quanta, leftover = divmod(len(s), 8)
   205    if leftover:
   206        raise TypeError('Incorrect padding')
   207    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
   208    # False, or the character to map the digit 1 (one) to.  It should be
   209    # either L (el) or I (eye).
   210    if map01:
   211        s = s.translate(string.maketrans(b'01', b'O' + map01))
   212    if casefold:
   213        s = s.upper()
   214    # Strip off pad characters from the right.  We need to count the pad
   215    # characters because this will tell us how many null bytes to remove from
   216    # the end of the decoded string.
   217    padchars = 0
   218    mo = re.search('(?P<pad>[=]*)$', s)
   219    if mo:
   220        padchars = len(mo.group('pad'))
   221        if padchars > 0:
   222            s = s[:-padchars]
   223    # Now decode the full quanta
   224    parts = []
   225    acc = 0
   226    shift = 35
   227    for c in s:
   228        val = _b32rev.get(c)
   229        if val is None:
   230            raise TypeError('Non-base32 digit found')
   231        acc += _b32rev[c] << shift
   232        shift -= 5
   233        if shift < 0:
   234            parts.append(binascii.unhexlify('%010x' % acc))
   235            acc = 0
   236            shift = 35
   237    # Process the last, partial quanta
   238    last = binascii.unhexlify('%010x' % acc)
   239    if padchars == 0:
   240        last = ''                       # No characters
   241    elif padchars == 1:
   242        last = last[:-1]
   243    elif padchars == 3:
   244        last = last[:-2]
   245    elif padchars == 4:
   246        last = last[:-3]
   247    elif padchars == 6:
   248        last = last[:-4]
   249    else:
   250        raise TypeError('Incorrect padding')
   251    parts.append(last)
   252    return EMPTYSTRING.join(parts)
   253
   254
   255
   256# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
   257# lowercase.  The RFC also recommends against accepting input case
   258# insensitively.
   259def b16encode(s):
   260    """Encode a string using Base16.
   261
   262    s is the string to encode.  The encoded string is returned.
   263    """
   264    return binascii.hexlify(s).upper()
   265
   266
   267def b16decode(s, casefold=False):
   268    """Decode a Base16 encoded string.
   269
   270    s is the string to decode.  Optional casefold is a flag specifying whether
   271    a lowercase alphabet is acceptable as input.  For security purposes, the
   272    default is False.
   273
   274    The decoded string is returned.  A TypeError is raised if s is
   275    incorrectly padded or if there are non-alphabet characters present in the
   276    string.
   277    """
   278    if casefold:
   279        s = s.upper()
   280    if re.search('[^0-9A-F]', s):
   281        raise TypeError('Non-base16 digit found')
   282    return binascii.unhexlify(s)
   283
   284
   285
   286# Legacy interface.  This code could be cleaned up since I don't believe
   287# binascii has any line length limitations.  It just doesn't seem worth it
   288# though.
   289
# Maximum length of one encoded output line.
MAXLINESIZE = 76 # Excluding the CRLF
# Number of input bytes consumed per output line: Base64 emits 4 characters
# for every 3 input bytes, so this many bytes fill at most MAXLINESIZE chars.
MAXBINSIZE = (MAXLINESIZE//4)*3
   292
   293def encode(input, output):
   294    """Encode a file."""
   295    while True:
   296        s = input.read(MAXBINSIZE)
   297        if not s:
   298            break
   299        while len(s) < MAXBINSIZE:
   300            ns = input.read(MAXBINSIZE-len(s))
   301            if not ns:
   302                break
   303            s += ns
   304        line = binascii.b2a_base64(s)
   305        output.write(line)
   306
   307
   308def decode(input, output):
   309    """Decode a file."""
   310    while True:
   311        line = input.readline()
   312        if not line:
   313            break
   314        s = binascii.a2b_base64(line)
   315        output.write(s)
   316
   317
   318def encodestring(s):
   319    """Encode a string into multiple lines of base-64 data."""
   320    pieces = []
   321    for i in range(0, len(s), MAXBINSIZE):
   322        chunk = s[i : i + MAXBINSIZE]
   323        pieces.append(binascii.b2a_base64(chunk))
   324    return "".join(pieces)
   325
   326
   327def decodestring(s):
   328    """Decode a string."""
   329    return binascii.a2b_base64(s)
   330
   331
   332
   333# Useable as a script...
   334def test():
   335    """Small test program"""
   336    import sys, getopt
   337    try:
   338        opts, args = getopt.getopt(sys.argv[1:], 'deut')
   339    except getopt.error, msg:
   340        sys.stdout = sys.stderr
   341        print msg
   342        print """usage: %s [-d|-e|-u|-t] [file|-]
   343        -d, -u: decode
   344        -e: encode (default)
   345        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
   346        sys.exit(2)
   347    func = encode
   348    for o, a in opts:
   349        if o == '-e': func = encode
   350        if o == '-d': func = decode
   351        if o == '-u': func = decode
   352        if o == '-t': test1(); return
   353    if args and args[0] != '-':
   354        with open(args[0], 'rb') as f:
   355            func(f, sys.stdout)
   356    else:
   357        func(sys.stdin, sys.stdout)
   358
   359
   360def test1():
   361    s0 = "Aladdin:open sesame"
   362    s1 = encodestring(s0)
   363    s2 = decodestring(s1)
   364    print s0, repr(s1), s2
   365
   366
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    test()

View as plain text