#! /usr/bin/env python
# From CPython (Lib/base64.py)

"""RFC 3548: Base16, Base32, Base64 Data Encodings"""

# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
8
9import re
10import struct
11import string
12import binascii
13
14
__all__ = [
    # Legacy interface exports traditional RFC 1521 Base64 encodings
    'encode',
    'decode',
    'encodestring',
    'decodestring',
    # Generalized interface for other encodings
    'b64encode',
    'b64decode',
    'b32encode',
    'b32decode',
    'b16encode',
    'b16decode',
    # Standard Base64 encoding
    'standard_b64encode',
    'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode',
    'urlsafe_b64decode',
]
29
# Identity table: entry i is the one-character string with ordinal i.
_translation = [chr(_x) for _x in range(256)]
EMPTYSTRING = ''


def _translate(s, altchars):
    """Return s with every character that is a key of altchars replaced.

    altchars is a mapping from single characters to their replacement
    characters.  Characters of s that are not keys are left unchanged.
    """
    # Work on a private copy so the shared identity table is never modified.
    table = list(_translation)
    for source_char, replacement in altchars.items():
        table[ord(source_char)] = replacement
    return s.translate(''.join(table))
39
40
41
# Base64 encoding/decoding uses binascii
43
def b64encode(s, altchars=None):
    """Encode a string using Base64.

    s is the string to encode.  Optional altchars must be a string of at
    least length 2 (additional characters are ignored) which specifies an
    alternative alphabet for the '+' and '/' characters.  This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    The encoded string is returned.
    """
    # b2a_base64() appends a newline to its result; drop it.
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is None:
        return encoded
    # Only the first two characters of altchars take part in the mapping.
    alt_table = string.maketrans(b'+/', altchars[:2])
    return encoded.translate(alt_table)
59
60
def b64decode(s, altchars=None):
    """Decode a Base64 encoded string.

    s is the string to decode.  Optional altchars must be a string of at
    least length 2 (additional characters are ignored) which specifies the
    alternative alphabet used instead of the '+' and '/' characters.

    The decoded string is returned.  A TypeError is raised if s is
    incorrectly padded.  Characters that are neither in the normal base-64
    alphabet nor the alternative alphabet are discarded prior to the padding
    check.
    """
    if altchars is not None:
        # Map the alternative characters back onto the standard '+' and '/'.
        s = s.translate(string.maketrans(altchars[:2], '+/'))
    try:
        return binascii.a2b_base64(s)
    except binascii.Error as msg:
        # Transform this exception for consistency with the other decoders
        # in this module, which report bad input as TypeError.
        raise TypeError(msg)
80
81
def standard_b64encode(s):
    """Encode a string using the standard Base64 alphabet.

    s is the string to encode.  The encoded string is returned.
    """
    # No alternative characters: '+' and '/' are used as-is.
    return b64encode(s, altchars=None)
88
def standard_b64decode(s):
    """Decode a string encoded with the standard Base64 alphabet.

    Argument s is the string to decode.  The decoded string is returned.  A
    TypeError is raised if the string is incorrectly padded.  Characters that
    are not in the standard alphabet are discarded prior to the padding
    check.
    """
    # No alternative characters: '+' and '/' are expected as-is.
    return b64decode(s, altchars=None)
98
# Translation tables mapping the standard '+' and '/' characters onto the
# URL-safe '-' and '_' characters, and back again.
_urlsafe_encode_translation, _urlsafe_decode_translation = (
    string.maketrans(b'+/', b'-_'),
    string.maketrans(b'-_', b'+/'),
)
101
def urlsafe_b64encode(s):
    """Encode a string using the URL- and filesystem-safe Base64 alphabet.

    Argument s is the string to encode.  The encoded string is returned.  The
    alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    # Encode with the standard alphabet first, then swap the two characters.
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
109
def urlsafe_b64decode(s):
    """Decode a string using the URL- and filesystem-safe Base64 alphabet.

    Argument s is the string to decode.  The decoded string is returned.  A
    TypeError is raised if the string is incorrectly padded.  Characters that
    are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash
    '/', are discarded prior to the padding check.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    # Map '-' and '_' back onto '+' and '/' and decode as standard Base64.
    standard = s.translate(_urlsafe_decode_translation)
    return b64decode(standard)
121
122
123
# Base32 encoding/decoding must be done in Python
# Maps each 5-bit value (0-31) to its Base32 digit.
_b32alphabet = {
    0: 'A',  9: 'J', 18: 'S', 27: '3',
    1: 'B', 10: 'K', 19: 'T', 28: '4',
    2: 'C', 11: 'L', 20: 'U', 29: '5',
    3: 'D', 12: 'M', 21: 'V', 30: '6',
    4: 'E', 13: 'N', 22: 'W', 31: '7',
    5: 'F', 14: 'O', 23: 'X',
    6: 'G', 15: 'P', 24: 'Y',
    7: 'H', 16: 'Q', 25: 'Z',
    8: 'I', 17: 'R', 26: '2',
    }

# _b32tab[value] is the digit for a 5-bit value; _b32rev[digit] is the
# reverse lookup.  Generator expressions are used (rather than list
# comprehensions) so no loop variables are left behind in the module
# namespace, and plain ints are stored because integers are promoted to
# arbitrary precision automatically when shifted.
_b32tab = list(_char for _value, _char in sorted(_b32alphabet.items()))
_b32rev = dict((_char, _value) for _value, _char in _b32alphabet.items())
141
142
def b32encode(s):
    """Encode a string using Base32.

    s is the string to encode.  The encoded string is returned.
    """
    quanta, leftover = divmod(len(s), 5)
    # Pad the last quantum with zero bits if necessary
    if leftover:
        s += ('\0' * (5 - leftover))
        quanta += 1
    parts = []
    for index in range(quanta):
        start = index * 5
        # Read the 5-byte quantum as one 40-bit big-endian integer (an
        # 8-bit high part followed by a 32-bit low part), then peel off
        # eight 5-bit groups from the most significant end.
        high, low = struct.unpack('!BI', s[start:start + 5])
        bits = (high << 32) | low
        parts.extend([_b32tab[(bits >> shift) & 0x1f]
                      for shift in range(35, -5, -5)])
    encoded = EMPTYSTRING.join(parts)
    # Adjust for any leftover partial quanta: the digits that encode only
    # padding bytes are replaced by the same number of '=' characters.
    pad = {1: 6, 2: 4, 3: 3, 4: 1}.get(leftover, 0)
    if pad:
        return encoded[:-pad] + '=' * pad
    return encoded
183
184
def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O
    (oh), and for optional mapping of the digit 1 (one) to either the letter I
    (eye) or letter L (el).  The optional argument map01 when not None,
    specifies which letter the digit 1 should be mapped to (when map01 is not
    None, the digit 0 is always mapped to the letter O).  For security
    purposes the default is None, so that 0 and 1 are not allowed in the
    input.

    The decoded string is returned.  A TypeError is raised if s is
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    # Encoded input always comes in whole 8-character quanta.
    if len(s) % 8:
        raise TypeError('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  map01 is either false or the
    # character to map the digit 1 (one) to.  It should be either L (el) or
    # I (eye).
    if map01:
        s = s.translate(string.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Count and strip the pad characters on the right.  The count tells us
    # how many bytes to drop from the end of the last decoded quantum.
    match = re.search('(?P<pad>[=]*)$', s)
    padchars = len(match.group('pad')) if match else 0
    if padchars > 0:
        s = s[:-padchars]
    # Accumulate 5 bits per digit; every 8 digits yield 5 decoded bytes.
    decoded = []
    acc = 0
    shift = 35
    for char in s:
        value = _b32rev.get(char)
        if value is None:
            raise TypeError('Non-base32 digit found')
        acc += value << shift
        shift -= 5
        if shift < 0:
            decoded.append(binascii.unhexlify('%010x' % acc))
            acc = 0
            shift = 35
    # Process the last, partial quantum.  Only these pad counts are legal;
    # each maps to the number of trailing bytes that are pure padding.
    if padchars == 0:
        tail = ''                       # No characters
    else:
        trim = {1: 1, 3: 2, 4: 3, 6: 4}.get(padchars)
        if trim is None:
            raise TypeError('Incorrect padding')
        tail = binascii.unhexlify('%010x' % acc)[:-trim]
    decoded.append(tail)
    return EMPTYSTRING.join(decoded)
253
254
255
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Encode a string using Base16.

    s is the string to encode.  The encoded string is returned.
    """
    # hexlify() produces lowercase digits; Base16 uses uppercase.
    hex_digits = binascii.hexlify(s)
    return hex_digits.upper()
265
266
def b16decode(s, casefold=False):
    """Decode a Base16 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    The decoded string is returned.  A TypeError is raised if s is
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    digits = s.upper() if casefold else s
    # Reject anything outside the uppercase hexadecimal alphabet up front.
    if re.search('[^0-9A-F]', digits) is not None:
        raise TypeError('Non-base16 digit found')
    return binascii.unhexlify(digits)
283
284
285
# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.  It just doesn't seem worth it
# though.
289
# Longest encoded line that encode()/encodestring() produce.
MAXLINESIZE = 76  # Excluding the CRLF
# Number of input bytes per output line: every 3 input bytes become 4
# encoded characters.
MAXBINSIZE = 3 * (MAXLINESIZE // 4)
292
def encode(input, output):
    """Encode a file.

    Reads from input in chunks of up to MAXBINSIZE bytes and writes each
    chunk to output as one line of Base64 text.
    """
    chunk = input.read(MAXBINSIZE)
    while chunk:
        # A read may return fewer bytes than requested; keep reading until
        # the chunk is full or a read returns nothing.
        while len(chunk) < MAXBINSIZE:
            more = input.read(MAXBINSIZE - len(chunk))
            if not more:
                break
            chunk += more
        output.write(binascii.b2a_base64(chunk))
        chunk = input.read(MAXBINSIZE)
306
307
def decode(input, output):
    """Decode a file.

    Reads input one line at a time, Base64-decodes each line and writes the
    decoded data to output.
    """
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
316
317
def encodestring(s):
    """Encode a string into multiple lines of base-64 data."""
    # Each MAXBINSIZE-byte slice becomes one newline-terminated line.
    lines = [binascii.b2a_base64(s[start:start + MAXBINSIZE])
             for start in range(0, len(s), MAXBINSIZE)]
    return "".join(lines)
325
326
def decodestring(s):
    """Decode a string of base-64 data and return the decoded string."""
    decoded = binascii.a2b_base64(s)
    return decoded
330
331
332
# Usable as a script...
def test():
    """Small test program.

    Parses sys.argv: -e encodes (the default), -d or -u decodes, and -t runs
    test1() and returns.  Input is the named file, or standard input when no
    file or '-' is given; output goes to standard output.  On a bad option
    the error and a usage message are written to standard error and the
    program exits with status 2.
    """
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write straight to stderr instead of rebinding sys.stdout, so the
        # process-wide stdout is left intact if SystemExit is caught.
        usage = ("usage: %s [-d|-e|-u|-t] [file|-]\n"
                 "        -d, -u: decode\n"
                 "        -e: encode (default)\n"
                 "        -t: encode and decode string 'Aladdin:open sesame'")
        sys.stderr.write(str(msg) + '\n')
        sys.stderr.write(usage % sys.argv[0] + '\n')
        sys.exit(2)
    func = encode
    for o, _ in opts:
        if o == '-e':
            func = encode
        elif o in ('-d', '-u'):
            func = decode
        elif o == '-t':
            test1()
            return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout)
    else:
        func(sys.stdin, sys.stdout)
358
359
def test1():
    """Round-trip a sample string through encodestring()/decodestring().

    Prints the original string, the repr of its encoding and the decoded
    result on one line.
    """
    s0 = "Aladdin:open sesame"
    s1 = encodestring(s0)
    s2 = decodestring(s1)
    # A single pre-formatted argument prints the same line whether print is
    # a statement or a function.
    print('%s %s %s' % (s0, repr(s1), s2))
365
366
# Run the command-line test driver when executed as a script.
if __name__ == '__main__':
    test()