Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-31116: Add Z85 variant to base64 #30598

Merged
merged 11 commits into from
Feb 25, 2024
18 changes: 18 additions & 0 deletions Doc/library/base64.rst
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,24 @@ The modern interface provides:
.. versionadded:: 3.4


.. function:: z85encode(s)

Encode the :term:`bytes-like object` *s* using Z85 (as used in ZeroMQ)
and return the encoded :class:`bytes`. See `Z85 specification
<https://rfc.zeromq.org/spec/32/>`_ for more information.

.. versionadded:: 3.13


.. function:: z85decode(s)

Decode the Z85-encoded :term:`bytes-like object` or ASCII string *s* and
return the decoded :class:`bytes`. See `Z85 specification
<https://rfc.zeromq.org/spec/32/>`_ for more information.

.. versionadded:: 3.13


The legacy interface:

.. function:: decode(input, output)
Expand Down
8 changes: 8 additions & 0 deletions Doc/whatsnew/3.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,14 @@ asyncio
the buffer size.
(Contributed by Jamie Phan in :gh:`115199`.)

base64
------

* Add :func:`base64.z85encode` and :func:`base64.z85decode` functions which allow encoding
and decoding Z85 data.
See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information.
(Contributed by Matan Perelman in :gh:`75299`.)

copy
----

Expand Down
29 changes: 28 additions & 1 deletion Lib/base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
'b64encode', 'b64decode', 'b32encode', 'b32decode',
'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
# Base85 and Ascii85 encodings
'b85encode', 'b85decode', 'a85encode', 'a85decode',
'b85encode', 'b85decode', 'a85encode', 'a85decode', 'z85encode', 'z85decode',
# Standard Base64 encoding
'standard_b64encode', 'standard_b64decode',
# Some common Base64 alternatives. As referenced by RFC 3458, see thread
Expand Down Expand Up @@ -497,6 +497,33 @@ def b85decode(b):
result = result[:-padding]
return result

# Z85 support is implemented on top of the b85 codec by translating between
# the two 85-character alphabets.
_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz'
                b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#')
_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet)

# Characters that are valid in b85 but not in Z85.  When decoding they are
# mapped to b'\x00'; otherwise they would pass through the translation
# unchanged and be decoded as valid b85 characters.
_z85_b85_decode_diff = b';_`|~'
_z85_decode_translation = bytes.maketrans(
    _z85alphabet + _z85_b85_decode_diff,
    _b85alphabet + bytes(len(_z85_b85_decode_diff)),
)

def z85encode(s):
    """Encode bytes-like object s in z85 format and return a bytes object.

    The data is encoded with b85encode() and the result is then translated
    from the b85 alphabet to the Z85 alphabet.
    """
    b85_encoded = b85encode(s)
    return b85_encoded.translate(_z85_encode_translation)

def z85decode(s):
    """Decode the z85-encoded bytes-like object or ASCII string s

    The result is returned as a bytes object.

    The input is translated from the Z85 alphabet to the b85 alphabet and
    then handed to b85decode().  A ValueError raised by b85decode() is
    re-raised with 'base85' replaced by 'z85' in its message.
    """
    raw = _bytes_from_decode_data(s)
    # Characters valid only in b85 become b'\x00' here so that they are not
    # decoded as valid b85 characters.
    as_b85 = raw.translate(_z85_decode_translation)
    try:
        return b85decode(as_b85)
    except ValueError as exc:
        message = exc.args[0].replace('base85', 'z85')
        raise ValueError(message) from None

# Legacy interface. This code could be cleaned up since I don't believe
# binascii has any line length limitations. It just doesn't seem worth it
# though. The files should be opened in binary mode.
Expand Down
87 changes: 86 additions & 1 deletion Lib/test/test_base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,40 @@ def test_b85encode(self):
self.check_other_types(base64.b85encode, b"www.python.org",
b'cXxL#aCvlSZ*DGca%T')

def test_z85encode(self):
    """Check base64.z85encode() against known input/output vectors."""
    eq = self.assertEqual

    # Maps raw input bytes to the expected Z85 encoding.
    tests = {
        b'': b'',
        b'www.python.org': b'CxXl-AcVLsz/dgCA+t',
        bytes(range(255)): b"""009c61o!#m2NH?C3>iWS5d]J*6CRx17-skh9337x"""
            b"""ar.{NbQB=+c[cR@eg&FcfFLssg=mfIi5%2YjuU>)kTv.7l}6Nnnj=AD"""
            b"""oIFnTp/ga?r8($2sxO*itWpVyu$0IOwmYv=xLzi%y&a6dAb/]tBAI+J"""
            b"""CZjQZE0{D[FpSr8GOteoH(41EJe-<UKDCY&L:dM3N3<zjOsMmzPRn9P"""
            b"""Q[%@^ShV!$TGwUeU^7HuW6^uKXvGh.YUh4]Z})[9-kP:p:JqPF+*1CV"""
            b"""^9Zp<!yAd4/Xb0k*$*&A&nJXQ<MkK!>&}x#)cTlf[Bu8v].4}L}1:^-"""
            b"""@qDP""",
        b"""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"""
            b"""0123456789!@#0^&*();:<>,. []{}""":
            b"""vpA.SwObN*x>?B1zeKohADlbxB-}$ND3R+ylQTvjm[uizoh55PpF:[^"""
            b"""q=D:$s6eQefFLssg=mfIi5@cEbqrBJdKV-ciY]OSe*aw7DWL""",
        b'no padding..': b'zF{UpvpS[.zF7NO',
        b'zero compression\x00\x00\x00\x00': b'Ds.bnay/tbAb]JhB7]Mg00000',
        b'zero compression\x00\x00\x00': b'Ds.bnay/tbAb]JhB7]Mg0000',
        b"""Boundary:\x00\x00\x00\x00""": b"""lt}0:wmoI7iSGcW00""",
        # The input ends in FOUR spaces (16 bytes in total): the 20-character
        # encoding is four full 5-character groups, and the last group
        # b'arR^H' is the encoding of b'    '.
        b'Space compr:    ': b'q/DePwGUG3ze:IRarR^H',
        b'\xff': b'@@',
        b'\xff'*2: b'%nJ',
        b'\xff'*3: b'%nS9',
        b'\xff'*4: b'%nSc0',
    }

    for data, res in tests.items():
        eq(base64.z85encode(data), res)

    self.check_other_types(base64.z85encode, b"www.python.org",
                           b'CxXl-AcVLsz/dgCA+t')

def test_a85decode(self):
eq = self.assertEqual

Expand Down Expand Up @@ -626,6 +660,41 @@ def test_b85decode(self):
self.check_other_types(base64.b85decode, b'cXxL#aCvlSZ*DGca%T',
b"www.python.org")

def test_z85decode(self):
    """Check base64.z85decode() against known vectors, as bytes and as str."""
    eq = self.assertEqual

    # Maps Z85-encoded input to the expected decoded bytes.
    tests = {
        b'': b'',
        b'CxXl-AcVLsz/dgCA+t': b'www.python.org',
        b"""009c61o!#m2NH?C3>iWS5d]J*6CRx17-skh9337x"""
            b"""ar.{NbQB=+c[cR@eg&FcfFLssg=mfIi5%2YjuU>)kTv.7l}6Nnnj=AD"""
            b"""oIFnTp/ga?r8($2sxO*itWpVyu$0IOwmYv=xLzi%y&a6dAb/]tBAI+J"""
            b"""CZjQZE0{D[FpSr8GOteoH(41EJe-<UKDCY&L:dM3N3<zjOsMmzPRn9P"""
            b"""Q[%@^ShV!$TGwUeU^7HuW6^uKXvGh.YUh4]Z})[9-kP:p:JqPF+*1CV"""
            b"""^9Zp<!yAd4/Xb0k*$*&A&nJXQ<MkK!>&}x#)cTlf[Bu8v].4}L}1:^-"""
            b"""@qDP""": bytes(range(255)),
        b"""vpA.SwObN*x>?B1zeKohADlbxB-}$ND3R+ylQTvjm[uizoh55PpF:[^"""
            b"""q=D:$s6eQefFLssg=mfIi5@cEbqrBJdKV-ciY]OSe*aw7DWL""":
            b"""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"""
            b"""0123456789!@#0^&*();:<>,. []{}""",
        b'zF{UpvpS[.zF7NO': b'no padding..',
        b'Ds.bnay/tbAb]JhB7]Mg00000': b'zero compression\x00\x00\x00\x00',
        b'Ds.bnay/tbAb]JhB7]Mg0000': b'zero compression\x00\x00\x00',
        b"""lt}0:wmoI7iSGcW00""": b"""Boundary:\x00\x00\x00\x00""",
        # Four full 5-character groups decode to 16 bytes; the last group
        # b'arR^H' decodes to FOUR spaces, so the expected value ends in
        # four spaces.
        b'q/DePwGUG3ze:IRarR^H': b'Space compr:    ',
        b'@@': b'\xff',
        b'%nJ': b'\xff'*2,
        b'%nS9': b'\xff'*3,
        b'%nSc0': b'\xff'*4,
    }

    for data, res in tests.items():
        eq(base64.z85decode(data), res)
        # The same input given as an ASCII str must decode identically.
        eq(base64.z85decode(data.decode("ascii")), res)

    self.check_other_types(base64.z85decode, b'CxXl-AcVLsz/dgCA+t',
                           b'www.python.org')

def test_a85_padding(self):
eq = self.assertEqual

Expand Down Expand Up @@ -707,14 +776,30 @@ def test_b85decode_errors(self):
self.assertRaises(ValueError, base64.b85decode, b'|NsC')
self.assertRaises(ValueError, base64.b85decode, b'|NsC1')

def test_z85decode_errors(self):
    """Check that base64.z85decode() raises ValueError on bad input."""
    invalid_bytes = [
        *range(33),
        *b'"\',;_`|\\~',
        *range(128, 256),
    ]
    for byte in invalid_bytes:
        bad_char = bytes([byte])
        with self.assertRaises(ValueError, msg=bad_char):
            base64.z85decode(b'0000' + bad_char)

    # b'\xff\xff\xff\xff' encodes to b'%nSc0', the following will overflow:
    for too_large in (b'%', b'%n', b'%nS', b'%nSc', b'%nSc1'):
        self.assertRaises(ValueError, base64.z85decode, too_large)

def test_decode_nonascii_str(self):
decode_funcs = (base64.b64decode,
base64.standard_b64decode,
base64.urlsafe_b64decode,
base64.b32decode,
base64.b16decode,
base64.b85decode,
base64.a85decode)
base64.a85decode,
base64.z85decode)
for f in decode_funcs:
self.assertRaises(ValueError, f, 'with non-ascii \xcb')

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Z85 encoding to ``base64``.
Loading