From f922c02c529d25d61aa9c28a8192639c1fce8d4d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 5 Nov 2025 20:12:31 +0200 Subject: [PATCH] gh-125346: Add more base64 tests Add more tests for the altchars argument of b64decode() and for the map01 argument of b32decode(). --- Doc/library/base64.rst | 18 +++- Lib/base64.py | 40 ++++++++- Lib/test/test_base64.py | 42 +++++++++- Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst | 5 + 4 files changed, 91 insertions(+), 14 deletions(-) Index: Python-3.11.14/Doc/library/base64.rst =================================================================== --- Python-3.11.14.orig/Doc/library/base64.rst 2025-10-09 18:16:55.000000000 +0200 +++ Python-3.11.14/Doc/library/base64.rst 2026-02-11 23:44:54.612595397 +0100 @@ -74,15 +74,20 @@ A :exc:`binascii.Error` exception is raised if *s* is incorrectly padded. - If *validate* is ``False`` (the default), characters that are neither + If *validate* is false (the default), characters that are neither in the normal base-64 alphabet nor the alternative alphabet are - discarded prior to the padding check. If *validate* is ``True``, - these non-alphabet characters in the input result in a - :exc:`binascii.Error`. + discarded prior to the padding check, but the ``+`` and ``/`` characters + keep their meaning if they are not in *altchars* (they will be discarded + in future Python versions). + If *validate* is true, these non-alphabet characters in the input + result in a :exc:`binascii.Error`. For more information about the strict base64 check, see :func:`binascii.a2b_base64` - May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2. + .. deprecated:: next + Accepting the ``+`` and ``/`` characters with an alternative alphabet + is now deprecated. + .. function:: standard_b64encode(s) @@ -113,6 +118,9 @@ ``/`` in the standard Base64 alphabet, and return the decoded :class:`bytes`. + .. deprecated:: next + Accepting the ``+`` and ``/`` characters is now deprecated. + .. function:: b32encode(s) Index: Python-3.11.14/Lib/base64.py =================================================================== --- Python-3.11.14.orig/Lib/base64.py 2026-02-11 23:44:42.099270109 +0100 +++ Python-3.11.14/Lib/base64.py 2026-02-11 23:44:54.613055284 +0100 @@ -71,20 +71,39 @@ The result is returned as a bytes object. A binascii.Error is raised if s is incorrectly padded. - If validate is False (the default), characters that are neither in the + If validate is false (the default), characters that are neither in the normal base-64 alphabet nor the alternative alphabet are discarded prior - to the padding check. If validate is True, these non-alphabet characters + to the padding check. If validate is true, these non-alphabet characters in the input result in a binascii.Error. For more information about the strict base64 check, see: https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64 """ s = _bytes_from_decode_data(s) + badchar = None if altchars is not None: altchars = _bytes_from_decode_data(altchars) - assert len(altchars) == 2, repr(altchars) + if len(altchars) != 2: + raise ValueError(f'invalid altchars: {altchars!r}') + for b in b'+/': + if b not in altchars and b in s: + badchar = b + break s = s.translate(bytes.maketrans(altchars, b'+/')) - return binascii.a2b_base64(s, strict_mode=validate) + result = binascii.a2b_base64(s, strict_mode=validate) + if badchar is not None: + import warnings + if validate: + warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data ' + f'with altchars={altchars!r} and validate=True ' + f'will be an error in future Python versions', + DeprecationWarning, stacklevel=2) + else: + warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data ' + f'with altchars={altchars!r} and validate=False ' + f'will be discarded in future Python versions', + FutureWarning, stacklevel=2) + return result def standard_b64encode(s): @@ -129,8 +148,19 @@ The alphabet uses '-' instead of '+' and '_' instead of '/'. """ s = _bytes_from_decode_data(s) + badchar = None + for b in b'+/': + if b in s: + badchar = b + break s = s.translate(_urlsafe_decode_translation) - return b64decode(s) + result = binascii.a2b_base64(s, strict_mode=False) + if badchar is not None: + import warnings + warnings.warn(f'invalid character {chr(badchar)!a} in URL-safe Base64 data ' + f'will be discarded in future Python versions', + FutureWarning, stacklevel=2) + return result Index: Python-3.11.14/Lib/test/test_base64.py =================================================================== --- Python-3.11.14.orig/Lib/test/test_base64.py 2026-02-11 23:44:44.270637438 +0100 +++ Python-3.11.14/Lib/test/test_base64.py 2026-02-11 23:44:54.613405604 +0100 @@ -228,6 +228,25 @@ b'\xd3V\xbeo\xf7\x1d') self.check_decode_type_errors(base64.urlsafe_b64decode) + def test_b64decode_altchars(self): + # Test with arbitrary alternative characters + eq = self.assertEqual + res = b'\xd3V\xbeo\xf7\x1d' + for altchars in b'*$', b'+/', b'/+', b'+_', b'-+', b'-/', b'/_': + data = b'01a%cb%ccd' % tuple(altchars) + data_str = data.decode('ascii') + altchars_str = altchars.decode('ascii') + + eq(base64.b64decode(data, altchars=altchars), res) + eq(base64.b64decode(data_str, altchars=altchars), res) + eq(base64.b64decode(data, altchars=altchars_str), res) + eq(base64.b64decode(data_str, altchars=altchars_str), res) + + self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+') + self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+/-') + self.assertRaises(ValueError, base64.b64decode, '', altchars='+') + self.assertRaises(ValueError, base64.b64decode, '', altchars='+/-') + def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') self.assertRaises(binascii.Error, base64.b64decode, 'abc') @@ -259,10 +278,25 @@ with self.assertRaises(binascii.Error): base64.b64decode(bstr.decode('ascii'), validate=True) - # Normal alphabet characters not discarded when alternative given - res = b'\xFB\xEF\xBE\xFF\xFF\xFF' - self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res) - self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) + # Normal alphabet characters will be discarded when alternative given + with self.assertWarns(FutureWarning): + self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'), + b'\xfb\xef\xbe') + with self.assertWarns(FutureWarning): + self.assertEqual(base64.b64decode(b'////', altchars=b'-_'), + b'\xff\xff\xff') + with self.assertWarns(DeprecationWarning): + self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', validate=True), + b'\xfb\xef\xbe') + with self.assertWarns(DeprecationWarning): + self.assertEqual(base64.b64decode(b'////', altchars=b'-_', validate=True), + b'\xff\xff\xff') + with self.assertWarns(FutureWarning): + self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'\xfb\xef\xbe') + with self.assertWarns(FutureWarning): + self.assertEqual(base64.urlsafe_b64decode(b'////'), b'\xff\xff\xff') + with self.assertRaises(binascii.Error): + base64.b64decode(b'+/!', altchars=b'-_') def test_b32encode(self): eq = self.assertEqual Index: Python-3.11.14/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ Python-3.11.14/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst 2026-02-11 23:44:54.613764682 +0100 @@ -0,0 +1,5 @@ +Accepting ``+`` and ``/`` characters with an alternative alphabet in +:func:`base64.b64decode` and :func:`base64.urlsafe_b64decode` is now +deprecated. +In future Python versions they will be errors in the strict mode and +discarded in the non-strict mode.