gh-145980: Add support for alternative alphabets in the binascii module #145981
gh-145980: Add support for alternative alphabets in the binascii module #145981 — serhiy-storchaka wants to merge 4 commits into python:main from …
Conversation
…i module
* Add the alphabet parameter to the functions b2a_base64(), a2b_base64(), b2a_base85() and a2b_base85().
* Add a number of "*_ALPHABET" constants.
* Remove b2a_z85() and a2b_z85().
|
|
||
| .. data:: BASE64_ALPHABET | ||
|
|
||
| The Base 64 alphabet according to :rfc:`4648`. |
There was a problem hiding this comment.
Let's add a `.. versionadded:: next` directive as well.
Doc/library/binascii.rst
Outdated
|
|
||
| .. data:: UU_ALPHABET | ||
|
|
||
| The Uuencoding alphabet. |
There was a problem hiding this comment.
I think it should be "uuencoding" (for Unix-to-Unix encoding) rather than "Uuencoding". Maybe also link to the Wikipedia page?
Doc/library/binascii.rst
Outdated
|
|
||
| .. data:: XX_ALPHABET | ||
|
|
||
| The Xxencoding alphabet. |
| return binascii.b2a_base64(s, newline=False, | ||
| alphabet=binascii.URLSAFE_BASE64_ALPHABET) |
There was a problem hiding this comment.
For clarity in the code, maybe have an _URLSAFE_BASE64_ALPHABET global variable?
| def test_constants(self): | ||
| for name in ('BASE64_ALPHABET', 'URLSAFE_BASE64_ALPHABET', | ||
| 'CRYPT_ALPHABET', 'BCRYPT_ALPHABET', | ||
| 'UU_ALPHABET', 'XX_ALPHABET', | ||
| 'BINHEX_ALPHABET'): | ||
| value = getattr(binascii, name) | ||
| self.assertIsInstance(value, bytes) | ||
| self.assertEqual(len(value), 64) | ||
| self.assertEqual(len(set(value)), 64) | ||
| for name in ('BASE85_ALPHABET', 'ASCII85_ALPHABET', | ||
| 'Z85_ALPHABET'): | ||
| value = getattr(binascii, name) | ||
| self.assertIsInstance(value, bytes) | ||
| self.assertEqual(len(value), 85) | ||
| self.assertEqual(len(set(value)), 85) |
There was a problem hiding this comment.
Maybe use some helper method:
def check_alphabet(self, name, size):
    """Check that ``binascii.<name>`` is a valid alphabet constant.

    The constant must be a ``bytes`` object containing exactly *size*
    characters, all of them distinct.  Each constant is checked inside
    its own subtest so that one failure does not hide the others.
    """
    with self.subTest(name=name):
        alphabet = getattr(binascii, name)
        self.assertIsInstance(alphabet, bytes)
        self.assertEqual(len(alphabet), size)
        # A repeated character would make decoding ambiguous.
        self.assertEqual(len(set(alphabet)), size)
Lib/test/test_binascii.py
Outdated
| with self.assertRaises(TypeError): | ||
| binascii.b2a_base64(data, alphabet=None) | ||
| with self.assertRaises(TypeError): | ||
| binascii.a2b_base64(data, alphabet=None) | ||
| with self.assertRaises(TypeError): | ||
| binascii.b2a_base64(data, alphabet=alphabet.decode()) | ||
| with self.assertRaises(TypeError): | ||
| binascii.a2b_base64(data, alphabet=alphabet.decode()) | ||
| with self.assertRaises(TypeError): | ||
| binascii.a2b_base64(data, alphabet=bytearray(alphabet)) | ||
| with self.assertRaises(ValueError): | ||
| binascii.b2a_base64(data, alphabet=alphabet[:-1]) | ||
| with self.assertRaises(ValueError): | ||
| binascii.a2b_base64(data, alphabet=alphabet[:-1]) | ||
| with self.assertRaises(ValueError): | ||
| binascii.b2a_base64(data, alphabet=alphabet+b'?') | ||
| with self.assertRaises(ValueError): | ||
| binascii.a2b_base64(data, alphabet=alphabet+b'?') | ||
|
|
There was a problem hiding this comment.
Can we refactor this with a helper method, so that if we need to add or change a validation check later we don't have to update it in two places?
For instance, a test case for binascii.b2a_base64(data, alphabet=bytearray(alphabet)) is missing (only a2b_base64 is tested here).
Lib/test/test_binascii.py
Outdated
| with self.assertRaises(TypeError): | ||
| binascii.a2b_base85(data, alphabet=alphabet.decode()) | ||
| with self.assertRaises(TypeError): | ||
| binascii.a2b_base85(data, alphabet=bytearray(alphabet)) |
| static PyObject * | ||
| get_reverse_table(binascii_state *state, PyObject *alphabet, int size, int padchar) | ||
| { | ||
| PyObject *reverse_table; |
There was a problem hiding this comment.
I'd suggest having a `goto error` label for cleanup in case this function grows.
📚 Documentation preview 📚: https://cpython-previews--145981.org.readthedocs.build/