diff --git a/Doc/library/multiprocessing.shared_memory.rst b/Doc/library/multiprocessing.shared_memory.rst index e8f04a6ac7b95d..533b2e00253c53 100644 --- a/Doc/library/multiprocessing.shared_memory.rst +++ b/Doc/library/multiprocessing.shared_memory.rst @@ -311,28 +311,34 @@ finishes execution. existing :class:`!ShareableList`, specify its shared memory block's unique name while leaving *sequence* set to ``None``. + .. versionchanged:: 3.15 + Trailing null bytes (``\x00``) in :class:`bytes` and :class:`str` values + are now preserved correctly. See :gh:`106939` and :gh:`145261`. + .. note:: - A known issue exists for :class:`bytes` and :class:`str` values. - If they end with ``\x00`` nul bytes or characters, those may be - *silently stripped* when fetching them by index from the - :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior is - considered a bug and may go away in the future. See :gh:`106939`. + In Python 3.14 and earlier, a known issue exists for :class:`bytes` and + :class:`str` values. If they end with ``\x00`` nul bytes or characters, + those may be *silently stripped* when fetching them by index from the + :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior was fixed + in Python 3.15. - For applications where rstripping of trailing nulls is a problem, - work around it by always unconditionally appending an extra non-0 - byte to the end of such values when storing and unconditionally - removing it when fetching: + For applications that need to work with Python 3.14 and earlier where + rstripping of trailing nulls is a problem, work around it by always + unconditionally appending an extra non-0 byte to the end of such values + when storing and unconditionally removing it when fetching: .. doctest:: >>> from multiprocessing import shared_memory - >>> nul_bug_demo = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00']) - >>> nul_bug_demo[0] - '?' 
- >>> nul_bug_demo[1] - b'\x03\x02\x01' - >>> nul_bug_demo.shm.unlink() + >>> # Python 3.15+: trailing nulls are preserved + >>> sl = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00']) + >>> sl[0] + '?\x00' + >>> sl[1] + b'\x03\x02\x01\x00\x00\x00' + >>> sl.shm.unlink() + >>> # Workaround for Python 3.14 and earlier: >>> padded = shared_memory.ShareableList(['?\x00\x07', b'\x03\x02\x01\x00\x00\x00\x07']) >>> padded[0][:-1] '?\x00' diff --git a/Lib/multiprocessing/shared_memory.py b/Lib/multiprocessing/shared_memory.py index 99a8ce3320ad4e..80fe66a1f86cb9 100644 --- a/Lib/multiprocessing/shared_memory.py +++ b/Lib/multiprocessing/shared_memory.py @@ -286,8 +286,8 @@ class ShareableList: _alignment = 8 _back_transforms_mapping = { 0: lambda value: value, # int, float, bool - 1: lambda value: value.rstrip(b'\x00').decode(_encoding), # str - 2: lambda value: value.rstrip(b'\x00'), # bytes + 1: lambda value: value.decode(_encoding), # str + 2: lambda value: value, # bytes 3: lambda _value: None, # None } @@ -312,7 +312,7 @@ def __init__(self, sequence=None, *, name=None): self._types_mapping[type(item)] if not isinstance(item, (str, bytes)) else self._types_mapping[type(item)] % ( - self._alignment * (len(item) // self._alignment + 1), + self._alignment * (len(item.encode('utf-8') if isinstance(item, str) else item) // self._alignment + 1), ) for item in sequence ] @@ -355,11 +355,18 @@ def __init__(self, sequence=None, *, name=None): self._offset_data_start, *(v.encode(_enc) if isinstance(v, str) else v for v in sequence) ) + # For bytes and str, store actual byte length so retrieval is exact + _stored_formats = [ + (self._types_mapping[str] % (len(v.encode(_enc)),) if isinstance(v, str) + else self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) + else f) + for v, f in zip(sequence, _formats) + ] struct.pack_into( self._format_packing_metainfo, self.shm.buf, self._offset_packing_formats, - *(v.encode(_enc) for v in _formats) + 
*(v.encode(_enc) for v in _stored_formats) ) struct.pack_into( self._format_back_transform_codes, @@ -476,7 +483,9 @@ def __setitem__(self, position, value): self._set_packing_format_and_transform( position, - new_format, + (self._types_mapping[bytes] % (len(encoded_value),) if isinstance(value, bytes) + else self._types_mapping[str] % (len(encoded_value),) if isinstance(value, str) + else new_format), value ) struct.pack_into(new_format, self.shm.buf, offset, encoded_value) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index cc07062eee6f98..991165b950cf3e 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(current_format, sl._get_packing_format(0)) # Verify attributes are readable. - self.assertEqual(sl.format, '8s8sdqxxxxxx?xxxxxxxx?q') + self.assertEqual(sl.format, '5s5sdqxxxxxx?xxxxxxxx?q') # Exercise len(). self.assertEqual(len(sl), 7) @@ -4785,17 +4785,17 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(sl[3], 42) sl[4] = 'some' # Change type at a given position. 
self.assertEqual(sl[4], 'some') - self.assertEqual(sl.format, '8s8sdq8sxxxxxxx?q') + self.assertEqual(sl.format, '5s5sdq4sxxxxxxx?q') with self.assertRaisesRegex(ValueError, "exceeds available storage"): sl[4] = 'far too many' self.assertEqual(sl[4], 'some') - sl[0] = 'encodés' # Exactly 8 bytes of UTF-8 data - self.assertEqual(sl[0], 'encodés') + sl[0] = 'hello' # Exactly 5 bytes of UTF-8 data + self.assertEqual(sl[0], 'hello') self.assertEqual(sl[1], b'HoWdY') # no spillage with self.assertRaisesRegex(ValueError, "exceeds available storage"): - sl[0] = 'encodées' # Exactly 9 bytes of UTF-8 data + sl[0] = 'hëllöö' # Exactly 9 bytes of UTF-8 data self.assertEqual(sl[1], b'HoWdY') with self.assertRaisesRegex(ValueError, "exceeds available storage"): @@ -4817,7 +4817,6 @@ def test_shared_memory_ShareableList_basics(self): self.assertNotEqual(sl.shm.name, sl_copy.shm.name) self.assertEqual(name_duplicate, sl_copy.shm.name) self.assertEqual(list(sl), list(sl_copy)) - self.assertEqual(sl.format, sl_copy.format) sl_copy[-1] = 77 self.assertEqual(sl_copy[-1], 77) self.assertNotEqual(sl[-1], 77) diff --git a/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst b/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst new file mode 100644 index 00000000000000..6651d1b0858508 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst @@ -0,0 +1,4 @@ +Fix :class:`~multiprocessing.shared_memory.ShareableList` corrupting +multi-byte UTF-8 strings due to using character count instead of byte count +for slot allocation, and stripping legitimate trailing null bytes from +``bytes`` and ``str`` values.