From caf7182c6f8d74c9018cbf8675beb4da10a2d0bd Mon Sep 17 00:00:00 2001
From: MrEx3cut0r
Date: Mon, 9 Mar 2026 16:28:48 +0300
Subject: [PATCH] gh-145668: Optimize FOR_ITER virtual iterators for bytes, bytearray, and str

---
Review notes (this section is not part of the commit message):

GET_ITER now leaves bytes, bytearray and str objects on the stack with a
tagged index of 0, the same way it already did for tuple and list, and
foriter_next() gains one branch per new type.

foriter_next() returns PyStackRef_NULL when the index is past the end of
the sequence and PyStackRef_ERROR when creating the item fails.

The final fall-through of foriter_next() used to return PyStackRef_ERROR
without setting an exception, under a comment claiming a fallback to the
iterator protocol that the code does not perform. It is now marked with
Py_UNREACHABLE(), which restores the invariant that the removed
assert(PyTuple_CheckExact(seq) || PyList_CheckExact(seq)) used to state.

NOTE(review): bytearray is mutable. Its size and buffer are read here
without any locking; confirm this is acceptable on free-threaded builds.

NOTE(review): whitespace of the context lines was reconstructed; if the
patch does not apply cleanly, retry with `git apply --ignore-whitespace`.

 Python/bytecodes.c |  6 +++--
 Python/ceval.c     | 55 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 8a748fec9e4201..d0fc0ca0928c08 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3277,8 +3277,10 @@ dummy_func(
             _Py_GatherStats_GetIter(iterable);
             #endif
             PyTypeObject *tp = PyStackRef_TYPE(iterable);
-            if (tp == &PyTuple_Type || tp == &PyList_Type) {
-                /* Leave iterable on stack and pushed tagged 0 */
+            if (tp == &PyTuple_Type || tp == &PyList_Type ||
+                tp == &PyBytes_Type || tp == &PyByteArray_Type ||
+                tp == &PyUnicode_Type) {
+                /* Leave iterable on stack and push tagged 0 */
                 iter = iterable;
                 DEAD(iterable);
                 index_or_null = PyStackRef_TagInt(0);
diff --git a/Python/ceval.c b/Python/ceval.c
index 1e5142f4b456a1..e10c1d8a883949 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3704,8 +3704,8 @@ static _PyStackRef
 foriter_next(PyObject *seq, _PyStackRef index)
 {
     assert(PyStackRef_IsTaggedInt(index));
-    assert(PyTuple_CheckExact(seq) || PyList_CheckExact(seq));
     intptr_t i = PyStackRef_UntagInt(index);
+
     if (PyTuple_CheckExact(seq)) {
         size_t size = PyTuple_GET_SIZE(seq);
         if ((size_t)i >= size) {
@@ -3713,11 +3713,56 @@ foriter_next(PyObject *seq, _PyStackRef index)
         }
         return PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(seq, i));
     }
-    PyObject *item = _PyList_GetItemRef((PyListObject *)seq, i);
-    if (item == NULL) {
-        return PyStackRef_NULL;
+
+    if (PyList_CheckExact(seq)) {
+        PyObject *item = _PyList_GetItemRef((PyListObject *)seq, i);
+        if (item == NULL) {
+            return PyStackRef_NULL;
+        }
+        return PyStackRef_FromPyObjectSteal(item);
     }
-    return PyStackRef_FromPyObjectSteal(item);
+
+    if (PyBytes_CheckExact(seq)) {
+        Py_ssize_t size = PyBytes_GET_SIZE(seq);
+        if (i < 0 || i >= size) {
+            return PyStackRef_NULL;
+        }
+        unsigned char ch = (unsigned char)PyBytes_AS_STRING(seq)[i];
+        PyObject *item = PyLong_FromUnsignedLong((unsigned long)ch);
+        if (item == NULL) {
+            return PyStackRef_ERROR;
+        }
+        return PyStackRef_FromPyObjectSteal(item);
+    }
+
+    if (PyByteArray_CheckExact(seq)) {
+        Py_ssize_t size = PyByteArray_GET_SIZE(seq);
+        if (i < 0 || i >= size) {
+            return PyStackRef_NULL;
+        }
+        unsigned char ch = (unsigned char)PyByteArray_AS_STRING(seq)[i];
+        PyObject *item = PyLong_FromUnsignedLong((unsigned long)ch);
+        if (item == NULL) {
+            return PyStackRef_ERROR;
+        }
+        return PyStackRef_FromPyObjectSteal(item);
+    }
+
+    if (PyUnicode_CheckExact(seq)) {
+        Py_ssize_t size = PyUnicode_GET_LENGTH(seq);
+        if (i < 0 || i >= size) {
+            return PyStackRef_NULL;
+        }
+        // Iteration over str yields 1-character substrings.
+        PyObject *item = PyUnicode_Substring(seq, i, i + 1);
+        if (item == NULL) {
+            return PyStackRef_ERROR;
+        }
+        return PyStackRef_FromPyObjectSteal(item);
+    }
+
+    // GET_ITER tags the index only for the exact types handled above.
+    Py_UNREACHABLE();
 }
 
 _PyStackRef _PyForIter_VirtualIteratorNext(PyThreadState* tstate, _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef* index_ptr)