66#include "pycore_intrinsics.h"
77#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
88#include "pycore_long.h" // _PY_IS_SMALL_INT()
9+ #include "pycore_hashtable.h" // _Py_hashtable_t
910
1011#include "pycore_opcode_utils.h"
1112#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
@@ -1325,30 +1326,43 @@ get_const_value(int opcode, int oparg, PyObject *co_consts)
13251326
13261327// Steals a reference to newconst.
13271328static int
1328- add_const (PyObject * newconst , PyObject * consts , PyObject * const_cache )
1329+ add_const (PyObject * newconst , PyObject * consts , PyObject * const_cache ,
1330+ _Py_hashtable_t * consts_index )
13291331{
13301332 if (_PyCompile_ConstCacheMergeOne (const_cache , & newconst ) < 0 ) {
13311333 Py_DECREF (newconst );
13321334 return -1 ;
13331335 }
13341336
1335- Py_ssize_t index ;
1336- for (index = 0 ; index < PyList_GET_SIZE (consts ); index ++ ) {
1337- if (PyList_GET_ITEM (consts , index ) == newconst ) {
1338- break ;
1339- }
1337+ /* O(1) lookup via pointer-keyed hashtable (replaces linear search). */
1338+ _Py_hashtable_entry_t * entry = _Py_hashtable_get_entry (consts_index , (void * )newconst );
1339+ if (entry != NULL ) {
1340+ /* Already exists */
1341+ Py_DECREF (newconst );
1342+ return (int )(uintptr_t )entry -> value ;
13401343 }
1341- if (index == PyList_GET_SIZE (consts )) {
1342- if ((size_t )index >= (size_t )INT_MAX - 1 ) {
1343- PyErr_SetString (PyExc_OverflowError , "too many constants" );
1344- Py_DECREF (newconst );
1345- return -1 ;
1346- }
1347- if (PyList_Append (consts , newconst )) {
1348- Py_DECREF (newconst );
1349- return -1 ;
1350- }
1344+
1345+ /* Not found – append to consts list */
1346+ Py_ssize_t index = PyList_GET_SIZE (consts );
1347+ if ((size_t )index >= (size_t )INT_MAX - 1 ) {
1348+ PyErr_SetString (PyExc_OverflowError , "too many constants" );
1349+ Py_DECREF (newconst );
1350+ return -1 ;
1351+ }
1352+ if (PyList_Append (consts , newconst )) {
1353+ Py_DECREF (newconst );
1354+ return -1 ;
13511355 }
1356+
1357+ /* Update index (must be after successful append) */
1358+ if (_Py_hashtable_set (consts_index , (void * )newconst , (void * )(uintptr_t )index ) < 0 ) {
1359+ /* OOM – rollback append for consistency */
1360+ PyList_SetSlice (consts , index , index + 1 , NULL );
1361+ Py_DECREF (newconst );
1362+ PyErr_NoMemory ();
1363+ return -1 ;
1364+ }
1365+
13521366 Py_DECREF (newconst );
13531367 return (int )index ;
13541368}
@@ -1424,7 +1438,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
14241438/* Steals reference to "newconst" */
14251439static int
14261440instr_make_load_const (cfg_instr * instr , PyObject * newconst ,
1427- PyObject * consts , PyObject * const_cache )
1441+ PyObject * consts , PyObject * const_cache ,
1442+ _Py_hashtable_t * consts_index )
14281443{
14291444 int res = maybe_instr_make_load_smallint (instr , newconst , consts , const_cache );
14301445 if (res < 0 ) {
@@ -1434,7 +1449,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
14341449 if (res > 0 ) {
14351450 return SUCCESS ;
14361451 }
1437- int oparg = add_const (newconst , consts , const_cache );
1452+ int oparg = add_const (newconst , consts , const_cache , consts_index );
14381453 RETURN_IF_ERROR (oparg );
14391454 INSTR_SET_OP1 (instr , LOAD_CONST , oparg );
14401455 return SUCCESS ;
@@ -1447,7 +1462,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
14471462 Called with codestr pointing to the first LOAD_CONST.
14481463*/
14491464static int
1450- fold_tuple_of_constants (basicblock * bb , int i , PyObject * consts , PyObject * const_cache )
1465+ fold_tuple_of_constants (basicblock * bb , int i , PyObject * consts ,
1466+ PyObject * const_cache , _Py_hashtable_t * consts_index )
14511467{
14521468 /* Pre-conditions */
14531469 assert (PyDict_CheckExact (const_cache ));
@@ -1484,7 +1500,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
14841500 }
14851501
14861502 nop_out (const_instrs , seq_size );
1487- return instr_make_load_const (instr , const_tuple , consts , const_cache );
1503+ return instr_make_load_const (instr , const_tuple , consts , const_cache , consts_index );
14881504}
14891505
14901506/* Replace:
@@ -1502,7 +1518,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
15021518*/
15031519static int
15041520fold_constant_intrinsic_list_to_tuple (basicblock * bb , int i ,
1505- PyObject * consts , PyObject * const_cache )
1521+ PyObject * consts , PyObject * const_cache ,
1522+ _Py_hashtable_t * consts_index )
15061523{
15071524 assert (PyDict_CheckExact (const_cache ));
15081525 assert (PyList_CheckExact (consts ));
@@ -1554,7 +1571,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
15541571 nop_out (& instr , 1 );
15551572 }
15561573 assert (consts_found == 0 );
1557- return instr_make_load_const (intrinsic , newconst , consts , const_cache );
1574+ return instr_make_load_const (intrinsic , newconst , consts , const_cache , consts_index );
15581575 }
15591576
15601577 if (expect_append ) {
@@ -1590,7 +1607,8 @@ Optimize lists and sets for:
15901607*/
15911608static int
15921609optimize_lists_and_sets (basicblock * bb , int i , int nextop ,
1593- PyObject * consts , PyObject * const_cache )
1610+ PyObject * consts , PyObject * const_cache ,
1611+ _Py_hashtable_t * consts_index )
15941612{
15951613 assert (PyDict_CheckExact (const_cache ));
15961614 assert (PyList_CheckExact (consts ));
@@ -1640,7 +1658,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
16401658 Py_SETREF (const_result , frozenset );
16411659 }
16421660
1643- int index = add_const (const_result , consts , const_cache );
1661+ int index = add_const (const_result , consts , const_cache , consts_index );
16441662 RETURN_IF_ERROR (index );
16451663 nop_out (const_instrs , seq_size );
16461664
@@ -1837,7 +1855,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)
18371855}
18381856
18391857static int
1840- fold_const_binop (basicblock * bb , int i , PyObject * consts , PyObject * const_cache )
1858+ fold_const_binop (basicblock * bb , int i , PyObject * consts ,
1859+ PyObject * const_cache , _Py_hashtable_t * consts_index )
18411860{
18421861 #define BINOP_OPERAND_COUNT 2
18431862 assert (PyDict_CheckExact (const_cache ));
@@ -1879,7 +1898,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
18791898 }
18801899
18811900 nop_out (operands_instrs , BINOP_OPERAND_COUNT );
1882- return instr_make_load_const (binop , newconst , consts , const_cache );
1901+ return instr_make_load_const (binop , newconst , consts , const_cache , consts_index );
18831902}
18841903
18851904static PyObject *
@@ -1925,7 +1944,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg)
19251944}
19261945
19271946static int
1928- fold_const_unaryop (basicblock * bb , int i , PyObject * consts , PyObject * const_cache )
1947+ fold_const_unaryop (basicblock * bb , int i , PyObject * consts ,
1948+ PyObject * const_cache , _Py_hashtable_t * consts_index )
19291949{
19301950 #define UNARYOP_OPERAND_COUNT 1
19311951 assert (PyDict_CheckExact (const_cache ));
@@ -1962,7 +1982,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach
19621982 assert (PyBool_Check (newconst ));
19631983 }
19641984 nop_out (& operand_instr , UNARYOP_OPERAND_COUNT );
1965- return instr_make_load_const (unaryop , newconst , consts , const_cache );
1985+ return instr_make_load_const (unaryop , newconst , consts , const_cache , consts_index );
19661986}
19671987
19681988#define VISITED (-1)
@@ -2157,7 +2177,8 @@ apply_static_swaps(basicblock *block, int i)
21572177}
21582178
21592179static int
2160- basicblock_optimize_load_const (PyObject * const_cache , basicblock * bb , PyObject * consts )
2180+ basicblock_optimize_load_const (PyObject * const_cache , basicblock * bb ,
2181+ PyObject * consts , _Py_hashtable_t * consts_index )
21612182{
21622183 assert (PyDict_CheckExact (const_cache ));
21632184 assert (PyList_CheckExact (consts ));
@@ -2272,7 +2293,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
22722293 return ERROR ;
22732294 }
22742295 cnt = PyBool_FromLong (is_true );
2275- int index = add_const (cnt , consts , const_cache );
2296+ int index = add_const (cnt , consts , const_cache , consts_index );
22762297 if (index < 0 ) {
22772298 return ERROR ;
22782299 }
@@ -2286,15 +2307,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
22862307}
22872308
22882309static int
2289- optimize_load_const (PyObject * const_cache , cfg_builder * g , PyObject * consts ) {
2310+ optimize_load_const (PyObject * const_cache , cfg_builder * g , PyObject * consts ,
2311+ _Py_hashtable_t * consts_index ) {
22902312 for (basicblock * b = g -> g_entryblock ; b != NULL ; b = b -> b_next ) {
2291- RETURN_IF_ERROR (basicblock_optimize_load_const (const_cache , b , consts ));
2313+ RETURN_IF_ERROR (basicblock_optimize_load_const (const_cache , b , consts , consts_index ));
22922314 }
22932315 return SUCCESS ;
22942316}
22952317
22962318static int
2297- optimize_basic_block (PyObject * const_cache , basicblock * bb , PyObject * consts )
2319+ optimize_basic_block (PyObject * const_cache , basicblock * bb , PyObject * consts ,
2320+ _Py_hashtable_t * consts_index )
22982321{
22992322 assert (PyDict_CheckExact (const_cache ));
23002323 assert (PyList_CheckExact (consts ));
@@ -2334,11 +2357,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
23342357 continue ;
23352358 }
23362359 }
2337- RETURN_IF_ERROR (fold_tuple_of_constants (bb , i , consts , const_cache ));
2360+ RETURN_IF_ERROR (fold_tuple_of_constants (bb , i , consts , const_cache , consts_index ));
23382361 break ;
23392362 case BUILD_LIST :
23402363 case BUILD_SET :
2341- RETURN_IF_ERROR (optimize_lists_and_sets (bb , i , nextop , consts , const_cache ));
2364+ RETURN_IF_ERROR (optimize_lists_and_sets (bb , i , nextop , consts , const_cache , consts_index ));
23422365 break ;
23432366 case POP_JUMP_IF_NOT_NONE :
23442367 case POP_JUMP_IF_NONE :
@@ -2473,23 +2496,23 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
24732496 _Py_FALLTHROUGH ;
24742497 case UNARY_INVERT :
24752498 case UNARY_NEGATIVE :
2476- RETURN_IF_ERROR (fold_const_unaryop (bb , i , consts , const_cache ));
2499+ RETURN_IF_ERROR (fold_const_unaryop (bb , i , consts , const_cache , consts_index ));
24772500 break ;
24782501 case CALL_INTRINSIC_1 :
24792502 if (oparg == INTRINSIC_LIST_TO_TUPLE ) {
24802503 if (nextop == GET_ITER ) {
24812504 INSTR_SET_OP0 (inst , NOP );
24822505 }
24832506 else {
2484- RETURN_IF_ERROR (fold_constant_intrinsic_list_to_tuple (bb , i , consts , const_cache ));
2507+ RETURN_IF_ERROR (fold_constant_intrinsic_list_to_tuple (bb , i , consts , const_cache , consts_index ));
24852508 }
24862509 }
24872510 else if (oparg == INTRINSIC_UNARY_POSITIVE ) {
2488- RETURN_IF_ERROR (fold_const_unaryop (bb , i , consts , const_cache ));
2511+ RETURN_IF_ERROR (fold_const_unaryop (bb , i , consts , const_cache , consts_index ));
24892512 }
24902513 break ;
24912514 case BINARY_OP :
2492- RETURN_IF_ERROR (fold_const_binop (bb , i , consts , const_cache ));
2515+ RETURN_IF_ERROR (fold_const_binop (bb , i , consts , const_cache , consts_index ));
24932516 break ;
24942517 }
24952518 }
@@ -2534,16 +2557,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)
25342557 NOPs. Later those NOPs are removed.
25352558*/
25362559static int
2537- optimize_cfg (cfg_builder * g , PyObject * consts , PyObject * const_cache , int firstlineno )
2560+ optimize_cfg (cfg_builder * g , PyObject * consts , PyObject * const_cache ,
2561+ _Py_hashtable_t * consts_index , int firstlineno )
25382562{
25392563 assert (PyDict_CheckExact (const_cache ));
25402564 RETURN_IF_ERROR (check_cfg (g ));
25412565 RETURN_IF_ERROR (inline_small_or_no_lineno_blocks (g -> g_entryblock ));
25422566 RETURN_IF_ERROR (remove_unreachable (g -> g_entryblock ));
25432567 RETURN_IF_ERROR (resolve_line_numbers (g , firstlineno ));
2544- RETURN_IF_ERROR (optimize_load_const (const_cache , g , consts ));
2568+ RETURN_IF_ERROR (optimize_load_const (const_cache , g , consts , consts_index ));
25452569 for (basicblock * b = g -> g_entryblock ; b != NULL ; b = b -> b_next ) {
2546- RETURN_IF_ERROR (optimize_basic_block (const_cache , b , consts ));
2570+ RETURN_IF_ERROR (optimize_basic_block (const_cache , b , consts , consts_index ));
25472571 }
25482572 RETURN_IF_ERROR (remove_redundant_nops_and_pairs (g -> g_entryblock ));
25492573 RETURN_IF_ERROR (remove_unreachable (g -> g_entryblock ));
@@ -3663,7 +3687,36 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
36633687 RETURN_IF_ERROR (label_exception_targets (g -> g_entryblock ));
36643688
36653689 /** Optimization **/
3666- RETURN_IF_ERROR (optimize_cfg (g , consts , const_cache , firstlineno ));
3690+
3691+ /* Auxiliary pointer→index hashtable for O(1) lookup in add_const. */
3692+ _Py_hashtable_t * consts_index = _Py_hashtable_new (
3693+ _Py_hashtable_hash_ptr , _Py_hashtable_compare_direct );
3694+ if (consts_index == NULL ) {
3695+ PyErr_NoMemory ();
3696+ return ERROR ;
3697+ }
3698+
3699+ /* Seed the index with pre-existing constants. */
3700+ for (Py_ssize_t i = 0 ; i < PyList_GET_SIZE (consts ); i ++ ) {
3701+ PyObject * item = PyList_GET_ITEM (consts , i );
3702+ if (_Py_hashtable_get_entry (consts_index , (void * )item ) != NULL ) {
3703+ continue ; /* duplicate pointer; keep first occurrence */
3704+ }
3705+ if (_Py_hashtable_set (consts_index , (void * )item ,
3706+ (void * )(uintptr_t )i ) < 0 ) {
3707+ _Py_hashtable_destroy (consts_index );
3708+ PyErr_NoMemory ();
3709+ return ERROR ;
3710+ }
3711+ }
3712+
3713+ int ret = optimize_cfg (g , consts , const_cache , consts_index , firstlineno );
3714+
3715+ /* consts_index is invalid after this (consts list may be modified). */
3716+ _Py_hashtable_destroy (consts_index );
3717+
3718+ RETURN_IF_ERROR (ret );
3719+
36673720 RETURN_IF_ERROR (remove_unused_consts (g -> g_entryblock , consts ));
36683721 RETURN_IF_ERROR (
36693722 add_checks_for_loads_of_uninitialized_variables (
0 commit comments