Skip to content

Commit 2a67136

Browse files
author
zhuangjinkai
committed
gh-146455: Fix O(N²) in add_const() after constant folding moved to CFG
The add_const() function in flowgraph.c uses a linear search over the consts list to find the index of a constant. After gh-126835 moved constant folding from the AST optimizer to the CFG optimizer, this function is now called N times for N inner tuple elements during fold_tuple_of_constants(), resulting in O(N²) total time. Fix by maintaining an auxiliary _Py_hashtable_t that maps object pointers to their indices in the consts list, providing O(1) lookup. For a file with 100,000 constant 2-tuples: - Before: 10.38s (add_const occupies 83.76% of CPU time) - After: 1.48s
1 parent 8e1469c commit 2a67136

File tree

1 file changed

+96
-43
lines changed

1 file changed

+96
-43
lines changed

Python/flowgraph.c

Lines changed: 96 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "pycore_intrinsics.h"
77
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
88
#include "pycore_long.h" // _PY_IS_SMALL_INT()
9+
#include "pycore_hashtable.h" // _Py_hashtable_t
910

1011
#include "pycore_opcode_utils.h"
1112
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
@@ -1325,30 +1326,43 @@ get_const_value(int opcode, int oparg, PyObject *co_consts)
13251326

13261327
// Steals a reference to newconst.
13271328
static int
1328-
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1329+
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
1330+
_Py_hashtable_t *consts_index)
13291331
{
13301332
if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
13311333
Py_DECREF(newconst);
13321334
return -1;
13331335
}
13341336

1335-
Py_ssize_t index;
1336-
for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1337-
if (PyList_GET_ITEM(consts, index) == newconst) {
1338-
break;
1339-
}
1337+
/* O(1) lookup via pointer-keyed hashtable (replaces linear search). */
1338+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
1339+
if (entry != NULL) {
1340+
/* Already exists */
1341+
Py_DECREF(newconst);
1342+
return (int)(uintptr_t)entry->value;
13401343
}
1341-
if (index == PyList_GET_SIZE(consts)) {
1342-
if ((size_t)index >= (size_t)INT_MAX - 1) {
1343-
PyErr_SetString(PyExc_OverflowError, "too many constants");
1344-
Py_DECREF(newconst);
1345-
return -1;
1346-
}
1347-
if (PyList_Append(consts, newconst)) {
1348-
Py_DECREF(newconst);
1349-
return -1;
1350-
}
1344+
1345+
/* Not found – append to consts list */
1346+
Py_ssize_t index = PyList_GET_SIZE(consts);
1347+
if ((size_t)index >= (size_t)INT_MAX - 1) {
1348+
PyErr_SetString(PyExc_OverflowError, "too many constants");
1349+
Py_DECREF(newconst);
1350+
return -1;
1351+
}
1352+
if (PyList_Append(consts, newconst)) {
1353+
Py_DECREF(newconst);
1354+
return -1;
13511355
}
1356+
1357+
/* Update index (must be after successful append) */
1358+
if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
1359+
/* OOM – rollback append for consistency */
1360+
PyList_SetSlice(consts, index, index + 1, NULL);
1361+
Py_DECREF(newconst);
1362+
PyErr_NoMemory();
1363+
return -1;
1364+
}
1365+
13521366
Py_DECREF(newconst);
13531367
return (int)index;
13541368
}
@@ -1424,7 +1438,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
14241438
/* Steals reference to "newconst" */
14251439
static int
14261440
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1427-
PyObject *consts, PyObject *const_cache)
1441+
PyObject *consts, PyObject *const_cache,
1442+
_Py_hashtable_t *consts_index)
14281443
{
14291444
int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
14301445
if (res < 0) {
@@ -1434,7 +1449,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
14341449
if (res > 0) {
14351450
return SUCCESS;
14361451
}
1437-
int oparg = add_const(newconst, consts, const_cache);
1452+
int oparg = add_const(newconst, consts, const_cache, consts_index);
14381453
RETURN_IF_ERROR(oparg);
14391454
INSTR_SET_OP1(instr, LOAD_CONST, oparg);
14401455
return SUCCESS;
@@ -1447,7 +1462,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
14471462
Called with codestr pointing to the first LOAD_CONST.
14481463
*/
14491464
static int
1450-
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1465+
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
1466+
PyObject *const_cache, _Py_hashtable_t *consts_index)
14511467
{
14521468
/* Pre-conditions */
14531469
assert(PyDict_CheckExact(const_cache));
@@ -1484,7 +1500,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
14841500
}
14851501

14861502
nop_out(const_instrs, seq_size);
1487-
return instr_make_load_const(instr, const_tuple, consts, const_cache);
1503+
return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
14881504
}
14891505

14901506
/* Replace:
@@ -1502,7 +1518,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
15021518
*/
15031519
static int
15041520
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1505-
PyObject *consts, PyObject *const_cache)
1521+
PyObject *consts, PyObject *const_cache,
1522+
_Py_hashtable_t *consts_index)
15061523
{
15071524
assert(PyDict_CheckExact(const_cache));
15081525
assert(PyList_CheckExact(consts));
@@ -1554,7 +1571,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
15541571
nop_out(&instr, 1);
15551572
}
15561573
assert(consts_found == 0);
1557-
return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1574+
return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
15581575
}
15591576

15601577
if (expect_append) {
@@ -1590,7 +1607,8 @@ Optimize lists and sets for:
15901607
*/
15911608
static int
15921609
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1593-
PyObject *consts, PyObject *const_cache)
1610+
PyObject *consts, PyObject *const_cache,
1611+
_Py_hashtable_t *consts_index)
15941612
{
15951613
assert(PyDict_CheckExact(const_cache));
15961614
assert(PyList_CheckExact(consts));
@@ -1640,7 +1658,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
16401658
Py_SETREF(const_result, frozenset);
16411659
}
16421660

1643-
int index = add_const(const_result, consts, const_cache);
1661+
int index = add_const(const_result, consts, const_cache, consts_index);
16441662
RETURN_IF_ERROR(index);
16451663
nop_out(const_instrs, seq_size);
16461664

@@ -1837,7 +1855,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)
18371855
}
18381856

18391857
static int
1840-
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1858+
fold_const_binop(basicblock *bb, int i, PyObject *consts,
1859+
PyObject *const_cache, _Py_hashtable_t *consts_index)
18411860
{
18421861
#define BINOP_OPERAND_COUNT 2
18431862
assert(PyDict_CheckExact(const_cache));
@@ -1879,7 +1898,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
18791898
}
18801899

18811900
nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1882-
return instr_make_load_const(binop, newconst, consts, const_cache);
1901+
return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
18831902
}
18841903

18851904
static PyObject *
@@ -1925,7 +1944,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg)
19251944
}
19261945

19271946
static int
1928-
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1947+
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
1948+
PyObject *const_cache, _Py_hashtable_t *consts_index)
19291949
{
19301950
#define UNARYOP_OPERAND_COUNT 1
19311951
assert(PyDict_CheckExact(const_cache));
@@ -1962,7 +1982,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach
19621982
assert(PyBool_Check(newconst));
19631983
}
19641984
nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1965-
return instr_make_load_const(unaryop, newconst, consts, const_cache);
1985+
return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
19661986
}
19671987

19681988
#define VISITED (-1)
@@ -2157,7 +2177,8 @@ apply_static_swaps(basicblock *block, int i)
21572177
}
21582178

21592179
static int
2160-
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2180+
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
2181+
PyObject *consts, _Py_hashtable_t *consts_index)
21612182
{
21622183
assert(PyDict_CheckExact(const_cache));
21632184
assert(PyList_CheckExact(consts));
@@ -2272,7 +2293,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
22722293
return ERROR;
22732294
}
22742295
cnt = PyBool_FromLong(is_true);
2275-
int index = add_const(cnt, consts, const_cache);
2296+
int index = add_const(cnt, consts, const_cache, consts_index);
22762297
if (index < 0) {
22772298
return ERROR;
22782299
}
@@ -2286,15 +2307,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
22862307
}
22872308

22882309
static int
2289-
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2310+
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
2311+
_Py_hashtable_t *consts_index) {
22902312
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2291-
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2313+
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
22922314
}
22932315
return SUCCESS;
22942316
}
22952317

22962318
static int
2297-
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2319+
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
2320+
_Py_hashtable_t *consts_index)
22982321
{
22992322
assert(PyDict_CheckExact(const_cache));
23002323
assert(PyList_CheckExact(consts));
@@ -2334,11 +2357,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
23342357
continue;
23352358
}
23362359
}
2337-
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2360+
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
23382361
break;
23392362
case BUILD_LIST:
23402363
case BUILD_SET:
2341-
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2364+
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
23422365
break;
23432366
case POP_JUMP_IF_NOT_NONE:
23442367
case POP_JUMP_IF_NONE:
@@ -2473,23 +2496,23 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
24732496
_Py_FALLTHROUGH;
24742497
case UNARY_INVERT:
24752498
case UNARY_NEGATIVE:
2476-
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2499+
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
24772500
break;
24782501
case CALL_INTRINSIC_1:
24792502
if (oparg == INTRINSIC_LIST_TO_TUPLE) {
24802503
if (nextop == GET_ITER) {
24812504
INSTR_SET_OP0(inst, NOP);
24822505
}
24832506
else {
2484-
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2507+
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
24852508
}
24862509
}
24872510
else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2488-
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2511+
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
24892512
}
24902513
break;
24912514
case BINARY_OP:
2492-
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2515+
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
24932516
break;
24942517
}
24952518
}
@@ -2534,16 +2557,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)
25342557
NOPs. Later those NOPs are removed.
25352558
*/
25362559
static int
2537-
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2560+
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
2561+
_Py_hashtable_t *consts_index, int firstlineno)
25382562
{
25392563
assert(PyDict_CheckExact(const_cache));
25402564
RETURN_IF_ERROR(check_cfg(g));
25412565
RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
25422566
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
25432567
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2544-
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2568+
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
25452569
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2546-
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2570+
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
25472571
}
25482572
RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
25492573
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
@@ -3663,7 +3687,36 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
36633687
RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
36643688

36653689
/** Optimization **/
3666-
RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3690+
3691+
/* Auxiliary pointer→index hashtable for O(1) lookup in add_const. */
3692+
_Py_hashtable_t *consts_index = _Py_hashtable_new(
3693+
_Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
3694+
if (consts_index == NULL) {
3695+
PyErr_NoMemory();
3696+
return ERROR;
3697+
}
3698+
3699+
/* Seed the index with pre-existing constants. */
3700+
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
3701+
PyObject *item = PyList_GET_ITEM(consts, i);
3702+
if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
3703+
continue; /* duplicate pointer; keep first occurrence */
3704+
}
3705+
if (_Py_hashtable_set(consts_index, (void *)item,
3706+
(void *)(uintptr_t)i) < 0) {
3707+
_Py_hashtable_destroy(consts_index);
3708+
PyErr_NoMemory();
3709+
return ERROR;
3710+
}
3711+
}
3712+
3713+
int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
3714+
3715+
/* consts_index is invalid after this (consts list may be modified). */
3716+
_Py_hashtable_destroy(consts_index);
3717+
3718+
RETURN_IF_ERROR(ret);
3719+
36673720
RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
36683721
RETURN_IF_ERROR(
36693722
add_checks_for_loads_of_uninitialized_variables(

0 commit comments

Comments
 (0)