From 672efc72997528c9b54eebcf74953c715e6d44e7 Mon Sep 17 00:00:00 2001 From: Changqing Jing Date: Thu, 16 Apr 2026 11:07:39 +0800 Subject: [PATCH 1/5] perf: cache repeated tree walks to avoid O(N^2) in optimizeTerminatingTails --- src/passes/CodeFolding.cpp | 42 +++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 1ccd0737f61..608fa7e2180 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -399,6 +399,41 @@ struct CodeFolding // inside that item bool canMove(const std::vector& items, Expression* outOf) { auto allTargets = BranchUtils::getBranchTargets(outOf); + bool hasTry = false; + bool hasTryTable = false; + if (getModule()->features.hasExceptionHandling()) { + hasTry = FindAll(outOf).has(); + hasTryTable = FindAll(outOf).has(); + } + return canMoveImpl(items, allTargets, hasTry, hasTryTable); + } + + // Cached data for the function body, computed on demand to avoid repeated + // O(N) tree walks in optimizeTerminatingTails. + BranchUtils::NameSet bodyBranchTargets; + bool bodyHasTry = false; + bool bodyHasTryTable = false; + bool bodyCachePopulated = false; + + // Like canMove, but uses precomputed branch targets and Try/TryTable + // presence. This avoids repeated O(N) tree walks when outOf is the + // function body and canMove is called multiple times. 
+ bool canMoveWithCachedBodyInfo(const std::vector& items) { + if (!bodyCachePopulated) { + bodyBranchTargets = BranchUtils::getBranchTargets(getFunction()->body); + if (getModule()->features.hasExceptionHandling()) { + bodyHasTry = FindAll(getFunction()->body).has(); + bodyHasTryTable = FindAll(getFunction()->body).has(); + } + bodyCachePopulated = true; + } + return canMoveImpl(items, bodyBranchTargets, bodyHasTry, bodyHasTryTable); + } + + bool canMoveImpl(const std::vector& items, + const BranchUtils::NameSet& allTargets, + bool hasTry, + bool hasTryTable) { for (auto* item : items) { auto exiting = BranchUtils::getExitingBranches(item); std::vector intersection; @@ -429,9 +464,7 @@ struct CodeFolding // conservative approximation because there can be cases that // 'try'/'try_table' is within the expression that may throw so it is // safe to take the expression out. - // TODO: optimize this check to avoid two FindAlls. - if (effects.throws() && - (FindAll(outOf).has() || FindAll(outOf).has())) { + if (effects.throws() && (hasTry || hasTryTable)) { return false; } } @@ -698,8 +731,7 @@ struct CodeFolding cost += WORTH_ADDING_BLOCK_TO_REMOVE_THIS_MUCH; // if we cannot merge to the end, then we definitely need 2 blocks, // and a branch - // TODO: efficiency, entire body - if (!canMove(items, getFunction()->body)) { + if (!canMoveWithCachedBodyInfo(items)) { cost += 1 + WORTH_ADDING_BLOCK_TO_REMOVE_THIS_MUCH; // TODO: to do this, we need to maintain a map of element=>parent, // so that we can insert the new blocks in the right place From 3bb2c606dcae6e6ffee3a563abb43d9be2d71b1a Mon Sep 17 00:00:00 2001 From: Changqing Jing Date: Thu, 7 May 2026 10:38:33 +0800 Subject: [PATCH 2/5] Reset bodyCachePopulated together with the other caches --- src/passes/CodeFolding.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 608fa7e2180..4068b504220 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -301,6 +301,7 @@ struct CodeFolding 
unoptimizables.clear(); modifieds.clear(); exitingBranchCache.clear(); + bodyCachePopulated = false; if (needEHFixups) { EHUtils::handleBlockNestedPops(func, *getModule()); } From c2470c3d5efb95a080f3754b6b3a26225e53b7a8 Mon Sep 17 00:00:00 2001 From: Changqing Jing Date: Thu, 7 May 2026 06:40:44 +0000 Subject: [PATCH 3/5] Pass body branch targets through recursion instead of caching in members --- src/passes/CodeFolding.cpp | 84 +++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 4068b504220..194ca0dd28b 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -301,7 +301,6 @@ struct CodeFolding unoptimizables.clear(); modifieds.clear(); exitingBranchCache.clear(); - bodyCachePopulated = false; if (needEHFixups) { EHUtils::handleBlockNestedPops(func, *getModule()); } @@ -400,41 +399,6 @@ struct CodeFolding // inside that item bool canMove(const std::vector& items, Expression* outOf) { auto allTargets = BranchUtils::getBranchTargets(outOf); - bool hasTry = false; - bool hasTryTable = false; - if (getModule()->features.hasExceptionHandling()) { - hasTry = FindAll(outOf).has(); - hasTryTable = FindAll(outOf).has(); - } - return canMoveImpl(items, allTargets, hasTry, hasTryTable); - } - - // Cached data for the function body, computed on demand to avoid repeated - // O(N) tree walks in optimizeTerminatingTails. - BranchUtils::NameSet bodyBranchTargets; - bool bodyHasTry = false; - bool bodyHasTryTable = false; - bool bodyCachePopulated = false; - - // Like canMove, but uses precomputed branch targets and Try/TryTable - // presence. This avoids repeated O(N) tree walks when outOf is the - // function body and canMove is called multiple times. 
- bool canMoveWithCachedBodyInfo(const std::vector& items) { - if (!bodyCachePopulated) { - bodyBranchTargets = BranchUtils::getBranchTargets(getFunction()->body); - if (getModule()->features.hasExceptionHandling()) { - bodyHasTry = FindAll(getFunction()->body).has(); - bodyHasTryTable = FindAll(getFunction()->body).has(); - } - bodyCachePopulated = true; - } - return canMoveImpl(items, bodyBranchTargets, bodyHasTry, bodyHasTryTable); - } - - bool canMoveImpl(const std::vector& items, - const BranchUtils::NameSet& allTargets, - bool hasTry, - bool hasTryTable) { for (auto* item : items) { auto exiting = BranchUtils::getExitingBranches(item); std::vector intersection; @@ -465,7 +429,8 @@ struct CodeFolding // conservative approximation because there can be cases that // 'try'/'try_table' is within the expression that may throw so it is // safe to take the expression out. - if (effects.throws() && (hasTry || hasTryTable)) { + if (effects.throws() && + (FindAll(outOf).has() || FindAll(outOf).has())) { return false; } } @@ -667,10 +632,19 @@ struct CodeFolding // equal in the last num items, so we can merge there, but we look for // deeper merges first. // returns whether we optimized something. - bool optimizeTerminatingTails(std::vector& tails, Index num = 0) { + bool optimizeTerminatingTails(std::vector& tails, + Index num = 0, + BranchUtils::NameSet* bodyTargets = nullptr) { if (tails.size() < 2) { return false; } + // Compute body branch targets once and share across recursive calls to + // avoid repeated O(N) tree walks. 
+ BranchUtils::NameSet localBodyTargets; + if (!bodyTargets) { + localBodyTargets = BranchUtils::getBranchTargets(getFunction()->body); + bodyTargets = &localBodyTargets; + } // remove things that are untoward and cannot be optimized tails.erase( std::remove_if(tails.begin(), @@ -731,8 +705,36 @@ struct CodeFolding // can be removed, though cost += WORTH_ADDING_BLOCK_TO_REMOVE_THIS_MUCH; // if we cannot merge to the end, then we definitely need 2 blocks, - // and a branch - if (!canMoveWithCachedBodyInfo(items)) { + // and a branch. Use the pre-computed bodyTargets to avoid repeated + // O(N) getBranchTargets calls. + auto* body = getFunction()->body; + bool canMoveItems = [&]() { + for (auto* item : items) { + auto exiting = BranchUtils::getExitingBranches(item); + std::vector intersection; + std::set_intersection(bodyTargets->begin(), + bodyTargets->end(), + exiting.begin(), + exiting.end(), + std::back_inserter(intersection)); + if (intersection.size() > 0) { + return false; + } + if (getModule()->features.hasExceptionHandling()) { + EffectAnalyzer effects(getPassOptions(), *getModule(), item); + if (effects.danglingPop) { + return false; + } + if (effects.throws() && + (FindAll(body).has() || + FindAll(body).has())) { + return false; + } + } + } + return true; + }(); + if (!canMoveItems) { cost += 1 + WORTH_ADDING_BLOCK_TO_REMOVE_THIS_MUCH; // TODO: to do this, we need to maintain a map of element=>parent, // so that we can insert the new blocks in the right place @@ -828,7 +830,7 @@ struct CodeFolding // as the changes may influence us. 
we leave further opts to further // passes (as this is rare in practice, it's generally not a perf // issue, but TODO optimize) - if (optimizeTerminatingTails(explore, num + 1)) { + if (optimizeTerminatingTails(explore, num + 1, bodyTargets)) { return true; } } From 52d145c057cc4edf51e2361547e22f049b9dba39 Mon Sep 17 00:00:00 2001 From: Changqing Jing Date: Thu, 7 May 2026 08:45:56 +0000 Subject: [PATCH 4/5] Add a canMove overload taking precomputed branch targets --- src/passes/CodeFolding.cpp | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 194ca0dd28b..9d0aa5e27ea 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -398,7 +398,14 @@ struct CodeFolding // if one of the items has a branch to something inside outOf that is not // inside that item bool canMove(const std::vector& items, Expression* outOf) { - auto allTargets = BranchUtils::getBranchTargets(outOf); + return canMove(items, outOf, BranchUtils::getBranchTargets(outOf)); + } + + // Overload that accepts pre-computed branch targets to avoid redundant + // O(N) getBranchTargets calls. + bool canMove(const std::vector& items, + Expression* outOf, + const BranchUtils::NameSet& allTargets) { for (auto* item : items) { auto exiting = BranchUtils::getExitingBranches(item); std::vector intersection; @@ -707,33 +714,7 @@ struct CodeFolding // if we cannot merge to the end, then we definitely need 2 blocks, // and a branch. Use the pre-computed bodyTargets to avoid repeated // O(N) getBranchTargets calls. 
- auto* body = getFunction()->body; - bool canMoveItems = [&]() { - for (auto* item : items) { - auto exiting = BranchUtils::getExitingBranches(item); - std::vector intersection; - std::set_intersection(bodyTargets->begin(), - bodyTargets->end(), - exiting.begin(), - exiting.end(), - std::back_inserter(intersection)); - if (intersection.size() > 0) { - return false; - } - if (getModule()->features.hasExceptionHandling()) { - EffectAnalyzer effects(getPassOptions(), *getModule(), item); - if (effects.danglingPop) { - return false; - } - if (effects.throws() && - (FindAll(body).has() || - FindAll(body).has())) { - return false; - } - } - } - return true; - }(); + bool canMoveItems = canMove(items, getFunction()->body, *bodyTargets); if (!canMoveItems) { cost += 1 + WORTH_ADDING_BLOCK_TO_REMOVE_THIS_MUCH; // TODO: to do this, we need to maintain a map of element=>parent, // so that we can insert the new blocks in the right place From 04e281e0f429f4caa8ca10fea169bb2e3215250d Mon Sep 17 00:00:00 2001 From: Changqing Jing Date: Thu, 7 May 2026 09:09:38 +0000 Subject: [PATCH 5/5] Compute body branch targets only when about to recurse --- src/passes/CodeFolding.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/passes/CodeFolding.cpp b/src/passes/CodeFolding.cpp index 9d0aa5e27ea..551173827f1 100644 --- a/src/passes/CodeFolding.cpp +++ b/src/passes/CodeFolding.cpp @@ -645,13 +645,7 @@ struct CodeFolding if (tails.size() < 2) { return false; } - // Compute body branch targets once and share across recursive calls to - // avoid repeated O(N) tree walks. BranchUtils::NameSet localBodyTargets; - if (!bodyTargets) { - localBodyTargets = BranchUtils::getBranchTargets(getFunction()->body); - bodyTargets = &localBodyTargets; - } // remove things that are untoward and cannot be optimized tails.erase( std::remove_if(tails.begin(), @@ -811,6 +805,13 @@ struct CodeFolding // as the changes may influence us. 
we leave further opts to further // passes (as this is rare in practice, it's generally not a perf // issue, but TODO optimize) + // Compute body branch targets once and share across recursive + // calls to avoid repeated O(N) tree walks. + if (!bodyTargets) { + localBodyTargets = + BranchUtils::getBranchTargets(getFunction()->body); + bodyTargets = &localBodyTargets; + } if (optimizeTerminatingTails(explore, num + 1, bodyTargets)) { return true; }