From 464405fb9029cebbe0ad1bec9166a5276cf21af4 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 10 Mar 2025 13:20:53 -0400 Subject: [PATCH] Only run switch peeling once --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fginline.cpp | 17 +- src/coreclr/jit/fgopt.cpp | 214 +++++++++++--------------- src/coreclr/jit/switchrecognition.cpp | 32 ++-- 4 files changed, 122 insertions(+), 143 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 64157e335097c7..be3e368466f1e7 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6184,7 +6184,7 @@ class Compiler bool fgOptimizeSwitchBranches(BasicBlock* block); - bool fgOptimizeSwitchJumps(); + void fgPeelSwitch(BasicBlock* block); #ifdef DEBUG void fgPrintEdgeWeights(); #endif diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 5b9052b28c6699..3d1e34a5aa1a19 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1650,13 +1650,18 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) info.compNeedsConsecutiveRegisters |= InlineeCompiler->info.compNeedsConsecutiveRegisters; #endif - // If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler. - // TODO-CQ: Implement hot/cold splitting of methods with switch tables. - if (InlineeCompiler->fgHasSwitch && opts.compProcedureSplitting) + if (InlineeCompiler->fgHasSwitch) { - opts.compProcedureSplitting = false; - JITDUMP("Turning off procedure splitting for this method, as inlinee compiler encountered switch tables; " - "implementation limitation.\n"); + fgHasSwitch = true; + + // If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler. + // TODO-CQ: Implement hot/cold splitting of methods with switch tables. + if (opts.compProcedureSplitting) + { + opts.compProcedureSplitting = false; + JITDUMP("Turning off procedure splitting for this method, as inlinee compiler encountered switch tables; " + "implementation limitation.\n"); + } } #ifdef FEATURE_SIMD diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 170c1aecdb870e..4002b71210907c 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2825,140 +2825,113 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) } //----------------------------------------------------------------------------- -// fgOptimizeSwitchJump: see if a switch has a dominant case, and modify to -// check for that case up front (aka switch peeling). +// fgPeelSwitch: Modify a switch to check for its dominant case up front. // -// Returns: -// True if the switch now has an upstream check for the dominant case. +// Parameters: +// block - The switch block with a dominant case // -bool Compiler::fgOptimizeSwitchJumps() +void Compiler::fgPeelSwitch(BasicBlock* block) { - if (!fgHasSwitch) - { - return false; - } - - bool modified = false; - - for (BasicBlock* const block : Blocks()) - { - // Lowering expands switches, so calling this method on lowered IR - // does not make sense. - // - assert(!block->IsLIR()); - - if (!block->KindIs(BBJ_SWITCH)) - { - continue; - } - - if (block->isRunRarely()) - { - continue; - } - - if (!block->GetSwitchTargets()->bbsHasDominantCase) - { - continue; - } + assert(block->KindIs(BBJ_SWITCH)); + assert(block->GetSwitchTargets()->bbsHasDominantCase); + assert(!block->isRunRarely()); - // We currently will only see dominant cases with PGO. - // - assert(block->hasProfileWeight()); + // Lowering expands switches, so calling this method on lowered IR + // does not make sense. + // + assert(!block->IsLIR()); - const unsigned dominantCase = block->GetSwitchTargets()->bbsDominantCase; + // We currently will only see dominant cases with PGO. + // + assert(block->hasProfileWeight()); - JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase); + const unsigned dominantCase = block->GetSwitchTargets()->bbsDominantCase; + JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase); - // The dominant case should not be the default case, as we already peel that one. - // - assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1)); - BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock(); - Statement* const switchStmt = block->lastStmt(); - GenTree* const switchTree = switchStmt->GetRootNode(); - assert(switchTree->OperIs(GT_SWITCH)); - GenTree* const switchValue = switchTree->AsOp()->gtGetOp1(); - - // Split the switch block just before at the switch. - // - // After this, newBlock is the switch block, and - // block is the upstream block. - // - BasicBlock* newBlock = nullptr; - - if (block->firstStmt() == switchStmt) - { - newBlock = fgSplitBlockAtBeginning(block); - } - else - { - newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt()); - } + // The dominant case should not be the default case, as we already peel that one. + // + assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1)); + BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock(); + Statement* const switchStmt = block->lastStmt(); + GenTree* const switchTree = switchStmt->GetRootNode(); + assert(switchTree->OperIs(GT_SWITCH)); + GenTree* const switchValue = switchTree->gtGetOp1(); + + // Split the switch block just before at the switch. + // + // After this, newBlock is the switch block, and + // block is the upstream block. + // + BasicBlock* newBlock = nullptr; - // Set up a compare in the upstream block, "stealing" the switch value tree. - // - GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase)); - GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare); - Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetDebugInfo()); - fgInsertStmtAtEnd(block, jmpStmt); + if (block->firstStmt() == switchStmt) + { + newBlock = fgSplitBlockAtBeginning(block); + } + else + { + newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt()); + } - // Reattach switch value to the switch. This may introduce a comma - // in the upstream compare tree, if the switch value expression is complex. - // - switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1); + // Set up a compare in the upstream block, "stealing" the switch value tree. + // + GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase)); + GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare); + Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetDebugInfo()); + fgInsertStmtAtEnd(block, jmpStmt); - // Update flags - // - switchTree->gtFlags = switchTree->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT; - dominantCaseCompare->gtFlags |= dominantCaseCompare->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT; - jmpTree->gtFlags |= dominantCaseCompare->gtFlags & GTF_ALL_EFFECT; - dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE; + // Reattach switch value to the switch. This may introduce a comma + // in the upstream compare tree, if the switch value expression is complex. + // + switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1); - // Wire up the new control flow. - // - FlowEdge* const blockToTargetEdge = fgAddRefPred(dominantTarget, block); - FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds; - block->SetCond(blockToTargetEdge, blockToNewBlockEdge); + // Update flags + // + switchTree->gtFlags = switchTree->gtGetOp1()->gtFlags & GTF_ALL_EFFECT; + dominantCaseCompare->gtFlags |= dominantCaseCompare->gtGetOp1()->gtFlags & GTF_ALL_EFFECT; + jmpTree->gtFlags |= dominantCaseCompare->gtFlags & GTF_ALL_EFFECT; + dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE; - // Update profile data - // - const weight_t fraction = newBlock->GetSwitchTargets()->bbsDominantFraction; - const weight_t blockToTargetWeight = block->bbWeight * fraction; + // Wire up the new control flow. + // + FlowEdge* const blockToTargetEdge = fgAddRefPred(dominantTarget, block); + FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds; + block->SetCond(blockToTargetEdge, blockToNewBlockEdge); - newBlock->decreaseBBProfileWeight(blockToTargetWeight); + // Update profile data + // + const weight_t fraction = newBlock->GetSwitchTargets()->bbsDominantFraction; + const weight_t blockToTargetWeight = block->bbWeight * fraction; - blockToTargetEdge->setLikelihood(fraction); - blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction)); + newBlock->decreaseBBProfileWeight(blockToTargetWeight); - JITDUMP("fgOptimizeSwitchJumps: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n", - newBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); - fgPgoConsistent = false; + blockToTargetEdge->setLikelihood(fraction); + blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction)); - // For now we leave the switch as is, since there's no way - // to indicate that one of the cases is now unreachable. - // - // But it no longer has a dominant case. - // - newBlock->GetSwitchTargets()->bbsHasDominantCase = false; + JITDUMP("fgPeelSwitch: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n", + newBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; - if (fgNodeThreading == NodeThreading::AllTrees) - { - // The switch tree has been modified. - JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID()); - gtSetStmtInfo(switchStmt); - fgSetStmtSeq(switchStmt); + // For now we leave the switch as is, since there's no way + // to indicate that one of the cases is now unreachable. + // + // But it no longer has a dominant case. + // + newBlock->GetSwitchTargets()->bbsHasDominantCase = false; - // fgNewStmtFromTree() already threaded the tree, but calling fgMakeMultiUse() might have - // added new nodes if a COMMA was introduced. - JITDUMP("Rethreading " FMT_STMT "\n", jmpStmt->GetID()); - gtSetStmtInfo(jmpStmt); - fgSetStmtSeq(jmpStmt); - } + if (fgNodeThreading == NodeThreading::AllTrees) + { + // The switch tree has been modified. + JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID()); + gtSetStmtInfo(switchStmt); + fgSetStmtSeq(switchStmt); - modified = true; + // fgNewStmtFromTree() already threaded the tree, but calling fgMakeMultiUse() might have + // added new nodes if a COMMA was introduced. + JITDUMP("Rethreading " FMT_STMT "\n", jmpStmt->GetID()); + gtSetStmtInfo(jmpStmt); + fgSetStmtSeq(jmpStmt); } - - return modified; } //----------------------------------------------------------------------------- @@ -3317,19 +3290,6 @@ bool Compiler::fgReorderBlocks(bool useProfile) } #endif // FEATURE_EH_WINDOWS_X86 - // - // If we are using profile weights we can change some - // switch jumps into conditional test and jump - // - if (fgIsUsingProfileWeights()) - { - optimizedSwitches = fgOptimizeSwitchJumps(); - if (optimizedSwitches) - { - fgUpdateFlowGraph(); - } - } - if (useProfile) { // Don't run the new layout until we get to the backend, diff --git a/src/coreclr/jit/switchrecognition.cpp b/src/coreclr/jit/switchrecognition.cpp index 28fbe13a53e2cb..739a8eda4ab1ac 100644 --- a/src/coreclr/jit/switchrecognition.cpp +++ b/src/coreclr/jit/switchrecognition.cpp @@ -25,25 +25,39 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps() { bool modified = false; + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next()) + { + if (block->isRunRarely()) + { + continue; + } + // Limit to XARCH, ARM is already doing a great job with such comparisons using // a series of ccmp instruction (see ifConvert phase). #ifdef TARGET_XARCH - for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next()) - { // block->KindIs(BBJ_COND) check is for better throughput. - if (block->KindIs(BBJ_COND) && !block->isRunRarely() && optSwitchDetectAndConvert(block)) + if (block->KindIs(BBJ_COND) && optSwitchDetectAndConvert(block)) { JITDUMP("Converted block " FMT_BB " to switch\n", block->bbNum) modified = true; + + // Converted switches won't have dominant cases, so we can skip the switch peeling check. + assert(!block->GetSwitchTargets()->bbsHasDominantCase); } - } + else #endif - // When we have profile data, we can identify a switch's dominant case. - // Try peeling the dominant case by checking for it up front. - if (fgIsUsingProfileWeights()) - { - modified |= fgOptimizeSwitchJumps(); + if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->bbsHasDominantCase) + { + fgPeelSwitch(block); + modified = true; + + // Switch peeling will convert this block into a check for the dominant case, + // and insert the updated switch block after, which doesn't have a dominant case. + // Skip over the switch block in the loop iteration. + assert(block->Next()->KindIs(BBJ_SWITCH)); + block = block->Next(); + } } return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;