Skip to content

JIT: Run switch peeling only once #113326

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6184,7 +6184,7 @@ class Compiler

bool fgOptimizeSwitchBranches(BasicBlock* block);

bool fgOptimizeSwitchJumps();
void fgPeelSwitch(BasicBlock* block);
#ifdef DEBUG
void fgPrintEdgeWeights();
#endif
Expand Down
17 changes: 11 additions & 6 deletions src/coreclr/jit/fginline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1650,13 +1650,18 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
info.compNeedsConsecutiveRegisters |= InlineeCompiler->info.compNeedsConsecutiveRegisters;
#endif

// If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler.
// TODO-CQ: Implement hot/cold splitting of methods with switch tables.
if (InlineeCompiler->fgHasSwitch && opts.compProcedureSplitting)
if (InlineeCompiler->fgHasSwitch)
{
opts.compProcedureSplitting = false;
JITDUMP("Turning off procedure splitting for this method, as inlinee compiler encountered switch tables; "
"implementation limitation.\n");
fgHasSwitch = true;

// If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler.
// TODO-CQ: Implement hot/cold splitting of methods with switch tables.
if (opts.compProcedureSplitting)
{
opts.compProcedureSplitting = false;
JITDUMP("Turning off procedure splitting for this method, as inlinee compiler encountered switch tables; "
"implementation limitation.\n");
}
}

#ifdef FEATURE_SIMD
Expand Down
214 changes: 87 additions & 127 deletions src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2825,140 +2825,113 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
}

//-----------------------------------------------------------------------------
// fgOptimizeSwitchJump: see if a switch has a dominant case, and modify to
// check for that case up front (aka switch peeling).
// fgPeelSwitch: Modify a switch to check for its dominant case up front.
//
// Returns:
// True if the switch now has an upstream check for the dominant case.
// Parameters:
// block - The switch block with a dominant case
//
bool Compiler::fgOptimizeSwitchJumps()
void Compiler::fgPeelSwitch(BasicBlock* block)
{
if (!fgHasSwitch)
{
return false;
}

bool modified = false;

for (BasicBlock* const block : Blocks())
{
// Lowering expands switches, so calling this method on lowered IR
// does not make sense.
//
assert(!block->IsLIR());

if (!block->KindIs(BBJ_SWITCH))
{
continue;
}

if (block->isRunRarely())
{
continue;
}

if (!block->GetSwitchTargets()->bbsHasDominantCase)
{
continue;
}
assert(block->KindIs(BBJ_SWITCH));
assert(block->GetSwitchTargets()->bbsHasDominantCase);
assert(!block->isRunRarely());

// We currently will only see dominant cases with PGO.
//
assert(block->hasProfileWeight());
// Lowering expands switches, so calling this method on lowered IR
// does not make sense.
//
assert(!block->IsLIR());

const unsigned dominantCase = block->GetSwitchTargets()->bbsDominantCase;
// We currently will only see dominant cases with PGO.
//
assert(block->hasProfileWeight());

JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase);
const unsigned dominantCase = block->GetSwitchTargets()->bbsDominantCase;
JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase);

// The dominant case should not be the default case, as we already peel that one.
//
assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1));
BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock();
Statement* const switchStmt = block->lastStmt();
GenTree* const switchTree = switchStmt->GetRootNode();
assert(switchTree->OperIs(GT_SWITCH));
GenTree* const switchValue = switchTree->AsOp()->gtGetOp1();

// Split the switch block just before at the switch.
//
// After this, newBlock is the switch block, and
// block is the upstream block.
//
BasicBlock* newBlock = nullptr;

if (block->firstStmt() == switchStmt)
{
newBlock = fgSplitBlockAtBeginning(block);
}
else
{
newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt());
}
// The dominant case should not be the default case, as we already peel that one.
//
assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1));
BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock();
Statement* const switchStmt = block->lastStmt();
GenTree* const switchTree = switchStmt->GetRootNode();
assert(switchTree->OperIs(GT_SWITCH));
GenTree* const switchValue = switchTree->gtGetOp1();

// Split the switch block just before at the switch.
//
// After this, newBlock is the switch block, and
// block is the upstream block.
//
BasicBlock* newBlock = nullptr;

// Set up a compare in the upstream block, "stealing" the switch value tree.
//
GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase));
GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare);
Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetDebugInfo());
fgInsertStmtAtEnd(block, jmpStmt);
if (block->firstStmt() == switchStmt)
{
newBlock = fgSplitBlockAtBeginning(block);
}
else
{
newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt());
}

// Reattach switch value to the switch. This may introduce a comma
// in the upstream compare tree, if the switch value expression is complex.
//
switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1);
// Set up a compare in the upstream block, "stealing" the switch value tree.
//
GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase));
GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare);
Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetDebugInfo());
fgInsertStmtAtEnd(block, jmpStmt);

// Update flags
//
switchTree->gtFlags = switchTree->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT;
dominantCaseCompare->gtFlags |= dominantCaseCompare->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT;
jmpTree->gtFlags |= dominantCaseCompare->gtFlags & GTF_ALL_EFFECT;
dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE;
// Reattach switch value to the switch. This may introduce a comma
// in the upstream compare tree, if the switch value expression is complex.
//
switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1);

// Wire up the new control flow.
//
FlowEdge* const blockToTargetEdge = fgAddRefPred(dominantTarget, block);
FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds;
block->SetCond(blockToTargetEdge, blockToNewBlockEdge);
// Update flags
//
switchTree->gtFlags = switchTree->gtGetOp1()->gtFlags & GTF_ALL_EFFECT;
dominantCaseCompare->gtFlags |= dominantCaseCompare->gtGetOp1()->gtFlags & GTF_ALL_EFFECT;
jmpTree->gtFlags |= dominantCaseCompare->gtFlags & GTF_ALL_EFFECT;
dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE;

// Update profile data
//
const weight_t fraction = newBlock->GetSwitchTargets()->bbsDominantFraction;
const weight_t blockToTargetWeight = block->bbWeight * fraction;
// Wire up the new control flow.
//
FlowEdge* const blockToTargetEdge = fgAddRefPred(dominantTarget, block);
FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds;
block->SetCond(blockToTargetEdge, blockToNewBlockEdge);

newBlock->decreaseBBProfileWeight(blockToTargetWeight);
// Update profile data
//
const weight_t fraction = newBlock->GetSwitchTargets()->bbsDominantFraction;
const weight_t blockToTargetWeight = block->bbWeight * fraction;

blockToTargetEdge->setLikelihood(fraction);
blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction));
newBlock->decreaseBBProfileWeight(blockToTargetWeight);

JITDUMP("fgOptimizeSwitchJumps: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n",
newBlock->bbNum, fgPgoConsistent ? "is now" : "was already");
fgPgoConsistent = false;
blockToTargetEdge->setLikelihood(fraction);
blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction));

// For now we leave the switch as is, since there's no way
// to indicate that one of the cases is now unreachable.
//
// But it no longer has a dominant case.
//
newBlock->GetSwitchTargets()->bbsHasDominantCase = false;
JITDUMP("fgPeelSwitch: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n",
newBlock->bbNum, fgPgoConsistent ? "is now" : "was already");
fgPgoConsistent = false;

if (fgNodeThreading == NodeThreading::AllTrees)
{
// The switch tree has been modified.
JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID());
gtSetStmtInfo(switchStmt);
fgSetStmtSeq(switchStmt);
// For now we leave the switch as is, since there's no way
// to indicate that one of the cases is now unreachable.
//
// But it no longer has a dominant case.
//
newBlock->GetSwitchTargets()->bbsHasDominantCase = false;

// fgNewStmtFromTree() already threaded the tree, but calling fgMakeMultiUse() might have
// added new nodes if a COMMA was introduced.
JITDUMP("Rethreading " FMT_STMT "\n", jmpStmt->GetID());
gtSetStmtInfo(jmpStmt);
fgSetStmtSeq(jmpStmt);
}
if (fgNodeThreading == NodeThreading::AllTrees)
{
// The switch tree has been modified.
JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID());
gtSetStmtInfo(switchStmt);
fgSetStmtSeq(switchStmt);

modified = true;
// fgNewStmtFromTree() already threaded the tree, but calling fgMakeMultiUse() might have
// added new nodes if a COMMA was introduced.
JITDUMP("Rethreading " FMT_STMT "\n", jmpStmt->GetID());
gtSetStmtInfo(jmpStmt);
fgSetStmtSeq(jmpStmt);
}

return modified;
}

//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -3317,19 +3290,6 @@ bool Compiler::fgReorderBlocks(bool useProfile)
}
#endif // FEATURE_EH_WINDOWS_X86

//
// If we are using profile weights we can change some
// switch jumps into conditional test and jump
//
if (fgIsUsingProfileWeights())
{
optimizedSwitches = fgOptimizeSwitchJumps();
if (optimizedSwitches)
{
fgUpdateFlowGraph();
}
}

if (useProfile)
{
// Don't run the new layout until we get to the backend,
Expand Down
32 changes: 23 additions & 9 deletions src/coreclr/jit/switchrecognition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,39 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
{
bool modified = false;

for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
{
if (block->isRunRarely())
{
continue;
}

// Limit to XARCH, ARM is already doing a great job with such comparisons using
// a series of ccmp instruction (see ifConvert phase).
#ifdef TARGET_XARCH
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
{
// block->KindIs(BBJ_COND) check is for better throughput.
if (block->KindIs(BBJ_COND) && !block->isRunRarely() && optSwitchDetectAndConvert(block))
if (block->KindIs(BBJ_COND) && optSwitchDetectAndConvert(block))
{
JITDUMP("Converted block " FMT_BB " to switch\n", block->bbNum)
modified = true;

// Converted switches won't have dominant cases, so we can skip the switch peeling check.
assert(!block->GetSwitchTargets()->bbsHasDominantCase);
}
}
else
#endif

// When we have profile data, we can identify a switch's dominant case.
// Try peeling the dominant case by checking for it up front.
if (fgIsUsingProfileWeights())
{
modified |= fgOptimizeSwitchJumps();
if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->bbsHasDominantCase)
{
fgPeelSwitch(block);
modified = true;

// Switch peeling will convert this block into a check for the dominant case,
// and insert the updated switch block after, which doesn't have a dominant case.
// Skip over the switch block in the loop iteration.
assert(block->Next()->KindIs(BBJ_SWITCH));
block = block->Next();
}
}

return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
Expand Down
Loading