Skip to content

Commit 2397af0

Browse files
committed
Revert "Reapply "AMDGPU: Split block for si_end_cf""
This reverts commit eb43d3e. Change-Id: I3d2e12c87aa3dd6ecae9115832635381c67177b7
1 parent a28a3a7 commit 2397af0

7 files changed

+80
-236
lines changed

lib/Target/AMDGPU/SIInstrInfo.cpp

-7
Original file line number | Diff line number | Diff line change
@@ -1397,12 +1397,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
13971397
MI.setDesc(get(AMDGPU::S_OR_B32));
13981398
break;
13991399

1400-
case AMDGPU::S_OR_B64_term:
1401-
// This is only a terminator to get the correct spill code placement during
1402-
// register allocation.
1403-
MI.setDesc(get(AMDGPU::S_OR_B64));
1404-
break;
1405-
14061400
case AMDGPU::S_ANDN2_B64_term:
14071401
// This is only a terminator to get the correct spill code placement during
14081402
// register allocation.
@@ -1895,7 +1889,6 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
18951889
case AMDGPU::SI_MASK_BRANCH:
18961890
case AMDGPU::S_MOV_B64_term:
18971891
case AMDGPU::S_XOR_B64_term:
1898-
case AMDGPU::S_OR_B64_term:
18991892
case AMDGPU::S_ANDN2_B64_term:
19001893
case AMDGPU::S_MOV_B32_term:
19011894
case AMDGPU::S_XOR_B32_term:

lib/Target/AMDGPU/SIInstructions.td

-1
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,6 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
193193

194194
let WaveSizePredicate = isWave64 in {
195195
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
196-
def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
197196
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
198197
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
199198
}

lib/Target/AMDGPU/SILowerControlFlow.cpp

+18-118
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,6 @@
5555
#include "llvm/ADT/StringRef.h"
5656
#include "llvm/CodeGen/LiveIntervals.h"
5757
#include "llvm/CodeGen/MachineBasicBlock.h"
58-
#include "llvm/CodeGen/MachineDominators.h"
5958
#include "llvm/CodeGen/MachineFunction.h"
6059
#include "llvm/CodeGen/MachineFunctionPass.h"
6160
#include "llvm/CodeGen/MachineInstr.h"
@@ -80,16 +79,12 @@ class SILowerControlFlow : public MachineFunctionPass {
8079
private:
8180
const SIRegisterInfo *TRI = nullptr;
8281
const SIInstrInfo *TII = nullptr;
83-
MachineRegisterInfo *MRI = nullptr;
8482
LiveIntervals *LIS = nullptr;
85-
MachineDominatorTree *DT = nullptr;
86-
MachineLoopInfo *MLI = nullptr;
87-
83+
MachineRegisterInfo *MRI = nullptr;
8884

8985
const TargetRegisterClass *BoolRC = nullptr;
9086
unsigned AndOpc;
9187
unsigned OrOpc;
92-
unsigned OrTermOpc;
9388
unsigned XorOpc;
9489
unsigned MovTermOpc;
9590
unsigned Andn2TermOpc;
@@ -126,7 +121,7 @@ class SILowerControlFlow : public MachineFunctionPass {
126121
AU.addPreservedID(LiveVariablesID);
127122
AU.addPreservedID(MachineLoopInfoID);
128123
AU.addPreservedID(MachineDominatorsID);
129-
124+
AU.setPreservesCFG();
130125
MachineFunctionPass::getAnalysisUsage(AU);
131126
}
132127
};
@@ -254,7 +249,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
254249
LIS->InsertMachineInstrInMaps(*SetExec);
255250
LIS->InsertMachineInstrInMaps(*NewBr);
256251

257-
LIS->removeAllRegUnitsForPhysReg(Exec);
252+
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
258253
MI.eraseFromParent();
259254

260255
// FIXME: Is there a better way of adjusting the liveness? It shouldn't be
@@ -338,7 +333,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
338333
LIS->createAndComputeVirtRegInterval(SaveReg);
339334

340335
// Let this be recomputed.
341-
LIS->removeAllRegUnitsForPhysReg(Exec);
336+
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
342337
}
343338

344339
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
@@ -403,99 +398,23 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
403398
MI.eraseFromParent();
404399
}
405400

406-
// Insert \p Inst (which modifies exec) at \p InsPt in \p MBB, such that \p MBB
407-
// is split as necessary to keep the exec modification in its own block.
408-
static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB,
409-
MachineInstr &MI,
410-
MachineInstr *NewMI,
411-
MachineDominatorTree *DT,
412-
LiveIntervals *LIS,
413-
MachineLoopInfo *MLI) {
414-
assert(NewMI->isTerminator());
415-
416-
MachineBasicBlock::iterator InsPt = MI.getIterator();
417-
if (std::next(MI.getIterator()) == MBB.end()) {
418-
// Don't bother with a new block.
419-
MBB.insert(InsPt, NewMI);
420-
if (LIS)
421-
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
422-
MI.eraseFromParent();
423-
return &MBB;
424-
}
425-
426-
MachineFunction *MF = MBB.getParent();
427-
MachineBasicBlock *SplitMBB
428-
= MF->CreateMachineBasicBlock(MBB.getBasicBlock());
429-
430-
MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
431-
432-
// FIXME: This is working around a MachineDominatorTree API defect.
433-
//
434-
// If a previous pass split a critical edge, it may not have been applied to
435-
// the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
436-
// the CFG of the given block. Make sure to call a dominator tree method that
437-
// will flush this cache before touching the successors of the block.
438-
MachineDomTreeNode *NodeMBB = nullptr;
439-
if (DT)
440-
NodeMBB = DT->getNode(&MBB);
441-
442-
// Move everything to the new block, except the end_cf pseudo.
443-
SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());
444-
445-
SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
446-
MBB.addSuccessor(SplitMBB, BranchProbability::getOne());
447-
448-
MBB.insert(MBB.end(), NewMI);
449-
450-
if (DT) {
451-
std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
452-
DT->addNewBlock(SplitMBB, &MBB);
453-
454-
// Reparent all of the children to the new block body.
455-
auto *SplitNode = DT->getNode(SplitMBB);
456-
for (auto *Child : Children)
457-
DT->changeImmediateDominator(Child, SplitNode);
458-
}
459-
460-
if (MLI) {
461-
if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
462-
Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
463-
}
464-
465-
if (LIS) {
466-
LIS->insertMBBInMaps(SplitMBB);
467-
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
468-
}
469-
470-
// All live-ins are forwarded.
471-
for (auto &LiveIn : MBB.liveins())
472-
SplitMBB->addLiveIn(LiveIn);
473-
474-
MI.eraseFromParent();
475-
return SplitMBB;
476-
}
477-
478401
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
479402
MachineBasicBlock &MBB = *MI.getParent();
480403
const DebugLoc &DL = MI.getDebugLoc();
481404

482405
MachineBasicBlock::iterator InsPt = MBB.begin();
406+
MachineInstr *NewMI =
407+
BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
408+
.addReg(Exec)
409+
.add(MI.getOperand(0));
483410

484-
// First, move the instruction. It's unnecessarily difficult to update
485-
// LiveIntervals when there's a change in control flow, so move the
486-
// instruction before changing the blocks.
487-
MBB.splice(InsPt, &MBB, MI.getIterator());
488411
if (LIS)
489-
LIS->handleMove(MI);
412+
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
490413

491-
MachineFunction *MF = MBB.getParent();
414+
MI.eraseFromParent();
492415

493-
// Create instruction without inserting it yet.
494-
MachineInstr *NewMI
495-
= BuildMI(*MF, DL, TII->get(OrTermOpc), Exec)
496-
.addReg(Exec)
497-
.add(MI.getOperand(0));
498-
insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
416+
if (LIS)
417+
LIS->handleMove(*NewMI);
499418
}
500419

501420
// Returns replace operands for a logical operation, either single result
@@ -517,7 +436,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
517436
// A copy with implicitly defined exec inserted earlier is an exclusion, it
518437
// does not really modify exec.
519438
for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
520-
if (I->modifiesRegister(Exec, TRI) &&
439+
if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
521440
!(I->isCopy() && I->getOperand(0).getReg() != Exec))
522441
return;
523442

@@ -560,16 +479,12 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
560479

561480
// This doesn't actually need LiveIntervals, but we can preserve them.
562481
LIS = getAnalysisIfAvailable<LiveIntervals>();
563-
DT = getAnalysisIfAvailable<MachineDominatorTree>();
564-
MLI = getAnalysisIfAvailable<MachineLoopInfo>();
565-
566482
MRI = &MF.getRegInfo();
567483
BoolRC = TRI->getBoolRC();
568484

569485
if (ST.isWave32()) {
570486
AndOpc = AMDGPU::S_AND_B32;
571487
OrOpc = AMDGPU::S_OR_B32;
572-
OrTermOpc = AMDGPU::S_OR_B32_term;
573488
XorOpc = AMDGPU::S_XOR_B32;
574489
MovTermOpc = AMDGPU::S_MOV_B32_term;
575490
Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
@@ -579,7 +494,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
579494
} else {
580495
AndOpc = AMDGPU::S_AND_B64;
581496
OrOpc = AMDGPU::S_OR_B64;
582-
OrTermOpc = AMDGPU::S_OR_B64_term;
583497
XorOpc = AMDGPU::S_XOR_B64;
584498
MovTermOpc = AMDGPU::S_MOV_B64_term;
585499
Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
@@ -592,11 +506,11 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
592506
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
593507
BI != BE; BI = NextBB) {
594508
NextBB = std::next(BI);
595-
MachineBasicBlock *MBB = &*BI;
509+
MachineBasicBlock &MBB = *BI;
596510

597511
MachineBasicBlock::iterator I, Next, Last;
598512

599-
for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
513+
for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
600514
Next = std::next(I);
601515
MachineInstr &MI = *I;
602516

@@ -617,24 +531,10 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
617531
emitLoop(MI);
618532
break;
619533

620-
case AMDGPU::SI_END_CF: {
621-
MachineInstr *NextMI = nullptr;
622-
623-
if (Next != MBB->end())
624-
NextMI = &*Next;
625-
534+
case AMDGPU::SI_END_CF:
626535
emitEndCf(MI);
627-
628-
if (NextMI) {
629-
MBB = NextMI->getParent();
630-
Next = NextMI->getIterator();
631-
Last = MBB->end();
632-
}
633-
634-
NextBB = std::next(MBB->getIterator());
635-
BE = MF.end();
636536
break;
637-
}
537+
638538
case AMDGPU::S_AND_B64:
639539
case AMDGPU::S_OR_B64:
640540
case AMDGPU::S_AND_B32:
@@ -650,7 +550,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
650550
}
651551

652552
// Replay newly inserted code to combine masks
653-
Next = (Last == MBB->end()) ? MBB->begin() : Last;
553+
Next = (Last == MBB.end()) ? MBB.begin() : Last;
654554
}
655555
}
656556

lib/Target/AMDGPU/SIOptimizeExecMasking.cpp

-6
Original file line number | Diff line number | Diff line change
@@ -202,12 +202,6 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
202202
MI.setDesc(TII.get(AMDGPU::S_OR_B32));
203203
return true;
204204
}
205-
case AMDGPU::S_OR_B64_term: {
206-
// This is only a terminator to get the correct spill code placement during
207-
// register allocation.
208-
MI.setDesc(TII.get(AMDGPU::S_OR_B64));
209-
return true;
210-
}
211205
case AMDGPU::S_ANDN2_B64_term: {
212206
// This is only a terminator to get the correct spill code placement during
213207
// register allocation.

lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp

+7-7
Original file line number | Diff line number | Diff line change
@@ -82,14 +82,14 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
8282
return new SIOptimizeExecMaskingPreRA();
8383
}
8484

85-
static bool isEndCF(const MachineInstr &MI, const GCNSubtarget &ST,
86-
const SIRegisterInfo *TRI) {
85+
static bool isEndCF(const MachineInstr &MI, const SIRegisterInfo *TRI,
86+
const GCNSubtarget &ST) {
8787
if (ST.isWave32()) {
88-
return MI.getOpcode() == AMDGPU::S_OR_B32_term &&
88+
return MI.getOpcode() == AMDGPU::S_OR_B32 &&
8989
MI.modifiesRegister(AMDGPU::EXEC_LO, TRI);
9090
}
9191

92-
return MI.getOpcode() == AMDGPU::S_OR_B64_term &&
92+
return MI.getOpcode() == AMDGPU::S_OR_B64 &&
9393
MI.modifiesRegister(AMDGPU::EXEC, TRI);
9494
}
9595

@@ -379,13 +379,13 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
379379

380380
// Try to collapse adjacent endifs.
381381
auto E = MBB.end();
382-
auto Lead = MBB.getFirstTerminator();
383-
if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, ST, TRI))
382+
auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
383+
if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI, ST))
384384
continue;
385385

386386
MachineBasicBlock *TmpMBB = &MBB;
387387
auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead));
388-
if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, ST, TRI) ||
388+
if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI, ST) ||
389389
!getOrExecSource(*NextLead, *TII, MRI, ST))
390390
continue;
391391

0 commit comments

Comments
 (0)