Skip to content

Commit 59e07f3

Browse files
authored
[AMDGPU][GlobalISel] Wrap the load-splitting code in RegBank selection with a condition (#98966)
The load-splitting code in RegBank selection is only relevant to the address spaces checked in this change (the extended global address spaces and BUFFER_RESOURCE), because those are the address spaces in which it is not clear during legalization how far a load has to be split. --------- Signed-off-by: gangc <[email protected]>
1 parent b00fdde commit 59e07f3

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

+18-14
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,7 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
10591059
Register DstReg = MI.getOperand(0).getReg();
10601060
const LLT LoadTy = MRI.getType(DstReg);
10611061
unsigned LoadSize = LoadTy.getSizeInBits();
1062+
MachineMemOperand *MMO = *MI.memoperands_begin();
10621063
const unsigned MaxNonSmrdLoadSize = 128;
10631064

10641065
const RegisterBank *DstBank =
@@ -1069,7 +1070,6 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
10691070
if (LoadSize != 32 && (LoadSize != 96 || Subtarget.hasScalarDwordx3Loads()))
10701071
return false;
10711072

1072-
MachineMemOperand *MMO = *MI.memoperands_begin();
10731073
const unsigned MemSize = 8 * MMO->getSize().getValue();
10741074
// Scalar loads of size 8 or 16 bit with proper alignment may be widened to
10751075
// 32 bit. Check to see if we need to widen the memory access, 8 or 16 bit
@@ -1141,25 +1141,29 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
11411141
if (SrcRegs.empty())
11421142
SrcRegs.push_back(MI.getOperand(1).getReg());
11431143

1144-
assert(LoadSize % MaxNonSmrdLoadSize == 0);
1145-
11461144
// RegBankSelect only emits scalar types, so we need to reset the pointer
11471145
// operand to a pointer type.
11481146
Register BasePtrReg = SrcRegs[0];
11491147
LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
11501148
MRI.setType(BasePtrReg, PtrTy);
11511149

1152-
unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
1153-
const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
1154-
ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
1155-
LegalizerHelper Helper(B.getMF(), O, B);
1156-
1157-
if (LoadTy.isVector()) {
1158-
if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
1159-
return false;
1160-
} else {
1161-
if (Helper.narrowScalar(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
1162-
return false;
1150+
// The following loads were not split enough during legalization
1151+
// because it was not clear whether they are SMEM or VMEM loads.
1152+
if (AMDGPU::isExtendedGlobalAddrSpace(MMO->getAddrSpace()) ||
1153+
MMO->getAddrSpace() == AMDGPUAS::BUFFER_RESOURCE) {
1154+
assert(LoadSize % MaxNonSmrdLoadSize == 0);
1155+
unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
1156+
const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
1157+
ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
1158+
LegalizerHelper Helper(B.getMF(), O, B);
1159+
if (LoadTy.isVector()) {
1160+
if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) !=
1161+
LegalizerHelper::Legalized)
1162+
return false;
1163+
} else {
1164+
if (Helper.narrowScalar(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
1165+
return false;
1166+
}
11631167
}
11641168

11651169
MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);

0 commit comments

Comments
 (0)