@@ -1059,6 +1059,7 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
1059
1059
Register DstReg = MI.getOperand (0 ).getReg ();
1060
1060
const LLT LoadTy = MRI.getType (DstReg);
1061
1061
unsigned LoadSize = LoadTy.getSizeInBits ();
1062
+ MachineMemOperand *MMO = *MI.memoperands_begin ();
1062
1063
const unsigned MaxNonSmrdLoadSize = 128 ;
1063
1064
1064
1065
const RegisterBank *DstBank =
@@ -1069,7 +1070,6 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
1069
1070
if (LoadSize != 32 && (LoadSize != 96 || Subtarget.hasScalarDwordx3Loads ()))
1070
1071
return false ;
1071
1072
1072
- MachineMemOperand *MMO = *MI.memoperands_begin ();
1073
1073
const unsigned MemSize = 8 * MMO->getSize ().getValue ();
1074
1074
// Scalar loads of size 8 or 16 bit with proper alignment may be widened to
1075
1075
// 32 bit. Check to see if we need to widen the memory access, 8 or 16 bit
@@ -1141,25 +1141,29 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
1141
1141
if (SrcRegs.empty ())
1142
1142
SrcRegs.push_back (MI.getOperand (1 ).getReg ());
1143
1143
1144
- assert (LoadSize % MaxNonSmrdLoadSize == 0 );
1145
-
1146
1144
// RegBankSelect only emits scalar types, so we need to reset the pointer
1147
1145
// operand to a pointer type.
1148
1146
Register BasePtrReg = SrcRegs[0 ];
1149
1147
LLT PtrTy = MRI.getType (MI.getOperand (1 ).getReg ());
1150
1148
MRI.setType (BasePtrReg, PtrTy);
1151
1149
1152
- unsigned NumSplitParts = LoadTy.getSizeInBits () / MaxNonSmrdLoadSize;
1153
- const LLT LoadSplitTy = LoadTy.divide (NumSplitParts);
1154
- ApplyRegBankMapping O (B, *this , MRI, &AMDGPU::VGPRRegBank);
1155
- LegalizerHelper Helper (B.getMF (), O, B);
1156
-
1157
- if (LoadTy.isVector ()) {
1158
- if (Helper.fewerElementsVector (MI, 0 , LoadSplitTy) != LegalizerHelper::Legalized)
1159
- return false ;
1160
- } else {
1161
- if (Helper.narrowScalar (MI, 0 , LoadSplitTy) != LegalizerHelper::Legalized)
1162
- return false ;
1150
+ // The following loads were not split enough during legalization
1151
+ // because it was not yet clear whether they are smem or vmem loads.
1152
+ if (AMDGPU::isExtendedGlobalAddrSpace (MMO->getAddrSpace ()) ||
1153
+ MMO->getAddrSpace () == AMDGPUAS::BUFFER_RESOURCE) {
1154
+ assert (LoadSize % MaxNonSmrdLoadSize == 0 );
1155
+ unsigned NumSplitParts = LoadTy.getSizeInBits () / MaxNonSmrdLoadSize;
1156
+ const LLT LoadSplitTy = LoadTy.divide (NumSplitParts);
1157
+ ApplyRegBankMapping O (B, *this , MRI, &AMDGPU::VGPRRegBank);
1158
+ LegalizerHelper Helper (B.getMF (), O, B);
1159
+ if (LoadTy.isVector ()) {
1160
+ if (Helper.fewerElementsVector (MI, 0 , LoadSplitTy) !=
1161
+ LegalizerHelper::Legalized)
1162
+ return false ;
1163
+ } else {
1164
+ if (Helper.narrowScalar (MI, 0 , LoadSplitTy) != LegalizerHelper::Legalized)
1165
+ return false ;
1166
+ }
1163
1167
}
1164
1168
1165
1169
MRI.setRegBank (DstReg, AMDGPU::VGPRRegBank);
0 commit comments