Skip to content

Commit 939a662

Browse files
authored
[AMDGPU] Implement workaround for GFX11.5 export priority (#99273)
On GFX11.5, shaders that have completed their exports need to execute/wait at a lower priority than shaders that are still executing exports. Add code to maintain the normal priority of 2 for shaders that export, and to drop to priority 0 after the exports.
1 parent 6db5f4f commit 939a662

File tree

6 files changed

+768
-4
lines changed

6 files changed

+768
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

+14-4
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
947947
"Has restricted SOffset (immediate not supported)."
948948
>;
949949

950+
// Subtarget feature "required-export-priority": when enabled it sets the
// subtarget field HasRequiredExportPriority to "true". The description string
// below states that export priority must be explicitly manipulated on GFX11.5.
def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
951+
"HasRequiredExportPriority",
952+
"true",
953+
"Export priority must be explicitly manipulated on GFX11.5"
954+
>;
955+
950956
//===------------------------------------------------------------===//
951957
// Subtarget Features (options and debugging)
952958
//===------------------------------------------------------------===//
@@ -1567,7 +1573,8 @@ def FeatureISAVersion11_Generic: FeatureSet<
15671573
FeatureUserSGPRInit16Bug,
15681574
FeatureMADIntraFwdBug,
15691575
FeaturePrivEnabledTrap2NopBug,
1570-
FeatureRequiresCOV6])>;
1576+
FeatureRequiresCOV6,
1577+
FeatureRequiredExportPriority])>;
15711578

15721579
def FeatureISAVersion11_0_Common : FeatureSet<
15731580
!listconcat(FeatureISAVersion11_Common.Features,
@@ -1597,20 +1604,23 @@ def FeatureISAVersion11_5_0 : FeatureSet<
15971604
!listconcat(FeatureISAVersion11_Common.Features,
15981605
[FeatureSALUFloatInsts,
15991606
FeatureDPPSrc1SGPR,
1600-
FeatureVGPRSingleUseHintInsts])>;
1607+
FeatureVGPRSingleUseHintInsts,
1608+
FeatureRequiredExportPriority])>;
16011609

16021610
def FeatureISAVersion11_5_1 : FeatureSet<
16031611
!listconcat(FeatureISAVersion11_Common.Features,
16041612
[FeatureSALUFloatInsts,
16051613
FeatureDPPSrc1SGPR,
16061614
FeatureVGPRSingleUseHintInsts,
1607-
Feature1_5xVGPRs])>;
1615+
Feature1_5xVGPRs,
1616+
FeatureRequiredExportPriority])>;
16081617

16091618
def FeatureISAVersion11_5_2 : FeatureSet<
16101619
!listconcat(FeatureISAVersion11_Common.Features,
16111620
[FeatureSALUFloatInsts,
16121621
FeatureDPPSrc1SGPR,
1613-
FeatureVGPRSingleUseHintInsts])>;
1622+
FeatureVGPRSingleUseHintInsts,
1623+
FeatureRequiredExportPriority])>;
16141624

16151625
def FeatureISAVersion12 : FeatureSet<
16161626
[FeatureGFX12,

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

+112
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "GCNSubtarget.h"
1515
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1616
#include "SIMachineFunctionInfo.h"
17+
#include "llvm/CodeGen/MachineFrameInfo.h"
1718
#include "llvm/CodeGen/MachineFunction.h"
1819
#include "llvm/CodeGen/ScheduleDAG.h"
1920
#include "llvm/TargetParser/TargetParser.h"
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
11041105
fixWMMAHazards(MI);
11051106
fixShift64HighRegBug(MI);
11061107
fixVALUMaskWriteHazard(MI);
1108+
fixRequiredExportPriority(MI);
11071109
}
11081110

11091111
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
28952897

28962898
return true;
28972899
}
2900+
2901+
/// Make sure the entry block of \p MF begins with an S_SETPRIO of at least
/// \p Priority.
///
/// If the first instruction of the entry block is already an S_SETPRIO whose
/// immediate is greater than or equal to \p Priority, nothing is changed.
/// Otherwise a new "S_SETPRIO Priority" (with an empty DebugLoc) is inserted
/// at the very start of the entry block.
///
/// \returns true if an instruction was inserted, false otherwise.
static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
                               const SIInstrInfo &TII) {
  MachineBasicBlock &Entry = MF->front();
  auto First = Entry.begin();

  // Nothing to do when the block already opens with a sufficient S_SETPRIO.
  const bool AlreadyRaised = First != Entry.end() &&
                             First->getOpcode() == AMDGPU::S_SETPRIO &&
                             First->getOperand(0).getImm() >= Priority;
  if (AlreadyRaised)
    return false;

  BuildMI(Entry, First, DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
      .addImm(Priority);
  return true;
}
2915+
2916+
/// Apply the required-export-priority workaround at \p MI.
///
/// Does nothing unless the subtarget reports hasRequiredExportPriority() and
/// the function's calling convention is not one of the compute/kernel
/// conventions, which are assumed never to export. Otherwise, depending on
/// \p MI:
///  - program end / return-to-epilog: if the function has calls, make sure the
///    entry block starts with S_SETPRIO at normal priority;
///  - S_SETPRIO: raise a below-normal priority by the normal priority (capped
///    at the maximum), unless it is the workaround's own post-export drop;
///  - export: make sure the entry block raises priority (not for amdgpu_gfx)
///    and, at the end of a run of exports, insert the lower-priority / wait /
///    nop / restore-priority sequence after it.
///
/// \returns true if any instruction was inserted or modified.
bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
  if (!ST.hasRequiredExportPriority())
    return false;

  MachineBasicBlock *Block = MI->getParent();
  MachineFunction *Func = Block->getParent();

  // Assume the following shader types will never have exports,
  // and avoid adding or adjusting S_SETPRIO.
  auto FnCC = Func->getFunction().getCallingConv();
  if (FnCC == CallingConv::AMDGPU_CS ||
      FnCC == CallingConv::AMDGPU_CS_Chain ||
      FnCC == CallingConv::AMDGPU_CS_ChainPreserve ||
      FnCC == CallingConv::AMDGPU_KERNEL)
    return false;

  const int MaxPriority = 3;
  const int NormalPriority = 2;
  const int PostExportPriority = 0;

  auto Pos = MI->getIterator();
  const unsigned Opc = MI->getOpcode();

  // Program end: a shader with calls must raise priority at entry, so that
  // the priority is correct if exports exist in a callee.
  if (Opc == AMDGPU::S_ENDPGM || Opc == AMDGPU::S_ENDPGM_SAVED ||
      Opc == AMDGPU::S_ENDPGM_ORDERED_PS_DONE ||
      Opc == AMDGPU::SI_RETURN_TO_EPILOG)
    return Func->getFrameInfo().hasCalls() &&
           ensureEntrySetPrio(Func, NormalPriority, TII);

  // Existing S_SETPRIO: raise the minimum priority unless it is part of the
  // workaround sequence itself.
  if (Opc == AMDGPU::S_SETPRIO) {
    auto &PrioOp = MI->getOperand(0);
    int Prio = PrioOp.getImm();
    if (Prio >= NormalPriority)
      return false;
    // A drop to the post-export priority directly after an export is the
    // workaround's own S_SETPRIO; leave it alone.
    if (Prio == PostExportPriority && Pos != Block->begin() &&
        TII.isEXP(*std::prev(Pos)))
      return false;
    PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
    return true;
  }

  // Everything below only applies to export instructions.
  if (!TII.isEXP(*MI))
    return false;

  // Check entry priority at each export (as there will only be a few).
  // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
  const bool EntryChanged = FnCC != CallingConv::AMDGPU_Gfx &&
                            ensureEntrySetPrio(Func, NormalPriority, TII);

  auto After = std::next(Pos);
  bool AtProgramEnd = false;
  if (After != Block->end()) {
    // Only the last export of a sequence needs the workaround, and an
    // S_SETPRIO to the post-export priority right after the export is taken
    // to mean the workaround has already been applied.
    const bool MoreExports = TII.isEXP(*After);
    if (MoreExports ||
        (After->getOpcode() == AMDGPU::S_SETPRIO &&
         After->getOperand(0).getImm() == PostExportPriority))
      return EntryChanged;
    AtProgramEnd = After->getOpcode() == AMDGPU::S_ENDPGM;
  }

  const DebugLoc &DL = MI->getDebugLoc();
  auto EmitSetPrio = [&](int Level) {
    BuildMI(*Block, After, DL, TII.get(AMDGPU::S_SETPRIO)).addImm(Level);
  };

  // Lower priority.
  EmitSetPrio(PostExportPriority);

  // Wait for exports to complete (skipped when the program ends right away).
  if (!AtProgramEnd)
    BuildMI(*Block, After, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
        .addReg(AMDGPU::SGPR_NULL)
        .addImm(0);

  // Two S_NOP 0 instructions follow in every case.
  for (int I = 0; I != 2; ++I)
    BuildMI(*Block, After, DL, TII.get(AMDGPU::S_NOP)).addImm(0);

  // Return to normal (higher) priority unless the program ends here.
  if (!AtProgramEnd)
    EmitSetPrio(NormalPriority);

  return true;
}

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

+1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
107107
bool fixWMMAHazards(MachineInstr *MI);
108108
bool fixShift64HighRegBug(MachineInstr *MI);
109109
bool fixVALUMaskWriteHazard(MachineInstr *MI);
110+
bool fixRequiredExportPriority(MachineInstr *MI);
110111

111112
int checkMAIHazards(MachineInstr *MI);
112113
int checkMAIHazards908(MachineInstr *MI);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

+3
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
238238
bool HasVOPDInsts = false;
239239
bool HasVALUTransUseHazard = false;
240240
bool HasForceStoreSC0SC1 = false;
241+
bool HasRequiredExportPriority = false;
241242

242243
bool RequiresCOV6 = false;
243244

@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
12821283

12831284
/// \returns the HasRestrictedSOffset subtarget flag.
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
12841285

1286+
/// \returns the HasRequiredExportPriority subtarget flag (false by default).
bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
1287+
12851288
/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
12861289
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
12871290
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }

0 commit comments

Comments
 (0)