@@ -14,6 +14,7 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/TargetParser/TargetParser.h"
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
   fixWMMAHazards(MI);
   fixShift64HighRegBug(MI);
   fixVALUMaskWriteHazard(MI);
+  fixRequiredExportPriority(MI);
 }
 
 bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
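The one-line registration above implies a matching declaration beside the other fix* hooks. It is not shown in this excerpt, but a minimal sketch of what presumably lands in GCNHazardRecognizer.h (exact placement assumed):

    // GCNHazardRecognizer.h (sketch): declared alongside the other
    // per-instruction hazard fixers such as fixVALUMaskWriteHazard.
    bool fixRequiredExportPriority(MachineInstr *MI);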
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
 
   return true;
 }
+
+static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
+                               const SIInstrInfo &TII) {
+  MachineBasicBlock &EntryMBB = MF->front();
+  if (EntryMBB.begin() != EntryMBB.end()) {
+    auto &EntryMI = *EntryMBB.begin();
+    if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
+        EntryMI.getOperand(0).getImm() >= Priority)
+      return false;
+  }
+
+  BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
+      .addImm(Priority);
+  return true;
+}
+
+bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
+  if (!ST.hasRequiredExportPriority())
+    return false;
+
+  // Assume the following shader types will never have exports,
+  // and avoid adding or adjusting S_SETPRIO.
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction *MF = MBB->getParent();
+  auto CC = MF->getFunction().getCallingConv();
+  switch (CC) {
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_CS_Chain:
+  case CallingConv::AMDGPU_CS_ChainPreserve:
+  case CallingConv::AMDGPU_KERNEL:
+    return false;
+  default:
+    break;
+  }
+
+  const int MaxPriority = 3;
+  const int NormalPriority = 2;
+  const int PostExportPriority = 0;
+
+  auto It = MI->getIterator();
+  switch (MI->getOpcode()) {
+  case AMDGPU::S_ENDPGM:
+  case AMDGPU::S_ENDPGM_SAVED:
+  case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
+  case AMDGPU::SI_RETURN_TO_EPILOG:
+    // Ensure shader with calls raises priority at entry.
+    // This ensures correct priority if exports exist in callee.
+    if (MF->getFrameInfo().hasCalls())
+      return ensureEntrySetPrio(MF, NormalPriority, TII);
+    return false;
+  case AMDGPU::S_SETPRIO: {
+    // Raise minimum priority unless in workaround.
+    auto &PrioOp = MI->getOperand(0);
+    int Prio = PrioOp.getImm();
+    bool InWA = (Prio == PostExportPriority) &&
+                (It != MBB->begin() && TII.isEXP(*std::prev(It)));
+    if (InWA || Prio >= NormalPriority)
+      return false;
+    PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
+    return true;
+  }
+  default:
+    if (!TII.isEXP(*MI))
+      return false;
+    break;
+  }
+
+  // Check entry priority at each export (as there will only be a few).
+  // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
+  bool Changed = false;
+  if (CC != CallingConv::AMDGPU_Gfx)
+    Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
+
+  auto NextMI = std::next(It);
+  bool EndOfShader = false;
+  if (NextMI != MBB->end()) {
+    // Only need WA at end of sequence of exports.
+    if (TII.isEXP(*NextMI))
+      return Changed;
+    // Assume appropriate S_SETPRIO after export means WA already applied.
+    if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
+        NextMI->getOperand(0).getImm() == PostExportPriority)
+      return Changed;
+    EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
+  }
+
+  const DebugLoc &DL = MI->getDebugLoc();
+
+  // Lower priority.
+  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
+      .addImm(PostExportPriority);
+
+  if (!EndOfShader) {
+    // Wait for exports to complete.
+    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
+        .addReg(AMDGPU::SGPR_NULL)
+        .addImm(0);
+  }
+
+  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
+  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
+
+  if (!EndOfShader) {
+    // Return to normal (higher) priority.
+    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
+        .addImm(NormalPriority);
+  }
+
+  return true;
+}
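Taken together, for an export that ends a run of exports but is not the last instruction of the shader, the workaround rewrites the instruction stream roughly as follows. This is an illustrative sketch assembled from the BuildMI calls above, not actual compiler output, and the mnemonic and operand syntax are approximate:

    s_setprio 2                    ; NormalPriority, inserted at entry by ensureEntrySetPrio
    ...
    exp pos0 v0, v1, v2, v3 done   ; last export in the sequence
    s_setprio 0                    ; PostExportPriority
    s_waitcnt_expcnt null, 0x0     ; wait for exports to complete
    s_nop 0
    s_nop 0
    s_setprio 2                    ; return to NormalPriority
    ...
    s_endpgm

When the export is instead followed immediately by S_ENDPGM, the wait and the restoring s_setprio are skipped, leaving only the priority drop and the two NOPs before the program ends.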