Skip to content

Commit d794f6e

Browse files
rampitec authored and searlmc1 committed
Cherrypick [AMDGPU] Remove wavefrontsize feature from GFX10+ (llvm#98400)
Change-Id: Ic732c0ac93b9767f2b194c6a825165e3709314fc
1 parent dcef30a commit d794f6e

14 files changed

+626
-617
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

-3
Original file line number | Diff line number | Diff line change
@@ -1382,7 +1382,6 @@ def FeatureISAVersion10_Common : FeatureSet<
13821382
FeatureLDSBankCount32,
13831383
FeatureDLInsts,
13841384
FeatureNSAEncoding,
1385-
FeatureWavefrontSize32,
13861385
FeatureBackOffBarrier]>;
13871386

13881387
def FeatureISAVersion10_1_Common : FeatureSet<
@@ -1466,7 +1465,6 @@ def FeatureISAVersion11_Common : FeatureSet<
14661465
FeatureDot10Insts,
14671466
FeatureNSAEncoding,
14681467
FeaturePartialNSAEncoding,
1469-
FeatureWavefrontSize32,
14701468
FeatureShaderCyclesRegister,
14711469
FeatureArchitectedFlatScratch,
14721470
FeatureAtomicFaddRtnInsts,
@@ -1539,7 +1537,6 @@ def FeatureISAVersion12 : FeatureSet<
15391537
FeatureDot10Insts,
15401538
FeatureNSAEncoding,
15411539
FeaturePartialNSAEncoding,
1542-
FeatureWavefrontSize32,
15431540
FeatureShaderCyclesHiLoRegisters,
15441541
FeatureArchitectedFlatScratch,
15451542
FeatureArchitectedSGPRs,

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

+8
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
104104
: AMDGPUSubtarget::SOUTHERN_ISLANDS;
105105
}
106106

107+
if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
108+
!hasFeature(AMDGPU::FeatureWavefrontSize64)) {
109+
// If there is no default wave size it must be a generation before gfx10,
110+
// these have FeatureWavefrontSize64 in their definition already. For gfx10+
111+
// set wave32 as a default.
112+
ToggleFeature(AMDGPU::FeatureWavefrontSize32);
113+
}
114+
107115
// We don't support FP64 for EG/NI atm.
108116
assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
109117

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

+9
Original file line number | Diff line number | Diff line change
@@ -1398,6 +1398,15 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
13981398
copySTI().ToggleFeature("southern-islands");
13991399
}
14001400

1401+
FeatureBitset FB = getFeatureBits();
1402+
if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1403+
!FB[AMDGPU::FeatureWavefrontSize32]) {
1404+
// If there is no default wave size it must be a generation before gfx10,
1405+
// these have FeatureWavefrontSize64 in their definition already. For
1406+
// gfx10+ set wave32 as a default.
1407+
copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1408+
}
1409+
14011410
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
14021411

14031412
{

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

+18-2
Original file line number | Diff line number | Diff line change
@@ -44,10 +44,26 @@ using namespace llvm;
4444

4545
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
4646

47+
/// Returns a subtarget info that carries a wavefront size feature. If \p STI
/// already has FeatureWavefrontSize64 or FeatureWavefrontSize32 it is returned
/// unchanged; otherwise a copy obtained from \p Ctx with
/// FeatureWavefrontSize32 toggled on is returned.
static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
48+
MCContext &Ctx) {
49+
if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
50+
!STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
51+
MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
52+
// Generations before gfx10 already have FeatureWavefrontSize64 in their
53+
// definition, so a subtarget with no wave size feature must be gfx10+.
54+
// Default those to wave32.
55+
STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
56+
return STICopy;
57+
}
58+
59+
return STI;
60+
}
61+
4762
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
4863
MCContext &Ctx, MCInstrInfo const *MCII)
49-
: MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
50-
MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
64+
: MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
65+
MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
66+
TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
5167
// ToDo: AMDGPUDisassembler supports only VI ISA.
5268
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
5369
report_fatal_error("Disassembly not yet supported for subtarget");

llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll

+147-147
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll

+8-8
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,19 @@
11
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
3-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
5-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
66

77
; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s
88
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s
99
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
1010
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
1111
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
1212
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s
13-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
14-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
15-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
16-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
13+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
14+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
15+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
16+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
1717

1818
; GCN-LABEL: {{^}}fold_wavefrontsize:
1919
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(

llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir

+4-4
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ body: |
145145
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104_lo16
146146
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105_lo16
147147
; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
148-
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset <badreg>, 32, $exec_lo_lo16, 32, 128
148+
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr10_lo16, 32, $exec_lo_lo16, 32, 128
149149
; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
150150
; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10
151151
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
@@ -180,7 +180,7 @@ body: |
180180
; GCN-NEXT: {{ $}}
181181
; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02
182182
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
183-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
183+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16
184184
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16
185185
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16
186186
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2_lo16
@@ -641,8 +641,8 @@ body: |
641641
; GCN-NEXT: {{ $}}
642642
; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02
643643
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
644-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
645-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
644+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16
645+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16
646646
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16
647647
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16
648648
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2_lo16

llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir

+9-9
Original file line number | Diff line number | Diff line change
@@ -173,8 +173,8 @@ body: |
173173
; GCN-NEXT: {{ $}}
174174
; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02
175175
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
176-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
177-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
176+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16
177+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16
178178
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16
179179
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16
180180
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2_lo16
@@ -325,7 +325,7 @@ body: |
325325
; GCN-NEXT: {{ $}}
326326
; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02
327327
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
328-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
328+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16
329329
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35_lo16
330330
; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
331331
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
@@ -357,8 +357,8 @@ body: |
357357
; GCN-NEXT: {{ $}}
358358
; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02
359359
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
360-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
361-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
360+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16
361+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16
362362
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16
363363
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16
364364
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2_lo16
@@ -498,10 +498,10 @@ body: |
498498
; GCN-NEXT: {{ $}}
499499
; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02
500500
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
501-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
502-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
503-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
504-
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined <badreg>
501+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0_lo16
502+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16
503+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16
504+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16
505505
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0_lo16
506506
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1_lo16
507507
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2_lo16

0 commit comments

Comments
 (0)