Skip to content

Commit dbb7731

Browse files
pendingchaos authored and 1ace committed
aco/gfx11.5: workaround export priority issue
llvm/llvm-project#99273

fossil-db (gfx1150):
Totals from 73996 (93.20% of 79395) affected shaders:
Instrs: 36015357 -> 36807177 (+2.20%)
CodeSize: 189072544 -> 192238748 (+1.67%)
Latency: 245845181 -> 246790550 (+0.38%); split: -0.00%, +0.38%
InvThroughput: 45068018 -> 45116177 (+0.11%); split: -0.00%, +0.11%

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Backport-to: 24.2
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30241>
(cherry picked from commit 0919ce1)
1 parent f6ba6a5 commit dbb7731

File tree

5 files changed

+74
-2
lines changed

5 files changed

+74
-2
lines changed

.pick_status.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@
234234
"description": "aco/gfx11.5: workaround export priority issue",
235235
"nominated": true,
236236
"nomination_type": 4,
237-
"resolution": 0,
237+
"resolution": 1,
238238
"main_sha": null,
239239
"because_sha": null,
240240
"notes": null

src/amd/compiler/aco_insert_NOPs.cpp

+68
Original file line numberDiff line numberDiff line change
@@ -1772,6 +1772,70 @@ mitigate_hazards(Program* program)
17721772
}
17731773
}
17741774

1775+
/* FeatureRequiredExportPriority in LLVM: rebuilds every block so that s_setprio immediates are at least 2 and each run of exports is followed by a temporary drop to priority 0. */
1776+
void
1777+
required_export_priority(Program* program)
1778+
{
1779+
/* Skip callees, assuming that the caller has already increased the priority. */
1780+
bool increase_priority = !program->is_epilog && !program->info.vs.has_prolog &&
1781+
(!program->info.merged_shader_compiled_separately ||
1782+
program->stage.sw == SWStage::VS || program->stage.sw == SWStage::TES);
1783+
increase_priority |= program->is_prolog; /* a prolog always raises the priority itself */
1784+
1785+
for (Block& block : program->blocks) {
1786+
std::vector<aco_ptr<Instruction>> new_instructions;
1787+
new_instructions.reserve(block.instructions.size() + 6); /* presumably room for one leading s_setprio plus one five-instruction post-export sequence */
1788+
1789+
Builder bld(program, &new_instructions); /* everything built through bld is appended to new_instructions */
1790+
1791+
if (increase_priority && block.index == 0) { /* block with index 0: make sure the program starts at priority >= 2 */
1792+
if (!block.instructions.empty() && block.instructions[0]->opcode == aco_opcode::s_setprio)
1793+
block.instructions[0]->salu().imm = MAX2(block.instructions[0]->salu().imm, 2); /* reuse the existing s_setprio instead of adding another */
1794+
else
1795+
bld.sopp(aco_opcode::s_setprio, 2);
1796+
}
1797+
1798+
for (unsigned i = 0; i < block.instructions.size(); i++) {
1799+
Instruction* instr = block.instructions[i].get(); /* raw pointer taken before the move below; it is still used afterwards */
1800+
new_instructions.push_back(std::move(block.instructions[i]));
1801+
1802+
if (instr->opcode == aco_opcode::s_setprio) { /* raise any existing priority change to at least 2 */
1803+
instr->salu().imm = MAX2(instr->salu().imm, 2);
1804+
continue;
1805+
}
1806+
1807+
bool end_of_export_sequence = instr->isEXP() && (i == block.instructions.size() - 1 || /* last export of a run of consecutive exports in this block */
1808+
!block.instructions[i + 1]->isEXP());
1809+
if (!end_of_export_sequence)
1810+
continue;
1811+
1812+
bool before_endpgm = false;
1813+
if (i != block.instructions.size() - 1) {
1814+
before_endpgm = block.instructions[i + 1]->opcode == aco_opcode::s_endpgm;
1815+
} else {
1816+
/* Does this fallthrough to a s_endpgm? */
1817+
for (unsigned j = block.index + 1; j < program->blocks.size(); j++) {
1818+
if (program->blocks[j].instructions.size() == 1 &&
1819+
program->blocks[j].instructions[0]->opcode == aco_opcode::s_endpgm)
1820+
before_endpgm = true;
1821+
if (!program->blocks[j].instructions.empty()) /* empty blocks are skipped; the first non-empty one decides */
1822+
break;
1823+
}
1824+
}
1825+
1826+
bld.sopp(aco_opcode::s_setprio, 0); /* drop to priority 0 right after the export sequence */
1827+
if (!before_endpgm) /* the wait is omitted when s_endpgm follows directly */
1828+
bld.sopk(aco_opcode::s_waitcnt_expcnt, Operand(sgpr_null, s1), 0); /* NOTE(review): presumably waits for outstanding exports to finish - confirm against the ISA docs */
1829+
bld.sopp(aco_opcode::s_nop, 0);
1830+
bld.sopp(aco_opcode::s_nop, 0);
1831+
if (!before_endpgm) /* no need to restore the priority when the program ends next */
1832+
bld.sopp(aco_opcode::s_setprio, 2);
1833+
}
1834+
1835+
block.instructions = std::move(new_instructions); /* replace the block's instruction list with the rebuilt one */
1836+
}
1838+
17751839
} /* end namespace */
17761840

17771841
void
@@ -1785,6 +1849,10 @@ insert_NOPs(Program* program)
17851849
mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10, resolve_all_gfx10>(program);
17861850
else
17871851
mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6, resolve_all_gfx6>(program);
1852+
1853+
if (program->gfx_level == GFX11_5 && (program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER ||
1854+
program->stage.hw == AC_HW_PIXEL_SHADER))
1855+
required_export_priority(program);
17881856
}
17891857

17901858
} // namespace aco

src/amd/compiler/aco_instruction_selection.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -12912,7 +12912,9 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
1291212912

1291312913
block->instructions.reserve(16 + pinfo->num_attributes * 4);
1291412914

12915-
bld.sopp(aco_opcode::s_setprio, 0x3u);
12915+
/* Besides performance, the purpose of this is also for the FeatureRequiredExportPriority GFX11.5
12916+
* issue. */
12917+
bld.sopp(aco_opcode::s_setprio, 3);
1291612918

1291712919
uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
1291812920
bool has_nontrivial_divisors = pinfo->nontrivial_divisors;

src/amd/compiler/aco_interface.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ aco_compile_shader_part(const struct aco_compiler_options* options,
222222
program->debug.private_data = options->debug.private_data;
223223

224224
program->is_prolog = is_prolog;
225+
program->is_epilog = !is_prolog;
225226

226227
/* Instruction selection */
227228
select_shader_part(program.get(), pinfo, &config, options, info, args);

src/amd/compiler/aco_ir.h

+1
Original file line numberDiff line numberDiff line change
@@ -2035,6 +2035,7 @@ class Program final {
20352035
bool has_pops_overlapped_waves_wait = false;
20362036
bool has_color_exports = false;
20372037
bool is_prolog = false;
2038+
bool is_epilog = false;
20382039

20392040
std::vector<uint8_t> constant_data;
20402041
Temp private_segment_buffer;

0 commit comments

Comments
 (0)