Skip to content

Commit 493572f

Browse files
committed
Make emitter independent of ISA and based on insOpts for ymm embedded rounding
1 parent 067025f commit 493572f

File tree

7 files changed

+124
-17
lines changed

7 files changed

+124
-17
lines changed

src/coreclr/jit/codegenxarch.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9066,19 +9066,19 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
90669066
// REX2 sub eax, ebx
90679067

90689068
//packed conversion instructions
9069-
theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_4BYTE, REG_XMM0, REG_XMM1); // xmm
9069+
theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_16BYTE, REG_XMM0, REG_XMM1); // xmm
90709070
theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_32BYTE, REG_XMM0, REG_XMM1);// ymm
90719071
theEmitter->emitIns_R_R(INS_vcvttps2dqs, EA_64BYTE, REG_XMM0, REG_XMM1);// zmm
90729072

9073-
theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_4BYTE, REG_XMM0, REG_XMM1);// xmm
9073+
theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_16BYTE, REG_XMM0, REG_XMM1);// xmm
90749074
theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_32BYTE, REG_XMM0, REG_XMM1);// ymm
90759075
theEmitter->emitIns_R_R(INS_vcvttps2udqs, EA_64BYTE, REG_XMM0, REG_XMM1);// zmm
90769076

9077-
theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_8BYTE, REG_XMM0, REG_XMM1);// xmm
9077+
theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_16BYTE, REG_XMM0, REG_XMM1);// xmm
90789078
theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_32BYTE, REG_XMM0, REG_XMM1);// ymm
90799079
theEmitter->emitIns_R_R(INS_vcvttpd2qqs, EA_64BYTE, REG_XMM0, REG_XMM1);// zmm
90809080

9081-
theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_8BYTE, REG_XMM0, REG_XMM1);// xmm
9081+
theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_16BYTE, REG_XMM0, REG_XMM1);// xmm
90829082
theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_32BYTE, REG_XMM0, REG_XMM1);// ymm
90839083
theEmitter->emitIns_R_R(INS_vcvttpd2uqqs, EA_64BYTE, REG_XMM0, REG_XMM1);// zmm
90849084

@@ -9104,19 +9104,22 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
91049104
//VCVT[,T]PS2I[,U]BS
91059105
theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_16BYTE, REG_XMM0, REG_XMM1);
91069106
theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1);
9107-
theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1, INS_OPTS_EVEX_er_ru);
9107+
theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1, (insOpts)(INS_OPTS_EVEX_er_ru | INS_OPTS_EVEX_ymm_er));
91089108
theEmitter->emitIns_R_R(INS_vcvtps2ibs, EA_64BYTE, REG_XMM0, REG_XMM1);
91099109

91109110
theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_16BYTE, REG_XMM0, REG_XMM1);
91119111
theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_32BYTE, REG_XMM0, REG_XMM1);
9112+
theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_32BYTE, REG_XMM0, REG_XMM1, (insOpts)(INS_OPTS_EVEX_er_rz | INS_OPTS_EVEX_ymm_er));
91129113
theEmitter->emitIns_R_R(INS_vcvtps2iubs, EA_64BYTE, REG_XMM0, REG_XMM1);
91139114

91149115
theEmitter->emitIns_R_R(INS_vcvttps2ibs, EA_16BYTE, REG_XMM0, REG_XMM1);
91159116
theEmitter->emitIns_R_R(INS_vcvttps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1);
9117+
theEmitter->emitIns_R_R(INS_vcvttps2ibs, EA_32BYTE, REG_XMM0, REG_XMM1, (insOpts)(INS_OPTS_EVEX_eb_er_rd | INS_OPTS_EVEX_ymm_er));
91169118
theEmitter->emitIns_R_R(INS_vcvttps2ibs, EA_64BYTE, REG_XMM0, REG_XMM1);
91179119

91189120
theEmitter->emitIns_R_R(INS_vcvttps2iubs, EA_16BYTE, REG_XMM0, REG_XMM1);
91199121
theEmitter->emitIns_R_R(INS_vcvttps2iubs, EA_32BYTE, REG_XMM0, REG_XMM1);
9122+
theEmitter->emitIns_R_R(INS_vcvttps2iubs, EA_32BYTE, REG_XMM0, REG_XMM1, (insOpts)(INS_OPTS_EVEX_er_ru | INS_OPTS_EVEX_ymm_er));
91209123
theEmitter->emitIns_R_R(INS_vcvttps2iubs, EA_64BYTE, REG_XMM0, REG_XMM1);
91219124

91229125
//VPDPW[SU,US,UU]D[,S]

src/coreclr/jit/emit.h

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,11 @@ class emitter
793793
// For embedded rounding, EVEX.L'L semantic changes to indicate the rounding mode.
794794
// Multiple bits in _idEvexbContext are used to inform emitter to specially handle the EVEX.L'L bits.
795795
unsigned _idEvexbContext : 2;
796+
797+
// EVEX.u can indicate ymm/zmm embedded rounding support
798+
// _idEvexuContext = 0, zmm embedded rounding support
799+
// _idEvexuContext = 1, zmm/ymm embedded rounding support
800+
unsigned _idEvexuContext : 1; // EVEX.u context
796801
#endif // TARGET_XARCH
797802

798803
#ifdef TARGET_ARM64
@@ -825,8 +830,8 @@ class emitter
825830

826831
////////////////////////////////////////////////////////////////////////
827832
// Space taken up to here:
828-
// x86: 48 bits
829-
// amd64: 48 bits
833+
// x86: 49 bits
834+
// amd64: 49 bits
830835
// arm: 48 bits
831836
// arm64: 55 bits
832837
// loongarch64: 46 bits
@@ -844,7 +849,7 @@ class emitter
844849
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
845850
#define ID_EXTRA_BITFIELD_BITS (14)
846851
#elif defined(TARGET_XARCH)
847-
#define ID_EXTRA_BITFIELD_BITS (16)
852+
#define ID_EXTRA_BITFIELD_BITS (17)
848853
#else
849854
#error Unsupported or unset target architecture
850855
#endif
@@ -878,8 +883,8 @@ class emitter
878883

879884
////////////////////////////////////////////////////////////////////////
880885
// Space taken up to here (with/without prev offset, assuming host==target):
881-
// x86: 54/50 bits
882-
// amd64: 55/50 bits
886+
// x86: 55/51 bits
887+
// amd64: 56/51 bits
883888
// arm: 54/50 bits
884889
// arm64: 62/57 bits
885890
// loongarch64: 53/48 bits
@@ -894,8 +899,8 @@ class emitter
894899

895900
////////////////////////////////////////////////////////////////////////
896901
// Small constant size (with/without prev offset, assuming host==target):
897-
// x86: 10/14 bits
898-
// amd64: 9/14 bits
902+
// x86: 9/13 bits
903+
// amd64: 8/13 bits
899904
// arm: 10/14 bits
900905
// arm64: 2/7 bits
901906
// loongarch64: 11/16 bits
@@ -1654,6 +1659,20 @@ class emitter
16541659
}
16551660

16561661
#ifdef TARGET_XARCH
1662+
// Reports whether the EVEX.u context has been recorded on this instruction
// descriptor, i.e. whether the one-bit _idEvexuContext field is non-zero.
bool idIsEvexuContextSet() const
1663+
{
1664+
return _idEvexuContext != 0;
1665+
}
1666+
1667+
// Records the EVEX.u context on this instruction descriptor.
//
// instOptions - the instruction options to inspect; only the bit(s) covered by
//               INS_OPTS_EVEX_u_MASK are examined.
//
// When no such bit is present the field is left untouched (this function never
// clears _idEvexuContext).
void idSetEvexuContext(insOpts instOptions)
1668+
{
1669+
if (instOptions & INS_OPTS_EVEX_u_MASK)
1670+
{
1671+
// The EVEX.b context must already be set on this descriptor before the
// EVEX.u context may be recorded.
assert(idIsEvexbContextSet());
1672+
_idEvexuContext = 1;
1673+
}
1674+
}
1675+
16571676
bool idIsEvexbContextSet() const
16581677
{
16591678
return _idEvexbContext != 0;

src/coreclr/jit/emitxarch.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1373,14 +1373,14 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
13731373
{
13741374
code |= BBIT_IN_BYTE_EVEX_PREFIX;
13751375

1376-
// enable ymm embeddd rounding
1377-
if (emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
1378-
{
1379-
code &= ~(uBIT_IN_BYTE_EVEX_PREFIX);
1380-
}
13811376
if (!id->idHasMem())
13821377
{
13831378
// embedded rounding case.
1379+
if (id->idIsEvexuContextSet())
1380+
{
1381+
code &= ~(uBIT_IN_BYTE_EVEX_PREFIX);
1382+
}
1383+
13841384
unsigned roundingMode = id->idGetEvexbContext();
13851385
if (roundingMode == 1)
13861386
{
@@ -6670,6 +6670,7 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum
66706670
// if EVEX.b needs to be set in this path, then it should be embedded rounding.
66716671
assert(UseEvexEncoding());
66726672
id->idSetEvexbContext(instOptions);
6673+
id->idSetEvexuContext(instOptions);
66736674
}
66746675
SetEvexEmbMaskIfNeeded(id, instOptions);
66756676

@@ -7097,6 +7098,7 @@ void emitter::emitIns_R_R_R(
70977098
// if EVEX.b needs to be set in this path, then it should be embedded rounding.
70987099
assert(UseEvexEncoding());
70997100
id->idSetEvexbContext(instOptions);
7101+
id->idSetEvexuContext(instOptions);
71007102
}
71017103
SetEvexEmbMaskIfNeeded(id, instOptions);
71027104

src/coreclr/jit/gentree.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27324,6 +27324,58 @@ bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic() const
2732427324
return OperIsBitwiseHWIntrinsic(oper);
2732527325
}
2732627326

27327+
//------------------------------------------------------------------------
27328+
// OperIsYmmEmbRoundingEnabled: Is this HWIntrinsic a node with ymm embedded rounding feature.
27329+
//
27330+
// Return Value:
27331+
// Whether "this" is a node with ymm embedded rounding feature.
27332+
//
// Notes:
// On TARGET_XARCH the answer is false for every intrinsic whose ISA lookup is
// not InstructionSet_AVX10v2, and true for the AVX10v2 intrinsics listed in the
// switch below. An AVX10v2 intrinsic that is not listed falls to the default
// case, which calls unreached(). On all other targets the answer is false.
//
27333+
bool GenTreeHWIntrinsic::OperIsYmmEmbRoundingEnabled() const
27334+
{
27335+
#if defined(TARGET_XARCH)
27336+
NamedIntrinsic intrinsicId = GetHWIntrinsicId();
27337+
27338+
// Only intrinsics that belong to the AVX10v2 ISA are considered.
if (HWIntrinsicInfo::lookupIsa(intrinsicId) != InstructionSet_AVX10v2)
27339+
{
27340+
return false;
27341+
}
27342+
27343+
#ifdef DEBUG
27344+
assert(OperIsEmbRoundingEnabled());
27345+
#endif
27346+
// The below logic assumes that OperIsEmbRoundingEnabled() is true.
27347+
// NOTE(review): numArgs is assigned here but never read in this function.
size_t numArgs = GetOperandCount();
27348+
switch (intrinsicId)
27349+
{
27350+
case NI_AVX10v2_Add:
27351+
case NI_AVX10v2_ConvertToVector128Int32:
27352+
case NI_AVX10v2_ConvertToVector128Single:
27353+
case NI_AVX10v2_ConvertToVector128UInt32:
27354+
case NI_AVX10v2_ConvertToVector256Double:
27355+
case NI_AVX10v2_ConvertToVector256Int32:
27356+
case NI_AVX10v2_ConvertToVector256Int64:
27357+
case NI_AVX10v2_ConvertToVector256Single:
27358+
case NI_AVX10v2_ConvertToVector256UInt32:
27359+
case NI_AVX10v2_ConvertToVector256UInt64:
27360+
case NI_AVX10v2_Divide:
27361+
case NI_AVX10v2_Multiply:
27362+
case NI_AVX10v2_Scale:
27363+
case NI_AVX10v2_Sqrt:
27364+
case NI_AVX10v2_Subtract:
27365+
case NI_AVX10v2_ConvertToSByteWithSaturationAndZeroExtendToInt32:
27366+
case NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32:
27367+
{
27368+
return true;
27369+
}
27370+
27371+
// NOTE(review): any AVX10v2 intrinsic that is not listed above ends up here.
default:
27372+
unreached();
27373+
}
27374+
#else // !TARGET_XARCH
27375+
return false;
27376+
#endif // TARGET_XARCH
27377+
}
27378+
2732727379
//------------------------------------------------------------------------
2732827380
// OperIsEmbRoundingEnabled: Is this HWIntrinsic a node with embedded rounding feature.
2732927381
//

src/coreclr/jit/gentree.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6564,6 +6564,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
65646564
bool OperIsCreateScalarUnsafe() const;
65656565
bool OperIsBitwiseHWIntrinsic() const;
65666566
bool OperIsEmbRoundingEnabled() const;
6567+
bool OperIsYmmEmbRoundingEnabled() const;
65676568

65686569
bool OperIsHWIntrinsic(NamedIntrinsic intrinsicId) const
65696570
{

src/coreclr/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,27 @@ static bool genIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicC
8181
return tableDrivenCategory && tableDrivenFlag;
8282
}
8383

84+
//------------------------------------------------------------------------
85+
// AddYmmEmbRounding: Adds the ymm embedded rounding to the insOpts
86+
//
87+
// Arguments:
88+
// instOptions - The existing insOpts
89+
//
90+
// Return Value:
91+
// The modified insOpts
92+
//
// Notes:
// instOptions must already carry at least one bit under INS_OPTS_EVEX_b_MASK
// (this is asserted). The result is instOptions with INS_OPTS_EVEX_ymm_er
// OR-ed in; no other bit is changed.
//
93+
static insOpts AddYmmEmbRounding(insOpts instOptions)
94+
{
95+
// enable ymm embedded rounding based on current
96+
// rounding mode indicator.
97+
98+
assert((instOptions & INS_OPTS_EVEX_b_MASK) != 0);
99+
// Do the bitwise OR on an unsigned copy, then convert back to the enum type.
unsigned result = static_cast<unsigned>(instOptions);
100+
result |= INS_OPTS_EVEX_ymm_er;
101+
102+
return static_cast<insOpts>(result);
103+
}
104+
84105
//------------------------------------------------------------------------
85106
// AddEmbRoundingMode: Adds the embedded rounding mode to the insOpts
86107
//
@@ -304,6 +325,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
304325

305326
int8_t mode = static_cast<int8_t>(lastOp->AsIntCon()->IconValue());
306327
instOptions = AddEmbRoundingMode(instOptions, mode);
328+
if (node->OperIsYmmEmbRoundingEnabled())
329+
{
330+
instOptions = AddYmmEmbRounding(instOptions);
331+
}
307332
}
308333
else
309334
{

src/coreclr/jit/instr.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,11 @@ enum insOpts: unsigned
256256
INS_OPTS_EVEX_z_MASK = 0x20, // mask for EVEX.z related features
257257

258258
INS_OPTS_EVEX_em_zero = 1 << 5, // Embedded mask merges with zero
259+
260+
//One-bit: 0b0100_0000
261+
INS_OPTS_EVEX_u_MASK = 0x40, // mask for EVEX.u related features
262+
263+
INS_OPTS_EVEX_ymm_er = 1 << 6, // EVEX.u is not set
259264
};
260265

261266
#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

0 commit comments

Comments
 (0)