Skip to content

Commit b7f8762

Browse files
author
David Salinas
committed
Revert "AMDGPU: Add 24-bit mul intrinsics"
This reverts commit b508009.
1 parent 2397af0 commit b7f8762

9 files changed

+11
-751
lines changed

include/llvm/IR/IntrinsicsAMDGPU.td

-10
Original file line numberDiff line numberDiff line change
@@ -1358,16 +1358,6 @@ def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
13581358
[IntrNoMem, IntrSpeculatable]
13591359
>;
13601360

1361-
def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
1362-
[llvm_i32_ty, llvm_i32_ty],
1363-
[IntrNoMem, IntrSpeculatable]
1364-
>;
1365-
1366-
def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
1367-
[llvm_i32_ty, llvm_i32_ty],
1368-
[IntrNoMem, IntrSpeculatable]
1369-
>;
1370-
13711361
// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
13721362
//
13731363
// bar_val is the total number of waves that will wait on this

lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

-127
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
6161
AssumptionCache *AC = nullptr;
6262
LegacyDivergenceAnalysis *DA = nullptr;
6363
Module *Mod = nullptr;
64-
const DataLayout *DL = nullptr;
6564
bool HasUnsafeFPMath = false;
6665

6766
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
@@ -134,16 +133,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
134133
/// \returns True.
135134
bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
136135

137-
138-
unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
139-
unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
140-
bool isI24(Value *V, unsigned ScalarSize) const;
141-
bool isU24(Value *V, unsigned ScalarSize) const;
142-
143-
/// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.i24.
144-
/// SelectionDAG has an issue where an and asserting the bits are known
145-
bool replaceMulWithMul24(BinaryOperator &I) const;
146-
147136
/// Expands 24 bit div or rem.
148137
Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
149138
Value *Num, Value *Den,
@@ -403,118 +392,6 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
403392
return true;
404393
}
405394

406-
unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op,
407-
unsigned ScalarSize) const {
408-
KnownBits Known = computeKnownBits(Op, *DL, 0, AC);
409-
return ScalarSize - Known.countMinLeadingZeros();
410-
}
411-
412-
unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
413-
unsigned ScalarSize) const {
414-
// In order for this to be a signed 24-bit value, bit 23 must
415-
// be a sign bit.
416-
return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
417-
}
418-
419-
bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
420-
return ScalarSize >= 24 && // Types less than 24-bit should be treated
421-
// as unsigned 24-bit values.
422-
numBitsSigned(V, ScalarSize) < 24;
423-
}
424-
425-
bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
426-
return numBitsUnsigned(V, ScalarSize) <= 24;
427-
}
428-
429-
static void extractValues(IRBuilder<> &Builder,
430-
SmallVectorImpl<Value *> &Values, Value *V) {
431-
VectorType *VT = dyn_cast<VectorType>(V->getType());
432-
if (!VT) {
433-
Values.push_back(V);
434-
return;
435-
}
436-
437-
for (int I = 0, E = VT->getNumElements(); I != E; ++I)
438-
Values.push_back(Builder.CreateExtractElement(V, I));
439-
}
440-
441-
static Value *insertValues(IRBuilder<> &Builder,
442-
Type *Ty,
443-
SmallVectorImpl<Value *> &Values) {
444-
if (Values.size() == 1)
445-
return Values[0];
446-
447-
Value *NewVal = UndefValue::get(Ty);
448-
for (int I = 0, E = Values.size(); I != E; ++I)
449-
NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
450-
451-
return NewVal;
452-
}
453-
454-
bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
455-
if (I.getOpcode() != Instruction::Mul)
456-
return false;
457-
458-
Type *Ty = I.getType();
459-
unsigned Size = Ty->getScalarSizeInBits();
460-
if (Size <= 16 && ST->has16BitInsts())
461-
return false;
462-
463-
// Prefer scalar if this could be s_mul_i32
464-
if (DA->isUniform(&I))
465-
return false;
466-
467-
Value *LHS = I.getOperand(0);
468-
Value *RHS = I.getOperand(1);
469-
IRBuilder<> Builder(&I);
470-
Builder.SetCurrentDebugLocation(I.getDebugLoc());
471-
472-
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
473-
474-
// TODO: Should this try to match mulhi24?
475-
if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
476-
IntrID = Intrinsic::amdgcn_mul_u24;
477-
} else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
478-
IntrID = Intrinsic::amdgcn_mul_i24;
479-
} else
480-
return false;
481-
482-
SmallVector<Value *, 4> LHSVals;
483-
SmallVector<Value *, 4> RHSVals;
484-
SmallVector<Value *, 4> ResultVals;
485-
extractValues(Builder, LHSVals, LHS);
486-
extractValues(Builder, RHSVals, RHS);
487-
488-
489-
IntegerType *I32Ty = Builder.getInt32Ty();
490-
FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
491-
for (int I = 0, E = LHSVals.size(); I != E; ++I) {
492-
Value *LHS, *RHS;
493-
if (IntrID == Intrinsic::amdgcn_mul_u24) {
494-
LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
495-
RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
496-
} else {
497-
LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
498-
RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
499-
}
500-
501-
Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});
502-
503-
if (IntrID == Intrinsic::amdgcn_mul_u24) {
504-
ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
505-
LHSVals[I]->getType()));
506-
} else {
507-
ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
508-
LHSVals[I]->getType()));
509-
}
510-
}
511-
512-
I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals));
513-
I.eraseFromParent();
514-
515-
return true;
516-
}
517-
518395
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
519396
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
520397
if (!CNum)
@@ -879,9 +756,6 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
879756
DA->isUniform(&I) && promoteUniformOpToI32(I))
880757
return true;
881758

882-
if (replaceMulWithMul24(I))
883-
return true;
884-
885759
bool Changed = false;
886760
Instruction::BinaryOps Opc = I.getOpcode();
887761
Type *Ty = I.getType();
@@ -1008,7 +882,6 @@ bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
1008882

1009883
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
1010884
Mod = &M;
1011-
DL = &Mod->getDataLayout();
1012885
return false;
1013886
}
1014887

lib/Target/AMDGPU/SIISelLowering.cpp

-5
Original file line numberDiff line numberDiff line change
@@ -5929,11 +5929,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
59295929
case Intrinsic::amdgcn_cos:
59305930
return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
59315931

5932-
case Intrinsic::amdgcn_mul_u24:
5933-
return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, Op.getOperand(1), Op.getOperand(2));
5934-
case Intrinsic::amdgcn_mul_i24:
5935-
return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2));
5936-
59375932
case Intrinsic::amdgcn_log_clamp: {
59385933
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
59395934
return SDValue();

0 commit comments

Comments
 (0)