@@ -61,7 +61,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   AssumptionCache *AC = nullptr;
   LegacyDivergenceAnalysis *DA = nullptr;
   Module *Mod = nullptr;
-  const DataLayout *DL = nullptr;
   bool HasUnsafeFPMath = false;

   /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
@@ -134,16 +133,6 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   /// \returns True.
   bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;

-
-  unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
-  unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
-  bool isI24(Value *V, unsigned ScalarSize) const;
-  bool isU24(Value *V, unsigned ScalarSize) const;
-
-  /// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
-  /// SelectionDAG has an issue where an and asserting the bits are known
-  bool replaceMulWithMul24(BinaryOperator &I) const;
-
   /// Expands 24 bit div or rem.
   Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
                         Value *Num, Value *Den,
@@ -403,118 +392,6 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
   return true;
 }

-unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op,
-                                               unsigned ScalarSize) const {
-  KnownBits Known = computeKnownBits(Op, *DL, 0, AC);
-  return ScalarSize - Known.countMinLeadingZeros();
-}
-
-unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
-                                             unsigned ScalarSize) const {
-  // In order for this to be a signed 24-bit value, bit 23, must
-  // be a sign bit.
-  return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
-}
-
-bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
-  return ScalarSize >= 24 && // Types less than 24-bit should be treated
-                             // as unsigned 24-bit values.
-         numBitsSigned(V, ScalarSize) < 24;
-}
-
-bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
-  return numBitsUnsigned(V, ScalarSize) <= 24;
-}
-
-static void extractValues(IRBuilder<> &Builder,
-                          SmallVectorImpl<Value *> &Values, Value *V) {
-  VectorType *VT = dyn_cast<VectorType>(V->getType());
-  if (!VT) {
-    Values.push_back(V);
-    return;
-  }
-
-  for (int I = 0, E = VT->getNumElements(); I != E; ++I)
-    Values.push_back(Builder.CreateExtractElement(V, I));
-}
-
-static Value *insertValues(IRBuilder<> &Builder,
-                           Type *Ty,
-                           SmallVectorImpl<Value *> &Values) {
-  if (Values.size() == 1)
-    return Values[0];
-
-  Value *NewVal = UndefValue::get(Ty);
-  for (int I = 0, E = Values.size(); I != E; ++I)
-    NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
-
-  return NewVal;
-}
-
-bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
-  if (I.getOpcode() != Instruction::Mul)
-    return false;
-
-  Type *Ty = I.getType();
-  unsigned Size = Ty->getScalarSizeInBits();
-  if (Size <= 16 && ST->has16BitInsts())
-    return false;
-
-  // Prefer scalar if this could be s_mul_i32
-  if (DA->isUniform(&I))
-    return false;
-
-  Value *LHS = I.getOperand(0);
-  Value *RHS = I.getOperand(1);
-  IRBuilder<> Builder(&I);
-  Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
-  Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
-
-  // TODO: Should this try to match mulhi24?
-  if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
-    IntrID = Intrinsic::amdgcn_mul_u24;
-  } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
-    IntrID = Intrinsic::amdgcn_mul_i24;
-  } else
-    return false;
-
-  SmallVector<Value *, 4> LHSVals;
-  SmallVector<Value *, 4> RHSVals;
-  SmallVector<Value *, 4> ResultVals;
-  extractValues(Builder, LHSVals, LHS);
-  extractValues(Builder, RHSVals, RHS);
-
-
-  IntegerType *I32Ty = Builder.getInt32Ty();
-  FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
-  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
-    Value *LHS, *RHS;
-    if (IntrID == Intrinsic::amdgcn_mul_u24) {
-      LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
-      RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
-    } else {
-      LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
-      RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
-    }
-
-    Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});
-
-    if (IntrID == Intrinsic::amdgcn_mul_u24) {
-      ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
-                                                     LHSVals[I]->getType()));
-    } else {
-      ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
-                                                     LHSVals[I]->getType()));
-    }
-  }
-
-  I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals));
-  I.eraseFromParent();
-
-  return true;
-}
-
 static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
   const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
   if (!CNum)
@@ -879,9 +756,6 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
       DA->isUniform(&I) && promoteUniformOpToI32(I))
     return true;

-  if (replaceMulWithMul24(I))
-    return true;
-
   bool Changed = false;
   Instruction::BinaryOps Opc = I.getOpcode();
   Type *Ty = I.getType();
@@ -1008,7 +882,6 @@ bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {

 bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
   Mod = &M;
-  DL = &Mod->getDataLayout();
   return false;
 }

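Note: for reference, a minimal before/after sketch in LLVM IR of the rewrite that the removed replaceMulWithMul24 performed on a divergent 32-bit mul; %a24 and %b24 are hypothetical i32 values assumed to be provable as unsigned 24-bit by the removed KnownBits-based isU24 check:

  ; before: divergent 32-bit multiply whose operands fit in 24 bits
  %mul = mul i32 %a24, %b24
  ; after the (now reverted) rewrite: the non-overloaded 24-bit multiply intrinsic
  %mul = call i32 @llvm.amdgcn.mul.u24(i32 %a24, i32 %b24)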