Skip to content

Commit f3076b1

Browse files
authored
1 parent 6116bfe commit f3076b1

File tree

9 files changed

+22 -14 lines changed

9 files changed

+22 -14 lines changed

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
092b6e73e651469527662443b592f98f442ece72
1+
3c709802d31b5bc5ed3af8284b40593ff39b9eec

lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,8 @@ struct ConvertLayoutOpUsingLinearLayoutsConversion
294294
b.shl(b.lshr(offset, b.i32_val(rshiftVal)), b.i32_val(lshiftVal)),
295295
offset);
296296
}
297-
auto vecAddr = b.gep(sharedPtrTy, elemTy, smemBase, offset);
298-
vecAddr.setInbounds(true);
297+
auto vecAddr = b.gep(sharedPtrTy, elemTy, smemBase, offset,
298+
LLVM::GEPNoWrapFlags::inbounds);
299299
return vecAddr;
300300
};
301301

lib/Conversion/TritonGPUToLLVM/Utility.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,8 @@ Value getSmemVecAddr(const LinearLayout &regLayout,
398398
smemOffset = b.sub(smemOffset, baseToAllocBaseDist);
399399
}
400400
auto ptrTy = smemBase.getType();
401-
auto vecAddr = b.gep(ptrTy, elemLlvmTy, smemBase, smemOffset);
402-
vecAddr.setInbounds(true);
401+
auto vecAddr = b.gep(ptrTy, elemLlvmTy, smemBase, smemOffset,
402+
LLVM::GEPNoWrapFlags::inbounds);
403403
return vecAddr;
404404
}
405405

test/Conversion/cvt_to_llvm.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ tt.func private @convert_layout_blocked_blocked_vec(%arg0: tensor<16x16xi32, #bl
4848

4949
// CHECK-DAG: [[X_MOD_2:%.*]] = and i32 [[TID]], 1
5050
// CHECK-DAG: [[X_2_4_LOWER:%.*]] = shl {{.*}} i32 [[IS_UPPER_HALF]], 1
51-
// CHECK-DAG: [[X_2_4_UPPER0:%.*]] = shl i32 [[TID]], 1
51+
// CHECK-DAG: [[X_2_4_UPPER0:%.*]] = shl {{.*}} i32 [[TID]], 1
5252
// CHECK-DAG: [[X_2_4_UPPER1:%.*]] = and i32 [[X_2_4_UPPER0]], 24
5353
// CHECK-DAG: [[X_GE_16:%.*]] = and i32 [[TID]], 16
5454
// CHECK-DAG: [[X_GE_16_2:%.*]] = lshr exact i32 [[X_GE_16]], 3

third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ struct ConvertBuiltinFuncToLLVM
204204
ModuleOp mod = getOperation();
205205

206206
GreedyRewriteConfig config;
207-
config.enableRegionSimplification = GreedySimplifyRegionLevel::Aggressive;
207+
config.setRegionSimplificationLevel(GreedySimplifyRegionLevel::Aggressive);
208208

209209
RewritePatternSet patterns(context);
210210
patterns.add<CallOpConversion>(context, this->ftz);

third_party/amd/lib/TritonAMDGPUToLLVM/TritonGPUToLLVM.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
1212
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
1313
#include "mlir/Conversion/UBToLLVM/UBToLLVM.h"
14+
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
1415
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
1516
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
1617
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
@@ -209,9 +210,16 @@ struct ConvertTritonAMDGPUToLLVM
209210
mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, patterns);
210211
mlir::populateMathToLLVMConversionPatterns(typeConverter, patterns);
211212

213+
FailureOr<mlir::amdgpu::Chipset> maybeChipset =
214+
mlir::amdgpu::Chipset::parse(this->arch);
215+
if (failed(maybeChipset)) {
216+
emitError(UnknownLoc::get(&getContext()),
217+
"Invalid AMDGPU chipset name: " + this->arch);
218+
return signalPassFailure();
219+
}
212220
// Native lowering patterns
213-
mlir::populateGpuToROCDLConversionPatterns(typeConverter, patterns,
214-
mlir::gpu::amd::HIP);
221+
mlir::populateGpuToROCDLConversionPatterns(
222+
typeConverter, patterns, mlir::gpu::amd::HIP, *maybeChipset);
215223

216224
mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter,
217225
patterns);

third_party/amd/lib/TritonAMDGPUTransforms/CanonicalizePointers.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#include "mlir/IR/TypeUtilities.h"
1414
#include "mlir/IR/Value.h"
1515
#include "mlir/Pass/Pass.h"
16-
#include "mlir/Transforms/OneToNTypeConversion.h"
16+
#include "mlir/Transforms/DialectConversion.h"
1717
#include "triton/Analysis/Utility.h"
1818
#include "triton/Dialect/Triton/IR/Dialect.h"
1919
#include "triton/Dialect/Triton/IR/Types.h"

third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/MemoryOpToLLVM.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,8 @@ LogicalResult lowerDistributedToSharedStmatrix(
241241
for (int i = 0; i < srcVals.size(); i += step) {
242242
auto regIdx = reps.apply({{kReg, i}, {kLane, 0}, {kWarp, 0}})[0].second;
243243
Value offset = b.xor_(regBase, b.i32_val(regIdx));
244-
auto vecAddr = b.gep(smemPtrTy, llvmElemTy, smemBase, offset);
245-
vecAddr.setInbounds(true);
244+
auto vecAddr = b.gep(smemPtrTy, llvmElemTy, smemBase, offset,
245+
LLVM::GEPNoWrapFlags::inbounds);
246246
SmallVector<Value> inValsVec;
247247
for (int j = 0; j < step; j++)
248248
inValsVec.push_back(srcVals[i + j]);

third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TargetInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ void TargetInfo::storeDShared(RewriterBase &rewriter, Location loc, Value ptr,
230230
SmallVector<Value> vals = unpackLLVector(loc, val, rewriter);
231231
for (int i = 0; i < vec / maxVec; i++) {
232232
auto newPtr = b.gep(ptr.getType(), elemTy, ptr, b.i32_val(i * maxVec),
233-
/*inbounds=*/true);
233+
LLVM::GEPNoWrapFlags::inbounds);
234234
storeDShared(
235235
rewriter, loc, newPtr, ctaId,
236236
packLLVector(loc, ArrayRef(vals).slice(i * maxVec, maxVec), rewriter),
@@ -343,7 +343,7 @@ Value TargetInfo::loadDShared(RewriterBase &rewriter, Location loc, Value ptr,
343343
SmallVector<Value> vals;
344344
for (int i = 0; i < vec / maxVec; i++) {
345345
auto newPtr = b.gep(ptr.getType(), elemTy, ptr, b.i32_val(i * maxVec),
346-
/*inbounds=*/true);
346+
LLVM::GEPNoWrapFlags::inbounds);
347347
auto newVal = loadDShared(rewriter, loc, newPtr, ctaId,
348348
vec_ty(elemTy, maxVec), pred);
349349
for (Value v : unpackLLVector(loc, newVal, rewriter)) {

0 commit comments

Comments
 (0)