Skip to content

[OpenMP][MLIR] Descriptor explicit member map lowering changes #96265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ def MapInfoOp : OpenMP_Op<"map.info", [AttrSizedOperandSegments]> {
TypeAttr:$var_type,
Optional<OpenMP_PointerLikeType>:$var_ptr_ptr,
Variadic<OpenMP_PointerLikeType>:$members,
OptionalAttr<AnyIntElementsAttr>:$members_index,
OptionalAttr<IndexListArrayAttr>:$members_index,
Variadic<OpenMP_MapBoundsType>:$bounds, /* rank-0 to rank-{n-1} */
OptionalAttr<UI64Attr>:$map_type,
OptionalAttr<VariableCaptureKindAttr>:$map_capture_type,
Expand Down
52 changes: 16 additions & 36 deletions mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1064,16 +1064,15 @@ static void printMapClause(OpAsmPrinter &p, Operation *op,
}

static ParseResult parseMembersIndex(OpAsmParser &parser,
DenseIntElementsAttr &membersIdx) {
SmallVector<APInt> values;
ArrayAttr &membersIdx) {
SmallVector<Attribute, 4> values, memberIdxs;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Is there a reason for the magic number "4" here? If not, it's generally preferred to leave the default (or zero, if it refuses to give you a compile-time default).

int64_t value;
int64_t shape[2] = {0, 0};
unsigned shapeTmp = 0;

auto parseIndices = [&]() -> ParseResult {
if (parser.parseInteger(value))
return failure();
shapeTmp++;
values.push_back(APInt(32, value));
values.push_back(IntegerAttr::get(parser.getBuilder().getIntegerType(64),
mlir::APInt(64, value)));
return success();
};

Expand All @@ -1087,50 +1086,31 @@ static ParseResult parseMembersIndex(OpAsmParser &parser,
if (failed(parser.parseRSquare()))
return failure();

// Only set once, if any indices are not the same size
// we error out in the next check as that's unsupported
if (shape[1] == 0)
shape[1] = shapeTmp;

// Verify that the recently parsed list is equal to the
// first one we parsed, they must be equal lengths to
// keep the rectangular shape DenseIntElementsAttr
// requires
if (shapeTmp != shape[1])
return failure();

shapeTmp = 0;
shape[0]++;
memberIdxs.push_back(ArrayAttr::get(parser.getContext(), values));
values.clear();
} while (succeeded(parser.parseOptionalComma()));

if (!values.empty()) {
ShapedType valueType =
VectorType::get(shape, IntegerType::get(parser.getContext(), 32));
membersIdx = DenseIntElementsAttr::get(valueType, values);
}
if (!memberIdxs.empty())
membersIdx = ArrayAttr::get(parser.getContext(), memberIdxs);

return success();
}

static void printMembersIndex(OpAsmPrinter &p, MapInfoOp op,
DenseIntElementsAttr membersIdx) {
llvm::ArrayRef<int64_t> shape = membersIdx.getShapedType().getShape();
assert(shape.size() <= 2);

ArrayAttr membersIdx) {
if (!membersIdx)
return;

for (int i = 0; i < shape[0]; ++i) {
for (size_t i = 0; i < membersIdx.getValue().size(); i++) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think it would be useful to use llvm::join for the inner loop to improve readability. Then, using llvm::enumerate for the outer loop would probably help as well.

auto memberIdx = mlir::cast<mlir::ArrayAttr>(membersIdx.getValue()[i]);
p << "[";
int rowOffset = i * shape[1];
for (int j = 0; j < shape[1]; ++j) {
p << membersIdx.getValues<int32_t>()[rowOffset + j];
if ((j + 1) < shape[1])
for (size_t j = 0; j < memberIdx.getValue().size(); j++) {
p << mlir::cast<mlir::IntegerAttr>(memberIdx.getValue()[j]).getInt();
if ((j + 1) < memberIdx.getValue().size())
p << ",";
}
p << "]";

if ((i + 1) < shape[0])
if ((i + 1) < membersIdx.getValue().size())
p << ", ";
}
}
Expand Down
116 changes: 74 additions & 42 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2261,47 +2261,47 @@ static int getMapDataMemberIdx(MapInfoData &mapData,

static mlir::omp::MapInfoOp
getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) {
mlir::DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr();

mlir::ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
// Only 1 member has been mapped, we can return it.
if (indexAttr.size() == 1)
if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(
mapInfo.getMembers()[0].getDefiningOp()))
return mapOp;

llvm::ArrayRef<int64_t> shape = indexAttr.getShapedType().getShape();
llvm::SmallVector<size_t> indices(shape[0]);
llvm::SmallVector<size_t> indices(indexAttr.size());
std::iota(indices.begin(), indices.end(), 0);

llvm::sort(indices.begin(), indices.end(),
[&](const size_t a, const size_t b) {
auto indexValues = indexAttr.getValues<int32_t>();
for (int i = 0; i < shape[1]; ++i) {
int aIndex = indexValues[a * shape[1] + i];
int bIndex = indexValues[b * shape[1] + i];

if (aIndex == bIndex)
continue;

if (aIndex != -1 && bIndex == -1)
return false;

if (aIndex == -1 && bIndex != -1)
return true;
llvm::sort(
indices.begin(), indices.end(), [&](const size_t a, const size_t b) {
auto memberIndicesA = mlir::cast<mlir::ArrayAttr>(indexAttr[a]);
auto memberIndicesB = mlir::cast<mlir::ArrayAttr>(indexAttr[b]);

size_t smallestMember = memberIndicesA.size() < memberIndicesB.size()
? memberIndicesA.size()
: memberIndicesB.size();
for (size_t i = 0; i < smallestMember; ++i) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think llvm::zip could simplify this a bit, since it already implements iterating over two ranges of potentially different sizes until the end of the shortest one is reached.

int64_t aIndex =
mlir::cast<mlir::IntegerAttr>(memberIndicesA.getValue()[i])
.getInt();
int64_t bIndex =
mlir::cast<mlir::IntegerAttr>(memberIndicesB.getValue()[i])
.getInt();

if (aIndex == bIndex)
continue;

// A is earlier in the record type layout than B
if (aIndex < bIndex)
return first;
if (aIndex < bIndex)
return first;

if (bIndex < aIndex)
return !first;
}
if (aIndex > bIndex)
return !first;
}

// Iterated the entire list and couldn't make a decision, all
// elements were likely the same. Return false, since the sort
// comparator should return false for equal elements.
return false;
});
// Iterated up until the end of the smallest member and
// they were found to be equal up to that point, so select
// the member with the lowest index count, i.e. the "parent"
return memberIndicesA.size() < memberIndicesB.size();
});

return llvm::cast<mlir::omp::MapInfoOp>(
mapInfo.getMembers()[indices.front()].getDefiningOp());
Expand Down Expand Up @@ -2455,10 +2455,13 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
mlir::dyn_cast<mlir::omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
int firstMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
builder.getPtrTy());
int lastMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, false));

// NOTE/TODO: Should perhaps use OriginalValue here instead of Pointers to
// avoid offset or any manipulations interfering with the calculation.
lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
builder.getPtrTy());
highAddr = builder.CreatePointerCast(
builder.CreateGEP(mapData.BaseType[lastMemberIdx],
mapData.Pointers[lastMemberIdx], builder.getInt64(1)),
Expand All @@ -2472,17 +2475,8 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
/*isSigned=*/false);
combinedInfo.Sizes.push_back(size);

// TODO: This will need to be expanded to include the whole host of logic for
// the map flags that Clang currently supports (e.g. it should take the map
// flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some
// further case specific flag modifications). For the moment, it handles what
// we support as expected.
llvm::omp::OpenMPOffloadMappingFlags mapFlag =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;

llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);

// This creates the initial MEMBER_OF mapping that consists of
// the parent/top level container (same as above effectively, except
Expand All @@ -2491,6 +2485,12 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
// only relevant if the structure in its totality is being mapped,
// otherwise the above suffices.
if (!parentClause.getPartialMap()) {
// TODO: This will need to be expanded to include the whole host of logic
// for the map flags that Clang currently supports (e.g. it should do some
// further case specific flag modifications). For the moment, it handles
// what we support as expected.
llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
combinedInfo.Types.emplace_back(mapFlag);
combinedInfo.DevicePointers.emplace_back(
llvm::OpenMPIRBuilder::DeviceInfoTy::None);
Expand Down Expand Up @@ -2541,6 +2541,31 @@ static void processMapMembersWithParent(

assert(memberDataIdx >= 0 && "could not find mapped member of structure");

// If we're currently mapping a pointer to a block of data, we must
// initially map the pointer, and then attach/bind the data with a
// subsequent map to the pointer, this segment of code generates the
// pointer mapping. This pointer map can in certain cases be optimised
// out as Clang currently does in its lowering, however, for the moment
Comment on lines +2546 to +2548
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// subsequent map to the pointer, this segment of code generates the
// pointer mapping. This pointer map can in certain cases be optimised
// out as Clang currently does in its lowering, however, for the moment
// subsequent map to the pointer. This segment of code generates the
// pointer mapping, which can in certain cases be optimised out as Clang
// currently does in its lowering. However, for the moment

// we do not do so, in part as we have substantially less information on
// the data being mapped at this stage; at least for the moment.
Comment on lines +2549 to +2550
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// we do not do so, in part as we have substantially less information on
// the data being mapped at this stage; at least for the moment.
// we do not do so, in part as we currently have substantially less information
// on the data being mapped at this stage.

if (checkIfPointerMap(memberClause)) {
auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
memberClause.getMapType().value());
mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
combinedInfo.Types.emplace_back(mapFlag);
combinedInfo.DevicePointers.emplace_back(
llvm::OpenMPIRBuilder::DeviceInfoTy::None);
combinedInfo.Names.emplace_back(
LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
combinedInfo.BasePointers.emplace_back(
mapData.BasePointers[mapDataIndex]);
combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
combinedInfo.Sizes.emplace_back(builder.getInt64(
moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
}

// Same MemberOfFlag to indicate its link with parent and other members
// of.
auto mapFlag =
Expand All @@ -2556,7 +2581,14 @@ static void processMapMembersWithParent(
llvm::OpenMPIRBuilder::DeviceInfoTy::None);
combinedInfo.Names.emplace_back(
LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);

if (checkIfPointerMap(memberClause))
combinedInfo.BasePointers.emplace_back(
mapData.BasePointers[memberDataIdx]);
else
combinedInfo.BasePointers.emplace_back(
mapData.BasePointers[mapDataIndex]);

combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// This test checks that the offload sizes, map types, base pointers and
// pointers provided to the OpenMP kernel argument structure are correct when
// lowering to LLVM-IR from MLIR when performing explicit member mapping of a
// record type that includes Fortran allocatables in various locations of the
// record type's hierarchy.

// Module marked as not being a target-device module
// (omp.is_target_device = false). It holds a single function that maps a
// member nested two levels deep inside a record type into an omp.target
// region.
module attributes {omp.is_target_device = false} {
// %arg0 is an opaque pointer; the operations below treat it as pointing to
// the "top_layer" record type.
llvm.func @omp_nested_derived_type_alloca_map(%arg0: !llvm.ptr) {
// Integer constants. Only %0 (4), %1 (1) and %3 (0) are referenced below;
// %2 (2) and %4 (6) have no users in this function.
%0 = llvm.mlir.constant(4 : index) : i64
%1 = llvm.mlir.constant(1 : index) : i64
%2 = llvm.mlir.constant(2 : index) : i64
%3 = llvm.mlir.constant(0 : index) : i64
%4 = llvm.mlir.constant(6 : index) : i64
// Bounds used by the data map (%9): lower bound 0, upper bound 4, extent 4,
// stride 1 (with stride_in_bytes set) and start index 0.
%5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%3 : i64) {stride_in_bytes = true}
// Address of field 6 of "top_layer", which is the nested "middle_layer"
// struct.
%6 = llvm.getelementptr %arg0[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>
// Address of field 2 of "middle_layer": the anonymous struct whose first
// field is a ptr (presumably the descriptor of the allocatable member;
// confirm against the Fortran source this was generated from).
%7 = llvm.getelementptr %6[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
// Address of field 0 (the ptr field) of that anonymous struct.
%8 = llvm.getelementptr %7[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
// Map entry with element type i32 that uses %8 as var_ptr_ptr and the bounds
// in %5; mapped tofrom, captured by reference.
%9 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) var_ptr_ptr(%8 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""}
// Map entry for the anonymous struct itself at %7; mapped tofrom, captured
// by reference.
%10 = omp.map.info var_ptr(%7 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"}
// Parent map entry for the whole "top_layer" object. Its members are %10 at
// index path [6,2] and %9 at index path [6,2,0]; the two paths have
// different lengths. partial_map = true is set on this entry.
%11 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [6,2], [6,2,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true}
// Target region taking the three map entries as block arguments. The body is
// only a terminator, so the test exercises the mapping arguments alone.
omp.target map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
^bb0(%arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr):
omp.terminator
}
llvm.return
}
}

// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20]
// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675]

// CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) {

// CHECK: %[[NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer, ptr %[[ARG]], i32 0, i32 6
// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer, ptr %[[NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 2
// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0
// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], align 8
// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i64 1
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]] to i64
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]]
// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)

// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8

// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8

// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8

// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8
// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8
Loading
Loading