[OpenMP] Don't emit redundant zero-sized mapping nodes for overlapped structs (#148947)
The handling of overlapped structure mapping in CGOpenMPRuntime.cpp can lead to redundant zero-sized mapping nodes at runtime. This patch fixes it using a combination of approaches: trivially adjacent struct members won't have a mapping node created between them, and for more complicated cases (inheritance) the physical layout of the struct/class is used to make sure that elements aren't missed. I've introduced a new class to track the state whilst iterating over the struct. This reduces a bit of redundancy in the code (accumulating CombinedInfo both during and after the loop), which I think is a bit neater. Before: omptarget --> Entry 0: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=48, Type=0x20, Name=unknown omptarget --> Entry 1: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 2: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 3: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 4: Base=0x00007fff8d483830, Begin=0x00007fff8d48383c, Size=20, Type=0x1000000000003, Name=unknown omptarget --> Entry 5: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 6: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 7: Base=0x00007fff8d483830, Begin=0x00007fff8d48385c, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 8: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 9: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 10: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 11: Base=0x00007fff8d483840, Begin=0x00005e7665275130, Size=32, Type=0x1000000000013, Name=unknown omptarget --> Entry 12: Base=0x00007fff8d483830, Begin=0x00007fff8d483850, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 13: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 14: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=4, Type=0x1000000000003, Name=unknown After: omptarget --> Entry 0: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=48, Type=0x20, Name=unknown omptarget --> Entry 1: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562ec, Size=20, Type=0x1000000000003, Name=unknown omptarget --> Entry 2: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f5630c, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 3: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 4: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e4, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 5: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e8, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 6: Base=0x00007fffd0f562f0, Begin=0x000058b6013fb130, Size=32, Type=0x1000000000013, Name=unknown omptarget --> Entry 7: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56300, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 8: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56304, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 9: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56308, Size=4, Type=0x1000000000003, Name=unknown For code: #include <cstdlib> #include <cstdio> struct S { int x; int y; int z; int *p1; int *p2; }; struct T : public S { int a; int b; int c; }; int main() { T v; v.p1 = (int*) calloc(8, sizeof(int)); v.p2 = (int*) calloc(8, sizeof(int)); #pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c) { v.x++; v.y += 2; v.z += 3; v.p1[0] += 4; v.a += 7; v.b += 5; v.c += 6; } return 0; }
This commit is contained in:
parent
2149d6d1c9
commit
889faabe78
@ -7080,6 +7080,110 @@ private:
|
||||
return ConstLength.getSExtValue() != 1;
|
||||
}
|
||||
|
||||
/// A helper class to copy structures with overlapped elements, i.e. those
|
||||
/// which have mappings of both "s" and "s.mem". Consecutive elements that
|
||||
/// are not explicitly copied have mapping nodes synthesized for them,
|
||||
/// taking care to avoid generating zero-sized copies.
|
||||
class CopyOverlappedEntryGaps {
|
||||
CodeGenFunction &CGF;
|
||||
MapCombinedInfoTy &CombinedInfo;
|
||||
OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
|
||||
const ValueDecl *MapDecl = nullptr;
|
||||
const Expr *MapExpr = nullptr;
|
||||
Address BP = Address::invalid();
|
||||
bool IsNonContiguous = false;
|
||||
uint64_t DimSize = 0;
|
||||
// These elements track the position as the struct is iterated over
|
||||
// (in order of increasing element address).
|
||||
const RecordDecl *LastParent = nullptr;
|
||||
uint64_t Cursor = 0;
|
||||
unsigned LastIndex = -1u;
|
||||
Address LB = Address::invalid();
|
||||
|
||||
public:
|
||||
CopyOverlappedEntryGaps(CodeGenFunction &CGF,
|
||||
MapCombinedInfoTy &CombinedInfo,
|
||||
OpenMPOffloadMappingFlags Flags,
|
||||
const ValueDecl *MapDecl, const Expr *MapExpr,
|
||||
Address BP, Address LB, bool IsNonContiguous,
|
||||
uint64_t DimSize)
|
||||
: CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
|
||||
MapExpr(MapExpr), BP(BP), LB(LB), IsNonContiguous(IsNonContiguous),
|
||||
DimSize(DimSize) {}
|
||||
|
||||
void processField(
|
||||
const OMPClauseMappableExprCommon::MappableComponent &MC,
|
||||
const FieldDecl *FD,
|
||||
llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
|
||||
EmitMemberExprBase) {
|
||||
const RecordDecl *RD = FD->getParent();
|
||||
const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
|
||||
uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
|
||||
uint64_t FieldSize =
|
||||
CGF.getContext().getTypeSize(FD->getType().getCanonicalType());
|
||||
Address ComponentLB = Address::invalid();
|
||||
|
||||
if (FD->getType()->isLValueReferenceType()) {
|
||||
const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
|
||||
LValue BaseLVal = EmitMemberExprBase(CGF, ME);
|
||||
ComponentLB =
|
||||
CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
|
||||
} else {
|
||||
ComponentLB =
|
||||
CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
|
||||
}
|
||||
|
||||
if (!LastParent)
|
||||
LastParent = RD;
|
||||
if (FD->getParent() == LastParent) {
|
||||
if (FD->getFieldIndex() != LastIndex + 1)
|
||||
copyUntilField(FD, ComponentLB);
|
||||
} else {
|
||||
LastParent = FD->getParent();
|
||||
if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
|
||||
copyUntilField(FD, ComponentLB);
|
||||
}
|
||||
Cursor = FieldOffset + FieldSize;
|
||||
LastIndex = FD->getFieldIndex();
|
||||
LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
|
||||
}
|
||||
|
||||
void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
|
||||
llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
|
||||
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
|
||||
llvm::Value *Size =
|
||||
CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
|
||||
copySizedChunk(LBPtr, Size);
|
||||
}
|
||||
|
||||
void copyUntilEnd(Address HB) {
|
||||
if (LastParent) {
|
||||
const ASTRecordLayout &RL =
|
||||
CGF.getContext().getASTRecordLayout(LastParent);
|
||||
if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
|
||||
return;
|
||||
}
|
||||
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
|
||||
llvm::Value *Size = CGF.Builder.CreatePtrDiff(
|
||||
CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
|
||||
LBPtr);
|
||||
copySizedChunk(LBPtr, Size);
|
||||
}
|
||||
|
||||
void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
|
||||
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
|
||||
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
|
||||
CombinedInfo.DevicePtrDecls.push_back(nullptr);
|
||||
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
|
||||
CombinedInfo.Pointers.push_back(Base);
|
||||
CombinedInfo.Sizes.push_back(
|
||||
CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
|
||||
CombinedInfo.Types.push_back(Flags);
|
||||
CombinedInfo.Mappers.push_back(nullptr);
|
||||
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
|
||||
}
|
||||
};
|
||||
|
||||
/// Generate the base pointers, section pointers, sizes, map type bits, and
|
||||
/// user-defined mappers (all included in \a CombinedInfo) for the provided
|
||||
/// map type, map or motion modifiers, and expression components.
|
||||
@ -7570,63 +7674,22 @@ private:
|
||||
getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
|
||||
/*AddPtrFlag=*/false,
|
||||
/*AddIsTargetParamFlag=*/false, IsNonContiguous);
|
||||
llvm::Value *Size = nullptr;
|
||||
CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
|
||||
MapExpr, BP, LB, IsNonContiguous,
|
||||
DimSize);
|
||||
// Do bitcopy of all non-overlapped structure elements.
|
||||
for (OMPClauseMappableExprCommon::MappableExprComponentListRef
|
||||
Component : OverlappedElements) {
|
||||
Address ComponentLB = Address::invalid();
|
||||
for (const OMPClauseMappableExprCommon::MappableComponent &MC :
|
||||
Component) {
|
||||
if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
|
||||
const auto *FD = dyn_cast<FieldDecl>(VD);
|
||||
if (FD && FD->getType()->isLValueReferenceType()) {
|
||||
const auto *ME =
|
||||
cast<MemberExpr>(MC.getAssociatedExpression());
|
||||
LValue BaseLVal = EmitMemberExprBase(CGF, ME);
|
||||
ComponentLB =
|
||||
CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
|
||||
.getAddress();
|
||||
} else {
|
||||
ComponentLB =
|
||||
CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
|
||||
.getAddress();
|
||||
}
|
||||
llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
|
||||
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
|
||||
Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
|
||||
LBPtr);
|
||||
break;
|
||||
if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
|
||||
CopyGaps.processField(MC, FD, EmitMemberExprBase);
|
||||
}
|
||||
}
|
||||
assert(Size && "Failed to determine structure size");
|
||||
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
|
||||
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
|
||||
CombinedInfo.DevicePtrDecls.push_back(nullptr);
|
||||
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
|
||||
CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
|
||||
CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
|
||||
Size, CGF.Int64Ty, /*isSigned=*/true));
|
||||
CombinedInfo.Types.push_back(Flags);
|
||||
CombinedInfo.Mappers.push_back(nullptr);
|
||||
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
|
||||
: 1);
|
||||
LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
|
||||
}
|
||||
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
|
||||
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
|
||||
CombinedInfo.DevicePtrDecls.push_back(nullptr);
|
||||
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
|
||||
CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
|
||||
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
|
||||
Size = CGF.Builder.CreatePtrDiff(
|
||||
CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
|
||||
LBPtr);
|
||||
CombinedInfo.Sizes.push_back(
|
||||
CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
|
||||
CombinedInfo.Types.push_back(Flags);
|
||||
CombinedInfo.Mappers.push_back(nullptr);
|
||||
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
|
||||
: 1);
|
||||
}
|
||||
CopyGaps.copyUntilEnd(HB);
|
||||
break;
|
||||
}
|
||||
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
|
||||
|
52
clang/test/OpenMP/copy-gaps-1.cpp
Normal file
52
clang/test/OpenMP/copy-gaps-1.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
struct S {
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
int *p1;
|
||||
int *p2;
|
||||
};
|
||||
|
||||
struct T : public S {
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
};
|
||||
|
||||
int main() {
|
||||
T v;
|
||||
|
||||
#pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c)
|
||||
{
|
||||
v.x++;
|
||||
v.y += 2;
|
||||
v.z += 3;
|
||||
v.p1[0] += 4;
|
||||
v.a += 7;
|
||||
v.b += 5;
|
||||
v.c += 6;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [10 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4, i64 32, i64 4, i64 4, i64 4]
|
||||
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [10 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
|
||||
|
||||
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
|
||||
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
|
||||
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
|
||||
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [10 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
|
||||
|
||||
// Check for filling of four non-constant size elements here: the whole struct
|
||||
// size, the (padded) region covering p1 & p2, and the padding at the end of
|
||||
// struct T.
|
||||
|
||||
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 0
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
|
||||
// CHECK-DAG: [[P1P2:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 1
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[P1P2]], align 8
|
||||
// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 2
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
|
52
clang/test/OpenMP/copy-gaps-2.cpp
Normal file
52
clang/test/OpenMP/copy-gaps-2.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
struct S {
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
};
|
||||
|
||||
struct M : public S {
|
||||
int mid;
|
||||
};
|
||||
|
||||
struct T : public M {
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
};
|
||||
|
||||
int main() {
|
||||
T v;
|
||||
|
||||
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
|
||||
{
|
||||
v.y++;
|
||||
v.z += 2;
|
||||
v.a += 3;
|
||||
v.mid += 5;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
|
||||
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
|
||||
|
||||
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
|
||||
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
|
||||
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
|
||||
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
|
||||
|
||||
// Fill four non-constant size elements here: the whole struct size, the region
|
||||
// covering v.x, the region covering v.mid and the region covering v.b and v.c.
|
||||
|
||||
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
|
||||
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
|
||||
// CHECK-DAG: [[MID:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[MID]], align 8
|
||||
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
|
46
clang/test/OpenMP/copy-gaps-3.cpp
Normal file
46
clang/test/OpenMP/copy-gaps-3.cpp
Normal file
@ -0,0 +1,46 @@
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
struct S {
|
||||
int x;
|
||||
int y;
|
||||
int z;
|
||||
};
|
||||
|
||||
struct T : public S {
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
};
|
||||
|
||||
int main() {
|
||||
T v;
|
||||
|
||||
// This one should have no gap between v.z & v.a.
|
||||
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
|
||||
{
|
||||
v.y++;
|
||||
v.z += 2;
|
||||
v.a += 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [6 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
|
||||
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [6 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
|
||||
|
||||
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
|
||||
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
|
||||
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
|
||||
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [6 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
|
||||
|
||||
// Fill three non-constant size elements here: the whole struct size, the region
|
||||
// covering v.x, and the region covering v.b and v.c.
|
||||
|
||||
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 0
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
|
||||
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 1
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
|
||||
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 2
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
|
48
clang/test/OpenMP/copy-gaps-4.cpp
Normal file
48
clang/test/OpenMP/copy-gaps-4.cpp
Normal file
@ -0,0 +1,48 @@
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
struct S {
|
||||
int x;
|
||||
int y;
|
||||
char z; // Hidden padding after here...
|
||||
};
|
||||
|
||||
struct T : public S {
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
};
|
||||
|
||||
int main() {
|
||||
T v;
|
||||
|
||||
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
|
||||
{
|
||||
v.y++;
|
||||
v.z += 2;
|
||||
v.a += 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
|
||||
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
|
||||
|
||||
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
|
||||
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
|
||||
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
|
||||
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
|
||||
|
||||
// Fill four non-constant size elements here: the whole struct size, the region
|
||||
// covering v.x, the region covering padding after v.z and the region covering
|
||||
// v.b and v.c.
|
||||
|
||||
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
|
||||
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
|
||||
// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
|
||||
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
|
50
clang/test/OpenMP/copy-gaps-5.cpp
Normal file
50
clang/test/OpenMP/copy-gaps-5.cpp
Normal file
@ -0,0 +1,50 @@
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
template<typename C>
|
||||
struct S {
|
||||
C x;
|
||||
C y;
|
||||
char z; // Hidden padding after here...
|
||||
};
|
||||
|
||||
template<typename C>
|
||||
struct T : public S<C> {
|
||||
C a;
|
||||
C b;
|
||||
C c;
|
||||
};
|
||||
|
||||
int main() {
|
||||
T<int> v;
|
||||
|
||||
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
|
||||
{
|
||||
v.y++;
|
||||
v.z += 2;
|
||||
v.a += 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
|
||||
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
|
||||
|
||||
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
|
||||
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
|
||||
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
|
||||
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
|
||||
|
||||
// Fill four non-constant size elements here: the whole struct size, the region
|
||||
// covering v.x, the region covering padding after v.z and the region covering
|
||||
// v.b and v.c.
|
||||
|
||||
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
|
||||
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
|
||||
// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
|
||||
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
|
87
clang/test/OpenMP/copy-gaps-6.cpp
Normal file
87
clang/test/OpenMP/copy-gaps-6.cpp
Normal file
@ -0,0 +1,87 @@
|
||||
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
struct S {
|
||||
int x;
|
||||
int *arr;
|
||||
int y;
|
||||
int z;
|
||||
};
|
||||
|
||||
int main() {
|
||||
S v;
|
||||
|
||||
#pragma omp target map(tofrom: v, v.x, v.z)
|
||||
{
|
||||
v.x++;
|
||||
v.y += 2;
|
||||
v.z += 3;
|
||||
}
|
||||
|
||||
#pragma omp target map(tofrom: v, v.x, v.arr[:1])
|
||||
{
|
||||
v.x++;
|
||||
v.y += 2;
|
||||
v.arr[0] += 2;
|
||||
v.z += 4;
|
||||
}
|
||||
|
||||
#pragma omp target map(tofrom: v, v.arr[:1])
|
||||
{
|
||||
v.x++;
|
||||
v.y += 2;
|
||||
v.arr[0] += 2;
|
||||
v.z += 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK: [[CSTSZ0:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4]
|
||||
// CHECK: [[CSTTY0:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
|
||||
|
||||
// CHECK: [[CSTSZ1:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4]
|
||||
// CHECK: [[CSTTY1:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]]
|
||||
|
||||
// CHECK: [[CSTSZ2:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 24, i64 4]
|
||||
// CHECK: [[CSTTY2:@.+]] = private {{.*}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]]
|
||||
|
||||
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
|
||||
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
|
||||
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
|
||||
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
|
||||
|
||||
// Fill two non-constant size elements here: the whole struct size, and the
|
||||
// region covering v.arr and v.y.
|
||||
|
||||
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
|
||||
// CHECK-DAG: [[ARRY:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRY]], align 8
|
||||
|
||||
// CHECK: call void
|
||||
|
||||
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
|
||||
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
|
||||
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
|
||||
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
|
||||
|
||||
// Fill two non-constant size elements here: the whole struct size, and the
|
||||
// region covering v.arr, v.y and v.z.
|
||||
|
||||
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
|
||||
// CHECK-DAG: [[ARRYZ:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRYZ]], align 8
|
||||
|
||||
// CHECK: call void
|
||||
|
||||
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
|
||||
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
|
||||
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
|
||||
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [3 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
|
||||
|
||||
// Fill one non-constant size element here: the whole struct size.
|
||||
|
||||
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [3 x i64], ptr [[SIZES]], i32 0, i32 0
|
||||
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
|
@ -27,11 +27,11 @@ public:
|
||||
void foo();
|
||||
};
|
||||
|
||||
// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 0, i64 8]
|
||||
// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 0, i64 8]
|
||||
// TARGET_PARAM = 0x20
|
||||
// MEMBER_OF_1 | TO = 0x1000000000001
|
||||
// MEMBER_OF_1 | PTR_AND_OBJ | TO = 0x1000000000011
|
||||
// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
|
||||
// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
|
||||
// CK35-DAG: [[SIZE_FROM:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 8]
|
||||
// TARGET_PARAM = 0x20
|
||||
// MEMBER_OF_1 | PTR_AND_OBJ | FROM = 0x1000000000012
|
||||
@ -86,35 +86,14 @@ void ref_map() {
|
||||
// CK35-DAG: [[B_BEGIN_INTPTR]] = ptrtoint ptr [[B_BEGIN_VOID:%.+]] to i64
|
||||
// CK35-DAG: [[B_ADDR:%.+]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1
|
||||
|
||||
// pass MEMBER_OF_1 | TO {&s, &s.b+1, ((ptr)(&s+1)-(ptr)(&s.b+1))} to copy the data of remainder of s.
|
||||
// pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b.
|
||||
|
||||
// CK35-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
|
||||
// CK35-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
|
||||
// CK35-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
|
||||
|
||||
|
||||
// CK35-DAG: store ptr [[S_ADDR]], ptr [[BP2]],
|
||||
// CK35-DAG: store ptr [[B_END:%.+]], ptr [[P2]],
|
||||
// CK35-DAG: store i64 [[REM_SIZE:%.+]], ptr [[S2]],
|
||||
|
||||
// CK35-DAG: [[B_END]] = getelementptr ptr, ptr [[B_ADDR]], i{{.+}} 1
|
||||
|
||||
// CK35-DAG: [[REM_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
|
||||
// CK35-DAG: [[SZ]] = sub i64 [[S_END_INTPTR:%.+]], [[B_END_INTPTR:%.+]]
|
||||
// CK35-DAG: [[B_END_INTPTR]] = ptrtoint ptr [[B_END_VOID:%.+]] to i64
|
||||
// CK35-DAG: [[S_END_INTPTR]] = ptrtoint ptr [[S_END_VOID:%.+]] to i64
|
||||
// CK35-DAG: [[S_END_VOID]] = getelementptr i8, ptr [[S_LAST:%.+]], i{{.+}} 1
|
||||
// CK35-64-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i64 15
|
||||
// CK35-32-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i32 7
|
||||
|
||||
// pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b.
|
||||
|
||||
// CK35-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
|
||||
// CK35-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
|
||||
|
||||
|
||||
// CK35-DAG: store ptr [[S_ADDR]], ptr [[BP3]],
|
||||
// CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P3]],
|
||||
// CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P2]],
|
||||
|
||||
// CK35-DAG: [[B_ADDR]] = load ptr, ptr [[B_REF:%.+]], align {{[0-9]+}}, !nonnull !{{[0-9]+}}, !align !{{[0-9]+}}
|
||||
// CK35-DAG: [[B_REF]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user