[OpenMP] Don't emit redundant zero-sized mapping nodes for overlapped structs (#148947)

The handling of overlapped structure mapping in CGOpenMPRuntime.cpp can
lead to redundant zero-sized mapping nodes at runtime. This patch fixes
it using a combination of approaches: trivially adjacent struct members
won't have a mapping node created between them, and for more complicated
cases (inheritance) the physical layout of the struct/class is used to
make sure that elements aren't missed.

I've introduced a new class to track the state whilst iterating over the
struct. This reduces a bit of redundancy in the code (accumulating
CombinedInfo both during and after the loop), which I think is a bit
neater.

Before:

omptarget --> Entry  0: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=48, Type=0x20, Name=unknown
omptarget --> Entry  1: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry  2: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry  3: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry  4: Base=0x00007fff8d483830, Begin=0x00007fff8d48383c, Size=20, Type=0x1000000000003, Name=unknown
omptarget --> Entry  5: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry  6: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry  7: Base=0x00007fff8d483830, Begin=0x00007fff8d48385c, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry  8: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry  9: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 10: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 11: Base=0x00007fff8d483840, Begin=0x00005e7665275130, Size=32, Type=0x1000000000013, Name=unknown
omptarget --> Entry 12: Base=0x00007fff8d483830, Begin=0x00007fff8d483850, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 13: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 14: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=4, Type=0x1000000000003, Name=unknown

After:

omptarget --> Entry  0: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=48, Type=0x20, Name=unknown
omptarget --> Entry  1: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562ec, Size=20, Type=0x1000000000003, Name=unknown
omptarget --> Entry  2: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f5630c, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry  3: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry  4: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e4, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry  5: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e8, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry  6: Base=0x00007fffd0f562f0, Begin=0x000058b6013fb130, Size=32, Type=0x1000000000013, Name=unknown
omptarget --> Entry  7: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56300, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry  8: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56304, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry  9: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56308, Size=4, Type=0x1000000000003, Name=unknown

For code:

  #include <cstdlib>
  #include <cstdio>

  struct S {
    int x;
    int y;
    int z;
    int *p1;
    int *p2;
  };

  struct T : public S {
    int a;
    int b;
    int c;
  };

  int main() {
    T v;
    v.p1 = (int*) calloc(8, sizeof(int));
    v.p2 = (int*) calloc(8, sizeof(int));

  #pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c)
    {
      v.x++;
      v.y += 2;
      v.z += 3;
      v.p1[0] += 4;
      v.a += 7;
      v.b += 5;
      v.c += 6;
    }

    return 0;
  }
This commit is contained in:
Julian Brown 2025-07-24 14:45:04 +01:00 committed by GitHub
parent 2149d6d1c9
commit 889faabe78
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 449 additions and 72 deletions

View File

@ -7080,6 +7080,110 @@ private:
return ConstLength.getSExtValue() != 1;
}
/// A helper class to copy structures with overlapped elements, i.e. those
/// which have mappings of both "s" and "s.mem". Consecutive elements that
/// are not explicitly copied have mapping nodes synthesized for them,
/// taking care to avoid generating zero-sized copies.
class CopyOverlappedEntryGaps {
CodeGenFunction &CGF;
MapCombinedInfoTy &CombinedInfo;
OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
const ValueDecl *MapDecl = nullptr;
const Expr *MapExpr = nullptr;
Address BP = Address::invalid();
bool IsNonContiguous = false;
uint64_t DimSize = 0;
// These elements track the position as the struct is iterated over
// (in order of increasing element address).
const RecordDecl *LastParent = nullptr;
uint64_t Cursor = 0;
unsigned LastIndex = -1u;
Address LB = Address::invalid();
public:
CopyOverlappedEntryGaps(CodeGenFunction &CGF,
MapCombinedInfoTy &CombinedInfo,
OpenMPOffloadMappingFlags Flags,
const ValueDecl *MapDecl, const Expr *MapExpr,
Address BP, Address LB, bool IsNonContiguous,
uint64_t DimSize)
: CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
MapExpr(MapExpr), BP(BP), LB(LB), IsNonContiguous(IsNonContiguous),
DimSize(DimSize) {}
void processField(
const OMPClauseMappableExprCommon::MappableComponent &MC,
const FieldDecl *FD,
llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
EmitMemberExprBase) {
const RecordDecl *RD = FD->getParent();
const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
uint64_t FieldSize =
CGF.getContext().getTypeSize(FD->getType().getCanonicalType());
Address ComponentLB = Address::invalid();
if (FD->getType()->isLValueReferenceType()) {
const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
LValue BaseLVal = EmitMemberExprBase(CGF, ME);
ComponentLB =
CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
} else {
ComponentLB =
CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
}
if (!LastParent)
LastParent = RD;
if (FD->getParent() == LastParent) {
if (FD->getFieldIndex() != LastIndex + 1)
copyUntilField(FD, ComponentLB);
} else {
LastParent = FD->getParent();
if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
copyUntilField(FD, ComponentLB);
}
Cursor = FieldOffset + FieldSize;
LastIndex = FD->getFieldIndex();
LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
}
void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
llvm::Value *Size =
CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
copySizedChunk(LBPtr, Size);
}
void copyUntilEnd(Address HB) {
if (LastParent) {
const ASTRecordLayout &RL =
CGF.getContext().getASTRecordLayout(LastParent);
if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
return;
}
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
llvm::Value *Size = CGF.Builder.CreatePtrDiff(
CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
LBPtr);
copySizedChunk(LBPtr, Size);
}
void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
CombinedInfo.DevicePtrDecls.push_back(nullptr);
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
CombinedInfo.Pointers.push_back(Base);
CombinedInfo.Sizes.push_back(
CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
CombinedInfo.Types.push_back(Flags);
CombinedInfo.Mappers.push_back(nullptr);
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
}
};
/// Generate the base pointers, section pointers, sizes, map type bits, and
/// user-defined mappers (all included in \a CombinedInfo) for the provided
/// map type, map or motion modifiers, and expression components.
@ -7570,63 +7674,22 @@ private:
getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
/*AddPtrFlag=*/false,
/*AddIsTargetParamFlag=*/false, IsNonContiguous);
llvm::Value *Size = nullptr;
CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
MapExpr, BP, LB, IsNonContiguous,
DimSize);
// Do bitcopy of all non-overlapped structure elements.
for (OMPClauseMappableExprCommon::MappableExprComponentListRef
Component : OverlappedElements) {
Address ComponentLB = Address::invalid();
for (const OMPClauseMappableExprCommon::MappableComponent &MC :
Component) {
if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
const auto *FD = dyn_cast<FieldDecl>(VD);
if (FD && FD->getType()->isLValueReferenceType()) {
const auto *ME =
cast<MemberExpr>(MC.getAssociatedExpression());
LValue BaseLVal = EmitMemberExprBase(CGF, ME);
ComponentLB =
CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
.getAddress();
} else {
ComponentLB =
CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
.getAddress();
}
llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
LBPtr);
break;
if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
CopyGaps.processField(MC, FD, EmitMemberExprBase);
}
}
assert(Size && "Failed to determine structure size");
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
CombinedInfo.DevicePtrDecls.push_back(nullptr);
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
Size, CGF.Int64Ty, /*isSigned=*/true));
CombinedInfo.Types.push_back(Flags);
CombinedInfo.Mappers.push_back(nullptr);
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
: 1);
LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
}
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
CombinedInfo.DevicePtrDecls.push_back(nullptr);
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
Size = CGF.Builder.CreatePtrDiff(
CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
LBPtr);
CombinedInfo.Sizes.push_back(
CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
CombinedInfo.Types.push_back(Flags);
CombinedInfo.Mappers.push_back(nullptr);
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
: 1);
}
CopyGaps.copyUntilEnd(HB);
break;
}
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());

View File

@ -0,0 +1,52 @@
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
struct S {
int x;
int y;
int z;
int *p1;
int *p2;
};
struct T : public S {
int a;
int b;
int c;
};
int main() {
T v;
#pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c)
{
v.x++;
v.y += 2;
v.z += 3;
v.p1[0] += 4;
v.a += 7;
v.b += 5;
v.c += 6;
}
return 0;
}
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [10 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4, i64 32, i64 4, i64 4, i64 4]
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [10 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [10 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
// Check for filling of four non-constant size elements here: the whole struct
// size, the (padded) region covering p1 & p2, and the padding at the end of
// struct T.
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 0
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
// CHECK-DAG: [[P1P2:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 1
// CHECK-DAG: store i64 %{{.+}}, ptr [[P1P2]], align 8
// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 2
// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8

View File

@ -0,0 +1,52 @@
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
struct S {
int x;
int y;
int z;
};
struct M : public S {
int mid;
};
struct T : public M {
int a;
int b;
int c;
};
int main() {
T v;
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
{
v.y++;
v.z += 2;
v.a += 3;
v.mid += 5;
}
return 0;
}
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
// Fill four non-constant size elements here: the whole struct size, the region
// covering v.x, the region covering v.mid and the region covering v.b and v.c.
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
// CHECK-DAG: [[MID:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
// CHECK-DAG: store i64 %{{.+}}, ptr [[MID]], align 8
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8

View File

@ -0,0 +1,46 @@
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
struct S {
int x;
int y;
int z;
};
struct T : public S {
int a;
int b;
int c;
};
int main() {
T v;
// This one should have no gap between v.z & v.a.
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
{
v.y++;
v.z += 2;
v.a += 3;
}
return 0;
}
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [6 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [6 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [6 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
// Fill three non-constant size elements here: the whole struct size, the region
// covering v.x, and the region covering v.b and v.c.
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 0
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 1
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 2
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8

View File

@ -0,0 +1,48 @@
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
struct S {
int x;
int y;
char z; // Hidden padding after here...
};
struct T : public S {
int a;
int b;
int c;
};
int main() {
T v;
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
{
v.y++;
v.z += 2;
v.a += 3;
}
return 0;
}
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
// Fill four non-constant size elements here: the whole struct size, the region
// covering v.x, the region covering padding after v.z and the region covering
// v.b and v.c.
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8

View File

@ -0,0 +1,50 @@
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
template<typename C>
struct S {
C x;
C y;
char z; // Hidden padding after here...
};
template<typename C>
struct T : public S<C> {
C a;
C b;
C c;
};
int main() {
T<int> v;
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
{
v.y++;
v.z += 2;
v.a += 3;
}
return 0;
}
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
// Fill four non-constant size elements here: the whole struct size, the region
// covering v.x, the region covering padding after v.z and the region covering
// v.b and v.c.
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8

View File

@ -0,0 +1,87 @@
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
struct S {
int x;
int *arr;
int y;
int z;
};
int main() {
S v;
#pragma omp target map(tofrom: v, v.x, v.z)
{
v.x++;
v.y += 2;
v.z += 3;
}
#pragma omp target map(tofrom: v, v.x, v.arr[:1])
{
v.x++;
v.y += 2;
v.arr[0] += 2;
v.z += 4;
}
#pragma omp target map(tofrom: v, v.arr[:1])
{
v.x++;
v.y += 2;
v.arr[0] += 2;
v.z += 4;
}
return 0;
}
// CHECK: [[CSTSZ0:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4]
// CHECK: [[CSTTY0:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
// CHECK: [[CSTSZ1:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4]
// CHECK: [[CSTTY1:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]]
// CHECK: [[CSTSZ2:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 24, i64 4]
// CHECK: [[CSTTY2:@.+]] = private {{.*}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]]
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
// Fill two non-constant size elements here: the whole struct size, and the
// region covering v.arr and v.y.
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
// CHECK-DAG: [[ARRY:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1
// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRY]], align 8
// CHECK: call void
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
// Fill two non-constant size elements here: the whole struct size, and the
// region covering v.arr, v.y and v.z.
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
// CHECK-DAG: [[ARRYZ:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1
// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRYZ]], align 8
// CHECK: call void
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [3 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
// Fill one non-constant size element here: the whole struct size.
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [3 x i64], ptr [[SIZES]], i32 0, i32 0
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8

View File

@ -27,11 +27,11 @@ public:
void foo();
};
// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 0, i64 8]
// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 0, i64 8]
// TARGET_PARAM = 0x20
// MEMBER_OF_1 | TO = 0x1000000000001
// MEMBER_OF_1 | PTR_AND_OBJ | TO = 0x1000000000011
// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
// CK35-DAG: [[SIZE_FROM:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 8]
// TARGET_PARAM = 0x20
// MEMBER_OF_1 | PTR_AND_OBJ | FROM = 0x1000000000012
@ -86,35 +86,14 @@ void ref_map() {
// CK35-DAG: [[B_BEGIN_INTPTR]] = ptrtoint ptr [[B_BEGIN_VOID:%.+]] to i64
// CK35-DAG: [[B_ADDR:%.+]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1
// pass MEMBER_OF_1 | TO {&s, &s.b+1, ((ptr)(&s+1)-(ptr)(&s.b+1))} to copy the data of remainder of s.
// pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b.
// CK35-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
// CK35-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
// CK35-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
// CK35-DAG: store ptr [[S_ADDR]], ptr [[BP2]],
// CK35-DAG: store ptr [[B_END:%.+]], ptr [[P2]],
// CK35-DAG: store i64 [[REM_SIZE:%.+]], ptr [[S2]],
// CK35-DAG: [[B_END]] = getelementptr ptr, ptr [[B_ADDR]], i{{.+}} 1
// CK35-DAG: [[REM_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
// CK35-DAG: [[SZ]] = sub i64 [[S_END_INTPTR:%.+]], [[B_END_INTPTR:%.+]]
// CK35-DAG: [[B_END_INTPTR]] = ptrtoint ptr [[B_END_VOID:%.+]] to i64
// CK35-DAG: [[S_END_INTPTR]] = ptrtoint ptr [[S_END_VOID:%.+]] to i64
// CK35-DAG: [[S_END_VOID]] = getelementptr i8, ptr [[S_LAST:%.+]], i{{.+}} 1
// CK35-64-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i64 15
// CK35-32-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i32 7
// pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b.
// CK35-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
// CK35-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
// CK35-DAG: store ptr [[S_ADDR]], ptr [[BP3]],
// CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P3]],
// CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P2]],
// CK35-DAG: [[B_ADDR]] = load ptr, ptr [[B_REF:%.+]], align {{[0-9]+}}, !nonnull !{{[0-9]+}}, !align !{{[0-9]+}}
// CK35-DAG: [[B_REF]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1