[CIR] Use data size in emitAggregateCopy for overlapping copies (#186702)
Add skip_tail_padding property to cir.copy to handle potentially-overlapping subobject copies directly, instead of falling back to cir.libc.memcpy. When set, the lowering uses the record's data size (excluding tail padding) for the memcpy length. This keeps typed semantics and promotability of cir.copy. Also fix CXXABILowering to preserve op properties when recreating operations, and expose RecordType::computeStructDataSize() for computing data size of padded record types.
This commit is contained in:
parent
930ef7736e
commit
348295ac05
@ -366,8 +366,10 @@ public:
|
||||
|
||||
/// Create a copy with inferred length.
|
||||
cir::CopyOp createCopy(mlir::Value dst, mlir::Value src,
|
||||
bool isVolatile = false) {
|
||||
return cir::CopyOp::create(*this, dst.getLoc(), dst, src, isVolatile);
|
||||
bool isVolatile = false,
|
||||
bool skipTailPadding = false) {
|
||||
return cir::CopyOp::create(*this, dst.getLoc(), dst, src, isVolatile,
|
||||
skipTailPadding);
|
||||
}
|
||||
|
||||
cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst,
|
||||
|
||||
@ -4149,21 +4149,32 @@ def CIR_CopyOp : CIR_Op<"copy",[
|
||||
|
||||
The `volatile` keyword indicates that the operation is volatile.
|
||||
|
||||
The `skip_tail_padding` keyword indicates that only the data bytes should
|
||||
be copied, excluding any tail padding. This is used when copying
|
||||
potentially-overlapping subobjects where the tail padding might be occupied
|
||||
by other objects (e.g. fields marked with `[[no_unique_address]]`). This
|
||||
is only valid when the pointee type is a record type.
|
||||
|
||||
Examples:
|
||||
|
||||
```mlir
|
||||
// Copying contents from one record to another:
|
||||
cir.copy %0 to %1 : !cir.ptr<!record_ty>
|
||||
|
||||
// Copying without tail padding (for overlapping subobjects):
|
||||
cir.copy %0 to %1 skip_tail_padding : !cir.ptr<!record_ty>
|
||||
```
|
||||
}];
|
||||
|
||||
let arguments = (ins
|
||||
Arg<CIR_PointerType, "", [MemWrite]>:$dst,
|
||||
Arg<CIR_PointerType, "", [MemRead]>:$src,
|
||||
UnitAttr:$is_volatile
|
||||
UnitAttr:$is_volatile,
|
||||
UnitAttr:$skip_tail_padding
|
||||
);
|
||||
|
||||
let assemblyFormat = [{$src `to` $dst (`volatile` $is_volatile^)?
|
||||
(`skip_tail_padding` $skip_tail_padding^)?
|
||||
attr-dict `:` qualified(type($dst))
|
||||
}];
|
||||
let hasVerifier = 1;
|
||||
@ -4172,10 +4183,15 @@ def CIR_CopyOp : CIR_Op<"copy",[
|
||||
/// Returns the pointer type being copied.
|
||||
cir::PointerType getType() { return getSrc().getType(); }
|
||||
|
||||
/// Returns the number of bytes to be copied.
|
||||
unsigned getLength(const mlir::DataLayout &dt) {
|
||||
return dt.getTypeSize(getType().getPointee());
|
||||
}
|
||||
/// Returns the number of bytes to be copied. When skip_tail_padding is
|
||||
/// set, returns the data size (excluding tail padding) of the pointee
|
||||
/// record type. Otherwise returns the full type size.
|
||||
unsigned getCopySizeInBytes(const mlir::DataLayout &dl) {
|
||||
if (getSkipTailPadding())
|
||||
return mlir::cast<cir::RecordType>(getType().getPointee())
|
||||
.computeStructDataSize(dl);
|
||||
return dl.getTypeSize(getType().getPointee());
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
|
||||
@ -766,6 +766,9 @@ def CIR_RecordType : CIR_Type<"Record", "record", [
|
||||
return isComplete();
|
||||
}
|
||||
|
||||
/// Returns the data size (excluding tail padding) for struct types.
|
||||
unsigned computeStructDataSize(const mlir::DataLayout &dataLayout) const;
|
||||
|
||||
private:
|
||||
|
||||
static constexpr char abi_conversion_prefix[] = "__post_abi_";
|
||||
|
||||
@ -1244,16 +1244,21 @@ void CIRGenFunction::emitAggregateCopy(LValue dest, LValue src, QualType ty,
|
||||
|
||||
assert(!cir::MissingFeatures::aggValueSlotVolatile());
|
||||
|
||||
// NOTE(cir): original codegen would normally convert destPtr and srcPtr to
|
||||
// i8* since memcpy operates on bytes. We don't need that in CIR because
|
||||
// cir.copy will operate on any CIR pointer that points to a sized type.
|
||||
|
||||
// Don't do any of the memmove_collectable tests if GC isn't set.
|
||||
if (cgm.getLangOpts().getGC() != LangOptions::NonGC)
|
||||
cgm.errorNYI("emitAggregateCopy: GC");
|
||||
|
||||
[[maybe_unused]] cir::CopyOp copyOp =
|
||||
builder.createCopy(destPtr.getPointer(), srcPtr.getPointer(), isVolatile);
|
||||
// If the data size (excluding tail padding) differs from the full type size,
|
||||
// use skip_tail_padding to avoid clobbering tail padding that may be occupied
|
||||
// by other objects (e.g. fields marked with [[no_unique_address]]).
|
||||
CharUnits dataSize = typeInfo.Width;
|
||||
bool skipTailPadding =
|
||||
mayOverlap && dataSize != getContext().getTypeSizeInChars(ty);
|
||||
// NOTE(cir): original codegen would normally convert destPtr and srcPtr to
|
||||
// i8* since memcpy operates on bytes. We don't need that in CIR because
|
||||
// cir.copy will operate on any CIR pointer that points to a sized type.
|
||||
builder.createCopy(destPtr.getPointer(), srcPtr.getPointer(), isVolatile,
|
||||
skipTailPadding);
|
||||
|
||||
assert(!cir::MissingFeatures::opTBAA());
|
||||
}
|
||||
|
||||
@ -377,7 +377,7 @@ public:
|
||||
enum IsDestructed_t { IsNotDestructed, IsDestructed };
|
||||
enum IsZeroed_t { IsNotZeroed, IsZeroed };
|
||||
enum IsAliased_t { IsNotAliased, IsAliased };
|
||||
enum Overlap_t { MayOverlap, DoesNotOverlap };
|
||||
enum Overlap_t { DoesNotOverlap, MayOverlap };
|
||||
|
||||
/// Returns an aggregate value slot indicating that the aggregate
|
||||
/// value is being ignored.
|
||||
|
||||
@ -2856,6 +2856,11 @@ LogicalResult cir::CopyOp::verify() {
|
||||
if (getSrc() == getDst())
|
||||
return emitError() << "source and destination are the same";
|
||||
|
||||
if (getSkipTailPadding() &&
|
||||
!mlir::isa<cir::RecordType>(getType().getPointee()))
|
||||
return emitError()
|
||||
<< "skip_tail_padding is only valid for record pointee types";
|
||||
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
|
||||
@ -155,7 +155,8 @@ bool cir::CopyOp::canUsesBeRemoved(
|
||||
if (getDst() == getSrc())
|
||||
return false;
|
||||
|
||||
return getLength(dataLayout) == dataLayout.getTypeSize(slot.elemType);
|
||||
return getCopySizeInBytes(dataLayout) ==
|
||||
dataLayout.getTypeSize(slot.elemType);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -427,6 +427,29 @@ RecordType::computeStructSize(const mlir::DataLayout &dataLayout) const {
|
||||
return recordSize;
|
||||
}
|
||||
|
||||
unsigned
|
||||
RecordType::computeStructDataSize(const mlir::DataLayout &dataLayout) const {
|
||||
assert(isComplete() && "Cannot get layout of incomplete records");
|
||||
|
||||
// Compute the data size (excluding tail padding) for this record type. For
|
||||
// padded records, the last member is the tail padding array added by
|
||||
// CIRGenRecordLayoutBuilder::appendPaddingBytes, so we exclude it. For
|
||||
// non-padded records, data size equals the full struct size without
|
||||
// alignment.
|
||||
auto members = getMembers();
|
||||
unsigned numMembers =
|
||||
getPadded() && members.size() > 1 ? members.size() - 1 : members.size();
|
||||
unsigned recordSize = 0;
|
||||
for (unsigned i = 0; i < numMembers; ++i) {
|
||||
mlir::Type ty = members[i];
|
||||
const uint64_t tyAlign =
|
||||
(getPacked() ? 1 : dataLayout.getTypeABIAlignment(ty));
|
||||
recordSize = llvm::alignTo(recordSize, tyAlign);
|
||||
recordSize += dataLayout.getTypeSize(ty);
|
||||
}
|
||||
return recordSize;
|
||||
}
|
||||
|
||||
// We also compute the alignment as part of computeStructSize, but this is more
|
||||
// efficient. Ideally, we'd like to compute both at once and cache the result,
|
||||
// but that's implemented yet.
|
||||
|
||||
@ -188,7 +188,8 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite(
|
||||
mlir::ConversionPatternRewriter &rewriter) const {
|
||||
mlir::DataLayout layout(op->getParentOfType<mlir::ModuleOp>());
|
||||
const mlir::Value length = mlir::LLVM::ConstantOp::create(
|
||||
rewriter, op.getLoc(), rewriter.getI64Type(), op.getLength(layout));
|
||||
rewriter, op.getLoc(), rewriter.getI64Type(),
|
||||
op.getCopySizeInBytes(layout));
|
||||
assert(!cir::MissingFeatures::aggValueSlotVolatile());
|
||||
rewriter.replaceOpWithNewOp<mlir::LLVM::MemcpyOp>(
|
||||
op, adaptor.getDst(), adaptor.getSrc(), length, op.getIsVolatile());
|
||||
|
||||
73
clang/test/CIR/CodeGen/aggregate-copy-overlap.cpp
Normal file
73
clang/test/CIR/CodeGen/aggregate-copy-overlap.cpp
Normal file
@ -0,0 +1,73 @@
|
||||
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
|
||||
// RUN: -fclangir -emit-cir %s -o %t.cir
|
||||
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
|
||||
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
|
||||
// RUN: -fclangir -emit-llvm %s -o %t.ll
|
||||
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
|
||||
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
|
||||
// RUN: -emit-llvm %s -o %t.og.ll
|
||||
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s
|
||||
|
||||
// Test that emitAggregateCopy uses the data size (excluding tail padding)
|
||||
// when copying potentially-overlapping subobjects, and uses full type size
|
||||
// otherwise.
|
||||
|
||||
struct Base { int x; };
|
||||
|
||||
struct HasPadding : Base {
|
||||
char c;
|
||||
// sizeof(HasPadding) = 8 (4 for x, 1 for c, 3 tail padding)
|
||||
// data size = 5
|
||||
};
|
||||
|
||||
struct VBase { int v; };
|
||||
|
||||
// Outer has a virtual base, so its nvsize (14) is smaller than its full
|
||||
// sizeof (24). Because [[no_unique_address]] HasPadding extends beyond
|
||||
// nvsize (offset 8 + sizeof 8 = 16 > 14), getOverlapForFieldInit returns
|
||||
// MayOverlap, and emitAggregateCopy must use the data size (5) instead of
|
||||
// the full sizeof (8).
|
||||
struct Outer : virtual VBase {
|
||||
[[no_unique_address]] HasPadding hp;
|
||||
char extra;
|
||||
Outer(const HasPadding &hp, char e) : hp(hp), extra(e) {}
|
||||
};
|
||||
|
||||
// With virtual bases, only the C1 (complete) constructor is emitted.
|
||||
// CIR-LABEL: cir.func {{.*}} @_ZN5OuterC1ERK10HasPaddingc(
|
||||
// CIR: cir.copy %{{.+}} to %{{.+}} skip_tail_padding : !cir.ptr<!rec_HasPadding>
|
||||
|
||||
// LLVM-LABEL: define {{.*}} void @_ZN5OuterC1ERK10HasPaddingc(
|
||||
// LLVM: %[[GEP:.*]] = getelementptr %struct.Outer, ptr %{{.+}}, i32 0, i32 1
|
||||
// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 5, i1 false)
|
||||
|
||||
// OGCG-LABEL: define {{.*}} void @_ZN5OuterC1ERK10HasPaddingc(
|
||||
// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr %{{.+}}, i32 0, i32 1
|
||||
// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 5, i1 false)
|
||||
|
||||
void test_overlap(const HasPadding &hp) {
|
||||
Outer o(hp, 'x');
|
||||
}
|
||||
|
||||
// NonOverlapping does NOT have [[no_unique_address]], so the copy uses
|
||||
// cir.copy (full type size) rather than cir.libc.memcpy.
|
||||
struct NonOverlapping {
|
||||
HasPadding hp;
|
||||
char extra;
|
||||
NonOverlapping(const HasPadding &hp, char e) : hp(hp), extra(e) {}
|
||||
};
|
||||
|
||||
// CIR-LABEL: cir.func {{.*}} @_ZN14NonOverlappingC2ERK10HasPaddingc(
|
||||
// CIR: cir.copy %{{.+}} to %{{.+}} : !cir.ptr<!rec_HasPadding>
|
||||
|
||||
// LLVM-LABEL: define {{.*}} void @_ZN14NonOverlappingC2ERK10HasPaddingc(
|
||||
// LLVM: %[[GEP:.*]] = getelementptr %struct.NonOverlapping, ptr %{{.+}}, i32 0, i32 0
|
||||
// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 8, i1 false)
|
||||
|
||||
// OGCG-LABEL: define {{.*}} void @_ZN14NonOverlappingC2ERK10HasPaddingc(
|
||||
// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.NonOverlapping, ptr %{{.+}}, i32 0, i32 0
|
||||
// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 8, i1 false)
|
||||
|
||||
void test_no_overlap(const HasPadding &hp) {
|
||||
NonOverlapping o(hp, 'x');
|
||||
}
|
||||
@ -35,17 +35,15 @@ struct Outer {
|
||||
// CIR: %[[THIS:.*]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!rec_Outer>>, !cir.ptr<!rec_Outer>
|
||||
// CIR: %[[M_BASE:.*]] = cir.get_member %[[THIS]][0] {name = "m"} : !cir.ptr<!rec_Outer> -> !cir.ptr<!rec_Middle2Ebase>
|
||||
// CIR-NEXT: %[[M_COMPLETE:.*]] = cir.cast bitcast %[[M_BASE]] : !cir.ptr<!rec_Middle2Ebase> -> !cir.ptr<!rec_Middle>
|
||||
// CIR: cir.copy %{{.+}} to %[[M_COMPLETE]] : !cir.ptr<!rec_Middle>
|
||||
// CIR: cir.copy %{{.+}} to %[[M_COMPLETE]] skip_tail_padding : !cir.ptr<!rec_Middle>
|
||||
// CIR: %[[EXTRA:.*]] = cir.get_member %[[THIS]][1] {name = "extra"} : !cir.ptr<!rec_Outer> -> !cir.ptr<!s8i>
|
||||
|
||||
// LLVM-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec(
|
||||
// LLVM: %[[GEP:.*]] = getelementptr %struct.Outer, ptr %{{.+}}, i32 0, i32 0
|
||||
// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 8, i1 false)
|
||||
// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 5, i1 false)
|
||||
|
||||
// OGCG-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec(
|
||||
// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr %{{.+}}, i32 0, i32 0
|
||||
// TODO(CIR): OG emits i64 5 here via ConstructorMemcpyizer, which CIR
|
||||
// doesn't have yet. CIR copies the full 8-byte type instead.
|
||||
// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 5, i1 false)
|
||||
|
||||
void test(const Middle &m) {
|
||||
|
||||
@ -1,10 +1,16 @@
|
||||
// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
|
||||
|
||||
!s32i = !cir.int<s, 32>
|
||||
!rec = !cir.record<struct "S" {!s32i, !cir.int<s, 8>}>
|
||||
module {
|
||||
cir.func @shouldParseCopyOp(%arg0 : !cir.ptr<!s32i>, %arg1 : !cir.ptr<!s32i>) {
|
||||
cir.copy %arg0 to %arg1 : !cir.ptr<!s32i>
|
||||
// CHECK: cir.copy %arg0 to %arg1 : !cir.ptr<!s32i>
|
||||
cir.return
|
||||
}
|
||||
cir.func @shouldParseCopyOpSkipTailPadding(%arg0 : !cir.ptr<!rec>, %arg1 : !cir.ptr<!rec>) {
|
||||
cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!rec>
|
||||
// CHECK: cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!rec_S>
|
||||
cir.return
|
||||
}
|
||||
}
|
||||
|
||||
@ -19,3 +19,14 @@ module {
|
||||
cir.return
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module {
|
||||
// skip_tail_padding is only valid for record pointee types.
|
||||
cir.func @invalid_skip_tail_padding(%arg0 : !cir.ptr<!cir.int<s, 32>>, %arg1 : !cir.ptr<!cir.int<s, 32>>) {
|
||||
// expected-error@+1 {{skip_tail_padding is only valid for record pointee types}}
|
||||
cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!cir.int<s, 32>>
|
||||
cir.return
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user