[CIR] Use data size in emitAggregateCopy for overlapping copies (#186702)

Add skip_tail_padding property to cir.copy to handle
potentially-overlapping
subobject copies directly, instead of falling back to cir.libc.memcpy.
When
set, the lowering uses the record's data size (excluding tail padding)
for
the memcpy length. This keeps typed semantics and promotability of
cir.copy.

Also fix CXXABILowering to preserve op properties when recreating
operations,
and expose RecordType::computeStructDataSize() for computing data size
of
padded record types.
This commit is contained in:
Henrich Lauko 2026-04-06 18:24:10 +02:00 committed by GitHub
parent 930ef7736e
commit 348295ac05
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 164 additions and 20 deletions

View File

@ -366,8 +366,10 @@ public:
/// Create a copy with inferred length.
cir::CopyOp createCopy(mlir::Value dst, mlir::Value src,
bool isVolatile = false) {
return cir::CopyOp::create(*this, dst.getLoc(), dst, src, isVolatile);
bool isVolatile = false,
bool skipTailPadding = false) {
return cir::CopyOp::create(*this, dst.getLoc(), dst, src, isVolatile,
skipTailPadding);
}
cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst,

View File

@ -4149,21 +4149,32 @@ def CIR_CopyOp : CIR_Op<"copy",[
The `volatile` keyword indicates that the operation is volatile.
The `skip_tail_padding` keyword indicates that only the data bytes should
be copied, excluding any tail padding. This is used when copying
potentially-overlapping subobjects where the tail padding might be occupied
by other objects (e.g. fields marked with `[[no_unique_address]]`). This
is only valid when the pointee type is a record type.
Examples:
```mlir
// Copying contents from one record to another:
cir.copy %0 to %1 : !cir.ptr<!record_ty>
// Copying without tail padding (for overlapping subobjects):
cir.copy %0 to %1 skip_tail_padding : !cir.ptr<!record_ty>
```
}];
let arguments = (ins
Arg<CIR_PointerType, "", [MemWrite]>:$dst,
Arg<CIR_PointerType, "", [MemRead]>:$src,
UnitAttr:$is_volatile
UnitAttr:$is_volatile,
UnitAttr:$skip_tail_padding
);
let assemblyFormat = [{$src `to` $dst (`volatile` $is_volatile^)?
(`skip_tail_padding` $skip_tail_padding^)?
attr-dict `:` qualified(type($dst))
}];
let hasVerifier = 1;
@ -4172,10 +4183,15 @@ def CIR_CopyOp : CIR_Op<"copy",[
/// Returns the pointer type being copied.
cir::PointerType getType() { return getSrc().getType(); }
/// Returns the number of bytes to be copied.
unsigned getLength(const mlir::DataLayout &dt) {
return dt.getTypeSize(getType().getPointee());
}
/// Returns the number of bytes to be copied. When skip_tail_padding is
/// set, returns the data size (excluding tail padding) of the pointee
/// record type. Otherwise returns the full type size.
unsigned getCopySizeInBytes(const mlir::DataLayout &dl) {
if (getSkipTailPadding())
return mlir::cast<cir::RecordType>(getType().getPointee())
.computeStructDataSize(dl);
return dl.getTypeSize(getType().getPointee());
}
}];
}

View File

@ -766,6 +766,9 @@ def CIR_RecordType : CIR_Type<"Record", "record", [
return isComplete();
}
/// Returns the data size (excluding tail padding) for struct types.
unsigned computeStructDataSize(const mlir::DataLayout &dataLayout) const;
private:
static constexpr char abi_conversion_prefix[] = "__post_abi_";

View File

@ -1244,16 +1244,21 @@ void CIRGenFunction::emitAggregateCopy(LValue dest, LValue src, QualType ty,
assert(!cir::MissingFeatures::aggValueSlotVolatile());
// NOTE(cir): original codegen would normally convert destPtr and srcPtr to
// i8* since memcpy operates on bytes. We don't need that in CIR because
// cir.copy will operate on any CIR pointer that points to a sized type.
// Don't do any of the memmove_collectable tests if GC isn't set.
if (cgm.getLangOpts().getGC() != LangOptions::NonGC)
cgm.errorNYI("emitAggregateCopy: GC");
[[maybe_unused]] cir::CopyOp copyOp =
builder.createCopy(destPtr.getPointer(), srcPtr.getPointer(), isVolatile);
// If the data size (excluding tail padding) differs from the full type size,
// use skip_tail_padding to avoid clobbering tail padding that may be occupied
// by other objects (e.g. fields marked with [[no_unique_address]]).
CharUnits dataSize = typeInfo.Width;
bool skipTailPadding =
mayOverlap && dataSize != getContext().getTypeSizeInChars(ty);
// NOTE(cir): original codegen would normally convert destPtr and srcPtr to
// i8* since memcpy operates on bytes. We don't need that in CIR because
// cir.copy will operate on any CIR pointer that points to a sized type.
builder.createCopy(destPtr.getPointer(), srcPtr.getPointer(), isVolatile,
skipTailPadding);
assert(!cir::MissingFeatures::opTBAA());
}

View File

@ -377,7 +377,7 @@ public:
enum IsDestructed_t { IsNotDestructed, IsDestructed };
enum IsZeroed_t { IsNotZeroed, IsZeroed };
enum IsAliased_t { IsNotAliased, IsAliased };
enum Overlap_t { MayOverlap, DoesNotOverlap };
enum Overlap_t { DoesNotOverlap, MayOverlap };
/// Returns an aggregate value slot indicating that the aggregate
/// value is being ignored.

View File

@ -2856,6 +2856,11 @@ LogicalResult cir::CopyOp::verify() {
if (getSrc() == getDst())
return emitError() << "source and destination are the same";
if (getSkipTailPadding() &&
!mlir::isa<cir::RecordType>(getType().getPointee()))
return emitError()
<< "skip_tail_padding is only valid for record pointee types";
return mlir::success();
}

View File

@ -155,7 +155,8 @@ bool cir::CopyOp::canUsesBeRemoved(
if (getDst() == getSrc())
return false;
return getLength(dataLayout) == dataLayout.getTypeSize(slot.elemType);
return getCopySizeInBytes(dataLayout) ==
dataLayout.getTypeSize(slot.elemType);
}
//===----------------------------------------------------------------------===//

View File

@ -427,6 +427,29 @@ RecordType::computeStructSize(const mlir::DataLayout &dataLayout) const {
return recordSize;
}
unsigned
RecordType::computeStructDataSize(const mlir::DataLayout &dataLayout) const {
assert(isComplete() && "Cannot get layout of incomplete records");
// Compute the data size (excluding tail padding) for this record type. For
// padded records, the last member is the tail padding array added by
// CIRGenRecordLayoutBuilder::appendPaddingBytes, so we exclude it. For
// non-padded records, data size equals the full struct size without
// alignment.
auto members = getMembers();
unsigned numMembers =
getPadded() && members.size() > 1 ? members.size() - 1 : members.size();
unsigned recordSize = 0;
for (unsigned i = 0; i < numMembers; ++i) {
mlir::Type ty = members[i];
const uint64_t tyAlign =
(getPacked() ? 1 : dataLayout.getTypeABIAlignment(ty));
recordSize = llvm::alignTo(recordSize, tyAlign);
recordSize += dataLayout.getTypeSize(ty);
}
return recordSize;
}
// We also compute the alignment as part of computeStructSize, but this is more
// efficient. Ideally, we'd like to compute both at once and cache the result,
// but that's implemented yet.

View File

@ -188,7 +188,8 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite(
mlir::ConversionPatternRewriter &rewriter) const {
mlir::DataLayout layout(op->getParentOfType<mlir::ModuleOp>());
const mlir::Value length = mlir::LLVM::ConstantOp::create(
rewriter, op.getLoc(), rewriter.getI64Type(), op.getLength(layout));
rewriter, op.getLoc(), rewriter.getI64Type(),
op.getCopySizeInBytes(layout));
assert(!cir::MissingFeatures::aggValueSlotVolatile());
rewriter.replaceOpWithNewOp<mlir::LLVM::MemcpyOp>(
op, adaptor.getDst(), adaptor.getSrc(), length, op.getIsVolatile());

View File

@ -0,0 +1,73 @@
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
// RUN: -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
// RUN: -fclangir -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
// RUN: -emit-llvm %s -o %t.og.ll
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s
// Test that emitAggregateCopy uses the data size (excluding tail padding)
// when copying potentially-overlapping subobjects, and uses full type size
// otherwise.
struct Base { int x; };
struct HasPadding : Base {
char c;
// sizeof(HasPadding) = 8 (4 for x, 1 for c, 3 tail padding)
// data size = 5
};
struct VBase { int v; };
// Outer has a virtual base, so its nvsize (14) is smaller than its full
// sizeof (24). Because [[no_unique_address]] HasPadding extends beyond
// nvsize (offset 8 + sizeof 8 = 16 > 14), getOverlapForFieldInit returns
// MayOverlap, and emitAggregateCopy must use the data size (5) instead of
// the full sizeof (8).
struct Outer : virtual VBase {
[[no_unique_address]] HasPadding hp;
char extra;
Outer(const HasPadding &hp, char e) : hp(hp), extra(e) {}
};
// With virtual bases, only the C1 (complete) constructor is emitted.
// CIR-LABEL: cir.func {{.*}} @_ZN5OuterC1ERK10HasPaddingc(
// CIR: cir.copy %{{.+}} to %{{.+}} skip_tail_padding : !cir.ptr<!rec_HasPadding>
// LLVM-LABEL: define {{.*}} void @_ZN5OuterC1ERK10HasPaddingc(
// LLVM: %[[GEP:.*]] = getelementptr %struct.Outer, ptr %{{.+}}, i32 0, i32 1
// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 5, i1 false)
// OGCG-LABEL: define {{.*}} void @_ZN5OuterC1ERK10HasPaddingc(
// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr %{{.+}}, i32 0, i32 1
// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 5, i1 false)
void test_overlap(const HasPadding &hp) {
Outer o(hp, 'x');
}
// NonOverlapping does NOT have [[no_unique_address]], so the copy uses
// cir.copy (full type size) rather than cir.libc.memcpy.
struct NonOverlapping {
HasPadding hp;
char extra;
NonOverlapping(const HasPadding &hp, char e) : hp(hp), extra(e) {}
};
// CIR-LABEL: cir.func {{.*}} @_ZN14NonOverlappingC2ERK10HasPaddingc(
// CIR: cir.copy %{{.+}} to %{{.+}} : !cir.ptr<!rec_HasPadding>
// LLVM-LABEL: define {{.*}} void @_ZN14NonOverlappingC2ERK10HasPaddingc(
// LLVM: %[[GEP:.*]] = getelementptr %struct.NonOverlapping, ptr %{{.+}}, i32 0, i32 0
// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 8, i1 false)
// OGCG-LABEL: define {{.*}} void @_ZN14NonOverlappingC2ERK10HasPaddingc(
// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.NonOverlapping, ptr %{{.+}}, i32 0, i32 0
// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 8, i1 false)
void test_no_overlap(const HasPadding &hp) {
NonOverlapping o(hp, 'x');
}

View File

@ -35,17 +35,15 @@ struct Outer {
// CIR: %[[THIS:.*]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!rec_Outer>>, !cir.ptr<!rec_Outer>
// CIR: %[[M_BASE:.*]] = cir.get_member %[[THIS]][0] {name = "m"} : !cir.ptr<!rec_Outer> -> !cir.ptr<!rec_Middle2Ebase>
// CIR-NEXT: %[[M_COMPLETE:.*]] = cir.cast bitcast %[[M_BASE]] : !cir.ptr<!rec_Middle2Ebase> -> !cir.ptr<!rec_Middle>
// CIR: cir.copy %{{.+}} to %[[M_COMPLETE]] : !cir.ptr<!rec_Middle>
// CIR: cir.copy %{{.+}} to %[[M_COMPLETE]] skip_tail_padding : !cir.ptr<!rec_Middle>
// CIR: %[[EXTRA:.*]] = cir.get_member %[[THIS]][1] {name = "extra"} : !cir.ptr<!rec_Outer> -> !cir.ptr<!s8i>
// LLVM-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec(
// LLVM: %[[GEP:.*]] = getelementptr %struct.Outer, ptr %{{.+}}, i32 0, i32 0
// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 8, i1 false)
// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 5, i1 false)
// OGCG-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec(
// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr %{{.+}}, i32 0, i32 0
// TODO(CIR): OG emits i64 5 here via ConstructorMemcpyizer, which CIR
// doesn't have yet. CIR copies the full 8-byte type instead.
// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 5, i1 false)
void test(const Middle &m) {

View File

@ -1,10 +1,16 @@
// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
!s32i = !cir.int<s, 32>
!rec = !cir.record<struct "S" {!s32i, !cir.int<s, 8>}>
module {
cir.func @shouldParseCopyOp(%arg0 : !cir.ptr<!s32i>, %arg1 : !cir.ptr<!s32i>) {
cir.copy %arg0 to %arg1 : !cir.ptr<!s32i>
// CHECK: cir.copy %arg0 to %arg1 : !cir.ptr<!s32i>
cir.return
}
cir.func @shouldParseCopyOpSkipTailPadding(%arg0 : !cir.ptr<!rec>, %arg1 : !cir.ptr<!rec>) {
cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!rec>
// CHECK: cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!rec_S>
cir.return
}
}

View File

@ -19,3 +19,14 @@ module {
cir.return
}
}
// -----
module {
// skip_tail_padding is only valid for record pointee types.
cir.func @invalid_skip_tail_padding(%arg0 : !cir.ptr<!cir.int<s, 32>>, %arg1 : !cir.ptr<!cir.int<s, 32>>) {
// expected-error@+1 {{skip_tail_padding is only valid for record pointee types}}
cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!cir.int<s, 32>>
cir.return
}
}