[CIR] Generalize cxx alloc new size handling (#187790)

The non-constant size handling in `emitCXXNewAllocSize` was making the
incorrect assumption that the default behavior of the size value being
explicitly cast to size_t would be the only behavior we'd see. This is
actually only true with C++14 and later. To properly handle earlier
standards, we need the more robust checking that classic codegen does in
the equivalent function. This change adds that handling.

Assisted-by: Cursor / claude-4.6-opus-high
This commit is contained in:
Andy Kaylor 2026-03-23 11:47:16 -07:00 committed by GitHub
parent 89503bda38
commit 729480c4ae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 222 additions and 20 deletions

View File

@ -534,39 +534,97 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
// Create a value for the variable number of elements
numElements = cgf.emitScalarExpr(*e->getArraySize());
auto numElementsType = mlir::cast<cir::IntType>(numElements.getType());
[[maybe_unused]] unsigned numElementsWidth = numElementsType.getWidth();
unsigned numElementsWidth = numElementsType.getWidth();
// We might need check for overflow.
// The number of elements can have an arbitrary integer type;
// essentially, we need to multiply it by a constant factor, add a
// cookie size, and verify that the result is representable as a
// size_t. That's just a gloss, though, and it's wrong in one
// important way: if the count is negative, it's an error even if
// the cookie size would bring the total size >= 0.
bool isSigned =
(*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType();
mlir::Value hasOverflow;
// Classic codegen checks for the size variable being signed, having a
// smaller width than size_t, and having a larger width than size_t.
// However, the AST implicitly casts the size variable to size_t so none of
// these conditions will ever be met.
assert(
!(*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType() &&
(numElementsWidth == sizeWidth) &&
(numElements.getType() == cgf.sizeTy) &&
"Expected array size to be implicitly cast to size_t!");
// There are up to three conditions we need to test for:
// 1) if minElements > 0, we need to check whether numElements is smaller
// There are up to five conditions we need to test for:
// 1) if isSigned, we need to check whether numElements is negative;
// 2) if numElementsWidth > sizeWidth, we need to check whether
// numElements is larger than something representable in size_t;
// 3) if minElements > 0, we need to check whether numElements is smaller
// than that.
// 2) we need to compute
// 4) we need to compute
// sizeWithoutCookie := numElements * typeSizeMultiplier
// and check whether it overflows; and
// 3) if we need a cookie, we need to compute
// 5) if we need a cookie, we need to compute
// size := sizeWithoutCookie + cookieSize
// and check whether it overflows.
mlir::Value hasOverflow;
// If numElementsWidth > sizeWidth, then one way or another, we're
// going to have to do a comparison for (2), and this happens to
// take care of (1), too.
if (numElementsWidth > sizeWidth) {
llvm::APInt threshold =
llvm::APInt::getOneBitSet(numElementsWidth, sizeWidth);
// Use an unsigned comparison regardless of the sign of numElements.
mlir::Value unsignedNumElements = numElements;
if (isSigned)
unsignedNumElements = cgf.getBuilder().createIntCast(
numElements, cgf.getBuilder().getUIntNTy(numElementsWidth));
mlir::Value thresholdV =
cgf.getBuilder().getConstInt(loc, threshold, /*isUnsigned=*/true);
hasOverflow = cgf.getBuilder().createCompare(
loc, cir::CmpOpKind::ge, unsignedNumElements, thresholdV);
numElements = cgf.getBuilder().createIntCast(
unsignedNumElements, mlir::cast<cir::IntType>(cgf.sizeTy));
// Otherwise, if we're signed, we want to sext up to size_t.
} else if (isSigned) {
if (numElementsWidth < sizeWidth)
numElements = cgf.getBuilder().createIntCast(
numElements, cgf.getBuilder().getSIntNTy(sizeWidth));
// If there's a non-1 type size multiplier, then we can do the
// signedness check at the same time as we do the multiply
// because a negative number times anything will cause an
// unsigned overflow. Otherwise, we have to do it here. But at
// least in this case, we can subsume the >= minElements check.
if (typeSizeMultiplier == 1)
hasOverflow = cgf.getBuilder().createCompare(
loc, cir::CmpOpKind::lt, numElements,
cgf.getBuilder().getConstInt(loc, numElements.getType(),
minElements));
numElements = cgf.getBuilder().createIntCast(
numElements, mlir::cast<cir::IntType>(cgf.sizeTy));
// Otherwise, zext up to size_t if necessary.
} else if (numElementsWidth < sizeWidth) {
numElements = cgf.getBuilder().createIntCast(
numElements, mlir::cast<cir::IntType>(cgf.sizeTy));
}
assert(numElements.getType() == cgf.sizeTy);
if (minElements) {
// Don't allow allocation of fewer elements than we have initializers.
if (!hasOverflow) {
// FIXME: Avoid creating this twice. It may happen above.
mlir::Value minElementsV = cgf.getBuilder().getConstInt(
loc, llvm::APInt(sizeWidth, minElements));
hasOverflow = cgf.getBuilder().createCompare(loc, cir::CmpOpKind::lt,
numElements, minElementsV);
} else if (numElementsWidth > sizeWidth) {
// The other existing overflow subsumes this check.
// We do an unsigned comparison, since any signed value < -1 is
// taken care of either above or below.
mlir::Value minElementsV = cgf.getBuilder().getConstInt(
loc, llvm::APInt(sizeWidth, minElements));
hasOverflow = cgf.getBuilder().createOr(
loc, hasOverflow,
cgf.getBuilder().createCompare(loc, cir::CmpOpKind::lt, numElements,
minElementsV));
}
}

View File

@ -344,13 +344,23 @@ void CIRGenFunction::LexicalScope::cleanup() {
return;
// Get rid of any empty block at the end of the scope.
bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
if (!entryBlock && curBlock->empty()) {
bool isEntryBlock = builder.getInsertionBlock()->isEntryBlock();
if (!isEntryBlock && curBlock->empty()) {
curBlock->erase();
for (mlir::Block *retBlock : retBlocks) {
if (retBlock->getUses().empty())
retBlock->erase();
}
// The empty block was created by a terminator (return/break/continue)
// and is now erased. If there are pending cleanup scopes (from variables
// with destructors), we need to pop them and ensure the containing scope
// block gets a proper terminator (e.g. cir.yield). Without this, the
// cleanup-scope-op popping that would otherwise happen in
// ~RunCleanupsScope leaves the scope block without a terminator.
if (hasPendingCleanups()) {
builder.setInsertionPointToEnd(entryBlock);
insertCleanupAndLeave(entryBlock);
}
return;
}

View File

@ -1037,6 +1037,12 @@ public:
performCleanup = false;
cgf.currentCleanupStackDepth = oldCleanupStackDepth;
}
/// Whether there are any pending cleanups that have been pushed since
/// this scope was entered.
bool hasPendingCleanups() const {
return cgf.ehStack.stable_begin() != cleanupStackDepth;
}
};
// Cleanup stack depth of the RunCleanupsScope that was pushed most recently.

View File

@ -0,0 +1,58 @@
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
struct Struk {
~Struk();
bool check();
};
// Test that a for-loop body containing a local variable with a destructor
// followed by 'continue' and 'return' produces a properly terminated
// cir.scope. This exercises the interaction between LexicalScope cleanup,
// CleanupScopeOp popping, and empty-block erasure.
int test_cleanup_return_in_loop(int n) {
for (int i = 0; i < n; i++) {
Struk s;
if (s.check())
continue;
return i;
}
return -1;
}
// CIR: cir.func {{.*}} @_Z27test_cleanup_return_in_loopi
// CIR: cir.scope {
// CIR: cir.for : cond {
// CIR: } body {
// CIR: cir.scope {
// CIR: cir.cleanup.scope {
// CIR: cir.continue
// CIR: cir.return
// CIR: } cleanup normal {
// CIR: cir.call @_ZN5StrukD1Ev
// CIR: cir.yield
// CIR: cir.yield
// CIR: } step {
// CIR: cir.return
// LLVM: define dso_local noundef i32 @_Z27test_cleanup_return_in_loopi(i32 noundef %{{.*}})
// LLVM: [[LOOP_COND:.*]]:
// LLVM: %[[ICMP:.*]] = icmp slt i32
// LLVM-NEXT: br i1 %[[ICMP]]
// LLVM: [[LOOP_BODY:.*]]:
// LLVM: %[[CALL:.*]] = call noundef {{.*}} @_ZN5Struk5checkEv(
// LLVM-NEXT: br i1 %[[CALL]]
// LLVM: [[CLEANUP:.*]]:
// LLVM: call void @_ZN5StrukD1Ev(
// LLVM: switch i32 %{{.*}}, label %{{.*}} [
// LLVM: ]
// LLVM: [[LOOP_STEP:.*]]:
// LLVM: add nsw i32 %{{.*}}, 1
// LLVM: [[POST_LOOP:.*]]:
// LLVM: store i32 -1, ptr %{{.*}}, align 4
// LLVM: ret i32

View File

@ -0,0 +1,70 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
// Tests for array new expressions where the array size is not implicitly
// converted to size_t by Sema (pre-C++14 behavior). The CIR codegen must
// handle the signed/width conversion itself.
typedef __typeof__(sizeof(int)) size_t;
// Sized non-allocating (placement) new.
void *operator new[](size_t, void *p) noexcept { return p; }
struct S {
int x;
S();
};
// Signed int array size with multi-byte element (typeSizeMultiplier != 1).
// The sign extension is done and the multiply overflow catches negative values.
void t_new_signed_size(int n) {
auto p = new double[n];
}
// CIR-LABEL: cir.func {{.*}} @_Z17t_new_signed_sizei
// CIR: %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
// CIR: %[[N_SEXT:.*]] = cir.cast integral %[[N]] : !s32i -> !s64i
// CIR: %[[N_SIZE_T:.*]] = cir.cast integral %[[N_SEXT]] : !s64i -> !u64i
// CIR: %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<8> : !u64i
// CIR: %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.mul.overflow %[[N_SIZE_T]], %[[ELEMENT_SIZE]] : !u64i -> !u64i
// CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
// CIR: %[[ALLOC_SIZE:.*]] = cir.select if %[[OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]]
// LLVM-LABEL: define{{.*}} void @_Z17t_new_signed_sizei
// LLVM: %[[N:.*]] = load i32, ptr %{{.+}}
// LLVM: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
// LLVM: %[[MUL_OVERFLOW:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
// OGCG-LABEL: define{{.*}} void @_Z17t_new_signed_sizei
// OGCG: %[[N:.*]] = load i32, ptr %{{.+}}
// OGCG: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
// OGCG: %[[MUL_OVERFLOW:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
// Signed int array size with single-byte element (typeSizeMultiplier == 1).
// A signed comparison catches negative values directly.
void t_new_signed_size_char(int n) {
auto p = new char[n];
}
// CIR-LABEL: cir.func {{.*}} @_Z22t_new_signed_size_chari
// CIR: %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
// CIR: %[[N_SEXT:.*]] = cir.cast integral %[[N]] : !s32i -> !s64i
// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
// CIR: %[[IS_NEG:.*]] = cir.cmp lt %[[N_SEXT]], %[[ZERO]] : !s64i
// CIR: %[[N_SIZE_T:.*]] = cir.cast integral %[[N_SEXT]] : !s64i -> !u64i
// CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
// CIR: %[[ALLOC_SIZE:.*]] = cir.select if %[[IS_NEG]] then %[[ALL_ONES]] else %[[N_SIZE_T]]
// LLVM-LABEL: define{{.*}} void @_Z22t_new_signed_size_chari
// LLVM: %[[N:.*]] = load i32, ptr %{{.+}}
// LLVM: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
// LLVM: %[[IS_NEG:.*]] = icmp slt i64 %[[N_SIZE_T]], 0
// OGCG-LABEL: define{{.*}} void @_Z22t_new_signed_size_chari
// OGCG: %[[N:.*]] = load i32, ptr %{{.+}}
// OGCG: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
// OGCG: %[[IS_NEG:.*]] = icmp slt i64 %[[N_SIZE_T]], 0