[CIR] Generalize cxx alloc new size handling (#187790)
The non-constant size handling in `emitCXXNewAllocSize` was making the incorrect assumption that the default behavior of the size value being explicitly cast to size_t would be the only behavior we'd see. This is actually only true with C++14 and later. To properly handle earlier standards, we need the more robust checking that classic codegen does in the equivalent function. This change adds that handling. Assisted-by: Cursor / claude-4.6-opus-high
This commit is contained in:
parent
89503bda38
commit
729480c4ae
@ -534,39 +534,97 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
|
||||
// Create a value for the variable number of elements
|
||||
numElements = cgf.emitScalarExpr(*e->getArraySize());
|
||||
auto numElementsType = mlir::cast<cir::IntType>(numElements.getType());
|
||||
[[maybe_unused]] unsigned numElementsWidth = numElementsType.getWidth();
|
||||
unsigned numElementsWidth = numElementsType.getWidth();
|
||||
|
||||
// We might need check for overflow.
|
||||
// The number of elements can have an arbitrary integer type;
|
||||
// essentially, we need to multiply it by a constant factor, add a
|
||||
// cookie size, and verify that the result is representable as a
|
||||
// size_t. That's just a gloss, though, and it's wrong in one
|
||||
// important way: if the count is negative, it's an error even if
|
||||
// the cookie size would bring the total size >= 0.
|
||||
bool isSigned =
|
||||
(*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType();
|
||||
|
||||
mlir::Value hasOverflow;
|
||||
// Classic codegen checks for the size variable being signed, having a
|
||||
// smaller width than size_t, and having a larger width than size_t.
|
||||
// However, the AST implicitly casts the size variable to size_t so none of
|
||||
// these conditions will ever be met.
|
||||
assert(
|
||||
!(*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType() &&
|
||||
(numElementsWidth == sizeWidth) &&
|
||||
(numElements.getType() == cgf.sizeTy) &&
|
||||
"Expected array size to be implicitly cast to size_t!");
|
||||
|
||||
// There are up to three conditions we need to test for:
|
||||
// 1) if minElements > 0, we need to check whether numElements is smaller
|
||||
// There are up to five conditions we need to test for:
|
||||
// 1) if isSigned, we need to check whether numElements is negative;
|
||||
// 2) if numElementsWidth > sizeWidth, we need to check whether
|
||||
// numElements is larger than something representable in size_t;
|
||||
// 3) if minElements > 0, we need to check whether numElements is smaller
|
||||
// than that.
|
||||
// 2) we need to compute
|
||||
// 4) we need to compute
|
||||
// sizeWithoutCookie := numElements * typeSizeMultiplier
|
||||
// and check whether it overflows; and
|
||||
// 3) if we need a cookie, we need to compute
|
||||
// 5) if we need a cookie, we need to compute
|
||||
// size := sizeWithoutCookie + cookieSize
|
||||
// and check whether it overflows.
|
||||
|
||||
mlir::Value hasOverflow;
|
||||
|
||||
// If numElementsWidth > sizeWidth, then one way or another, we're
|
||||
// going to have to do a comparison for (2), and this happens to
|
||||
// take care of (1), too.
|
||||
if (numElementsWidth > sizeWidth) {
|
||||
llvm::APInt threshold =
|
||||
llvm::APInt::getOneBitSet(numElementsWidth, sizeWidth);
|
||||
|
||||
// Use an unsigned comparison regardless of the sign of numElements.
|
||||
mlir::Value unsignedNumElements = numElements;
|
||||
if (isSigned)
|
||||
unsignedNumElements = cgf.getBuilder().createIntCast(
|
||||
numElements, cgf.getBuilder().getUIntNTy(numElementsWidth));
|
||||
|
||||
mlir::Value thresholdV =
|
||||
cgf.getBuilder().getConstInt(loc, threshold, /*isUnsigned=*/true);
|
||||
hasOverflow = cgf.getBuilder().createCompare(
|
||||
loc, cir::CmpOpKind::ge, unsignedNumElements, thresholdV);
|
||||
numElements = cgf.getBuilder().createIntCast(
|
||||
unsignedNumElements, mlir::cast<cir::IntType>(cgf.sizeTy));
|
||||
|
||||
// Otherwise, if we're signed, we want to sext up to size_t.
|
||||
} else if (isSigned) {
|
||||
if (numElementsWidth < sizeWidth)
|
||||
numElements = cgf.getBuilder().createIntCast(
|
||||
numElements, cgf.getBuilder().getSIntNTy(sizeWidth));
|
||||
|
||||
// If there's a non-1 type size multiplier, then we can do the
|
||||
// signedness check at the same time as we do the multiply
|
||||
// because a negative number times anything will cause an
|
||||
// unsigned overflow. Otherwise, we have to do it here. But at
|
||||
// least in this case, we can subsume the >= minElements check.
|
||||
if (typeSizeMultiplier == 1)
|
||||
hasOverflow = cgf.getBuilder().createCompare(
|
||||
loc, cir::CmpOpKind::lt, numElements,
|
||||
cgf.getBuilder().getConstInt(loc, numElements.getType(),
|
||||
minElements));
|
||||
|
||||
numElements = cgf.getBuilder().createIntCast(
|
||||
numElements, mlir::cast<cir::IntType>(cgf.sizeTy));
|
||||
|
||||
// Otherwise, zext up to size_t if necessary.
|
||||
} else if (numElementsWidth < sizeWidth) {
|
||||
numElements = cgf.getBuilder().createIntCast(
|
||||
numElements, mlir::cast<cir::IntType>(cgf.sizeTy));
|
||||
}
|
||||
|
||||
assert(numElements.getType() == cgf.sizeTy);
|
||||
|
||||
if (minElements) {
|
||||
// Don't allow allocation of fewer elements than we have initializers.
|
||||
if (!hasOverflow) {
|
||||
// FIXME: Avoid creating this twice. It may happen above.
|
||||
mlir::Value minElementsV = cgf.getBuilder().getConstInt(
|
||||
loc, llvm::APInt(sizeWidth, minElements));
|
||||
hasOverflow = cgf.getBuilder().createCompare(loc, cir::CmpOpKind::lt,
|
||||
numElements, minElementsV);
|
||||
} else if (numElementsWidth > sizeWidth) {
|
||||
// The other existing overflow subsumes this check.
|
||||
// We do an unsigned comparison, since any signed value < -1 is
|
||||
// taken care of either above or below.
|
||||
mlir::Value minElementsV = cgf.getBuilder().getConstInt(
|
||||
loc, llvm::APInt(sizeWidth, minElements));
|
||||
hasOverflow = cgf.getBuilder().createOr(
|
||||
loc, hasOverflow,
|
||||
cgf.getBuilder().createCompare(loc, cir::CmpOpKind::lt, numElements,
|
||||
minElementsV));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -344,13 +344,23 @@ void CIRGenFunction::LexicalScope::cleanup() {
|
||||
return;
|
||||
|
||||
// Get rid of any empty block at the end of the scope.
|
||||
bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
|
||||
if (!entryBlock && curBlock->empty()) {
|
||||
bool isEntryBlock = builder.getInsertionBlock()->isEntryBlock();
|
||||
if (!isEntryBlock && curBlock->empty()) {
|
||||
curBlock->erase();
|
||||
for (mlir::Block *retBlock : retBlocks) {
|
||||
if (retBlock->getUses().empty())
|
||||
retBlock->erase();
|
||||
}
|
||||
// The empty block was created by a terminator (return/break/continue)
|
||||
// and is now erased. If there are pending cleanup scopes (from variables
|
||||
// with destructors), we need to pop them and ensure the containing scope
|
||||
// block gets a proper terminator (e.g. cir.yield). Without this, the
|
||||
// cleanup-scope-op popping that would otherwise happen in
|
||||
// ~RunCleanupsScope leaves the scope block without a terminator.
|
||||
if (hasPendingCleanups()) {
|
||||
builder.setInsertionPointToEnd(entryBlock);
|
||||
insertCleanupAndLeave(entryBlock);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1037,6 +1037,12 @@ public:
|
||||
performCleanup = false;
|
||||
cgf.currentCleanupStackDepth = oldCleanupStackDepth;
|
||||
}
|
||||
|
||||
/// Whether there are any pending cleanups that have been pushed since
|
||||
/// this scope was entered.
|
||||
bool hasPendingCleanups() const {
|
||||
return cgf.ehStack.stable_begin() != cleanupStackDepth;
|
||||
}
|
||||
};
|
||||
|
||||
// Cleanup stack depth of the RunCleanupsScope that was pushed most recently.
|
||||
|
||||
58
clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp
Normal file
58
clang/test/CIR/CodeGen/cleanup-scope-return-in-loop.cpp
Normal file
@ -0,0 +1,58 @@
|
||||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
|
||||
// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
|
||||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
|
||||
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
|
||||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
|
||||
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
|
||||
|
||||
struct Struk {
|
||||
~Struk();
|
||||
bool check();
|
||||
};
|
||||
|
||||
// Test that a for-loop body containing a local variable with a destructor
|
||||
// followed by 'continue' and 'return' produces a properly terminated
|
||||
// cir.scope. This exercises the interaction between LexicalScope cleanup,
|
||||
// CleanupScopeOp popping, and empty-block erasure.
|
||||
|
||||
int test_cleanup_return_in_loop(int n) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
Struk s;
|
||||
if (s.check())
|
||||
continue;
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// CIR: cir.func {{.*}} @_Z27test_cleanup_return_in_loopi
|
||||
// CIR: cir.scope {
|
||||
// CIR: cir.for : cond {
|
||||
// CIR: } body {
|
||||
// CIR: cir.scope {
|
||||
// CIR: cir.cleanup.scope {
|
||||
// CIR: cir.continue
|
||||
// CIR: cir.return
|
||||
// CIR: } cleanup normal {
|
||||
// CIR: cir.call @_ZN5StrukD1Ev
|
||||
// CIR: cir.yield
|
||||
// CIR: cir.yield
|
||||
// CIR: } step {
|
||||
// CIR: cir.return
|
||||
|
||||
// LLVM: define dso_local noundef i32 @_Z27test_cleanup_return_in_loopi(i32 noundef %{{.*}})
|
||||
// LLVM: [[LOOP_COND:.*]]:
|
||||
// LLVM: %[[ICMP:.*]] = icmp slt i32
|
||||
// LLVM-NEXT: br i1 %[[ICMP]]
|
||||
// LLVM: [[LOOP_BODY:.*]]:
|
||||
// LLVM: %[[CALL:.*]] = call noundef {{.*}} @_ZN5Struk5checkEv(
|
||||
// LLVM-NEXT: br i1 %[[CALL]]
|
||||
// LLVM: [[CLEANUP:.*]]:
|
||||
// LLVM: call void @_ZN5StrukD1Ev(
|
||||
// LLVM: switch i32 %{{.*}}, label %{{.*}} [
|
||||
// LLVM: ]
|
||||
// LLVM: [[LOOP_STEP:.*]]:
|
||||
// LLVM: add nsw i32 %{{.*}}, 1
|
||||
// LLVM: [[POST_LOOP:.*]]:
|
||||
// LLVM: store i32 -1, ptr %{{.*}}, align 4
|
||||
// LLVM: ret i32
|
||||
70
clang/test/CIR/CodeGen/new-array-size-conv.cpp
Normal file
70
clang/test/CIR/CodeGen/new-array-size-conv.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
|
||||
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
|
||||
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
|
||||
// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
|
||||
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
|
||||
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
|
||||
|
||||
// Tests for array new expressions where the array size is not implicitly
|
||||
// converted to size_t by Sema (pre-C++14 behavior). The CIR codegen must
|
||||
// handle the signed/width conversion itself.
|
||||
|
||||
typedef __typeof__(sizeof(int)) size_t;
|
||||
|
||||
// Sized non-allocating (placement) new.
|
||||
void *operator new[](size_t, void *p) noexcept { return p; }
|
||||
|
||||
struct S {
|
||||
int x;
|
||||
S();
|
||||
};
|
||||
|
||||
// Signed int array size with multi-byte element (typeSizeMultiplier != 1).
|
||||
// The sign extension is done and the multiply overflow catches negative values.
|
||||
void t_new_signed_size(int n) {
|
||||
auto p = new double[n];
|
||||
}
|
||||
|
||||
// CIR-LABEL: cir.func {{.*}} @_Z17t_new_signed_sizei
|
||||
// CIR: %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
|
||||
// CIR: %[[N_SEXT:.*]] = cir.cast integral %[[N]] : !s32i -> !s64i
|
||||
// CIR: %[[N_SIZE_T:.*]] = cir.cast integral %[[N_SEXT]] : !s64i -> !u64i
|
||||
// CIR: %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<8> : !u64i
|
||||
// CIR: %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.mul.overflow %[[N_SIZE_T]], %[[ELEMENT_SIZE]] : !u64i -> !u64i
|
||||
// CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
|
||||
// CIR: %[[ALLOC_SIZE:.*]] = cir.select if %[[OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]]
|
||||
|
||||
// LLVM-LABEL: define{{.*}} void @_Z17t_new_signed_sizei
|
||||
// LLVM: %[[N:.*]] = load i32, ptr %{{.+}}
|
||||
// LLVM: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
|
||||
// LLVM: %[[MUL_OVERFLOW:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
|
||||
|
||||
// OGCG-LABEL: define{{.*}} void @_Z17t_new_signed_sizei
|
||||
// OGCG: %[[N:.*]] = load i32, ptr %{{.+}}
|
||||
// OGCG: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
|
||||
// OGCG: %[[MUL_OVERFLOW:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
|
||||
|
||||
// Signed int array size with single-byte element (typeSizeMultiplier == 1).
|
||||
// A signed comparison catches negative values directly.
|
||||
void t_new_signed_size_char(int n) {
|
||||
auto p = new char[n];
|
||||
}
|
||||
|
||||
// CIR-LABEL: cir.func {{.*}} @_Z22t_new_signed_size_chari
|
||||
// CIR: %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
|
||||
// CIR: %[[N_SEXT:.*]] = cir.cast integral %[[N]] : !s32i -> !s64i
|
||||
// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s64i
|
||||
// CIR: %[[IS_NEG:.*]] = cir.cmp lt %[[N_SEXT]], %[[ZERO]] : !s64i
|
||||
// CIR: %[[N_SIZE_T:.*]] = cir.cast integral %[[N_SEXT]] : !s64i -> !u64i
|
||||
// CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
|
||||
// CIR: %[[ALLOC_SIZE:.*]] = cir.select if %[[IS_NEG]] then %[[ALL_ONES]] else %[[N_SIZE_T]]
|
||||
|
||||
// LLVM-LABEL: define{{.*}} void @_Z22t_new_signed_size_chari
|
||||
// LLVM: %[[N:.*]] = load i32, ptr %{{.+}}
|
||||
// LLVM: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
|
||||
// LLVM: %[[IS_NEG:.*]] = icmp slt i64 %[[N_SIZE_T]], 0
|
||||
|
||||
// OGCG-LABEL: define{{.*}} void @_Z22t_new_signed_size_chari
|
||||
// OGCG: %[[N:.*]] = load i32, ptr %{{.+}}
|
||||
// OGCG: %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
|
||||
// OGCG: %[[IS_NEG:.*]] = icmp slt i64 %[[N_SIZE_T]], 0
|
||||
Loading…
x
Reference in New Issue
Block a user