[ClangFE] Improve handling of casting of atomic memory operations. (#86691)

- Factor out a shouldCastToInt() method.
- Also pass through pointer type values to not be casted to integer.

CC @uweigand
This commit is contained in:
Jonas Paulsson 2024-04-02 18:52:57 +02:00 committed by GitHub
parent 668c1ea4aa
commit 4d5e834c5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 70 additions and 51 deletions

View File

@ -197,11 +197,11 @@ namespace {
llvm::Value *getScalarRValValueOrNull(RValue RVal) const;
/// Converts an rvalue to integer value if needed.
llvm::Value *convertRValueToInt(RValue RVal, bool CastFP = true) const;
llvm::Value *convertRValueToInt(RValue RVal, bool CmpXchg = false) const;
RValue ConvertToValueOrAtomic(llvm::Value *IntVal, AggValueSlot ResultSlot,
SourceLocation Loc, bool AsValue,
bool CastFP = true) const;
bool CmpXchg = false) const;
/// Copy an atomic r-value into atomic-layout memory.
void emitCopyIntoMemory(RValue rvalue) const;
@ -264,7 +264,7 @@ namespace {
llvm::AtomicOrdering AO, bool IsVolatile);
/// Emits atomic load as LLVM instruction.
llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile,
bool CastFP = true);
bool CmpXchg = false);
/// Emits atomic compare-and-exchange op as a libcall.
llvm::Value *EmitAtomicCompareExchangeLibcall(
llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
@ -1401,13 +1401,26 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
LVal.getBaseInfo(), TBAAAccessInfo()));
}
/// Return true if \param ValTy is a type that should be casted to integer
/// around the atomic memory operation. If \param CmpXchg is true, then the
/// cast of a floating point type is made as that instruction can not have
/// floating point operands. TODO: Allow compare-and-exchange and FP - see
/// comment in AtomicExpandPass.cpp.
static bool shouldCastToInt(llvm::Type *ValTy, bool CmpXchg) {
if (ValTy->isFloatingPointTy())
return ValTy->isX86_FP80Ty() || CmpXchg;
return !ValTy->isIntegerTy() && !ValTy->isPointerTy();
}
RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
AggValueSlot ResultSlot,
SourceLocation Loc, bool AsValue,
bool CastFP) const {
bool CmpXchg) const {
// Try not to in some easy cases.
assert((Val->getType()->isIntegerTy() || Val->getType()->isIEEELikeFPTy()) &&
"Expected integer or floating point value");
assert((Val->getType()->isIntegerTy() || Val->getType()->isPointerTy() ||
Val->getType()->isIEEELikeFPTy()) &&
"Expected integer, pointer or floating point value when converting "
"result.");
if (getEvaluationKind() == TEK_Scalar &&
(((!LVal.isBitField() ||
LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
@ -1416,13 +1429,12 @@ RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
auto *ValTy = AsValue
? CGF.ConvertTypeForMem(ValueTy)
: getAtomicAddress().getElementType();
if (ValTy->isIntegerTy() || (!CastFP && ValTy->isIEEELikeFPTy())) {
if (!shouldCastToInt(ValTy, CmpXchg)) {
assert((!ValTy->isIntegerTy() || Val->getType() == ValTy) &&
"Different integer types.");
return RValue::get(CGF.EmitFromMemory(Val, ValueTy));
} else if (ValTy->isPointerTy())
return RValue::get(CGF.Builder.CreateIntToPtr(Val, ValTy));
else if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
}
if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
return RValue::get(CGF.Builder.CreateBitCast(Val, ValTy));
}
@ -1459,10 +1471,10 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
}
llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
bool IsVolatile, bool CastFP) {
bool IsVolatile, bool CmpXchg) {
// Okay, we're doing this natively.
Address Addr = getAtomicAddress();
if (!(Addr.getElementType()->isIEEELikeFPTy() && !CastFP))
if (shouldCastToInt(Addr.getElementType(), CmpXchg))
Addr = castToAtomicIntPointer(Addr);
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
Load->setAtomic(AO);
@ -1523,7 +1535,7 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
}
// Okay, we're doing this natively.
auto *Load = EmitAtomicLoadOp(AO, IsVolatile, /*CastFP=*/false);
auto *Load = EmitAtomicLoadOp(AO, IsVolatile);
// If we're ignoring an aggregate return, don't do anything.
if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
@ -1531,8 +1543,7 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
// Okay, turn that back into the original value or atomic (for non-simple
// lvalues) type.
return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue,
/*CastFP=*/false);
return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
}
/// Emit a load from an l-value of atomic type. Note that the r-value
@ -1601,20 +1612,17 @@ llvm::Value *AtomicInfo::getScalarRValValueOrNull(RValue RVal) const {
return nullptr;
}
llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CastFP) const {
llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CmpXchg) const {
// If we've got a scalar value of the right size, try to avoid going
// through memory. Floats get casted if needed by AtomicExpandPass.
if (llvm::Value *Value = getScalarRValValueOrNull(RVal)) {
if (isa<llvm::IntegerType>(Value->getType()) ||
(!CastFP && Value->getType()->isIEEELikeFPTy()))
if (!shouldCastToInt(Value->getType(), CmpXchg))
return CGF.EmitToMemory(Value, ValueTy);
else {
llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
CGF.getLLVMContext(),
LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
if (isa<llvm::PointerType>(Value->getType()))
return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
return CGF.Builder.CreateBitCast(Value, InputIntTy);
}
}
@ -1687,13 +1695,14 @@ std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
// If we've got a scalar value of the right size, try to avoid going
// through memory.
auto *ExpectedVal = convertRValueToInt(Expected);
auto *DesiredVal = convertRValueToInt(Desired);
auto *ExpectedVal = convertRValueToInt(Expected, /*CmpXchg=*/true);
auto *DesiredVal = convertRValueToInt(Desired, /*CmpXchg=*/true);
auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
Failure, IsWeak);
return std::make_pair(
ConvertToValueOrAtomic(Res.first, AggValueSlot::ignored(),
SourceLocation(), /*AsValue=*/false),
SourceLocation(), /*AsValue=*/false,
/*CmpXchg=*/true),
Res.second);
}
@ -1787,7 +1796,7 @@ void AtomicInfo::EmitAtomicUpdateOp(
auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
// Do the atomic load.
auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile, /*CmpXchg=*/true);
// For non-simple lvalues perform compare-and-swap procedure.
auto *ContBB = CGF.createBasicBlock("atomic_cont");
auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@ -1803,7 +1812,8 @@ void AtomicInfo::EmitAtomicUpdateOp(
CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
}
auto OldRVal = ConvertToValueOrAtomic(PHI, AggValueSlot::ignored(),
SourceLocation(), /*AsValue=*/false);
SourceLocation(), /*AsValue=*/false,
/*CmpXchg=*/true);
EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr);
auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
// Try to write new value using cmpxchg operation.
@ -1869,7 +1879,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
// Do the atomic load.
auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile, /*CmpXchg=*/true);
// For non-simple lvalues perform compare-and-swap procedure.
auto *ContBB = CGF.createBasicBlock("atomic_cont");
auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@ -1969,21 +1979,16 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
}
// Okay, we're doing this natively.
llvm::Value *ValToStore =
atomics.convertRValueToInt(rvalue, /*CastFP=*/false);
llvm::Value *ValToStore = atomics.convertRValueToInt(rvalue);
// Do the atomic store.
Address Addr = atomics.getAtomicAddress();
bool ShouldCastToInt = true;
if (llvm::Value *Value = atomics.getScalarRValValueOrNull(rvalue))
if (isa<llvm::IntegerType>(Value->getType()) ||
Value->getType()->isIEEELikeFPTy())
ShouldCastToInt = false;
if (ShouldCastToInt) {
Addr = atomics.castToAtomicIntPointer(Addr);
ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
/*isSigned=*/false);
}
if (shouldCastToInt(Value->getType(), /*CmpXchg=*/false)) {
Addr = atomics.castToAtomicIntPointer(Addr);
ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
/*isSigned=*/false);
}
llvm::StoreInst *store = Builder.CreateStore(ValToStore, Addr);
if (AO == llvm::AtomicOrdering::Acquire)

View File

@ -1,10 +1,14 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686-apple-darwin9 | FileCheck %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686-apple-darwin9 | FileCheck %s --check-prefixes=CHECK,X86
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=s390x-linux-gnu | FileCheck %s --check-prefixes=CHECK,SYSTEMZ
// CHECK: @[[NONSTATIC_GLOB_POINTER_FROM_INT:.+]] = global ptr null
// CHECK: @[[GLOB_POINTER:.+]] = internal global ptr null
// CHECK: @[[GLOB_POINTER_FROM_INT:.+]] = internal global ptr null
// CHECK: @[[GLOB_INT:.+]] = internal global i32 0
// CHECK: @[[GLOB_FLT:.+]] = internal global float {{[0e\+-\.]+}}, align
// CHECK: @[[GLOB_DBL:.+]] = internal global double {{[0e\+-\.]+}}, align
// X86: @[[GLOB_LONGDBL:.+]] = internal global x86_fp80 {{[0xK]+}}, align
// SYSTEMZ: @[[GLOB_LONGDBL:.+]] = internal global fp128 {{[0xL]+}}, align
int atomic(void) {
// non-sensical test for sync functions
@ -79,8 +83,10 @@ int atomic(void) {
// CHECK: atomicrmw nand ptr %valc, i8 6 seq_cst, align 1
__sync_val_compare_and_swap((void **)0, (void *)0, (void *)0);
// CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
// CHECK: extractvalue { i32, i1 } [[PAIR]], 0
// X86: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
// X86-NEXT: extractvalue { i32, i1 } [[PAIR]], 0
// SYSTEMZ: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i64 0, i64 0 seq_cst seq_cst, align 8
// SYSTEMZ-NEXT: extractvalue { i64, i1 } [[PAIR]], 0
if ( __sync_val_compare_and_swap(&valb, 0, 1)) {
// CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr %valb, i8 0, i8 1 seq_cst seq_cst, align 1
@ -90,13 +96,15 @@ int atomic(void) {
}
__sync_bool_compare_and_swap((void **)0, (void *)0, (void *)0);
// CHECK: cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
// X86: cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
// SYSTEMZ: cmpxchg ptr null, i64 0, i64 0 seq_cst seq_cst, align 8
__sync_lock_release(&val);
// CHECK: store atomic i32 0, {{.*}} release, align 4
__sync_lock_release(&ptrval);
// CHECK: store atomic i32 0, {{.*}} release, align 4
// X86: store atomic i32 0, {{.*}} release, align 4
// SYSTEMZ: store atomic i64 0, {{.*}} release, align 8
__sync_synchronize ();
// CHECK: fence seq_cst
@ -131,19 +139,25 @@ static _Atomic(int *) glob_pointer_from_int = 0;
_Atomic(int *) nonstatic_glob_pointer_from_int = 0LL;
static _Atomic int glob_int = 0;
static _Atomic float glob_flt = 0.0f;
static _Atomic double glob_dbl = 0.0f;
static _Atomic long double glob_longdbl = 0.0f;
void force_global_uses(void) {
// X86: %atomic-temp = alloca x86_fp80, align 16
(void)glob_pointer;
// CHECK: %[[LOCAL_INT:.+]] = load atomic i32, ptr @[[GLOB_POINTER]] seq_cst
// CHECK-NEXT: inttoptr i32 %[[LOCAL_INT]] to ptr
// CHECK: load atomic ptr, ptr @[[GLOB_POINTER]] seq_cst
(void)glob_pointer_from_int;
// CHECK: %[[LOCAL_INT_2:.+]] = load atomic i32, ptr @[[GLOB_POINTER_FROM_INT]] seq_cst
// CHECK-NEXT: inttoptr i32 %[[LOCAL_INT_2]] to ptr
// CHECK-NEXT: load atomic ptr, ptr @[[GLOB_POINTER_FROM_INT]] seq_cst
(void)nonstatic_glob_pointer_from_int;
// CHECK: %[[LOCAL_INT_3:.+]] = load atomic i32, ptr @[[NONSTATIC_GLOB_POINTER_FROM_INT]] seq_cst
// CHECK-NEXT: inttoptr i32 %[[LOCAL_INT_3]] to ptr
// CHECK-NEXT: load atomic ptr, ptr @[[NONSTATIC_GLOB_POINTER_FROM_INT]] seq_cst
(void)glob_int;
// CHECK: load atomic i32, ptr @[[GLOB_INT]] seq_cst
// CHECK-NEXT: load atomic i32, ptr @[[GLOB_INT]] seq_cst
(void)glob_flt;
// CHECK: load atomic float, ptr @[[GLOB_FLT]] seq_cst
// CHECK-NEXT: load atomic float, ptr @[[GLOB_FLT]] seq_cst
(void)glob_dbl;
// CHECK-NEXT: load atomic double, ptr @[[GLOB_DBL]] seq_cst
(void)glob_longdbl;
// X86: call void @__atomic_load(i32 noundef 16, ptr noundef @glob_longdbl, ptr noundef %atomic-temp
// X86-NEXT: %0 = load x86_fp80, ptr %atomic-temp, align 16
// SYSTEMZ: load atomic fp128, ptr @[[GLOB_LONGDBL]] seq_cst
}