[ClangFE] Improve handling of casting of atomic memory operations. (#86691)

- Factor out a shouldCastToInt() method. - Also pass through pointer type values to not be casted to integer. CC @uweigand
2024-04-02 18:52:57 +02:00 · 2024-04-02 18:52:57 +02:00 · 4d5e834c5b
commit 4d5e834c5b
parent 668c1ea4aa
2 changed files with 70 additions and 51 deletions
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@ -197,11 +197,11 @@ namespace {
    llvm::Value *getScalarRValValueOrNull(RValue RVal) const;

    /// Converts an rvalue to integer value if needed.
-    llvm::Value *convertRValueToInt(RValue RVal, bool CastFP = true) const;
+    llvm::Value *convertRValueToInt(RValue RVal, bool CmpXchg = false) const;

    RValue ConvertToValueOrAtomic(llvm::Value *IntVal, AggValueSlot ResultSlot,
                                  SourceLocation Loc, bool AsValue,
-                                  bool CastFP = true) const;
+                                  bool CmpXchg = false) const;

    /// Copy an atomic r-value into atomic-layout memory.
    void emitCopyIntoMemory(RValue rvalue) const;
@ -264,7 +264,7 @@ namespace {
                               llvm::AtomicOrdering AO, bool IsVolatile);
    /// Emits atomic load as LLVM instruction.
    llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile,
-                                  bool CastFP = true);
+                                  bool CmpXchg = false);
    /// Emits atomic compare-and-exchange op as a libcall.
    llvm::Value *EmitAtomicCompareExchangeLibcall(
        llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
@ -1401,13 +1401,26 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
      LVal.getBaseInfo(), TBAAAccessInfo()));
 }

+/// Return true if \param ValTy is a type that should be casted to integer
+/// around the atomic memory operation. If \param CmpXchg is true, then the
+/// cast of a floating point type is made as that instruction can not have
+/// floating point operands.  TODO: Allow compare-and-exchange and FP - see
+/// comment in AtomicExpandPass.cpp.
+static bool shouldCastToInt(llvm::Type *ValTy, bool CmpXchg) {
+  if (ValTy->isFloatingPointTy())
+    return ValTy->isX86_FP80Ty() || CmpXchg;
+  return !ValTy->isIntegerTy() && !ValTy->isPointerTy();
+}
+
 RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
                                          AggValueSlot ResultSlot,
                                          SourceLocation Loc, bool AsValue,
-                                          bool CastFP) const {
+                                          bool CmpXchg) const {
  // Try not to in some easy cases.
-  assert((Val->getType()->isIntegerTy() || Val->getType()->isIEEELikeFPTy()) &&
-         "Expected integer or floating point value");
+  assert((Val->getType()->isIntegerTy() || Val->getType()->isPointerTy() ||
+          Val->getType()->isIEEELikeFPTy()) &&
+         "Expected integer, pointer or floating point value when converting "
+         "result.");
  if (getEvaluationKind() == TEK_Scalar &&
      (((!LVal.isBitField() ||
         LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
@ -1416,13 +1429,12 @@ RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
    auto *ValTy = AsValue
                      ? CGF.ConvertTypeForMem(ValueTy)
                      : getAtomicAddress().getElementType();
-    if (ValTy->isIntegerTy() || (!CastFP && ValTy->isIEEELikeFPTy())) {
+    if (!shouldCastToInt(ValTy, CmpXchg)) {
      assert((!ValTy->isIntegerTy() || Val->getType() == ValTy) &&
             "Different integer types.");
      return RValue::get(CGF.EmitFromMemory(Val, ValueTy));
-    } else if (ValTy->isPointerTy())
-      return RValue::get(CGF.Builder.CreateIntToPtr(Val, ValTy));
-    else if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
+    }
+    if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
      return RValue::get(CGF.Builder.CreateBitCast(Val, ValTy));
  }

@ -1459,10 +1471,10 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
 }

 llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
-                                          bool IsVolatile, bool CastFP) {
+                                          bool IsVolatile, bool CmpXchg) {
  // Okay, we're doing this natively.
  Address Addr = getAtomicAddress();
-  if (!(Addr.getElementType()->isIEEELikeFPTy() && !CastFP))
+  if (shouldCastToInt(Addr.getElementType(), CmpXchg))
    Addr = castToAtomicIntPointer(Addr);
  llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
  Load->setAtomic(AO);
@ -1523,7 +1535,7 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
  }

  // Okay, we're doing this natively.
-  auto *Load = EmitAtomicLoadOp(AO, IsVolatile, /*CastFP=*/false);
+  auto *Load = EmitAtomicLoadOp(AO, IsVolatile);

  // If we're ignoring an aggregate return, don't do anything.
  if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
@ -1531,8 +1543,7 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,

  // Okay, turn that back into the original value or atomic (for non-simple
  // lvalues) type.
-  return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue,
-                                /*CastFP=*/false);
+  return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
 }

 /// Emit a load from an l-value of atomic type.  Note that the r-value
@ -1601,20 +1612,17 @@ llvm::Value *AtomicInfo::getScalarRValValueOrNull(RValue RVal) const {
  return nullptr;
 }

-llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CastFP) const {
+llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CmpXchg) const {
  // If we've got a scalar value of the right size, try to avoid going
  // through memory. Floats get casted if needed by AtomicExpandPass.
  if (llvm::Value *Value = getScalarRValValueOrNull(RVal)) {
-    if (isa<llvm::IntegerType>(Value->getType()) ||
-        (!CastFP && Value->getType()->isIEEELikeFPTy()))
+    if (!shouldCastToInt(Value->getType(), CmpXchg))
      return CGF.EmitToMemory(Value, ValueTy);
    else {
      llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
          CGF.getLLVMContext(),
          LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
-      if (isa<llvm::PointerType>(Value->getType()))
-        return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
-      else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
+      if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
        return CGF.Builder.CreateBitCast(Value, InputIntTy);
    }
  }
@ -1687,13 +1695,14 @@ std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(

  // If we've got a scalar value of the right size, try to avoid going
  // through memory.
-  auto *ExpectedVal = convertRValueToInt(Expected);
-  auto *DesiredVal = convertRValueToInt(Desired);
+  auto *ExpectedVal = convertRValueToInt(Expected, /*CmpXchg=*/true);
+  auto *DesiredVal = convertRValueToInt(Desired, /*CmpXchg=*/true);
  auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
                                         Failure, IsWeak);
  return std::make_pair(
      ConvertToValueOrAtomic(Res.first, AggValueSlot::ignored(),
-                             SourceLocation(), /*AsValue=*/false),
+                             SourceLocation(), /*AsValue=*/false,
+                             /*CmpXchg=*/true),
      Res.second);
 }

@ -1787,7 +1796,7 @@ void AtomicInfo::EmitAtomicUpdateOp(
  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);

  // Do the atomic load.
-  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
+  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile, /*CmpXchg=*/true);
  // For non-simple lvalues perform compare-and-swap procedure.
  auto *ContBB = CGF.createBasicBlock("atomic_cont");
  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@ -1803,7 +1812,8 @@ void AtomicInfo::EmitAtomicUpdateOp(
    CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
  }
  auto OldRVal = ConvertToValueOrAtomic(PHI, AggValueSlot::ignored(),
-                                        SourceLocation(), /*AsValue=*/false);
+                                        SourceLocation(), /*AsValue=*/false,
+                                        /*CmpXchg=*/true);
  EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr);
  auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
  // Try to write new value using cmpxchg operation.
@ -1869,7 +1879,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);

  // Do the atomic load.
-  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
+  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile, /*CmpXchg=*/true);
  // For non-simple lvalues perform compare-and-swap procedure.
  auto *ContBB = CGF.createBasicBlock("atomic_cont");
  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@ -1969,21 +1979,16 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
    }

    // Okay, we're doing this natively.
-    llvm::Value *ValToStore =
-        atomics.convertRValueToInt(rvalue, /*CastFP=*/false);
+    llvm::Value *ValToStore = atomics.convertRValueToInt(rvalue);

    // Do the atomic store.
    Address Addr = atomics.getAtomicAddress();
-    bool ShouldCastToInt = true;
    if (llvm::Value *Value = atomics.getScalarRValValueOrNull(rvalue))
-      if (isa<llvm::IntegerType>(Value->getType()) ||
-          Value->getType()->isIEEELikeFPTy())
-        ShouldCastToInt = false;
-    if (ShouldCastToInt) {
-      Addr = atomics.castToAtomicIntPointer(Addr);
-      ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
-                                         /*isSigned=*/false);
-    }
+      if (shouldCastToInt(Value->getType(), /*CmpXchg=*/false)) {
+        Addr = atomics.castToAtomicIntPointer(Addr);
+        ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
+                                           /*isSigned=*/false);
+      }
    llvm::StoreInst *store = Builder.CreateStore(ValToStore, Addr);

    if (AO == llvm::AtomicOrdering::Acquire)
--- a/clang/test/CodeGen/atomic.c
+++ b/clang/test/CodeGen/atomic.c
@ -1,10 +1,14 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686-apple-darwin9 | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686-apple-darwin9 | FileCheck %s --check-prefixes=CHECK,X86
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=s390x-linux-gnu | FileCheck %s --check-prefixes=CHECK,SYSTEMZ

 // CHECK: @[[NONSTATIC_GLOB_POINTER_FROM_INT:.+]] = global ptr null
 // CHECK: @[[GLOB_POINTER:.+]] = internal global ptr null
 // CHECK: @[[GLOB_POINTER_FROM_INT:.+]] = internal global ptr null
 // CHECK: @[[GLOB_INT:.+]] = internal global i32 0
 // CHECK: @[[GLOB_FLT:.+]] = internal global float {{[0e\+-\.]+}}, align
+// CHECK: @[[GLOB_DBL:.+]] = internal global double {{[0e\+-\.]+}}, align
+// X86:   @[[GLOB_LONGDBL:.+]] = internal global x86_fp80 {{[0xK]+}}, align
+// SYSTEMZ: @[[GLOB_LONGDBL:.+]] = internal global fp128 {{[0xL]+}}, align

 int atomic(void) {
  // non-sensical test for sync functions
@ -79,8 +83,10 @@ int atomic(void) {
  // CHECK: atomicrmw nand ptr %valc, i8 6 seq_cst, align 1
 
  __sync_val_compare_and_swap((void **)0, (void *)0, (void *)0);
-  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
-  // CHECK: extractvalue { i32, i1 } [[PAIR]], 0
+  // X86:      [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // X86-NEXT: extractvalue { i32, i1 } [[PAIR]], 0
+  // SYSTEMZ:      [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i64 0, i64 0 seq_cst seq_cst, align 8
+  // SYSTEMZ-NEXT: extractvalue { i64, i1 } [[PAIR]], 0

  if ( __sync_val_compare_and_swap(&valb, 0, 1)) {
    // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr %valb, i8 0, i8 1 seq_cst seq_cst, align 1
@ -90,13 +96,15 @@ int atomic(void) {
  }
  
  __sync_bool_compare_and_swap((void **)0, (void *)0, (void *)0);
-  // CHECK: cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // X86:     cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // SYSTEMZ: cmpxchg ptr null, i64 0, i64 0 seq_cst seq_cst, align 8
  
  __sync_lock_release(&val);
  // CHECK: store atomic i32 0, {{.*}} release, align 4

  __sync_lock_release(&ptrval);
-  // CHECK: store atomic i32 0, {{.*}} release, align 4
+  // X86:     store atomic i32 0, {{.*}} release, align 4
+  // SYSTEMZ: store atomic i64 0, {{.*}} release, align 8

  __sync_synchronize ();
  // CHECK: fence seq_cst
@ -131,19 +139,25 @@ static _Atomic(int *) glob_pointer_from_int = 0;
 _Atomic(int *) nonstatic_glob_pointer_from_int = 0LL;
 static _Atomic int glob_int = 0;
 static _Atomic float glob_flt = 0.0f;
+static _Atomic double glob_dbl = 0.0f;
+static _Atomic long double glob_longdbl = 0.0f;

 void force_global_uses(void) {
+  // X86:   %atomic-temp = alloca x86_fp80, align 16
  (void)glob_pointer;
-  // CHECK: %[[LOCAL_INT:.+]] = load atomic i32, ptr @[[GLOB_POINTER]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT]] to ptr
+  // CHECK: load atomic ptr, ptr @[[GLOB_POINTER]] seq_cst
  (void)glob_pointer_from_int;
-  // CHECK: %[[LOCAL_INT_2:.+]] = load atomic i32, ptr @[[GLOB_POINTER_FROM_INT]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT_2]] to ptr
+  // CHECK-NEXT: load atomic ptr, ptr @[[GLOB_POINTER_FROM_INT]] seq_cst
  (void)nonstatic_glob_pointer_from_int;
-  // CHECK: %[[LOCAL_INT_3:.+]] = load atomic i32, ptr @[[NONSTATIC_GLOB_POINTER_FROM_INT]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT_3]] to ptr
+  // CHECK-NEXT: load atomic ptr, ptr @[[NONSTATIC_GLOB_POINTER_FROM_INT]] seq_cst
  (void)glob_int;
-  // CHECK: load atomic i32, ptr @[[GLOB_INT]] seq_cst
+  // CHECK-NEXT: load atomic i32, ptr @[[GLOB_INT]] seq_cst
  (void)glob_flt;
-  // CHECK: load atomic float, ptr @[[GLOB_FLT]] seq_cst
+  // CHECK-NEXT: load atomic float, ptr @[[GLOB_FLT]] seq_cst
+  (void)glob_dbl;
+  // CHECK-NEXT: load atomic double, ptr @[[GLOB_DBL]] seq_cst
+  (void)glob_longdbl;
+  // X86:      call void @__atomic_load(i32 noundef 16, ptr noundef @glob_longdbl, ptr noundef %atomic-temp
+  // X86-NEXT: %0 = load x86_fp80, ptr %atomic-temp, align 16
+  // SYSTEMZ: load atomic fp128, ptr @[[GLOB_LONGDBL]] seq_cst
 }