zhijian lin 85a9f2e148
[PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc (#142395)
In PowerPC, the AtomicCmpXchgInst is lowered to
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS. However, this node does not handle
the weak attribute of AtomicCmpXchgInst. As a result, when compiling C++
atomic_compare_exchange_weak_explicit, the generated assembly includes a
"reservation lost" loop — i.e., it branches back and retries if the
stwcx. (store-conditional) fails. This differs from GCC’s codegen, which
does not include that loop for weak compare-exchange.

Since PowerPC uses LL/SC-style atomic instructions, the patch enables
AtomicExpandImpl::expandAtomicCmpXchg for PowerPC. With this, the weak
attribute is properly respected, and the "reservation lost" loop is
removed for weak operations.

---------

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
2025-06-13 09:14:48 -04:00

92 lines
4.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-unknown-unknown \
; RUN: < %s | FileCheck --check-prefix=CHECK-64 %s
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc-unknown-unknown \
; RUN: < %s | FileCheck --check-prefix=CHECK-32 %s
; test_add: codegen test for a seq_cst `atomicrmw fadd` on a float, run for
; both the powerpc64 and the powerpc (32-bit) triples given in the RUN lines.
;
; Shape of the expected assembly, as pinned by the assertions below:
;   * a leading `sync`, then a plain `lfs` load of the current value;
;   * an outer loop (%atomicrmw.start) that computes the new value with
;     `fadds` and passes the new and expected floats through stack slots
;     (stfs followed by lwz) to get their bit patterns into r5 and r6;
;   * an inner loop (%cmpxchg.start) of lwarx / cmplw / stwcx.: a compare
;     mismatch exits to %cmpxchg.nostore, and the `bne` after stwcx. branches
;     back to %cmpxchg.start to retry the store-conditional;
;   * CR bit 20 is set with `creqv` on the success path and cleared with
;     `crxor` on the nostore path; `bc 12, 20` leaves the outer loop when it
;     is set;
;   * a trailing `lwsync` after the result is copied with `fmr 1, 0`.
; The 32-bit output additionally sets up and tears down a 32-byte frame
; (stwu / addi) and addresses its slots at positive offsets from r1, while the
; 64-bit output uses negative offsets from r1 without adjusting it.
;
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define float @test_add(ptr %ptr, float %incr) {
; CHECK-64-LABEL: test_add:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: sync
; CHECK-64-NEXT: lfs 0, 0(3)
; CHECK-64-NEXT: b .LBB0_3
; CHECK-64-NEXT: .LBB0_1: # %cmpxchg.nostore
; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-64-NEXT: crxor 20, 20, 20
; CHECK-64-NEXT: .LBB0_2: # %cmpxchg.end
; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-64-NEXT: stw 4, -12(1)
; CHECK-64-NEXT: lfs 0, -12(1)
; CHECK-64-NEXT: bc 12, 20, .LBB0_7
; CHECK-64-NEXT: .LBB0_3: # %atomicrmw.start
; CHECK-64-NEXT: # =>This Loop Header: Depth=1
; CHECK-64-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-64-NEXT: fadds 2, 0, 1
; CHECK-64-NEXT: stfs 2, -4(1)
; CHECK-64-NEXT: stfs 0, -8(1)
; CHECK-64-NEXT: lwz 5, -4(1)
; CHECK-64-NEXT: lwz 6, -8(1)
; CHECK-64-NEXT: .LBB0_4: # %cmpxchg.start
; CHECK-64-NEXT: # Parent Loop BB0_3 Depth=1
; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-64-NEXT: lwarx 4, 0, 3
; CHECK-64-NEXT: cmplw 4, 6
; CHECK-64-NEXT: bne 0, .LBB0_1
; CHECK-64-NEXT: # %bb.5: # %cmpxchg.fencedstore
; CHECK-64-NEXT: # in Loop: Header=BB0_4 Depth=2
; CHECK-64-NEXT: stwcx. 5, 0, 3
; CHECK-64-NEXT: bne 0, .LBB0_4
; CHECK-64-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1
; CHECK-64-NEXT: creqv 20, 20, 20
; CHECK-64-NEXT: b .LBB0_2
; CHECK-64-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-64-NEXT: fmr 1, 0
; CHECK-64-NEXT: lwsync
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: test_add:
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: stwu 1, -32(1)
; CHECK-32-NEXT: .cfi_def_cfa_offset 32
; CHECK-32-NEXT: sync
; CHECK-32-NEXT: lfs 0, 0(3)
; CHECK-32-NEXT: b .LBB0_3
; CHECK-32-NEXT: .LBB0_1: # %cmpxchg.nostore
; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-32-NEXT: crxor 20, 20, 20
; CHECK-32-NEXT: .LBB0_2: # %cmpxchg.end
; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-32-NEXT: stw 4, 20(1)
; CHECK-32-NEXT: lfs 0, 20(1)
; CHECK-32-NEXT: bc 12, 20, .LBB0_7
; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start
; CHECK-32-NEXT: # =>This Loop Header: Depth=1
; CHECK-32-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-32-NEXT: fadds 2, 0, 1
; CHECK-32-NEXT: stfs 2, 28(1)
; CHECK-32-NEXT: stfs 0, 24(1)
; CHECK-32-NEXT: lwz 5, 28(1)
; CHECK-32-NEXT: lwz 6, 24(1)
; CHECK-32-NEXT: .LBB0_4: # %cmpxchg.start
; CHECK-32-NEXT: # Parent Loop BB0_3 Depth=1
; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-32-NEXT: lwarx 4, 0, 3
; CHECK-32-NEXT: cmplw 4, 6
; CHECK-32-NEXT: bne 0, .LBB0_1
; CHECK-32-NEXT: # %bb.5: # %cmpxchg.fencedstore
; CHECK-32-NEXT: # in Loop: Header=BB0_4 Depth=2
; CHECK-32-NEXT: stwcx. 5, 0, 3
; CHECK-32-NEXT: bne 0, .LBB0_4
; CHECK-32-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1
; CHECK-32-NEXT: creqv 20, 20, 20
; CHECK-32-NEXT: b .LBB0_2
; CHECK-32-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-32-NEXT: fmr 1, 0
; CHECK-32-NEXT: lwsync
; CHECK-32-NEXT: addi 1, 1, 32
; CHECK-32-NEXT: blr
entry:
; Atomically add %incr to the float at %ptr with seq_cst ordering; %r is the
; result of the atomicrmw (the value held in memory before the add).
%r = atomicrmw fadd ptr %ptr, float %incr seq_cst
ret float %r
}