llvm-project/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
zhijian lin 85a9f2e148
[PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc (#142395)
On PowerPC, AtomicCmpXchgInst is lowered to
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS. However, this node does not carry
the weak attribute of the AtomicCmpXchgInst. As a result, when compiling
C/C++ atomic_compare_exchange_weak_explicit, the generated assembly
includes a "reservation lost" loop: it branches back and retries if the
stwcx. (store-conditional) fails. GCC's codegen, by contrast, does not
emit that loop for a weak compare-exchange.

Since PowerPC uses LL/SC-style atomic instructions, this patch enables
AtomicExpandImpl::expandAtomicCmpXchg for the target. With that
expansion, the weak attribute is respected and the "reservation lost"
loop is no longer emitted for weak operations.
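
For context, the C11/C++ contract already requires callers of a weak
compare-exchange to tolerate spurious failure, which is why a
backend-generated retry loop is redundant. A minimal caller-side sketch
(illustrative only, not part of this patch; the helper name is made up):

#include <stdatomic.h>

/* Illustrative sketch, not from the patch: the usual retry idiom around
 * a weak compare-exchange. The weak form may fail spuriously (e.g. when
 * the lwarx/stwcx. reservation is lost), so the caller already loops;
 * a second retry loop emitted by the backend adds nothing. */
static int fetch_increment(_Atomic int *p) {
    int old = atomic_load_explicit(p, memory_order_relaxed);
    /* On failure (value mismatch or spurious), `old` is refreshed with
     * the current value and the exchange is simply attempted again. */
    while (!atomic_compare_exchange_weak_explicit(
               p, &old, old + 1, memory_order_relaxed, memory_order_relaxed))
        ;
    return old;
}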

---------

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
2025-06-13 09:14:48 -04:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Test the generation of asm for the function:
; int foo(_Atomic int *cp, int *old, int c) {
; return atomic_compare_exchange_weak_explicit(cp, old, c, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
; }
; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix -mcpu=pwr8 -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix -mcpu=pwr8 -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefix=CHECK64
define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stw r3, -4(r1)
; CHECK-NEXT: stw r4, -8(r1)
; CHECK-NEXT: lwz r7, 0(r4)
; CHECK-NEXT: stw r5, -12(r1)
; CHECK-NEXT: stw r5, -16(r1)
; CHECK-NEXT: lwarx r6, 0, r3
; CHECK-NEXT: cmplw r6, r7
; CHECK-NEXT: bne cr0, L..BB0_2
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: stwcx. r5, 0, r3
; CHECK-NEXT: beq cr0, L..BB0_5
; CHECK-NEXT: L..BB0_2: # %cmpxchg.failure
; CHECK-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK-NEXT: # %bb.3: # %cmpxchg.store_expected
; CHECK-NEXT: stw r6, 0(r4)
; CHECK-NEXT: L..BB0_4: # %cmpxchg.continue
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r4, 1
; CHECK-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK-NEXT: stb r3, -17(r1)
; CHECK-NEXT: blr
; CHECK-NEXT: L..BB0_5:
; CHECK-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK-NEXT: b L..BB0_4
;
; CHECK64-LABEL: foo:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: std r3, -8(r1)
; CHECK64-NEXT: std r4, -16(r1)
; CHECK64-NEXT: lwz r7, 0(r4)
; CHECK64-NEXT: stw r5, -20(r1)
; CHECK64-NEXT: stw r5, -24(r1)
; CHECK64-NEXT: lwarx r6, 0, r3
; CHECK64-NEXT: cmplw r6, r7
; CHECK64-NEXT: bne cr0, L..BB0_2
; CHECK64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK64-NEXT: stwcx. r5, 0, r3
; CHECK64-NEXT: beq cr0, L..BB0_5
; CHECK64-NEXT: L..BB0_2: # %cmpxchg.failure
; CHECK64-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK64-NEXT: # %bb.3: # %cmpxchg.store_expected
; CHECK64-NEXT: stw r6, 0(r4)
; CHECK64-NEXT: L..BB0_4: # %cmpxchg.continue
; CHECK64-NEXT: li r3, 0
; CHECK64-NEXT: li r4, 1
; CHECK64-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK64-NEXT: li r4, 1
; CHECK64-NEXT: stb r3, -25(r1)
; CHECK64-NEXT: li r3, 0
; CHECK64-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK64-NEXT: blr
; CHECK64-NEXT: L..BB0_5:
; CHECK64-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK64-NEXT: b L..BB0_4
entry:
%cp.addr = alloca ptr, align 4
%old.addr = alloca ptr, align 4
%c.addr = alloca i32, align 4
%.atomictmp = alloca i32, align 4
%cmpxchg.bool = alloca i8, align 1
store ptr %cp, ptr %cp.addr, align 4
store ptr %old, ptr %old.addr, align 4
store i32 %c, ptr %c.addr, align 4
%0 = load ptr, ptr %cp.addr, align 4
%1 = load ptr, ptr %old.addr, align 4
%2 = load i32, ptr %c.addr, align 4
store i32 %2, ptr %.atomictmp, align 4
%3 = load i32, ptr %1, align 4
%4 = load i32, ptr %.atomictmp, align 4
%5 = cmpxchg weak ptr %0, i32 %3, i32 %4 monotonic monotonic, align 4
%6 = extractvalue { i32, i1 } %5, 0
%7 = extractvalue { i32, i1 } %5, 1
br i1 %7, label %cmpxchg.continue, label %cmpxchg.store_expected
cmpxchg.store_expected: ; preds = %entry
store i32 %6, ptr %1, align 4
br label %cmpxchg.continue
cmpxchg.continue: ; preds = %cmpxchg.store_expected, %entry
%storedv = zext i1 %7 to i8
store i8 %storedv, ptr %cmpxchg.bool, align 1
%8 = load i8, ptr %cmpxchg.bool, align 1
%loadedv = trunc i8 %8 to i1
%conv = zext i1 %loadedv to i32
ret i32 %conv
}