Chen Zheng eb7d16ea25 [PowerPC] make expensive mflr be away from its user in the function prologue
mflr is fairly expensive on Power versions earlier than Power10, so we should
schedule the store of mflr's def away from the mflr itself.

In the epilogue, the expensive mtlr has no user for its def, so it doesn't
matter that the load and the mtlr are back-to-back.

Reviewed By: RolandF

Differential Revision: https://reviews.llvm.org/D137423
2022-11-14 21:14:20 -05:00

1604 lines
52 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \
; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \
; RUN: -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
; RUN: --check-prefix=LE-PWR8 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
; RUN: --check-prefix=LE-PWR8 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
; RUN: --check-prefix=AIX64-PWR8 %s
; On 32-bit PPC platforms, 16-byte lock-free atomic instructions are not available,
; so inlined lock-free code is not expected to be generated on such platforms, even
; when the arch level is pwr8+ and `-ppc-quadword-atomics` is on.
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \
; RUN: -ppc-quadword-atomics -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \
; RUN: | FileCheck --check-prefix=PPC-PWR8 %s
; i128 atomicrmw xchg, seq_cst: CHECK (BE pwr8) and LE-PWR8 lower to an inline
; lqarx/stqcx. loop; PWR7 and AIX64-PWR8 call the __atomic_exchange_16 libcall;
; 32-bit PPC-PWR8 calls __atomic_exchange with the operands spilled to the stack.
define i128 @swap(ptr %a, i128 %x) {
; CHECK-LABEL: swap:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sync
; CHECK-NEXT: .LBB0_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r6, 0, r3
; CHECK-NEXT: mr r9, r5
; CHECK-NEXT: mr r8, r4
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: bne cr0, .LBB0_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r6
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: blr
;
; PWR7-LABEL: swap:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -112(r1)
; PWR7-NEXT: std r0, 128(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 112
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: li r6, 5
; PWR7-NEXT: bl __atomic_exchange_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 112
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: swap:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: sync
; LE-PWR8-NEXT: .LBB0_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r6, 0, r3
; LE-PWR8-NEXT: mr r9, r4
; LE-PWR8-NEXT: mr r8, r5
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB0_1
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r7
; LE-PWR8-NEXT: mr r4, r6
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: swap:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -112(r1)
; AIX64-PWR8-NEXT: li r6, 5
; AIX64-PWR8-NEXT: std r0, 128(r1)
; AIX64-PWR8-NEXT: bl .__atomic_exchange_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: addi r1, r1, 112
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: swap:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -48(r1)
; PPC-PWR8-NEXT: stw r0, 52(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: mr r4, r3
; PPC-PWR8-NEXT: stw r7, 40(r1)
; PPC-PWR8-NEXT: stw r6, 36(r1)
; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: stw r5, 32(r1)
; PPC-PWR8-NEXT: addi r5, r1, 32
; PPC-PWR8-NEXT: stw r8, 44(r1)
; PPC-PWR8-NEXT: bl __atomic_exchange
; PPC-PWR8-NEXT: lwz r6, 28(r1)
; PPC-PWR8-NEXT: lwz r5, 24(r1)
; PPC-PWR8-NEXT: lwz r4, 20(r1)
; PPC-PWR8-NEXT: lwz r3, 16(r1)
; PPC-PWR8-NEXT: lwz r0, 52(r1)
; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
  %0 = atomicrmw xchg ptr %a, i128 %x seq_cst, align 16
  ret i128 %0
}
; i128 atomicrmw add, seq_cst: CHECK/LE-PWR8 inline a lqarx + addc/adde + stqcx.
; loop; PWR7 and AIX64-PWR8 call the __atomic_fetch_add_16 libcall; 32-bit
; PPC-PWR8 expands to a __atomic_compare_exchange CAS loop over stack slots.
define i128 @add(ptr %a, i128 %x) {
; CHECK-LABEL: add:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sync
; CHECK-NEXT: .LBB1_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r6, 0, r3
; CHECK-NEXT: addc r9, r5, r7
; CHECK-NEXT: adde r8, r4, r6
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: bne cr0, .LBB1_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r6
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: blr
;
; PWR7-LABEL: add:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -112(r1)
; PWR7-NEXT: std r0, 128(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 112
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: li r6, 5
; PWR7-NEXT: bl __atomic_fetch_add_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 112
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: add:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: sync
; LE-PWR8-NEXT: .LBB1_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r6, 0, r3
; LE-PWR8-NEXT: addc r9, r4, r7
; LE-PWR8-NEXT: adde r8, r5, r6
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB1_1
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r7
; LE-PWR8-NEXT: mr r4, r6
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: add:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -112(r1)
; AIX64-PWR8-NEXT: li r6, 5
; AIX64-PWR8-NEXT: std r0, 128(r1)
; AIX64-PWR8-NEXT: bl .__atomic_fetch_add_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: addi r1, r1, 112
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: add:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -80(r1)
; PPC-PWR8-NEXT: stw r0, 84(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
; PPC-PWR8-NEXT: .cfi_offset r27, -20
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
; PPC-PWR8-NEXT: lwz r3, 0(r3)
; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: addi r24, r1, 16
; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB1_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: addc r7, r6, r30
; PPC-PWR8-NEXT: stw r4, 36(r1)
; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: adde r8, r5, r29
; PPC-PWR8-NEXT: stw r5, 40(r1)
; PPC-PWR8-NEXT: stw r6, 44(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: adde r4, r4, r28
; PPC-PWR8-NEXT: stw r7, 28(r1)
; PPC-PWR8-NEXT: stw r8, 24(r1)
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: adde r3, r3, r27
; PPC-PWR8-NEXT: stw r4, 20(r1)
; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: stw r3, 16(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: cmplwi r3, 0
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: beq cr0, .LBB1_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r0, 84(r1)
; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
  %0 = atomicrmw add ptr %a, i128 %x seq_cst, align 16
  ret i128 %0
}
; i128 atomicrmw sub, seq_cst: CHECK/LE-PWR8 inline a lqarx + subc/subfe + stqcx.
; loop; PWR7 and AIX64-PWR8 call the __atomic_fetch_sub_16 libcall; 32-bit
; PPC-PWR8 expands to a __atomic_compare_exchange CAS loop over stack slots.
define i128 @sub(ptr %a, i128 %x) {
; CHECK-LABEL: sub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sync
; CHECK-NEXT: .LBB2_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r6, 0, r3
; CHECK-NEXT: subc r9, r7, r5
; CHECK-NEXT: subfe r8, r4, r6
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: bne cr0, .LBB2_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r6
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: blr
;
; PWR7-LABEL: sub:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -112(r1)
; PWR7-NEXT: std r0, 128(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 112
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: li r6, 5
; PWR7-NEXT: bl __atomic_fetch_sub_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 112
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: sub:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: sync
; LE-PWR8-NEXT: .LBB2_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r6, 0, r3
; LE-PWR8-NEXT: subc r9, r7, r4
; LE-PWR8-NEXT: subfe r8, r5, r6
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB2_1
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r7
; LE-PWR8-NEXT: mr r4, r6
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: sub:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -112(r1)
; AIX64-PWR8-NEXT: li r6, 5
; AIX64-PWR8-NEXT: std r0, 128(r1)
; AIX64-PWR8-NEXT: bl .__atomic_fetch_sub_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: addi r1, r1, 112
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: sub:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -80(r1)
; PPC-PWR8-NEXT: stw r0, 84(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
; PPC-PWR8-NEXT: .cfi_offset r27, -20
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
; PPC-PWR8-NEXT: lwz r3, 0(r3)
; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: addi r24, r1, 16
; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB2_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: subc r7, r6, r30
; PPC-PWR8-NEXT: stw r4, 36(r1)
; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: subfe r8, r29, r5
; PPC-PWR8-NEXT: stw r5, 40(r1)
; PPC-PWR8-NEXT: stw r6, 44(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: subfe r4, r28, r4
; PPC-PWR8-NEXT: stw r7, 28(r1)
; PPC-PWR8-NEXT: stw r8, 24(r1)
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: subfe r3, r27, r3
; PPC-PWR8-NEXT: stw r4, 20(r1)
; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: stw r3, 16(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: cmplwi r3, 0
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: beq cr0, .LBB2_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r0, 84(r1)
; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
  %0 = atomicrmw sub ptr %a, i128 %x seq_cst, align 16
  ret i128 %0
}
; i128 atomicrmw and, seq_cst: CHECK/LE-PWR8 inline a lqarx + and + stqcx. loop;
; PWR7 and AIX64-PWR8 call the __atomic_fetch_and_16 libcall; 32-bit PPC-PWR8
; expands to a __atomic_compare_exchange CAS loop over stack slots.
define i128 @and(ptr %a, i128 %x) {
; CHECK-LABEL: and:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sync
; CHECK-NEXT: .LBB3_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r6, 0, r3
; CHECK-NEXT: and r9, r5, r7
; CHECK-NEXT: and r8, r4, r6
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: bne cr0, .LBB3_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r6
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: blr
;
; PWR7-LABEL: and:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -112(r1)
; PWR7-NEXT: std r0, 128(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 112
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: li r6, 5
; PWR7-NEXT: bl __atomic_fetch_and_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 112
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: and:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: sync
; LE-PWR8-NEXT: .LBB3_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r6, 0, r3
; LE-PWR8-NEXT: and r9, r4, r7
; LE-PWR8-NEXT: and r8, r5, r6
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB3_1
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r7
; LE-PWR8-NEXT: mr r4, r6
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: and:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -112(r1)
; AIX64-PWR8-NEXT: li r6, 5
; AIX64-PWR8-NEXT: std r0, 128(r1)
; AIX64-PWR8-NEXT: bl .__atomic_fetch_and_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: addi r1, r1, 112
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: and:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -80(r1)
; PPC-PWR8-NEXT: stw r0, 84(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
; PPC-PWR8-NEXT: .cfi_offset r27, -20
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
; PPC-PWR8-NEXT: lwz r3, 0(r3)
; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: addi r24, r1, 16
; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB3_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: stw r4, 36(r1)
; PPC-PWR8-NEXT: and r7, r5, r29
; PPC-PWR8-NEXT: and r8, r6, r30
; PPC-PWR8-NEXT: and r3, r3, r27
; PPC-PWR8-NEXT: and r4, r4, r28
; PPC-PWR8-NEXT: stw r5, 40(r1)
; PPC-PWR8-NEXT: stw r6, 44(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: stw r8, 28(r1)
; PPC-PWR8-NEXT: stw r7, 24(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: stw r4, 20(r1)
; PPC-PWR8-NEXT: stw r3, 16(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: cmplwi r3, 0
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: beq cr0, .LBB3_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r0, 84(r1)
; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
  %0 = atomicrmw and ptr %a, i128 %x seq_cst, align 16
  ret i128 %0
}
; i128 atomicrmw or, seq_cst: CHECK/LE-PWR8 inline a lqarx + or + stqcx. loop;
; PWR7 and AIX64-PWR8 call the __atomic_fetch_or_16 libcall; 32-bit PPC-PWR8
; expands to a __atomic_compare_exchange CAS loop over stack slots.
define i128 @or(ptr %a, i128 %x) {
; CHECK-LABEL: or:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sync
; CHECK-NEXT: .LBB4_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r6, 0, r3
; CHECK-NEXT: or r9, r5, r7
; CHECK-NEXT: or r8, r4, r6
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: bne cr0, .LBB4_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r6
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: blr
;
; PWR7-LABEL: or:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -112(r1)
; PWR7-NEXT: std r0, 128(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 112
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: li r6, 5
; PWR7-NEXT: bl __atomic_fetch_or_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 112
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: or:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: sync
; LE-PWR8-NEXT: .LBB4_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r6, 0, r3
; LE-PWR8-NEXT: or r9, r4, r7
; LE-PWR8-NEXT: or r8, r5, r6
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB4_1
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r7
; LE-PWR8-NEXT: mr r4, r6
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: or:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -112(r1)
; AIX64-PWR8-NEXT: li r6, 5
; AIX64-PWR8-NEXT: std r0, 128(r1)
; AIX64-PWR8-NEXT: bl .__atomic_fetch_or_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: addi r1, r1, 112
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: or:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -80(r1)
; PPC-PWR8-NEXT: stw r0, 84(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
; PPC-PWR8-NEXT: .cfi_offset r27, -20
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
; PPC-PWR8-NEXT: lwz r3, 0(r3)
; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: addi r24, r1, 16
; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB4_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: stw r4, 36(r1)
; PPC-PWR8-NEXT: or r7, r5, r29
; PPC-PWR8-NEXT: or r8, r6, r30
; PPC-PWR8-NEXT: or r3, r3, r27
; PPC-PWR8-NEXT: or r4, r4, r28
; PPC-PWR8-NEXT: stw r5, 40(r1)
; PPC-PWR8-NEXT: stw r6, 44(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: stw r8, 28(r1)
; PPC-PWR8-NEXT: stw r7, 24(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: stw r4, 20(r1)
; PPC-PWR8-NEXT: stw r3, 16(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: cmplwi r3, 0
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: beq cr0, .LBB4_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r0, 84(r1)
; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
  %0 = atomicrmw or ptr %a, i128 %x seq_cst, align 16
  ret i128 %0
}
; i128 atomicrmw xor, seq_cst: CHECK/LE-PWR8 inline a lqarx + xor + stqcx. loop;
; PWR7 and AIX64-PWR8 call the __atomic_fetch_xor_16 libcall; 32-bit PPC-PWR8
; expands to a __atomic_compare_exchange CAS loop over stack slots.
define i128 @xor(ptr %a, i128 %x) {
; CHECK-LABEL: xor:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sync
; CHECK-NEXT: .LBB5_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r6, 0, r3
; CHECK-NEXT: xor r9, r5, r7
; CHECK-NEXT: xor r8, r4, r6
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: bne cr0, .LBB5_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r6
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: blr
;
; PWR7-LABEL: xor:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -112(r1)
; PWR7-NEXT: std r0, 128(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 112
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: li r6, 5
; PWR7-NEXT: bl __atomic_fetch_xor_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 112
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: xor:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: sync
; LE-PWR8-NEXT: .LBB5_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r6, 0, r3
; LE-PWR8-NEXT: xor r9, r4, r7
; LE-PWR8-NEXT: xor r8, r5, r6
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB5_1
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r7
; LE-PWR8-NEXT: mr r4, r6
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: xor:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -112(r1)
; AIX64-PWR8-NEXT: li r6, 5
; AIX64-PWR8-NEXT: std r0, 128(r1)
; AIX64-PWR8-NEXT: bl .__atomic_fetch_xor_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: addi r1, r1, 112
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: xor:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -80(r1)
; PPC-PWR8-NEXT: stw r0, 84(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
; PPC-PWR8-NEXT: .cfi_offset r27, -20
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
; PPC-PWR8-NEXT: lwz r3, 0(r3)
; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: addi r24, r1, 16
; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB5_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: stw r4, 36(r1)
; PPC-PWR8-NEXT: xor r7, r5, r29
; PPC-PWR8-NEXT: xor r8, r6, r30
; PPC-PWR8-NEXT: xor r3, r3, r27
; PPC-PWR8-NEXT: xor r4, r4, r28
; PPC-PWR8-NEXT: stw r5, 40(r1)
; PPC-PWR8-NEXT: stw r6, 44(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: stw r8, 28(r1)
; PPC-PWR8-NEXT: stw r7, 24(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: stw r4, 20(r1)
; PPC-PWR8-NEXT: stw r3, 16(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: cmplwi r3, 0
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: beq cr0, .LBB5_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r0, 84(r1)
; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
  %0 = atomicrmw xor ptr %a, i128 %x seq_cst, align 16
  ret i128 %0
}
; i128 atomicrmw nand, seq_cst: CHECK/LE-PWR8 inline a lqarx + nand + stqcx. loop;
; PWR7 and AIX64-PWR8 call the __atomic_fetch_nand_16 libcall; 32-bit PPC-PWR8
; expands to a __atomic_compare_exchange CAS loop over stack slots.
define i128 @nand(ptr %a, i128 %x) {
; CHECK-LABEL: nand:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sync
; CHECK-NEXT: .LBB6_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r6, 0, r3
; CHECK-NEXT: nand r9, r5, r7
; CHECK-NEXT: nand r8, r4, r6
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: bne cr0, .LBB6_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r6
; CHECK-NEXT: mr r4, r7
; CHECK-NEXT: blr
;
; PWR7-LABEL: nand:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -112(r1)
; PWR7-NEXT: std r0, 128(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 112
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: li r6, 5
; PWR7-NEXT: bl __atomic_fetch_nand_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 112
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: nand:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: sync
; LE-PWR8-NEXT: .LBB6_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r6, 0, r3
; LE-PWR8-NEXT: nand r9, r4, r7
; LE-PWR8-NEXT: nand r8, r5, r6
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB6_1
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r7
; LE-PWR8-NEXT: mr r4, r6
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: nand:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -112(r1)
; AIX64-PWR8-NEXT: li r6, 5
; AIX64-PWR8-NEXT: std r0, 128(r1)
; AIX64-PWR8-NEXT: bl .__atomic_fetch_nand_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: addi r1, r1, 112
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: nand:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -80(r1)
; PPC-PWR8-NEXT: stw r0, 84(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: .cfi_offset r24, -32
; PPC-PWR8-NEXT: .cfi_offset r25, -28
; PPC-PWR8-NEXT: .cfi_offset r26, -24
; PPC-PWR8-NEXT: .cfi_offset r27, -20
; PPC-PWR8-NEXT: .cfi_offset r28, -16
; PPC-PWR8-NEXT: .cfi_offset r29, -12
; PPC-PWR8-NEXT: .cfi_offset r30, -8
; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r27, r5
; PPC-PWR8-NEXT: mr r26, r3
; PPC-PWR8-NEXT: lwz r5, 8(r3)
; PPC-PWR8-NEXT: lwz r4, 4(r3)
; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r28, r6
; PPC-PWR8-NEXT: lwz r6, 12(r3)
; PPC-PWR8-NEXT: lwz r3, 0(r3)
; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: addi r24, r1, 16
; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r29, r7
; PPC-PWR8-NEXT: addi r25, r1, 32
; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
; PPC-PWR8-NEXT: mr r30, r8
; PPC-PWR8-NEXT: .p2align 4
; PPC-PWR8-NEXT: .LBB6_1: # %atomicrmw.start
; PPC-PWR8-NEXT: #
; PPC-PWR8-NEXT: stw r3, 32(r1)
; PPC-PWR8-NEXT: stw r4, 36(r1)
; PPC-PWR8-NEXT: nand r7, r5, r29
; PPC-PWR8-NEXT: nand r8, r6, r30
; PPC-PWR8-NEXT: nand r3, r3, r27
; PPC-PWR8-NEXT: nand r4, r4, r28
; PPC-PWR8-NEXT: stw r5, 40(r1)
; PPC-PWR8-NEXT: stw r6, 44(r1)
; PPC-PWR8-NEXT: mr r5, r25
; PPC-PWR8-NEXT: mr r6, r24
; PPC-PWR8-NEXT: stw r8, 28(r1)
; PPC-PWR8-NEXT: stw r7, 24(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: stw r4, 20(r1)
; PPC-PWR8-NEXT: stw r3, 16(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: mr r4, r26
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: cmplwi r3, 0
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: beq cr0, .LBB6_1
; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
; PPC-PWR8-NEXT: lwz r0, 84(r1)
; PPC-PWR8-NEXT: addi r1, r1, 80
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
  %0 = atomicrmw nand ptr %a, i128 %x seq_cst, align 16
  ret i128 %0
}
;; CmpXchg
; i128 weak cmpxchg, acquire/acquire: CHECK/LE-PWR8 inline a lqarx compare +
; stqcx. loop; PWR7 and AIX64-PWR8 call the __atomic_compare_exchange_16 libcall;
; 32-bit PPC-PWR8 calls __atomic_compare_exchange with the operands on the stack.
define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; CHECK-LABEL: cas_weak_acquire_acquire:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .LBB7_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r8, 0, r3
; CHECK-NEXT: xor r11, r9, r5
; CHECK-NEXT: xor r10, r8, r4
; CHECK-NEXT: or. r11, r11, r10
; CHECK-NEXT: bne cr0, .LBB7_3
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: mr r11, r7
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB7_1
; CHECK-NEXT: b .LBB7_4
; CHECK-NEXT: .LBB7_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB7_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
; CHECK-NEXT: blr
;
; PWR7-LABEL: cas_weak_acquire_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -128(r1)
; PWR7-NEXT: std r0, 144(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: std r5, 120(r1)
; PWR7-NEXT: std r4, 112(r1)
; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 2
; PWR7-NEXT: li r8, 2
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
; PWR7-NEXT: ld r4, 120(r1)
; PWR7-NEXT: ld r3, 112(r1)
; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: cas_weak_acquire_acquire:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: .LBB7_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r8, 0, r3
; LE-PWR8-NEXT: xor r11, r9, r4
; LE-PWR8-NEXT: xor r10, r8, r5
; LE-PWR8-NEXT: or. r11, r11, r10
; LE-PWR8-NEXT: bne cr0, .LBB7_3
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: mr r11, r6
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB7_1
; LE-PWR8-NEXT: b .LBB7_4
; LE-PWR8-NEXT: .LBB7_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB7_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: cas_weak_acquire_acquire:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -128(r1)
; AIX64-PWR8-NEXT: std r0, 144(r1)
; AIX64-PWR8-NEXT: std r5, 120(r1)
; AIX64-PWR8-NEXT: std r4, 112(r1)
; AIX64-PWR8-NEXT: addi r4, r1, 112
; AIX64-PWR8-NEXT: mr r5, r6
; AIX64-PWR8-NEXT: mr r6, r7
; AIX64-PWR8-NEXT: li r7, 2
; AIX64-PWR8-NEXT: li r8, 2
; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: ld r4, 120(r1)
; AIX64-PWR8-NEXT: ld r3, 112(r1)
; AIX64-PWR8-NEXT: addi r1, r1, 128
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: cas_weak_acquire_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -48(r1)
; PPC-PWR8-NEXT: stw r0, 52(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: mr r4, r3
; PPC-PWR8-NEXT: lwz r3, 56(r1)
; PPC-PWR8-NEXT: lwz r11, 60(r1)
; PPC-PWR8-NEXT: stw r8, 44(r1)
; PPC-PWR8-NEXT: stw r7, 40(r1)
; PPC-PWR8-NEXT: li r7, 2
; PPC-PWR8-NEXT: li r8, 2
; PPC-PWR8-NEXT: stw r6, 36(r1)
; PPC-PWR8-NEXT: stw r5, 32(r1)
; PPC-PWR8-NEXT: addi r5, r1, 32
; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: stw r11, 28(r1)
; PPC-PWR8-NEXT: stw r10, 20(r1)
; PPC-PWR8-NEXT: stw r9, 16(r1)
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: lwz r0, 52(r1)
; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
  %0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new acquire acquire
  %1 = extractvalue { i128, i1 } %0, 0
  ret i128 %1
}
define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; Weak i128 cmpxchg with release success / monotonic failure ordering.
; The inline path emits a *leading* lwsync release barrier before the
; lqarx/stqcx. loop and no trailing barrier (nothing to acquire). Libcall
; fallbacks pass memorder r7 = 3 (success) and r8 = 0 (failure) -- presumably
; __ATOMIC_RELEASE / __ATOMIC_RELAXED per the GNU atomics ABI.
; CHECK-LABEL: cas_weak_release_monotonic:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: .LBB8_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r8, 0, r3
; CHECK-NEXT: xor r11, r9, r5
; CHECK-NEXT: xor r10, r8, r4
; CHECK-NEXT: or. r11, r11, r10
; CHECK-NEXT: bne cr0, .LBB8_3
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: mr r11, r7
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB8_1
; CHECK-NEXT: b .LBB8_4
; CHECK-NEXT: .LBB8_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB8_4: # %entry
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
; CHECK-NEXT: blr
;
; PWR7-LABEL: cas_weak_release_monotonic:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -128(r1)
; PWR7-NEXT: std r0, 144(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: std r5, 120(r1)
; PWR7-NEXT: std r4, 112(r1)
; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 3
; PWR7-NEXT: li r8, 0
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
; PWR7-NEXT: ld r4, 120(r1)
; PWR7-NEXT: ld r3, 112(r1)
; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: cas_weak_release_monotonic:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: .LBB8_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r8, 0, r3
; LE-PWR8-NEXT: xor r11, r9, r4
; LE-PWR8-NEXT: xor r10, r8, r5
; LE-PWR8-NEXT: or. r11, r11, r10
; LE-PWR8-NEXT: bne cr0, .LBB8_3
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: mr r11, r6
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB8_1
; LE-PWR8-NEXT: b .LBB8_4
; LE-PWR8-NEXT: .LBB8_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB8_4: # %entry
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: cas_weak_release_monotonic:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -128(r1)
; AIX64-PWR8-NEXT: std r0, 144(r1)
; AIX64-PWR8-NEXT: std r5, 120(r1)
; AIX64-PWR8-NEXT: std r4, 112(r1)
; AIX64-PWR8-NEXT: addi r4, r1, 112
; AIX64-PWR8-NEXT: mr r5, r6
; AIX64-PWR8-NEXT: mr r6, r7
; AIX64-PWR8-NEXT: li r7, 3
; AIX64-PWR8-NEXT: li r8, 0
; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: ld r4, 120(r1)
; AIX64-PWR8-NEXT: ld r3, 112(r1)
; AIX64-PWR8-NEXT: addi r1, r1, 128
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: cas_weak_release_monotonic:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -48(r1)
; PPC-PWR8-NEXT: stw r0, 52(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: mr r4, r3
; PPC-PWR8-NEXT: lwz r3, 56(r1)
; PPC-PWR8-NEXT: lwz r11, 60(r1)
; PPC-PWR8-NEXT: stw r8, 44(r1)
; PPC-PWR8-NEXT: stw r7, 40(r1)
; PPC-PWR8-NEXT: li r7, 3
; PPC-PWR8-NEXT: li r8, 0
; PPC-PWR8-NEXT: stw r6, 36(r1)
; PPC-PWR8-NEXT: stw r5, 32(r1)
; PPC-PWR8-NEXT: addi r5, r1, 32
; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: stw r11, 28(r1)
; PPC-PWR8-NEXT: stw r10, 20(r1)
; PPC-PWR8-NEXT: stw r9, 16(r1)
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: lwz r0, 52(r1)
; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
; Return the loaded (old) value, i.e. field 0 of the cmpxchg result pair.
entry:
  %0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new release monotonic
  %1 = extractvalue { i128, i1 } %0, 0
  ret i128 %1
}
define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; Strong i128 cmpxchg with seq_cst success and failure ordering.
; The inline path brackets the lqarx/stqcx. loop with a leading heavyweight
; sync and a trailing lwsync. Libcall fallbacks pass memorder 5 for both
; success and failure -- presumably __ATOMIC_SEQ_CST per the GNU atomics ABI.
; CHECK-LABEL: cas_sc_sc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sync
; CHECK-NEXT: .LBB9_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r8, 0, r3
; CHECK-NEXT: xor r11, r9, r5
; CHECK-NEXT: xor r10, r8, r4
; CHECK-NEXT: or. r11, r11, r10
; CHECK-NEXT: bne cr0, .LBB9_3
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: mr r11, r7
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB9_1
; CHECK-NEXT: b .LBB9_4
; CHECK-NEXT: .LBB9_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB9_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
; CHECK-NEXT: blr
;
; PWR7-LABEL: cas_sc_sc:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -128(r1)
; PWR7-NEXT: std r0, 144(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: std r5, 120(r1)
; PWR7-NEXT: std r4, 112(r1)
; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 5
; PWR7-NEXT: li r8, 5
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
; PWR7-NEXT: ld r4, 120(r1)
; PWR7-NEXT: ld r3, 112(r1)
; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: cas_sc_sc:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: sync
; LE-PWR8-NEXT: .LBB9_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r8, 0, r3
; LE-PWR8-NEXT: xor r11, r9, r4
; LE-PWR8-NEXT: xor r10, r8, r5
; LE-PWR8-NEXT: or. r11, r11, r10
; LE-PWR8-NEXT: bne cr0, .LBB9_3
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: mr r11, r6
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB9_1
; LE-PWR8-NEXT: b .LBB9_4
; LE-PWR8-NEXT: .LBB9_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB9_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: cas_sc_sc:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -128(r1)
; AIX64-PWR8-NEXT: std r0, 144(r1)
; AIX64-PWR8-NEXT: std r5, 120(r1)
; AIX64-PWR8-NEXT: std r4, 112(r1)
; AIX64-PWR8-NEXT: addi r4, r1, 112
; AIX64-PWR8-NEXT: mr r5, r6
; AIX64-PWR8-NEXT: mr r6, r7
; AIX64-PWR8-NEXT: li r7, 5
; AIX64-PWR8-NEXT: li r8, 5
; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: ld r4, 120(r1)
; AIX64-PWR8-NEXT: ld r3, 112(r1)
; AIX64-PWR8-NEXT: addi r1, r1, 128
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: cas_sc_sc:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -48(r1)
; PPC-PWR8-NEXT: stw r0, 52(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: mr r4, r3
; PPC-PWR8-NEXT: lwz r3, 56(r1)
; PPC-PWR8-NEXT: lwz r11, 60(r1)
; PPC-PWR8-NEXT: stw r8, 44(r1)
; PPC-PWR8-NEXT: stw r7, 40(r1)
; PPC-PWR8-NEXT: li r7, 5
; PPC-PWR8-NEXT: li r8, 5
; PPC-PWR8-NEXT: stw r6, 36(r1)
; PPC-PWR8-NEXT: stw r5, 32(r1)
; PPC-PWR8-NEXT: addi r5, r1, 32
; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: stw r11, 28(r1)
; PPC-PWR8-NEXT: stw r10, 20(r1)
; PPC-PWR8-NEXT: stw r9, 16(r1)
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: lwz r0, 52(r1)
; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
; Return the loaded (old) value, i.e. field 0 of the cmpxchg result pair.
entry:
  %0 = cmpxchg ptr %a, i128 %cmp, i128 %new seq_cst seq_cst
  %1 = extractvalue { i128, i1 } %0, 0
  ret i128 %1
}
define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; Strong i128 cmpxchg with acq_rel success / acquire failure ordering.
; The inline path uses a leading lwsync (release half) and a trailing lwsync
; (acquire half) around the lqarx/stqcx. loop. Libcall fallbacks pass
; memorder r7 = 4 (success) and r8 = 2 (failure) -- presumably
; __ATOMIC_ACQ_REL / __ATOMIC_ACQUIRE per the GNU atomics ABI.
; CHECK-LABEL: cas_acqrel_acquire:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: .LBB10_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r8, 0, r3
; CHECK-NEXT: xor r11, r9, r5
; CHECK-NEXT: xor r10, r8, r4
; CHECK-NEXT: or. r11, r11, r10
; CHECK-NEXT: bne cr0, .LBB10_3
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: mr r11, r7
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB10_1
; CHECK-NEXT: b .LBB10_4
; CHECK-NEXT: .LBB10_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB10_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
; CHECK-NEXT: blr
;
; PWR7-LABEL: cas_acqrel_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -128(r1)
; PWR7-NEXT: std r0, 144(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: std r5, 120(r1)
; PWR7-NEXT: std r4, 112(r1)
; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 4
; PWR7-NEXT: li r8, 2
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
; PWR7-NEXT: ld r4, 120(r1)
; PWR7-NEXT: ld r3, 112(r1)
; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: cas_acqrel_acquire:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: .LBB10_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r8, 0, r3
; LE-PWR8-NEXT: xor r11, r9, r4
; LE-PWR8-NEXT: xor r10, r8, r5
; LE-PWR8-NEXT: or. r11, r11, r10
; LE-PWR8-NEXT: bne cr0, .LBB10_3
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: mr r11, r6
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB10_1
; LE-PWR8-NEXT: b .LBB10_4
; LE-PWR8-NEXT: .LBB10_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB10_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: cas_acqrel_acquire:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -128(r1)
; AIX64-PWR8-NEXT: std r0, 144(r1)
; AIX64-PWR8-NEXT: std r5, 120(r1)
; AIX64-PWR8-NEXT: std r4, 112(r1)
; AIX64-PWR8-NEXT: addi r4, r1, 112
; AIX64-PWR8-NEXT: mr r5, r6
; AIX64-PWR8-NEXT: mr r6, r7
; AIX64-PWR8-NEXT: li r7, 4
; AIX64-PWR8-NEXT: li r8, 2
; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: ld r4, 120(r1)
; AIX64-PWR8-NEXT: ld r3, 112(r1)
; AIX64-PWR8-NEXT: addi r1, r1, 128
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: cas_acqrel_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -48(r1)
; PPC-PWR8-NEXT: stw r0, 52(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: mr r4, r3
; PPC-PWR8-NEXT: lwz r3, 56(r1)
; PPC-PWR8-NEXT: lwz r11, 60(r1)
; PPC-PWR8-NEXT: stw r8, 44(r1)
; PPC-PWR8-NEXT: stw r7, 40(r1)
; PPC-PWR8-NEXT: li r7, 4
; PPC-PWR8-NEXT: li r8, 2
; PPC-PWR8-NEXT: stw r6, 36(r1)
; PPC-PWR8-NEXT: stw r5, 32(r1)
; PPC-PWR8-NEXT: addi r5, r1, 32
; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: stw r11, 28(r1)
; PPC-PWR8-NEXT: stw r10, 20(r1)
; PPC-PWR8-NEXT: stw r9, 16(r1)
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: lwz r6, 44(r1)
; PPC-PWR8-NEXT: lwz r5, 40(r1)
; PPC-PWR8-NEXT: lwz r4, 36(r1)
; PPC-PWR8-NEXT: lwz r3, 32(r1)
; PPC-PWR8-NEXT: lwz r0, 52(r1)
; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
; Return the loaded (old) value, i.e. field 0 of the cmpxchg result pair.
entry:
  %0 = cmpxchg ptr %a, i128 %cmp, i128 %new acq_rel acquire
  %1 = extractvalue { i128, i1 } %0, 0
  ret i128 %1
}
define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; Same acq_rel/acquire cmpxchg as above, but returning the i1 success flag
; (field 1) instead of the old value. On the inline path the flag is
; recomputed by comparing the loaded value against the expected value:
; xor each half, or them together, then cntlzd/rldicl turns "all zero"
; into 1. The libcall fallbacks return the success flag directly, so the
; reloads of the old value present in the previous tests disappear here.
; CHECK-LABEL: cas_acqrel_acquire_check_succ:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: .LBB11_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r8, 0, r3
; CHECK-NEXT: xor r11, r9, r5
; CHECK-NEXT: xor r10, r8, r4
; CHECK-NEXT: or. r11, r11, r10
; CHECK-NEXT: bne cr0, .LBB11_3
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: mr r11, r7
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB11_1
; CHECK-NEXT: b .LBB11_4
; CHECK-NEXT: .LBB11_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB11_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: xor r3, r4, r8
; CHECK-NEXT: xor r4, r5, r9
; CHECK-NEXT: or r3, r4, r3
; CHECK-NEXT: cntlzd r3, r3
; CHECK-NEXT: rldicl r3, r3, 58, 63
; CHECK-NEXT: blr
;
; PWR7-LABEL: cas_acqrel_acquire_check_succ:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -128(r1)
; PWR7-NEXT: std r0, 144(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: std r5, 120(r1)
; PWR7-NEXT: std r4, 112(r1)
; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 4
; PWR7-NEXT: li r8, 2
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: cas_acqrel_acquire_check_succ:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: .LBB11_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r8, 0, r3
; LE-PWR8-NEXT: xor r11, r9, r4
; LE-PWR8-NEXT: xor r10, r8, r5
; LE-PWR8-NEXT: or. r11, r11, r10
; LE-PWR8-NEXT: bne cr0, .LBB11_3
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: mr r11, r6
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB11_1
; LE-PWR8-NEXT: b .LBB11_4
; LE-PWR8-NEXT: .LBB11_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB11_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: xor r3, r5, r8
; LE-PWR8-NEXT: xor r4, r4, r9
; LE-PWR8-NEXT: or r3, r4, r3
; LE-PWR8-NEXT: cntlzd r3, r3
; LE-PWR8-NEXT: rldicl r3, r3, 58, 63
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: cas_acqrel_acquire_check_succ:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: mflr r0
; AIX64-PWR8-NEXT: stdu r1, -128(r1)
; AIX64-PWR8-NEXT: std r0, 144(r1)
; AIX64-PWR8-NEXT: std r5, 120(r1)
; AIX64-PWR8-NEXT: std r4, 112(r1)
; AIX64-PWR8-NEXT: addi r4, r1, 112
; AIX64-PWR8-NEXT: mr r5, r6
; AIX64-PWR8-NEXT: mr r6, r7
; AIX64-PWR8-NEXT: li r7, 4
; AIX64-PWR8-NEXT: li r8, 2
; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
; AIX64-PWR8-NEXT: nop
; AIX64-PWR8-NEXT: addi r1, r1, 128
; AIX64-PWR8-NEXT: ld r0, 16(r1)
; AIX64-PWR8-NEXT: mtlr r0
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: cas_acqrel_acquire_check_succ:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -48(r1)
; PPC-PWR8-NEXT: stw r0, 52(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: mr r4, r3
; PPC-PWR8-NEXT: lwz r3, 56(r1)
; PPC-PWR8-NEXT: lwz r11, 60(r1)
; PPC-PWR8-NEXT: stw r8, 44(r1)
; PPC-PWR8-NEXT: stw r7, 40(r1)
; PPC-PWR8-NEXT: li r7, 4
; PPC-PWR8-NEXT: li r8, 2
; PPC-PWR8-NEXT: stw r6, 36(r1)
; PPC-PWR8-NEXT: stw r5, 32(r1)
; PPC-PWR8-NEXT: addi r5, r1, 32
; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: stw r11, 28(r1)
; PPC-PWR8-NEXT: stw r10, 20(r1)
; PPC-PWR8-NEXT: stw r9, 16(r1)
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: lwz r0, 52(r1)
; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
; Return the success bit, i.e. field 1 of the cmpxchg result pair.
entry:
  %0 = cmpxchg ptr %a, i128 %cmp, i128 %new acq_rel acquire
  %1 = extractvalue { i128, i1 } %0, 1
  ret i1 %1
}