llvm-project/llvm/test/CodeGen/X86/atomic-idempotent.ll
Simon Pilgrim a7115d51be [X86] X86CallFrameOptimization - generalize slow push code path
Replace the explicit isAtom() || isSLM() test with the more general (and more specific) slowTwoMemOps() check to avoid the use of the PUSHrmm push from memory case.

This is actually very tricky to test in anything but quite complex code, but the atomic-idempotent.ll tests seem to be the most straightforward to use.

Differential Revision: https://reviews.llvm.org/D76239
2020-03-29 11:01:59 +01:00

640 lines
19 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SSE2
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-GENERIC,X86-SLM
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,X86,X86-ATOM
; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
; This is explained (with the motivation for such an optimization) in
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
; add8 - idempotent 'atomicrmw add 0' on an i8 with monotonic ordering; the
; loaded value is returned.
; The X64 and X86-SSE2 runs expect an mfence followed by a plain byte load.
; The X86-SLM and X86-ATOM runs (all of which pass -mattr=-sse2) expect the
; operation to stay a 'lock xaddb' of a zeroed register; ATOM additionally
; expects nop padding before the return.
define i8 @add8(i8* %p) {
; X64-LABEL: add8:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: retq
;
; X86-SSE2-LABEL: add8:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movb (%eax), %al
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: add8:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: xorl %eax, %eax
; X86-SLM-NEXT: lock xaddb %al, (%ecx)
; X86-SLM-NEXT: # kill: def $al killed $al killed $eax
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: add8:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: xorl %eax, %eax
; X86-ATOM-NEXT: lock xaddb %al, (%ecx)
; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
%1 = atomicrmw add i8* %p, i8 0 monotonic
ret i8 %1
}
; or16 - idempotent 'atomicrmw or 0' on an i16 with acquire ordering; the
; loaded value is returned.
; The X64 and X86-SSE2 runs expect an mfence followed by a zero-extending
; 16-bit load. The X86-SLM and X86-ATOM runs (no SSE2) expect a
; 'lock cmpxchgw' retry loop instead.
define i16 @or16(i16* %p) {
; X64-LABEL: or16:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: retq
;
; X86-SSE2-LABEL: or16:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movzwl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: or16:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movzwl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx)
; X86-SLM-NEXT: jne .LBB1_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: or16:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movzwl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx)
; X86-ATOM-NEXT: jne .LBB1_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
%1 = atomicrmw or i16* %p, i16 0 acquire
ret i16 %1
}
; xor32 - idempotent 'atomicrmw xor 0' on an i32 with release ordering; the
; loaded value is returned.
; The X64 and X86-SSE2 runs expect an mfence followed by a 32-bit load.
; The X86-SLM and X86-ATOM runs (no SSE2) expect a 'lock cmpxchgl' retry
; loop instead.
define i32 @xor32(i32* %p) {
; X64-LABEL: xor32:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X86-SSE2-LABEL: xor32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: xor32:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT: jne .LBB2_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: xor32:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT: jne .LBB2_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
%1 = atomicrmw xor i32* %p, i32 0 release
ret i32 %1
}
; sub64 - idempotent 'atomicrmw sub 0' on an i64 with seq_cst ordering; the
; loaded value is returned.
; The X64 run expects an mfence followed by a 64-bit load. Every 32-bit run
; shares the common X86 prefix here and expects a 'lock cmpxchg8b' retry loop
; that saves and restores %ebx and %esi around it.
define i64 @sub64(i64* %p) {
; X64-LABEL: sub64:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: retq
;
; X86-LABEL: sub64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %eax
; X86-NEXT: movl 4(%esi), %edx
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB3_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: lock cmpxchg8b (%esi)
; X86-NEXT: jne .LBB3_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
%1 = atomicrmw sub i64* %p, i64 0 seq_cst
ret i64 %1
}
; or128 - idempotent 'atomicrmw or 0' on an i128 with monotonic ordering; the
; loaded value is returned.
; Every run expects a call to __sync_fetch_and_or_16 rather than a fence+load.
; The 32-bit variants differ in how the pointer argument is pushed: the
; X86-SSE2 run pushes it straight from memory (pushl 12(%ebp)), while the
; X86-SLM and X86-ATOM runs first load it into %eax and push the register.
; The X86-ATOM run also uses leal where the other runs use movl/subl/addl for
; frame-pointer setup and stack adjustment.
define i128 @or128(i128* %p) {
; X64-LABEL: or128:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: callq __sync_fetch_and_or_16
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; X86-SSE2-LABEL: or128:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: .cfi_offset %esi, -16
; X86-SSE2-NEXT: .cfi_offset %edi, -12
; X86-SSE2-NEXT: movl 8(%ebp), %esi
; X86-SSE2-NEXT: movl %esp, %eax
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl 12(%ebp)
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
; X86-SSE2-NEXT: addl $20, %esp
; X86-SSE2-NEXT: movl (%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT: movl %edi, 8(%esi)
; X86-SSE2-NEXT: movl %edx, 12(%esi)
; X86-SSE2-NEXT: movl %eax, (%esi)
; X86-SSE2-NEXT: movl %ecx, 4(%esi)
; X86-SSE2-NEXT: movl %esi, %eax
; X86-SSE2-NEXT: leal -8(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl $4
;
; X86-SLM-LABEL: or128:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: pushl %ebp
; X86-SLM-NEXT: .cfi_def_cfa_offset 8
; X86-SLM-NEXT: .cfi_offset %ebp, -8
; X86-SLM-NEXT: movl %esp, %ebp
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
; X86-SLM-NEXT: pushl %edi
; X86-SLM-NEXT: pushl %esi
; X86-SLM-NEXT: andl $-8, %esp
; X86-SLM-NEXT: subl $16, %esp
; X86-SLM-NEXT: .cfi_offset %esi, -16
; X86-SLM-NEXT: .cfi_offset %edi, -12
; X86-SLM-NEXT: movl 8(%ebp), %esi
; X86-SLM-NEXT: movl 12(%ebp), %eax
; X86-SLM-NEXT: movl %esp, %ecx
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl %eax
; X86-SLM-NEXT: pushl %ecx
; X86-SLM-NEXT: calll __sync_fetch_and_or_16
; X86-SLM-NEXT: addl $20, %esp
; X86-SLM-NEXT: movl (%esp), %eax
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLM-NEXT: movl %edi, 8(%esi)
; X86-SLM-NEXT: movl %edx, 12(%esi)
; X86-SLM-NEXT: movl %eax, (%esi)
; X86-SLM-NEXT: movl %ecx, 4(%esi)
; X86-SLM-NEXT: movl %esi, %eax
; X86-SLM-NEXT: leal -8(%ebp), %esp
; X86-SLM-NEXT: popl %esi
; X86-SLM-NEXT: popl %edi
; X86-SLM-NEXT: popl %ebp
; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
; X86-SLM-NEXT: retl $4
;
; X86-ATOM-LABEL: or128:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
; X86-ATOM-NEXT: leal (%esp), %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT: pushl %edi
; X86-ATOM-NEXT: pushl %esi
; X86-ATOM-NEXT: andl $-8, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: .cfi_offset %esi, -16
; X86-ATOM-NEXT: .cfi_offset %edi, -12
; X86-ATOM-NEXT: movl 8(%ebp), %esi
; X86-ATOM-NEXT: movl 12(%ebp), %eax
; X86-ATOM-NEXT: movl %esp, %ecx
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl %eax
; X86-ATOM-NEXT: pushl %ecx
; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl (%esp), %ecx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-ATOM-NEXT: movl %eax, 8(%esi)
; X86-ATOM-NEXT: movl %edi, 12(%esi)
; X86-ATOM-NEXT: movl %ecx, (%esi)
; X86-ATOM-NEXT: movl %esi, %eax
; X86-ATOM-NEXT: movl %edx, 4(%esi)
; X86-ATOM-NEXT: leal -8(%ebp), %esp
; X86-ATOM-NEXT: popl %esi
; X86-ATOM-NEXT: popl %edi
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl $4
%1 = atomicrmw or i128* %p, i128 0 monotonic
ret i128 %1
}
; For 'and', the idempotent value is (-1)
; and32 - idempotent 'atomicrmw and -1' on an i32 with acq_rel ordering; the
; loaded value is returned.
; The X64 and X86-SSE2 runs expect an mfence followed by a 32-bit load.
; The X86-SLM and X86-ATOM runs (no SSE2) expect a 'lock cmpxchgl' retry
; loop instead.
define i32 @and32 (i32* %p) {
; X64-LABEL: and32:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X86-SSE2-LABEL: and32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: movl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: and32:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLM-NEXT: movl (%ecx), %eax
; X86-SLM-NEXT: .p2align 4, 0x90
; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-SLM-NEXT: jne .LBB5_1
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: and32:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl (%ecx), %eax
; X86-ATOM-NEXT: .p2align 4, 0x90
; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
; X86-ATOM-NEXT: jne .LBB5_1
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
; X86-ATOM-NEXT: retl
%1 = atomicrmw and i32* %p, i32 -1 acq_rel
ret i32 %1
}
; or32_nouse_monotonic - idempotent 'atomicrmw or 0' on an i32 whose result
; is unused, with monotonic ordering.
; All runs expect only a #MEMBARRIER marker and a return, with no locked
; instruction or fence; the X86-ATOM run also expects nop padding before the
; return.
define void @or32_nouse_monotonic(i32* %p) {
; X64-LABEL: or32_nouse_monotonic:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_monotonic:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_monotonic:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 monotonic
ret void
}
; or32_nouse_acquire - idempotent 'atomicrmw or 0' on an i32 whose result is
; unused, with acquire ordering.
; All runs expect only a #MEMBARRIER marker and a return, with no locked
; instruction or fence; the X86-ATOM run also expects nop padding before the
; return.
define void @or32_nouse_acquire(i32* %p) {
; X64-LABEL: or32_nouse_acquire:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_acquire:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_acquire:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 acquire
ret void
}
; or32_nouse_release - idempotent 'atomicrmw or 0' on an i32 whose result is
; unused, with release ordering.
; All runs expect only a #MEMBARRIER marker and a return, with no locked
; instruction or fence; the X86-ATOM run also expects nop padding before the
; return.
define void @or32_nouse_release(i32* %p) {
; X64-LABEL: or32_nouse_release:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_release:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_release:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 release
ret void
}
; or32_nouse_acq_rel - idempotent 'atomicrmw or 0' on an i32 whose result is
; unused, with acq_rel ordering.
; All runs expect only a #MEMBARRIER marker and a return, with no locked
; instruction or fence; the X86-ATOM run also expects nop padding before the
; return.
define void @or32_nouse_acq_rel(i32* %p) {
; X64-LABEL: or32_nouse_acq_rel:
; X64: # %bb.0:
; X64-NEXT: #MEMBARRIER
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_acq_rel:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: #MEMBARRIER
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_acq_rel:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: #MEMBARRIER
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 acq_rel
ret void
}
; or32_nouse_seq_cst - idempotent 'atomicrmw or 0' on an i32 whose result is
; unused, with seq_cst ordering.
; Instead of touching %p, every run expects a 'lock orl $0' on a stack slot:
; at a negative offset from %rsp on X64 and at (%esp) on the 32-bit runs.
; The X86-ATOM run also expects nop padding before the return.
define void @or32_nouse_seq_cst(i32* %p) {
; X64-LABEL: or32_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or32_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or32_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i32* %p, i32 0 seq_cst
ret void
}
; TODO: The result is unused, so the cmpxchg8b loop emitted on 32-bit targets is unneeded.
; or64_nouse_seq_cst - idempotent 'atomicrmw or 0' on an i64 whose result is
; unused, with seq_cst ordering.
; The X64 run expects a 'lock orl $0' on a stack slot below %rsp. Every
; 32-bit run shares the common X86 prefix here and currently expects a full
; 'lock cmpxchg8b' retry loop on %p.
define void @or64_nouse_seq_cst(i64* %p) {
; X64-LABEL: or64_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-LABEL: or64_nouse_seq_cst:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %eax
; X86-NEXT: movl 4(%esi), %edx
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB11_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: lock cmpxchg8b (%esi)
; X86-NEXT: jne .LBB11_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
atomicrmw or i64* %p, i64 0 seq_cst
ret void
}
; TODO: The result is unused, so this does not need to be lowered as a __sync_fetch_and_or_16 libcall.
; or128_nouse_seq_cst - idempotent 'atomicrmw or 0' on an i128 whose result
; is unused, with seq_cst ordering.
; Every run currently expects a call to __sync_fetch_and_or_16.
; The 32-bit variants differ in how the pointer argument is pushed: the
; X86-SSE2 run pushes it straight from memory (pushl 8(%ebp)), while the
; X86-SLM and X86-ATOM runs first load it into %eax and push the register.
; The X86-ATOM run also uses leal where the other runs use movl/subl/addl for
; frame-pointer setup and stack adjustment.
define void @or128_nouse_seq_cst(i128* %p) {
; X64-LABEL: or128_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: callq __sync_fetch_and_or_16
; X64-NEXT: popq %rax
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; X86-SSE2-LABEL: or128_nouse_seq_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movl %esp, %eax
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl 8(%ebp)
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
; X86-SSE2-NEXT: addl $20, %esp
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: or128_nouse_seq_cst:
; X86-SLM: # %bb.0:
; X86-SLM-NEXT: pushl %ebp
; X86-SLM-NEXT: .cfi_def_cfa_offset 8
; X86-SLM-NEXT: .cfi_offset %ebp, -8
; X86-SLM-NEXT: movl %esp, %ebp
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
; X86-SLM-NEXT: andl $-8, %esp
; X86-SLM-NEXT: subl $16, %esp
; X86-SLM-NEXT: movl 8(%ebp), %eax
; X86-SLM-NEXT: movl %esp, %ecx
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl $0
; X86-SLM-NEXT: pushl %eax
; X86-SLM-NEXT: pushl %ecx
; X86-SLM-NEXT: calll __sync_fetch_and_or_16
; X86-SLM-NEXT: addl $20, %esp
; X86-SLM-NEXT: movl %ebp, %esp
; X86-SLM-NEXT: popl %ebp
; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
; X86-SLM-NEXT: retl
;
; X86-ATOM-LABEL: or128_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
; X86-ATOM-NEXT: leal (%esp), %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT: andl $-8, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl 8(%ebp), %eax
; X86-ATOM-NEXT: movl %esp, %ecx
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl %eax
; X86-ATOM-NEXT: pushl %ecx
; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl %ebp, %esp
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl
atomicrmw or i128* %p, i128 0 seq_cst
ret void
}
; or16_nouse_seq_cst - idempotent 'atomicrmw or 0' on an i16 whose result is
; unused, with seq_cst ordering.
; Instead of touching %p, every run expects a 32-bit 'lock orl $0' on a stack
; slot: at a negative offset from %rsp on X64 and at (%esp) on the 32-bit
; runs. The X86-ATOM run also expects nop padding before the return.
define void @or16_nouse_seq_cst(i16* %p) {
; X64-LABEL: or16_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or16_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or16_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i16* %p, i16 0 seq_cst
ret void
}
; or8_nouse_seq_cst - idempotent 'atomicrmw or 0' on an i8 whose result is
; unused, with seq_cst ordering.
; Instead of touching %p, every run expects a 32-bit 'lock orl $0' on a stack
; slot: at a negative offset from %rsp on X64 and at (%esp) on the 32-bit
; runs. The X86-ATOM run also expects nop padding before the return.
define void @or8_nouse_seq_cst(i8* %p) {
; X64-LABEL: or8_nouse_seq_cst:
; X64: # %bb.0:
; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
;
; X86-GENERIC-LABEL: or8_nouse_seq_cst:
; X86-GENERIC: # %bb.0:
; X86-GENERIC-NEXT: lock orl $0, (%esp)
; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or8_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: lock orl $0, (%esp)
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: nop
; X86-ATOM-NEXT: retl
atomicrmw or i8* %p, i8 0 seq_cst
ret void
}