llvm-project/llvm/test/CodeGen/X86/atomic-unaligned.ll
James Y Knight d6f9278ae9
[X86] Use plain load/store instead of cmpxchg16b for atomics with AVX (#74275)
In late 2021, both Intel and AMD finally documented that every
AVX-capable CPU has always been guaranteed to execute aligned 16-byte
loads/stores atomically, and further, guaranteed that all future CPUs
with AVX will do so as well.

Therefore, when AVX is enabled, we may use plain SSE 128-bit load/store
instructions to implement these 16-byte atomics.

Per the AMD64 Architecture Programmer's Manual, section 7.3.2 "Access Atomicity":

> Processors that report [AVX] extend the atomicity for cacheable,
> naturally-aligned single loads or stores from a quadword to a double
> quadword.

Per Intel's SDM:

> Processors that enumerate support for Intel(R) AVX guarantee that the
> 16-byte memory operations performed by the following instructions will
> always be carried out atomically:
> - MOVAPD, MOVAPS, and MOVDQA.
> - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
> - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded with
>   EVEX.128 and k0 (masking disabled).

This was also confirmed to be true for Zhaoxin CPUs with AVX, in
https://gcc.gnu.org/PR104688
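
For illustration, here is a minimal sketch of the aligned case this change
enables, assuming an AVX-enabled target; the function names and the -mattr
flag are hypothetical, and this IR is not part of the test below. With AVX,
a naturally-aligned 16-byte atomic access can be lowered to a plain 128-bit
vector move (e.g. vmovaps/vmovdqa) rather than a cmpxchg16b loop or a libcall.

; Hypothetical RUN line: llc -mtriple=x86_64 -mattr=+avx < %s
define i128 @load_aligned_i128(ptr %a) nounwind {
; align 16 meets the natural alignment, so the AVX guarantee applies.
%v = load atomic i128, ptr %a seq_cst, align 16
ret i128 %v
}
define void @store_aligned_i128(ptr %a, i128 %v) nounwind {
; The aligned store likewise becomes a plain 128-bit move; a seq_cst
; store still requires a fence for ordering.
store atomic i128 %v, ptr %a seq_cst, align 16
ret void
}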

; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
; Quick test to ensure that atomic operations which are not naturally
; aligned are lowered to unsized __atomic_* libcalls, and are not emitted
; as native instructions or sized libcalls.
define void @test_i32(ptr %a) nounwind {
; CHECK-LABEL: test_i32:
; CHECK: callq __atomic_load
; CHECK: callq __atomic_store
; CHECK: callq __atomic_exchange
; CHECK: callq __atomic_compare_exchange
; CHECK: callq __atomic_compare_exchange
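; align 2 is below the natural 4-byte alignment of i32, so every operation
; below lowers to an unsized __atomic_* libcall; the atomicrmw add is
; expanded to a CAS loop calling __atomic_compare_exchange, hence the two
; compare_exchange checks above.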
%t0 = load atomic i32, ptr %a seq_cst, align 2
store atomic i32 1, ptr %a seq_cst, align 2
%t1 = atomicrmw xchg ptr %a, i32 1 seq_cst, align 2
%t3 = atomicrmw add ptr %a, i32 2 seq_cst, align 2
%t2 = cmpxchg ptr %a, i32 0, i32 1 seq_cst seq_cst, align 2
ret void
}
define void @test_i128(ptr %a) nounwind {
; CHECK-LABEL: test_i128:
; CHECK: callq __atomic_load
; CHECK: callq __atomic_store
; CHECK: callq __atomic_exchange
; CHECK: callq __atomic_compare_exchange
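; i128 needs 16-byte alignment for a native lowering; align 8 is
; under-aligned, so these also go to the unsized __atomic_* libcalls rather
; than to native 16-byte instructions.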
%t0 = load atomic i128, ptr %a seq_cst, align 8
store atomic i128 1, ptr %a seq_cst, align 8
%t1 = atomicrmw xchg ptr %a, i128 1 seq_cst, align 8
%t2 = atomicrmw add ptr %a, i128 2 seq_cst, align 8
%t3 = cmpxchg ptr %a, i128 0, i128 1 seq_cst seq_cst, align 8
ret void
}