
In late 2021, both Intel and AMD finally documented that every AVX-capable CPU has always been guaranteed to execute aligned 16-byte loads/stores atomically, and further guaranteed that all future CPUs with AVX will do so as well. Therefore, we may use normal SSE 128-bit load/store instructions to implement atomics, if AVX is enabled.

Per the AMD64 Architecture Programmer's Manual, 7.3.2 Access Atomicity:

> Processors that report [AVX] extend the atomicity for cacheable,
> naturally-aligned single loads or stores from a quadword to a double
> quadword.

Per Intel's SDM:

> Processors that enumerate support for Intel(R) AVX guarantee that the
> 16-byte memory operations performed by the following instructions will
> always be carried out atomically:
> - MOVAPD, MOVAPS, and MOVDQA.
> - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
> - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded with
>   EVEX.128 and k0 (masking disabled).

This was also confirmed to be true for Zhaoxin CPUs with AVX, in
https://gcc.gnu.org/PR104688
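To make the payoff concrete, here is a minimal sketch of the aligned case this guarantee enables. It is not part of the test file below, and the function names are illustrative only. Built with something like llc -mtriple=x86_64 -mattr=+avx, each naturally-aligned atomic i128 access here is expected to have its memory access selected as one of the 16-byte vector moves quoted above (plus whatever fence seq_cst requires), rather than a libcall or a cmpxchg16b loop:

define i128 @load_i128_aligned(ptr %p) nounwind {
  ; Naturally aligned (align 16): with AVX the access itself can be a single
  ; 16-byte vector load, e.g. vmovdqa, instead of a __atomic_load_16 call.
  %v = load atomic i128, ptr %p seq_cst, align 16
  ret i128 %v
}

define void @store_i128_aligned(ptr %p, i128 %v) nounwind {
  ; Likewise, the store can be a single 16-byte vector store, plus any fence
  ; needed for seq_cst ordering.
  store atomic i128 %v, ptr %p seq_cst, align 16
  ret void
}

The test below covers the opposite situation: atomic accesses whose alignment is below their natural alignment must not be lowered this way, and instead go through the generic __atomic_* libcalls.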
; RUN: llc -mtriple=x86_64 < %s | FileCheck %s

; Quick test to ensure that atomics which are not naturally-aligned
; emit unsized libcalls, and aren't emitted as native instructions or
; sized libcalls.
define void @test_i32(ptr %a) nounwind {
; CHECK-LABEL: test_i32:
; CHECK: callq __atomic_load
; CHECK: callq __atomic_store
; CHECK: callq __atomic_exchange
; CHECK: callq __atomic_compare_exchange
; CHECK: callq __atomic_compare_exchange
  %t0 = load atomic i32, ptr %a seq_cst, align 2
  store atomic i32 1, ptr %a seq_cst, align 2
  %t1 = atomicrmw xchg ptr %a, i32 1 seq_cst, align 2
  %t3 = atomicrmw add ptr %a, i32 2 seq_cst, align 2
  %t2 = cmpxchg ptr %a, i32 0, i32 1 seq_cst seq_cst, align 2
  ret void
}

define void @test_i128(ptr %a) nounwind {
; CHECK-LABEL: test_i128:
; CHECK: callq __atomic_load
; CHECK: callq __atomic_store
; CHECK: callq __atomic_exchange
; CHECK: callq __atomic_compare_exchange
  %t0 = load atomic i128, ptr %a seq_cst, align 8
  store atomic i128 1, ptr %a seq_cst, align 8
  %t1 = atomicrmw xchg ptr %a, i128 1 seq_cst, align 8
  %t2 = atomicrmw add ptr %a, i128 2 seq_cst, align 8
  %t3 = cmpxchg ptr %a, i128 0, i128 1 seq_cst seq_cst, align 8
  ret void
}
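For reference, the generic ("unsized") libcalls that the CHECK lines above expect take a runtime size and operate through pointers. Their shapes, as described in LLVM's Atomics documentation, look roughly like the declarations below; showing them in IR form, with size_t as i64 (x86_64) and the boolean result as i8, involves assumptions about the ABI-level types.

; Assumed IR-level shapes of the generic atomic libcalls, illustrative only.
; Parameter order: size, pointer argument(s), then memory-order integer(s).
declare void @__atomic_load(i64, ptr, ptr, i32)           ; (size, src, ret, order)
declare void @__atomic_store(i64, ptr, ptr, i32)          ; (size, dst, val, order)
declare void @__atomic_exchange(i64, ptr, ptr, ptr, i32)  ; (size, ptr, val, ret, order)
declare i8 @__atomic_compare_exchange(i64, ptr, ptr, ptr, i32, i32) ; (size, ptr, expected, desired, success, failure)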