llvm-project/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
Matt Davis 404071b059
[SelectionDAG] Preserve volatile undef stores. (#99918)
This patch preserves `undef` SDNodes that are `volatile` qualified.
Previously, these nodes would be discarded. The motivation behind this
change is to adhere to the
[LangRef](https://llvm.org/docs/LangRef.html#volatile-memory-accesses),
even though that doc is mostly in terms of LLVM-IR, it seems reasonable
to imply that the volatile constraints also imply to SDNodes.

> Certain memory accesses, such as
[load](https://llvm.org/docs/LangRef.html#i-load)’s,
[store](https://llvm.org/docs/LangRef.html#i-store)’s, and
[llvm.memcpy](https://llvm.org/docs/LangRef.html#int-memcpy)’s may be
marked volatile. The optimizers must not change the number of volatile
operations or change their order of execution relative to other volatile
operations. The optimizers may change the order of volatile operations
relative to non-volatile operations. This is not Java’s “volatile” and
has no cross-thread synchronization behavior.

Source: https://llvm.org/docs/LangRef.html#volatile-memory-accesses
2024-07-24 08:41:56 -04:00

94 lines
3.8 KiB
LLVM

; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; The bitcast should be pushed through the bitcasts so the vectors can
; be broken down and the shared components can be CSEd
; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(ptr addrspace(1) %out, <8 x i32> %vec) {
%vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
%vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(ptr addrspace(1) %out, <4 x i64> %vec) {
%vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
%vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(ptr addrspace(1) %out, <4 x i64> %vec) {
%vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
store volatile <4 x double> %vec0.bc, ptr addrspace(1) %out
%vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
store volatile <4 x double> %vec1.bc, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace(1) %out, <16 x i16> %vec) {
%vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
%vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
; GCN-NOT: store_dword
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
%bc = bitcast i64 %undef to <2 x i32>
store <2 x i32> %bc, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
; GCN-NOT: store_dword
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
%bc = bitcast i64 %undef to <2 x i32>
%elt1 = extractelement <2 x i32> %bc, i32 1
store i32 %elt1, ptr addrspace(1) %out
ret void
}
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone convergent }