llvm-project/llvm/test/CodeGen/AMDGPU/store-clobbers-load.ll
Vikash Gupta 49ffe31def
[AMDGPU][LIT][NFC] Adding -mtriple for AMDGPUAnnotateUniformValues Pass tests (#156437)
It specifies the target machine as AMDGPU for
AMDGPUAnnotateUniformValues pass-related test (that uses UA). Before in
its absense, the UA would consider everything Uniform resulting in
setting metadata incorrectly for AMDGPU. Now, after specifying the
AMDGPU, the UA would be rightful sets the right metadata as the test
gets commpiled for AMDGPU.
2025-09-03 15:32:28 +05:30

40 lines
1.7 KiB
LLVM

; RUN: opt -S -mtriple=amdgcn-amd-amdhsa --amdgpu-annotate-uniform < %s | FileCheck -check-prefix=OPT %s
; "load vaddr" depends on the store, so we should not mark vaddr as amdgpu.noclobber.
; OPT-LABEL: @store_clobbers_load(
; OPT: %zero = load <4 x i32>, ptr addrspace(1) %input, align 16{{$}}
define amdgpu_kernel void @store_clobbers_load(ptr addrspace(1) %input, ptr addrspace(1) %out, i32 %index) {
entry:
store i32 0, ptr addrspace(1) %input
%zero = load <4 x i32>, ptr addrspace(1) %input, align 16
%one = insertelement <4 x i32> %zero, i32 1, i32 1
%two = insertelement <4 x i32> %one, i32 2, i32 2
%three = insertelement <4 x i32> %two, i32 3, i32 3
store <4 x i32> %three, ptr addrspace(1) %input, align 16
%rslt = extractelement <4 x i32> %three, i32 %index
store i32 %rslt, ptr addrspace(1) %out, align 4
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x()
@lds0 = addrspace(3) global [512 x i32] poison, align 4
; To check that %arrayidx0 is not marked as amdgpu.noclobber.
; OPT-LABEL: @atomicrmw_clobbers_load(
; OPT: %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
; OPT-NEXT: %val = atomicrmw xchg ptr addrspace(3) %arrayidx0, i32 3 seq_cst
define amdgpu_kernel void @atomicrmw_clobbers_load(ptr addrspace(1) %out0, ptr addrspace(1) %out1) {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val = atomicrmw xchg ptr addrspace(3) %arrayidx0, i32 3 seq_cst
%load = load i32, ptr addrspace(3) %arrayidx0, align 4
store i32 %val, ptr addrspace(1) %out0, align 4
store i32 %load, ptr addrspace(1) %out1, align 4
ret void
}