llvm-project/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
Sameer Sahasrabuddhe 6a2305484e [AAPointerInfo] track multiple constant offsets for each use
An expression of the form `gep(base, select(pred, const1, const2))` can result
in a set of offsets instead of just one. PointerInfo can now track these sets
instead of conservatively modeling them as Unknown. In general, AAPointerInfo
now uses AAPotentialConstantValues to examine the operands of the GEP.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D138646
2022-12-13 22:27:25 +05:30

71 lines
2.8 KiB
LLVM

; RUN: llc < %s | FileCheck %s
; The module is fed to llc on stdin, so the target is selected solely by the
; triple below (no -mtriple/-mcpu is passed on the command line above).
target triple = "amdgcn-amd-amdhsa"
; The call to intrinsic implicitarg_ptr reaches a load through a phi. The
; offsets of the phi cannot be determined, and hence the attributor assumes that
; hostcall is in use.
; CHECK-LABEL: amdhsa.kernels:
; CHECK: .value_kind: hidden_hostcall_buffer
; CHECK: .value_kind: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: kernel_1
; kernel_1: the address of the i16 load is a phi over two GEPs based on the
; implicit argument pointer, each indexed by a runtime kernel argument.
define amdgpu_kernel void @kernel_1(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
entry:
  %implicitarg = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  br i1 %cond, label %via.index1, label %via.index2

via.index1:                                       ; preds = %entry
  ; Byte offset %index1 is a kernel argument, i.e. not a compile-time constant.
  %gep.index1 = getelementptr i8, i8 addrspace(4)* %implicitarg, i64 %index1
  br label %merge

via.index2:                                       ; preds = %entry
  ; Same base, different runtime offset; only this GEP carries inbounds.
  %gep.index2 = getelementptr inbounds i8, i8 addrspace(4)* %implicitarg, i64 %index2
  br label %merge

merge:                                            ; preds = %via.index2, %via.index1
  %byte.addr = phi i8 addrspace(4)* [ %gep.index2, %via.index2 ], [ %gep.index1, %via.index1 ]
  %field.addr = bitcast i8 addrspace(4)* %byte.addr to i16 addrspace(4)*
  ;;; The load below is the use whose offset from implicitarg_ptr is unknown.
  %field = load i16, i16 addrspace(4)* %field.addr, align 2
  ; The loaded value is sign-extended and used as an i32 element index into %a.
  %field.ext = sext i16 %field to i64
  %elt.ptr = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %field.ext
  %prev = atomicrmw add i32 addrspace(1)* %elt.ptr, i32 15 syncscope("agent-one-as") monotonic, align 4
  ret void
}
; The call to intrinsic implicitarg_ptr is combined with an offset produced by
; select'ing between two constants, before it is eventually used in a GEP to
; form the address of a load. This test ensures that AAPointerInfo can look
; through the select to maintain a set of indices, so that it can precisely
; determine that hostcall and other expensive implicit args are not in use.
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-NOT: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: kernel_2
; kernel_2: the byte offset applied to the implicit argument pointer is chosen
; by a select between the constants 12 and 18, so the set of possible offsets
; at the load is finite and known.
define amdgpu_kernel void @kernel_2(i32 addrspace(1)* %a, i1 %cond) {
entry:
  %implicitarg = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; Either 12 or 18, depending on %cond.
  %byte.off = select i1 %cond, i64 12, i64 18
  %byte.addr = getelementptr inbounds i8, i8 addrspace(4)* %implicitarg, i64 %byte.off
  %field.addr = bitcast i8 addrspace(4)* %byte.addr to i16 addrspace(4)*
  ;;; The load below is the use with multiple possible offsets relative to
  ;;; implicitarg_ptr.
  %field = load i16, i16 addrspace(4)* %field.addr, align 2
  ; The loaded value is sign-extended and used as an i32 element index into %a.
  %field.ext = sext i16 %field to i64
  %elt.ptr = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %field.ext
  %prev = atomicrmw add i32 addrspace(1)* %elt.ptr, i32 15 syncscope("agent-one-as") monotonic, align 4
  ret void
}
; Intrinsic called by both kernels in this file to obtain the implicit
; argument pointer (constant address space 4, declared 4-byte aligned).
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()

; Declared but not called by either kernel shown in this file.
declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workitem.id.x()