An expression of the form `gep(base, select(pred, const1, const2))` can result in a set of offsets instead of just one. PointerInfo can now track these sets instead of conservatively modeling them as Unknown. In general, AAPointerInfo now uses AAPotentialConstantValues to examine the operands of the GEP. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D138646
71 lines
2.8 KiB
LLVM
71 lines
2.8 KiB
LLVM
; RUN: llc < %s | FileCheck %s
|
|
|
|
target triple = "amdgcn-amd-amdhsa"
|
|
|
|
; The call to intrinsic implicitarg_ptr reaches a load through a phi. The
|
|
; offsets of the phi cannot be determined, and hence the attributor assumes that
|
|
; hostcall is in use.
|
|
|
|
; CHECK-LABEL: amdhsa.kernels:
|
|
; CHECK: .value_kind: hidden_hostcall_buffer
|
|
; CHECK: .value_kind: hidden_multigrid_sync_arg
|
|
; CHECK-LABEL: .name: kernel_1
|
|
|
|
; kernel_1 loads an i16 through the implicit-argument pointer at a byte offset
; that is only known at run time: %index1 on the %old path and %index2 on the
; %new path, with the two addresses merged by a phi. The loaded value is
; sign-extended and used to index %a for an atomic add.
define amdgpu_kernel void @kernel_1(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
entry:
  %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  br i1 %cond, label %old, label %new

old:                                              ; preds = %entry
  ; Variable (non-constant) byte offset from the implicit-argument pointer.
  %tmp4 = getelementptr i8, i8 addrspace(4)* %tmp7, i64 %index1
  br label %join

new:                                              ; preds = %entry
  ; Second variable byte offset from the same base pointer.
  %tmp12 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %index2
  br label %join

join:                                             ; preds = %new, %old
  ; Both incoming addresses derive from %tmp7 but carry different offsets.
  %.in.in.in = phi i8 addrspace(4)* [ %tmp12, %new ], [ %tmp4, %old ]
  %.in.in = bitcast i8 addrspace(4)* %.in.in.in to i16 addrspace(4)*

  ;;; THIS USE is where the offset into implicitarg_ptr is unknown
  %.in = load i16, i16 addrspace(4)* %.in.in, align 2

  ; Use the loaded value so the load stays live: index %a with it and
  ; atomically add 15 to the selected element.
  %idx.ext = sext i16 %.in to i64
  %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
  %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
  ret void
}
|
|
|
|
; The call to intrinsic implicitarg_ptr is combined with an offset produced by
|
|
; select'ing between two constants, before it is eventually used in a GEP to
|
|
; form the address of a load. This test ensures that AAPointerInfo can look
|
|
; through the select to maintain a set of indices, so that it can precisely
|
|
; determine that hostcall and other expensive implicit args are not in use.
|
|
|
|
; CHECK-NOT: hidden_hostcall_buffer
|
|
; CHECK-NOT: hidden_multigrid_sync_arg
|
|
; CHECK-LABEL: .name: kernel_2
|
|
|
|
; kernel_2 loads an i16 through the implicit-argument pointer at a byte offset
; produced by a select between the constants 12 and 18, so the set of possible
; offsets is known exactly even though the chosen one depends on %cond. The
; loaded value is sign-extended and used to index %a for an atomic add.
define amdgpu_kernel void @kernel_2(i32 addrspace(1)* %a, i1 %cond) {
entry:
  %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  ; Offset is one of two compile-time constants, picked by %cond.
  %tmp5 = select i1 %cond, i64 12, i64 18
  %tmp6 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %tmp5
  %tmp8 = bitcast i8 addrspace(4)* %tmp6 to i16 addrspace(4)*

  ;;; THIS USE is where multiple offsets are possible, relative to implicitarg_ptr
  %tmp9 = load i16, i16 addrspace(4)* %tmp8, align 2

  ; Use the loaded value so the load stays live: index %a with it and
  ; atomically add 15 to the selected element.
  %idx.ext = sext i16 %tmp9 to i64
  %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
  %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
  ret void
}
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x()
|
|
|
|
declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
|
|
|
declare i32 @llvm.amdgcn.workgroup.id.x()
|