
; Adds support for hoisting `writeonly` calls in LICM. This patch adds a
; missing optimization that allows hoisting of `writeonly` function calls out
; of loops when it is safe to do so. Previously, such calls were conservatively
; retained inside the loop body, and the redundant calls were only reduced
; through unrolling, relying on target-dependent heuristics. Closes #143267
; Testing:
; - Modified previously negative tests for hoisting writeonly calls to be
;   instead positive
; - Added test cases for hoisting of two writeonly calls where the pointers
;   do/do not alias
; - Added a test case for non-argmemonly writeonly calls.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Where the mask of lanes wanting to exit the loop on this iteration is not
; obviously already masked by exec (in this case, the xor with -1 inserted by
; control flow annotation), then lower control flow must insert an S_AND_B64
; with exec.
define void @needs_and(i32 %arg) {
; GCN-LABEL: needs_and:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s10, 1
; GCN-NEXT:    s_mov_b64 s[6:7], 0
; GCN-NEXT:    s_branch .LBB0_2
; GCN-NEXT:  .LBB0_1: ; %endif
; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
; GCN-NEXT:    s_and_b64 s[4:5], exec, vcc
; GCN-NEXT:    s_or_b64 s[6:7], s[4:5], s[6:7]
; GCN-NEXT:    s_add_i32 s10, s10, 1
; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB0_4
; GCN-NEXT:  .LBB0_2: ; %loop
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_gt_u32_e64 s[4:5], s10, v0
; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB0_1
; GCN-NEXT:  ; %bb.3: ; %then
; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT:    s_nop 1
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT:    s_branch .LBB0_1
; GCN-NEXT:  .LBB0_4: ; %loopexit
; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  br label %loop

loop:
  ; Loop counter; the exit condition %tmp27 is divergent (compares against
  ; VGPR %arg), so the loop-exit mask must be ANDed with exec.
  %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  br i1 %tmp27, label %then, label %endif

then:                                             ; preds = %loop
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 poison, i32 0)
  br label %endif

endif:                                            ; preds = %then, %loop
  br i1 %tmp27, label %loop, label %loopexit

loopexit:                                         ; preds = %endif
  ret void
}
; Where the mask of lanes wanting to exit the loop on this iteration is
; obviously already masked by exec (a V_CMP), then lower control flow can omit
; the S_AND_B64 to avoid an unnecessary instruction.
define void @doesnt_need_and(i32 %arg) {
; GCN-LABEL: doesnt_need_and:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT:    s_mov_b32 s6, 0
; GCN-NEXT:    s_mov_b64 s[4:5], 0
; GCN-NEXT:  .LBB1_1: ; %loop
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; GCN-NEXT:    s_cbranch_execnz .LBB1_1
; GCN-NEXT:  ; %bb.2: ; %loopexit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  br label %loop

loop:
  ; The exit condition is produced directly by a V_CMP in the loop, so the
  ; resulting mask is already implicitly limited to active lanes and no extra
  ; S_AND_B64 with exec is needed.
  %tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 poison, i32 0)
  br i1 %tmp27, label %loop, label %loopexit

loopexit:                                         ; preds = %loop
  ret void
}
; Another case where the mask of lanes wanting to exit the loop is not masked
; by exec, because it is a function parameter.
define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
; GCN-LABEL: break_cond_is_arg:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v1
; GCN-NEXT:    s_mov_b32 s10, 1
; GCN-NEXT:    s_mov_b64 s[6:7], 0
; GCN-NEXT:    s_branch .LBB2_2
; GCN-NEXT:  .LBB2_1: ; %endif
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
; GCN-NEXT:    s_and_b64 s[4:5], exec, vcc
; GCN-NEXT:    s_or_b64 s[6:7], s[4:5], s[6:7]
; GCN-NEXT:    s_add_i32 s10, s10, 1
; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB2_4
; GCN-NEXT:  .LBB2_2: ; %loop
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_gt_u32_e64 s[4:5], s10, v0
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB2_1
; GCN-NEXT:  ; %bb.3: ; %then
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_nop 2
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT:    s_branch .LBB2_1
; GCN-NEXT:  .LBB2_4: ; %loopexit
; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  br label %loop

loop:
  ; The loop-exit condition %breakcond is a function argument, so it is not
  ; obviously masked by exec and lower control flow must AND it with exec.
  %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  br i1 %tmp27, label %then, label %endif

then:                                             ; preds = %loop
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 poison, i32 0)
  br label %endif

endif:                                            ; preds = %then, %loop
  br i1 %breakcond, label %loop, label %loopexit

loopexit:                                         ; preds = %endif
  ret void
}
; Buffer-store intrinsic used as the divergent side effect inside the loops.
declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0

attributes #0 = { nounwind writeonly }