[AMDGPU][UniformityAnalysis] Mark set_inactive and set_inactive_chain_arg as SourceOfDivergence (#190640)
`set_inactive` produces a result that varies per-lane based on the EXEC mask, even when both inputs are uniform.
This commit is contained in:
parent
326593b4b4
commit
40d5a7d69e
@ -334,6 +334,8 @@ def : SourceOfDivergence<int_amdgcn_writelane>;
|
||||
def : SourceOfDivergence<int_amdgcn_init_whole_wave>;
|
||||
def : SourceOfDivergence<int_amdgcn_permlane16_swap>;
|
||||
def : SourceOfDivergence<int_amdgcn_permlane32_swap>;
|
||||
def : SourceOfDivergence<int_amdgcn_set_inactive>;
|
||||
def : SourceOfDivergence<int_amdgcn_set_inactive_chain_arg>;
|
||||
|
||||
foreach intr = AMDGPUMFMAIntrinsics908 in
|
||||
def : SourceOfDivergence<intr>;
|
||||
|
||||
@ -846,5 +846,19 @@ define amdgpu_cs void @call_whole_wave(ptr addrspace(1) %out) {
|
||||
|
||||
declare amdgpu_gfx_whole_wave i32 @wwf(i1, i32) #0
|
||||
|
||||
; CHECK: DIVERGENT: %set_inactive = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 0)
|
||||
; Uniformity-analysis test for llvm.amdgcn.set.inactive.
; The call takes the function argument %in and the constant 0 as its two
; operands, and the result is stored through %out so that it has a use.
; The expectation line directly above this function requires the call to be
; reported as DIVERGENT.
; NOTE(review): %in is an amdgpu_kernel argument and is presumably treated as
; uniform, which would make this the "both inputs uniform" case from the
; commit description -- confirm.
define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) #0 {
|
||||
; Instruction under test; its printed form must keep matching the expectation
; line above, so do not rename %set_inactive or reorder the operands.
%set_inactive = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 0) #0
|
||||
store i32 %set_inactive, ptr addrspace(1) %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %set_inactive = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive)
|
||||
; Uniformity-analysis test for llvm.amdgcn.set.inactive.chain.arg.
; The call receives %active as its first operand and %inactive as its second
; operand (note that the function's parameter list declares them in the
; opposite order), and the result is stored through %out so that it has a use.
; The expectation line directly above this function requires the call to be
; reported as DIVERGENT.
define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %inactive, i32 %active) #0 {
|
||||
; Instruction under test; its printed form must keep matching the expectation
; line above, so do not rename %set_inactive or reorder the operands.
%set_inactive = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0
|
||||
store i32 %set_inactive, ptr addrspace(1) %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind convergent }
|
||||
attributes #1 = { nounwind readnone convergent }
|
||||
|
||||
@ -21,12 +21,13 @@ define amdgpu_hs void @wwm(i32 inreg %arg, ptr addrspace(8) inreg %buffer) {
|
||||
; GCN-NEXT: ; %bb.1: ; %bb42
|
||||
; GCN-NEXT: s_mov_b32 s1, 0
|
||||
; GCN-NEXT: .LBB0_2: ; %bb602
|
||||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB0_4
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_4
|
||||
; GCN-NEXT: ; %bb.3: ; %bb49
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
|
||||
; GCN-NEXT: tbuffer_store_format_x v1, off, s[4:7], 1 format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] offset:4 glc
|
||||
; GCN-NEXT: .LBB0_4: ; %bb54
|
||||
; GCN-NEXT: .LBB0_4: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_endpgm
|
||||
entry:
|
||||
br label %work
|
||||
@ -75,12 +76,13 @@ define amdgpu_hs void @strict_wwm(i32 inreg %arg, ptr addrspace(8) inreg %buffer
|
||||
; GCN-NEXT: ; %bb.1: ; %bb42
|
||||
; GCN-NEXT: s_mov_b32 s1, 0
|
||||
; GCN-NEXT: .LBB1_2: ; %bb602
|
||||
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
|
||||
; GCN-NEXT: s_cbranch_vccnz .LBB1_4
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
|
||||
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GCN-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GCN-NEXT: ; %bb.3: ; %bb49
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
|
||||
; GCN-NEXT: tbuffer_store_format_x v1, off, s[4:7], 1 format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] offset:4 glc
|
||||
; GCN-NEXT: .LBB1_4: ; %bb54
|
||||
; GCN-NEXT: .LBB1_4: ; %UnifiedReturnBlock
|
||||
; GCN-NEXT: s_endpgm
|
||||
entry:
|
||||
br label %work
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user