
andorbitset.ll is interesting since it directly depends on the difference between poison and undef. I'm not sure it's useful to keep the version using poison; I assume none of this code makes it to codegen. si-spill-cf.ll was also a nasty case, which I doubt has been reproducing its original issue for a very long time. I had to reclaim an older version, replace some of the poison uses, and run simplify-cfg. There's a very slight change in the final CFG with this, but the final output is approximately the same as it used to be.
1218 lines
38 KiB
LLVM
1218 lines
38 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Two RUN lines: SI (verde) and VI (tonga), both with early if-conversion and
; machine-sinking-split disabled so uniform control flow reaches the branch lowering.
; RUN: llc -mtriple=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -structurizecfg-skip-uniform-regions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -structurizecfg-skip-uniform-regions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
; Uniform i32 == 0 condition: expected to be lowered to a scalar compare
; (s_cmp_eq_u32) plus a scalar branch (s_cbranch_scc1), not exec-mask control flow.
define amdgpu_kernel void @uniform_if_scc(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s0, s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_eq_u32 s0, 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_cbranch_scc1 .LBB0_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB0_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_scc:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s0, s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_eq_u32 s0, 0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_cbranch_scc1 .LBB0_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB0_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2c
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}
|
|
|
|
; Uniform float == 0.0 condition: the compare goes to the VALU (v_cmp_eq_f32)
; but the branch stays uniform, using s_and_b64 vcc, exec and s_cbranch_vccnz.
define amdgpu_kernel void @uniform_if_vcc(float %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_vcc:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s1, s[4:5], 0x9
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_f32_e64 s[2:3], s1, 0
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccnz .LBB1_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB1_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_vcc:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s1, s[4:5], 0x24
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_eq_f32_e64 s[2:3], s1, 0
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccnz .LBB1_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB1_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2c
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}
|
|
|
|
; Same as @uniform_if_scc but with the branch targets swapped in the IR; the
; backend inverts the compare (s_cmp_lg_u32) instead of emitting a second branch.
define amdgpu_kernel void @uniform_if_swap_br_targets_scc(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_swap_br_targets_scc:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s0, s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_cbranch_scc1 .LBB2_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB2_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_swap_br_targets_scc:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s0, s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s0, 0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_cbranch_scc1 .LBB2_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB2_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2c
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}
|
|
|
|
; Same as @uniform_if_vcc but with swapped branch targets; the compare is
; inverted to v_cmp_neq_f32 rather than emitting an extra branch.
define amdgpu_kernel void @uniform_if_swap_br_targets_vcc(float %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_swap_br_targets_vcc:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s1, s[4:5], 0x9
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], s1, 0
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccnz .LBB3_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB3_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_swap_br_targets_vcc:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s1, s[4:5], 0x24
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_neq_f32_e64 s[2:3], s1, 0
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccnz .LBB3_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB3_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2c
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}
|
|
|
|
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
define amdgpu_kernel void @uniform_if_move_valu(ptr addrspace(1) %out, float %a) {
; SI-LABEL: uniform_if_move_valu:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s0, s[4:5], 0xb
; SI-NEXT: v_mov_b32_e32 v0, 0x41200000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_add_f32_e32 v0, s0, v0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0
; SI-NEXT: s_cbranch_vccnz .LBB4_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: .LBB4_2: ; %endif
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_move_valu:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s0, s[4:5], 0x2c
; VI-NEXT: v_mov_b32_e32 v0, 0x41200000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_f32_e32 v0, s0, v0
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0
; VI-NEXT: s_cbranch_vccnz .LBB4_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: .LBB4_2: ; %endif
; VI-NEXT: s_endpgm
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}
|
|
|
|
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; Commuted variant: the ugt-5 compare becomes v_cmp_gt_u32 with swapped operands.
define amdgpu_kernel void @uniform_if_move_valu_commute(ptr addrspace(1) %out, float %a) {
; SI-LABEL: uniform_if_move_valu_commute:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s0, s[4:5], 0xb
; SI-NEXT: v_mov_b32_e32 v0, 0x41200000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_add_f32_e32 v0, s0, v0
; SI-NEXT: v_cmp_gt_u32_e32 vcc, 6, v0
; SI-NEXT: s_cbranch_vccnz .LBB5_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: .LBB5_2: ; %endif
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_move_valu_commute:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s0, s[4:5], 0x2c
; VI-NEXT: v_mov_b32_e32 v0, 0x41200000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_f32_e32 v0, s0, v0
; VI-NEXT: v_cmp_gt_u32_e32 vcc, 6, v0
; VI-NEXT: s_cbranch_vccnz .LBB5_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: .LBB5_2: ; %endif
; VI-NEXT: s_endpgm
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}
|
|
|
|
|
|
; Uniform if/else where both arms end the program: each arm stores a different
; value and reaches s_endpgm separately; no join block is needed.
define amdgpu_kernel void @uniform_if_else_ret(ptr addrspace(1) nocapture %out, i32 %a) {
; SI-LABEL: uniform_if_else_ret:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s2, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s2, 0
; SI-NEXT: s_cbranch_scc0 .LBB6_2
; SI-NEXT: ; %bb.1: ; %if.else
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 2
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB6_2: ; %if.then
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_else_ret:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s2, 0
; VI-NEXT: s_cbranch_scc0 .LBB6_2
; VI-NEXT: ; %bb.1: ; %if.else
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 2
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB6_2: ; %if.then
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  store i32 1, ptr addrspace(1) %out
  br label %if.end

if.else: ; preds = %entry
  store i32 2, ptr addrspace(1) %out
  br label %if.end

if.end: ; preds = %if.else, %if.then
  ret void
}
|
|
|
|
; Uniform if/else with a shared join block: each arm sets up its store value,
; then both fall into %if.end which performs a second store to %out1.
define amdgpu_kernel void @uniform_if_else(ptr addrspace(1) nocapture %out0, ptr addrspace(1) nocapture %out1, i32 %a) {
; SI-LABEL: uniform_if_else:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s6, s[4:5], 0xd
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s6, 0
; SI-NEXT: s_cbranch_scc0 .LBB7_2
; SI-NEXT: ; %bb.1: ; %if.else
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, 2
; SI-NEXT: s_branch .LBB7_3
; SI-NEXT: .LBB7_2: ; %if.then
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: .LBB7_3: ; %if.end
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_mov_b32 s5, s3
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 3
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_else:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s6, s[4:5], 0x34
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s6, 0
; VI-NEXT: s_cbranch_scc0 .LBB7_2
; VI-NEXT: ; %bb.1: ; %if.else
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: v_mov_b32_e32 v0, 2
; VI-NEXT: s_branch .LBB7_3
; VI-NEXT: .LBB7_2: ; %if.then
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: .LBB7_3: ; %if.end
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: v_mov_b32_e32 v0, 3
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  store i32 1, ptr addrspace(1) %out0
  br label %if.end

if.else: ; preds = %entry
  store i32 2, ptr addrspace(1) %out0
  br label %if.end

if.end: ; preds = %if.else, %if.then
  store i32 3, ptr addrspace(1) %out1
  ret void
}
|
|
|
|
; One icmp with two users (a sext and the branch): the condition is materialized
; once with s_cselect_b64 for the data use, and the branch re-compares on SCC.
define amdgpu_kernel void @icmp_2_users(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: icmp_2_users:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dword s2, s[4:5], 0xb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_gt_i32 s2, 0
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
; SI-NEXT: s_cmp_lt_i32 s2, 1
; SI-NEXT: s_cbranch_scc1 .LBB8_2
; SI-NEXT: ; %bb.1: ; %IF
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[0:1]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: .LBB8_2: ; %ENDIF
; SI-NEXT: s_endpgm
;
; VI-LABEL: icmp_2_users:
; VI: ; %bb.0: ; %main_body
; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_gt_i32 s2, 0
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
; VI-NEXT: s_cmp_lt_i32 s2, 1
; VI-NEXT: s_cbranch_scc1 .LBB8_2
; VI-NEXT: ; %bb.1: ; %IF
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[0:1]
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: .LBB8_2: ; %ENDIF
; VI-NEXT: s_endpgm
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:
  store i32 %1, ptr addrspace(1) %out
  br label %ENDIF

ENDIF: ; preds = %IF, %main_body
  ret void
}
|
|
|
|
; Compare results used in a block other than the one that defined them: the
; %cmp1 mask is kept in SGPRs across the branch and consumed by v_cndmask in %bb7.
define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, ptr addrspace(1) %out) {
; SI-LABEL: icmp_users_different_blocks:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lt_i32 s0, 1
; SI-NEXT: s_cbranch_scc1 .LBB9_2
; SI-NEXT: ; %bb.1: ; %bb2
; SI-NEXT: s_cmp_gt_i32 s1, 0
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
; SI-NEXT: s_and_b64 vcc, exec, s[0:1]
; SI-NEXT: s_cbranch_vccz .LBB9_3
; SI-NEXT: .LBB9_2: ; %bb9
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB9_3: ; %bb7
; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: icmp_users_different_blocks:
; VI: ; %bb.0: ; %bb
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lt_i32 s0, 1
; VI-NEXT: s_cbranch_scc1 .LBB9_2
; VI-NEXT: ; %bb.1: ; %bb2
; VI-NEXT: s_cmp_gt_i32 s1, 0
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
; VI-NEXT: s_and_b64 vcc, exec, s[0:1]
; VI-NEXT: s_cbranch_vccz .LBB9_3
; VI-NEXT: .LBB9_2: ; %bb9
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB9_3: ; %bb7
; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2: ; preds = %bb
  %cmp1 = icmp sgt i32 %cond1, 0
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7: ; preds = %bb5
  store i32 %tmp3, ptr addrspace(1) %out
  br label %bb9

bb9: ; preds = %bb8, %bb4
  ret void
}
|
|
|
|
; Uniform countdown loop: lowered entirely on the SALU (s_add_i32 /
; s_cmp_lg_u32 / s_cbranch_scc1) with no exec-mask manipulation.
define amdgpu_kernel void @uniform_loop(ptr addrspace(1) %out, i32 %a) {
; SI-LABEL: uniform_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s0, s[4:5], 0xb
; SI-NEXT: .LBB10_1: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_add_i32 s0, s0, -1
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: s_cbranch_scc1 .LBB10_1
; SI-NEXT: ; %bb.2: ; %done
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_loop:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s0, s[4:5], 0x2c
; VI-NEXT: .LBB10_1: ; %loop
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_add_i32 s0, s0, -1
; VI-NEXT: s_cmp_lg_u32 s0, 0
; VI-NEXT: s_cbranch_scc1 .LBB10_1
; VI-NEXT: ; %bb.2: ; %done
; VI-NEXT: s_endpgm
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}
|
|
|
|
; Test uniform and divergent.

; Uniform branch nested inside a divergent one: the outer branch uses
; s_and_saveexec_b64, the inner one a plain scalar s_cbranch_scc0.
define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: uniform_inside_divergent:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT: s_cbranch_execz .LBB11_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s6, 0
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_cbranch_scc0 .LBB11_3
; SI-NEXT: .LBB11_2: ; %endif
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB11_3: ; %if_uniform
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_inside_divergent:
; VI: ; %bb.0: ; %entry
; VI-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
; VI-NEXT: s_and_saveexec_b64 s[0:1], vcc
; VI-NEXT: s_cbranch_execz .LBB11_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: s_load_dword s6, s[4:5], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s6, 0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_cbranch_scc0 .LBB11_3
; VI-NEXT: .LBB11_2: ; %endif
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB11_3: ; %if_uniform
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}
|
|
|
|
; Divergent branch nested inside a uniform one: outer scalar s_cbranch_scc0,
; inner s_and_saveexec_b64 / s_cbranch_execz exec-mask control flow.
define amdgpu_kernel void @divergent_inside_uniform(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: divergent_inside_uniform:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s0, s[4:5], 0xb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: s_cbranch_scc0 .LBB12_2
; SI-NEXT: .LBB12_1: ; %endif
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB12_2: ; %if
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_cbranch_execz .LBB12_1
; SI-NEXT: ; %bb.3: ; %if_uniform
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: divergent_inside_uniform:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s0, s[4:5], 0x2c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s0, 0
; VI-NEXT: s_cbranch_scc0 .LBB12_2
; VI-NEXT: .LBB12_1: ; %endif
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB12_2: ; %if
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_cbranch_execz .LBB12_1
; VI-NEXT: ; %bb.3: ; %if_uniform
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}
|
|
|
|
; A divergent if followed (after the exec-mask restore) by an independent
; uniform if: exec is restored with s_or_b64 before the scalar branch.
define amdgpu_kernel void @divergent_if_uniform_if(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: divergent_if_uniform_if:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT: s_cbranch_execz .LBB13_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: .LBB13_2: ; %endif
; SI-NEXT: s_or_b64 exec, exec, s[6:7]
; SI-NEXT: s_load_dword s2, s[4:5], 0xb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s2, 0
; SI-NEXT: s_cbranch_scc0 .LBB13_4
; SI-NEXT: ; %bb.3: ; %exit
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB13_4: ; %if_uniform
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 2
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: divergent_if_uniform_if:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT: s_and_saveexec_b64 s[6:7], vcc
; VI-NEXT: s_cbranch_execz .LBB13_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: .LBB13_2: ; %endif
; VI-NEXT: s_or_b64 exec, exec, s[6:7]
; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s2, 0
; VI-NEXT: s_cbranch_scc0 .LBB13_4
; VI-NEXT: ; %bb.3: ; %exit
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB13_4: ; %if_uniform
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 2
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, ptr addrspace(1) %out
  br label %exit

exit:
  ret void
}
|
|
|
|
; The condition of the branches in the two blocks are
; uniform. MachineCSE replaces the 2nd condition with the inverse of
; the first, leaving an scc use in a different block than it was
; defed.

define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: cse_uniform_condition_different_blocks:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_load_dword s0, s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lt_i32 s0, 1
; SI-NEXT: s_cbranch_scc1 .LBB14_2
; SI-NEXT: ; %bb.1: ; %bb2
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: .LBB14_2: ; %bb9
; SI-NEXT: s_endpgm
;
; VI-LABEL: cse_uniform_condition_different_blocks:
; VI: ; %bb.0: ; %bb
; VI-NEXT: s_load_dword s0, s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lt_i32 s0, 1
; VI-NEXT: s_cbranch_scc1 .LBB14_2
; VI-NEXT: ; %bb.1: ; %bb2
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: .LBB14_2: ; %bb9
; VI-NEXT: s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2: ; preds = %bb
  %tmp3 = load volatile i32, ptr addrspace(1) poison
  store volatile i32 0, ptr addrspace(1) poison
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7: ; preds = %bb5
  store i32 %tmp3, ptr addrspace(1) %out
  br label %bb9

bb9: ; preds = %bb8, %bb4
  ret void
}
|
|
|
|
; Fall-through to the else
; Uniform i64 == 0: SI has no scalar 64-bit compare and uses v_cmp_eq_u64 on
; the VALU; VI can use s_cmp_eq_u64 and branch on SCC directly.
define amdgpu_kernel void @uniform_if_scc_i64_eq(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_eq:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u64_e64 s[4:5], s[0:1], 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccnz .LBB15_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB15_2: ; %done
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_mov_b32 s5, s3
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_eq:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_eq_u64 s[0:1], 0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_cbranch_scc1 .LBB15_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB15_2: ; %done
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}
|
|
|
|
; Fall-through to the else
; Uniform i64 != 0: SI uses v_cmp_ne_u64 on the VALU; VI uses the scalar
; s_cmp_lg_u64 and branches on SCC directly.
define amdgpu_kernel void @uniform_if_scc_i64_ne(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_ne:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_ne_u64_e64 s[4:5], s[0:1], 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccnz .LBB16_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB16_2: ; %done
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_mov_b32 s5, s3
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_ne:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u64 s[0:1], 0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_cbranch_scc1 .LBB16_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB16_2: ; %done
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Fall-through to the else
; Uniform (SGPR) 64-bit signed compare, 'sgt' predicate. Unlike the eq/ne
; cases, the checks show both SI and VI lowering through VALU
; (v_cmp_gt_i64 into s[4:5], then s_and_b64 with exec into vcc) rather
; than a scalar compare.
define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_sgt:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_gt_i64_e64 s[4:5], s[0:1], 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccnz .LBB17_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB17_2: ; %done
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_mov_b32 s5, s3
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_sgt:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_gt_i64_e64 s[4:5], s[0:1], 0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_and_b64 vcc, exec, s[4:5]
; VI-NEXT: s_cbranch_vccnz .LBB17_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB17_2: ; %done
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp sgt i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  ; Diamond merge: 0 from %if, 1 from %else; the store keeps the
  ; control-flow diamond from being optimized away before codegen.
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}
; Divergent (VGPR) i64 compare: the compared value comes from an LDS load
; (ds_read_b64 into v[0:1] in the checks), so both SI and VI must use the
; VALU compare v_cmp_eq_u64 writing vcc instead of a scalar compare.
; NOTE(review): the load is through a poison LDS pointer on purpose (the
; volatile qualifier keeps the load from being deleted); this test depends
; on that, per the file's history — do not "clean it up".
define amdgpu_kernel void @move_to_valu_i64_eq(ptr addrspace(1) %out) {
; SI-LABEL: move_to_valu_i64_eq:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: ds_read_b64 v[0:1], v0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; SI-NEXT: s_cbranch_vccnz .LBB18_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB18_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: move_to_valu_i64_eq:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: ds_read_b64 v[0:1], v0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; VI-NEXT: s_cbranch_vccnz .LBB18_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB18_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
  %cond = load volatile i64, ptr addrspace(3) poison
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  ; Diamond merge: 0 from %if, 1 from %else; the store keeps the
  ; control-flow diamond from being optimized away before codegen.
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}
; Divergent (VGPR) i64 compare, 'ne' predicate: same shape as
; @move_to_valu_i64_eq, with the checks showing v_cmp_ne_u64 writing vcc
; on both SI and VI since the operand is an LDS load in v[0:1].
; NOTE(review): the volatile load through a poison LDS pointer is
; intentional (keeps the load alive); do not "clean it up".
define amdgpu_kernel void @move_to_valu_i64_ne(ptr addrspace(1) %out) {
; SI-LABEL: move_to_valu_i64_ne:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: ds_read_b64 v[0:1], v0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; SI-NEXT: s_cbranch_vccnz .LBB19_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB19_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: move_to_valu_i64_ne:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: ds_read_b64 v[0:1], v0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; VI-NEXT: s_cbranch_vccnz .LBB19_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB19_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
  %cond = load volatile i64, ptr addrspace(3) poison
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  ; Diamond merge: 0 from %if, 1 from %else; the store keeps the
  ; control-flow diamond from being optimized away before codegen.
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}
|
|
|
|
; Loop whose phi (%tmp0) feeds an LDS address computation. The loop counter's
; address increment is moved to VALU: the checks show the per-iteration add as
; v_add_i32_e64 / v_add_u32_e64 on v0 (the running LDS address). The compare
; operand %cond is pinned to an SGPR via inline asm ("=s"), so the branch
; itself stays scalar (s_cmp_lg_u32 / s_cbranch_scc1).
; NOTE(review): the IR loop never exits; the DummyReturnBlock in the checks is
; a backend-inserted exit — presumably from divergent-exit unification;
; confirm before relying on it.
define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
; SI-LABEL: move_to_valu_vgpr_operand_phi:
; SI: ; %bb.0: ; %bb0
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_add_i32_e32 v0, vcc, 28, v0
; SI-NEXT: v_mov_b32_e32 v1, 1
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: s_branch .LBB20_2
; SI-NEXT: .LBB20_1: ; %bb3
; SI-NEXT: ; in Loop: Header=BB20_2 Depth=1
; SI-NEXT: v_add_i32_e64 v0, s[4:5], 8, v0
; SI-NEXT: .LBB20_2: ; %bb1
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: ; def s4
; SI-NEXT: ;;#ASMEND
; SI-NEXT: s_cmp_lg_u32 s4, 0
; SI-NEXT: s_cbranch_scc1 .LBB20_1
; SI-NEXT: ; %bb.3: ; %bb2
; SI-NEXT: ; in Loop: Header=BB20_2 Depth=1
; SI-NEXT: ds_write_b32 v0, v1
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz .LBB20_1
; SI-NEXT: ; %bb.4: ; %DummyReturnBlock
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: move_to_valu_vgpr_operand_phi:
; VI: ; %bb.0: ; %bb0
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 28, v0
; VI-NEXT: v_mov_b32_e32 v1, 1
; VI-NEXT: s_and_b64 vcc, exec, 0
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: s_branch .LBB20_2
; VI-NEXT: .LBB20_1: ; %bb3
; VI-NEXT: ; in Loop: Header=BB20_2 Depth=1
; VI-NEXT: v_add_u32_e64 v0, s[4:5], 8, v0
; VI-NEXT: .LBB20_2: ; %bb1
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: ;;#ASMSTART
; VI-NEXT: ; def s4
; VI-NEXT: ;;#ASMEND
; VI-NEXT: s_cmp_lg_u32 s4, 0
; VI-NEXT: s_cbranch_scc1 .LBB20_1
; VI-NEXT: ; %bb.3: ; %bb2
; VI-NEXT: ; in Loop: Header=BB20_2 Depth=1
; VI-NEXT: ds_write_b32 v0, v1
; VI-NEXT: s_mov_b64 vcc, vcc
; VI-NEXT: s_cbranch_vccz .LBB20_1
; VI-NEXT: ; %bb.4: ; %DummyReturnBlock
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
bb0:
  br label %bb1

bb1: ; preds = %bb3, %bb0
  ; Loop counter: starts at 8, advances by 2 each trip (%tmp4 in bb3);
  ; it indexes into %out, so the GEP address tracks the phi.
  %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
  %tmp1 = add nsw i32 %tmp0, -1
  %tmp2 = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tmp1
  ; Inline asm forces %cond into an SGPR so the compare/branch is uniform.
  %cond = call i32 asm "; def $0","=s"()
  %cmp = icmp eq i32 %cond, 0
  br i1 %cmp, label %bb2, label %bb3

bb2: ; preds = %bb1
  ; Volatile store keeps the address computation (and hence the phi) live.
  store volatile i32 1, ptr addrspace(3) %tmp2, align 4
  br label %bb3

bb3: ; preds = %bb2, %bb1
  %tmp4 = add nsw i32 %tmp0, 2
  br label %bb1
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}