; The new format matches the official ISA spec and ensures the disassembler prints 'export mrt0, v0, off, off, off' instead of 'export mrt0 v0, off, off, off'. No functional encoding changes; printing/AsmString only.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-enable-delay-alu=0 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11-12,GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -amdgpu-enable-delay-alu=0 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11-12,GFX12 %s
|
|
|
|
; Two kills whose conditions fold to constants. The first condition is true
; (0 <= 3 selects 1.0, and 1.0 >= 0.0 holds); the second is false (3 <= 0
; fails, so -1.0 is selected and -1.0 >= 0.0 does not hold). Every run expects
; exec to be cleared ahead of the sendmsg.
define amdgpu_gs void @gs_const() {
; SI-LABEL: gs_const:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: gs_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b64 s[0:1], exec
; GFX10-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-12-LABEL: gs_const:
; GFX11-12: ; %bb.0:
; GFX11-12-NEXT: s_mov_b64 s[0:1], exec
; GFX11-12-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-12-NEXT: s_mov_b64 exec, 0
; GFX11-12-NEXT: s_mov_b32 m0, 0
; GFX11-12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-12-NEXT: s_endpgm
; GFX11-12-NEXT: ; %bb.1:
; GFX11-12-NEXT: s_mov_b64 exec, 0
; GFX11-12-NEXT: s_endpgm
  %tmp = icmp ule i32 0, 3
  %tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
  %c1 = fcmp oge float %tmp1, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  %tmp2 = icmp ule i32 3, 0
  %tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00
  %c2 = fcmp oge float %tmp3, 0.0
  call void @llvm.amdgcn.kill(i1 %c2)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Pixel shader that kills on `%arg14 >= 0.0` and afterwards exports a select
; driven by `%arg13 < 0.0`. The select compare is computed before the kill and
; consumed after it, so two compare results are live at once. The SI, GFX10
; and GFX11 runs expect the kill compare in vcc and the select compare in
; s[0:1]; the GFX12 run expects those two locations swapped.
define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
; SI-LABEL: vcc_implicit_def:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
; SI-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_cbranch_scc0 .LBB1_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
; SI-NEXT: exp mrt1, v0, v0, v0, v0 done vm
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB1_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null, off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: vcc_implicit_def:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
; GFX10-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_cbranch_scc0 .LBB1_2
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: exp mrt1, v0, v0, v0, v0 done vm
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: .LBB1_2:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: exp null, off, off, off, off done vm
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: vcc_implicit_def:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
; GFX11-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB1_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: exp mrt1, v0, v0, v0, v0 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB1_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0, off, off, off, off done
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: vcc_implicit_def:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
; GFX12-NEXT: s_mov_b64 s[2:3], exec
; GFX12-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_and_not1_b64 s[0:1], exec, s[0:1]
; GFX12-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[0:1]
; GFX12-NEXT: s_cbranch_scc0 .LBB1_2
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: export mrt1, v0, v0, v0, v0 done
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: .LBB1_2:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: export mrt0, off, off, off, off done
; GFX12-NEXT: s_endpgm
  %tmp0 = fcmp olt float %arg13, 0.000000e+00
  %c1 = fcmp oge float %arg14, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  %tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
  call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; A kill with a constant-true condition: all four runs share the GCN prefix
; and expect nothing but the final s_endpgm.
define amdgpu_gs void @true() {
; GCN-LABEL: true:
; GCN: ; %bb.0:
; GCN-NEXT: s_endpgm
  call void @llvm.amdgcn.kill(i1 true)
  ret void
}
|
|
|
|
; A kill with a constant-false condition: every run expects exec to be forced
; to 0 ahead of the sendmsg.
define amdgpu_gs void @false() {
; SI-LABEL: false:
; SI: ; %bb.0:
; SI-NEXT: s_andn2_b64 exec, exec, exec
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b64 exec, exec, exec
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-12-LABEL: false:
; GFX11-12: ; %bb.0:
; GFX11-12-NEXT: s_and_not1_b64 exec, exec, exec
; GFX11-12-NEXT: s_mov_b64 exec, 0
; GFX11-12-NEXT: s_mov_b32 m0, 0
; GFX11-12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-12-NEXT: s_endpgm
; GFX11-12-NEXT: ; %bb.1:
; GFX11-12-NEXT: s_mov_b64 exec, 0
; GFX11-12-NEXT: s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Kill condition built from two signed integer compares. Despite the function
; name, the IR combines them with `or`, and the expected code accordingly uses
; s_or_b64 before the result is applied to exec with s_and_b64.
define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
; SI-LABEL: and:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1
; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v3
; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; SI-NEXT: s_and_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: and:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1
; GFX10-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v3
; GFX10-NEXT: s_mov_b64 s[2:3], exec
; GFX10-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX10-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX10-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX10-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-12-LABEL: and:
; GFX11-12: ; %bb.0:
; GFX11-12-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1
; GFX11-12-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v3
; GFX11-12-NEXT: s_mov_b64 s[2:3], exec
; GFX11-12-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-12-NEXT: s_and_not1_b64 s[0:1], exec, s[0:1]
; GFX11-12-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[0:1]
; GFX11-12-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX11-12-NEXT: s_mov_b32 m0, 0
; GFX11-12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-12-NEXT: s_endpgm
; GFX11-12-NEXT: ; %bb.1:
; GFX11-12-NEXT: s_mov_b64 exec, 0
; GFX11-12-NEXT: s_endpgm
  %c1 = icmp slt i32 %a, %b
  %c2 = icmp slt i32 %c, %d
  %x = or i1 %c1, %c2
  call void @llvm.amdgcn.kill(i1 %x)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Kill condition is the negation (`xor ..., 1`) of the XOR of two signed
; integer compares. The expected code keeps the un-negated s_xor_b64 result
; and removes it from a copy of exec with a single and-not, rather than
; emitting a separate negation.
define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
; SI-LABEL: andn2:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1
; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v3
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; SI-NEXT: s_and_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: andn2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1
; GFX10-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v3
; GFX10-NEXT: s_mov_b64 s[2:3], exec
; GFX10-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
; GFX10-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
; GFX10-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-12-LABEL: andn2:
; GFX11-12: ; %bb.0:
; GFX11-12-NEXT: v_cmp_lt_i32_e32 vcc, v0, v1
; GFX11-12-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v3
; GFX11-12-NEXT: s_mov_b64 s[2:3], exec
; GFX11-12-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
; GFX11-12-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[0:1]
; GFX11-12-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX11-12-NEXT: s_mov_b32 m0, 0
; GFX11-12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-12-NEXT: s_endpgm
; GFX11-12-NEXT: ; %bb.1:
; GFX11-12-NEXT: s_mov_b64 exec, 0
; GFX11-12-NEXT: s_endpgm
  %c1 = icmp slt i32 %a, %b
  %c2 = icmp slt i32 %c, %d
  %x = xor i1 %c1, %c2
  %y = xor i1 %x, 1
  call void @llvm.amdgcn.kill(i1 %y)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_neq_f32
; (That is the inverted compare, and-not'ed straight into exec by the SI, GFX10
; and GFX11 runs. The GFX12 run instead expects the direct v_cmp_eq_f32
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @oeq(float %a) {
; SI-LABEL: oeq:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: oeq:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: oeq:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: oeq:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp oeq float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_nlt_f32
; (That is the inverted compare, and-not'ed straight into exec by the SI, GFX10
; and GFX11 runs. The GFX12 run instead expects the direct v_cmp_lt_f32
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @ogt(float %a) {
; SI-LABEL: ogt:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: ogt:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: ogt:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: ogt:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp ogt float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_nle_f32
; (That is the inverted compare, and-not'ed straight into exec by the SI, GFX10
; and GFX11 runs. The GFX12 run instead expects the direct v_cmp_le_f32
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @oge(float %a) {
; SI-LABEL: oge:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: oge:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_nle_f32_e32 vcc, 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: oge:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_nle_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: oge:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp oge float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_ngt_f32
; (That is the inverted compare, and-not'ed straight into exec by the SI, GFX10
; and GFX11 runs. The GFX12 run instead expects the direct v_cmp_gt_f32
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @olt(float %a) {
; SI-LABEL: olt:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: olt:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: olt:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: olt:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp olt float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_nge_f32
; (That is the inverted compare, and-not'ed straight into exec by the SI, GFX10
; and GFX11 runs. The GFX12 run instead expects the direct v_cmp_ge_f32
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @ole(float %a) {
; SI-LABEL: ole:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_nge_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: ole:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_nge_f32_e32 vcc, 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: ole:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_nge_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: ole:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_ge_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp ole float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_nlg_f32
; (That is the inverted compare, and-not'ed straight into exec by the SI, GFX10
; and GFX11 runs. The GFX12 run instead expects the direct v_cmp_lg_f32
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @one(float %a) {
; SI-LABEL: one:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: one:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: one:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: one:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_lg_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp one float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_o_f32
; (Every run expects the self-compare form `v_cmp_o_f32 vcc, v0, v0`, i.e. the
; constant 0.0 operand of the IR compare does not appear in the expected code,
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @ord(float %a) {
; SI-LABEL: ord:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: s_andn2_b64 s[2:3], exec, vcc
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: ord:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX10-NEXT: s_mov_b64 s[0:1], exec
; GFX10-NEXT: s_andn2_b64 s[2:3], exec, vcc
; GFX10-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-12-LABEL: ord:
; GFX11-12: ; %bb.0:
; GFX11-12-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
; GFX11-12-NEXT: s_mov_b64 s[0:1], exec
; GFX11-12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX11-12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX11-12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX11-12-NEXT: s_mov_b32 m0, 0
; GFX11-12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-12-NEXT: s_endpgm
; GFX11-12-NEXT: ; %bb.1:
; GFX11-12-NEXT: s_mov_b64 exec, 0
; GFX11-12-NEXT: s_endpgm
  %c1 = fcmp ord float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_u_f32
; (Every run expects the self-compare form `v_cmp_u_f32 vcc, v0, v0`, i.e. the
; constant 0.0 operand of the IR compare does not appear in the expected code,
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @uno(float %a) {
; SI-LABEL: uno:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: s_andn2_b64 s[2:3], exec, vcc
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: uno:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX10-NEXT: s_mov_b64 s[0:1], exec
; GFX10-NEXT: s_andn2_b64 s[2:3], exec, vcc
; GFX10-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-12-LABEL: uno:
; GFX11-12: ; %bb.0:
; GFX11-12-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX11-12-NEXT: s_mov_b64 s[0:1], exec
; GFX11-12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX11-12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX11-12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX11-12-NEXT: s_mov_b32 m0, 0
; GFX11-12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-12-NEXT: s_endpgm
; GFX11-12-NEXT: ; %bb.1:
; GFX11-12-NEXT: s_mov_b64 exec, 0
; GFX11-12-NEXT: s_endpgm
  %c1 = fcmp uno float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_lg_f32
; (That is the inverted compare, and-not'ed straight into exec by the SI, GFX10
; and GFX11 runs. The GFX12 run instead expects the direct v_cmp_nlg_f32
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @ueq(float %a) {
; SI-LABEL: ueq:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_lg_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: ueq:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_lg_f32_e32 vcc, 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: ueq:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_lg_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: ueq:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp ueq float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_ge_f32
; (That is the inverted compare, and-not'ed straight into exec by the SI, GFX10
; and GFX11 runs. The GFX12 run instead expects the direct v_cmp_nge_f32
; followed by explicit mask arithmetic on a copy of exec.)
define amdgpu_gs void @ugt(float %a) {
; SI-LABEL: ugt:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: ugt:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_ge_f32_e32 vcc, 0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: ugt:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ge_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: ugt:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_nge_f32_e32 vcc, 0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp ugt float %a, 0.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_gt_f32_e32 vcc, -1.0
; (That is the inverted compare, with the constant as the first operand,
; and-not'ed straight into exec by the SI, GFX10 and GFX11 runs. The GFX12 run
; instead expects the direct v_cmp_ngt_f32 followed by explicit mask
; arithmetic on a copy of exec.)
define amdgpu_gs void @uge(float %a) {
; SI-LABEL: uge:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_gt_f32_e32 vcc, -1.0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: uge:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc, -1.0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: uge:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc, -1.0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: uge:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc, -1.0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp uge float %a, -1.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Should use v_cmp_le_f32_e32 vcc, -2.0
; (That is the inverted compare, with the constant as the first operand,
; and-not'ed straight into exec by the SI, GFX10 and GFX11 runs. The GFX12 run
; instead expects the direct v_cmp_nle_f32 followed by explicit mask
; arithmetic on a copy of exec.)
define amdgpu_gs void @ult(float %a) {
; SI-LABEL: ult:
; SI: ; %bb.0:
; SI-NEXT: v_cmp_le_f32_e32 vcc, -2.0, v0
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; SI-NEXT: s_endpgm
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_endpgm
;
; GFX10-LABEL: ult:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cmp_le_f32_e32 vcc, -2.0, v0
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-NEXT: s_mov_b32 m0, 0
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_mov_b64 exec, 0
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: ult:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_le_f32_e32 vcc, -2.0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_mov_b32 m0, 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: ult:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_cmp_nle_f32_e32 vcc, -2.0, v0
; GFX12-NEXT: s_mov_b64 s[0:1], exec
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
; GFX12-NEXT: s_mov_b32 m0, 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_mov_b64 exec, 0
; GFX12-NEXT: s_endpgm
  %c1 = fcmp ult float %a, -2.0
  call void @llvm.amdgcn.kill(i1 %c1)
  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
  ret void
}
|
|
|
|
; Kill on `fcmp ule float %a, 2.0`, followed by llvm.amdgcn.s.sendmsg(3, 0).
; llvm.amdgcn.kill keeps only the lanes whose i1 operand is true, so the SI,
; GFX10 and GFX11 checks expect the opposite compare and clear its lanes from exec.
; Should use v_cmp_lt_f32_e32 vcc, 2.0
; The GFX12 checks instead expect v_cmp_nlt_f32_e32, with exec first copied to s[0:1].
; NOTE(review): the exec update is listed twice in a row in the SI, GFX10 and
; GFX11 checks -- confirm against current llc output when regenerating.
|
|
define amdgpu_gs void @ule(float %a) {
|
|
; SI-LABEL: ule:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_mov_b32 m0, 0
|
|
; SI-NEXT: s_nop 0
|
|
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: ; %bb.1:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: ule:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0
|
|
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-NEXT: s_mov_b32 m0, 0
|
|
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: ; %bb.1:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: ule:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v0
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_mov_b32 m0, 0
|
|
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: ; %bb.1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: ule:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v0
|
|
; GFX12-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
|
|
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
|
|
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX12-NEXT: s_mov_b32 m0, 0
|
|
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: ; %bb.1:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: s_endpgm
|
|
%c1 = fcmp ule float %a, 2.0
|
|
call void @llvm.amdgcn.kill(i1 %c1)
|
|
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Kill on `fcmp une float %a, 0.0`, followed by llvm.amdgcn.s.sendmsg(3, 0).
; llvm.amdgcn.kill keeps only the lanes whose i1 operand is true, so the SI,
; GFX10 and GFX11 checks expect the opposite compare and clear its lanes from exec.
; Should use v_cmp_eq_f32_e32 vcc, 0
; The GFX12 checks instead expect v_cmp_neq_f32_e32, with exec first copied to s[0:1].
; NOTE(review): the exec update is listed twice in a row in the SI, GFX10 and
; GFX11 checks -- confirm against current llc output when regenerating.
|
|
define amdgpu_gs void @une(float %a) {
|
|
; SI-LABEL: une:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_mov_b32 m0, 0
|
|
; SI-NEXT: s_nop 0
|
|
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: ; %bb.1:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: une:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
|
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-NEXT: s_mov_b32 m0, 0
|
|
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: ; %bb.1:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: une:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_mov_b32 m0, 0
|
|
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: ; %bb.1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: une:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
|
|
; GFX12-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
|
|
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
|
|
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX12-NEXT: s_mov_b32 m0, 0
|
|
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: ; %bb.1:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: s_endpgm
|
|
%c1 = fcmp une float %a, 0.0
|
|
call void @llvm.amdgcn.kill(i1 %c1)
|
|
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Kill on the negation (xor with 1) of `fcmp olt float %a, 1.0`, followed by
; llvm.amdgcn.s.sendmsg(3, 0). The SI, GFX10 and GFX11 checks expect the xor to
; be folded away: a single compare whose lanes are then cleared from exec.
; Should use v_cmp_gt_f32_e32 vcc, 1.0
; The GFX12 checks instead expect v_cmp_ngt_f32_e32, with exec first copied to s[0:1].
; NOTE(review): the exec update is listed twice in a row in the SI, GFX10 and
; GFX11 checks -- confirm against current llc output when regenerating.
|
|
define amdgpu_gs void @neg_olt(float %a) {
|
|
; SI-LABEL: neg_olt:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_mov_b32 m0, 0
|
|
; SI-NEXT: s_nop 0
|
|
; SI-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: ; %bb.1:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: neg_olt:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
|
|
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-NEXT: s_mov_b32 m0, 0
|
|
; GFX10-NEXT: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: ; %bb.1:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: neg_olt:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_mov_b32 m0, 0
|
|
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: ; %bb.1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: neg_olt:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc, 1.0, v0
|
|
; GFX12-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
|
|
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
|
|
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX12-NEXT: s_mov_b32 m0, 0
|
|
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: ; %bb.1:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: s_endpgm
|
|
%c1 = fcmp olt float %a, 1.0
|
|
%c2 = xor i1 %c1, 1
|
|
call void @llvm.amdgcn.kill(i1 %c2)
|
|
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
|
|
ret void
|
|
}
|
|
|
|
; Pixel shader that compares %a with 0.25, selects -1.0 or 0.0 from the result,
; compares that selection with 0.0 and passes the second compare to kill.
; FIXME: LLVM should be able to combine these fcmp opcodes.
; The checks below still expect two separate v_cmp instructions with a
; v_cndmask_b32 between them on every target.
|
|
define amdgpu_ps void @fcmp_x2(float %a) #0 {
|
|
; SI-LABEL: fcmp_x2:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_mov_b32 s0, 0x3e800000
|
|
; SI-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
|
|
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
|
; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB21_1
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB21_1:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null, off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: fcmp_x2:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc, 0x3e800000, v0
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
|
; GFX10-NEXT: v_cmp_nle_f32_e32 vcc, 0, v0
|
|
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-NEXT: s_cbranch_scc0 .LBB21_1
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: .LBB21_1:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: exp null, off, off, off, off done vm
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: fcmp_x2:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0x3e800000, v0
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
|
; GFX11-NEXT: v_cmp_nle_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_waitcnt_depctr depctr_va_vcc(0)
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB21_1
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB21_1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0, off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: fcmp_x2:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: v_cmp_lt_f32_e32 vcc, 0x3e800000, v0
|
|
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
|
|
; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
|
|
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, vcc
|
|
; GFX12-NEXT: s_and_not1_b64 s[0:1], exec, s[2:3]
|
|
; GFX12-NEXT: s_cbranch_scc0 .LBB21_1
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: .LBB21_1:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: export mrt0, off, off, off, off done
|
|
; GFX12-NEXT: s_endpgm
|
|
%ogt = fcmp nsz ogt float %a, 2.500000e-01
|
|
%k = select i1 %ogt, float -1.000000e+00, float 0.000000e+00
|
|
%c = fcmp nsz oge float %k, 0.000000e+00
|
|
call void @llvm.amdgcn.kill(i1 %c) #1
|
|
ret void
|
|
}
|
|
|
|
; Kill on the result of llvm.amdgcn.wqm.vote applied to `fcmp une float %a, 0.0`;
; the shader then returns the constant 0.0.
; Note: an almost identical test for this exists in llvm.amdgcn.wqm.vote.ll
; The checks expect s_wqm_b64 on the compare result, a live mask tracked in
; s[0:1], and a branch to a null/mrt0 "done" export block when no lanes remain.
|
|
define amdgpu_ps float @wqm(float %a) {
|
|
; SI-LABEL: wqm:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
|
|
; SI-NEXT: s_wqm_b64 s[2:3], vcc
|
|
; SI-NEXT: s_mov_b64 s[0:1], exec
|
|
; SI-NEXT: s_andn2_b64 s[2:3], exec, s[2:3]
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
|
|
; SI-NEXT: s_cbranch_scc0 .LBB22_2
|
|
; SI-NEXT: ; %bb.1:
|
|
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; SI-NEXT: v_mov_b32_e32 v0, 0
|
|
; SI-NEXT: s_branch .LBB22_3
|
|
; SI-NEXT: .LBB22_2:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null, off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB22_3:
|
|
;
|
|
; GFX10-LABEL: wqm:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
|
|
; GFX10-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX10-NEXT: s_wqm_b64 s[2:3], vcc
|
|
; GFX10-NEXT: s_andn2_b64 s[2:3], exec, s[2:3]
|
|
; GFX10-NEXT: s_andn2_b64 s[0:1], s[0:1], s[2:3]
|
|
; GFX10-NEXT: s_cbranch_scc0 .LBB22_2
|
|
; GFX10-NEXT: ; %bb.1:
|
|
; GFX10-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX10-NEXT: s_branch .LBB22_3
|
|
; GFX10-NEXT: .LBB22_2:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: exp null, off, off, off, off done vm
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: .LBB22_3:
|
|
;
|
|
; GFX11-LABEL: wqm:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_wqm_b64 s[2:3], vcc
|
|
; GFX11-NEXT: s_and_not1_b64 s[2:3], exec, s[2:3]
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB22_2
|
|
; GFX11-NEXT: ; %bb.1:
|
|
; GFX11-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX11-NEXT: s_branch .LBB22_3
|
|
; GFX11-NEXT: .LBB22_2:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0, off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB22_3:
|
|
;
|
|
; GFX12-LABEL: wqm:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
|
|
; GFX12-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX12-NEXT: s_wqm_b64 s[2:3], vcc
|
|
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, s[2:3]
|
|
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
|
|
; GFX12-NEXT: s_cbranch_scc0 .LBB22_2
|
|
; GFX12-NEXT: ; %bb.1:
|
|
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX12-NEXT: v_mov_b32_e32 v0, 0
|
|
; GFX12-NEXT: s_branch .LBB22_3
|
|
; GFX12-NEXT: .LBB22_2:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: export mrt0, off, off, off, off done
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: .LBB22_3:
|
|
%c1 = fcmp une float %a, 0.0
|
|
%c2 = call i1 @llvm.amdgcn.wqm.vote(i1 %c1)
|
|
call void @llvm.amdgcn.kill(i1 %c2)
|
|
ret float 0.0
|
|
}
|
|
|
|
; Kill on `fcmp ole float %a, 1.0` where %a is an inreg argument (s0 in the checks).
; This checks that we use the 64-bit encoding when the operand is a SGPR.
; The SI, GFX10 and GFX11 checks expect v_cmp_nle_f32_e64 with s0 as an operand;
; the GFX12 checks expect a scalar s_cmp_le_f32 followed by s_cselect_b64 instead.
|
|
define amdgpu_ps void @test_sgpr(float inreg %a) #0 {
|
|
; SI-LABEL: test_sgpr:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: v_cmp_nle_f32_e64 vcc, s0, 1.0
|
|
; SI-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; SI-NEXT: s_cbranch_scc0 .LBB23_1
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB23_1:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null, off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: test_sgpr:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: v_cmp_nle_f32_e64 vcc, s0, 1.0
|
|
; GFX10-NEXT: s_andn2_b64 exec, exec, vcc
|
|
; GFX10-NEXT: s_cbranch_scc0 .LBB23_1
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: .LBB23_1:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: exp null, off, off, off, off done vm
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_sgpr:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_nle_f32_e64 vcc, s0, 1.0
|
|
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB23_1
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB23_1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0, off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: test_sgpr:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_cmp_le_f32 s0, 1.0
|
|
; GFX12-NEXT: s_cselect_b64 s[0:1], -1, 0
|
|
; GFX12-NEXT: s_and_not1_b64 s[0:1], exec, s[0:1]
|
|
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, s[0:1]
|
|
; GFX12-NEXT: s_cbranch_scc0 .LBB23_1
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: .LBB23_1:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: export mrt0, off, off, off, off done
|
|
; GFX12-NEXT: s_endpgm
|
|
%c = fcmp ole float %a, 1.000000e+00
|
|
call void @llvm.amdgcn.kill(i1 %c) #1
|
|
ret void
|
|
}
|
|
|
|
; Kill on `fcmp ole float %a, 1.5` with an inreg argument. Unlike the 1.0 used
; in test_sgpr, the checks show 1.5 as the 32-bit literal 0x3fc00000.
; The SI checks first move the literal into v0; the GFX10 and GFX11 checks use it
; directly in v_cmp_ge_f32_e64; the GFX12 checks use it in s_cmp_le_f32.
define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 {
|
|
; SI-LABEL: test_non_inline_imm_sgpr:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: v_mov_b32_e32 v0, 0x3fc00000
|
|
; SI-NEXT: v_cmp_le_f32_e32 vcc, s0, v0
|
|
; SI-NEXT: s_andn2_b64 s[0:1], exec, vcc
|
|
; SI-NEXT: s_andn2_b64 s[2:3], exec, s[0:1]
|
|
; SI-NEXT: s_cbranch_scc0 .LBB24_1
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB24_1:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null, off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: test_non_inline_imm_sgpr:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: v_cmp_ge_f32_e64 s[0:1], 0x3fc00000, s0
|
|
; GFX10-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
|
|
; GFX10-NEXT: s_andn2_b64 s[2:3], exec, s[0:1]
|
|
; GFX10-NEXT: s_cbranch_scc0 .LBB24_1
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: .LBB24_1:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: exp null, off, off, off, off done vm
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_non_inline_imm_sgpr:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: v_cmp_ge_f32_e64 s[0:1], 0x3fc00000, s0
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], exec, s[0:1]
|
|
; GFX11-NEXT: s_and_not1_b64 s[2:3], exec, s[0:1]
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB24_1
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB24_1:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0, off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: test_non_inline_imm_sgpr:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_cmp_le_f32 s0, 0x3fc00000
|
|
; GFX12-NEXT: s_cselect_b64 s[0:1], -1, 0
|
|
; GFX12-NEXT: s_and_not1_b64 s[0:1], exec, s[0:1]
|
|
; GFX12-NEXT: s_and_not1_b64 s[2:3], exec, s[0:1]
|
|
; GFX12-NEXT: s_cbranch_scc0 .LBB24_1
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: .LBB24_1:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: export mrt0, off, off, off, off done
|
|
; GFX12-NEXT: s_endpgm
|
|
%c = fcmp ole float %a, 1.500000e+00
|
|
call void @llvm.amdgcn.kill(i1 %c) #1
|
|
ret void
|
|
}
|
|
|
|
; Loop in which the same i1 (`icmp sgt i32 %tmp, 0`) feeds both the kill and the
; loop-exit branch. The checks expect the kill to be driven by s_cmp_gt_i32 plus
; s_cbranch_scc0, and the back-edge by a separate s_cmp_lt_i32 plus
; s_cbranch_scc1 issued after the exec update.
; NOTE(review): judging by the name this guards SCC liveness across the kill
; expansion -- confirm the original motivation.
define amdgpu_ps void @test_scc_liveness() #0 {
|
|
; SI-LABEL: test_scc_liveness:
|
|
; SI: ; %bb.0: ; %main_body
|
|
; SI-NEXT: s_mov_b64 s[0:1], exec
|
|
; SI-NEXT: s_mov_b32 s2, 0
|
|
; SI-NEXT: .LBB25_1: ; %loop3
|
|
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SI-NEXT: s_cmp_gt_i32 s2, 0
|
|
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; SI-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
|
|
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
|
|
; SI-NEXT: s_cbranch_scc0 .LBB25_4
|
|
; SI-NEXT: ; %bb.2: ; %loop3
|
|
; SI-NEXT: ; in Loop: Header=BB25_1 Depth=1
|
|
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; SI-NEXT: s_add_i32 s3, s2, 1
|
|
; SI-NEXT: s_cmp_lt_i32 s2, 1
|
|
; SI-NEXT: s_mov_b32 s2, s3
|
|
; SI-NEXT: s_cbranch_scc1 .LBB25_1
|
|
; SI-NEXT: ; %bb.3: ; %endloop15
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB25_4:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null, off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: test_scc_liveness:
|
|
; GFX10: ; %bb.0: ; %main_body
|
|
; GFX10-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX10-NEXT: s_mov_b32 s2, 0
|
|
; GFX10-NEXT: .LBB25_1: ; %loop3
|
|
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX10-NEXT: s_cmp_gt_i32 s2, 0
|
|
; GFX10-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GFX10-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
|
|
; GFX10-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
|
|
; GFX10-NEXT: s_cbranch_scc0 .LBB25_4
|
|
; GFX10-NEXT: ; %bb.2: ; %loop3
|
|
; GFX10-NEXT: ; in Loop: Header=BB25_1 Depth=1
|
|
; GFX10-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX10-NEXT: s_add_i32 s3, s2, 1
|
|
; GFX10-NEXT: s_cmp_lt_i32 s2, 1
|
|
; GFX10-NEXT: s_mov_b32 s2, s3
|
|
; GFX10-NEXT: s_cbranch_scc1 .LBB25_1
|
|
; GFX10-NEXT: ; %bb.3: ; %endloop15
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: .LBB25_4:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: exp null, off, off, off, off done vm
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_scc_liveness:
|
|
; GFX11: ; %bb.0: ; %main_body
|
|
; GFX11-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX11-NEXT: s_mov_b32 s2, 0
|
|
; GFX11-NEXT: .LBB25_1: ; %loop3
|
|
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX11-NEXT: s_cmp_gt_i32 s2, 0
|
|
; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GFX11-NEXT: s_and_not1_b64 s[4:5], exec, s[4:5]
|
|
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[4:5]
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB25_4
|
|
; GFX11-NEXT: ; %bb.2: ; %loop3
|
|
; GFX11-NEXT: ; in Loop: Header=BB25_1 Depth=1
|
|
; GFX11-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX11-NEXT: s_add_i32 s3, s2, 1
|
|
; GFX11-NEXT: s_cmp_lt_i32 s2, 1
|
|
; GFX11-NEXT: s_mov_b32 s2, s3
|
|
; GFX11-NEXT: s_cbranch_scc1 .LBB25_1
|
|
; GFX11-NEXT: ; %bb.3: ; %endloop15
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB25_4:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0, off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: test_scc_liveness:
|
|
; GFX12: ; %bb.0: ; %main_body
|
|
; GFX12-NEXT: s_mov_b64 s[0:1], exec
|
|
; GFX12-NEXT: s_mov_b32 s2, 0
|
|
; GFX12-NEXT: .LBB25_1: ; %loop3
|
|
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX12-NEXT: s_cmp_gt_i32 s2, 0
|
|
; GFX12-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GFX12-NEXT: s_and_not1_b64 s[4:5], exec, s[4:5]
|
|
; GFX12-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[4:5]
|
|
; GFX12-NEXT: s_cbranch_scc0 .LBB25_4
|
|
; GFX12-NEXT: ; %bb.2: ; %loop3
|
|
; GFX12-NEXT: ; in Loop: Header=BB25_1 Depth=1
|
|
; GFX12-NEXT: s_and_b64 exec, exec, s[0:1]
|
|
; GFX12-NEXT: s_add_co_i32 s3, s2, 1
|
|
; GFX12-NEXT: s_cmp_lt_i32 s2, 1
|
|
; GFX12-NEXT: s_mov_b32 s2, s3
|
|
; GFX12-NEXT: s_cbranch_scc1 .LBB25_1
|
|
; GFX12-NEXT: ; %bb.3: ; %endloop15
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: .LBB25_4:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: export mrt0, off, off, off, off done
|
|
; GFX12-NEXT: s_endpgm
|
|
main_body:
|
|
br label %loop3
|
|
|
|
|
|
loop3: ; preds = %loop3, %main_body
|
|
%tmp = phi i32 [ 0, %main_body ], [ %tmp5, %loop3 ]
|
|
%tmp1 = icmp sgt i32 %tmp, 0
|
|
call void @llvm.amdgcn.kill(i1 %tmp1) #1
|
|
%tmp5 = add i32 %tmp, 1
|
|
br i1 %tmp1, label %endloop15, label %loop3
|
|
|
|
|
|
endloop15: ; preds = %loop3
|
|
ret void
|
|
}
|
|
|
|
; Check this compiles.
|
|
; If kill is marked as defining VCC then this will fail with live interval issues.
; The entry block branches on two `fcmp olt` results; otherwise a loop controlled
; by %tmp31 runs, followed by kill(false) in %bb33 and the export in %bb35.
|
|
define amdgpu_ps void @kill_with_loop_exit(float inreg %inp0, float inreg %inp1, <4 x i32> inreg %inp2, float inreg %inp3) {
|
|
; SI-LABEL: kill_with_loop_exit:
|
|
; SI: ; %bb.0: ; %.entry
|
|
; SI-NEXT: v_mov_b32_e32 v0, 0x43000000
|
|
; SI-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
|
|
; SI-NEXT: v_cmp_lt_f32_e64 s[0:1], s1, v0
|
|
; SI-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
|
|
; SI-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; SI-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; SI-NEXT: s_cbranch_vccnz .LBB26_5
|
|
; SI-NEXT: ; %bb.1: ; %.preheader1.preheader
|
|
; SI-NEXT: v_cmp_ngt_f32_e64 s[0:1], s6, 0
|
|
; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
|
|
; SI-NEXT: s_mov_b64 s[2:3], exec
|
|
; SI-NEXT: v_mov_b32_e32 v0, 0x3fc00000
|
|
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
|
|
; SI-NEXT: .LBB26_2: ; %bb
|
|
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SI-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; SI-NEXT: v_add_f32_e32 v0, 0x3e800000, v0
|
|
; SI-NEXT: s_cbranch_vccnz .LBB26_2
|
|
; SI-NEXT: ; %bb.3: ; %bb33
|
|
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
|
; SI-NEXT: s_cbranch_scc0 .LBB26_6
|
|
; SI-NEXT: ; %bb.4: ; %bb33
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: .LBB26_5: ; %bb35
|
|
; SI-NEXT: exp mrt0, v0, v0, v0, v0 done vm
|
|
; SI-NEXT: s_endpgm
|
|
; SI-NEXT: .LBB26_6:
|
|
; SI-NEXT: s_mov_b64 exec, 0
|
|
; SI-NEXT: exp null, off, off, off, off done vm
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: kill_with_loop_exit:
|
|
; GFX10: ; %bb.0: ; %.entry
|
|
; GFX10-NEXT: v_cmp_gt_f32_e64 s[4:5], 0x43000000, s0
|
|
; GFX10-NEXT: v_cmp_gt_f32_e64 s[0:1], 0x43000000, s1
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GFX10-NEXT: s_and_b64 s[0:1], s[4:5], s[0:1]
|
|
; GFX10-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; GFX10-NEXT: s_cbranch_vccnz .LBB26_5
|
|
; GFX10-NEXT: ; %bb.1: ; %.preheader1.preheader
|
|
; GFX10-NEXT: v_cmp_ngt_f32_e64 s[0:1], s6, 0
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, 0x3fc00000
|
|
; GFX10-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
|
|
; GFX10-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
|
|
; GFX10-NEXT: .LBB26_2: ; %bb
|
|
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX10-NEXT: v_add_f32_e32 v0, 0x3e800000, v0
|
|
; GFX10-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; GFX10-NEXT: s_cbranch_vccnz .LBB26_2
|
|
; GFX10-NEXT: ; %bb.3: ; %bb33
|
|
; GFX10-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
|
; GFX10-NEXT: s_cbranch_scc0 .LBB26_6
|
|
; GFX10-NEXT: ; %bb.4: ; %bb33
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: .LBB26_5: ; %bb35
|
|
; GFX10-NEXT: exp mrt0, v0, v0, v0, v0 done vm
|
|
; GFX10-NEXT: s_endpgm
|
|
; GFX10-NEXT: .LBB26_6:
|
|
; GFX10-NEXT: s_mov_b64 exec, 0
|
|
; GFX10-NEXT: exp null, off, off, off, off done vm
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: kill_with_loop_exit:
|
|
; GFX11: ; %bb.0: ; %.entry
|
|
; GFX11-NEXT: v_cmp_gt_f32_e64 s[4:5], 0x43000000, s0
|
|
; GFX11-NEXT: v_cmp_gt_f32_e64 s[0:1], 0x43000000, s1
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
|
|
; GFX11-NEXT: s_and_b64 s[0:1], s[4:5], s[0:1]
|
|
; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; GFX11-NEXT: s_cbranch_vccnz .LBB26_5
|
|
; GFX11-NEXT: ; %bb.1: ; %.preheader1.preheader
|
|
; GFX11-NEXT: v_cmp_ngt_f32_e64 s[0:1], s6, 0
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3fc00000
|
|
; GFX11-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
|
|
; GFX11-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
|
|
; GFX11-NEXT: s_waitcnt_depctr depctr_va_sdst(0)
|
|
; GFX11-NEXT: .LBB26_2: ; %bb
|
|
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX11-NEXT: v_add_f32_e32 v0, 0x3e800000, v0
|
|
; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; GFX11-NEXT: s_cbranch_vccnz .LBB26_2
|
|
; GFX11-NEXT: ; %bb.3: ; %bb33
|
|
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
|
|
; GFX11-NEXT: s_cbranch_scc0 .LBB26_6
|
|
; GFX11-NEXT: ; %bb.4: ; %bb33
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: .LBB26_5: ; %bb35
|
|
; GFX11-NEXT: exp mrt0, v0, v0, v0, v0 done
|
|
; GFX11-NEXT: s_endpgm
|
|
; GFX11-NEXT: .LBB26_6:
|
|
; GFX11-NEXT: s_mov_b64 exec, 0
|
|
; GFX11-NEXT: exp mrt0, off, off, off, off done
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: kill_with_loop_exit:
|
|
; GFX12: ; %bb.0: ; %.entry
|
|
; GFX12-NEXT: s_cmp_lt_f32 s0, 0x43000000
|
|
; GFX12-NEXT: s_cselect_b64 s[4:5], -1, 0
|
|
; GFX12-NEXT: s_cmp_lt_f32 s1, 0x43000000
|
|
; GFX12-NEXT: s_cselect_b64 s[0:1], -1, 0
|
|
; GFX12-NEXT: s_and_b64 s[0:1], s[4:5], s[0:1]
|
|
; GFX12-NEXT: s_mov_b32 s4, 1.0
|
|
; GFX12-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; GFX12-NEXT: s_cbranch_vccnz .LBB26_5
|
|
; GFX12-NEXT: ; %bb.1: ; %.preheader1.preheader
|
|
; GFX12-NEXT: s_cmp_ngt_f32 s6, 0
|
|
; GFX12-NEXT: s_mov_b64 s[2:3], exec
|
|
; GFX12-NEXT: s_mov_b32 s4, 0x3fc00000
|
|
; GFX12-NEXT: s_cselect_b64 s[0:1], -1, 0
|
|
; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
|
|
; GFX12-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
|
|
; GFX12-NEXT: .LBB26_2: ; %bb
|
|
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GFX12-NEXT: s_add_f32 s4, s4, 0x3e800000
|
|
; GFX12-NEXT: s_and_b64 vcc, exec, s[0:1]
|
|
; GFX12-NEXT: s_cbranch_vccnz .LBB26_2
|
|
; GFX12-NEXT: ; %bb.3: ; %bb33
|
|
; GFX12-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
|
|
; GFX12-NEXT: s_cbranch_scc0 .LBB26_6
|
|
; GFX12-NEXT: ; %bb.4: ; %bb33
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: .LBB26_5: ; %bb35
|
|
; GFX12-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX12-NEXT: export mrt0, v0, v0, v0, v0 done
|
|
; GFX12-NEXT: s_endpgm
|
|
; GFX12-NEXT: .LBB26_6:
|
|
; GFX12-NEXT: s_mov_b64 exec, 0
|
|
; GFX12-NEXT: export mrt0, off, off, off, off done
|
|
; GFX12-NEXT: s_endpgm
|
|
.entry:
|
|
%tmp24 = fcmp olt float %inp0, 1.280000e+02
|
|
%tmp25 = fcmp olt float %inp1, 1.280000e+02
|
|
%tmp26 = and i1 %tmp24, %tmp25
|
|
br i1 %tmp26, label %bb35, label %.preheader1.preheader
|
|
|
|
|
|
.preheader1.preheader: ; preds = %.entry
|
|
%tmp31 = fcmp ogt float %inp3, 0.0
|
|
br label %bb
|
|
|
|
|
|
bb: ; preds = %bb, %.preheader1.preheader
|
|
%tmp30 = phi float [ %tmp32, %bb ], [ 1.500000e+00, %.preheader1.preheader ]
|
|
%tmp32 = fadd reassoc nnan nsz arcp contract float %tmp30, 2.500000e-01
|
|
%tmp34 = fadd reassoc nnan nsz arcp contract float %tmp30, 2.500000e-01
|
|
br i1 %tmp31, label %bb, label %bb33
|
|
|
|
|
|
bb33: ; preds = %bb
|
|
call void @llvm.amdgcn.kill(i1 false)
|
|
br label %bb35
|
|
|
|
|
|
bb35: ; preds = %bb33, %.entry
|
|
%tmp36 = phi float [ %tmp34, %bb33 ], [ 1.000000e+00, %.entry ]
|
|
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp36, float %tmp36, float %tmp36, i1 true, i1 true) #3
|
|
ret void
|
|
}
|
|
|
|
; Declarations of the AMDGPU intrinsics called by the test functions in this file.
declare void @llvm.amdgcn.kill(i1) #0
|
|
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
|
declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
|
|
declare i1 @llvm.amdgcn.wqm.vote(i1)
|
|
|
|
; Attribute group referenced as #0 by the declarations and functions in this file.
attributes #0 = { nounwind }
|