llvm-project/llvm/test/CodeGen/AMDGPU/workitem-intrinsic-opts.ll
Pierre van Houtryve c4b1557097
[DAG] Fold (setcc ((x | x >> c0 | ...) & mask)) sequences (#146054)
Fold sequences where we extract a bunch of contiguous bits from a value,
merge them into the low bit and then check if the low bits are zero or
not.

Usually the and would be on the outside (the leaves) of the expression,
but the DAG canonicalizes it to a single `and` at the root of the
expression.

The reason I put this in DAGCombiner instead of the target combiner is
because this is a generic, valid transform that's also fairly niche, so
there isn't much risk of a combine loop I think.

See #136727
2025-07-30 10:27:19 +02:00

532 lines
22 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -O3 -mtriple=amdgcn -mcpu=fiji %s -o - | FileCheck %s --check-prefixes=GFX8,DAGISEL-GFX8
; RUN: llc -O3 -mtriple=amdgcn -mcpu=gfx942 %s -o - | FileCheck %s --check-prefixes=GFX942,DAGISEL-GFX942
; RUN: llc -O3 -mtriple=amdgcn -mcpu=gfx1200 %s -o - | FileCheck %s --check-prefixes=GFX12,DAGISEL-GFX12
; RUN: llc -O3 -global-isel -mtriple=amdgcn -mcpu=fiji %s -o - | FileCheck %s --check-prefixes=GFX8,GISEL-GFX8
; RUN: llc -O3 -global-isel -mtriple=amdgcn -mcpu=gfx942 %s -o - | FileCheck %s --check-prefixes=GFX942,GISEL-GFX942
; RUN: llc -O3 -global-isel -mtriple=amdgcn -mcpu=gfx1200 %s -o - | FileCheck %s --check-prefixes=GFX12,GISEL-GFX12
; Returns true when (workitem.id.x | workitem.id.y | workitem.id.z) == 0.
; The SelectionDAG assertions expect all three ID fields to be tested with one
; 0x3fffffff mask of v31; the GlobalISel assertions still expect each 10-bit
; field to be extracted and OR'ed separately before the compare.
define i1 @workitem_zero() {
; DAGISEL-GFX8-LABEL: workitem_zero:
; DAGISEL-GFX8: ; %bb.0: ; %entry
; DAGISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
; DAGISEL-GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; DAGISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; DAGISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX942-LABEL: workitem_zero:
; DAGISEL-GFX942: ; %bb.0: ; %entry
; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
; DAGISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; DAGISEL-GFX942-NEXT: s_nop 1
; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX12-LABEL: workitem_zero:
; DAGISEL-GFX12: ; %bb.0: ; %entry
; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX8-LABEL: workitem_zero:
; GISEL-GFX8: ; %bb.0: ; %entry
; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX942-LABEL: workitem_zero:
; GISEL-GFX942: ; %bb.0: ; %entry
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
; GISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GISEL-GFX942-NEXT: s_nop 1
; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: workitem_zero:
; GISEL-GFX12: ; %bb.0: ; %entry
; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10
; GISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 20, 10
; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX12-NEXT: v_or3_b32 v0, v0, v1, v2
; GISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GISEL-GFX12-NEXT: s_wait_alu 0xfffd
; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  ; OR the three workitem IDs together, then test the combined value for zero.
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tid.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tid.xy = or i32 %tid.x, %tid.y
  %tid.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tid.xyz = or i32 %tid.xy, %tid.z
  %all.zero = icmp eq i32 %tid.xyz, 0
  ret i1 %all.zero
}
; Returns true when (workitem.id.x | workitem.id.y | workitem.id.z) != 0.
; Inverse of the == 0 case: the SelectionDAG assertions expect one 0x3fffffff
; mask of v31 followed by a not-equal compare, while the GlobalISel assertions
; expect the three 10-bit fields to be extracted and OR'ed individually.
define i1 @workitem_nonzero() {
; DAGISEL-GFX8-LABEL: workitem_nonzero:
; DAGISEL-GFX8: ; %bb.0: ; %entry
; DAGISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
; DAGISEL-GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; DAGISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; DAGISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX942-LABEL: workitem_nonzero:
; DAGISEL-GFX942: ; %bb.0: ; %entry
; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
; DAGISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; DAGISEL-GFX942-NEXT: s_nop 1
; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX12-LABEL: workitem_nonzero:
; DAGISEL-GFX12: ; %bb.0: ; %entry
; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3fffffff, v31
; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX8-LABEL: workitem_nonzero:
; GISEL-GFX8: ; %bb.0: ; %entry
; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
; GISEL-GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX942-LABEL: workitem_nonzero:
; GISEL-GFX942: ; %bb.0: ; %entry
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
; GISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GISEL-GFX942-NEXT: s_nop 1
; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: workitem_nonzero:
; GISEL-GFX12: ; %bb.0: ; %entry
; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 10, 10
; GISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 20, 10
; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX12-NEXT: v_or3_b32 v0, v0, v1, v2
; GISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GISEL-GFX12-NEXT: s_wait_alu 0xfffd
; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  ; OR the three workitem IDs together, then test whether any bit is set.
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tid.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tid.xy = or i32 %tid.x, %tid.y
  %tid.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tid.xyz = or i32 %tid.xy, %tid.z
  %any.nonzero = icmp ne i32 %tid.xyz, 0
  ret i1 %any.nonzero
}
; Returns true when (workgroup.id.x | workgroup.id.y | workgroup.id.z) == 0.
; No mask fold is expected here: on GFX8 and GFX942 the assertions OR s12, s13
; and s14, and on GFX12 they OR ttmp9 with the low and high 16-bit halves of
; ttmp7. Both selectors are expected to stay on the scalar compare path.
define i1 @workgroup_zero() {
; DAGISEL-GFX8-LABEL: workgroup_zero:
; DAGISEL-GFX8: ; %bb.0: ; %entry
; DAGISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX8-NEXT: s_or_b32 s4, s12, s13
; DAGISEL-GFX8-NEXT: s_or_b32 s4, s4, s14
; DAGISEL-GFX8-NEXT: s_cmp_eq_u32 s4, 0
; DAGISEL-GFX8-NEXT: s_cselect_b64 s[4:5], -1, 0
; DAGISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; DAGISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX942-LABEL: workgroup_zero:
; DAGISEL-GFX942: ; %bb.0: ; %entry
; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
; DAGISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
; DAGISEL-GFX942-NEXT: s_cmp_eq_u32 s0, 0
; DAGISEL-GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX12-LABEL: workgroup_zero:
; DAGISEL-GFX12: ; %bb.0: ; %entry
; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; DAGISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
; DAGISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: s_cmp_eq_u32 s0, 0
; DAGISEL-GFX12-NEXT: s_cselect_b32 s0, -1, 0
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX8-LABEL: workgroup_zero:
; GISEL-GFX8: ; %bb.0: ; %entry
; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX8-NEXT: s_or_b32 s4, s12, s13
; GISEL-GFX8-NEXT: s_or_b32 s4, s4, s14
; GISEL-GFX8-NEXT: s_cmp_eq_u32 s4, 0
; GISEL-GFX8-NEXT: s_cselect_b32 s4, 1, 0
; GISEL-GFX8-NEXT: v_mov_b32_e32 v0, s4
; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX942-LABEL: workgroup_zero:
; GISEL-GFX942: ; %bb.0: ; %entry
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
; GISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
; GISEL-GFX942-NEXT: s_cmp_eq_u32 s0, 0
; GISEL-GFX942-NEXT: s_cselect_b32 s0, 1, 0
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: workgroup_zero:
; GISEL-GFX12: ; %bb.0: ; %entry
; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
; GISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_cmp_eq_u32 s0, 0
; GISEL-GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  ; OR the three workgroup IDs together, then test the combined value for zero.
  %wg.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.y = tail call i32 @llvm.amdgcn.workgroup.id.y()
  %wg.xy = or i32 %wg.x, %wg.y
  %wg.z = tail call i32 @llvm.amdgcn.workgroup.id.z()
  %wg.xyz = or i32 %wg.xy, %wg.z
  %all.zero = icmp eq i32 %wg.xyz, 0
  ret i1 %all.zero
}
; Returns true when (workgroup.id.x | workgroup.id.y | workgroup.id.z) != 0.
; Inverse of the == 0 case: the assertions expect the same scalar OR chain
; (s12/s13/s14 on GFX8 and GFX942, ttmp9 plus both halves of ttmp7 on GFX12)
; followed by s_cmp_lg_u32 instead of s_cmp_eq_u32.
define i1 @workgroup_nonzero() {
; DAGISEL-GFX8-LABEL: workgroup_nonzero:
; DAGISEL-GFX8: ; %bb.0: ; %entry
; DAGISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX8-NEXT: s_or_b32 s4, s12, s13
; DAGISEL-GFX8-NEXT: s_or_b32 s4, s4, s14
; DAGISEL-GFX8-NEXT: s_cmp_lg_u32 s4, 0
; DAGISEL-GFX8-NEXT: s_cselect_b64 s[4:5], -1, 0
; DAGISEL-GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; DAGISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX942-LABEL: workgroup_nonzero:
; DAGISEL-GFX942: ; %bb.0: ; %entry
; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
; DAGISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
; DAGISEL-GFX942-NEXT: s_cmp_lg_u32 s0, 0
; DAGISEL-GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0
; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX12-LABEL: workgroup_nonzero:
; DAGISEL-GFX12: ; %bb.0: ; %entry
; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; DAGISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
; DAGISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: s_cmp_lg_u32 s0, 0
; DAGISEL-GFX12-NEXT: s_cselect_b32 s0, -1, 0
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX8-LABEL: workgroup_nonzero:
; GISEL-GFX8: ; %bb.0: ; %entry
; GISEL-GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX8-NEXT: s_or_b32 s4, s12, s13
; GISEL-GFX8-NEXT: s_or_b32 s4, s4, s14
; GISEL-GFX8-NEXT: s_cmp_lg_u32 s4, 0
; GISEL-GFX8-NEXT: s_cselect_b32 s4, 1, 0
; GISEL-GFX8-NEXT: v_mov_b32_e32 v0, s4
; GISEL-GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX942-LABEL: workgroup_nonzero:
; GISEL-GFX942: ; %bb.0: ; %entry
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
; GISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
; GISEL-GFX942-NEXT: s_cmp_lg_u32 s0, 0
; GISEL-GFX942-NEXT: s_cselect_b32 s0, 1, 0
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: workgroup_nonzero:
; GISEL-GFX12: ; %bb.0: ; %entry
; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
; GISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_cmp_lg_u32 s0, 0
; GISEL-GFX12-NEXT: s_cselect_b32 s0, 1, 0
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: v_mov_b32_e32 v0, s0
; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  ; OR the three workgroup IDs together, then test whether any bit is set.
  %wg.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.y = tail call i32 @llvm.amdgcn.workgroup.id.y()
  %wg.xy = or i32 %wg.x, %wg.y
  %wg.z = tail call i32 @llvm.amdgcn.workgroup.id.z()
  %wg.xyz = or i32 %wg.xy, %wg.z
  %any.nonzero = icmp ne i32 %wg.xyz, 0
  ret i1 %any.nonzero
}
; Returns true when all three workgroup IDs and all three workitem IDs are zero:
; (workgroup_id_x | workgroup_id_y | workgroup_id_z |
;  workitem_id_x | workitem_id_y | workitem_id_z) == 0
; The compare must consume %or4, the full six-way OR. Comparing %or3 would drop
; workitem.id.z from the test and leave %or4 dead, so the z field
; (v_bfe_u32 ..., 20, 10) would never show up in the expected code.
; NOTE(review): the assertions below were written by mirroring the expected
; code of @workitem_workgroup_nonzero with an equality compare; rerun
; utils/update_llc_test_checks.py to confirm them against llc.
define i1 @workitem_workgroup_zero() {
; GFX8-LABEL: workitem_workgroup_zero:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_or_b32 s4, s12, s13
; GFX8-NEXT: s_or_b32 s4, s4, s14
; GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GFX8-NEXT: v_or_b32_e32 v0, s4, v0
; GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX942-LABEL: workitem_workgroup_zero:
; DAGISEL-GFX942: ; %bb.0: ; %entry
; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
; DAGISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; DAGISEL-GFX942-NEXT: v_or_b32_e32 v0, s0, v0
; DAGISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 20, 10
; DAGISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 10, 10
; DAGISEL-GFX942-NEXT: v_or3_b32 v0, v0, v2, v1
; DAGISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; DAGISEL-GFX942-NEXT: s_nop 1
; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX12-LABEL: workitem_workgroup_zero:
; DAGISEL-GFX12: ; %bb.0: ; %entry
; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; DAGISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
; DAGISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
; DAGISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 20, 10
; DAGISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 10, 10
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: v_or3_b32 v0, s0, s1, v0
; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; DAGISEL-GFX12-NEXT: v_or3_b32 v0, v0, v2, v1
; DAGISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX942-LABEL: workitem_workgroup_zero:
; GISEL-GFX942: ; %bb.0: ; %entry
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
; GISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x3ff
; GISEL-GFX942-NEXT: v_and_or_b32 v0, v31, v0, s0
; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
; GISEL-GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GISEL-GFX942-NEXT: s_nop 1
; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: workitem_workgroup_zero:
; GISEL-GFX12: ; %bb.0: ; %entry
; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
; GISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
; GISEL-GFX12-NEXT: v_bfe_u32 v0, v31, 10, 10
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 20, 10
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: v_and_or_b32 v2, 0x3ff, v31, s0
; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX12-NEXT: v_or3_b32 v0, v2, v0, v1
; GISEL-GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GISEL-GFX12-NEXT: s_wait_alu 0xfffd
; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Workgroup IDs first, then the workitem IDs, accumulated into one OR chain.
  %0 = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %1 = tail call i32 @llvm.amdgcn.workgroup.id.y()
  %or = or i32 %0, %1
  %2 = tail call i32 @llvm.amdgcn.workgroup.id.z()
  %or1 = or i32 %or, %2
  %3 = tail call i32 @llvm.amdgcn.workitem.id.x()
  %or2 = or i32 %or1, %3
  %4 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %or3 = or i32 %or2, %4
  %5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %or4 = or i32 %or3, %5
  ; Test the complete chain (%or4), which includes workitem.id.z.
  %cmp = icmp eq i32 %or4, 0
  ret i1 %cmp
}
; Returns true when any workgroup ID or workitem ID is nonzero:
; (workgroup_id_x | workgroup_id_y | workgroup_id_z |
;  workitem_id_x | workitem_id_y | workitem_id_z) != 0
; None of the assertions expect the single-mask form here: every target is
; expected to extract the workitem fields individually and OR them with the
; scalar workgroup IDs. GFX8 shares one set of assertions between both
; selectors; GFX942 and GFX12 have separate sets per selector.
define i1 @workitem_workgroup_nonzero() {
; GFX8-LABEL: workitem_workgroup_nonzero:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_or_b32 s4, s12, s13
; GFX8-NEXT: s_or_b32 s4, s4, s14
; GFX8-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GFX8-NEXT: v_or_b32_e32 v0, s4, v0
; GFX8-NEXT: v_bfe_u32 v1, v31, 10, 10
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_bfe_u32 v1, v31, 20, 10
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX942-LABEL: workitem_workgroup_nonzero:
; DAGISEL-GFX942: ; %bb.0: ; %entry
; DAGISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
; DAGISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
; DAGISEL-GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; DAGISEL-GFX942-NEXT: v_or_b32_e32 v0, s0, v0
; DAGISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 20, 10
; DAGISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 10, 10
; DAGISEL-GFX942-NEXT: v_or3_b32 v0, v0, v2, v1
; DAGISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; DAGISEL-GFX942-NEXT: s_nop 1
; DAGISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; DAGISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; DAGISEL-GFX12-LABEL: workitem_workgroup_nonzero:
; DAGISEL-GFX12: ; %bb.0: ; %entry
; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; DAGISEL-GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; DAGISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
; DAGISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
; DAGISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 20, 10
; DAGISEL-GFX12-NEXT: v_bfe_u32 v2, v31, 10, 10
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
; DAGISEL-GFX12-NEXT: v_or3_b32 v0, s0, s1, v0
; DAGISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; DAGISEL-GFX12-NEXT: v_or3_b32 v0, v0, v2, v1
; DAGISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffd
; DAGISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; DAGISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX942-LABEL: workitem_workgroup_nonzero:
; GISEL-GFX942: ; %bb.0: ; %entry
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX942-NEXT: s_or_b32 s0, s12, s13
; GISEL-GFX942-NEXT: s_or_b32 s0, s0, s14
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x3ff
; GISEL-GFX942-NEXT: v_and_or_b32 v0, v31, v0, s0
; GISEL-GFX942-NEXT: v_bfe_u32 v1, v31, 10, 10
; GISEL-GFX942-NEXT: v_bfe_u32 v2, v31, 20, 10
; GISEL-GFX942-NEXT: v_or3_b32 v0, v0, v1, v2
; GISEL-GFX942-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GISEL-GFX942-NEXT: s_nop 1
; GISEL-GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GISEL-GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX12-LABEL: workitem_workgroup_nonzero:
; GISEL-GFX12: ; %bb.0: ; %entry
; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
; GISEL-GFX12-NEXT: s_and_b32 s0, ttmp7, 0xffff
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_lshr_b32 s1, ttmp7, 16
; GISEL-GFX12-NEXT: s_or_b32 s0, ttmp9, s0
; GISEL-GFX12-NEXT: v_bfe_u32 v0, v31, 10, 10
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: s_or_b32 s0, s0, s1
; GISEL-GFX12-NEXT: v_bfe_u32 v1, v31, 20, 10
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
; GISEL-GFX12-NEXT: v_and_or_b32 v2, 0x3ff, v31, s0
; GISEL-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL-GFX12-NEXT: v_or3_b32 v0, v2, v0, v1
; GISEL-GFX12-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GISEL-GFX12-NEXT: s_wait_alu 0xfffd
; GISEL-GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GISEL-GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Workgroup IDs first, then the workitem IDs, accumulated into one OR chain.
  %wg.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.y = tail call i32 @llvm.amdgcn.workgroup.id.y()
  %wg.xy = or i32 %wg.x, %wg.y
  %wg.z = tail call i32 @llvm.amdgcn.workgroup.id.z()
  %wg.xyz = or i32 %wg.xy, %wg.z
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %ids.x = or i32 %wg.xyz, %tid.x
  %tid.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %ids.xy = or i32 %ids.x, %tid.y
  %tid.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  %ids.xyz = or i32 %ids.xy, %tid.z
  ; Test the complete six-way OR for any set bit.
  %any.nonzero = icmp ne i32 %ids.xyz, 0
  ret i1 %any.nonzero
}