[AMDGPU][GISel] RegBankLegalize rules for amdgcn_inverse_ballot (#190629)

This commit is contained in:
Chinmay Deshpande 2026-04-06 10:30:35 -07:00 committed by GitHub
parent 37801e9e99
commit 12e957fd7f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 21 additions and 10 deletions

View File

@@ -1706,6 +1706,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForIOpcs({amdgcn_wqm_demote}).Any({{}, {{}, {IntrId, Vcc}}});
addRulesForIOpcs({amdgcn_inverse_ballot})
.Any({{DivS1, _, S32}, {{Vcc}, {IntrId, SgprB32_ReadFirstLane}}})
.Any({{DivS1, _, S64}, {{Vcc}, {IntrId, SgprB64_ReadFirstLane}}});
addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
.Any({{DivS1}, {{Vcc}, {}}});

View File

@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 < %s | FileCheck -check-prefixes=GFX11,GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX11,GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=0 < %s | FileCheck -check-prefixes=GFX11,SDAG %s
; RUN: not llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -global-isel=1 < %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s
@@ -120,13 +120,15 @@ endif:
define amdgpu_cs void @inverse_ballot_branch(i32 inreg %s0_1, i32 inreg %s2, ptr addrspace(1) %out) {
; GISEL-LABEL: inverse_ballot_branch:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_xor_b32 s2, s1, -1
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: s_mov_b32 s2, exec_lo
; GISEL-NEXT: s_xor_b32 s2, s1, s2
; GISEL-NEXT: s_and_saveexec_b32 s1, s2
; GISEL-NEXT: ; %bb.1: ; %if
; GISEL-NEXT: s_add_i32 s0, s0, 1
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: ; %bb.2: ; %endif
; GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GISEL-NEXT: s_endpgm
;

View File

@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 -global-isel=1 < %s | FileCheck -check-prefix=GISEL_W64 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GISEL_W64 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 -global-isel=0 < %s | FileCheck -check-prefix=SDAG_W64 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 < %s | FileCheck -check-prefix=GISEL_W32 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GISEL_W32 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=0 < %s | FileCheck -check-prefix=SDAG_W32 %s
declare i1 @llvm.amdgcn.inverse.ballot.i64(i64)
@@ -294,15 +294,18 @@ endif:
define amdgpu_cs void @inverse_ballot_branch(i64 inreg %s0_1, i64 inreg %s2, ptr addrspace(1) %out) {
; GISEL_W64-LABEL: inverse_ballot_branch:
; GISEL_W64: ; %bb.0: ; %entry
; GISEL_W64-NEXT: s_xor_b64 s[4:5], s[2:3], -1
; GISEL_W64-NEXT: v_mov_b32_e32 v3, s1
; GISEL_W64-NEXT: v_mov_b32_e32 v2, s0
; GISEL_W64-NEXT: s_mov_b64 s[4:5], exec
; GISEL_W64-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
; GISEL_W64-NEXT: s_and_saveexec_b64 s[2:3], s[4:5]
; GISEL_W64-NEXT: ; %bb.1: ; %if
; GISEL_W64-NEXT: s_add_u32 s0, s0, 1
; GISEL_W64-NEXT: s_addc_u32 s1, s1, 0
; GISEL_W64-NEXT: ; %bb.2: ; %endif
; GISEL_W64-NEXT: s_or_b64 exec, exec, s[2:3]
; GISEL_W64-NEXT: v_mov_b32_e32 v3, s1
; GISEL_W64-NEXT: v_mov_b32_e32 v2, s0
; GISEL_W64-NEXT: ; %bb.2: ; %endif
; GISEL_W64-NEXT: s_or_b64 exec, exec, s[2:3]
; GISEL_W64-NEXT: global_store_b64 v[0:1], v[2:3], off
; GISEL_W64-NEXT: s_endpgm
;
@@ -324,14 +327,16 @@ define amdgpu_cs void @inverse_ballot_branch(i64 inreg %s0_1, i64 inreg %s2, ptr
;
; GISEL_W32-LABEL: inverse_ballot_branch:
; GISEL_W32: ; %bb.0: ; %entry
; GISEL_W32-NEXT: s_xor_b32 s3, s2, -1
; GISEL_W32-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GISEL_W32-NEXT: s_mov_b32 s3, exec_lo
; GISEL_W32-NEXT: s_xor_b32 s3, s2, s3
; GISEL_W32-NEXT: s_and_saveexec_b32 s2, s3
; GISEL_W32-NEXT: ; %bb.1: ; %if
; GISEL_W32-NEXT: s_add_u32 s0, s0, 1
; GISEL_W32-NEXT: s_addc_u32 s1, s1, 0
; GISEL_W32-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GISEL_W32-NEXT: ; %bb.2: ; %endif
; GISEL_W32-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GISEL_W32-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GISEL_W32-NEXT: global_store_b64 v[0:1], v[2:3], off
; GISEL_W32-NEXT: s_endpgm
;