diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 9ce1e1afb064..8ad5d8ed5552 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -1706,6 +1706,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForIOpcs({amdgcn_wqm_demote}).Any({{}, {{}, {IntrId, Vcc}}}); + addRulesForIOpcs({amdgcn_inverse_ballot}) + .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, SgprB32_ReadFirstLane}}}) + .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, SgprB64_ReadFirstLane}}}); + addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live}) .Any({{DivS1}, {{Vcc}, {}}}); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll index 05b786bdb5c7..e3275d15656e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 < %s | FileCheck -check-prefixes=GFX11,GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX11,GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=0 < %s | FileCheck -check-prefixes=GFX11,SDAG %s ; RUN: not llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -global-isel=1 < %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s @@ -120,13 +120,15 @@ endif: define amdgpu_cs void @inverse_ballot_branch(i32 inreg %s0_1, i32 inreg %s2, ptr addrspace(1) %out) { ; GISEL-LABEL: inverse_ballot_branch: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: s_xor_b32 s2, s1, -1 +; GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-NEXT: s_mov_b32 s2, exec_lo +; GISEL-NEXT: s_xor_b32 s2, s1, s2 ; GISEL-NEXT: s_and_saveexec_b32 s1, s2 ; GISEL-NEXT: ; %bb.1: ; %if ; GISEL-NEXT: s_add_i32 s0, s0, 1 +; GISEL-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-NEXT: ; %bb.2: ; %endif ; GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GISEL-NEXT: v_mov_b32_e32 v2, s0 ; GISEL-NEXT: global_store_b32 v[0:1], v2, off ; GISEL-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll index 193fbdf35ec7..9d8608b4c7dd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 -global-isel=1 < %s | FileCheck -check-prefix=GISEL_W64 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GISEL_W64 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 -global-isel=0 < %s | FileCheck -check-prefix=SDAG_W64 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 < %s | FileCheck -check-prefix=GISEL_W32 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GISEL_W32 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=0 < %s | FileCheck -check-prefix=SDAG_W32 %s declare i1 @llvm.amdgcn.inverse.ballot.i64(i64) @@ -294,15 +294,18 @@ endif: define amdgpu_cs void @inverse_ballot_branch(i64 inreg %s0_1, i64 inreg %s2, ptr addrspace(1) %out) { ; GISEL_W64-LABEL: inverse_ballot_branch: ; GISEL_W64: ; %bb.0: ; %entry -; GISEL_W64-NEXT: s_xor_b64 s[4:5], s[2:3], -1 +; GISEL_W64-NEXT: v_mov_b32_e32 v3, s1 +; GISEL_W64-NEXT: v_mov_b32_e32 v2, s0 +; GISEL_W64-NEXT: s_mov_b64 s[4:5], exec +; GISEL_W64-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5] ; GISEL_W64-NEXT: s_and_saveexec_b64 s[2:3], s[4:5] ; GISEL_W64-NEXT: ; %bb.1: ; %if ; GISEL_W64-NEXT: s_add_u32 s0, s0, 1 ; GISEL_W64-NEXT: s_addc_u32 s1, s1, 0 -; GISEL_W64-NEXT: ; %bb.2: ; %endif -; GISEL_W64-NEXT: s_or_b64 exec, exec, s[2:3] ; GISEL_W64-NEXT: v_mov_b32_e32 v3, s1 ; GISEL_W64-NEXT: v_mov_b32_e32 v2, s0 +; GISEL_W64-NEXT: ; %bb.2: ; %endif +; GISEL_W64-NEXT: s_or_b64 exec, exec, s[2:3] ; GISEL_W64-NEXT: global_store_b64 v[0:1], v[2:3], off ; GISEL_W64-NEXT: s_endpgm ; @@ -324,14 +327,16 @@ define amdgpu_cs void @inverse_ballot_branch(i64 inreg %s0_1, i64 inreg %s2, ptr ; ; GISEL_W32-LABEL: inverse_ballot_branch: ; GISEL_W32: ; %bb.0: ; %entry -; GISEL_W32-NEXT: s_xor_b32 s3, s2, -1 +; GISEL_W32-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 +; GISEL_W32-NEXT: s_mov_b32 s3, exec_lo +; GISEL_W32-NEXT: s_xor_b32 s3, s2, s3 ; GISEL_W32-NEXT: s_and_saveexec_b32 s2, s3 ; GISEL_W32-NEXT: ; %bb.1: ; %if ; GISEL_W32-NEXT: s_add_u32 s0, s0, 1 ; GISEL_W32-NEXT: s_addc_u32 s1, s1, 0 +; GISEL_W32-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GISEL_W32-NEXT: ; %bb.2: ; %endif ; GISEL_W32-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; GISEL_W32-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GISEL_W32-NEXT: global_store_b64 v[0:1], v[2:3], off ; GISEL_W32-NEXT: s_endpgm ;