AMDGPU/GlobalISel: RegBankLegalize rules for wqm_demote (#188288)
This commit is contained in:
parent
6e916d0598
commit
69f9ff6c19
@ -1660,6 +1660,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
|
||||
.Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
|
||||
.Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});
|
||||
|
||||
addRulesForIOpcs({amdgcn_wqm_demote}).Any({{}, {{}, {IntrId, Vcc}}});
|
||||
|
||||
addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
|
||||
.Any({{DivS1}, {{Vcc}, {}}});
|
||||
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-32 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefix=GFX10-64 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-32 %s
|
||||
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefix=GFX10-64 %s
|
||||
|
||||
define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
|
||||
; SI-LABEL: static_exact:
|
||||
@ -159,11 +159,12 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
||||
; SI: ; %bb.0: ; %.entry
|
||||
; SI-NEXT: v_cvt_i32_f32_e32 v0, v0
|
||||
; SI-NEXT: v_cvt_i32_f32_e32 v1, v1
|
||||
; SI-NEXT: s_mov_b64 s[2:3], exec
|
||||
; SI-NEXT: s_mov_b64 s[0:1], exec
|
||||
; SI-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; SI-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_xor_b64 s[2:3], vcc, -1
|
||||
; SI-NEXT: s_xor_b64 s[2:3], vcc, s[2:3]
|
||||
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; SI-NEXT: s_cbranch_execz .LBB2_3
|
||||
@ -186,11 +187,12 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
||||
; GFX9: ; %bb.0: ; %.entry
|
||||
; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
|
||||
; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], vcc, -1
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB2_3
|
||||
@ -213,11 +215,12 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
||||
; GFX10-32: ; %bb.0: ; %.entry
|
||||
; GFX10-32-NEXT: v_cvt_i32_f32_e32 v0, v0
|
||||
; GFX10-32-NEXT: v_cvt_i32_f32_e32 v1, v1
|
||||
; GFX10-32-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX10-32-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX10-32-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX10-32-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GFX10-32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, vcc_lo, -1
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, vcc_lo, s1
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s1
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB2_3
|
||||
@ -240,11 +243,12 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
|
||||
; GFX10-64: ; %bb.0: ; %.entry
|
||||
; GFX10-64-NEXT: v_cvt_i32_f32_e32 v0, v0
|
||||
; GFX10-64-NEXT: v_cvt_i32_f32_e32 v1, v1
|
||||
; GFX10-64-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX10-64-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX10-64-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; GFX10-64-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GFX10-64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], vcc, -1
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB2_3
|
||||
@ -674,18 +678,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: .LBB6_3: ; %.continue0
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[4:5]
|
||||
; SI-NEXT: v_mov_b32_e32 v1, v0
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: s_mov_b64 s[2:3], exec
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_b64 s[2:3], s[0:1], vcc
|
||||
; SI-NEXT: s_xor_b64 s[2:3], s[2:3], -1
|
||||
; SI-NEXT: s_and_b64 s[4:5], s[0:1], vcc
|
||||
; SI-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
|
||||
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; SI-NEXT: s_cbranch_execz .LBB6_6
|
||||
@ -722,18 +727,19 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[4:5]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX9-NEXT: s_nop 1
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX9-NEXT: s_nop 0
|
||||
; GFX9-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX9-NEXT: s_nop 1
|
||||
; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_b64 s[2:3], s[0:1], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], -1
|
||||
; GFX9-NEXT: s_and_b64 s[4:5], s[0:1], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB6_6
|
||||
@ -770,16 +776,17 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_mov_b32 s1, s0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s1
|
||||
; GFX10-32-NEXT: s_mov_b32 s2, s0
|
||||
; GFX10-32-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s2
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX10-32-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0
|
||||
; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_and_b32 s1, s0, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, s1, -1
|
||||
; GFX10-32-NEXT: s_and_b32 s2, s0, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, s2, s1
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s1
|
||||
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s2
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB6_6
|
||||
@ -816,16 +823,17 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX10-64-NEXT: .LBB6_3: ; %.continue0
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_mov_b64 s[2:3], s[0:1]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
|
||||
; GFX10-64-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[4:5]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX10-64-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1]
|
||||
; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_b64 s[2:3], s[0:1], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], s[2:3], -1
|
||||
; GFX10-64-NEXT: s_and_b64 s[4:5], s[0:1], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB6_6
|
||||
@ -885,7 +893,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; SI-NEXT: s_mov_b64 s[0:1], exec
|
||||
; SI-NEXT: s_wqm_b64 exec, exec
|
||||
; SI-NEXT: v_cvt_i32_f32_e32 v0, v0
|
||||
; SI-NEXT: s_mov_b32 s4, 0
|
||||
; SI-NEXT: s_mov_b32 s6, 0
|
||||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
@ -894,36 +902,37 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; SI-NEXT: ; %bb.2: ; %.demote0
|
||||
; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; SI-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; SI-NEXT: s_branch .LBB7_5
|
||||
; SI-NEXT: .LBB7_4: ; %.continue1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; SI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
|
||||
; SI-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
|
||||
; SI-NEXT: s_add_i32 s6, s6, 1
|
||||
; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
||||
; SI-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execz .LBB7_8
|
||||
; SI-NEXT: .LBB7_5: ; %.continue0
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
|
||||
; SI-NEXT: v_mov_b32_e32 v3, v2
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s6
|
||||
; SI-NEXT: s_mov_b64 s[8:9], s[0:1]
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[8:9]
|
||||
; SI-NEXT: v_mov_b32_e32 v2, v0
|
||||
; SI-NEXT: s_mov_b64 s[4:5], exec
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; SI-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
|
||||
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
|
||||
; SI-NEXT: s_and_b64 s[4:5], s[0:1], vcc
|
||||
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], -1
|
||||
; SI-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; SI-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
|
||||
; SI-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
|
||||
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
||||
; SI-NEXT: s_and_b64 s[8:9], s[0:1], vcc
|
||||
; SI-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
|
||||
; SI-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; SI-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
|
||||
; SI-NEXT: s_cbranch_execz .LBB7_4
|
||||
; SI-NEXT: ; %bb.6: ; %.demote1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
@ -931,8 +940,8 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; SI-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; SI-NEXT: ; %bb.7: ; %.demote1
|
||||
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; SI-NEXT: s_wqm_b64 s[8:9], s[0:1]
|
||||
; SI-NEXT: s_and_b64 exec, exec, s[8:9]
|
||||
; SI-NEXT: s_branch .LBB7_4
|
||||
; SI-NEXT: .LBB7_8: ; %.return
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
@ -951,7 +960,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX9-NEXT: s_wqm_b64 exec, exec
|
||||
; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0
|
||||
; GFX9-NEXT: s_mov_b32 s6, 0
|
||||
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
@ -960,36 +969,37 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX9-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; GFX9-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX9-NEXT: s_branch .LBB7_5
|
||||
; GFX9-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: v_add_u32_e32 v0, 1, v0
|
||||
; GFX9-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
|
||||
; GFX9-NEXT: s_add_i32 s6, s6, 1
|
||||
; GFX9-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
||||
; GFX9-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX9-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX9-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s6
|
||||
; GFX9-NEXT: s_mov_b64 s[8:9], s[0:1]
|
||||
; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[8:9]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX9-NEXT: s_mov_b64 s[4:5], exec
|
||||
; GFX9-NEXT: s_nop 0
|
||||
; GFX9-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX9-NEXT: s_nop 1
|
||||
; GFX9-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX9-NEXT: s_nop 1
|
||||
; GFX9-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX9-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
|
||||
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
|
||||
; GFX9-NEXT: s_and_b64 s[4:5], s[0:1], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], -1
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
|
||||
; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
|
||||
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
||||
; GFX9-NEXT: s_and_b64 s[8:9], s[0:1], vcc
|
||||
; GFX9-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
|
||||
; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
|
||||
; GFX9-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX9-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
@ -997,8 +1007,8 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; GFX9-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX9-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX9-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; GFX9-NEXT: s_wqm_b64 s[8:9], s[0:1]
|
||||
; GFX9-NEXT: s_and_b64 exec, exec, s[8:9]
|
||||
; GFX9-NEXT: s_branch .LBB7_4
|
||||
; GFX9-NEXT: .LBB7_8: ; %.return
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
@ -1030,29 +1040,30 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s3
|
||||
; GFX10-32-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX10-32-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-32-NEXT: s_branch .LBB7_5
|
||||
; GFX10-32-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-32-NEXT: v_add_nc_u32_e32 v0, 1, v0
|
||||
; GFX10-32-NEXT: v_cmp_ge_i32_e32 vcc_lo, v0, v1
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s3
|
||||
; GFX10-32-NEXT: s_add_i32 s2, s2, 1
|
||||
; GFX10-32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1
|
||||
; GFX10-32-NEXT: s_or_b32 s1, vcc_lo, s1
|
||||
; GFX10-32-NEXT: s_andn2_b32 exec_lo, exec_lo, s1
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX10-32-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX10-32-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-32-NEXT: s_mov_b32 s2, s0
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v2, v0, 0, s2
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX10-32-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-32-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-32-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
|
||||
; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
|
||||
; GFX10-32-NEXT: s_and_b32 s2, s0, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s2, s2, -1
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s3, s2
|
||||
; GFX10-32-NEXT: s_xor_b32 s2, exec_lo, s3
|
||||
; GFX10-32-NEXT: s_mov_b32 s4, s0
|
||||
; GFX10-32-NEXT: s_mov_b32 s3, exec_lo
|
||||
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, s2, 0, s4
|
||||
; GFX10-32-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-32-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
|
||||
; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
|
||||
; GFX10-32-NEXT: s_and_b32 s4, s0, vcc_lo
|
||||
; GFX10-32-NEXT: s_xor_b32 s3, s4, s3
|
||||
; GFX10-32-NEXT: s_and_saveexec_b32 s4, s3
|
||||
; GFX10-32-NEXT: s_xor_b32 s3, exec_lo, s4
|
||||
; GFX10-32-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX10-32-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
@ -1060,8 +1071,8 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; GFX10-32-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-32-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX10-32-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-32-NEXT: s_wqm_b32 s3, s0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s3
|
||||
; GFX10-32-NEXT: s_wqm_b32 s4, s0
|
||||
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s4
|
||||
; GFX10-32-NEXT: s_branch .LBB7_4
|
||||
; GFX10-32-NEXT: .LBB7_8: ; %.return
|
||||
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
@ -1080,7 +1091,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; GFX10-64-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX10-64-NEXT: s_wqm_b64 exec, exec
|
||||
; GFX10-64-NEXT: v_cvt_i32_f32_e32 v0, v0
|
||||
; GFX10-64-NEXT: s_mov_b32 s4, 0
|
||||
; GFX10-64-NEXT: s_mov_b32 s6, 0
|
||||
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
@ -1089,34 +1100,34 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
|
||||
; GFX10-64-NEXT: .LBB7_3: ; %.continue0.preheader
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX10-64-NEXT: s_mov_b64 s[2:3], 0
|
||||
; GFX10-64-NEXT: s_branch .LBB7_5
|
||||
; GFX10-64-NEXT: .LBB7_4: ; %.continue1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX10-64-NEXT: v_add_nc_u32_e32 v0, 1, v0
|
||||
; GFX10-64-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
|
||||
; GFX10-64-NEXT: s_add_i32 s6, s6, 1
|
||||
; GFX10-64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
|
||||
; GFX10-64-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX10-64-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB7_8
|
||||
; GFX10-64-NEXT: .LBB7_5: ; %.continue0
|
||||
; GFX10-64-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-64-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX10-64-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-64-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
|
||||
; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
|
||||
; GFX10-64-NEXT: s_and_b64 s[4:5], s[0:1], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[4:5], s[4:5], -1
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
|
||||
; GFX10-64-NEXT: s_mov_b64 s[8:9], s[0:1]
|
||||
; GFX10-64-NEXT: s_mov_b64 s[4:5], exec
|
||||
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, s6, 0, s[8:9]
|
||||
; GFX10-64-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX10-64-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
|
||||
; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
|
||||
; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
||||
; GFX10-64-NEXT: s_and_b64 s[8:9], s[0:1], vcc
|
||||
; GFX10-64-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
|
||||
; GFX10-64-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; GFX10-64-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
|
||||
; GFX10-64-NEXT: s_cbranch_execz .LBB7_4
|
||||
; GFX10-64-NEXT: ; %bb.6: ; %.demote1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
@ -1124,8 +1135,8 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
|
||||
; GFX10-64-NEXT: s_cbranch_scc0 .LBB7_9
|
||||
; GFX10-64-NEXT: ; %bb.7: ; %.demote1
|
||||
; GFX10-64-NEXT: ; in Loop: Header=BB7_5 Depth=1
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7]
|
||||
; GFX10-64-NEXT: s_wqm_b64 s[8:9], s[0:1]
|
||||
; GFX10-64-NEXT: s_and_b64 exec, exec, s[8:9]
|
||||
; GFX10-64-NEXT: s_branch .LBB7_4
|
||||
; GFX10-64-NEXT: .LBB7_8: ; %.return
|
||||
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s| FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s| FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: wqm_demote_scc
|
||||
@ -15,9 +14,8 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY2]](s1)
|
||||
; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[AMDGPU_COPY_VCC_SCC]](s1)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s1) = G_ICMP intpred(eq), %0, %1
|
||||
@ -52,9 +50,8 @@ body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: wqm_demote_constant_true
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32)
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1)
|
||||
; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[C]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[AMDGPU_COPY_VCC_SCC]](s1)
|
||||
%0:_(s1) = G_CONSTANT i1 true
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0
|
||||
...
|
||||
@ -67,9 +64,8 @@ body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: wqm_demote_constant_false
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32)
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1)
|
||||
; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[C]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[AMDGPU_COPY_VCC_SCC]](s1)
|
||||
%0:_(s1) = G_CONSTANT i1 false
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0
|
||||
...
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user