AMDGPU: Check if immediate is legal for av_mov_b32_imm_pseudo (#160819)
This is primarily to avoid folding a frame index materialized into an SGPR into the pseudo; this would end up looking like:

  %sreg = s_mov_b32 %stack.0
  %av_32 = av_mov_b32_imm_pseudo %sreg

which is not useful. Match the check used for the b64 case. This is limited to the pseudo to avoid regressions due to gfx908's special case: it expects to pass here with v_accvgpr_write_b32 for illegal cases, and to stay in the intermediate state with an SGPR input. This avoids regressions in a future patch.
This commit is contained in:
parent
f8d547fa8a
commit
597f93d36b
@ -1313,6 +1313,15 @@ void SIFoldOperandsImpl::foldOperand(
|
||||
if (MovSrcRC) {
|
||||
if (UseSubReg)
|
||||
MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
|
||||
|
||||
// FIXME: We should be able to directly check immediate operand legality
|
||||
// for all cases, but gfx908 hacks break.
|
||||
if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
|
||||
(!OpToFold.isImm() ||
|
||||
!TII->isImmOperandLegal(MovDesc, SrcIdx,
|
||||
*OpToFold.getEffectiveImmVal())))
|
||||
break;
|
||||
|
||||
if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
|
||||
break;
|
||||
|
||||
|
||||
@ -209,8 +209,8 @@ body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_av_32
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
|
||||
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
|
||||
%0:sreg_32 = S_MOV_B32 65, implicit $exec
|
||||
%1:av_32 = COPY %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
@ -240,8 +240,8 @@ body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
|
||||
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
|
||||
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
|
||||
; GCN-NEXT: $agpr0 = COPY [[COPY]]
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
%0:sreg_32 = S_MOV_B32 999
|
||||
%1:av_32 = COPY %0
|
||||
@ -257,8 +257,8 @@ body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
|
||||
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
|
||||
; GCN-NEXT: $agpr0 = COPY [[COPY]]
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
|
||||
%1:av_32 = COPY %0
|
||||
|
||||
131
llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
Normal file
131
llvm/test/CodeGen/AMDGPU/fold-operands-frame-index-agpr.mir
Normal file
@ -0,0 +1,131 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
|
||||
tracksRegLiveness: true
|
||||
frameInfo:
|
||||
maxAlignment: 4
|
||||
localFrameSize: 16384
|
||||
stack:
|
||||
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
|
||||
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
|
||||
; CHECK-NEXT: SI_RETURN implicit [[AV_MOV_]]
|
||||
%0:sreg_32 = S_MOV_B32 %stack.0
|
||||
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
|
||||
SI_RETURN implicit %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
|
||||
tracksRegLiveness: true
|
||||
frameInfo:
|
||||
maxAlignment: 4
|
||||
localFrameSize: 16384
|
||||
stack:
|
||||
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
|
||||
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
|
||||
; CHECK-NEXT: SI_RETURN implicit $vgpr0
|
||||
%0:sreg_32 = S_MOV_B32 %stack.0
|
||||
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
|
||||
%2:vgpr_32 = COPY %1, implicit $exec
|
||||
$vgpr0 = COPY %2
|
||||
SI_RETURN implicit $vgpr0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
|
||||
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
|
||||
; CHECK-NEXT: SI_RETURN implicit $vgpr0
|
||||
%0:sreg_32 = S_MOV_B32 1234
|
||||
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
|
||||
%2:vgpr_32 = COPY %1, implicit $exec
|
||||
$vgpr0 = COPY %2
|
||||
SI_RETURN implicit $vgpr0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
|
||||
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8, implicit $exec
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
|
||||
; CHECK-NEXT: SI_RETURN implicit $vgpr0
|
||||
%0:sreg_32 = S_MOV_B32 8
|
||||
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
|
||||
%2:vgpr_32 = COPY %1, implicit $exec
|
||||
$vgpr0 = COPY %2
|
||||
SI_RETURN implicit $vgpr0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_frame_index_av_regression_0
|
||||
tracksRegLiveness: true
|
||||
frameInfo:
|
||||
maxAlignment: 4
|
||||
localFrameSize: 16384
|
||||
stack:
|
||||
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: fold_frame_index_av_regression_0
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
|
||||
; CHECK-NEXT: SI_RETURN implicit $vgpr0
|
||||
%0:sreg_32 = S_MOV_B32 %stack.0
|
||||
%1:av_32 = COPY %0
|
||||
%2:vgpr_32 = COPY %1, implicit $exec
|
||||
$vgpr0 = COPY %2
|
||||
SI_RETURN implicit $vgpr0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_frame_index_av_regression_1
|
||||
tracksRegLiveness: true
|
||||
frameInfo:
|
||||
maxAlignment: 4
|
||||
localFrameSize: 16384
|
||||
stack:
|
||||
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: fold_frame_index_av_regression_1
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
|
||||
; CHECK-NEXT: SI_RETURN implicit $vgpr0
|
||||
%0:sreg_32 = S_MOV_B32 %stack.0
|
||||
%1:sreg_32 = S_MOV_B32 killed %0
|
||||
%2:sreg_64 = S_MOV_B64 0
|
||||
%3:av_32 = COPY %1
|
||||
%4:vgpr_32 = COPY %3, implicit $exec
|
||||
$vgpr0 = COPY %4
|
||||
SI_RETURN implicit $vgpr0
|
||||
|
||||
...
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user