AMDGPU: Check if immediate is legal for av_mov_b32_imm_pseudo (#160819)

This is primarily to avoid folding a frame index materialized
into an SGPR into the pseudo; this would end up looking like:
  %sreg = s_mov_b32 %stack.0
  %av_32 = av_mov_b32_imm_pseudo %sreg

That form is not useful.

Match the check used for the b64 case. The check is limited to the
pseudo to avoid regressions from gfx908's special case: that path
expects to pass through here with v_accvgpr_write_b32 for illegal
cases, and to stay in the intermediate state with an SGPR input.

This avoids regressions in a future patch.
This commit is contained in:
Matt Arsenault 2025-09-27 08:24:20 +09:00 committed by GitHub
parent f8d547fa8a
commit 597f93d36b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 146 additions and 6 deletions

View File

@ -1313,6 +1313,15 @@ void SIFoldOperandsImpl::foldOperand(
if (MovSrcRC) {
if (UseSubReg)
MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
// FIXME: We should be able to directly check immediate operand legality
// for all cases, but gfx908 hacks break.
if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
(!OpToFold.isImm() ||
!TII->isImmOperandLegal(MovDesc, SrcIdx,
*OpToFold.getEffectiveImmVal())))
break;
if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
break;

View File

@ -209,8 +209,8 @@ body: |
bb.0:
; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_av_32
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:sreg_32 = S_MOV_B32 65, implicit $exec
%1:av_32 = COPY %0
S_ENDPGM 0, implicit %1

View File

@ -240,8 +240,8 @@ body: |
bb.0:
; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
; GCN-NEXT: $agpr0 = COPY [[COPY]]
; GCN-NEXT: S_ENDPGM 0
%0:sreg_32 = S_MOV_B32 999
%1:av_32 = COPY %0
@ -257,8 +257,8 @@ body: |
bb.0:
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[V_MOV_B32_e32_]], implicit $exec
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
; GCN-NEXT: $agpr0 = COPY [[COPY]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
%1:av_32 = COPY %0

View File

@ -0,0 +1,131 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
# A frame index materialized into an SGPR by S_MOV_B32 is the source operand
# of AV_MOV_B32_IMM_PSEUDO, whose av_32 result is returned directly. The CHECK
# lines expect both instructions to come out of si-fold-operands unchanged,
# i.e. %stack.0 is not folded into the pseudo.
---
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
localFrameSize: 16384
stack:
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
bb.0:
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
; CHECK-NEXT: SI_RETURN implicit [[AV_MOV_]]
%0:sreg_32 = S_MOV_B32 %stack.0
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
SI_RETURN implicit %1
...
# Same frame-index-in-SGPR input to AV_MOV_B32_IMM_PSEUDO, but the av_32 result
# is then copied to a vgpr_32 and on to $vgpr0. The CHECK lines expect the whole
# sequence to be left as written: the frame index is not folded into the pseudo
# and the copies are kept.
---
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
localFrameSize: 16384
stack:
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
bb.0:
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%0:sreg_32 = S_MOV_B32 %stack.0
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
%2:vgpr_32 = COPY %1, implicit $exec
$vgpr0 = COPY %2
SI_RETURN implicit $vgpr0
...
# The SGPR feeding AV_MOV_B32_IMM_PSEUDO holds the constant 1234 instead of a
# frame index. The CHECK lines expect no change: 1234 is not folded into the
# pseudo.
# NOTE(review): presumably because 1234 is not a legal immediate for the
# pseudo's source operand - confirm against isImmOperandLegal.
---
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%0:sreg_32 = S_MOV_B32 1234
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
%2:vgpr_32 = COPY %1, implicit $exec
$vgpr0 = COPY %2
SI_RETURN implicit $vgpr0
...
# The SGPR feeding AV_MOV_B32_IMM_PSEUDO holds the constant 8. Here the CHECK
# lines expect the constant to be folded through: the S_MOV_B32 and the pseudo
# are gone, and a single V_MOV_B32_e32 8 defines the vgpr_32 that is copied to
# $vgpr0.
---
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%0:sreg_32 = S_MOV_B32 8
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
%2:vgpr_32 = COPY %1, implicit $exec
$vgpr0 = COPY %2
SI_RETURN implicit $vgpr0
...
# A frame index in an SGPR is copied to an av_32 register and then to a
# vgpr_32. The CHECK lines expect the intermediate av_32 COPY to disappear so
# that the vgpr_32 COPY reads the S_MOV_B32 result directly; no
# AV_MOV_B32_IMM_PSEUDO appears in the expected output.
---
name: fold_frame_index_av_regression_0
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
localFrameSize: 16384
stack:
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
bb.0:
; CHECK-LABEL: name: fold_frame_index_av_regression_0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%0:sreg_32 = S_MOV_B32 %stack.0
%1:av_32 = COPY %0
%2:vgpr_32 = COPY %1, implicit $exec
$vgpr0 = COPY %2
SI_RETURN implicit $vgpr0
...
# Variant of the previous regression test: the frame index passes through a
# second S_MOV_B32 (with a killed source) before the av_32 and vgpr_32 copies,
# and an unused S_MOV_B64 0 is also present. The CHECK lines expect only the
# original S_MOV_B32 %stack.0, a vgpr_32 COPY of it, and the copy to $vgpr0 to
# remain.
---
name: fold_frame_index_av_regression_1
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
localFrameSize: 16384
stack:
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
bb.0:
; CHECK-LABEL: name: fold_frame_index_av_regression_1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%0:sreg_32 = S_MOV_B32 %stack.0
%1:sreg_32 = S_MOV_B32 killed %0
%2:sreg_64 = S_MOV_B64 0
%3:av_32 = COPY %1
%4:vgpr_32 = COPY %3, implicit $exec
$vgpr0 = COPY %4
SI_RETURN implicit $vgpr0
...