AMDGPU: Add pseudoinstruction for agpr or vgpr constants (#130042)
This commit is contained in:
parent
2e53856bda
commit
7425af4b7a
@ -1337,6 +1337,7 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
|
||||
case AMDGPU::S_MOV_B64:
|
||||
case AMDGPU::V_MOV_B64_e32:
|
||||
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
|
||||
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
|
||||
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
|
||||
case AMDGPU::V_MOV_B64_PSEUDO: {
|
||||
const MachineOperand &Src0 = MI.getOperand(1);
|
||||
@ -2186,7 +2187,13 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
||||
MI.getMF()->getRegInfo().constrainRegClass(MI.getOperand(0).getReg(),
|
||||
&AMDGPU::SReg_32_XM0RegClass);
|
||||
break;
|
||||
|
||||
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
bool IsAGPR = SIRegisterInfo::isAGPRClass(RI.getPhysRegBaseClass(Dst));
|
||||
MI.setDesc(
|
||||
get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
|
||||
break;
|
||||
}
|
||||
case AMDGPU::V_MOV_B64_PSEUDO: {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
|
||||
@ -3423,6 +3430,7 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
|
||||
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
|
||||
case AMDGPU::V_ACCVGPR_READ_B32_e64:
|
||||
case AMDGPU::V_ACCVGPR_MOV_B32:
|
||||
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -140,6 +140,25 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
|
||||
let UseNamedOperandTable = 1;
|
||||
}
|
||||
|
||||
// 32-bit materialize immediate which supports AGPR or VGPR. Typically
|
||||
// this should just expand to V_MOV_B32, unless $vdst happens to be
|
||||
// allocated to an AGPR in which case it will lower to
|
||||
// V_ACCVGPR_WRITE_B32. This should always use an inline immediate
|
||||
// operand, as v_accvgpr_write_b32 does not support literal constants.
|
||||
def AV_MOV_B32_IMM_PSEUDO
|
||||
: VPseudoInstSI<(outs AV_32:$vdst), (ins VCSrc_b32:$src0)> {
|
||||
let isReMaterializable = 1;
|
||||
let isAsCheapAsAMove = 1;
|
||||
|
||||
// Imprecise: technically it's VOP3 if allocated to an AGPR and VOP1 if a VGPR. But
|
||||
// this tricks the rematerialize logic into working for it.
|
||||
let VOP3 = 1;
|
||||
let isMoveImm = 1;
|
||||
let SchedRW = [Write32Bit];
|
||||
let Size = 4;
|
||||
let UseNamedOperandTable = 1;
|
||||
}
|
||||
|
||||
// 64-bit vector move with dpp. Expanded post-RA.
|
||||
def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64> {
|
||||
let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete.
|
||||
|
56
llvm/test/CodeGen/AMDGPU/av_movimm_pseudo_expansion.mir
Normal file
56
llvm/test/CodeGen/AMDGPU/av_movimm_pseudo_expansion.mir
Normal file
@ -0,0 +1,56 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=postrapseudos %s -o - | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=postrapseudos %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_agpr_0
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_0
|
||||
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
|
||||
$agpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_agpr_64
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_64
|
||||
; CHECK: $agpr0 = V_ACCVGPR_WRITE_B32_e64 64, implicit $exec
|
||||
$agpr0 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_vgpr_0
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_vgpr_0
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_vgpr_64
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_vgpr_64
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 64, implicit $exec
|
||||
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_agpr_vgpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
; CHECK-LABEL: name: av_mov_b32_imm_pseudo_agpr_vgpr
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec
|
||||
$agpr1 = AV_MOV_B32_IMM_PSEUDO $vgpr0, implicit $exec
|
||||
...
|
@ -109,3 +109,128 @@ body: |
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
|
||||
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0
|
||||
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
$agpr0 = COPY %0
|
||||
S_ENDPGM 0, implicit $agpr0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
|
||||
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr0 = COPY [[AV_MOV_]]
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
|
||||
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
$vgpr0 = COPY %0
|
||||
S_ENDPGM 0, implicit $vgpr0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
|
||||
; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
|
||||
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
%1:agpr_32 = COPY %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
%1:vgpr_32 = COPY %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: v_mov_b32_imm_literal_copy_v_to_agpr_32
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
|
||||
; GCN-NEXT: $agpr0 = COPY [[COPY]]
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
|
||||
%1:agpr_32 = COPY %0
|
||||
$agpr0 = COPY %1
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# FIXME: Register class restrictions of av register not respected,
|
||||
# issue 130020
|
||||
|
||||
# ---
|
||||
# name: s_mov_b32_inlineimm_copy_s_to_av_32
|
||||
# tracksRegLiveness: true
|
||||
# body: |
|
||||
# bb.0:
|
||||
# %0:sreg_32 = S_MOV_B32 32
|
||||
# %1:av_32 = COPY %0
|
||||
# $agpr0 = COPY %1
|
||||
# S_ENDPGM 0
|
||||
|
||||
# ...
|
||||
|
||||
# ---
|
||||
# name: v_mov_b32_inlineimm_copy_v_to_av_32
|
||||
# tracksRegLiveness: true
|
||||
# body: |
|
||||
# bb.0:
|
||||
# %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
|
||||
# %1:av_32 = COPY %0
|
||||
# $agpr0 = COPY %1
|
||||
# S_ENDPGM 0
|
||||
# ...
|
||||
|
||||
# ---
|
||||
# name: s_mov_b32_imm_literal_copy_s_to_av_32
|
||||
# tracksRegLiveness: true
|
||||
# body: |
|
||||
# bb.0:
|
||||
# %0:sreg_32 = S_MOV_B32 999
|
||||
# %1:av_32 = COPY %0
|
||||
# $agpr0 = COPY %1
|
||||
# S_ENDPGM 0
|
||||
|
||||
# ...
|
||||
|
||||
# ---
|
||||
# name: v_mov_b32_imm_literal_copy_v_to_av_32
|
||||
# tracksRegLiveness: true
|
||||
# body: |
|
||||
# bb.0:
|
||||
# %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
|
||||
# %1:av_32 = COPY %0
|
||||
# $agpr0 = COPY %1
|
||||
# S_ENDPGM 0
|
||||
|
||||
# ...
|
||||
|
113
llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir
Normal file
113
llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir
Normal file
@ -0,0 +1,113 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -simplify-mir -start-before=greedy,2 -stress-regalloc=3 -stop-after=postrapseudos -o - -verify-regalloc %s | FileCheck %s
|
||||
|
||||
# Compare results of using V_MOV_B32 vs. AV_MOV_B32_IMM_PSEUDO during
|
||||
# allocation.
|
||||
|
||||
---
|
||||
name: av_mov_b32_split
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 7
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $sgpr4_sgpr5
|
||||
|
||||
; CHECK-LABEL: name: av_mov_b32_split
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
|
||||
; CHECK-NEXT: renamable $agpr2 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
|
||||
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
|
||||
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
|
||||
; CHECK-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
|
||||
; CHECK-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 4, implicit $exec
|
||||
; CHECK-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr1
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr2
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
|
||||
%0:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
%1:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
|
||||
%2:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
|
||||
%3:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 3, implicit $exec
|
||||
%4:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 4, implicit $exec
|
||||
|
||||
%5:agpr_32 = COPY %0
|
||||
%6:agpr_32 = COPY %1
|
||||
%7:agpr_32 = COPY %2
|
||||
%8:agpr_32 = COPY %3
|
||||
%9:agpr_32 = COPY %4
|
||||
|
||||
S_NOP 0, implicit %5
|
||||
S_NOP 0, implicit %6
|
||||
S_NOP 0, implicit %7
|
||||
S_NOP 0, implicit %8
|
||||
S_NOP 0, implicit %9
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: v_mov_b32_split
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
occupancy: 7
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $sgpr4_sgpr5
|
||||
|
||||
; CHECK-LABEL: name: v_mov_b32_split
|
||||
; CHECK: liveins: $vgpr0, $vgpr3, $vgpr4, $vgpr5, $sgpr4_sgpr5
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
||||
; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 2, implicit $exec
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; CHECK-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
|
||||
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec
|
||||
; CHECK-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; CHECK-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
|
||||
; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 4, implicit $exec
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; CHECK-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr1
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr2
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
|
||||
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec
|
||||
; CHECK-NEXT: S_NOP 0, implicit killed renamable $agpr0
|
||||
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
%2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
|
||||
%3:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
|
||||
%4:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
|
||||
|
||||
%5:agpr_32 = COPY %0
|
||||
%6:agpr_32 = COPY %1
|
||||
%7:agpr_32 = COPY %2
|
||||
%8:agpr_32 = COPY %3
|
||||
%9:agpr_32 = COPY %4
|
||||
|
||||
S_NOP 0, implicit %5
|
||||
S_NOP 0, implicit %6
|
||||
S_NOP 0, implicit %7
|
||||
S_NOP 0, implicit %8
|
||||
S_NOP 0, implicit %9
|
||||
|
||||
...
|
||||
|
@ -563,3 +563,68 @@ body: |
|
||||
SI_RETURN_TO_EPILOG %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_v_mov_b32_e32_literal_to_agpr
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: fold_v_mov_b32_e32_literal_to_agpr
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY killed [[V_MOV_B32_e32_]]
|
||||
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[COPY]]
|
||||
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
|
||||
%1:agpr_32 = COPY killed %0
|
||||
SI_RETURN_TO_EPILOG implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_v_mov_b32_e32_inlineimm_to_agpr
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: fold_v_mov_b32_e32_inlineimm_to_agpr
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
|
||||
; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 64, implicit $exec
|
||||
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_ACCVGPR_WRITE_B32_e64_]]
|
||||
%0:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
|
||||
%1:agpr_32 = COPY killed %0
|
||||
SI_RETURN_TO_EPILOG implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_av_mov_b32_imm_pseudo_inlineimm_to_vgpr
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: fold_av_mov_b32_imm_pseudo_inlineimm_to_vgpr
|
||||
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
|
||||
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
|
||||
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B32_e32_]]
|
||||
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
|
||||
%1:vgpr_32 = COPY killed %0
|
||||
SI_RETURN_TO_EPILOG implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_av_mov_b32_imm_pseudo_inlineimm_to_agpr
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: fold_av_mov_b32_imm_pseudo_inlineimm_to_agpr
|
||||
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
|
||||
; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 64, implicit $exec
|
||||
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_ACCVGPR_WRITE_B32_e64_]]
|
||||
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
|
||||
%1:agpr_32 = COPY killed %0
|
||||
SI_RETURN_TO_EPILOG implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
name: fold_av_mov_b32_imm_pseudo_inlineimm_to_av
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: fold_av_mov_b32_imm_pseudo_inlineimm_to_av
|
||||
; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY killed [[AV_MOV_]]
|
||||
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[COPY]]
|
||||
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 64, implicit $exec
|
||||
%1:av_32 = COPY killed %0
|
||||
SI_RETURN_TO_EPILOG implicit %1
|
||||
...
|
||||
|
@ -153,3 +153,146 @@ body: |
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# Same, except uses AV_MOV_B32_IMM_PSEUDO which is able to
|
||||
# rematerialize in the inflated register class.
|
||||
---
|
||||
name: temp_vgpr_to_agpr_should_not_undo_split_with_remat_av_pseudo
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
argumentInfo:
|
||||
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
|
||||
workGroupIDX: { reg: '$sgpr6' }
|
||||
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
|
||||
workItemIDX: { reg: '$vgpr0' }
|
||||
occupancy: 4
|
||||
sgprForEXECCopy: '$sgpr100_sgpr101'
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $sgpr4_sgpr5
|
||||
|
||||
; CHECK-LABEL: name: temp_vgpr_to_agpr_should_not_undo_split_with_remat_av_pseudo
|
||||
; CHECK: liveins: $vgpr0, $sgpr4_sgpr5
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: S_NOP 0, implicit-def $agpr0
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; CHECK-NEXT: [[V_LSHLREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 7, [[COPY]], implicit $exec
|
||||
; CHECK-NEXT: undef [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub28_sub29_sub30_sub31:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 112, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub24_sub25_sub26_sub27:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 96, 0, implicit $exec :: (load (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub20_sub21_sub22_sub23:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 80, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub16_sub17_sub18_sub19:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 64, 0, implicit $exec :: (load (s128), align 64, addrspace 1)
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub12_sub13_sub14_sub15:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 48, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub8_sub9_sub10_sub11:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 32, 0, implicit $exec :: (load (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub4_sub5_sub6_sub7:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 16, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub0_sub1_sub2_sub3:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, [[V_LSHLREV_B32_e32_]], 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 1065353216, implicit $exec
|
||||
; CHECK-NEXT: [[AV_MOV_1:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 1073741824, implicit $exec
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 [[AV_MOV_]], [[AV_MOV_1]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[AV_MOV_2:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 1073741824, implicit $exec
|
||||
; CHECK-NEXT: [[AV_MOV_3:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 1065353216, implicit $exec
|
||||
; CHECK-NEXT: early-clobber %5:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_vgprcd_e64 [[AV_MOV_3]], [[AV_MOV_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub2:vreg_1024_align2 = COPY %5.sub0
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub3:vreg_1024_align2 = COPY %5.sub1
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub4:vreg_1024_align2 = COPY %5.sub2
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub5:vreg_1024_align2 = COPY %5.sub3
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub6:vreg_1024_align2 = COPY %5.sub4
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub7:vreg_1024_align2 = COPY %5.sub5
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub8:vreg_1024_align2 = COPY %5.sub6
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub9:vreg_1024_align2 = COPY %5.sub7
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub10:vreg_1024_align2 = COPY %5.sub8
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub11:vreg_1024_align2 = COPY %5.sub9
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub12:vreg_1024_align2 = COPY %5.sub10
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub13:vreg_1024_align2 = COPY %5.sub11
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub14:vreg_1024_align2 = COPY %5.sub12
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub15:vreg_1024_align2 = COPY %5.sub13
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub16:vreg_1024_align2 = COPY %5.sub14
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub17:vreg_1024_align2 = COPY %5.sub15
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub18:vreg_1024_align2 = COPY %5.sub16
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub19:vreg_1024_align2 = COPY %5.sub17
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub20:vreg_1024_align2 = COPY %5.sub18
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub21:vreg_1024_align2 = COPY %5.sub19
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub22:vreg_1024_align2 = COPY %5.sub20
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub23:vreg_1024_align2 = COPY %5.sub21
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub24:vreg_1024_align2 = COPY %5.sub22
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub25:vreg_1024_align2 = COPY %5.sub23
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub26:vreg_1024_align2 = COPY %5.sub24
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub27:vreg_1024_align2 = COPY %5.sub25
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub28:vreg_1024_align2 = COPY %5.sub26
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub29:vreg_1024_align2 = COPY %5.sub27
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub30:vreg_1024_align2 = COPY %5.sub28
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]].sub31:vreg_1024_align2 = COPY %5.sub29
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 [[AV_MOV_3]], [[AV_MOV_2]], [[GLOBAL_LOAD_DWORDX4_SADDR]], 0, 0, 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[AV_MOV_4:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub24_sub25_sub26_sub27, renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub28_sub29_sub30_sub31, renamable $sgpr0_sgpr1, 112, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub16_sub17_sub18_sub19, renamable $sgpr0_sgpr1, 64, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub20_sub21_sub22_sub23, renamable $sgpr0_sgpr1, 80, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub8_sub9_sub10_sub11, renamable $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub12_sub13_sub14_sub15, renamable $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2_sub3, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[AV_MOV_4]], [[GLOBAL_LOAD_DWORDX4_SADDR]].sub4_sub5_sub6_sub7, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
S_NOP 0, implicit-def $agpr0
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
%1:vgpr_32 = V_LSHLREV_B32_e32 7, %0, implicit $exec
|
||||
undef %2.sub28_sub29_sub30_sub31:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 112, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
%2.sub24_sub25_sub26_sub27:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 96, 0, implicit $exec :: (load (s128), align 32, addrspace 1)
|
||||
%2.sub20_sub21_sub22_sub23:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 80, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
%2.sub16_sub17_sub18_sub19:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 64, 0, implicit $exec :: (load (s128), align 64, addrspace 1)
|
||||
%2.sub12_sub13_sub14_sub15:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 48, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
%2.sub8_sub9_sub10_sub11:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 32, 0, implicit $exec :: (load (s128), align 32, addrspace 1)
|
||||
%2.sub4_sub5_sub6_sub7:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 16, 0, implicit $exec :: (load (s128), addrspace 1)
|
||||
%2.sub0_sub1_sub2_sub3:vreg_1024_align2 = GLOBAL_LOAD_DWORDX4_SADDR renamable $sgpr0_sgpr1, %1, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1)
|
||||
%3:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1065353216, implicit $exec
|
||||
%4:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1073741824, implicit $exec
|
||||
%2:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec
|
||||
early-clobber %5:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
|
||||
%2.sub2:vreg_1024_align2 = COPY %5.sub0
|
||||
%2.sub3:vreg_1024_align2 = COPY %5.sub1
|
||||
%2.sub4:vreg_1024_align2 = COPY %5.sub2
|
||||
%2.sub5:vreg_1024_align2 = COPY %5.sub3
|
||||
%2.sub6:vreg_1024_align2 = COPY %5.sub4
|
||||
%2.sub7:vreg_1024_align2 = COPY %5.sub5
|
||||
%2.sub8:vreg_1024_align2 = COPY %5.sub6
|
||||
%2.sub9:vreg_1024_align2 = COPY %5.sub7
|
||||
%2.sub10:vreg_1024_align2 = COPY %5.sub8
|
||||
%2.sub11:vreg_1024_align2 = COPY %5.sub9
|
||||
%2.sub12:vreg_1024_align2 = COPY %5.sub10
|
||||
%2.sub13:vreg_1024_align2 = COPY %5.sub11
|
||||
%2.sub14:vreg_1024_align2 = COPY %5.sub12
|
||||
%2.sub15:vreg_1024_align2 = COPY %5.sub13
|
||||
%2.sub16:vreg_1024_align2 = COPY %5.sub14
|
||||
%2.sub17:vreg_1024_align2 = COPY %5.sub15
|
||||
%2.sub18:vreg_1024_align2 = COPY %5.sub16
|
||||
%2.sub19:vreg_1024_align2 = COPY %5.sub17
|
||||
%2.sub20:vreg_1024_align2 = COPY %5.sub18
|
||||
%2.sub21:vreg_1024_align2 = COPY %5.sub19
|
||||
%2.sub22:vreg_1024_align2 = COPY %5.sub20
|
||||
%2.sub23:vreg_1024_align2 = COPY %5.sub21
|
||||
%2.sub24:vreg_1024_align2 = COPY %5.sub22
|
||||
%2.sub25:vreg_1024_align2 = COPY %5.sub23
|
||||
%2.sub26:vreg_1024_align2 = COPY %5.sub24
|
||||
%2.sub27:vreg_1024_align2 = COPY %5.sub25
|
||||
%2.sub28:vreg_1024_align2 = COPY %5.sub26
|
||||
%2.sub29:vreg_1024_align2 = COPY %5.sub27
|
||||
%2.sub30:vreg_1024_align2 = COPY %5.sub28
|
||||
%2.sub31:vreg_1024_align2 = COPY %5.sub29
|
||||
%2:vreg_1024_align2 = V_MFMA_F32_32X32X1F32_mac_vgprcd_e64 %3, %4, %2, 0, 0, 0, implicit $mode, implicit $exec
|
||||
%6:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub24_sub25_sub26_sub27, renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub28_sub29_sub30_sub31, renamable $sgpr0_sgpr1, 112, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub16_sub17_sub18_sub19, renamable $sgpr0_sgpr1, 64, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub20_sub21_sub22_sub23, renamable $sgpr0_sgpr1, 80, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub8_sub9_sub10_sub11, renamable $sgpr0_sgpr1, 32, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub12_sub13_sub14_sub15, renamable $sgpr0_sgpr1, 48, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub0_sub1_sub2_sub3, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
|
||||
GLOBAL_STORE_DWORDX4_SADDR %6, %2.sub4_sub5_sub6_sub7, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec :: (store (s128), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
@ -90,3 +90,48 @@ body: |
|
||||
%4.sub2_sub3:vreg_192 = COPY %2:vreg_64_align2
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: av_mov_imm_b32
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: av_mov_imm_b32
|
||||
; CHECK: bb.0:
|
||||
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK-NEXT: liveins: $sgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: undef [[AV_MOV_:%[0-9]+]].sub0:vreg_96 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]].sub1:vreg_96 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0
|
||||
; CHECK-NEXT: $exec = S_MOV_B64_term [[COPY]]
|
||||
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.1:
|
||||
; CHECK-NEXT: successors: %bb.2(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]].sub0:vreg_96 = V_MUL_F32_e32 [[AV_MOV_]].sub0, [[AV_MOV_]].sub0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]].sub1:vreg_96 = V_MUL_F32_e32 [[AV_MOV_]].sub1, [[AV_MOV_]].sub1, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2:
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
%0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
||||
%1:vgpr_32 = COPY %0
|
||||
%2:vgpr_32 = COPY %0
|
||||
%3:sreg_64 = COPY $sgpr0
|
||||
$exec = S_MOV_B64_term %3:sreg_64
|
||||
S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
%1:vgpr_32 = V_MUL_F32_e32 %1:vgpr_32, %1:vgpr_32, implicit $mode, implicit $exec
|
||||
%2:vgpr_32 = V_MUL_F32_e32 %2:vgpr_32, %2:vgpr_32, implicit $mode, implicit $exec
|
||||
|
||||
bb.2:
|
||||
undef %4.sub0:vreg_96 = COPY %1:vgpr_32
|
||||
%4.sub1:vreg_96 = COPY %2:vgpr_32
|
||||
S_ENDPGM 0, implicit %4
|
||||
|
||||
...
|
||||
|
@ -0,0 +1,15 @@
|
||||
# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=none -o /dev/null %s 2>&1 | FileCheck %s
|
||||
---
|
||||
name: invalid_av_mov_b32_imm_pseudo
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
|
||||
; CHECK: *** Bad machine code: Illegal immediate value for operand. ***
|
||||
$agpr0 = AV_MOV_B32_IMM_PSEUDO 65, implicit $exec
|
||||
|
||||
; CHECK: *** Bad machine code: Illegal immediate value for operand. ***
|
||||
$vgpr0 = AV_MOV_B32_IMM_PSEUDO 65, implicit $exec
|
||||
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user