[AMDGPU][SILoadStoreOptimizer] Include constrained buffer load variants (#101619)
Use the constrained buffer load opcodes while combining under-aligned loads for XNACK-enabled subtargets.
This commit is contained in:
parent
19f379420b
commit
37d7b06da0
@ -352,6 +352,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
|
||||
return 1;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec:
|
||||
case AMDGPU::S_LOAD_DWORDX2_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX2_IMM_ec:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX2:
|
||||
@ -363,6 +365,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
|
||||
return 2;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec:
|
||||
case AMDGPU::S_LOAD_DWORDX3_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX3_IMM_ec:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX3:
|
||||
@ -374,6 +378,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
|
||||
return 3;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec:
|
||||
case AMDGPU::S_LOAD_DWORDX4_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX4_IMM_ec:
|
||||
case AMDGPU::GLOBAL_LOAD_DWORDX4:
|
||||
@ -385,6 +391,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
|
||||
return 4;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec:
|
||||
case AMDGPU::S_LOAD_DWORDX8_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX8_IMM_ec:
|
||||
return 8;
|
||||
@ -499,12 +507,20 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec:
|
||||
return S_BUFFER_LOAD_IMM;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec:
|
||||
return S_BUFFER_LOAD_SGPR_IMM;
|
||||
case AMDGPU::S_LOAD_DWORD_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX2_IMM:
|
||||
@ -587,12 +603,20 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM;
|
||||
case AMDGPU::S_LOAD_DWORD_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX2_IMM:
|
||||
@ -703,6 +727,10 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec:
|
||||
Result.SOffset = true;
|
||||
[[fallthrough]];
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
|
||||
@ -710,6 +738,10 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec:
|
||||
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec:
|
||||
case AMDGPU::S_LOAD_DWORD_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX2_IMM:
|
||||
case AMDGPU::S_LOAD_DWORDX3_IMM:
|
||||
@ -1679,6 +1711,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair(
|
||||
return New;
|
||||
}
|
||||
|
||||
// Decides whether a merged load of Width must use a constrained ("_ec")
// opcode. This can only be true when XNACK is enabled on the subtarget.
static bool needsConstrainedOpcode(const GCNSubtarget &STM,
|
||||
ArrayRef<MachineMemOperand *> MMOs,
|
||||
unsigned Width) {
|
||||
// Be conservative: if there is not exactly one MMO, the alignment is unknown,
// so the constrained opcode is requested. Otherwise it is requested only when
// the MMO's alignment is smaller than Width * 4 (Width appears to be a dword
// count, making Width * 4 the merged access size in bytes).
|
||||
return STM.isXNACKEnabled() &&
|
||||
(MMOs.size() != 1 || MMOs[0]->getAlign().value() < Width * 4);
|
||||
}
|
||||
|
||||
unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
|
||||
const CombineInfo &Paired) {
|
||||
const unsigned Width = CI.Width + Paired.Width;
|
||||
@ -1696,38 +1736,55 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
|
||||
|
||||
case UNKNOWN:
|
||||
llvm_unreachable("Unknown instruction class");
|
||||
case S_BUFFER_LOAD_IMM:
|
||||
case S_BUFFER_LOAD_IMM: {
|
||||
// If XNACK is enabled, use the constrained opcodes when the first load is
|
||||
// under-aligned.
|
||||
bool NeedsConstrainedOpc =
|
||||
needsConstrainedOpcode(*STM, CI.I->memoperands(), Width);
|
||||
switch (Width) {
|
||||
default:
|
||||
return 0;
|
||||
case 2:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
|
||||
return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec
|
||||
: AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
|
||||
case 3:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM;
|
||||
return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec
|
||||
: AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM;
|
||||
case 4:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
|
||||
return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec
|
||||
: AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
|
||||
case 8:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM;
|
||||
return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec
|
||||
: AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM;
|
||||
}
|
||||
case S_BUFFER_LOAD_SGPR_IMM:
|
||||
}
|
||||
case S_BUFFER_LOAD_SGPR_IMM: {
|
||||
// If XNACK is enabled, use the constrained opcodes when the first load is
|
||||
// under-aligned.
|
||||
bool NeedsConstrainedOpc =
|
||||
needsConstrainedOpcode(*STM, CI.I->memoperands(), Width);
|
||||
switch (Width) {
|
||||
default:
|
||||
return 0;
|
||||
case 2:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
|
||||
return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec
|
||||
: AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
|
||||
case 3:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM;
|
||||
return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec
|
||||
: AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM;
|
||||
case 4:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
|
||||
return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec
|
||||
: AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
|
||||
case 8:
|
||||
return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
|
||||
return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec
|
||||
: AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
|
||||
}
|
||||
}
|
||||
case S_LOAD_IMM: {
|
||||
// If XNACK is enabled, use the constrained opcodes when the first load is
|
||||
// under-aligned.
|
||||
const MachineMemOperand *MMO = *CI.I->memoperands_begin();
|
||||
bool NeedsConstrainedOpc =
|
||||
STM->isXNACKEnabled() && MMO->getAlign().value() < Width * 4;
|
||||
needsConstrainedOpcode(*STM, CI.I->memoperands(), Width);
|
||||
switch (Width) {
|
||||
default:
|
||||
return 0;
|
||||
|
||||
@ -523,14 +523,23 @@ define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
|
||||
; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
|
||||
; GFX67-NEXT: s_endpgm
|
||||
;
|
||||
; GFX8910-LABEL: s_buffer_load_imm_mergex2:
|
||||
; GFX8910: ; %bb.0: ; %main_body
|
||||
; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
|
||||
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8910-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX8910-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
|
||||
; GFX8910-NEXT: s_endpgm
|
||||
; GFX8-LABEL: s_buffer_load_imm_mergex2:
|
||||
; GFX8: ; %bb.0: ; %main_body
|
||||
; GFX8-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8-NEXT: exp mrt0 v0, v1, v0, v0 done vm
|
||||
; GFX8-NEXT: s_endpgm
|
||||
;
|
||||
; GFX910-LABEL: s_buffer_load_imm_mergex2:
|
||||
; GFX910: ; %bb.0: ; %main_body
|
||||
; GFX910-NEXT: s_buffer_load_dwordx2 s[4:5], s[0:3], 0x4
|
||||
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX910-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX910-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
|
||||
; GFX910-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-LABEL: s_buffer_load_imm_mergex2:
|
||||
; GFX11: ; %bb.0: ; %main_body
|
||||
@ -570,16 +579,27 @@ define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
|
||||
; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
|
||||
; GFX67-NEXT: s_endpgm
|
||||
;
|
||||
; GFX8910-LABEL: s_buffer_load_imm_mergex4:
|
||||
; GFX8910: ; %bb.0: ; %main_body
|
||||
; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
|
||||
; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8910-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX8910-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8910-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX8910-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
|
||||
; GFX8910-NEXT: s_endpgm
|
||||
; GFX8-LABEL: s_buffer_load_imm_mergex4:
|
||||
; GFX8: ; %bb.0: ; %main_body
|
||||
; GFX8-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, s3
|
||||
; GFX8-NEXT: exp mrt0 v0, v1, v2, v3 done vm
|
||||
; GFX8-NEXT: s_endpgm
|
||||
;
|
||||
; GFX910-LABEL: s_buffer_load_imm_mergex4:
|
||||
; GFX910: ; %bb.0: ; %main_body
|
||||
; GFX910-NEXT: s_buffer_load_dwordx4 s[4:7], s[0:3], 0x8
|
||||
; GFX910-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX910-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX910-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX910-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX910-NEXT: v_mov_b32_e32 v3, s7
|
||||
; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
|
||||
; GFX910-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-LABEL: s_buffer_load_imm_mergex4:
|
||||
; GFX11: ; %bb.0: ; %main_body
|
||||
|
||||
@ -9,14 +9,23 @@ body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x2
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_BUFFER_LOAD_DWORDX2_IMM]].sub0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX2_IMM]].sub1
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x2
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %3:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY %3.sub0
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed %3.sub1
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x2
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_BUFFER_LOAD_DWORDX2_IMM]].sub0
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX2_IMM]].sub1
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
|
||||
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
|
||||
@ -86,9 +95,9 @@ body: |
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub2_sub3
|
||||
; GFX10-NEXT: early-clobber %7:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY %7.sub0_sub1
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY killed %7.sub2_sub3
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY1]].sub0
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub1
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0
|
||||
@ -170,9 +179,9 @@ body: |
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: early-clobber %15:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %15.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %15.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
|
||||
@ -231,9 +240,9 @@ body: |
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: early-clobber %15:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %15.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %15.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub1
|
||||
@ -288,18 +297,31 @@ body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x2
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
|
||||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
|
||||
; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
|
||||
%2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
|
||||
@ -316,14 +338,23 @@ body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x4
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
|
||||
%2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
|
||||
@ -338,18 +369,31 @@ body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x8_mixed
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
|
||||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_mixed
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_mixed
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
|
||||
; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
|
||||
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
|
||||
@ -371,9 +415,9 @@ body: |
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
|
||||
; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3
|
||||
; GFX10-NEXT: early-clobber %8:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY %8.sub0_sub1
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY killed %8.sub2_sub3
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
|
||||
@ -450,4 +494,420 @@ body: |
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# The constrained multi-dword buffer load merge tests.
|
||||
|
||||
---
|
||||
name: merge_s_buffer_load_x1_x2ec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x1_x2ec
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 4, 0 :: (dereferenceable invariant load (s64))
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
|
||||
early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x2ec_x1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x2ec_x1
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s64))
|
||||
; GFX10-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (dereferenceable invariant load (s32))
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x2ec_x1
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s96), align 8)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX3_IMM]].sub0_sub1
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX3_IMM]].sub2
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
|
||||
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x1_x3ec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x1_x3ec
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32))
|
||||
; CHECK-NEXT: early-clobber %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec [[COPY]], 4, 0 :: (dereferenceable invariant load (s96), align 16)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
|
||||
early-clobber %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x3ec_x1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x3ec_x1
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128))
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
early-clobber %1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96))
|
||||
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x8_out_of_x2ec_reordered
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_reordered
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_reordered
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
|
||||
; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
|
||||
early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
|
||||
early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
|
||||
early-clobber %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x8_out_of_x2ec_x2
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_x2
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_x2
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
|
||||
; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
|
||||
early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
|
||||
%3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
|
||||
%4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x8_out_of_x4ec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
|
||||
early-clobber %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x8_out_of_x4ec_x4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec_x4
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec_x4
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
|
||||
%2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x8_out_of_x4_x4ec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4_x4ec
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4_x4ec
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
|
||||
early-clobber %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub0_sub1_sub2_sub3
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub4_sub5_sub6_sub7
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
|
||||
; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
|
||||
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
|
||||
; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
|
||||
; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
|
||||
; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
|
||||
; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
|
||||
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
|
||||
early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))
|
||||
%4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
|
||||
|
||||
; GFX10-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
|
||||
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
|
||||
; GFX10-NEXT: {{ $}}
|
||||
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
|
||||
; GFX10-NEXT: early-clobber %4:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 8)
|
||||
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY %4.sub0_sub1
|
||||
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed %4.sub2_sub3
|
||||
; GFX10-NEXT: S_ENDPGM 0
|
||||
;
|
||||
; GFX12-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
|
||||
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
|
||||
; GFX12-NEXT: {{ $}}
|
||||
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
|
||||
; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 8)
|
||||
; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1
|
||||
; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3
|
||||
; GFX12-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sreg_32 = COPY $sgpr4
|
||||
early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s64))
|
||||
early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s64))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# No constrained opcode required when the MEM operand has met the required alignment.
|
||||
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x2_x2_no_constrained_opc_needed
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x2_x2_no_constrained_opc_needed
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128))
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub2_sub3
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64), align 16)
|
||||
%2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_x4_x4_no_constrained_opc_needed
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_x4_x4_no_constrained_opc_needed
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256))
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128), align 32)
|
||||
%2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
||||
name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec_no_constrained_opc_needed
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
|
||||
|
||||
; CHECK-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec_no_constrained_opc_needed
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
|
||||
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128))
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sreg_32 = COPY $sgpr4
|
||||
%2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s64), align 16)
|
||||
%3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s64))
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user