[AMDGPU] Extend RA hints to handle VGPR_16_Lo128 (#176819)
This commit is contained in:
parent
f734f42bb0
commit
1711e0074f
@ -270,15 +270,15 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
|
||||
continue;
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
Register Src = MI.getOperand(1).getReg();
|
||||
if (Dst.isVirtual() &&
|
||||
MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
|
||||
Src.isPhysical() &&
|
||||
const TargetRegisterClass *DstRC = TRI->getRegClassForReg(*MRI, Dst);
|
||||
bool IsDst16Bit = DstRC == &AMDGPU::VGPR_16RegClass ||
|
||||
DstRC == &AMDGPU::VGPR_16_Lo128RegClass;
|
||||
if (Dst.isVirtual() && IsDst16Bit && Src.isPhysical() &&
|
||||
TRI->getRegClassForReg(*MRI, Src) == &AMDGPU::VGPR_32RegClass)
|
||||
MRI->setRegAllocationHint(Dst, 0, TRI->getSubReg(Src, AMDGPU::lo16));
|
||||
if (Src.isVirtual() &&
|
||||
MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass &&
|
||||
Dst.isPhysical() &&
|
||||
TRI->getRegClassForReg(*MRI, Dst) == &AMDGPU::VGPR_32RegClass)
|
||||
Dst.isPhysical() && DstRC == &AMDGPU::VGPR_32RegClass)
|
||||
MRI->setRegAllocationHint(Src, 0, TRI->getSubReg(Dst, AMDGPU::lo16));
|
||||
if (!Dst.isVirtual() || !Src.isVirtual())
|
||||
continue;
|
||||
@ -287,8 +287,7 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
|
||||
MRI->setRegAllocationHint(Dst, AMDGPURI::Size32, Src);
|
||||
MRI->setRegAllocationHint(Src, AMDGPURI::Size16, Dst);
|
||||
}
|
||||
if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
|
||||
MRI->getRegClass(Src) == &AMDGPU::VGPR_32RegClass)
|
||||
if (IsDst16Bit && MRI->getRegClass(Src) == &AMDGPU::VGPR_32RegClass)
|
||||
MRI->setRegAllocationHint(Dst, AMDGPURI::Size16, Src);
|
||||
}
|
||||
}
|
||||
|
||||
@ -205,9 +205,7 @@ define half @test_fmaak(half %x, half %y, half %z) {
|
||||
; GFX11-SDAG-TRUE16-LABEL: test_fmaak:
|
||||
; GFX11-SDAG-TRUE16: ; %bb.0:
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v0.h, 0x4200
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v1.l, 0x4200
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-FAKE16-LABEL: test_fmaak:
|
||||
@ -235,9 +233,7 @@ define half @test_fmaak(half %x, half %y, half %z) {
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v0.h, 0x4200
|
||||
; GFX12-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v1.l, 0x4200
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-SDAG-FAKE16-LABEL: test_fmaak:
|
||||
@ -298,9 +294,7 @@ define half @test_fmamk(half %x, half %y, half %z) {
|
||||
; GFX11-SDAG-TRUE16-LABEL: test_fmamk:
|
||||
; GFX11-SDAG-TRUE16: ; %bb.0:
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v2.l
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-FAKE16-LABEL: test_fmamk:
|
||||
@ -330,9 +324,7 @@ define half @test_fmamk(half %x, half %y, half %z) {
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h
|
||||
; GFX12-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v2.l
|
||||
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX12-SDAG-FAKE16-LABEL: test_fmamk:
|
||||
|
||||
@ -3815,8 +3815,7 @@ define half @v_fma_mul_add_32_f16(half %x, half %y) {
|
||||
; GFX11-SDAG-TRUE16-LABEL: v_fma_mul_add_32_f16:
|
||||
; GFX11-SDAG-TRUE16: ; %bb.0:
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x5000, v0.h
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x5000, v1.l
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-FAKE16-LABEL: v_fma_mul_add_32_f16:
|
||||
@ -3915,8 +3914,7 @@ define half @v_fma_mul_add_neg32_f16(half %x, half %y) {
|
||||
; GFX11-SDAG-TRUE16-LABEL: v_fma_mul_add_neg32_f16:
|
||||
; GFX11-SDAG-TRUE16: ; %bb.0:
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0xd000, v0.h
|
||||
; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0xd000, v1.l
|
||||
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-SDAG-FAKE16-LABEL: v_fma_mul_add_neg32_f16:
|
||||
|
||||
@ -485,9 +485,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_mov_b32 s8, s0
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_mov_b32 s9, s1
|
||||
; GFX11-DENORM-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-DENORM-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h
|
||||
; GFX11-DENORM-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v1.l
|
||||
; GFX11-DENORM-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_endpgm
|
||||
;
|
||||
@ -719,9 +717,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_mov_b32 s8, s0
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_mov_b32 s9, s1
|
||||
; GFX11-DENORM-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-DENORM-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h
|
||||
; GFX11-DENORM-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v1.l
|
||||
; GFX11-DENORM-TRUE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0
|
||||
; GFX11-DENORM-TRUE16-NEXT: s_endpgm
|
||||
;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user