[AMDGPU] Fix piggybacking after commute in AMDGPULowerVGPREncoding (#183778)
After successfully commuting an instruction to be compatible with the current VGPR MSB mode, update CurrentMode with the commuted instruction's mode requirements. This locks in the mode bits the commuted instruction relies on, preventing later instructions from piggybacking and corrupting those bits. Without this fix, a subsequent instruction needing a different mode could piggyback onto the preceding s_set_vgpr_msb and change mode bits that the commuted instruction depends on. For example, a nullopt src1 position (treated as 0) could be overwritten to a different value, causing incorrect register encoding for the commuted instruction. The fix still allows compatible piggybacking - instructions that only add new mode bits without changing existing ones can still piggyback.
This commit is contained in:
parent
bed89970c3
commit
27d654c4c4
@ -292,8 +292,15 @@ bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
|
||||
TII->commuteInstruction(MI)) {
|
||||
ModeTy NewModeCommuted;
|
||||
computeMode(NewModeCommuted, MI, Ops.first, Ops.second);
|
||||
if (CurrentMode.isCompatible(NewModeCommuted))
|
||||
return false;
|
||||
if (CurrentMode.isCompatible(NewModeCommuted)) {
|
||||
// Update CurrentMode with mode bits the commuted instruction relies on.
|
||||
// This prevents later instructions from piggybacking and corrupting
|
||||
// those bits (e.g., a nullopt src treated as 0 could be overwritten).
|
||||
bool Unused = false;
|
||||
CurrentMode.update(NewModeCommuted, Unused);
|
||||
// MI was modified by the commute above.
|
||||
return true;
|
||||
}
|
||||
// Commute back.
|
||||
if (!TII->commuteInstruction(MI))
|
||||
llvm_unreachable("Failed to restore commuted instruction.");
|
||||
|
||||
@ -346,15 +346,20 @@ body: |
|
||||
; GCN-NEXT: v_fmaak_f32 v0, v1 /*v257*/, v2 /*v258*/, 0x1
|
||||
$vgpr0 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode
|
||||
|
||||
; The v_fmamk below needs src2=1. After the commute, compatible piggybacking
|
||||
; is still allowed, so src2=1 is piggybacked here.
|
||||
; GCN-NEXT: s_set_vgpr_msb 0x551
|
||||
; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=1
|
||||
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2, 0x1
|
||||
$vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr2, 1, implicit $exec, implicit $mode
|
||||
|
||||
; Commute FMAAK to avoid mode change
|
||||
; Commute FMAAK to avoid mode change. After commute, CurrentMode is updated
|
||||
; to lock in the mode bits the commuted instruction relies on.
|
||||
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v2 /*v258*/, v1, 0x1
|
||||
$vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode
|
||||
|
||||
; This v_fmamk needs src2=1. Since src2 was nullopt (not locked), it can
|
||||
; piggyback onto the s_set_vgpr_msb above by adding src2=1.
|
||||
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/
|
||||
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
|
||||
|
||||
@ -473,6 +478,9 @@ body: |
|
||||
; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v256*/, v2
|
||||
$vgpr0 = V_SUBREV_U32_e32 undef $vgpr256, undef $vgpr2, implicit $exec
|
||||
|
||||
; GCN-NEXT: s_set_vgpr_msb 0x100
|
||||
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
|
||||
; ASM: NumVgprs: 513
|
||||
...
|
||||
|
||||
@ -964,3 +972,41 @@ body: |
|
||||
; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[244:259] /*v[500:515]*/, v[0:15], v[10:17], v[0:1], v[2:3]
|
||||
$vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef $vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515, undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
|
||||
# Test that incompatible piggybacking is prevented after a successful commute.
|
||||
# After commute, CurrentMode is updated with the commuted instruction's mode.
|
||||
# A subsequent instruction that would CHANGE an existing mode bit cannot
|
||||
# piggyback. However, instructions that only ADD new mode bits (without
|
||||
# changing existing ones) can still piggyback - see fmak_fmamk_vop3 test.
|
||||
|
||||
# ASM-LABEL: {{^}}commute_no_piggyback:
|
||||
# DIS-LABEL: <commute_no_piggyback>:
|
||||
---
|
||||
name: commute_no_piggyback
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; ASM: %bb.0:
|
||||
|
||||
; Step 1: v_mov sets dst=1, src0=2. src1 is nullopt (defaults to 0).
|
||||
; GCN-NEXT: s_set_vgpr_msb 0x42
|
||||
; ASM-SAME: ; msbs: dst=1 src0=2 src1=0 src2=0
|
||||
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 /*v512*/
|
||||
$vgpr256 = V_MOV_B32_e32 undef $vgpr512, implicit $exec
|
||||
|
||||
; Step 2: v_add is commuted to be compatible (src0=2, src1=0).
|
||||
; GCN-NEXT: v_add_f32_e64 v0 /*v256*/, v0 /*v512*/, v1
|
||||
$vgpr256 = V_ADD_F32_e64 0, undef $vgpr1, 0, undef $vgpr512, 0, 0, implicit $exec, implicit $mode
|
||||
|
||||
; Step 3: v_add needs src1=2. Without the fix, this piggybacks onto step 1's
|
||||
; s_set_vgpr_msb, changing src1 from 0 to 2 and breaking step 2.
|
||||
; GCN-NEXT: s_set_vgpr_msb 0x424a
|
||||
; ASM-SAME: ; msbs: dst=1 src0=2 src1=2 src2=0
|
||||
; GCN-NEXT: v_add_f32_e64 v0 /*v256*/, v0 /*v512*/, v1 /*v513*/
|
||||
$vgpr256 = V_ADD_F32_e64 0, undef $vgpr512, 0, undef $vgpr513, 0, 0, implicit $exec, implicit $mode
|
||||
|
||||
; GCN-NEXT: s_set_vgpr_msb 0x4a00
|
||||
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
|
||||
; ASM: NumVgprs: 514
|
||||
...
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user