[AMDGPU] Handle s_setreg_imm32_b32 targeting MODE register (#174681)

On certain hardware, this instruction also clobbers the VGPR MSB field held in
`bits[12:19]` of the MODE register, so we need to restore the current VGPR MSB
mode.

Fixes SWDEV-571581.
This commit is contained in:
Shilei Tian 2026-01-09 14:43:41 -05:00 committed by GitHub
parent ac508575ed
commit df3629dc0c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 365 additions and 6 deletions

View File

@ -1264,6 +1264,12 @@ def Feature1024AddressableVGPRs : SubtargetFeature<"1024-addressable-vgprs",
"Has 1024 addressable VGPRs"
>;
// Subtarget feature "setreg-vgpr-msb-fixup": sets HasSetregVGPRMSBFixup to
// true on targets where an S_SETREG write to the MODE register also clobbers
// the VGPR MSB bits, so the compiler has to fix those bits up.
def FeatureSetregVGPRMSBFixup : SubtargetFeature<"setreg-vgpr-msb-fixup",
"HasSetregVGPRMSBFixup",
"true",
"S_SETREG to MODE clobbers VGPR MSB bits, requires fixup"
>;
def FeatureWaitXcnt : SubtargetFeature<"wait-xcnt",
"HasWaitXcnt",
"true",
@ -2223,7 +2229,8 @@ def FeatureISAVersion12_50_Common : FeatureSet<
def FeatureISAVersion12_50 : FeatureSet<
!listconcat(FeatureISAVersion12_50_Common.Features,
[FeatureAddressableLocalMemorySize327680])>;
[FeatureAddressableLocalMemorySize327680,
FeatureSetregVGPRMSBFixup])>;
def FeatureISAVersion12_51 : FeatureSet<
!listconcat(FeatureISAVersion12_50_Common.Features,

View File

@ -44,8 +44,11 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/PackedVector.h"
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
@ -63,6 +66,9 @@ class AMDGPULowerVGPREncoding {
using ModeType = PackedVector<unsigned, BitsPerField,
std::bitset<BitsPerField * NumFields>>;
/// Number of trailing zero bits in Hwreg::DST_VGPR_MSB, evaluated at compile
/// time. Used as the shift amount when placing the VGPR MSB mode into, or
/// extracting it from, the 32-bit immediate of S_SETREG_IMM32_B32.
static constexpr unsigned VGPRMSBShift =
llvm::countr_zero_constexpr<unsigned>(AMDGPU::Hwreg::DST_VGPR_MSB);
class ModeTy : public ModeType {
public:
// bitset constructor will set all bits to zero
@ -143,6 +149,16 @@ private:
/// instruction to encourage more coissuing.
MachineBasicBlock::instr_iterator
handleCoissue(MachineBasicBlock::instr_iterator I);
/// Handle S_SETREG_IMM32_B32 targeting MODE register. On certain hardware,
/// this instruction clobbers VGPR MSB bits[12:19], so we need to restore
/// the current mode. \returns true if the instruction was modified or a
/// new one was inserted.
bool handleSetregMode(MachineInstr &MI);
/// Update bits[12:19] of the imm operand in S_SETREG_IMM32_B32 to contain
/// the VGPR MSB mode value. \returns true if the immediate was changed.
bool updateSetregModeImm(MachineInstr &MI, int64_t ModeValue);
};
bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
@ -160,12 +176,19 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
CurrentMode |= NewMode;
CurrentMask |= Mask;
MachineOperand &Op = MostRecentModeSet->getOperand(0);
// Update MostRecentModeSet with the new mode. It can be either
// S_SET_VGPR_MSB or S_SETREG_IMM32_B32 (with Size <= 12).
if (MostRecentModeSet->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
MachineOperand &Op = MostRecentModeSet->getOperand(0);
// Carry old mode bits from the existing instruction.
int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
Op.setImm(CurrentMode | OldModeBits);
} else {
assert(MostRecentModeSet->getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
"unexpected MostRecentModeSet opcode");
updateSetregModeImm(*MostRecentModeSet, CurrentMode);
}
// Carry old mode bits from the existing instruction.
int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
Op.setImm(CurrentMode | OldModeBits);
return true;
}
@ -315,6 +338,82 @@ AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
return Prev;
}
/// Translate an 8-bit VGPR MSB mode from the S_SET_VGPR_MSB operand layout
/// into the layout used by the MODE register.
///
///   S_SET_VGPR_MSB: src0 = bits[0-1], src1 = bits[2-3], src2 = bits[4-5],
///                   dst  = bits[6-7]
///   MODE register:  dst  = bits[0-1], src0 = bits[2-3], src1 = bits[4-5],
///                   src2 = bits[6-7]
///
/// Moving dst from the top pair to the bottom pair while every source field
/// moves up by one pair is an 8-bit rotate left by two.
///
/// \param Mode Mode in S_SET_VGPR_MSB layout; asserted to fit in 8 bits.
/// \returns The same mode in MODE register layout (value in [0, 255]).
static int64_t convertModeToSetregFormat(int64_t Mode) {
  assert(isUInt<8>(Mode) && "Mode expected to be 8-bit");
  const auto SetVgprMsbLayout = static_cast<uint8_t>(Mode);
  const uint8_t ModeRegLayout = llvm::rotl<uint8_t>(SetVgprMsbLayout, /*R=*/2);
  return ModeRegLayout;
}
/// Rewrite the VGPR MSB field of an S_SETREG_IMM32_B32 immediate in place.
///
/// The bits selected by Hwreg::VGPR_MSB_MASK in the `imm` operand are replaced
/// with \p ModeValue converted to MODE register layout; all other bits of the
/// immediate covered by the complemented mask are kept.
///
/// \param MI        Instruction to patch; must be S_SETREG_IMM32_B32.
/// \param ModeValue VGPR MSB mode in S_SET_VGPR_MSB layout (8-bit value).
/// \returns true if the stored immediate differs from the previous one.
bool AMDGPULowerVGPREncoding::updateSetregModeImm(MachineInstr &MI,
                                                  int64_t ModeValue) {
  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);

  // Convert from S_SET_VGPR_MSB format to MODE register format
  int64_t SetregMode = convertModeToSetregFormat(ModeValue);

  MachineOperand *ImmOp = TII->getNamedOperand(MI, AMDGPU::OpName::imm);
  // getNamedOperand hands back a pointer that is dereferenced right below;
  // assert on it the same way handleSetregMode does for its operand lookups.
  assert(ImmOp && "ImmOp must be present");

  int64_t OldImm = ImmOp->getImm();
  // NOTE(review): if VGPR_MSB_MASK is a 32-bit unsigned constant, its
  // complement zero-extends to 64 bits and this also clears bits 32..63 of
  // OldImm - confirm that is acceptable for sign-extended immediates.
  int64_t NewImm =
      (OldImm & ~AMDGPU::Hwreg::VGPR_MSB_MASK) | (SetregMode << VGPRMSBShift);
  ImmOp->setImm(NewImm);
  return NewImm != OldImm;
}
/// Make sure an S_SETREG_IMM32_B32 does not leave the wrong VGPR MSB bits
/// behind.
///
/// Only writes whose decoded hardware register id is ID_MODE are handled;
/// anything else is left untouched. For a MODE write there are three outcomes:
///  - Field size <= VGPRMSBShift: imm32[12:19] is not part of the written
///    field, so the current VGPR MSB mode is patched into those bits and the
///    instruction becomes MostRecentModeSet.
///  - Field size > VGPRMSBShift and imm32[12:19] already equals the current
///    mode (in MODE register layout): nothing is changed, but
///    MostRecentModeSet is cleared because those immediate bits are
///    meaningful and must not be rewritten later.
///  - Field size > VGPRMSBShift and imm32[12:19] differs: an S_SET_VGPR_MSB
///    carrying the current mode is inserted right after \p MI and becomes
///    MostRecentModeSet.
///
/// \returns true if \p MI was modified or a new instruction was inserted.
bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
  using namespace AMDGPU::Hwreg;

  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
         "only S_SETREG_IMM32_B32 needs to be handled");

  MachineOperand *HwregOp = TII->getNamedOperand(MI, AMDGPU::OpName::simm16);
  assert(HwregOp && "SIMM16Op must be present");

  auto [HwRegId, Offset, Size] = HwregEncoding::decode(HwregOp->getImm());
  (void)Offset;

  // Writes to registers other than MODE are not affected.
  if (HwRegId != ID_MODE)
    return false;

  // Current VGPR MSB mode, in S_SET_VGPR_MSB layout.
  const int64_t MSBMode = static_cast<int64_t>(CurrentMode);

  if (Size > VGPRMSBShift) {
    // The written field reaches past bit 11, so imm32[12:19] carries real
    // payload and cannot be overwritten. The immediate holds MODE register
    // layout while MSBMode is in S_SET_VGPR_MSB layout, hence the conversion
    // before comparing.
    MachineOperand *Imm32Op = TII->getNamedOperand(MI, AMDGPU::OpName::imm);
    assert(Imm32Op && "ImmOp must be present");
    const int64_t EncodedMSBs =
        (Imm32Op->getImm() & VGPR_MSB_MASK) >> VGPRMSBShift;
    const int64_t WantedMSBs = convertModeToSetregFormat(MSBMode);

    if (EncodedMSBs != WantedMSBs) {
      // The setreg writes different MSBs: restore the current mode with a
      // fresh s_set_vgpr_msb placed directly after it.
      MachineBasicBlock::iterator AfterSetreg = std::next(MI.getIterator());
      MostRecentModeSet = BuildMI(*MBB, AfterSetreg, MI.getDebugLoc(),
                                  TII->get(AMDGPU::S_SET_VGPR_MSB))
                              .addImm(MSBMode);
      return true;
    }

    // The value already matches. This instruction still overwrites
    // mode[12:19] and cannot be piggybacked on, so forget any earlier mode
    // set; a later mode change will insert a new s_set_vgpr_msb after it.
    MostRecentModeSet = nullptr;
    return false;
  }

  // Size <= 12: only imm32[0:Size-1] is consumed by the write, so
  // imm32[12:19] is free to carry the VGPR MSBs. Record this instruction as
  // the most recent mode set so later mode changes can be folded into it.
  MostRecentModeSet = &MI;
  return updateSetregModeImm(MI, MSBMode);
}
bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (!ST.has1024AddressableVGPRs())
@ -359,6 +458,12 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
continue;
}
if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
ST.hasSetregVGPRMSBFixup()) {
Changed |= handleSetregMode(MI);
continue;
}
Changed |= runOnMachineInstr(MI);
if (ClauseRemaining)

View File

@ -244,6 +244,7 @@ protected:
bool HasRestrictedSOffset = false;
bool Has64BitLiterals = false;
bool Has1024AddressableVGPRs = false;
bool HasSetregVGPRMSBFixup = false;
bool HasBitOp3Insts = false;
bool HasTanhInsts = false;
bool HasTensorCvtLutInsts = false;
@ -1445,6 +1446,8 @@ public:
bool has1024AddressableVGPRs() const { return Has1024AddressableVGPRs; }
/// \returns the HasSetregVGPRMSBFixup subtarget flag. The flag is false by
/// default and is set by the "setreg-vgpr-msb-fixup" feature, which marks
/// targets where S_SETREG to MODE clobbers the VGPR MSB bits.
bool hasSetregVGPRMSBFixup() const { return HasSetregVGPRMSBFixup; }
bool hasMinimum3Maximum3PKF16() const {
return HasMinimum3Maximum3PKF16;
}

View File

@ -0,0 +1,244 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
---
# Case 1a: Size < 12 (size=4), imm32[12:19]=0
# S_SET_VGPR_MSB format: (src0_msb[0-1], src1_msb[2-3], src2_msb[4-5], dst_msb[6-7])
# MODE register format: (dst_msb[0-1], src0_msb[2-3], src1_msb[4-5], src2_msb[6-7])
# vgpr256/257 (both MSB=1): S_SET_VGPR_MSB mode = (1 << 0) | (1 << 6) = 65
# MODE register mode = (1 << 0) | (1 << 2) = 5
# New setreg imm = 0x5 | (5 << 12) = 0x5005 = 20485
name: setreg_size_lt_12
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_size_lt_12
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
S_SETREG_IMM32_B32 5, 6145, implicit-def $mode, implicit $mode
S_ENDPGM 0
...
---
# Case 1b: Size == 12 (boundary), imm32[12:19]=0
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# New setreg imm = 0xABC | (5 << 12) = 0x5ABC = 23228
name: setreg_size_eq_12
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_size_eq_12
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 22529, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=12, offset=0, hwreg=MODE: simm16 = 0x5801 = 22529
S_SETREG_IMM32_B32 2748, 22529, implicit-def $mode, implicit $mode
S_ENDPGM 0
...
---
# Case 1c: Size <= 12 with existing non-zero bits in imm32[12:19]
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x23005 (bits 12:19 = 0x23), result = 0x5005 = 20485 (bits 12:19 replaced with 5)
name: setreg_size_lt_12_nonzero_upper
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_size_lt_12_nonzero_upper
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
; imm32 = 0x23005 = 143365 (bits 12:19 = 0x23)
S_SETREG_IMM32_B32 143365, 6145, implicit-def $mode, implicit $mode
S_ENDPGM 0
...
---
# Case 2: Size > 12 (size=16), imm32[12:19] already matches VGPR MSBs
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x5ABC = 23228 (bits 12:19 = 5), no modification needed
name: setreg_size_gt_12_match
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_size_gt_12_match
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=16, offset=0, hwreg=MODE: simm16 = 0x7801 = 30721
; imm32 = 0x5ABC = 23228 (bits 12:19 = 5 = MODE register mode for vgpr256/257)
S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
S_ENDPGM 0
...
---
# Case 3: Size > 12 (size=16), imm32[12:19] doesn't match VGPR MSBs
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x23ABC = 146108 (bits 12:19 = 0x23 != 5), must insert s_set_vgpr_msb after
name: setreg_size_gt_12_mismatch
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_size_gt_12_mismatch
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=16, offset=0, hwreg=MODE: simm16 = 0x7801 = 30721
; imm32 = 0x23ABC = 146108 (bits 12:19 = 0x23 != 5)
S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
S_ENDPGM 0
...
---
# Case 4: Non-MODE hwreg should not be modified
# This uses ID_STATUS=2 instead of ID_MODE=1
# vgpr256/257: S_SET_VGPR_MSB mode = 65
name: setreg_non_mode_hwreg
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_non_mode_hwreg
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 5, 6146, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=4, offset=0, hwreg=STATUS(2): simm16 = 2 | (0 << 6) | (3 << 11) = 0x1802 = 6146
S_SETREG_IMM32_B32 5, 6146, implicit-def $mode, implicit $mode
S_ENDPGM 0
...
---
# Case 5: Size <= 12 but VGPR MSBs already present (no change needed)
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x5005 = 20485 (bits 12:19 = 5 = MODE register mode)
name: setreg_size_lt_12_already_correct
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_size_lt_12_already_correct
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
; imm32 = 0x5005 = 20485 (bits 12:19 = 5 = MODE register mode)
S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
S_ENDPGM 0
...
---
# Case 6: Different VGPR MSB value (using different high VGPRs)
# vgpr512/513 (both MSB=2): S_SET_VGPR_MSB mode = (2 << 0) | (2 << 6) = 130
# MODE register mode = (2 << 0) | (2 << 2) = 10
# New setreg imm = 0x5 | (10 << 12) = 0xA005 = 40965
name: setreg_different_vgpr_msb
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_different_vgpr_msb
; CHECK: S_SET_VGPR_MSB 130, implicit-def $mode
; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 40965, 6145, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
S_SETREG_IMM32_B32 5, 6145, implicit-def $mode, implicit $mode
S_ENDPGM 0
...
---
# Case 7: Piggybacking successfully updates s_setreg_imm32_b32 (Size <= 12)
# First VGPR (V_MOV vgpr256, vgpr257): S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# Second VGPR (V_ADD_U32 vgpr256, vgpr257, vgpr512):
# S_SET_VGPR_MSB mode = (1 << 0) | (2 << 2) | (1 << 6) = 73 (src0=1, src1=2, dst=1)
# MODE register mode = (1 << 0) | (1 << 2) | (2 << 4) = 37 (dst=1, src0=1, src1=2)
# Piggybacking updates setreg imm32[12:19] from 5 to 37.
# Final setreg imm = 5 | (37 << 12) = 151557
name: setreg_size_le_12_piggyback_superset
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_size_le_12_piggyback_superset
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 151557, 6145, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr256 = V_ADD_U32_e32 $vgpr257, $vgpr512, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
S_SETREG_IMM32_B32 5, 6145, implicit-def $mode, implicit $mode
; Second instruction uses same dst/src0 (MSB=1) but adds src1 (MSB=2)
$vgpr256 = V_ADD_U32_e32 $vgpr257, $vgpr512, implicit $exec
S_ENDPGM 0
...
---
# Case 8: s_setreg_imm32_b32 (Size <= 12) followed by VGPR with different mode bits
# First VGPR (V_MOV vgpr256, vgpr0): S_SET_VGPR_MSB mode = 64, MODE register mode = 1
# Second VGPR (V_MOV vgpr256, vgpr256): S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# Setreg gets MODE mode = 1 from first VGPR. Second VGPR needs different src0 bits,
# so a new S_SET_VGPR_MSB is inserted. The new S_SET_VGPR_MSB has mode = 65 | (64 << 8) = 16449.
name: setreg_size_le_12_then_different_vgpr
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: setreg_size_le_12_then_different_vgpr
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 4101, 6145, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_SET_VGPR_MSB 16449, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr256, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr0, implicit $exec
; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
S_SETREG_IMM32_B32 5, 6145, implicit-def $mode, implicit $mode
$vgpr256 = V_MOV_B32_e32 $vgpr256, implicit $exec
S_ENDPGM 0
...
---
# Case 9: After s_setreg_imm32_b32 (Size > 12, matching), new s_set_vgpr_msb needed
# First VGPR (vgpr256/257): S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# Setreg has size=16 with imm32[12:19]=5 (matches MODE register mode).
# handleSetregMode sets MostRecentModeSet = nullptr (can't piggyback on Size > 12).
# Second VGPR (vgpr512/513): S_SET_VGPR_MSB mode = 130, MODE register mode = 10
# Since MostRecentModeSet = nullptr, a new s_set_vgpr_msb is inserted.
# New s_set_vgpr_msb imm = NewMode | (OldMode << 8) = 130 | (65 << 8) = 16770
name: setreg_size_gt_12_match_then_different_vgpr
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: setreg_size_gt_12_match_then_different_vgpr
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; size=16, offset=0, hwreg=MODE: simm16 = 0x7801 = 30721
; imm32 = 0x5ABC = 23228 (bits 12:19 = 5 = MODE register mode, matches!)
S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
$vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
S_ENDPGM 0
...