[NFCI][AMDGPU] Use X-macro to reduce boilerplate in GCNSubtarget.h (#176844)
`GCNSubtarget.h` contained a large amount of repetitive code following
the pattern `bool HasXXX = false;` for member declarations and `bool
hasXXX() const { return HasXXX; }` for getters. This boilerplate made
the file unnecessarily long and harder to maintain.
This patch introduces an X-macro pattern `GCN_SUBTARGET_HAS_FEATURE`
that consolidates 135 simple subtarget features into a single list. The
macro is expanded twice: once in the protected section to generate
member variable declarations, and once in the public section to generate
the corresponding getter methods. This reduces the file by approximately
600 lines while preserving the exact same API and functionality.
Features with complex getter logic or inconsistent naming conventions
are left as manual implementations for future improvement.
Ideally, these member declarations and getters could be generated by TableGen
using `GET_SUBTARGETINFO_MACRO`, similar to the X86 backend. However,
`AMDGPU.td` has several issues that prevent direct adoption: duplicate
field names (e.g., `DumpCode` is set by both `FeatureDumpCode` and
`FeatureDumpCodeLower`), and inconsistent naming conventions where many
features don't have the `Has` prefix (e.g., `FlatAddressSpace`,
`GFX10Insts`, `FP64`). Fixing these issues would require renaming fields
in `AMDGPU.td` and updating all references, which is left for future
work.
This commit is contained in:
parent f3aa84c810
commit 1843a7fe9f
File diff suppressed because it is too large
Load Diff
@@ -773,7 +773,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
 ScratchRsrcReg, ScratchWaveOffsetReg);
 }
 
-if (ST.hasWaitXCnt()) {
+if (ST.hasWaitXcnt()) {
 // Set REPLAY_MODE (bit 25) in MODE register to enable multi-group XNACK
 // replay. This aligns hardware behavior with the compiler's s_wait_xcnt
 // insertion logic, which assumes multi-group mode by default.

@@ -18660,12 +18660,12 @@ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
 // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
 // allocations work.
 if (HasSystemScope) {
-if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
+if (Subtarget.hasAgentScopeFineGrainedRemoteMemoryAtomics() &&
 RMW->hasMetadata("amdgpu.no.remote.memory"))
 return true;
 if (Subtarget.hasEmulatedSystemScopeAtomics())
 return true;
-} else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
+} else if (Subtarget.hasAgentScopeFineGrainedRemoteMemoryAtomics())
 return true;
 
 return RMW->hasMetadata("amdgpu.no.fine.grained.memory");
@@ -18775,7 +18775,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const {
 // If fine-grained remote memory works at device scope, we don't need to
 // do anything.
 if (!HasSystemScope &&
-Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics())
+Subtarget->hasAgentScopeFineGrainedRemoteMemoryAtomics())
 return atomicSupportedIfLegalIntType(RMW);
 
 // If we are targeting a remote allocated address, it depends what kind of

@@ -2407,7 +2407,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
 ScoreBrackets.determineWaitForPhysReg(SmemAccessCounter, Reg, Wait);
 }
 
-if (ST->hasWaitXCnt() && Op.isDef())
+if (ST->hasWaitXcnt() && Op.isDef())
 ScoreBrackets.determineWaitForPhysReg(X_CNT, Reg, Wait);
 }
 }
@@ -2744,7 +2744,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
 }
 }
 
-if (!ST->hasWaitXCnt())
+if (!ST->hasWaitXcnt())
 return;
 
 if (IsVMEMAccess)
Loading…
x
Reference in New Issue
Block a user