[NFCI][AMDGPU] Use X-macro to reduce boilerplate in GCNSubtarget.h (#176844)

`GCNSubtarget.h` contained a large amount of repetitive code following the pattern `bool HasXXX = false;` for member declarations and `bool hasXXX() const { return HasXXX; }` for getters. This boilerplate made the file unnecessarily long and harder to maintain. This patch introduces an X-macro pattern `GCN_SUBTARGET_HAS_FEATURE` that consolidates 135 simple subtarget features into a single list. The macro is expanded twice: once in the protected section to generate member variable declarations, and once in the public section to generate the corresponding getter methods. This reduces the file by approximately 600 lines while preserving the exact same API and functionality. Features with complex getter logic or inconsistent naming conventions are left as manual implementations for future improvement. Ideally, these could be generated by TableGen using `GET_SUBTARGETINFO_MACRO`, similar to the X86 backend. However, `AMDGPU.td` has several issues that prevent direct adoption: duplicate field names (e.g., `DumpCode` is set by both `FeatureDumpCode` and `FeatureDumpCodeLower`), and inconsistent naming conventions where many features don't have the `Has` prefix (e.g., `FlatAddressSpace`, `GFX10Insts`, `FP64`). Fixing these issues would require renaming fields in `AMDGPU.td` and updating all references, which is left for future work.
2026-01-21 15:29:09 -05:00 · 2026-01-21 15:29:09 -05:00 · 1843a7fe9f
commit 1843a7fe9f
parent f3aa84c810
4 changed files with 262 additions and 822 deletions
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -773,7 +773,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }

-  if (ST.hasWaitXCnt()) {
+  if (ST.hasWaitXcnt()) {
    // Set REPLAY_MODE (bit 25) in MODE register to enable multi-group XNACK
    // replay. This aligns hardware behavior with the compiler's s_wait_xcnt
    // insertion logic, which assumes multi-group mode by default.
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18660,12 +18660,12 @@ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
  // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
  // allocations work.
  if (HasSystemScope) {
-    if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
+    if (Subtarget.hasAgentScopeFineGrainedRemoteMemoryAtomics() &&
        RMW->hasMetadata("amdgpu.no.remote.memory"))
      return true;
    if (Subtarget.hasEmulatedSystemScopeAtomics())
      return true;
-  } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
+  } else if (Subtarget.hasAgentScopeFineGrainedRemoteMemoryAtomics())
    return true;

  return RMW->hasMetadata("amdgpu.no.fine.grained.memory");
@@ -18775,7 +18775,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const {
      // If fine-grained remote memory works at device scope, we don't need to
      // do anything.
      if (!HasSystemScope &&
-          Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics())
+          Subtarget->hasAgentScopeFineGrainedRemoteMemoryAtomics())
        return atomicSupportedIfLegalIntType(RMW);

      // If we are targeting a remote allocated address, it depends what kind of
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -2407,7 +2407,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
          ScoreBrackets.determineWaitForPhysReg(SmemAccessCounter, Reg, Wait);
        }

-        if (ST->hasWaitXCnt() && Op.isDef())
+        if (ST->hasWaitXcnt() && Op.isDef())
          ScoreBrackets.determineWaitForPhysReg(X_CNT, Reg, Wait);
      }
    }
@@ -2744,7 +2744,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
    }
  }

-  if (!ST->hasWaitXCnt())
+  if (!ST->hasWaitXcnt())
    return;

  if (IsVMEMAccess)