[AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM (#125351)

This commit is contained in:
Akshat Oke 2025-02-20 17:35:56 +05:30 committed by GitHub
parent 505d35aad3
commit 9855d761f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 59 additions and 13 deletions

View File

@ -368,7 +368,7 @@ struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
}; };
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &);
extern char &SIOptimizeExecMaskingPreRAID; extern char &SIOptimizeExecMaskingPreRAID;
void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &); void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &);

View File

@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass()) MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass()) MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass()) MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
@ -130,7 +131,6 @@ DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass()) DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass()) DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass()) DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
DUMMY_MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists. // already exists.

View File

@ -51,6 +51,7 @@
#include "SIMachineFunctionInfo.h" #include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h" #include "SIMachineScheduler.h"
#include "SIOptimizeExecMasking.h" #include "SIOptimizeExecMasking.h"
#include "SIOptimizeExecMaskingPreRA.h"
#include "SIOptimizeVGPRLiveRange.h" #include "SIOptimizeVGPRLiveRange.h"
#include "SIPeepholeSDWA.h" #include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h" #include "SIPreAllocateWWMRegs.h"
@ -501,7 +502,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIFoldOperandsLegacyPass(*PR); initializeSIFoldOperandsLegacyPass(*PR);
initializeSIPeepholeSDWALegacyPass(*PR); initializeSIPeepholeSDWALegacyPass(*PR);
initializeSIShrinkInstructionsLegacyPass(*PR); initializeSIShrinkInstructionsLegacyPass(*PR);
initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSIOptimizeExecMaskingPreRALegacyPass(*PR);
initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR); initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
initializeSILoadStoreOptimizerLegacyPass(*PR); initializeSILoadStoreOptimizerLegacyPass(*PR);
initializeAMDGPUCtorDtorLoweringLegacyPass(*PR); initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);

View File

@ -12,6 +12,7 @@
/// ///
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "SIOptimizeExecMaskingPreRA.h"
#include "AMDGPU.h" #include "AMDGPU.h"
#include "GCNSubtarget.h" #include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@ -25,7 +26,7 @@ using namespace llvm;
namespace { namespace {
class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { class SIOptimizeExecMaskingPreRA {
private: private:
const SIRegisterInfo *TRI; const SIRegisterInfo *TRI;
const SIInstrInfo *TII; const SIInstrInfo *TII;
@ -42,11 +43,18 @@ private:
bool optimizeVcndVcmpPair(MachineBasicBlock &MBB); bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
bool optimizeElseBranch(MachineBasicBlock &MBB); bool optimizeElseBranch(MachineBasicBlock &MBB);
public:
SIOptimizeExecMaskingPreRA(LiveIntervals *LIS) : LIS(LIS) {}
bool run(MachineFunction &MF);
};
class SIOptimizeExecMaskingPreRALegacy : public MachineFunctionPass {
public: public:
static char ID; static char ID;
SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) { SIOptimizeExecMaskingPreRALegacy() : MachineFunctionPass(ID) {
initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry()); initializeSIOptimizeExecMaskingPreRALegacyPass(
*PassRegistry::getPassRegistry());
} }
bool runOnMachineFunction(MachineFunction &MF) override; bool runOnMachineFunction(MachineFunction &MF) override;
@ -64,18 +72,18 @@ public:
} // End anonymous namespace. } // End anonymous namespace.
INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
"SI optimize exec mask operations pre-RA", false, false) "SI optimize exec mask operations pre-RA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
"SI optimize exec mask operations pre-RA", false, false) "SI optimize exec mask operations pre-RA", false, false)
char SIOptimizeExecMaskingPreRA::ID = 0; char SIOptimizeExecMaskingPreRALegacy::ID = 0;
char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID; char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRALegacy::ID;
FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() { FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
return new SIOptimizeExecMaskingPreRA(); return new SIOptimizeExecMaskingPreRALegacy();
} }
// See if there is a def between \p AndIdx and \p SelIdx that needs to live // See if there is a def between \p AndIdx and \p SelIdx that needs to live
@ -340,15 +348,28 @@ bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {
return true; return true;
} }
bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { PreservedAnalyses
SIOptimizeExecMaskingPreRAPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
SIOptimizeExecMaskingPreRA(&LIS).run(MF);
return PreservedAnalyses::all();
}
bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(
MachineFunction &MF) {
if (skipFunction(MF.getFunction())) if (skipFunction(MF.getFunction()))
return false; return false;
auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
return SIOptimizeExecMaskingPreRA(LIS).run(MF);
}
bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TRI = ST.getRegisterInfo(); TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo(); TII = ST.getInstrInfo();
MRI = &MF.getRegInfo(); MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
const bool Wave32 = ST.isWave32(); const bool Wave32 = ST.isWave32();
AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;

View File

@ -0,0 +1,23 @@
//===- SIOptimizeExecMaskingPreRA.h -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
#include "llvm/CodeGen/MachinePassManager.h"
namespace llvm {
/// New pass manager entry point for the SI pre-RA exec mask optimization.
/// Registered under the name "si-optimize-exec-masking-pre-ra".
class SIOptimizeExecMaskingPreRAPass
: public PassInfoMixin<SIOptimizeExecMaskingPreRAPass> {
public:
/// Fetches the LiveIntervalsAnalysis result for \p MF from \p MFAM and runs
/// the optimization on \p MF with it. The definition reports all analyses
/// as preserved regardless of whether anything changed.
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
};
} // namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -passes=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
# FIXME: This is a miscompile, and the s_or_b64s need to be preserved. # FIXME: This is a miscompile, and the s_or_b64s need to be preserved.