[AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM (#125351)
This commit is contained in:
parent
505d35aad3
commit
9855d761f3
@ -368,7 +368,7 @@ struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
|
|||||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
|
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
|
||||||
};
|
};
|
||||||
|
|
||||||
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
|
void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &);
|
||||||
extern char &SIOptimizeExecMaskingPreRAID;
|
extern char &SIOptimizeExecMaskingPreRAID;
|
||||||
|
|
||||||
void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &);
|
void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &);
|
||||||
|
@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
|
|||||||
MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
|
MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
|
||||||
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
|
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
|
||||||
MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
|
MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
|
||||||
|
MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
|
||||||
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
|
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
|
||||||
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
|
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
|
||||||
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
|
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
|
||||||
@ -130,7 +131,6 @@ DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
|
|||||||
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
|
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
|
||||||
DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
|
DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
|
||||||
DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
|
DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
|
||||||
DUMMY_MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
|
|
||||||
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
|
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
|
||||||
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
|
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
|
||||||
// already exists.
|
// already exists.
|
||||||
|
@ -51,6 +51,7 @@
|
|||||||
#include "SIMachineFunctionInfo.h"
|
#include "SIMachineFunctionInfo.h"
|
||||||
#include "SIMachineScheduler.h"
|
#include "SIMachineScheduler.h"
|
||||||
#include "SIOptimizeExecMasking.h"
|
#include "SIOptimizeExecMasking.h"
|
||||||
|
#include "SIOptimizeExecMaskingPreRA.h"
|
||||||
#include "SIOptimizeVGPRLiveRange.h"
|
#include "SIOptimizeVGPRLiveRange.h"
|
||||||
#include "SIPeepholeSDWA.h"
|
#include "SIPeepholeSDWA.h"
|
||||||
#include "SIPreAllocateWWMRegs.h"
|
#include "SIPreAllocateWWMRegs.h"
|
||||||
@ -501,7 +502,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
|||||||
initializeSIFoldOperandsLegacyPass(*PR);
|
initializeSIFoldOperandsLegacyPass(*PR);
|
||||||
initializeSIPeepholeSDWALegacyPass(*PR);
|
initializeSIPeepholeSDWALegacyPass(*PR);
|
||||||
initializeSIShrinkInstructionsLegacyPass(*PR);
|
initializeSIShrinkInstructionsLegacyPass(*PR);
|
||||||
initializeSIOptimizeExecMaskingPreRAPass(*PR);
|
initializeSIOptimizeExecMaskingPreRALegacyPass(*PR);
|
||||||
initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
|
initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
|
||||||
initializeSILoadStoreOptimizerLegacyPass(*PR);
|
initializeSILoadStoreOptimizerLegacyPass(*PR);
|
||||||
initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
|
initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
///
|
///
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "SIOptimizeExecMaskingPreRA.h"
|
||||||
#include "AMDGPU.h"
|
#include "AMDGPU.h"
|
||||||
#include "GCNSubtarget.h"
|
#include "GCNSubtarget.h"
|
||||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||||
@ -25,7 +26,7 @@ using namespace llvm;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
|
class SIOptimizeExecMaskingPreRA {
|
||||||
private:
|
private:
|
||||||
const SIRegisterInfo *TRI;
|
const SIRegisterInfo *TRI;
|
||||||
const SIInstrInfo *TII;
|
const SIInstrInfo *TII;
|
||||||
@ -42,11 +43,18 @@ private:
|
|||||||
bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
|
bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
|
||||||
bool optimizeElseBranch(MachineBasicBlock &MBB);
|
bool optimizeElseBranch(MachineBasicBlock &MBB);
|
||||||
|
|
||||||
|
public:
|
||||||
|
SIOptimizeExecMaskingPreRA(LiveIntervals *LIS) : LIS(LIS) {}
|
||||||
|
bool run(MachineFunction &MF);
|
||||||
|
};
|
||||||
|
|
||||||
|
class SIOptimizeExecMaskingPreRALegacy : public MachineFunctionPass {
|
||||||
public:
|
public:
|
||||||
static char ID;
|
static char ID;
|
||||||
|
|
||||||
SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
|
SIOptimizeExecMaskingPreRALegacy() : MachineFunctionPass(ID) {
|
||||||
initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
|
initializeSIOptimizeExecMaskingPreRALegacyPass(
|
||||||
|
*PassRegistry::getPassRegistry());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||||
@ -64,18 +72,18 @@ public:
|
|||||||
|
|
||||||
} // End anonymous namespace.
|
} // End anonymous namespace.
|
||||||
|
|
||||||
INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
|
INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
|
||||||
"SI optimize exec mask operations pre-RA", false, false)
|
"SI optimize exec mask operations pre-RA", false, false)
|
||||||
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
|
||||||
INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
|
INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
|
||||||
"SI optimize exec mask operations pre-RA", false, false)
|
"SI optimize exec mask operations pre-RA", false, false)
|
||||||
|
|
||||||
char SIOptimizeExecMaskingPreRA::ID = 0;
|
char SIOptimizeExecMaskingPreRALegacy::ID = 0;
|
||||||
|
|
||||||
char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
|
char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRALegacy::ID;
|
||||||
|
|
||||||
FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
|
FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
|
||||||
return new SIOptimizeExecMaskingPreRA();
|
return new SIOptimizeExecMaskingPreRALegacy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// See if there is a def between \p AndIdx and \p SelIdx that needs to live
|
// See if there is a def between \p AndIdx and \p SelIdx that needs to live
|
||||||
@ -340,15 +348,28 @@ bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
|
PreservedAnalyses
|
||||||
|
SIOptimizeExecMaskingPreRAPass::run(MachineFunction &MF,
|
||||||
|
MachineFunctionAnalysisManager &MFAM) {
|
||||||
|
auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
|
||||||
|
SIOptimizeExecMaskingPreRA(&LIS).run(MF);
|
||||||
|
return PreservedAnalyses::all();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(
|
||||||
|
MachineFunction &MF) {
|
||||||
if (skipFunction(MF.getFunction()))
|
if (skipFunction(MF.getFunction()))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
|
||||||
|
return SIOptimizeExecMaskingPreRA(LIS).run(MF);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) {
|
||||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||||
TRI = ST.getRegisterInfo();
|
TRI = ST.getRegisterInfo();
|
||||||
TII = ST.getInstrInfo();
|
TII = ST.getInstrInfo();
|
||||||
MRI = &MF.getRegInfo();
|
MRI = &MF.getRegInfo();
|
||||||
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
|
|
||||||
|
|
||||||
const bool Wave32 = ST.isWave32();
|
const bool Wave32 = ST.isWave32();
|
||||||
AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
|
AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
|
||||||
|
23
llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
Normal file
23
llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
//===- SIOptimizeExecMaskingPreRA.h -----------------------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
|
||||||
|
#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
|
||||||
|
|
||||||
|
#include "llvm/CodeGen/MachinePassManager.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class SIOptimizeExecMaskingPreRAPass
|
||||||
|
: public PassInfoMixin<SIOptimizeExecMaskingPreRAPass> {
|
||||||
|
public:
|
||||||
|
PreservedAnalyses run(MachineFunction &MF,
|
||||||
|
MachineFunctionAnalysisManager &MFAM);
|
||||||
|
};
|
||||||
|
} // namespace llvm
|
||||||
|
|
||||||
|
#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
|
@ -1,5 +1,6 @@
|
|||||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
|
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
|
||||||
|
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -passes=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
|
||||||
|
|
||||||
# FIXME: This is a miscompile, and the s_or_b64s need to be preserved.
|
# FIXME: This is a miscompile, and the s_or_b64s need to be preserved.
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user