[AMDGPU] Fix unwanted LICM/CSE of llvm.amdgcn.pops.exiting.wave.id (#96190)
Mark both the intrinsic and the selected MachineInstr as having side effects to prevent MachineLICM and MachineCSE from moving/removing them.
This commit is contained in:
parent
0ae23708ef
commit
bf536cc7db
@ -2345,10 +2345,9 @@ class AMDGPUGlobalLoadLDS :
|
||||
"", [SDNPMemOperand]>;
|
||||
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
|
||||
|
||||
// Use read/write of inaccessible memory to model the fact that this reads a
|
||||
// volatile value.
|
||||
// This is IntrHasSideEffects because it reads from a volatile hardware register.
|
||||
def int_amdgcn_pops_exiting_wave_id :
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
|
||||
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX10 Intrinsics
|
||||
|
||||
@ -2583,15 +2583,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
|
||||
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
|
||||
}
|
||||
|
||||
void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
|
||||
// TODO: Select this with a tablegen pattern. This is tricky because the
|
||||
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
|
||||
// mayLoad/mayStore and tablegen complains about the mismatch.
|
||||
SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
|
||||
SDValue Chain = N->getOperand(0);
|
||||
CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
|
||||
}
|
||||
|
||||
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_ds_gws_init:
|
||||
@ -2748,9 +2739,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
|
||||
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
|
||||
SelectDSBvhStackIntrinsic(N);
|
||||
return;
|
||||
case Intrinsic::amdgcn_pops_exiting_wave_id:
|
||||
SelectPOPSExitingWaveID(N);
|
||||
return;
|
||||
}
|
||||
|
||||
SelectCode(N);
|
||||
|
||||
@ -269,7 +269,6 @@ private:
|
||||
void SelectFP_EXTEND(SDNode *N);
|
||||
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
|
||||
void SelectDSBvhStackIntrinsic(SDNode *N);
|
||||
void SelectPOPSExitingWaveID(SDNode *N);
|
||||
void SelectDS_GWS(SDNode *N, unsigned IntrID);
|
||||
void SelectInterpP1F16(SDNode *N);
|
||||
void SelectINTRINSIC_W_CHAIN(SDNode *N);
|
||||
|
||||
@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
|
||||
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
|
||||
MachineInstr &MI) const {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
MachineBasicBlock *MBB = MI.getParent();
|
||||
|
||||
// TODO: Select this with a tablegen pattern. This is tricky because the
|
||||
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
|
||||
// mayLoad/mayStore and tablegen complains about the mismatch.
|
||||
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
|
||||
.addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
|
||||
MI.eraseFromParent();
|
||||
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
||||
MachineInstr &I) const {
|
||||
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
|
||||
@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
||||
return selectSBarrierSignalIsfirst(I, IntrinsicID);
|
||||
case Intrinsic::amdgcn_s_barrier_leave:
|
||||
return selectSBarrierLeave(I);
|
||||
case Intrinsic::amdgcn_pops_exiting_wave_id:
|
||||
return selectPOPSExitingWaveID(I);
|
||||
}
|
||||
return selectImpl(I, *CoverageInfo);
|
||||
}
|
||||
|
||||
@ -125,7 +125,6 @@ private:
|
||||
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
|
||||
bool selectSBarrier(MachineInstr &MI) const;
|
||||
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
|
||||
bool selectPOPSExitingWaveID(MachineInstr &MI) const;
|
||||
|
||||
bool selectImageIntrinsic(MachineInstr &MI,
|
||||
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
|
||||
|
||||
@ -215,6 +215,11 @@ let isMoveImm = 1 in {
|
||||
} // End Uses = [SCC]
|
||||
} // End isMoveImm = 1
|
||||
|
||||
// Variant of S_MOV_B32 used for reading from volatile registers like
|
||||
// SRC_POPS_EXITING_WAVE_ID.
|
||||
let hasSideEffects = 1 in
|
||||
def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;
|
||||
|
||||
let Defs = [SCC] in {
|
||||
def S_NOT_B32 : SOP1_32 <"s_not_b32",
|
||||
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
|
||||
@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
|
||||
def : GetFPModePat<fpmode_mask_gfx6plus>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = isGFX9GFX10 in
|
||||
def : GCNPat<
|
||||
(int_amdgcn_pops_exiting_wave_id),
|
||||
(S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SOP2 Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -36,9 +36,9 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
|
||||
define amdgpu_ps void @test_loop() {
|
||||
; SDAG-LABEL: test_loop:
|
||||
; SDAG: ; %bb.0:
|
||||
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; SDAG-NEXT: .LBB1_1: ; %loop
|
||||
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; SDAG-NEXT: s_cmp_eq_u32 s0, 0
|
||||
; SDAG-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; SDAG-NEXT: ; %bb.2: ; %exit
|
||||
@ -46,9 +46,9 @@ define amdgpu_ps void @test_loop() {
|
||||
;
|
||||
; GFX9-GISEL-LABEL: test_loop:
|
||||
; GFX9-GISEL: ; %bb.0:
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX9-GISEL-NEXT: .LBB1_1: ; %loop
|
||||
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0
|
||||
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; GFX9-GISEL-NEXT: ; %bb.2: ; %exit
|
||||
@ -56,9 +56,9 @@ define amdgpu_ps void @test_loop() {
|
||||
;
|
||||
; GFX10-GISEL-LABEL: test_loop:
|
||||
; GFX10-GISEL: ; %bb.0:
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX10-GISEL-NEXT: .LBB1_1: ; %loop
|
||||
; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
|
||||
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
|
||||
; GFX10-GISEL-NEXT: ; %bb.2: ; %exit
|
||||
@ -77,14 +77,23 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
|
||||
; SDAG: ; %bb.0: ; %entry
|
||||
; SDAG-NEXT: s_bitcmp0_b32 s0, 0
|
||||
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; SDAG-NEXT: s_cbranch_scc1 .LBB2_2
|
||||
; SDAG-NEXT: ; %bb.1: ; %body
|
||||
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; SDAG-NEXT: .LBB2_2: ; %exit
|
||||
; SDAG-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-GISEL-LABEL: test_if:
|
||||
; GFX9-GISEL: ; %bb.0: ; %entry
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s1, s0
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX9-GISEL-NEXT: s_xor_b32 s1, s1, 1
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s1, s1, 1
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
|
||||
; GFX9-GISEL-NEXT: ; %bb.1: ; %body
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX9-GISEL-NEXT: .LBB2_2: ; %exit
|
||||
; GFX9-GISEL-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-GISEL-LABEL: test_if:
|
||||
@ -92,6 +101,11 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
|
||||
; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
|
||||
; GFX10-GISEL-NEXT: s_and_b32 s1, s0, 1
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX10-GISEL-NEXT: s_cmp_lg_u32 s1, 0
|
||||
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
|
||||
; GFX10-GISEL-NEXT: ; %bb.1: ; %body
|
||||
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
|
||||
; GFX10-GISEL-NEXT: .LBB2_2: ; %exit
|
||||
; GFX10-GISEL-NEXT: ; return to shader part epilog
|
||||
entry:
|
||||
%id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user