[AMDGPU] Fix unwanted LICM/CSE of llvm.amdgcn.pops.exiting.wave.id (#96190)

Mark both the intrinsic and the selected MachineInstr as having side
effects to prevent MachineLICM and MachineCSE from moving/removing them.
This commit is contained in:
Jay Foad 2024-06-27 09:27:52 +01:00 committed by GitHub
parent 0ae23708ef
commit bf536cc7db
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 31 additions and 38 deletions

View File

@ -2345,10 +2345,9 @@ class AMDGPUGlobalLoadLDS :
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
// Use read/write of inaccessible memory to model the fact that this reads a
// volatile value.
// This is IntrHasSideEffects because it reads from a volatile hardware register.
def int_amdgcn_pops_exiting_wave_id :
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics

View File

@ -2583,15 +2583,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
/// Manually select llvm.amdgcn.pops.exiting.wave.id as a scalar move that
/// reads SRC_POPS_EXITING_WAVE_ID.
///
/// \param N The intrinsic node being selected; operand 0 is used as the
///          incoming chain and its value-type list is reused for the result.
void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
  // TODO: Select this with a tablegen pattern. This is tricky because the
  // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
  // mayLoad/mayStore and tablegen complains about the mismatch.
  SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
  SDValue Chain = N->getOperand(0);
  // The source register is volatile, so emit the hasSideEffects variant of
  // s_mov_b32. With plain S_MOV_B32, MachineLICM and MachineCSE are free to
  // move or remove the read.
  CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32_sideeffects, N->getVTList(),
                       {Reg, Chain});
}
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
@ -2748,9 +2739,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
return;
case Intrinsic::amdgcn_pops_exiting_wave_id:
SelectPOPSExitingWaveID(N);
return;
}
SelectCode(N);

View File

@ -269,7 +269,6 @@ private:
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
void SelectPOPSExitingWaveID(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);

View File

@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
/// Manually select llvm.amdgcn.pops.exiting.wave.id as a scalar move that
/// reads SRC_POPS_EXITING_WAVE_ID into the intrinsic's result register.
///
/// \param MI The intrinsic instruction; operand 0 is its destination. It is
///           erased once the replacement has been inserted before it.
/// \returns the result of constraining the new instruction's register
///          operands.
bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
    MachineInstr &MI) const {
  Register Dst = MI.getOperand(0).getReg();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock *MBB = MI.getParent();
  // TODO: Select this with a tablegen pattern. This is tricky because the
  // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
  // mayLoad/mayStore and tablegen complains about the mismatch.
  //
  // The source register is volatile, so emit the hasSideEffects variant of
  // s_mov_b32. With plain S_MOV_B32, MachineLICM and MachineCSE are free to
  // move or remove the read.
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32_sideeffects), Dst)
                 .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
case Intrinsic::amdgcn_pops_exiting_wave_id:
return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}

View File

@ -125,7 +125,6 @@ private:
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectSBarrier(MachineInstr &MI) const;
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
bool selectPOPSExitingWaveID(MachineInstr &MI) const;
bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;

View File

@ -215,6 +215,11 @@ let isMoveImm = 1 in {
} // End Uses = [SCC]
} // End isMoveImm = 1
// Variant of S_MOV_B32 used for reading from volatile registers like
// SRC_POPS_EXITING_WAVE_ID.
// It keeps the "s_mov_b32" mnemonic but is marked hasSideEffects so that
// MachineLICM and MachineCSE do not move or remove the read.
let hasSideEffects = 1 in
def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;
let Defs = [SCC] in {
def S_NOT_B32 : SOP1_32 <"s_not_b32",
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
def : GetFPModePat<fpmode_mask_gfx6plus>;
}
// Select llvm.amdgcn.pops.exiting.wave.id as a read of
// SRC_POPS_EXITING_WAVE_ID through S_MOV_B32_sideeffects. The pattern is
// guarded by the isGFX9GFX10 subtarget predicate.
let SubtargetPredicate = isGFX9GFX10 in
def : GCNPat<
(int_amdgcn_pops_exiting_wave_id),
(S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
>;
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//

View File

@ -36,9 +36,9 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
define amdgpu_ps void @test_loop() {
; SDAG-LABEL: test_loop:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: .LBB1_1: ; %loop
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: s_cmp_eq_u32 s0, 0
; SDAG-NEXT: s_cbranch_scc1 .LBB1_1
; SDAG-NEXT: ; %bb.2: ; %exit
@ -46,9 +46,9 @@ define amdgpu_ps void @test_loop() {
;
; GFX9-GISEL-LABEL: test_loop:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: .LBB1_1: ; %loop
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9-GISEL-NEXT: ; %bb.2: ; %exit
@ -56,9 +56,9 @@ define amdgpu_ps void @test_loop() {
;
; GFX10-GISEL-LABEL: test_loop:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: .LBB1_1: ; %loop
; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX10-GISEL-NEXT: ; %bb.2: ; %exit
@ -77,14 +77,23 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_bitcmp0_b32 s0, 0
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: s_cbranch_scc1 .LBB2_2
; SDAG-NEXT: ; %bb.1: ; %body
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: .LBB2_2: ; %exit
; SDAG-NEXT: ; return to shader part epilog
;
; GFX9-GISEL-LABEL: test_if:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_mov_b32 s1, s0
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_xor_b32 s1, s1, 1
; GFX9-GISEL-NEXT: s_and_b32 s1, s1, 1
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
; GFX9-GISEL-NEXT: ; %bb.1: ; %body
; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: .LBB2_2: ; %exit
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: test_if:
@ -92,6 +101,11 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
; GFX10-GISEL-NEXT: s_and_b32 s1, s0, 1
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
; GFX10-GISEL-NEXT: ; %bb.1: ; %body
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: .LBB2_2: ; %exit
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
%id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()