diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 9eb1d7efb106..d040aa8f3827 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2345,10 +2345,9 @@ class AMDGPUGlobalLoadLDS : "", [SDNPMemOperand]>; def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS; -// Use read/write of inaccessible memory to model the fact that this reads a -// volatile value. +// This is IntrHasSideEffects because it reads from a volatile hardware register. def int_amdgcn_pops_exiting_wave_id : - DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>; //===----------------------------------------------------------------------===// // GFX10 Intrinsics diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 64868c24a9e7..6d5ffc66d98b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2583,15 +2583,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) { CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); } -void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) { - // TODO: Select this with a tablegen pattern. This is tricky because the - // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked - // mayLoad/mayStore and tablegen complains about the mismatch. 
- SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32); - SDValue Chain = N->getOperand(0); - CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain}); -} - static unsigned gwsIntrinToOpcode(unsigned IntrID) { switch (IntrID) { case Intrinsic::amdgcn_ds_gws_init: @@ -2748,9 +2739,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { case Intrinsic::amdgcn_ds_bvh_stack_rtn: SelectDSBvhStackIntrinsic(N); return; - case Intrinsic::amdgcn_pops_exiting_wave_id: - SelectPOPSExitingWaveID(N); - return; } SelectCode(N); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index bb3bcc221a12..e7911bc1793d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -269,7 +269,6 @@ private: void SelectFP_EXTEND(SDNode *N); void SelectDSAppendConsume(SDNode *N, unsigned IntrID); void SelectDSBvhStackIntrinsic(SDNode *N); - void SelectPOPSExitingWaveID(SDNode *N); void SelectDS_GWS(SDNode *N, unsigned IntrID); void SelectInterpP1F16(SDNode *N); void SelectINTRINSIC_W_CHAIN(SDNode *N); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 5959fcfe32be..a3cb3b3f47e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic( return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); } -bool AMDGPUInstructionSelector::selectPOPSExitingWaveID( - MachineInstr &MI) const { - Register Dst = MI.getOperand(0).getReg(); - const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock *MBB = MI.getParent(); - - // TODO: Select this with a tablegen pattern. This is tricky because the - // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked - // mayLoad/mayStore and tablegen complains about the mismatch. 
- auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst) - .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID); - MI.eraseFromParent(); - return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); -} - bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MachineInstr &I) const { Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID(); @@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( return selectSBarrierSignalIsfirst(I, IntrinsicID); case Intrinsic::amdgcn_s_barrier_leave: return selectSBarrierLeave(I); - case Intrinsic::amdgcn_pops_exiting_wave_id: - return selectPOPSExitingWaveID(I); } return selectImpl(I, *CoverageInfo); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 48f3b1811801..f561d5d29efc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -125,7 +125,6 @@ private: bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const; bool selectSBarrier(MachineInstr &MI) const; bool selectDSBvhStackIntrinsic(MachineInstr &MI) const; - bool selectPOPSExitingWaveID(MachineInstr &MI) const; bool selectImageIntrinsic(MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index e1253d3ed050..64f33199545a 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -215,6 +215,11 @@ let isMoveImm = 1 in { } // End Uses = [SCC] } // End isMoveImm = 1 +// Variant of S_MOV_B32 used for reading from volatile registers like +// SRC_POPS_EXITING_WAVE_ID. 
+let hasSideEffects = 1 in +def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">; + let Defs = [SCC] in { def S_NOT_B32 : SOP1_32 <"s_not_b32", [(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))] @@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in { def : GetFPModePat<fpmode_mask_gfx6plus>; } +let SubtargetPredicate = isGFX9GFX10 in +def : GCNPat< + (int_amdgcn_pops_exiting_wave_id), + (S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID)) +>; + //===----------------------------------------------------------------------===// // SOP2 Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll index aaa4e2b3622d..f3c5ac757e22 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll @@ -36,9 +36,9 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) { define amdgpu_ps void @test_loop() { ; SDAG-LABEL: test_loop: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; SDAG-NEXT: .LBB1_1: ; %loop ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 +; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; SDAG-NEXT: s_cmp_eq_u32 s0, 0 ; SDAG-NEXT: s_cbranch_scc1 .LBB1_1 ; SDAG-NEXT: ; %bb.2: ; %exit @@ -46,9 +46,9 @@ define amdgpu_ps void @test_loop() { ; ; GFX9-GISEL-LABEL: test_loop: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; GFX9-GISEL-NEXT: .LBB1_1: ; %loop ; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0 ; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9-GISEL-NEXT: ; %bb.2: ; %exit @@ -56,9 +56,9 @@ define amdgpu_ps void @test_loop() { ; ; GFX10-GISEL-LABEL: test_loop: ; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; GFX10-GISEL-NEXT: 
.LBB1_1: ; %loop ; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0 ; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX10-GISEL-NEXT: ; %bb.2: ; %exit @@ -77,14 +77,23 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) { ; SDAG: ; %bb.0: ; %entry ; SDAG-NEXT: s_bitcmp0_b32 s0, 0 ; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; SDAG-NEXT: s_cbranch_scc1 .LBB2_2 +; SDAG-NEXT: ; %bb.1: ; %body +; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; SDAG-NEXT: .LBB2_2: ; %exit ; SDAG-NEXT: ; return to shader part epilog ; ; GFX9-GISEL-LABEL: test_if: ; GFX9-GISEL: ; %bb.0: ; %entry ; GFX9-GISEL-NEXT: s_mov_b32 s1, s0 -; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; GFX9-GISEL-NEXT: s_xor_b32 s1, s1, 1 ; GFX9-GISEL-NEXT: s_and_b32 s1, s1, 1 +; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, 0 +; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB2_2 +; GFX9-GISEL-NEXT: ; %bb.1: ; %body +; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; GFX9-GISEL-NEXT: .LBB2_2: ; %exit ; GFX9-GISEL-NEXT: ; return to shader part epilog ; ; GFX10-GISEL-LABEL: test_if: @@ -92,6 +101,11 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) { ; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1 ; GFX10-GISEL-NEXT: s_and_b32 s1, s0, 1 ; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; GFX10-GISEL-NEXT: s_cmp_lg_u32 s1, 0 +; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB2_2 +; GFX10-GISEL-NEXT: ; %bb.1: ; %body +; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; GFX10-GISEL-NEXT: .LBB2_2: ; %exit ; GFX10-GISEL-NEXT: ; return to shader part epilog entry: %id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()