[AMDGPU] Fix SIFixSGPRCopies handling of STRICT_WWM and friends (#142122)
SIFixSGPRCopies handled STRICT_WWM (and the similar WQM, STRICT_WQM and SOFT_WQM pseudos) like a COPY. In particular, if the source was a VGPR and the result was an SGPR, lowerVGPR2SGPRCopies would replace the pseudo with a readfirstlane, erasing the original pseudo and hence sabotaging the WWM region marking which is supposed to be performed by SIWholeQuadMode. Fix this by handling these pseudos more like INSERT_SUBREG, PHI and REG_SEQUENCE: if the source is a VGPR, then move the result to a VGPR and keep the pseudo.
This commit is contained in:
parent
ccb6b0dafd
commit
f8d3bdf6a2
@ -634,11 +634,7 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
|
|||||||
switch (MI.getOpcode()) {
|
switch (MI.getOpcode()) {
|
||||||
default:
|
default:
|
||||||
continue;
|
continue;
|
||||||
case AMDGPU::COPY:
|
case AMDGPU::COPY: {
|
||||||
case AMDGPU::WQM:
|
|
||||||
case AMDGPU::STRICT_WQM:
|
|
||||||
case AMDGPU::SOFT_WQM:
|
|
||||||
case AMDGPU::STRICT_WWM: {
|
|
||||||
const TargetRegisterClass *SrcRC, *DstRC;
|
const TargetRegisterClass *SrcRC, *DstRC;
|
||||||
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
|
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
|
||||||
|
|
||||||
@ -662,6 +658,10 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case AMDGPU::WQM:
|
||||||
|
case AMDGPU::STRICT_WQM:
|
||||||
|
case AMDGPU::SOFT_WQM:
|
||||||
|
case AMDGPU::STRICT_WWM:
|
||||||
case AMDGPU::INSERT_SUBREG:
|
case AMDGPU::INSERT_SUBREG:
|
||||||
case AMDGPU::PHI:
|
case AMDGPU::PHI:
|
||||||
case AMDGPU::REG_SEQUENCE: {
|
case AMDGPU::REG_SEQUENCE: {
|
||||||
|
40
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
Normal file
40
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||||
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
|
||||||
|
|
||||||
|
define amdgpu_gs i32 @main() {
|
||||||
|
; CHECK-LABEL: main:
|
||||||
|
; CHECK: ; %bb.0: ; %bb
|
||||||
|
; CHECK-NEXT: s_bitcmp1_b32 0, 0
|
||||||
|
; CHECK-NEXT: s_mov_b32 s0, 0
|
||||||
|
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
|
||||||
|
; CHECK-NEXT: s_or_saveexec_b32 s2, -1
|
||||||
|
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
|
||||||
|
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
|
; CHECK-NEXT: v_readfirstlane_b32 s1, v0
|
||||||
|
; CHECK-NEXT: s_mov_b32 exec_lo, s2
|
||||||
|
; CHECK-NEXT: s_or_b32 s0, s0, s1
|
||||||
|
; CHECK-NEXT: s_wait_alu 0xfffe
|
||||||
|
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
|
||||||
|
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
|
||||||
|
; CHECK-NEXT: s_wait_alu 0xfffe
|
||||||
|
; CHECK-NEXT: s_xor_b32 s0, s0, -1
|
||||||
|
; CHECK-NEXT: s_wait_alu 0xfffe
|
||||||
|
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
|
||||||
|
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||||
|
; CHECK-NEXT: v_readfirstlane_b32 s0, v1
|
||||||
|
; CHECK-NEXT: s_wait_alu 0xf1ff
|
||||||
|
; CHECK-NEXT: ; return to shader part epilog
|
||||||
|
bb:
|
||||||
|
%i = call i1 @llvm.amdgcn.readfirstlane.i1(i1 false)
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
bb1:
|
||||||
|
%i2 = zext i1 %i to i32
|
||||||
|
%i3 = call i32 @llvm.amdgcn.wwm.i32(i32 0)
|
||||||
|
%i4 = call i32 @llvm.amdgcn.wwm.i32(i32 %i2)
|
||||||
|
%i5 = trunc i32 %i4 to i1
|
||||||
|
%i6 = trunc i32 %i3 to i1
|
||||||
|
%i7 = or i1 %i6, %i5
|
||||||
|
%i8 = select i1 %i7, i32 0, i32 1
|
||||||
|
ret i32 %i8
|
||||||
|
}
|
27
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir
Normal file
27
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.mir
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||||
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=si-fix-sgpr-copies %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
---
|
||||||
|
name: main
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
; CHECK-LABEL: name: main
|
||||||
|
; CHECK: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
||||||
|
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||||
|
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[DEF1]], implicit $exec
|
||||||
|
; CHECK-NEXT: early-clobber %2:sreg_32 = STRICT_WWM killed undef [[V_READFIRSTLANE_B32_]], implicit $exec
|
||||||
|
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed undef [[DEF]], killed undef %2, implicit-def dead $scc
|
||||||
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed undef [[S_OR_B32_]], implicit-def dead $scc
|
||||||
|
; CHECK-NEXT: S_CMP_EQ_U32 killed undef [[S_AND_B32_]], 1, implicit-def $scc
|
||||||
|
; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 killed undef [[S_AND_B32_]], killed undef [[S_AND_B32_]], implicit-def dead $scc
|
||||||
|
; CHECK-NEXT: SI_RETURN_TO_EPILOG undef $sgpr0
|
||||||
|
%0:sreg_32 = IMPLICIT_DEF
|
||||||
|
%1:vgpr_32 = IMPLICIT_DEF
|
||||||
|
early-clobber %2:sreg_32 = STRICT_WWM killed undef %1, implicit $exec
|
||||||
|
%3:sreg_32 = S_OR_B32 killed undef %0, killed undef %2, implicit-def dead $scc
|
||||||
|
%4:sreg_32 = S_AND_B32 1, killed undef %3, implicit-def dead $scc
|
||||||
|
S_CMP_EQ_U32 killed undef %4, 1, implicit-def $scc
|
||||||
|
%5:sreg_32_xm0_xexec = S_XOR_B32 killed undef %4, killed undef %4, implicit-def dead $scc
|
||||||
|
SI_RETURN_TO_EPILOG undef $sgpr0
|
||||||
|
...
|
Loading…
x
Reference in New Issue
Block a user