[RISCV] Don't lose elements from False in vmerge -> vmv peephole (#149720)
In the vmerge peephole, we currently allow different AVLs for the vmerge and its true operand.

If vmerge's VL > true's VL, vmerge can "preserve" elements from false that would otherwise be clobbered with a tail agnostic policy on true.

    mask     1 1 1 1 0 0 0 0
    true     x x x x|. . . .   AVL=4
    vmerge   x x x x f f|. .   AVL=6

If we convert this to vmv.v.v we will lose those false elements:

    mask     1 1 1 1 0 0 0 0
    true     x x x x|. . . .   AVL=4
    vmv.v.v  x x x x . .|. .   AVL=6

Fix this by checking that vmerge's AVL is <= true's AVL.

Should fix #149335

(cherry picked from commit eafe31b293a5166522fff4f3e2d88c2b5c881381)
This commit is contained in:
parent
ba5aa84b54
commit
5345dc9cd3
@ -434,6 +434,15 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
|
||||
if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
|
||||
return false;
|
||||
|
||||
// Masked off lanes past TrueVL will come from False, and converting to vmv
|
||||
// will lose these lanes unless MIVL <= TrueVL.
|
||||
// TODO: We could relax this for False == Passthru and True policy == TU
|
||||
const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
|
||||
const MachineOperand &TrueVL =
|
||||
True->getOperand(RISCVII::getVLOpNum(True->getDesc()));
|
||||
if (!RISCV::isVLKnownLE(MIVL, TrueVL))
|
||||
return false;
|
||||
|
||||
// True's passthru needs to be equivalent to False
|
||||
Register TruePassthruReg = True->getOperand(1).getReg();
|
||||
Register FalseReg = MI.getOperand(2).getReg();
|
||||
|
@ -78,12 +78,12 @@ body: |
|
||||
; CHECK-NEXT: %false:vrnov0 = COPY $v9
|
||||
; CHECK-NEXT: %mask:vmv0 = COPY $v0
|
||||
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
|
||||
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
|
||||
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 4, 5 /* e32 */, 0 /* tu, mu */
|
||||
%pt:vrnov0 = COPY $v8
|
||||
%false:vrnov0 = COPY $v9
|
||||
%mask:vmv0 = COPY $v0
|
||||
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
|
||||
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */
|
||||
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 8, 5 /* e32 */, 0 /* tu, mu */
|
||||
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 4, 5 /* e32 */
|
||||
...
|
||||
---
|
||||
# Shouldn't be converted because false operands are different
|
||||
@ -163,3 +163,47 @@ body: |
|
||||
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
|
||||
bb.1:
|
||||
%5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */
|
||||
...
|
||||
---
|
||||
# Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
|
||||
name: preserve_false
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $v8, $v9, $v0, $x8, $x9
|
||||
; CHECK-LABEL: name: preserve_false
|
||||
; CHECK: liveins: $v8, $v9, $v0, $x8, $x9
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
|
||||
; CHECK-NEXT: %false:vr = COPY $v9
|
||||
; CHECK-NEXT: %mask:vmv0 = COPY $v0
|
||||
; CHECK-NEXT: %avl1:gprnox0 = COPY $x8
|
||||
; CHECK-NEXT: %avl2:gprnox0 = COPY $x9
|
||||
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
|
||||
; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
|
||||
%pt:vrnov0 = COPY $v8
|
||||
%false:vr = COPY $v9
|
||||
%mask:vmv0 = COPY $v0
|
||||
%avl1:gprnox0 = COPY $x8
|
||||
%avl2:gprnox0 = COPY $x9
|
||||
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
|
||||
%5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
|
||||
...
|
||||
---
|
||||
# But we can convert this one because vmerge's avl being <= true's means we don't lose any false elements past avl.
|
||||
name: preserve_false_avl_known_le
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $v8, $v9, $v0
|
||||
; CHECK-LABEL: name: preserve_false_avl_known_le
|
||||
; CHECK: liveins: $v8, $v9, $v0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %pt:vr = COPY $v8
|
||||
; CHECK-NEXT: %false:vrnov0 = COPY $v9
|
||||
; CHECK-NEXT: %mask:vmv0 = COPY $v0
|
||||
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 1, 5 /* e32 */, 3 /* ta, ma */
|
||||
; CHECK-NEXT: [[PseudoVMV_V_V_M1_:%[0-9]+]]:vr = PseudoVMV_V_V_M1 %pt, %true, 1, 5 /* e32 */, 0 /* tu, mu */
|
||||
%pt:vrnov0 = COPY $v8
|
||||
%false:vr = COPY $v9
|
||||
%mask:vmv0 = COPY $v0
|
||||
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 2, 5 /* e32 */, 3 /* ta, ma */
|
||||
%5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 1, 5 /* e32 */
|
||||
|
@ -71,10 +71,31 @@ define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64>
|
||||
ret <vscale x 8 x i64> %1
|
||||
}
|
||||
|
||||
declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
|
||||
declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
|
||||
declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
|
||||
declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
|
||||
declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
|
||||
declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
|
||||
declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
|
||||
; Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
|
||||
define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) {
|
||||
; CHECK-LABEL: preserve_false:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vle32.v v10, (a0), v0.t
|
||||
; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
|
||||
; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
|
||||
; CHECK-NEXT: ret
|
||||
%true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3)
|
||||
%res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2)
|
||||
ret <vscale x 2 x i32> %res
|
||||
}
|
||||
|
||||
; Can fold this because vmerge's AVL is known to be <= true's AVL, so no elements from false need to be introduced past the AVL.
|
||||
define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
|
||||
; CHECK-LABEL: preserve_false_avl_known_le:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
|
||||
; CHECK-NEXT: vle32.v v9, (a0), v0.t
|
||||
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
|
||||
; CHECK-NEXT: vmv.v.v v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
%true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
|
||||
%res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1)
|
||||
ret <vscale x 2 x i32> %res
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user