[RISCV] Don't lose elements from False in vmerge -> vmv peephole (#149720)

In the vmerge peephole, we currently allow different AVLs for the vmerge
and its true operand.
If vmerge's VL > true's VL, vmerge can "preserve" elements from false
that would otherwise be clobbered with a tail agnostic policy on true.

    mask	1 1 1 1 0 0 0 0
    true	x x x x|. . . . AVL=4
    vmerge	x x x x f f|. . AVL=6

If we convert this to vmv.v.v we will lose those false elements:

    mask	1 1 1 1 0 0 0 0
    true	x x x x|. . . . AVL=4
    vmv.v.v	x x x x . .|. . AVL=6

Fix this by checking that vmerge's AVL is <= true's AVL.

Should fix #149335

(cherry picked from commit eafe31b293a5166522fff4f3e2d88c2b5c881381)
This commit is contained in:
Luke Lau 2025-07-22 15:21:42 +08:00 committed by Tobias Hieta
parent ba5aa84b54
commit 5345dc9cd3
3 changed files with 84 additions and 10 deletions

View File

@ -434,6 +434,15 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
return false;
// Masked off lanes past TrueVL will come from False, and converting to vmv
// will lose these lanes unless MIVL <= TrueVL.
// TODO: We could relax this for False == Passthru and True policy == TU
const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
const MachineOperand &TrueVL =
True->getOperand(RISCVII::getVLOpNum(True->getDesc()));
if (!RISCV::isVLKnownLE(MIVL, TrueVL))
return false;
// True's passthru needs to be equivalent to False
Register TruePassthruReg = True->getOperand(1).getReg();
Register FalseReg = MI.getOperand(2).getReg();

View File

@ -78,12 +78,12 @@ body: |
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vmv0 = COPY $v0
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 4, 5 /* e32 */, 0 /* tu, mu */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vmv0 = COPY $v0
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 8, 5 /* e32 */, 0 /* tu, mu */
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 4, 5 /* e32 */
...
---
# Shouldn't be converted because false operands are different
@ -163,3 +163,47 @@ body: |
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
bb.1:
%5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */
...
---
# Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
name: preserve_false
body: |
bb.0:
liveins: $v8, $v9, $v0, $x8, $x9
; CHECK-LABEL: name: preserve_false
; CHECK: liveins: $v8, $v9, $v0, $x8, $x9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
; CHECK-NEXT: %false:vr = COPY $v9
; CHECK-NEXT: %mask:vmv0 = COPY $v0
; CHECK-NEXT: %avl1:gprnox0 = COPY $x8
; CHECK-NEXT: %avl2:gprnox0 = COPY $x9
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
%pt:vrnov0 = COPY $v8
%false:vr = COPY $v9
%mask:vmv0 = COPY $v0
%avl1:gprnox0 = COPY $x8
%avl2:gprnox0 = COPY $x9
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
%5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
...
---
# But we can convert this one because vmerge's avl being <= true's means we don't lose any false elements past avl.
name: preserve_false_avl_known_le
body: |
bb.0:
liveins: $v8, $v9, $v0
; CHECK-LABEL: name: preserve_false_avl_known_le
; CHECK: liveins: $v8, $v9, $v0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vr = COPY $v8
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vmv0 = COPY $v0
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 1, 5 /* e32 */, 3 /* ta, ma */
; CHECK-NEXT: [[PseudoVMV_V_V_M1_:%[0-9]+]]:vr = PseudoVMV_V_V_M1 %pt, %true, 1, 5 /* e32 */, 0 /* tu, mu */
%pt:vrnov0 = COPY $v8
%false:vr = COPY $v9
%mask:vmv0 = COPY $v0
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 2, 5 /* e32 */, 3 /* ta, ma */
%5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 1, 5 /* e32 */

View File

@ -71,10 +71,31 @@ define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64>
ret <vscale x 8 x i64> %1
}
declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
; Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) {
; CHECK-LABEL: preserve_false:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vle32.v v10, (a0), v0.t
; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
; CHECK-NEXT: ret
%true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3)
%res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2)
ret <vscale x 2 x i32> %res
}
; Can fold this because its avl is known to be <= than true, so no elements from false need to be introduced past avl.
define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: preserve_false_avl_known_le:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v9, (a0), v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
%res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1)
ret <vscale x 2 x i32> %res
}