Philip Reames b5aaf9d988
[InstCombine] Implement vp.reverse reordering/elimination through binop/unop (#143963)
This simply copies the structure of the vector.reverse patterns from
just above, and reimplements them for the vp.reverse intrinsics when the
mask is all ones and the EVLs exactly match.

It's unfortunate that we have three different ways to represent a reverse
(shuffle, vector.reverse, and vp.reverse), but I don't see an obvious way
to remove any of them because the semantics are slightly different.

This significantly improves vectorization in TSVC_2's s112 and s1112
loops when using EVL tail folding.
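
Schematically, the fold exercised by the tests below is (pseudo-IR; the
value names are illustrative, not taken from the patch):

  %a.rev = vp.reverse(%a, splat(true), %evl)
  %b.rev = vp.reverse(%b, splat(true), %evl)
  %add = binop %a.rev, %b.rev
  %add.rev = vp.reverse(%add, splat(true), %evl)
    =>
  %add.rev = binop %a, %b

The binop is hoisted above the reverses, and the resulting back-to-back
reverses then cancel.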
2025-06-18 08:53:45 -07:00

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
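; reverse(binop(reverse(a), reverse(b))) is folded to binop(a, b) when
; both masks are all ones and all three EVLs match.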
define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
; CHECK-LABEL: @binop_reverse_elim(
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x i32> %add.rev
}
; Negative test - for the fold to be correct, the mask of the outer
; reverse would need to be the element-wise reverse of the inner mask;
; reusing the same mask is only safe when the mask is all ones.
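; For example (assuming disabled vp.reverse lanes produce poison), with
; evl=4 and m=<1,1,0,0>: lane 0 of %add.rev is enabled and reads lane 3
; of %add, but lane 3 was disabled in the inner reverses, so poison
; would leak into an enabled result lane.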
define <vscale x 4 x i32> @binop_reverse_elim_samemask(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
; CHECK-LABEL: @binop_reverse_elim_samemask(
; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> [[M:%.*]], i32 [[EVL:%.*]])
; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl)
%b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
%add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i32> %add.rev
}
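; Negative test - the masks (and the outer EVL) differ between the inner
; and outer reverses, so no fold applies.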
define <vscale x 4 x i32> @binop_reverse_elim_diffmask(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m1, <vscale x 4 x i1> %m2, i32 %evl) {
; CHECK-LABEL: @binop_reverse_elim_diffmask(
; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> [[M1:%.*]], i32 [[EVL:%.*]])
; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> [[M1]], i32 [[EVL]])
; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> [[M2:%.*]], i32 10)
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m1, i32 %evl)
%b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> %m1, i32 %evl)
%add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> %m2, i32 10)
ret <vscale x 4 x i32> %add.rev
}
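; With mismatched EVLs the binop fold still applies (the add is hoisted
; above the reverses, leaving a single reverse of the result), but the
; outer reverse uses EVL 10 rather than %evl, so the two reverses do not
; cancel.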
define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
; CHECK-LABEL: @binop_reverse_elim_diffevl(
; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV:%.*]], [[B_REV:%.*]]
; CHECK-NEXT: [[ADD1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 10)
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 10)
ret <vscale x 4 x i32> %add.rev
}
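; A splat is invariant under reverse, so a binop between a reversed
; operand and a splat still folds; the variants below cover both operand
; orders, with a constant splat and with a splatted scalar argument.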
define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %evl) {
; CHECK-LABEL: @binop_reverse_splat_elim(
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add = add nsw <vscale x 4 x i32> %a.rev, splat (i32 22)
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x i32> %add.rev
}
define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32 %evl) {
; CHECK-LABEL: @binop_reverse_splat_elim2(
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add = add nsw <vscale x 4 x i32> splat (i32 22), %a.rev
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x i32> %add.rev
}
define <vscale x 4 x i32> @binop_reverse_splat_elim3(<vscale x 4 x i32> %a, i32 %b, i32 %evl) {
; CHECK-LABEL: @binop_reverse_splat_elim3(
; CHECK-NEXT: [[B_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
; CHECK-NEXT: [[B_VEC:%.*]] = shufflevector <vscale x 4 x i32> [[B_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[B_VEC]], [[A_REV:%.*]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
%b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%b.vec = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add = add nsw <vscale x 4 x i32> %b.vec, %a.rev
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x i32> %add.rev
}
define <vscale x 4 x i32> @binop_reverse_splat_elim4(<vscale x 4 x i32> %a, i32 %b, i32 %evl) {
; CHECK-LABEL: @binop_reverse_splat_elim4(
; CHECK-NEXT: [[B_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
; CHECK-NEXT: [[B_VEC:%.*]] = shufflevector <vscale x 4 x i32> [[B_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B_VEC]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
;
%b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
%b.vec = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add = add nsw <vscale x 4 x i32> %a.rev, %b.vec
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x i32> %add.rev
}
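; The same fold applies to unary operations.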
define <vscale x 4 x float> @unop_reverse_elim(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 %evl) {
; CHECK-LABEL: @unop_reverse_elim(
; CHECK-NEXT: [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV:%.*]]
; CHECK-NEXT: ret <vscale x 4 x float> [[OP]]
;
%a.rev = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%op = fneg <vscale x 4 x float> %a.rev
%op.rev = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %op, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x float> %op.rev
}