llvm-project/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
Luke Lau dd8d5ffe0d
[RISCV] Sink instructions so AVL dominates in RISCVVLOptimizer (#184155)
The last feature that RISCVVectorPeephole::tryToReduceVL has that
RISCVVLOptimizer doesn't is that if the vl to reduce to doesn't dominate
a vector pseudo, it can sink the pseudo below the vl definition if it's
safe to do so.

This PR shares the logic to check for physical register clobbering in
RISCVInstrInfo, but there may be a better place for it.

The DemandedVLs DenseMap needs to be switched to a MapVector to get
deterministic ordering, since the order in which we sink instructions
can affect the resulting codegen.

This helps remove a few vsetvli toggles in llvm-test-suite.
2026-03-04 05:46:26 +00:00

2580 lines
104 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; Masked llvm.vp.gather of <vscale x 1 x i8> from a vector of pointers, with
; mask %m and explicit vector length %evl.
; The checks expect one masked indexed load at e8/mf8 (vluxei32.v on riscv32,
; vluxei64.v on riscv64) into v9, followed by a copy of v9 into v8.
define <vscale x 1 x i8> @vpgather_nxv1i8(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%v = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i8> %v
}
; Masked llvm.vp.gather of <vscale x 2 x i8> from a vector of pointers.
; The checks expect one masked indexed load at e8/mf4 into a temporary register
; (v9 on riscv32, v10 on riscv64) that is then copied into v8.
define <vscale x 2 x i8> @vpgather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i8> %v
}
; Masked llvm.vp.gather of <vscale x 2 x i8> whose result is sign-extended to
; <vscale x 2 x i16> by a plain (non-VP) sext.
; The checks expect the e8 gather and the extension to stay separate: the
; indexed load runs under the %evl-based vsetvli, then a second vsetvli selects
; e16/mf2 for vsext.vf2, which writes the result to v8.
define <vscale x 2 x i16> @vpgather_nxv2i8_sextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT: vsext.vf2 v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT: vsext.vf2 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
ret <vscale x 2 x i16> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i8> whose result is zero-extended to
; <vscale x 2 x i16> by a plain (non-VP) zext.
; The checks expect the e8 gather followed by a separate vzext.vf2 at e16/mf2
; that writes the result to v8.
define <vscale x 2 x i16> @vpgather_nxv2i8_zextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT: vzext.vf2 v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT: vzext.vf2 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
ret <vscale x 2 x i16> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i8> whose result is sign-extended to
; <vscale x 2 x i32> by a plain (non-VP) sext.
; The checks expect the e8 gather followed by a separate vsext.vf4 at e32/m1
; that writes the result to v8.
define <vscale x 2 x i32> @vpgather_nxv2i8_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vsext.vf4 v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vsext.vf4 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i8> whose result is zero-extended to
; <vscale x 2 x i32> by a plain (non-VP) zext.
; The checks expect the e8 gather followed by a separate vzext.vf4 at e32/m1
; that writes the result to v8.
define <vscale x 2 x i32> @vpgather_nxv2i8_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vzext.vf4 v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vzext.vf4 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i8> whose result is sign-extended to
; <vscale x 2 x i64> by a plain (non-VP) sext.
; The checks expect the e8 gather into v10 followed by a separate vsext.vf8 at
; e64/m2 that writes the result to v8.
define <vscale x 2 x i64> @vpgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vsext.vf8 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vsext.vf8 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i8> whose result is zero-extended to
; <vscale x 2 x i64> by a plain (non-VP) zext.
; The checks expect the e8 gather into v10 followed by a separate vzext.vf8 at
; e64/m2 that writes the result to v8.
define <vscale x 2 x i64> @vpgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vzext.vf8 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vzext.vf8 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %ev
}
; Masked llvm.vp.gather of <vscale x 4 x i8> from a vector of pointers.
; The checks expect one masked indexed load at e8/mf2 into a temporary register
; (v10 on riscv32, v12 on riscv64) that is then copied into v8.
define <vscale x 4 x i8> @vpgather_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i8> %v
}
; llvm.vp.gather of <vscale x 4 x i8> with an all-true mask (splat (i1 1)).
; The checks expect the indexed load to be emitted without the v0.t mask
; operand; the result is still loaded into a temporary and copied into v8.
define <vscale x 4 x i8> @vpgather_truemask_nxv4i8(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8
; RV32-NEXT: vmv1r.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv1r.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret <vscale x 4 x i8> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i8> from a vector of pointers.
; The checks expect one masked indexed load at e8/m1 into a temporary register
; (v12 on riscv32, v16 on riscv64) followed by vmv.v.v into v8.
define <vscale x 8 x i8> @vpgather_nxv8i8(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i8> where the pointers come from a
; getelementptr of i8 off scalar %base with i8 vector indices.
; The checks expect the indices to be sign-extended under the %evl-based vl
; (vsext.vf4 to e32 on riscv32, vsext.vf8 to e64 on riscv64) and then used
; directly as offsets for an indexed load from (a0) at e8/m1.
define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
%v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i8> %v
}
; Masked llvm.vp.gather of <vscale x 32 x i8> where the pointers come from a
; getelementptr of i8 off scalar %base with i8 vector indices.
; The checks expect the gather to be split into several indexed loads:
;  - riscv32 issues two vluxei32.v loads (results in v10 and v8, e8/m2), each
;    preceded by a vsext.vf4 of the matching index registers.
;  - riscv64 issues four vluxei64.v loads (results in v11, v10, v9 and v8,
;    e8/m1), each preceded by a vsext.vf8 of the matching index register.
; Each part gets its own vl derived from the incoming a1 and vlenb through
; sub/sltu/addi/and sequences and bltu/mv pairs, and its own mask in v0: either
; a vslidedown.vx of the incoming mask or a copy restored from v12/v13, where
; the checks save it on entry.
define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv32i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv1r.v v12, v0
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a2, a3, 1
; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: sub a4, a1, a2
; RV32-NEXT: sltu a5, a1, a4
; RV32-NEXT: addi a5, a5, -1
; RV32-NEXT: and a4, a5, a4
; RV32-NEXT: vslidedown.vx v0, v0, a3
; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v16, v10
; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT: vluxei32.v v10, (a0), v16, v0.t
; RV32-NEXT: bltu a1, a2, .LBB12_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB12_2:
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v16, v8
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a3, a2, 1
; RV64-NEXT: srli a4, a2, 2
; RV64-NEXT: sub a5, a1, a3
; RV64-NEXT: vslidedown.vx v13, v0, a4
; RV64-NEXT: sltu a4, a1, a5
; RV64-NEXT: addi a4, a4, -1
; RV64-NEXT: and a5, a4, a5
; RV64-NEXT: sub a4, a5, a2
; RV64-NEXT: sltu a6, a5, a4
; RV64-NEXT: addi a6, a6, -1
; RV64-NEXT: and a6, a6, a4
; RV64-NEXT: srli a4, a2, 3
; RV64-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v13, a4
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v11
; RV64-NEXT: bltu a5, a2, .LBB12_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a5, a2
; RV64-NEXT: .LBB12_2:
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t
; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v10
; RV64-NEXT: vmv1r.v v0, v13
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT: bltu a1, a3, .LBB12_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a1, a3
; RV64-NEXT: .LBB12_4:
; RV64-NEXT: sub a3, a1, a2
; RV64-NEXT: sltu a5, a1, a3
; RV64-NEXT: addi a5, a5, -1
; RV64-NEXT: and a3, a5, a3
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v12, a4
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v9
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vluxei64.v v9, (a0), v16, v0.t
; RV64-NEXT: bltu a1, a2, .LBB12_6
; RV64-NEXT: # %bb.5:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB12_6:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
%v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x i8> %v
}
; Masked llvm.vp.gather of <vscale x 1 x i16> from a vector of pointers.
; The checks expect one masked indexed load at e16/mf4 into v9, followed by a
; copy of v9 into v8.
define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%v = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i16> %v
}
; Masked llvm.vp.gather of <vscale x 2 x i16> from a vector of pointers.
; The checks expect one masked indexed load at e16/mf2 into a temporary register
; (v9 on riscv32, v10 on riscv64) that is then copied into v8.
define <vscale x 2 x i16> @vpgather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i16> %v
}
; Masked llvm.vp.gather of <vscale x 2 x i16> whose result is sign-extended to
; <vscale x 2 x i32> by a plain (non-VP) sext.
; The checks expect the e16 gather followed by a separate vsext.vf2 at e32/m1
; that writes the result to v8.
define <vscale x 2 x i32> @vpgather_nxv2i16_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vsext.vf2 v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vsext.vf2 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i16> whose result is zero-extended to
; <vscale x 2 x i32> by a plain (non-VP) zext.
; The checks expect the e16 gather followed by a separate vzext.vf2 at e32/m1
; that writes the result to v8.
define <vscale x 2 x i32> @vpgather_nxv2i16_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vzext.vf2 v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vzext.vf2 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i16> whose result is sign-extended to
; <vscale x 2 x i64> by a plain (non-VP) sext.
; The checks expect the e16 gather into v10 followed by a separate vsext.vf4 at
; e64/m2 that writes the result to v8.
define <vscale x 2 x i64> @vpgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vsext.vf4 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vsext.vf4 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i16> whose result is zero-extended to
; <vscale x 2 x i64> by a plain (non-VP) zext.
; The checks expect the e16 gather into v10 followed by a separate vzext.vf4 at
; e64/m2 that writes the result to v8.
define <vscale x 2 x i64> @vpgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vzext.vf4 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vzext.vf4 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %ev
}
; Masked llvm.vp.gather of <vscale x 4 x i16> from a vector of pointers.
; The checks expect one masked indexed load at e16/m1 into a temporary register
; (v10 on riscv32, v12 on riscv64) followed by vmv.v.v into v8.
define <vscale x 4 x i16> @vpgather_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i16> %v
}
; llvm.vp.gather of <vscale x 4 x i16> with an all-true mask (splat (i1 1)).
; The checks expect the indexed load to be emitted without the v0.t mask
; operand; the result is loaded into a temporary and moved into v8 by vmv.v.v.
define <vscale x 4 x i16> @vpgather_truemask_nxv4i16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret <vscale x 4 x i16> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i16> from a vector of pointers.
; The checks expect one masked indexed load at e16/m2 into a temporary register
; (v12 on riscv32, v16 on riscv64) followed by vmv.v.v into v8.
define <vscale x 8 x i16> @vpgather_nxv8i16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i16> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i16> where the pointers come from a
; getelementptr of i16 off scalar %base with i8 vector indices.
; The checks expect the indices to be sign-extended (vsext.vf4 on riscv32,
; vsext.vf8 on riscv64), doubled with vadd.vv to form byte offsets, and then
; used by an indexed load from (a0) at e16/m2.
define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
%v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i16> %v
}
; Same gather as the plain i8-index/i16-element case, except that the IR
; sign-extends the i8 indices to i16 explicitly before the getelementptr.
; The expected instruction sequence is identical to that of
; vpgather_baseidx_nxv8i8_nxv8i16: vsext, vadd.vv doubling, then the indexed
; load at e16/m2.
define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
%v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i16> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i16> where the i8 indices are
; zero-extended to i16 before the getelementptr of i16 off %base.
; The checks expect both targets to form the offsets with a single widening
; vwaddu.vv of the index vector with itself at e8/m1, and to use the resulting
; 16-bit offsets directly in vluxei16.v at e16/m2.
define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwaddu.vv v10, v8, v8
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwaddu.vv v10, v8, v8
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
%v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i16> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i16> where the pointers come from a
; getelementptr of i16 off scalar %base with i16 vector indices.
; The expected code differs per target:
;  - riscv32 forms 32-bit offsets with one widening vwadd.vv of the indices with
;    themselves and loads with vluxei32.v, all under a single vsetvli.
;  - riscv64 sign-extends to 64 bits (vsext.vf4), doubles with vadd.vv, then
;    switches to e16/m2 for vluxei64.v.
define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
%v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i16> %v
}
; Masked llvm.vp.gather of <vscale x 1 x i32> from a vector of pointers.
; On riscv32 the checks expect the indexed load to write v8 in place with no
; trailing copy; on riscv64 the load goes to v9 and is then copied into v8.
define <vscale x 1 x i32> @vpgather_nxv1i32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%v = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i32> %v
}
; Masked llvm.vp.gather of <vscale x 2 x i32> from a vector of pointers.
; On riscv32 the checks expect the indexed load to write v8 in place with no
; trailing copy; on riscv64 the load goes to v10 and is moved into v8 by
; vmv.v.v.
define <vscale x 2 x i32> @vpgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i32> %v
}
; Masked llvm.vp.gather of <vscale x 2 x i32> whose result is sign-extended to
; <vscale x 2 x i64> by a plain (non-VP) sext.
; The checks expect the e32 gather into v10 followed by a separate vsext.vf2 at
; e64/m2 that writes the result to v8.
define <vscale x 2 x i64> @vpgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vsext.vf2 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vsext.vf2 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %ev
}
; Masked llvm.vp.gather of <vscale x 2 x i32> whose result is zero-extended to
; <vscale x 2 x i64> by a plain (non-VP) zext.
; The checks expect the e32 gather into v10 followed by a separate vzext.vf2 at
; e64/m2 that writes the result to v8.
define <vscale x 2 x i64> @vpgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vzext.vf2 v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vzext.vf2 v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
%ev = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %ev
}
; Masked llvm.vp.gather of <vscale x 4 x i32> from a vector of pointers.
; On riscv32 the checks expect the indexed load to write v8 in place with no
; trailing copy; on riscv64 the load goes to v12 and is moved into v8 by
; vmv.v.v.
define <vscale x 4 x i32> @vpgather_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i32> %v
}
; llvm.vp.gather of <vscale x 4 x i32> with an all-true mask (splat (i1 1)).
; The checks expect the indexed load to be emitted without the v0.t mask
; operand. On riscv32 it writes v8 in place; on riscv64 it loads into v12 and
; vmv.v.v moves the result into v8.
define <vscale x 4 x i32> @vpgather_truemask_nxv4i32(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret <vscale x 4 x i32> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i32> from a vector of pointers.
; On riscv32 the checks expect the indexed load to write v8 in place with no
; trailing copy; on riscv64 the load goes to v16 and is moved into v8 by
; vmv.v.v.
define <vscale x 8 x i32> @vpgather_nxv8i32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i32> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i32> where the pointers come from a
; getelementptr of i32 off scalar %base with i8 vector indices.
; The checks expect the indices to be sign-extended (vsext.vf4 on riscv32,
; vsext.vf8 on riscv64) and shifted left by 2 (vsll.vi) to form byte offsets.
; riscv32 then loads under the same e32/m4 vsetvli; riscv64 switches from
; e64/m8 to e32/m4 before vluxei64.v.
define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
%v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i32> %v
}
; Same gather as the plain i8-index/i32-element case, except that the IR
; sign-extends the i8 indices to i32 explicitly before the getelementptr.
; The expected instruction sequence is identical to that of
; vpgather_baseidx_nxv8i8_nxv8i32: vsext, vsll.vi by 2, then the indexed load.
define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
%v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i32> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i32> where the i8 indices are
; zero-extended to i32 before the getelementptr of i32 off %base.
; The checks expect both targets to form the offsets with a widening unsigned
; multiply by 4 (li a2, 4 then vwmulu.vx at e8/m1) and to use the resulting
; 16-bit offsets directly in vluxei16.v at e32/m4.
define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 4
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwmulu.vx v12, v8, a2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 4
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwmulu.vx v12, v8, a2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
%v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i32> %v
}
; Masked llvm.vp.gather of <vscale x 8 x i32> where the pointers come from a
; getelementptr of i32 off scalar %base with i16 vector indices.
; The expected code differs per target:
;  - riscv32 forms 32-bit offsets with a widening multiply by 4
;    (li a2, 4 then vwmulsu.vx at e16/m2) and loads with vluxei32.v.
;  - riscv64 sign-extends to 64 bits (vsext.vf4), shifts left by 2 (vsll.vi),
;    then switches to e32/m4 for vluxei64.v.
define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 4
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v12, v8, a2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
%v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i32> %v
}
; Masked vp.gather of <vscale x 8 x i32> from %base, indexed by i16 values that
; are explicitly sign-extended to i32 before the GEP.
; riscv32 expects a widening vwmulsu.vx by 4 and vluxei32.v; riscv64 expects
; vsext.vf4 to 64 bits, vsll.vi by 2 and vluxei64.v.
define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 4
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v12, v8, a2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
%v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i32> %v
}
; Masked vp.gather of <vscale x 8 x i32> from %base, indexed by i16 values that
; are zero-extended to i32 before the GEP.
; Both configurations expect one vwmulu.vx by 4 producing 32-bit offsets, which
; are consumed by vluxei32.v.
define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 4
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulu.vx v12, v8, a2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 4
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vwmulu.vx v12, v8, a2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
%v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i32> %v
}
; Masked vp.gather of <vscale x 8 x i32> from %base with i32 indices, i.e. the
; index width equals the element width.
; riscv32 expects an in-place vsll.vi by 2 followed by vluxei32.v; riscv64
; expects a widening vwmulsu.vx by 4 and vluxei64.v under a single vsetvli.
define <vscale x 8 x i32> @vpgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 4
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulsu.vx v16, v8, a2
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
%v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i32> %v
}
; Masked vp.gather of <vscale x 1 x i64> straight from a vector of pointers.
; riscv32 expects the vluxei32.v result in v9 followed by a vmv.v.v into v8;
; riscv64 expects vluxei64.v to overwrite v8 directly.
define <vscale x 1 x i64> @vpgather_nxv1i64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i64> %v
}
; Masked vp.gather of <vscale x 2 x i64> straight from a vector of pointers.
; riscv32 expects the pointer vector to be copied to v10 (vmv1r.v) before
; vluxei32.v writes v8; riscv64 expects vluxei64.v to overwrite v8 directly.
define <vscale x 2 x i64> @vpgather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmv1r.v v10, v8
; RV32-NEXT: vluxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x i64> %v
}
; Masked vp.gather of <vscale x 4 x i64> straight from a vector of pointers.
; riscv32 expects the pointer vector to be copied to v12 (vmv2r.v) before
; vluxei32.v writes v8; riscv64 expects vluxei64.v to overwrite v8 directly.
define <vscale x 4 x i64> @vpgather_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmv2r.v v12, v8
; RV32-NEXT: vluxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x i64> %v
}
; vp.gather of <vscale x 4 x i64> from a vector of pointers with an all-ones
; mask (splat (i1 1)); only %evl limits the active elements.
; The expected indexed loads are unmasked (no v0.t operand) on both
; configurations.
define <vscale x 4 x i64> @vpgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmv2r.v v12, v8
; RV32-NEXT: vluxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8
; RV64-NEXT: ret
%v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret <vscale x 4 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> straight from a vector of pointers.
; riscv32 expects the pointer vector to be copied to v16 (vmv4r.v) before
; vluxei32.v writes v8; riscv64 expects vluxei64.v to overwrite v8 directly.
define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv4r.v v16, v8
; RV32-NEXT: vluxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, with the i8 index vector
; fed to the GEP directly (no explicit extension in the IR).
; riscv32 expects vsext.vf4 to 32 bits, vsll.vi by 3 and vluxei32.v; riscv64
; expects vsext.vf8 to 64 bits, vsll.vi by 3 and vluxei64.v.
define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, indexed by i8 values that
; are explicitly sign-extended to i64 before the GEP.
; riscv32 expects the offsets to be built at 32 bits (vsext.vf4, vsll.vi by 3)
; for vluxei32.v; riscv64 expects vsext.vf8, vsll.vi by 3 and vluxei64.v.
define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, indexed by i8 values that
; are zero-extended to i64 before the GEP.
; Both configurations expect one vwmulu.vx by 8 producing 16-bit offsets, which
; are consumed by vluxei16.v.
define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwmulu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, with the i16 index vector
; fed to the GEP directly (no explicit extension in the IR).
; riscv32 expects a widening vwmulsu.vx by 8 and vluxei32.v; riscv64 expects
; vsext.vf4 to 64 bits, vsll.vi by 3 and vluxei64.v.
define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, indexed by i16 values that
; are explicitly sign-extended to i64 before the GEP.
; riscv32 expects a widening vwmulsu.vx by 8 and vluxei32.v; riscv64 expects
; vsext.vf4 to 64 bits, vsll.vi by 3 and vluxei64.v.
define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, indexed by i16 values that
; are zero-extended to i64 before the GEP.
; Both configurations expect one vwmulu.vx by 8 producing 32-bit offsets, which
; are consumed by vluxei32.v.
define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, with the i32 index vector
; fed to the GEP directly (no explicit extension in the IR).
; riscv32 expects vsll.vi by 3 at e32 followed by vluxei32.v; riscv64 expects a
; widening vwmulsu.vx by 8 followed by vluxei64.v.
define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulsu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, indexed by i32 values that
; are explicitly sign-extended to i64 before the GEP.
; riscv32 expects no extension at all (vsll.vi by 3 at e32, then vluxei32.v);
; riscv64 expects a widening vwmulsu.vx by 8 followed by vluxei64.v.
define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulsu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base, indexed by i32 values that
; are zero-extended to i64 before the GEP.
; riscv32 expects no extension at all (vsll.vi by 3 at e32, then vluxei32.v);
; riscv64 expects an unsigned widening vwmulu.vx by 8 followed by vluxei64.v.
define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 8 x i64> from %base with i64 indices.
; riscv32 expects the indices to be narrowed to 32 bits (vnsrl.wi by 0) before
; vsll.vi by 3 and vluxei32.v; riscv64 expects an in-place vsll.vi by 3 and
; vluxei64.v.
define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
%v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i64> %v
}
; Masked vp.gather of <vscale x 1 x bfloat> straight from a vector of pointers.
; Both configurations expect the indexed load to write v9 at e16/mf4 and the
; result to be copied into v8 with vmv1r.v.
define <vscale x 1 x bfloat> @vpgather_nxv1bf16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.vp.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x bfloat> %v
}
; Masked vp.gather of <vscale x 2 x bfloat> straight from a vector of pointers.
; Both configurations expect the indexed load at e16/mf2 to target a temporary
; (v9 on riscv32, v10 on riscv64) that is then copied into v8 with vmv1r.v.
define <vscale x 2 x bfloat> @vpgather_nxv2bf16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.vp.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x bfloat> %v
}
; Masked vp.gather of <vscale x 4 x bfloat> straight from a vector of pointers.
; Both configurations expect the indexed load at e16/m1 to target a temporary
; (v10 on riscv32, v12 on riscv64) that is then copied into v8 with vmv.v.v.
define <vscale x 4 x bfloat> @vpgather_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x bfloat> %v
}
; vp.gather of <vscale x 4 x bfloat> from a vector of pointers with an all-ones
; mask (splat (i1 1)); only %evl limits the active elements.
; The expected indexed loads are unmasked (no v0.t operand) on both
; configurations.
define <vscale x 4 x bfloat> @vpgather_truemask_nxv4bf16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret <vscale x 4 x bfloat> %v
}
; Masked vp.gather of <vscale x 8 x bfloat> straight from a vector of pointers.
; Both configurations expect the indexed load at e16/m2 to target a temporary
; (v12 on riscv32, v16 on riscv64) that is then copied into v8 with vmv.v.v.
define <vscale x 8 x bfloat> @vpgather_nxv8bf16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x bfloat> %v
}
; Masked vp.gather of <vscale x 8 x bfloat> from %base, with the i8 index vector
; fed to the GEP directly (no explicit extension in the IR).
; The expected code sign-extends the indices to the pointer width (vsext.vf4 on
; riscv32, vsext.vf8 on riscv64) and doubles them with vadd.vv before the
; indexed load.
define <vscale x 8 x bfloat> @vpgather_baseidx_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
%v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x bfloat> %v
}
; Masked vp.gather of <vscale x 8 x bfloat> from %base, indexed by i8 values
; that are explicitly sign-extended to i16 before the GEP.
; The expected code sign-extends the original i8 indices to the pointer width
; (vsext.vf4 on riscv32, vsext.vf8 on riscv64) and doubles them with vadd.vv.
define <vscale x 8 x bfloat> @vpgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
%ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
%v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x bfloat> %v
}
; Masked vp.gather of <vscale x 8 x bfloat> from %base, indexed by i8 values
; that are zero-extended to i16 before the GEP.
; Both configurations expect the extension and doubling to be done by one
; vwaddu.vv of the index with itself, giving 16-bit offsets for vluxei16.v.
define <vscale x 8 x bfloat> @vpgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwaddu.vv v10, v8, v8
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwaddu.vv v10, v8, v8
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
%ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
%v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x bfloat> %v
}
; Masked vp.gather of <vscale x 8 x bfloat> from %base with i16 indices, i.e.
; the index width equals the element width.
; riscv32 expects a signed widening vwadd.vv of the index with itself and
; vluxei32.v under a single vsetvli; riscv64 expects vsext.vf4, vadd.vv and
; vluxei64.v.
define <vscale x 8 x bfloat> @vpgather_baseidx_nxv8bf16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8bf16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
%v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x bfloat> %v
}
; Masked vp.gather of <vscale x 1 x half> straight from a vector of pointers.
; Both configurations expect the indexed load to write v9 at e16/mf4 and the
; result to be copied into v8 with vmv1r.v.
define <vscale x 1 x half> @vpgather_nxv1f16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
; Masked vp.gather of <vscale x 2 x half> straight from a vector of pointers.
; Both configurations expect the indexed load at e16/mf2 to target a temporary
; (v9 on riscv32, v10 on riscv64) that is then copied into v8 with vmv1r.v.
define <vscale x 2 x half> @vpgather_nxv2f16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
; Masked vp.gather of <vscale x 4 x half> straight from a vector of pointers.
; Both configurations expect the indexed load at e16/m1 to target a temporary
; (v10 on riscv32, v12 on riscv64) that is then copied into v8 with vmv.v.v.
define <vscale x 4 x half> @vpgather_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
; vp.gather of <vscale x 4 x half> from a vector of pointers with an all-ones
; mask (splat (i1 1)); only %evl limits the active elements.
; The expected indexed loads are unmasked (no v0.t operand) on both
; configurations.
define <vscale x 4 x half> @vpgather_truemask_nxv4f16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret <vscale x 4 x half> %v
}
; Masked vp.gather of <vscale x 8 x half> straight from a vector of pointers.
; Both configurations expect the indexed load at e16/m2 to target a temporary
; (v12 on riscv32, v16 on riscv64) that is then copied into v8 with vmv.v.v.
define <vscale x 8 x half> @vpgather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
; Masked vp.gather of <vscale x 8 x half> from %base, with the i8 index vector
; fed to the GEP directly (no explicit extension in the IR).
; The expected code sign-extends the indices to the pointer width (vsext.vf4 on
; riscv32, vsext.vf8 on riscv64) and doubles them with vadd.vv before the
; indexed load.
define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
%v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
; Masked vp.gather of <vscale x 8 x half> from %base, indexed by i8 values that
; are explicitly sign-extended to i16 before the GEP.
; The expected code sign-extends the original i8 indices to the pointer width
; (vsext.vf4 on riscv32, vsext.vf8 on riscv64) and doubles them with vadd.vv.
define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
%v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
; Masked vp.gather of <vscale x 8 x half> from %base, indexed by i8 values that
; are zero-extended to i16 before the GEP.
; Both configurations expect the extension and doubling to be done by one
; vwaddu.vv of the index with itself, giving 16-bit offsets for vluxei16.v.
define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwaddu.vv v10, v8, v8
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwaddu.vv v10, v8, v8
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
%v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
; Masked vp.gather of <vscale x 8 x half> from %base with i16 indices, i.e. the
; index width equals the element width.
; riscv32 expects a signed widening vwadd.vv of the index with itself and
; vluxei32.v under a single vsetvli; riscv64 expects vsext.vf4, vadd.vv and
; vluxei64.v.
define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
%v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
; Masked vp.gather of <vscale x 1 x float> straight from a vector of pointers.
; riscv32 expects vluxei32.v to overwrite v8 directly; riscv64 expects the
; vluxei64.v result in v9 followed by a vmv1r.v into v8.
define <vscale x 1 x float> @vpgather_nxv1f32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
; RV64-NEXT: ret
%v = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
; Masked vp.gather of <vscale x 2 x float> straight from a vector of pointers.
; riscv32 expects vluxei32.v to overwrite v8 directly; riscv64 expects the
; vluxei64.v result in v10 followed by a vmv.v.v into v8.
define <vscale x 2 x float> @vpgather_nxv2f32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
%v = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
; Masked vp.gather of <vscale x 4 x float> straight from a vector of pointers.
; riscv32 expects vluxei32.v to overwrite v8 directly; riscv64 expects the
; vluxei64.v result in v12 followed by a vmv.v.v into v8.
define <vscale x 4 x float> @vpgather_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
; vp.gather of <vscale x 4 x float> from a vector of pointers with an all-ones
; mask (splat (i1 1)); only %evl limits the active elements.
; The expected indexed loads are unmasked (no v0.t operand) on both
; configurations.
define <vscale x 4 x float> @vpgather_truemask_nxv4f32(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret <vscale x 4 x float> %v
}
; Masked vp.gather of <vscale x 8 x float> straight from a vector of pointers.
; riscv32 expects vluxei32.v to overwrite v8 directly; riscv64 expects the
; vluxei64.v result in v16 followed by a vmv.v.v into v8.
define <vscale x 8 x float> @vpgather_nxv8f32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Masked vp.gather of floats addressed as %base plus a vector of i8 GEP indices
; (no explicit extension in the IR). The expected code sign-extends the indices
; (vsext.vf4 on riscv32, vsext.vf8 on riscv64) and shifts them left by 2 before
; the indexed load from (a0).
define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
%v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Masked vp.gather of floats where the i8 indices are explicitly sign-extended
; to i32 in the IR before the GEP. The expected code is a vector sign-extension
; (vsext.vf4 on riscv32, vsext.vf8 on riscv64), a left shift by 2, and an
; indexed load from (a0).
define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
%v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Masked vp.gather of floats where the i8 indices are zero-extended to i32 in
; the IR before the GEP. On both targets the expected code multiplies the i8
; indices by 4 with a widening unsigned multiply (vwmulu.vx at e8) and gathers
; with vluxei16 from (a0).
define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 4
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwmulu.vx v12, v8, a2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 4
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwmulu.vx v12, v8, a2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
%v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Masked vp.gather of floats addressed as %base plus a vector of i16 GEP
; indices (no explicit extension in the IR). riscv32 is expected to form the
; offsets with a widening multiply by 4 (vwmulsu.vx at e16) and gather with
; vluxei32; riscv64 sign-extends with vsext.vf4, shifts left by 2 and gathers
; with vluxei64.
define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 4
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v12, v8, a2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
%v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Masked vp.gather of floats where the i16 indices are explicitly sign-extended
; to i32 in the IR before the GEP. riscv32 is expected to use a widening
; multiply by 4 (vwmulsu.vx at e16) followed by vluxei32; riscv64 uses
; vsext.vf4, a left shift by 2 and vluxei64.
define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 4
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v12, v8, a2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
%v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Masked vp.gather of floats where the i16 indices are zero-extended to i32 in
; the IR before the GEP. On both targets the expected code multiplies the i16
; indices by 4 with a widening unsigned multiply (vwmulu.vx at e16) and gathers
; with vluxei32 from (a0).
define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 4
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulu.vx v12, v8, a2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 4
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vwmulu.vx v12, v8, a2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
%v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Masked vp.gather of floats addressed as %base plus a vector of i32 GEP
; indices. riscv32 is expected to shift the indices left by 2 in place and
; gather with vluxei32; riscv64 forms 64-bit offsets with a widening multiply by
; 4 (vwmulsu.vx at e32) and gathers with vluxei64 under the same vsetvli.
define <vscale x 8 x float> @vpgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 4
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulsu.vx v16, v8, a2
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
%v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Masked vp.gather of <vscale x 1 x double> through a vector of pointers. The
; expected code runs at e64/m1: riscv32 gathers with vluxei32 into v9 and copies
; the result to v8, riscv64 gathers with vluxei64 directly into v8.
define <vscale x 1 x double> @vpgather_nxv1f64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
; Masked vp.gather of <vscale x 2 x double> through a vector of pointers. The
; expected code runs at e64/m2: riscv32 first copies the pointer vector to v10
; (vmv1r.v) and then gathers with vluxei32 into v8, riscv64 gathers with
; vluxei64 in place.
define <vscale x 2 x double> @vpgather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vmv1r.v v10, v8
; RV32-NEXT: vluxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
; Masked vp.gather of <vscale x 4 x double> through a vector of pointers. The
; expected code runs at e64/m4: riscv32 first copies the pointer vector to v12
; (vmv2r.v) and then gathers with vluxei32 into v8, riscv64 gathers with
; vluxei64 in place.
define <vscale x 4 x double> @vpgather_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmv2r.v v12, v8
; RV32-NEXT: vluxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
; vp.gather of <vscale x 4 x double> whose mask operand is the constant
; splat (i1 1). The expected indexed loads carry no v0.t mask operand; riscv32
; still copies the pointer vector to v12 (vmv2r.v) before the vluxei32.
define <vscale x 4 x double> @vpgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vmv2r.v v12, v8
; RV32-NEXT: vluxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_truemask_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8
; RV64-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret <vscale x 4 x double> %v
}
; Masked vp.gather of <vscale x 6 x double>, a non-power-of-two element count.
; The expected code runs at e64/m8: riscv32 copies the pointer vector to v16
; (vmv4r.v) and gathers with vluxei32 into v8, riscv64 gathers with vluxei64 in
; place.
define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv4r.v v16, v8
; RV32-NEXT: vluxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> addressed as %base plus a vector of
; i8 GEP indices (no explicit extension in the IR). The expected code
; sign-extends the indices (vsext.vf4 at e32 on riscv32, vsext.vf8 at e64 on
; riscv64), shifts them left by 3 and gathers from (a0) at e64/m8.
define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> where the i8 indices are explicitly
; sign-extended to i64 in the IR before the GEP. riscv32 is expected to extend
; only to 32 bits (vsext.vf4 at e32) and gather with vluxei32; riscv64 extends
; to 64 bits (vsext.vf8) and gathers with vluxei64. Both shift left by 3.
define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> where the i8 indices are
; zero-extended to i64 in the IR before the GEP. On both targets the expected
; code multiplies the i8 indices by 8 with a widening unsigned multiply
; (vwmulu.vx at e8) and gathers with vluxei16 at e64/m8.
define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwmulu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> addressed as %base plus a vector of
; i16 GEP indices (no explicit extension in the IR). riscv32 is expected to form
; 32-bit offsets with a widening multiply by 8 (vwmulsu.vx at e16) and gather
; with vluxei32; riscv64 uses vsext.vf4, a left shift by 3 and vluxei64.
define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> where the i16 indices are explicitly
; sign-extended to i64 in the IR before the GEP. riscv32 is expected to use a
; widening multiply by 8 (vwmulsu.vx at e16) followed by vluxei32; riscv64 uses
; vsext.vf4, a left shift by 3 and vluxei64.
define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> where the i16 indices are
; zero-extended to i64 in the IR before the GEP. On both targets the expected
; code multiplies the i16 indices by 8 with a widening unsigned multiply
; (vwmulu.vx at e16) and gathers with vluxei32 at e64/m8.
define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> addressed as %base plus a vector of
; i32 GEP indices (no explicit extension in the IR). riscv32 is expected to
; shift the indices left by 3 at e32 and gather with vluxei32; riscv64 forms
; 64-bit offsets with a widening multiply by 8 (vwmulsu.vx at e32) and gathers
; with vluxei64.
define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulsu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> where the i32 indices are explicitly
; sign-extended to i64 in the IR before the GEP. riscv32 is expected to shift
; the original 32-bit indices left by 3 and gather with vluxei32; riscv64 uses a
; widening multiply by 8 (vwmulsu.vx at e32) and gathers with vluxei64.
define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulsu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> where the i32 indices are
; zero-extended to i64 in the IR before the GEP. riscv32 is expected to shift
; the original 32-bit indices left by 3 and gather with vluxei32; riscv64 uses a
; widening unsigned multiply by 8 (vwmulu.vx at e32) and gathers with vluxei64.
define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 6 x double> addressed as %base plus a vector of
; i64 GEP indices. riscv32 is expected to narrow the indices to 32 bits
; (vnsrl.wi by 0), shift left by 3 and gather with vluxei32; riscv64 shifts the
; 64-bit indices left by 3 in place and gathers with vluxei64.
define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
%v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret <vscale x 6 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> through a vector of pointers. The
; expected code runs at e64/m8: riscv32 copies the pointer vector to v16
; (vmv4r.v) and gathers with vluxei32 into v8, riscv64 gathers with vluxei64 in
; place.
define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmv4r.v v16, v8
; RV32-NEXT: vluxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> addressed as %base plus a vector of
; i8 GEP indices (no explicit extension in the IR). The expected code
; sign-extends the indices (vsext.vf4 at e32 on riscv32, vsext.vf8 at e64 on
; riscv64), shifts them left by 3 and gathers from (a0) at e64/m8.
define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> where the i8 indices are explicitly
; sign-extended to i64 in the IR before the GEP. riscv32 is expected to extend
; only to 32 bits (vsext.vf4 at e32) and gather with vluxei32; riscv64 extends
; to 64 bits (vsext.vf8) and gathers with vluxei64. Both shift left by 3.
define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v16, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> where the i8 indices are
; zero-extended to i64 in the IR before the GEP. On both targets the expected
; code multiplies the i8 indices by 8 with a widening unsigned multiply
; (vwmulu.vx at e8) and gathers with vluxei16 at e64/m8.
define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vwmulu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> addressed as %base plus a vector of
; i16 GEP indices (no explicit extension in the IR). riscv32 is expected to form
; 32-bit offsets with a widening multiply by 8 (vwmulsu.vx at e16) and gather
; with vluxei32; riscv64 uses vsext.vf4, a left shift by 3 and vluxei64.
define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> where the i16 indices are explicitly
; sign-extended to i64 in the IR before the GEP. riscv32 is expected to use a
; widening multiply by 8 (vwmulsu.vx at e16) followed by vluxei32; riscv64 uses
; vsext.vf4, a left shift by 3 and vluxei64.
define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulsu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> where the i16 indices are
; zero-extended to i64 in the IR before the GEP. On both targets the expected
; code multiplies the i16 indices by 8 with a widening unsigned multiply
; (vwmulu.vx at e16) and gathers with vluxei32 at e64/m8.
define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 8
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vwmulu.vx v16, v8, a2
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> addressed as %base plus a vector of
; i32 GEP indices (no explicit extension in the IR). riscv32 is expected to
; shift the indices left by 3 at e32 and gather with vluxei32; riscv64 forms
; 64-bit offsets with a widening multiply by 8 (vwmulsu.vx at e32) and gathers
; with vluxei64.
define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulsu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> where the i32 indices are explicitly
; sign-extended to i64 in the IR before the GEP. riscv32 is expected to shift
; the original 32-bit indices left by 3 and gather with vluxei32; riscv64 uses a
; widening multiply by 8 (vwmulsu.vx at e32) and gathers with vluxei64.
define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulsu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> where the i32 indices are
; zero-extended to i64 in the IR before the GEP. riscv32 is expected to shift
; the original 32-bit indices left by 3 and gather with vluxei32; riscv64 uses a
; widening unsigned multiply by 8 (vwmulu.vx at e32) and gathers with vluxei64.
define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v8, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 8
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vwmulu.vx v16, v8, a2
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 8 x double> addressed as %base plus a vector of
; i64 GEP indices. riscv32 is expected to narrow the indices to 32 bits
; (vnsrl.wi by 0), shift left by 3 and gather with vluxei32; riscv64 shifts the
; 64-bit indices left by 3 in place and gathers with vluxei64.
define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Masked vp.gather of <vscale x 16 x double> through a vector of pointers. The
; expected code performs the gather as two e64/m8 indexed loads:
;  - the load into v16 uses the mask slid down by vlenb/8 (vslidedown.vx) and a
;    vector length of evl - vlenb clamped to zero on underflow
;    (sub/sltu/addi/and sequence);
;  - the load into v8 uses the original mask (saved in v24 and restored to v0)
;    and a vector length of min(evl, vlenb) (bltu/mv sequence).
; On riscv32 the second load goes to v24 and is then copied to v8.
define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v24, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: sub a2, a0, a1
; RV32-NEXT: srli a3, a1, 3
; RV32-NEXT: vslidedown.vx v0, v0, a3
; RV32-NEXT: sltu a3, a0, a2
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (zero), v12, v0.t
; RV32-NEXT: bltu a0, a1, .LBB111_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB111_2:
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v24
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: sub a2, a0, a1
; RV64-NEXT: srli a3, a1, 3
; RV64-NEXT: vslidedown.vx v0, v0, a3
; RV64-NEXT: sltu a3, a0, a2
; RV64-NEXT: addi a3, a3, -1
; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t
; RV64-NEXT: bltu a0, a1, .LBB111_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB111_2:
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}
; Masked vp.gather of <vscale x 16 x double> addressed as %base plus a vector of
; i16 GEP indices (no explicit extension in the IR). The expected code performs
; the gather as two e64/m8 indexed loads from (a0), one into v8 and one into
; v16. The v8 load uses a vector length of min(evl, vlenb); the v16 load uses
; evl - vlenb clamped to zero on underflow and the mask slid down by vlenb/8.
;  - riscv32 scales all indices up front with one widening multiply by 8
;    (vwmulsu.vx at e16/m4, vsetvli with a zero AVL source) and gathers with
;    vluxei32, using v24 and v28 as the two index operands.
;  - riscv64 sign-extends and shifts each half separately (vsext.vf4 of v10 and
;    of v8, then vsll.vi by 3) and gathers with vluxei64; the original mask is
;    kept in v12 and restored to v0 before the v8 load.
define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 8
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; RV32-NEXT: vwmulsu.vx v24, v8, a3
; RV32-NEXT: mv a3, a1
; RV32-NEXT: bltu a1, a2, .LBB112_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a2
; RV32-NEXT: .LBB112_2:
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a1, a2
; RV32-NEXT: srli a2, a2, 3
; RV32-NEXT: sltu a1, a1, a3
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: sub a3, a1, a2
; RV64-NEXT: sltu a4, a1, a3
; RV64-NEXT: addi a4, a4, -1
; RV64-NEXT: and a3, a4, a3
; RV64-NEXT: srli a4, a2, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v8
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: bltu a1, a2, .LBB112_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB112_2:
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
%v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}
; vp.gather of <vscale x 16 x double> whose i16 indices are explicitly
; sign-extended to i64 in the IR before the getelementptr on %base.
;
; The expected code performs the gather as two m8 accesses:
;   - one writing v8 with AVL = min(%evl, vlenb), under the incoming mask;
;   - one writing v16 with AVL = %evl - vlenb clamped to zero (the
;     sub/sltu/addi/and sequence), under the mask slid down by vlenb / 8.
;
; On riscv32 the byte offsets are formed with vwmulsu.vx by 8 and consumed by
; vluxei32.v. On riscv64 the indices are sign-extended with vsext.vf4,
; shifted left by 3 and consumed by vluxei64.v.
define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 8
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; RV32-NEXT: vwmulsu.vx v24, v8, a3
; RV32-NEXT: mv a3, a1
; RV32-NEXT: bltu a1, a2, .LBB113_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a2
; RV32-NEXT: .LBB113_2:
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a1, a2
; RV32-NEXT: srli a2, a2, 3
; RV32-NEXT: sltu a1, a1, a3
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: sub a3, a1, a2
; RV64-NEXT: sltu a4, a1, a3
; RV64-NEXT: addi a4, a4, -1
; RV64-NEXT: and a3, a4, a3
; RV64-NEXT: srli a4, a2, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v8
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: bltu a1, a2, .LBB113_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB113_2:
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}
; vp.gather of <vscale x 16 x double> whose i16 indices are explicitly
; zero-extended to i64 in the IR before the getelementptr on %base.
;
; The expected instruction sequence is the same for riscv32 and riscv64:
; byte offsets are formed with an unsigned widening multiply by 8
; (vwmulu.vx at e16/m4) and consumed by vluxei32.v, so riscv64 is not
; expected to use 64-bit index vectors here.
;
; The gather is performed as two m8 accesses:
;   - one writing v8 with AVL = min(%evl, vlenb), under the incoming mask;
;   - one writing v16 with AVL = %evl - vlenb clamped to zero (the
;     sub/sltu/addi/and sequence), under the mask slid down by vlenb / 8.
define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 8
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; RV32-NEXT: vwmulu.vx v24, v8, a3
; RV32-NEXT: mv a3, a1
; RV32-NEXT: bltu a1, a2, .LBB114_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a2
; RV32-NEXT: .LBB114_2:
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a1, a2
; RV32-NEXT: srli a2, a2, 3
; RV32-NEXT: sltu a1, a1, a3
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: li a3, 8
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; RV64-NEXT: vwmulu.vx v24, v8, a3
; RV64-NEXT: mv a3, a1
; RV64-NEXT: bltu a1, a2, .LBB114_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a2
; RV64-NEXT: .LBB114_2:
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a1, a2
; RV64-NEXT: srli a2, a2, 3
; RV64-NEXT: sltu a1, a1, a3
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a2
; RV64-NEXT: and a1, a1, a3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}