diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7c257b406d5a..cee593def653 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6070,6 +6070,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   if (N0 == N1)
     return N0;
 
+  // Fold umin/umax of two vscale operands by comparing their multipliers.
+  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
+    uint64_t C0 = N0->getConstantOperandVal(0);
+    uint64_t C1 = N1->getConstantOperandVal(0);
+    if (Opcode == ISD::UMAX)
+      return C0 > C1 ? N0 : N1;
+    if (Opcode == ISD::UMIN)
+      return C0 > C1 ? N1 : N0;
+  }
+
   // canonicalize constant to RHS
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index ecd098edb30a..b6aa4affbb10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -939,21 +939,17 @@ define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, p
 ; CHECK-RV32: # %bb.0:
 ; CHECK-RV32-NEXT: csrr a1, vlenb
 ; CHECK-RV32-NEXT: srli a2, a1, 3
-; CHECK-RV32-NEXT: sub a3, a2, a1
-; CHECK-RV32-NEXT: sltu a4, a2, a3
-; CHECK-RV32-NEXT: addi a4, a4, -1
-; CHECK-RV32-NEXT: and a3, a4, a3
-; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
-; CHECK-RV32-NEXT: bltu a2, a1, .LBB61_2
-; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a2, a1
-; CHECK-RV32-NEXT: .LBB61_2:
+; CHECK-RV32-NEXT: sub a1, a2, a1
+; CHECK-RV32-NEXT: sltu a3, a2, a1
+; CHECK-RV32-NEXT: addi a3, a3, -1
+; CHECK-RV32-NEXT: and a1, a3, a1
 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero
 ; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vadd.vv v16, v16, v24
-; CHECK-RV32-NEXT: vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v24
+; CHECK-RV32-NEXT: vadd.vv v16, v16, v0
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index 6e9f4d45cd6b..946c0bbd7ff6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -1393,32 +1393,46 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
 
 declare i32 @llvm.vscale.i32()
 
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
- define @vadd_vi_nxv32i32_evl_nx8( %va, %m) { -; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a2, a0, 2 -; CHECK-NEXT: slli a1, a0, 1 -; CHECK-NEXT: vslidedown.vx v0, v0, a2 -; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: sltu a3, a0, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB120_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB120_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vadd_vi_nxv32i32_evl_nx8: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a1, a0, 2 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vadd.vi v8, v8, -1, v0.t +; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: sub a1, a0, a1 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vadd.vi v16, v16, -1, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vadd_vi_nxv32i32_evl_nx8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a2, a0, 2 +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: vslidedown.vx v0, v0, a2 +; RV64-NEXT: sub a2, a0, a1 +; RV64-NEXT: sltu a3, a0, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v16, -1, v0.t +; RV64-NEXT: bltu a0, a1, .LBB120_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: .LBB120_2: +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV64-NEXT: vadd.vi v8, v8, -1, v0.t +; RV64-NEXT: ret %evl = call i32 @llvm.vscale.i32() %evl0 = mul i32 %evl, 8 %v = call @llvm.vp.add.nxv32i32( %va, splat (i32 -1), %m, i32 %evl0) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index ec95e81b8dd9..d81936354f6f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -1026,32 +1026,46 @@ define @vmax_vx_nxv32i32_unmasked( %va, i declare i32 @llvm.vscale.i32() -; FIXME: The upper half of the operation is doing nothing. -; FIXME: The branches comparing vscale vs. vscale should be constant-foldable. 
- define @vmax_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { -; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: vslidedown.vx v0, v0, a3 -; CHECK-NEXT: sub a3, a1, a2 -; CHECK-NEXT: sltu a4, a1, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB82_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vmax_vx_nxv32i32_evl_nx8: +; RV32: # %bb.0: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: srli a2, a1, 2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmax.vx v8, v8, a0, v0.t +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: sub a2, a1, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmax.vx v16, v16, a0, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_nxv32i32_evl_nx8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a3, a1, 2 +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: vslidedown.vx v0, v0, a3 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vmax.vx v16, v16, a0, v0.t +; RV64-NEXT: bltu a1, a2, .LBB82_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, a2 +; RV64-NEXT: .LBB82_2: +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %evl = call i32 @llvm.vscale.i32() diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index bd1a6c6e55c7..7603bcef1973 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -1025,32 +1025,46 @@ define @vmaxu_vx_nxv32i32_unmasked( %va, declare i32 @llvm.vscale.i32() -; FIXME: The upper half of the operation is doing nothing. -; FIXME: The branches comparing vscale vs. vscale should be constant-foldable. 
- define @vmaxu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { -; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: vslidedown.vx v0, v0, a3 -; CHECK-NEXT: sub a3, a1, a2 -; CHECK-NEXT: sltu a4, a1, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB82_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vmaxu_vx_nxv32i32_evl_nx8: +; RV32: # %bb.0: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: srli a2, a1, 2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmaxu.vx v8, v8, a0, v0.t +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: sub a2, a1, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmaxu.vx v16, v16, a0, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv32i32_evl_nx8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a3, a1, 2 +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: vslidedown.vx v0, v0, a3 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vmaxu.vx v16, v16, a0, v0.t +; RV64-NEXT: bltu a1, a2, .LBB82_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, a2 +; RV64-NEXT: .LBB82_2: +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %evl = call i32 @llvm.vscale.i32() diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 6f1f8e129832..3922b09f1f02 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -1026,32 +1026,46 @@ define @vmin_vx_nxv32i32_unmasked( %va, i declare i32 @llvm.vscale.i32() -; FIXME: The upper half of the operation is doing nothing. -; FIXME: The branches comparing vscale vs. vscale should be constant-foldable. 
- define @vmin_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { -; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: vslidedown.vx v0, v0, a3 -; CHECK-NEXT: sub a3, a1, a2 -; CHECK-NEXT: sltu a4, a1, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB82_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vmin_vx_nxv32i32_evl_nx8: +; RV32: # %bb.0: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: srli a2, a1, 2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmin.vx v8, v8, a0, v0.t +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: sub a2, a1, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmin.vx v16, v16, a0, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vmin_vx_nxv32i32_evl_nx8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a3, a1, 2 +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: vslidedown.vx v0, v0, a3 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vmin.vx v16, v16, a0, v0.t +; RV64-NEXT: bltu a1, a2, .LBB82_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, a2 +; RV64-NEXT: .LBB82_2: +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %evl = call i32 @llvm.vscale.i32() diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 47e7b7d70c07..59af953fd52d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -1025,32 +1025,46 @@ define @vminu_vx_nxv32i32_unmasked( %va, declare i32 @llvm.vscale.i32() -; FIXME: The upper half of the operation is doing nothing. -; FIXME: The branches comparing vscale vs. vscale should be constant-foldable. 
- define @vminu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { -; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: vslidedown.vx v0, v0, a3 -; CHECK-NEXT: sub a3, a1, a2 -; CHECK-NEXT: sltu a4, a1, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB82_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t -; CHECK-NEXT: ret +; RV32-LABEL: vminu_vx_nxv32i32_evl_nx8: +; RV32: # %bb.0: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: srli a2, a1, 2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vminu.vx v8, v8, a0, v0.t +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: sub a2, a1, a2 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vminu.vx v16, v16, a0, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv32i32_evl_nx8: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a3, a1, 2 +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: vslidedown.vx v0, v0, a3 +; RV64-NEXT: sub a3, a1, a2 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vminu.vx v16, v16, a0, v0.t +; RV64-NEXT: bltu a1, a2, .LBB82_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, a2 +; RV64-NEXT: .LBB82_2: +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer %evl = call i32 @llvm.vscale.i32() diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index 522c83fd9fa9..2ed3c9bfe2c1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.select.nxv1i1(, , , i32) @@ -398,48 +398,69 @@ define @select_nxv32i32( %a, @select_evl_nxv32i32( %a, %b, %c) { -; CHECK-LABEL: select_evl_nxv32i32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; 
CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: srli a4, a1, 2 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: sub a5, a1, a2 -; CHECK-NEXT: vl8re32.v v24, (a3) -; CHECK-NEXT: sltu a3, a1, a5 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: vl8re32.v v8, (a0) -; CHECK-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-NEXT: and a3, a3, a5 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: bltu a1, a2, .LBB28_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB28_2: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; RV32-LABEL: select_evl_nxv32i32: +; RV32: # %bb.0: +; RV32-NEXT: vl8re32.v v24, (a0) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: sub a2, a1, a2 +; RV32-NEXT: vl8re32.v v24, (a0) +; RV32-NEXT: sltu a0, a1, a2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: srli a1, a1, 2 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV32-NEXT: ret +; +; RV64-LABEL: select_evl_nxv32i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vmv1r.v v7, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a3, a1, 3 +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: srli a4, a1, 2 +; RV64-NEXT: add a3, a0, a3 +; RV64-NEXT: sub a5, a1, a2 +; RV64-NEXT: vl8re32.v v24, (a3) +; RV64-NEXT: sltu a3, a1, a5 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: vl8re32.v v8, (a0) +; RV64-NEXT: vslidedown.vx v0, v0, a4 +; RV64-NEXT: and a3, a3, a5 +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV64-NEXT: bltu a1, a2, .LBB28_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, a2 +; RV64-NEXT: .LBB28_2: +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vmerge.vvm v8, v8, v24, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 16 +; RV64-NEXT: 
addi sp, sp, 16
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
   %evl = call i32 @llvm.vscale.i32()
   %evl0 = mul i32 %evl, 8
   %v = call <vscale x 32 x i32> @llvm.vp.select.nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c, i32 %evl0)
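
Note (reviewer sketch, not part of the patch): the new visitIMINMAX fold fires when both operands of a UMIN/UMAX are ISD::VSCALE nodes, i.e. constant multiples of the same runtime vscale, so comparing the constant multipliers alone decides the result. Below is a minimal standalone IR illustration of the pattern, written against the generic llvm.umin intrinsic rather than the VP intrinsics exercised above; the function name and the multipliers 2 and 8 are illustrative assumptions.

; Illustrative only: once the muls are combined into vscale nodes,
; umin(vscale * 2, vscale * 8) should resolve to the vscale * 2 operand at
; DAG-combine time, with no runtime compare emitted.
define i64 @umin_of_vscales_sketch() {
  %vs = call i64 @llvm.vscale.i64()
  %a = mul i64 %vs, 2
  %b = mul i64 %vs, 8
  %r = call i64 @llvm.umin.i64(i64 %a, i64 %b)
  ret i64 %r
}
declare i64 @llvm.vscale.i64()
declare i64 @llvm.umin.i64(i64, i64)

This is the same shape produced by splitting the nxv32i32 VP operations above: with EVL = vscale * 8 and vscale * 16 elements in each split half, the low-half EVL is umin(vscale * 8, vscale * 16), which previously needed a bltu/mv sequence and now folds away on the RV32 runs (the RV64 runs keep the branch, presumably because the i32 EVL is promoted to i64 first, so the operands are no longer bare vscale nodes).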