From 1546138e6f18f2e7d0ca90eebe0ffc18c2364bf6 Mon Sep 17 00:00:00 2001
From: Brandon Wu
Date: Tue, 13 Jan 2026 17:33:47 +0800
Subject: [PATCH] [RISCV][llvm] Support min/max codegen for P extension (#175494)

Mark ISD::SMIN/UMIN/SMAX/UMAX as Legal for the vector types in `VTs` and
add selection patterns that map them onto the packed min/max instructions:
  - PMIN/PMINU/PMAX/PMAXU .b and .h for XLenVecI8VT / XLenVecI16VT
    (under HasStdExtP)
  - PMIN/PMINU/PMAX/PMAXU .w for v2i32 (under HasStdExtP, IsRV64)

Codegen tests are added to rvp-ext-rv32.ll and rvp-ext-rv64.ll. The
intrinsic names used in the tests carry the type suffix that matches the
operand vector type (e.g. @llvm.smin.v4i16 for <4 x i16>).
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |   2 +
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td    |  16 ++
 llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll     | 121 +++++++++++++
 llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll     | 181 ++++++++++++++++++++
 4 files changed, 320 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 97aae9d222f9..afbf57d5900e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -554,6 +554,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
                         ISD::SDIVREM, ISD::UDIVREM},
                        VTs, Expand);
+    setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, VTs,
+                       Legal);
     setOperationAction(ISD::SETCC, VTs, Legal);
     setCondCodeAction({ISD::SETNE, ISD::SETGT, ISD::SETGE, ISD::SETUGT,
                        ISD::SETUGE, ISD::SETULE, ISD::SETLE},
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 959c0f1d36e3..e97e9a0139e4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1635,6 +1635,16 @@ let Predicates = [HasStdExtP] in {
            (PMSLT_H GPR:$rs1, GPR:$rs2)>;
   def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
            (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
+
+  // 8/16-bit [s|u]min/[s|u]max patterns
+  def: Pat<(XLenVecI8VT (smin GPR:$rs1, GPR:$rs2)), (PMIN_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI8VT (umin GPR:$rs1, GPR:$rs2)), (PMINU_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (smin GPR:$rs1, GPR:$rs2)), (PMIN_H GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (umin GPR:$rs1, GPR:$rs2)), (PMINU_H GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI8VT (smax GPR:$rs1, GPR:$rs2)), (PMAX_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI8VT (umax GPR:$rs1, GPR:$rs2)), (PMAXU_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (smax GPR:$rs1, GPR:$rs2)), (PMAX_H GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (umax GPR:$rs1, GPR:$rs2)), (PMAXU_H GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -1743,6 +1753,12 @@ let Predicates = [HasStdExtP, IsRV64] in {
   def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETUGT)),
            (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
 
+  // 32-bit [s|u]min/[s|u]max patterns
+  def: Pat<(v2i32 (smin GPR:$rs1, GPR:$rs2)), (PMIN_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (umin GPR:$rs1, GPR:$rs2)), (PMINU_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (smax GPR:$rs1, GPR:$rs2)), (PMAX_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (umax GPR:$rs1, GPR:$rs2)), (PMAXU_W GPR:$rs1, GPR:$rs2)>;
+
   // 32-bit logical shift left/right patterns
   def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
            (PSLLI_W GPR:$rs1, uimm5:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 1222617825e7..99b551d1a93f 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -2192,3 +2192,124 @@ define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
   store <4 x i8> %res, ptr %ret_ptr
   ret void
 }
+
+; Test 8/16-bit [s|u]min/[s|u]max
+define void @test_smin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmin.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %a, <2 x i16> %b)
+  store <2 x i16> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pminu.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %min = call <2 x i16> @llvm.umin.v2i16(<2 x i16> %a, <2 x i16> %b)
+  store <2 x i16> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_smin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmin.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %min = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %a, <4 x i8> %b)
+  store <4 x i8> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pminu.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %min = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %a, <4 x i8> %b)
+  store <4 x i8> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_smax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmax.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> %b)
+  store <2 x i16> %max, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmaxu.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %max = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %a, <2 x i16> %b)
+  store <2 x i16> %max, ptr %ret_ptr
+  ret void
+}
+
+define void @test_smax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmax.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %max = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %a, <4 x i8> %b)
+  store <4 x i8> %max, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmaxu.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %max = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %a, <4 x i8> %b)
+  store <4 x i8> %max, ptr %ret_ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index df6db52a4d71..93ca54c5ddb1 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -2615,3 +2615,184 @@ define void @test_uge_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
   store <2 x i32> %res, ptr %ret_ptr
   ret void
 }
+
+; Test 8/16/32-bit [s|u]min/[s|u]max
+define void @test_smin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmin.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %min = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
+  store <4 x i16> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pminu.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %min = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
+  store <4 x i16> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_smin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmin.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %min = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
+  store <8 x i8> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pminu.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %min = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
+  store <8 x i8> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_smin_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmin.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %min = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umin_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pminu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %min = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %min, ptr %ret_ptr
+  ret void
+}
+
+define void @test_smax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmax.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %max = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
+  store <4 x i16> %max, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmaxu.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %max = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
+  store <4 x i16> %max, ptr %ret_ptr
+  ret void
+}
+
+define void @test_smax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmax.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %max = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
+  store <8 x i8> %max, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmaxu.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %max = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
+  store <8 x i8> %max, ptr %ret_ptr
+  ret void
+}
+
+define void @test_smax_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmax.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %max = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %max, ptr %ret_ptr
+  ret void
+}
+
+define void @test_umax_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmaxu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %max = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %max, ptr %ret_ptr
+  ret void
+}