[PPC] Custom lower ssubo for i64 (#118711)

This is a follow-up patch that improves the codegen for the i64 ssubo node
in 64-bit mode by custom lowering it.
This commit is contained in:
Maryam Moghadas 2024-12-05 17:22:44 -05:00 committed by GitHub
parent 426aecb7e9
commit 68e75eebec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 18 additions and 13 deletions

View File

@ -200,8 +200,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// On P10, the default lowering generates better code using the
// setbc instruction.
if (!Subtarget.hasP10Vector())
if (!Subtarget.hasP10Vector()) {
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
if (isPPC64)
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
}
// Match BITREVERSE to customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
@ -12051,16 +12054,19 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT VT = Op.getNode()->getValueType(0);
SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, LHS, RHS);
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
SDValue Xor1 = DAG.getNode(ISD::XOR, dl, MVT::i32, RHS, LHS);
SDValue Xor2 = DAG.getNode(ISD::XOR, dl, MVT::i32, Sub, LHS);
SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, Xor1, Xor2);
SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
SDValue Overflow =
DAG.getNode(ISD::SRL, dl, VT, And,
DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
SDValue Overflow = DAG.getNode(ISD::SRL, dl, MVT::i32, And,
DAG.getConstant(31, dl, MVT::i32));
SDValue OverflowTrunc =
DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);

View File

@ -144,12 +144,11 @@ entry:
define i1 @test_ssubo_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_ssubo_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sub 5, 3, 4
; CHECK-NEXT: cmpdi 1, 4, 0
; CHECK-NEXT: cmpd 5, 3
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: creqv 20, 5, 0
; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: xor 5, 4, 3
; CHECK-NEXT: sub 4, 3, 4
; CHECK-NEXT: xor 3, 4, 3
; CHECK-NEXT: and 3, 5, 3
; CHECK-NEXT: rldicl 3, 3, 1, 63
; CHECK-NEXT: blr
entry:
%res = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind