[PowerPC] using millicode call for memccpy instead of lib call (#182563)

AIX has "millicode" routines, which are functions loaded at boot time
into fixed addresses in kernel memory. This allows them to be customized
for the processor. The __memccpy routine is a millicode implementation;
we use millicode for the memccpy function instead of a library call to
improve performance.

---------

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
This commit is contained in:
zhijian lin 2026-02-26 13:09:22 -05:00 committed by GitHub
parent 3839878d20
commit da851db4bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 77 additions and 5 deletions

View File

@ -416,9 +416,9 @@ public:
case LibFunc_sqrtl_finite:
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
case LibFunc_strnlen: case LibFunc_strstr: case LibFunc_memchr:
case LibFunc_mempcpy: case LibFunc_tan: case LibFunc_tanf:
case LibFunc_tanl: case LibFunc_tanh: case LibFunc_tanhf:
case LibFunc_tanhl:
case LibFunc_memccpy: case LibFunc_mempcpy: case LibFunc_tan:
case LibFunc_tanf: case LibFunc_tanl: case LibFunc_tanh:
case LibFunc_tanhf: case LibFunc_tanhl:
// clang-format on
return true;
}

View File

@ -1292,6 +1292,12 @@ public:
/// stack arguments from being clobbered.
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain);
/// Lower a memccpy operation into a call to the RTLIB::MEMCCPY runtime
/// routine. \p Dst and \p Src are passed as pointers, \p C as a 32-bit
/// integer and \p Size as a pointer-sized integer; \p Chain is the incoming
/// chain and \p CI the originating call instruction.
/// NOTE(review): callers in this change treat the first element of the
/// returned pair as the call result and the second as the output chain —
/// confirm against getRuntimeCallSDValueHelper.
LLVM_ABI std::pair<SDValue, SDValue>
getMemccpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
SDValue C, SDValue Size, const CallInst *CI);
/// Lower a memcmp operation into a target library call and return the
/// resulting chain and call result as SelectionDAG SDValues.
LLVM_ABI std::pair<SDValue, SDValue> getMemcmp(SDValue Chain, const SDLoc &dl,

View File

@ -122,6 +122,17 @@ public:
return std::make_pair(SDValue(), SDValue());
}
/// Hook that lets a target emit its own lowering for memccpy when that is
/// faster than an ordinary libcall. On success the returned pair carries the
/// memccpy result first and the chain second. Returning two null SDValues
/// asks for the normal libcall path; this default implementation always does
/// exactly that.
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForMemccpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
                         SDValue Dst, SDValue Src, SDValue C, SDValue Size,
                         const CallInst *CI) const {
  // No target-specific lowering by default.
  return {SDValue(), SDValue()};
}
/// Emit target-specific code that performs a memcmp/bcmp, in cases where that is
/// faster than a libcall. The first returned SDValue is the result of the
/// memcmp and the second is the chain. Both SDValues can be null if a normal

View File

@ -3161,6 +3161,7 @@ defset list<RuntimeLibcallImpl> PPCRuntimeLibcalls = {
}
defset list<RuntimeLibcallImpl> PPC64AIXCallList = {
def ___memccpy64 : RuntimeLibcallImpl<MEMCCPY>;
def ___memcmp64 : RuntimeLibcallImpl<MEMCMP>;
def ___memmove64 : RuntimeLibcallImpl<MEMMOVE>;
def ___memset64 : RuntimeLibcallImpl<MEMSET>;
@ -3172,6 +3173,7 @@ defset list<RuntimeLibcallImpl> PPC64AIXCallList = {
}
defset list<RuntimeLibcallImpl> PPC32AIXCallList = {
def ___memccpy : RuntimeLibcallImpl<MEMCCPY>;
def ___memcmp : RuntimeLibcallImpl<MEMCMP>;
def ___memmove : RuntimeLibcallImpl<MEMMOVE>;
def ___memset : RuntimeLibcallImpl<MEMSET>;

View File

@ -9387,6 +9387,22 @@ std::pair<SDValue, SDValue> SelectionDAG::getStrstr(SDValue Chain,
RTLIB::STRSTR, this, TLI);
}
/// Emit a call to the RTLIB::MEMCCPY runtime routine with the operands
/// (\p Dst, \p Src, \p C, \p Size) and hand back whatever pair of SDValues
/// getRuntimeCallSDValueHelper produces for it.
std::pair<SDValue, SDValue>
SelectionDAG::getMemccpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
                         SDValue Src, SDValue C, SDValue Size,
                         const CallInst *CI) {
  auto &Ctx = *getContext();

  // IR-level types of the four call arguments: two opaque pointers, a 32-bit
  // integer for the stop character and a pointer-sized integer for the length.
  PointerType *PtrTy = PointerType::getUnqual(Ctx);
  Type *CharArgTy = Type::getInt32Ty(Ctx);
  Type *LengthTy = getDataLayout().getIntPtrType(Ctx);

  TargetLowering::ArgListTy CallArgs = {
      {Dst, PtrTy}, {Src, PtrTy}, {C, CharArgTy}, {Size, LengthTy}};

  return getRuntimeCallSDValueHelper(Chain, dl, std::move(CallArgs), CI,
                                     RTLIB::MEMCCPY, this, TLI);
}
std::pair<SDValue, SDValue>
SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0,
SDValue Mem1, SDValue Size, const CallInst *CI) {

View File

@ -9399,6 +9399,26 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
return false;
}
/// See if we can lower a memccpy call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemCCpyCall(const CallInst &I) {
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemccpy(
DAG, getCurSDLoc(), DAG.getRoot(), getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)),
getValue(I.getArgOperand(3)), &I);
if (Res.first) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a mempcpy call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
@ -9730,6 +9750,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_memccpy:
if (visitMemCCpyCall(I))
return;
break;
case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;

View File

@ -627,6 +627,7 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpBCmpCall(const CallInst &I);
bool visitMemCCpyCall(const CallInst &I);
bool visitMemPCpyCall(const CallInst &I);
bool visitMemChrCall(const CallInst &I);
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);

View File

@ -75,6 +75,13 @@ void PPCSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
/// PowerPC lowering hook for memccpy. Every request is forwarded unchanged to
/// SelectionDAG::getMemccpy, which emits the RTLIB::MEMCCPY runtime call, and
/// that call's pair of SDValues is returned as-is.
std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemccpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue C, SDValue Size, const CallInst *CI) const {
  return DAG.getMemccpy(Chain, dl, Dst, Src, C, Size, CI);
}
std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemcmp(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2,
SDValue Op3, const CallInst *CI) const {

View File

@ -71,6 +71,10 @@ public:
void verifyTargetNode(const SelectionDAG &DAG,
const SDNode *N) const override;
/// PowerPC override of the memccpy lowering hook; the implementation forwards
/// the operands to SelectionDAG::getMemccpy and returns its result pair.
std::pair<SDValue, SDValue>
EmitTargetCodeForMemccpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
SDValue Dst, SDValue Src, SDValue C, SDValue Size,
const CallInst *CI) const override;
std::pair<SDValue, SDValue>
EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,

View File

@ -312,7 +312,7 @@ define ptr @test_memccpy(ptr noalias noundef %dst, ptr noalias noundef %src, i32
; CHECK-AIX-32-P9-NEXT: mflr r0
; CHECK-AIX-32-P9-NEXT: stwu r1, -64(r1)
; CHECK-AIX-32-P9-NEXT: stw r0, 72(r1)
; CHECK-AIX-32-P9-NEXT: bl .memccpy[PR]
; CHECK-AIX-32-P9-NEXT: bl .___memccpy[PR]
; CHECK-AIX-32-P9-NEXT: nop
; CHECK-AIX-32-P9-NEXT: addi r1, r1, 64
; CHECK-AIX-32-P9-NEXT: lwz r0, 8(r1)

View File

@ -425,8 +425,9 @@ define ptr @test_memccpy(ptr noalias noundef %dst, ptr noalias noundef %src, i32
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: mflr r0
; CHECK-AIX-64-P9-NEXT: stdu r1, -112(r1)
; CHECK-AIX-64-P9-NEXT: clrldi r5, r5, 32
; CHECK-AIX-64-P9-NEXT: std r0, 128(r1)
; CHECK-AIX-64-P9-NEXT: bl .memccpy[PR]
; CHECK-AIX-64-P9-NEXT: bl .___memccpy64[PR]
; CHECK-AIX-64-P9-NEXT: nop
; CHECK-AIX-64-P9-NEXT: addi r1, r1, 112
; CHECK-AIX-64-P9-NEXT: ld r0, 16(r1)