[PowerPC] Use millicode call for memccpy instead of lib call (#182563)
AIX has "millicode" routines, which are functions loaded at boot time into fixed addresses in kernel memory. This allows them to be customized for the processor. The ___memccpy routine (___memccpy64 in 64-bit mode) is a millicode implementation; we use millicode for the memccpy function instead of a library call to improve performance. --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
This commit is contained in:
parent
3839878d20
commit
da851db4bb
@ -416,9 +416,9 @@ public:
|
||||
case LibFunc_sqrtl_finite:
|
||||
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
|
||||
case LibFunc_strnlen: case LibFunc_strstr: case LibFunc_memchr:
|
||||
case LibFunc_mempcpy: case LibFunc_tan: case LibFunc_tanf:
|
||||
case LibFunc_tanl: case LibFunc_tanh: case LibFunc_tanhf:
|
||||
case LibFunc_tanhl:
|
||||
case LibFunc_memccpy: case LibFunc_mempcpy: case LibFunc_tan:
|
||||
case LibFunc_tanf: case LibFunc_tanl: case LibFunc_tanh:
|
||||
case LibFunc_tanhf: case LibFunc_tanhl:
|
||||
// clang-format on
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1292,6 +1292,12 @@ public:
|
||||
/// stack arguments from being clobbered.
|
||||
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain);
|
||||
|
||||
/// Lower a memccpy operation into a target library call and return the
|
||||
/// call result (first) and resulting chain (second) as SelectionDAG SDValues.
|
||||
LLVM_ABI std::pair<SDValue, SDValue>
|
||||
getMemccpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
|
||||
SDValue C, SDValue Size, const CallInst *CI);
|
||||
|
||||
/// Lower a memcmp operation into a target library call and return the
|
||||
/// resulting chain and call result as SelectionDAG SDValues.
|
||||
LLVM_ABI std::pair<SDValue, SDValue> getMemcmp(SDValue Chain, const SDLoc &dl,
|
||||
|
||||
@ -122,6 +122,17 @@ public:
|
||||
return std::make_pair(SDValue(), SDValue());
|
||||
}
|
||||
|
||||
/// Emit target-specific code that performs a memccpy, in cases where that is
|
||||
/// faster than a libcall. The first returned SDValue is the result of the
|
||||
/// memccpy and the second is the chain. Both SDValues can be null if a normal
|
||||
/// libcall should be used.
|
||||
virtual std::pair<SDValue, SDValue>
|
||||
EmitTargetCodeForMemccpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
|
||||
SDValue Dst, SDValue Src, SDValue C, SDValue Size,
|
||||
const CallInst *CI) const {
|
||||
return std::make_pair(SDValue(), SDValue());
|
||||
}
|
||||
|
||||
/// Emit target-specific code that performs a memcmp/bcmp, in cases where that is
|
||||
/// faster than a libcall. The first returned SDValue is the result of the
|
||||
/// memcmp and the second is the chain. Both SDValues can be null if a normal
|
||||
|
||||
@ -3161,6 +3161,7 @@ defset list<RuntimeLibcallImpl> PPCRuntimeLibcalls = {
|
||||
}
|
||||
|
||||
defset list<RuntimeLibcallImpl> PPC64AIXCallList = {
|
||||
def ___memccpy64 : RuntimeLibcallImpl<MEMCCPY>;
|
||||
def ___memcmp64 : RuntimeLibcallImpl<MEMCMP>;
|
||||
def ___memmove64 : RuntimeLibcallImpl<MEMMOVE>;
|
||||
def ___memset64 : RuntimeLibcallImpl<MEMSET>;
|
||||
@ -3172,6 +3173,7 @@ defset list<RuntimeLibcallImpl> PPC64AIXCallList = {
|
||||
}
|
||||
|
||||
defset list<RuntimeLibcallImpl> PPC32AIXCallList = {
|
||||
def ___memccpy : RuntimeLibcallImpl<MEMCCPY>;
|
||||
def ___memcmp : RuntimeLibcallImpl<MEMCMP>;
|
||||
def ___memmove : RuntimeLibcallImpl<MEMMOVE>;
|
||||
def ___memset : RuntimeLibcallImpl<MEMSET>;
|
||||
|
||||
@ -9387,6 +9387,22 @@ std::pair<SDValue, SDValue> SelectionDAG::getStrstr(SDValue Chain,
|
||||
RTLIB::STRSTR, this, TLI);
|
||||
}
|
||||
|
||||
/// Build a call to the runtime's memccpy implementation (RTLIB::MEMCCPY).
/// Dst and Src are passed as opaque pointers, C as an i32, and Size as the
/// data layout's pointer-sized integer. The pair produced by
/// getRuntimeCallSDValueHelper is returned unchanged.
std::pair<SDValue, SDValue>
SelectionDAG::getMemccpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
                         SDValue Src, SDValue C, SDValue Size,
                         const CallInst *CI) {
  auto &Ctx = *getContext();

  // IR-level types of the four memccpy arguments, in call order.
  Type *PtrTy = PointerType::getUnqual(Ctx);
  Type *StopCharTy = Type::getInt32Ty(Ctx);
  Type *SizeTy = getDataLayout().getIntPtrType(Ctx);

  TargetLowering::ArgListTy Args = {
      {Dst, PtrTy}, {Src, PtrTy}, {C, StopCharTy}, {Size, SizeTy}};

  return getRuntimeCallSDValueHelper(Chain, dl, std::move(Args), CI,
                                     RTLIB::MEMCCPY, this, TLI);
}
|
||||
|
||||
std::pair<SDValue, SDValue>
|
||||
SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0,
|
||||
SDValue Mem1, SDValue Size, const CallInst *CI) {
|
||||
|
||||
@ -9399,6 +9399,26 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// See if we can lower a memccpy call into an optimized form. If so, return
|
||||
/// true and lower it, otherwise return false and it will be lowered like a
|
||||
/// normal call.
|
||||
/// The caller already checked that \p I calls the appropriate LibFunc with a
|
||||
/// correct prototype.
|
||||
bool SelectionDAGBuilder::visitMemCCpyCall(const CallInst &I) {
|
||||
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
|
||||
std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemccpy(
|
||||
DAG, getCurSDLoc(), DAG.getRoot(), getValue(I.getArgOperand(0)),
|
||||
getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)),
|
||||
getValue(I.getArgOperand(3)), &I);
|
||||
|
||||
if (Res.first) {
|
||||
processIntegerCallValue(I, Res.first, true);
|
||||
PendingLoads.push_back(Res.second);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// See if we can lower a mempcpy call into an optimized form. If so, return
|
||||
/// true and lower it. Otherwise return false, and it will be lowered like a
|
||||
/// normal call.
|
||||
@ -9730,6 +9750,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
|
||||
if (visitMemCmpBCmpCall(I))
|
||||
return;
|
||||
break;
|
||||
case LibFunc_memccpy:
|
||||
if (visitMemCCpyCall(I))
|
||||
return;
|
||||
break;
|
||||
case LibFunc_mempcpy:
|
||||
if (visitMemPCpyCall(I))
|
||||
return;
|
||||
|
||||
@ -627,6 +627,7 @@ private:
|
||||
void visitPHI(const PHINode &I);
|
||||
void visitCall(const CallInst &I);
|
||||
bool visitMemCmpBCmpCall(const CallInst &I);
|
||||
bool visitMemCCpyCall(const CallInst &I);
|
||||
bool visitMemPCpyCall(const CallInst &I);
|
||||
bool visitMemChrCall(const CallInst &I);
|
||||
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
|
||||
|
||||
@ -75,6 +75,13 @@ void PPCSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
|
||||
SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
|
||||
}
|
||||
|
||||
/// PowerPC override of the memccpy lowering hook. The call is always lowered
/// through SelectionDAG::getMemccpy, and the pair it returns is passed back
/// unchanged.
std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemccpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue C, SDValue Size, const CallInst *CI) const {
  return DAG.getMemccpy(Chain, dl, Dst, Src, C, Size, CI);
}
|
||||
|
||||
std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemcmp(
|
||||
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2,
|
||||
SDValue Op3, const CallInst *CI) const {
|
||||
|
||||
@ -71,6 +71,10 @@ public:
|
||||
|
||||
void verifyTargetNode(const SelectionDAG &DAG,
|
||||
const SDNode *N) const override;
|
||||
std::pair<SDValue, SDValue>
|
||||
EmitTargetCodeForMemccpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
|
||||
SDValue Dst, SDValue Src, SDValue C, SDValue Size,
|
||||
const CallInst *CI) const override;
|
||||
|
||||
std::pair<SDValue, SDValue>
|
||||
EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
|
||||
|
||||
@ -312,7 +312,7 @@ define ptr @test_memccpy(ptr noalias noundef %dst, ptr noalias noundef %src, i32
|
||||
; CHECK-AIX-32-P9-NEXT: mflr r0
|
||||
; CHECK-AIX-32-P9-NEXT: stwu r1, -64(r1)
|
||||
; CHECK-AIX-32-P9-NEXT: stw r0, 72(r1)
|
||||
; CHECK-AIX-32-P9-NEXT: bl .memccpy[PR]
|
||||
; CHECK-AIX-32-P9-NEXT: bl .___memccpy[PR]
|
||||
; CHECK-AIX-32-P9-NEXT: nop
|
||||
; CHECK-AIX-32-P9-NEXT: addi r1, r1, 64
|
||||
; CHECK-AIX-32-P9-NEXT: lwz r0, 8(r1)
|
||||
|
||||
@ -425,8 +425,9 @@ define ptr @test_memccpy(ptr noalias noundef %dst, ptr noalias noundef %src, i32
|
||||
; CHECK-AIX-64-P9: # %bb.0: # %entry
|
||||
; CHECK-AIX-64-P9-NEXT: mflr r0
|
||||
; CHECK-AIX-64-P9-NEXT: stdu r1, -112(r1)
|
||||
; CHECK-AIX-64-P9-NEXT: clrldi r5, r5, 32
|
||||
; CHECK-AIX-64-P9-NEXT: std r0, 128(r1)
|
||||
; CHECK-AIX-64-P9-NEXT: bl .memccpy[PR]
|
||||
; CHECK-AIX-64-P9-NEXT: bl .___memccpy64[PR]
|
||||
; CHECK-AIX-64-P9-NEXT: nop
|
||||
; CHECK-AIX-64-P9-NEXT: addi r1, r1, 112
|
||||
; CHECK-AIX-64-P9-NEXT: ld r0, 16(r1)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user