[SystemZ] Reapply memcmp and memcpy patches.
This reverts 3562076 and includes some refactoring as well.

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D111733
This commit is contained in:
parent
00baad35b2
commit
a33e4c8ae9
@ -7867,9 +7867,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
|
||||
|
||||
// When generating more than one CLC, all but the last will need to
|
||||
// branch to the end when a difference is found.
|
||||
MachineBasicBlock *EndMBB = (ImmLength > 256 && Opcode == SystemZ::CLC
|
||||
? SystemZ::splitBlockAfter(MI, MBB)
|
||||
: nullptr);
|
||||
MachineBasicBlock *EndMBB =
|
||||
(Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
|
||||
? SystemZ::splitBlockAfter(MI, MBB)
|
||||
: nullptr);
|
||||
|
||||
if (NeedsLoop) {
|
||||
Register StartCountReg =
|
||||
@ -7920,8 +7921,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
|
||||
AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
|
||||
StartMBB = SystemZ::emitBlockAfter(MBB);
|
||||
LoopMBB = SystemZ::emitBlockAfter(StartMBB);
|
||||
NextMBB = LoopMBB;
|
||||
DoneMBB = SystemZ::emitBlockAfter(LoopMBB);
|
||||
NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
|
||||
DoneMBB = SystemZ::emitBlockAfter(NextMBB);
|
||||
|
||||
// MBB:
|
||||
// # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
|
||||
@ -8039,18 +8040,23 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
|
||||
: MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
|
||||
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
|
||||
.addReg(StartDestReg).addMBB(StartMBB)
|
||||
.addReg(NextDestReg).addMBB(LoopMBB);
|
||||
.addReg(NextDestReg).addMBB(NextMBB);
|
||||
if (!HaveSingleBase)
|
||||
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
|
||||
.addReg(StartSrcReg).addMBB(StartMBB)
|
||||
.addReg(NextSrcReg).addMBB(LoopMBB);
|
||||
BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
|
||||
.addImm(Opcode)
|
||||
.addReg(LenMinus1Reg)
|
||||
.addReg(RemDestReg).addImm(DestDisp)
|
||||
.addReg(RemSrcReg).addImm(SrcDisp);
|
||||
.addReg(NextSrcReg).addMBB(NextMBB);
|
||||
MachineInstrBuilder EXRL_MIB =
|
||||
BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
|
||||
.addImm(Opcode)
|
||||
.addReg(LenMinus1Reg)
|
||||
.addReg(RemDestReg).addImm(DestDisp)
|
||||
.addReg(RemSrcReg).addImm(SrcDisp);
|
||||
MBB->addSuccessor(AllDoneMBB);
|
||||
MBB = AllDoneMBB;
|
||||
if (EndMBB) {
|
||||
EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
|
||||
MBB->addLiveIn(SystemZ::CC);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -8569,6 +8575,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
|
||||
case SystemZ::ATOMIC_CMP_SWAPW:
|
||||
return emitAtomicCmpSwapW(MI, MBB);
|
||||
case SystemZ::MVCImm:
|
||||
case SystemZ::MVCReg:
|
||||
return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
|
||||
case SystemZ::NCImm:
|
||||
return emitMemMemWrapper(MI, MBB, SystemZ::NC);
|
||||
@ -8578,6 +8585,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
|
||||
case SystemZ::XCReg:
|
||||
return emitMemMemWrapper(MI, MBB, SystemZ::XC);
|
||||
case SystemZ::CLCImm:
|
||||
case SystemZ::CLCReg:
|
||||
return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
|
||||
case SystemZ::CLSTLoop:
|
||||
return emitStringWrapper(MI, MBB, SystemZ::CLST);
|
||||
|
||||
@ -5356,6 +5356,10 @@ multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
|
||||
imm64:$length),
|
||||
[(set CC, (memop bdaddr12only:$dest, bdaddr12only:$src,
|
||||
imm64:$length))]>;
|
||||
def Reg : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
|
||||
ADDR64:$length),
|
||||
[(set CC, (memop bdaddr12only:$dest, bdaddr12only:$src,
|
||||
ADDR64:$length))]>;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -17,6 +17,11 @@ using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "systemz-selectiondag-info"
|
||||
|
||||
static SDVTList getMemMemVTs(unsigned Op, SelectionDAG &DAG) {
|
||||
return Op == SystemZISD::CLC ? DAG.getVTList(MVT::i32, MVT::Other)
|
||||
: DAG.getVTList(MVT::Other);
|
||||
}
|
||||
|
||||
// Emit a mem-mem operation after subtracting one from size, which will be
|
||||
// added back during pseudo expansion. As the Reg case emitted here may be
|
||||
// converted by DAGCombiner into having an Imm length, they are both emitted
|
||||
@ -24,7 +29,7 @@ using namespace llvm;
|
||||
static SDValue emitMemMemImm(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
|
||||
SDValue Chain, SDValue Dst, SDValue Src,
|
||||
uint64_t Size) {
|
||||
return DAG.getNode(Op, DL, MVT::Other, Chain, Dst, Src,
|
||||
return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src,
|
||||
DAG.getConstant(Size - 1, DL, Src.getValueType()));
|
||||
}
|
||||
|
||||
@ -34,17 +39,7 @@ static SDValue emitMemMemReg(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
|
||||
SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
|
||||
DAG.getZExtOrTrunc(Size, DL, MVT::i64),
|
||||
DAG.getConstant(-1, DL, MVT::i64));
|
||||
return DAG.getNode(Op, DL, MVT::Other, Chain, Dst, Src, LenMinus1);
|
||||
}
|
||||
|
||||
// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size).
|
||||
// One is subtracted from size also here, per above.
|
||||
static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
|
||||
SDValue Src1, SDValue Src2, uint64_t Size) {
|
||||
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
|
||||
EVT PtrVT = Src1.getValueType();
|
||||
return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
|
||||
DAG.getConstant(Size - 1, DL, PtrVT));
|
||||
return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src, LenMinus1);
|
||||
}
|
||||
|
||||
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
|
||||
@ -57,7 +52,8 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
|
||||
if (auto *CSize = dyn_cast<ConstantSDNode>(Size))
|
||||
return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, Dst, Src,
|
||||
CSize->getZExtValue());
|
||||
return SDValue();
|
||||
|
||||
return emitMemMemReg(DAG, DL, SystemZISD::MVC, Chain, Dst, Src, Size);
|
||||
}
|
||||
|
||||
// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
|
||||
@ -166,15 +162,16 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
|
||||
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
|
||||
SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
|
||||
MachinePointerInfo Op2PtrInfo) const {
|
||||
SDValue CCReg;
|
||||
// Swap operands to invert CC == 1 vs. CC == 2 cases.
|
||||
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
|
||||
uint64_t Bytes = CSize->getZExtValue();
|
||||
assert(Bytes > 0 && "Caller should have handled 0-size case");
|
||||
// Swap operands to invert CC == 1 vs. CC == 2 cases.
|
||||
SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
|
||||
Chain = CCReg.getValue(1);
|
||||
return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
|
||||
}
|
||||
return std::make_pair(SDValue(), SDValue());
|
||||
CCReg = emitMemMemImm(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Bytes);
|
||||
} else
|
||||
CCReg = emitMemMemReg(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Size);
|
||||
Chain = CCReg.getValue(1);
|
||||
return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
|
||||
}
|
||||
|
||||
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
|
||||
|
||||
@ -19,7 +19,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
|
||||
|
||||
define void @fun0(%0*) {
|
||||
; CHECK-LABEL: .LBB0_4
|
||||
; CHECK: => This Inner Loop Header: Depth=2
|
||||
; CHECK: => This Inner Loop Header
|
||||
; CHECK-NOT: 16-byte Folded Spill
|
||||
; CHECK-NOT: 16-byte Folded Reload
|
||||
|
||||
|
||||
@ -219,3 +219,30 @@ define i32 @f13(i8 *%src1, i8 *%src2) {
|
||||
%res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @f14(i8 *%src1, i8 *%src2, i64 %Len) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: aghi %r4, -1
|
||||
; CHECK-NEXT: cghi %r4, -1
|
||||
; CHECK-NEXT: je .LBB13_5
|
||||
; CHECK-NEXT: # %bb.1:
|
||||
; CHECK-NEXT: srlg %r0, %r4, 8
|
||||
; CHECK-NEXT: cgije %r0, 0, .LBB13_4
|
||||
; CHECK-NEXT: .LBB13_2: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: clc 0(256,%r3), 0(%r2)
|
||||
; CHECK-NEXT: jlh .LBB13_5
|
||||
; CHECK-NEXT: # %bb.3: # in Loop: Header=BB13_2 Depth=1
|
||||
; CHECK-NEXT: la %r3, 256(%r3)
|
||||
; CHECK-NEXT: la %r2, 256(%r2)
|
||||
; CHECK-NEXT: brctg %r0, .LBB13_2
|
||||
; CHECK-NEXT: .LBB13_4:
|
||||
; CHECK-NEXT: exrl %r4, .Ltmp0
|
||||
; CHECK-NEXT: .LBB13_5:
|
||||
; CHECK-NEXT: ipm %r2
|
||||
; CHECK-NEXT: sll %r2, 2
|
||||
; CHECK-NEXT: sra %r2, 30
|
||||
; CHECK-NEXT: br %r14
|
||||
%res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 %Len)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
@ -217,3 +217,28 @@ define void @f16() {
|
||||
call void @foo(i8* %dest, i8* %src)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a variable length loop.
|
||||
define void @f17(i8* %dest, i8* %src, i64 %Len) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: aghi %r4, -1
|
||||
; CHECK-NEXT: cgibe %r4, -1, 0(%r14)
|
||||
; CHECK-NEXT: .LBB16_1:
|
||||
; CHECK-NEXT: srlg %r0, %r4, 8
|
||||
; CHECK-NEXT: cgije %r0, 0, .LBB16_3
|
||||
; CHECK-NEXT: .LBB16_2: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: pfd 2, 768(%r2)
|
||||
; CHECK-NEXT: mvc 0(256,%r2), 0(%r3)
|
||||
; CHECK-NEXT: la %r2, 256(%r2)
|
||||
; CHECK-NEXT: la %r3, 256(%r3)
|
||||
; CHECK-NEXT: brctg %r0, .LBB16_2
|
||||
; CHECK-NEXT: .LBB16_3:
|
||||
; CHECK-NEXT: exrl %r4, .Ltmp0
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %Len, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .Ltmp0:
|
||||
; CHECK-NEXT: mvc 0(1,%r2), 0(%r3)
|
||||
|
||||
@ -1,13 +1,5 @@
|
||||
; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: tail_memcpy:
|
||||
; CHECK: jg memcpy
|
||||
define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
|
||||
entry:
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: tail_memmove:
|
||||
; CHECK: jg memmove
|
||||
define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user