Compare commits
4 Commits
main
...
users/MacD
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ea77b25e78 | ||
|
|
d8a8c4e3e0 | ||
|
|
142af7d225 | ||
|
|
9799316c58 |
@ -1688,6 +1688,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
||||
}
|
||||
case AArch64::InOutZAUsePseudo:
|
||||
case AArch64::RequiresZASavePseudo:
|
||||
case AArch64::SMEStateAllocPseudo:
|
||||
case AArch64::COALESCER_BARRIER_FPR16:
|
||||
case AArch64::COALESCER_BARRIER_FPR32:
|
||||
case AArch64::COALESCER_BARRIER_FPR64:
|
||||
|
||||
@ -8292,7 +8292,39 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
|
||||
if (Subtarget->hasCustomCallingConv())
|
||||
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
|
||||
|
||||
if (!getTM().useNewSMEABILowering() || Attrs.hasAgnosticZAInterface()) {
|
||||
if (getTM().useNewSMEABILowering()) {
|
||||
if (Subtarget->isTargetWindows() || hasInlineStackProbe(MF)) {
|
||||
SDValue Size;
|
||||
if (Attrs.hasZAState()) {
|
||||
SDValue SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
|
||||
DAG.getConstant(1, DL, MVT::i32));
|
||||
Size = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
|
||||
} else if (Attrs.hasAgnosticZAInterface()) {
|
||||
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
|
||||
SDValue Callee = DAG.getExternalSymbol(
|
||||
getLibcallName(LC), getPointerTy(DAG.getDataLayout()));
|
||||
auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.getContext());
|
||||
TargetLowering::CallLoweringInfo CLI(DAG);
|
||||
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
|
||||
getLibcallCallingConv(LC), RetTy, Callee, {});
|
||||
std::tie(Size, Chain) = LowerCallTo(CLI);
|
||||
}
|
||||
if (Size) {
|
||||
SDValue Buffer = DAG.getNode(
|
||||
ISD::DYNAMIC_STACKALLOC, DL, DAG.getVTList(MVT::i64, MVT::Other),
|
||||
{Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
|
||||
Chain = Buffer.getValue(1);
|
||||
|
||||
Register BufferPtr =
|
||||
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
|
||||
Chain = DAG.getCopyToReg(Chain, DL, BufferPtr, Buffer);
|
||||
Chain = DAG.getNode(AArch64ISD::SME_STATE_ALLOC, DL,
|
||||
DAG.getVTList(MVT::Other), Chain);
|
||||
FuncInfo->setEarlyAllocSMESaveBuffer(BufferPtr);
|
||||
MFI.CreateVariableSizedObject(Align(16), nullptr);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Old SME ABI lowering (deprecated):
|
||||
// Create a 16 Byte TPIDR2 object. The dynamic buffer
|
||||
// will be expanded and stored in the static object later using a
|
||||
@ -9081,9 +9113,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
bool UseNewSMEABILowering = getTM().useNewSMEABILowering();
|
||||
bool IsAgnosticZAFunction = CallAttrs.caller().hasAgnosticZAInterface();
|
||||
auto ZAMarkerNode = [&]() -> std::optional<unsigned> {
|
||||
// TODO: Handle agnostic ZA functions.
|
||||
if (!UseNewSMEABILowering || IsAgnosticZAFunction)
|
||||
if (!UseNewSMEABILowering)
|
||||
return std::nullopt;
|
||||
if (IsAgnosticZAFunction) {
|
||||
if (CallAttrs.requiresPreservingAllZAState())
|
||||
return AArch64ISD::REQUIRES_ZA_SAVE;
|
||||
return std::nullopt;
|
||||
}
|
||||
if (!CallAttrs.caller().hasZAState() && !CallAttrs.caller().hasZT0State())
|
||||
return std::nullopt;
|
||||
return CallAttrs.requiresLazySave() ? AArch64ISD::REQUIRES_ZA_SAVE
|
||||
@ -9163,7 +9199,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
};
|
||||
|
||||
bool RequiresLazySave = !UseNewSMEABILowering && CallAttrs.requiresLazySave();
|
||||
bool RequiresSaveAllZA = CallAttrs.requiresPreservingAllZAState();
|
||||
bool RequiresSaveAllZA =
|
||||
!UseNewSMEABILowering && CallAttrs.requiresPreservingAllZAState();
|
||||
if (RequiresLazySave) {
|
||||
const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
|
||||
MachinePointerInfo MPI =
|
||||
|
||||
@ -238,6 +238,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
|
||||
// Holds the SME function attributes (streaming mode, ZA/ZT0 state).
|
||||
SMEAttrs SMEFnAttrs;
|
||||
|
||||
// Holds the SME save buffer pointer if allocated early (for Windows/stack probes
|
||||
// support).
|
||||
Register EarlyAllocSMESaveBuffer = AArch64::NoRegister;
|
||||
|
||||
// Note: The following properties are only used for the old SME ABI lowering:
|
||||
/// The frame-index for the TPIDR2 object used for lazy saves.
|
||||
TPIDR2Object TPIDR2;
|
||||
@ -256,6 +260,12 @@ public:
|
||||
const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
|
||||
const override;
|
||||
|
||||
/// Remembers \p Ptr as the register holding the pointer to an SME save buffer
/// that was allocated early (e.g. during SelectionDAG lowering).
void setEarlyAllocSMESaveBuffer(Register Ptr) { EarlyAllocSMESaveBuffer = Ptr; }
|
||||
|
||||
/// Returns the register holding the early-allocated SME save buffer pointer,
/// or AArch64::NoRegister when no buffer was allocated early. Const-qualified
/// to match the other read-only accessors on this class.
Register getEarlyAllocSMESaveBuffer() const { return EarlyAllocSMESaveBuffer; }
|
||||
|
||||
// Old SME ABI lowering state getters/setters:
|
||||
Register getSMESaveBufferAddr() const { return SMESaveBufferAddr; };
|
||||
void setSMESaveBufferAddr(Register Reg) { SMESaveBufferAddr = Reg; };
|
||||
|
||||
@ -93,6 +93,8 @@ let hasSideEffects = 1, isMeta = 1 in {
|
||||
def RequiresZASavePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
|
||||
}
|
||||
|
||||
// Marker pseudo added to a function when the SME save buffer was allocated in
// SelectionDAG; it marks the end of that allocation sequence.
def SMEStateAllocPseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
|
||||
|
||||
def CommitZASavePseudo
|
||||
: Pseudo<(outs),
|
||||
(ins GPR64:$tpidr2_el0, i1imm:$zero_za, i64imm:$commit_routine, variable_ops), []>,
|
||||
@ -108,6 +110,11 @@ def AArch64_requires_za_save
|
||||
[SDNPHasChain, SDNPInGlue]>;
|
||||
def : Pat<(AArch64_requires_za_save), (RequiresZASavePseudo)>;
|
||||
|
||||
// SelectionDAG node for AArch64ISD::SME_STATE_ALLOC. Its type profile declares
// zero results and zero operands; it is threaded only through the chain.
def AArch64_sme_state_alloc
|
||||
: SDNode<"AArch64ISD::SME_STATE_ALLOC", SDTypeProfile<0, 0,[]>,
|
||||
[SDNPHasChain]>;
|
||||
def : Pat<(AArch64_sme_state_alloc), (SMEStateAllocPseudo)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction naming conventions.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass implements the SME ABI requirements for ZA state. This includes
|
||||
// implementing the lazy ZA state save schemes around calls.
|
||||
// implementing the lazy (and agnostic) ZA state save schemes around calls.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
@ -200,7 +200,7 @@ struct MachineSMEABI : public MachineFunctionPass {
|
||||
|
||||
/// Inserts code to handle changes between ZA states within the function.
|
||||
/// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
|
||||
void insertStateChanges();
|
||||
void insertStateChanges(bool IsAgnosticZA);
|
||||
|
||||
// Emission routines for private and shared ZA functions (using lazy saves).
|
||||
void emitNewZAPrologue(MachineBasicBlock &MBB,
|
||||
@ -215,8 +215,41 @@ struct MachineSMEABI : public MachineFunctionPass {
|
||||
void emitZAOff(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||
bool ClearTPIDR2);
|
||||
|
||||
// Emission routines for agnostic ZA functions.
|
||||
void emitSetupFullZASave(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
LiveRegs PhysLiveRegs);
|
||||
void emitFullZASaveRestore(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
LiveRegs PhysLiveRegs, bool IsSave);
|
||||
void emitAllocateFullZASaveBuffer(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
LiveRegs PhysLiveRegs);
|
||||
|
||||
void emitStateChange(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
|
||||
ZAState From, ZAState To, LiveRegs PhysLiveRegs);
|
||||
ZAState From, ZAState To, LiveRegs PhysLiveRegs,
|
||||
bool IsAgnosticZA);
|
||||
|
||||
// Helpers for switching between lazy/full ZA save/restore routines.
|
||||
/// Emits a ZA save before \p MBBI. Agnostic-ZA functions go through the full
/// save routine; all other functions set up a lazy save.
void emitZASave(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                LiveRegs PhysLiveRegs, bool IsAgnosticZA) {
  if (!IsAgnosticZA) {
    emitSetupLazySave(MBB, MBBI);
    return;
  }
  emitFullZASaveRestore(MBB, MBBI, PhysLiveRegs, /*IsSave=*/true);
}
|
||||
/// Emits a ZA restore before \p MBBI. Agnostic-ZA functions go through the
/// full restore routine; all other functions restore from the lazy save.
void emitZARestore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                   LiveRegs PhysLiveRegs, bool IsAgnosticZA) {
  if (!IsAgnosticZA) {
    emitRestoreLazySave(MBB, MBBI, PhysLiveRegs);
    return;
  }
  emitFullZASaveRestore(MBB, MBBI, PhysLiveRegs, /*IsSave=*/false);
}
|
||||
/// Emits the save-buffer allocation before \p MBBI, choosing the full-ZA
/// buffer for agnostic-ZA functions and the lazy save buffer otherwise.
void emitAllocateZASaveBuffer(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI,
                              LiveRegs PhysLiveRegs, bool IsAgnosticZA) {
  if (!IsAgnosticZA) {
    emitAllocateLazySaveBuffer(MBB, MBBI);
    return;
  }
  emitAllocateFullZASaveBuffer(MBB, MBBI, PhysLiveRegs);
}
|
||||
|
||||
/// Save live physical registers to virtual registers.
|
||||
PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
|
||||
@ -228,6 +261,8 @@ struct MachineSMEABI : public MachineFunctionPass {
|
||||
/// Get or create a TPIDR2 block in this function.
|
||||
TPIDR2State getTPIDR2Block();
|
||||
|
||||
Register getAgnosticZABufferPtr();
|
||||
|
||||
private:
|
||||
/// Contains the needed ZA state (and live registers) at an instruction.
|
||||
struct InstInfo {
|
||||
@ -241,6 +276,7 @@ private:
|
||||
struct BlockInfo {
|
||||
ZAState FixedEntryState{ZAState::ANY};
|
||||
SmallVector<InstInfo> Insts;
|
||||
LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
|
||||
LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
|
||||
};
|
||||
|
||||
@ -249,6 +285,10 @@ private:
|
||||
SmallVector<BlockInfo> Blocks;
|
||||
SmallVector<ZAState> BundleStates;
|
||||
std::optional<TPIDR2State> TPIDR2Block;
|
||||
std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
|
||||
Register AgnosticZABufferPtr = AArch64::NoRegister;
|
||||
LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
|
||||
bool HasFullZASaveRestore = false;
|
||||
} State;
|
||||
|
||||
MachineFunction *MF = nullptr;
|
||||
@ -260,7 +300,8 @@ private:
|
||||
};
|
||||
|
||||
void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
|
||||
assert((SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) &&
|
||||
assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
|
||||
SMEFnAttrs.hasZAState()) &&
|
||||
"Expected function to have ZA/ZT0 state!");
|
||||
|
||||
State.Blocks.resize(MF->getNumBlockIDs());
|
||||
@ -294,10 +335,20 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
|
||||
|
||||
Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
|
||||
auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
|
||||
auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
|
||||
for (MachineInstr &MI : reverse(MBB)) {
|
||||
MachineBasicBlock::iterator MBBI(MI);
|
||||
LiveUnits.stepBackward(MI);
|
||||
LiveRegs PhysLiveRegs = GetPhysLiveRegs();
|
||||
// The SMEStateAllocPseudo marker is added to a function if the save
|
||||
// buffer was allocated in SelectionDAG. It marks the end of the
|
||||
// allocation -- which is a safe point for this pass to insert any TPIDR2
|
||||
// block setup.
|
||||
if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
|
||||
State.AfterSMEProloguePt = MBBI;
|
||||
State.PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
|
||||
}
|
||||
// Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
|
||||
auto [NeededState, InsertPt] = getZAStateBeforeInst(
|
||||
*TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface());
|
||||
assert((InsertPt == MBBI ||
|
||||
@ -306,6 +357,8 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
|
||||
// TODO: Do something to avoid state changes where NZCV is live.
|
||||
if (MBBI == FirstTerminatorInsertPt)
|
||||
Block.PhysLiveRegsAtExit = PhysLiveRegs;
|
||||
if (MBBI == FirstNonPhiInsertPt)
|
||||
Block.PhysLiveRegsAtEntry = PhysLiveRegs;
|
||||
if (NeededState != ZAState::ANY)
|
||||
Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
|
||||
}
|
||||
@ -372,7 +425,7 @@ void MachineSMEABI::assignBundleZAStates() {
|
||||
}
|
||||
}
|
||||
|
||||
void MachineSMEABI::insertStateChanges() {
|
||||
void MachineSMEABI::insertStateChanges(bool IsAgnosticZA) {
|
||||
for (MachineBasicBlock &MBB : *MF) {
|
||||
const BlockInfo &Block = State.Blocks[MBB.getNumber()];
|
||||
ZAState InState = State.BundleStates[Bundles->getBundle(MBB.getNumber(),
|
||||
@ -385,7 +438,7 @@ void MachineSMEABI::insertStateChanges() {
|
||||
for (auto &Inst : Block.Insts) {
|
||||
if (CurrentState != Inst.NeededState)
|
||||
emitStateChange(MBB, Inst.InsertPt, CurrentState, Inst.NeededState,
|
||||
Inst.PhysLiveRegs);
|
||||
Inst.PhysLiveRegs, IsAgnosticZA);
|
||||
CurrentState = Inst.NeededState;
|
||||
}
|
||||
|
||||
@ -396,7 +449,7 @@ void MachineSMEABI::insertStateChanges() {
|
||||
State.BundleStates[Bundles->getBundle(MBB.getNumber(), /*Out=*/true)];
|
||||
if (CurrentState != OutState)
|
||||
emitStateChange(MBB, MBB.getFirstTerminator(), CurrentState, OutState,
|
||||
Block.PhysLiveRegsAtExit);
|
||||
Block.PhysLiveRegsAtExit, IsAgnosticZA);
|
||||
}
|
||||
}
|
||||
|
||||
@ -529,23 +582,27 @@ void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
|
||||
void MachineSMEABI::emitAllocateLazySaveBuffer(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
|
||||
MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
auto *AFI = MF->getInfo<AArch64FunctionInfo>();
|
||||
|
||||
DebugLoc DL = getDebugLoc(MBB, MBBI);
|
||||
Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
|
||||
Register SVL = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
|
||||
Register Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
|
||||
Register Buffer = AFI->getEarlyAllocSMESaveBuffer();
|
||||
|
||||
// Calculate SVL.
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AArch64::RDSVLI_XI), SVL).addImm(1);
|
||||
|
||||
// 1. Allocate the lazy save buffer.
|
||||
{
|
||||
// TODO This function grows the stack with a subtraction, which doesn't work
|
||||
// on Windows. Some refactoring to share the functionality in
|
||||
// LowerWindowsDYNAMIC_STACKALLOC will be required once the Windows ABI
|
||||
// supports SME
|
||||
if (Buffer == AArch64::NoRegister) {
|
||||
// TODO: On Windows, we allocate the lazy save buffer in SelectionDAG (so
|
||||
// Buffer != AArch64::NoRegister). This is done to reuse the existing
|
||||
// expansions (which can insert stack checks). This works, but it means we
|
||||
// will always allocate the lazy save buffer (even if the function contains
|
||||
// no lazy saves). If we want to handle Windows here, we'll need to
|
||||
// implement something similar to LowerWindowsDYNAMIC_STACKALLOC.
|
||||
assert(!Subtarget->isTargetWindows() &&
|
||||
"Lazy ZA save is not yet supported on Windows");
|
||||
Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
|
||||
// Get original stack pointer.
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), SP)
|
||||
.addReg(AArch64::SP);
|
||||
@ -606,10 +663,95 @@ void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
|
||||
.addImm(1);
|
||||
}
|
||||
|
||||
/// Returns the register used as the agnostic-ZA save buffer pointer, resolving
/// it on first use: an early-allocated buffer register is reused when the
/// function info has one, otherwise a fresh GPR64 virtual register is created.
/// The result is cached in State for subsequent calls.
Register MachineSMEABI::getAgnosticZABufferPtr() {
  if (State.AgnosticZABufferPtr == AArch64::NoRegister) {
    Register EarlyBuffer =
        MF->getInfo<AArch64FunctionInfo>()->getEarlyAllocSMESaveBuffer();
    if (EarlyBuffer != AArch64::NoRegister)
      State.AgnosticZABufferPtr = EarlyBuffer;
    else
      State.AgnosticZABufferPtr =
          MF->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
  }
  return State.AgnosticZABufferPtr;
}
|
||||
|
||||
/// Emits a call to __arm_sme_save (\p IsSave) or __arm_sme_restore before
/// \p MBBI, passing the agnostic-ZA buffer pointer in X0. Live physical
/// registers described by \p PhysLiveRegs are saved before the sequence and
/// restored after it.
void MachineSMEABI::emitFullZASaveRestore(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          LiveRegs PhysLiveRegs, bool IsSave) {
  // Remember that this function needs a full-ZA save buffer.
  State.HasFullZASaveRestore = true;

  DebugLoc DL = getDebugLoc(MBB, MBBI);
  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // The buffer pointer is handed to the routine in X0.
  Register ArgReg = AArch64::X0;
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), ArgReg)
      .addReg(getAgnosticZABufferPtr());

  // Call __arm_sme_save/__arm_sme_restore.
  RTLIB::Libcall Routine =
      IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
  auto *TLI = Subtarget->getTargetLowering();
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
      .addReg(ArgReg, RegState::Implicit)
      .addExternalSymbol(TLI->getLibcallName(Routine))
      .addRegMask(TRI->getCallPreservedMask(
          *MF,
          CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1));

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
|
||||
|
||||
/// Allocates the full-ZA save buffer used by agnostic-ZA functions before
/// \p MBBI, unless a buffer was already allocated early. The buffer size is
/// obtained by calling __arm_sme_state_size, the stack pointer is lowered by
/// that amount, and the resulting SP is copied into the agnostic-ZA buffer
/// pointer register. Live physical registers described by \p PhysLiveRegs are
/// saved before the sequence and restored after it.
void MachineSMEABI::emitAllocateFullZASaveBuffer(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    LiveRegs PhysLiveRegs) {
  auto *AFI = MF->getInfo<AArch64FunctionInfo>();

  // Buffer already allocated in SelectionDAG.
  if (AFI->getEarlyAllocSMESaveBuffer() != AArch64::NoRegister)
    return;

  DebugLoc DL = getDebugLoc(MBB, MBBI);
  Register BufferPtr = getAgnosticZABufferPtr();
  Register BufferSize = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Calculate the SME state size.
  {
    auto *TLI = Subtarget->getTargetLowering();
    // Uses the pass-level TRI (as emitFullZASaveRestore does) rather than a
    // shadowing local.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
        .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_SME_STATE_SIZE))
        .addReg(AArch64::X0, RegState::ImplicitDefine)
        .addRegMask(TRI->getCallPreservedMask(
            *MF, CallingConv::
                     AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1));
    // The routine's result is read from X0.
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferSize)
        .addReg(AArch64::X0);
  }

  // Allocate a buffer object of the size given by __arm_sme_state_size.
  {
    MachineFrameInfo &MFI = MF->getFrameInfo();
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP)
        .addReg(BufferSize)
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
        .addReg(AArch64::SP);

    // We have just allocated a variable sized object, tell this to PEI.
    MFI.CreateVariableSizedObject(Align(16), nullptr);
  }

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
|
||||
|
||||
void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator InsertPt,
|
||||
ZAState From, ZAState To,
|
||||
LiveRegs PhysLiveRegs) {
|
||||
LiveRegs PhysLiveRegs, bool IsAgnosticZA) {
|
||||
|
||||
// ZA not used.
|
||||
if (From == ZAState::ANY || To == ZAState::ANY)
|
||||
@ -641,12 +783,13 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
|
||||
}
|
||||
|
||||
if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
|
||||
emitSetupLazySave(MBB, InsertPt);
|
||||
emitZASave(MBB, InsertPt, PhysLiveRegs, IsAgnosticZA);
|
||||
else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
|
||||
emitRestoreLazySave(MBB, InsertPt, PhysLiveRegs);
|
||||
emitZARestore(MBB, InsertPt, PhysLiveRegs, IsAgnosticZA);
|
||||
else if (To == ZAState::OFF) {
|
||||
assert(From != ZAState::CALLER_DORMANT &&
|
||||
"CALLER_DORMANT to OFF should have already been handled");
|
||||
assert(!IsAgnosticZA && "Should not turn ZA off in agnostic ZA function");
|
||||
emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED);
|
||||
} else {
|
||||
dbgs() << "Error: Transition from " << getZAStateString(From) << " to "
|
||||
@ -666,7 +809,8 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
|
||||
SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
|
||||
if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State())
|
||||
if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
|
||||
!SMEFnAttrs.hasAgnosticZAInterface())
|
||||
return false;
|
||||
|
||||
assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");
|
||||
@ -680,14 +824,28 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
|
||||
TRI = Subtarget->getRegisterInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
|
||||
bool IsAgnosticZA = SMEFnAttrs.hasAgnosticZAInterface();
|
||||
|
||||
collectNeededZAStates(SMEFnAttrs);
|
||||
assignBundleZAStates();
|
||||
insertStateChanges();
|
||||
insertStateChanges(/*IsAgnosticZA=*/IsAgnosticZA);
|
||||
|
||||
// Allocate save buffer (if needed).
|
||||
if (State.TPIDR2Block) {
|
||||
MachineBasicBlock &EntryBlock = MF.front();
|
||||
emitAllocateLazySaveBuffer(EntryBlock, EntryBlock.getFirstNonPHI());
|
||||
if (State.HasFullZASaveRestore || State.TPIDR2Block) {
|
||||
if (State.AfterSMEProloguePt) {
|
||||
// Note: With inline stack probes the AfterSMEProloguePt may not be in the
|
||||
// entry block (due to the probing loop).
|
||||
emitAllocateZASaveBuffer(*(*State.AfterSMEProloguePt)->getParent(),
|
||||
*State.AfterSMEProloguePt,
|
||||
State.PhysLiveRegsAfterSMEPrologue,
|
||||
/*IsAgnosticZA=*/IsAgnosticZA);
|
||||
} else {
|
||||
MachineBasicBlock &EntryBlock = MF.front();
|
||||
emitAllocateZASaveBuffer(
|
||||
EntryBlock, EntryBlock.getFirstNonPHI(),
|
||||
State.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry,
|
||||
/*IsAgnosticZA=*/IsAgnosticZA);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mattr=+sme2 < %s | FileCheck %s
|
||||
; RUN: llc -mattr=+sme2 < %s -aarch64-new-sme-abi | FileCheck %s
|
||||
; RUN: llc -mattr=+sme2 < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
|
||||
; RUN: llc -mattr=+sme2 < %s -aarch64-new-sme-abi | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING
|
||||
|
||||
target triple = "aarch64"
|
||||
|
||||
@ -9,10 +9,10 @@ declare i64 @agnostic_decl(i64) "aarch64_za_state_agnostic"
|
||||
|
||||
; No calls. Test that no buffer is allocated.
|
||||
define i64 @agnostic_caller_no_callees(ptr %ptr) nounwind "aarch64_za_state_agnostic" {
|
||||
; CHECK-LABEL: agnostic_caller_no_callees:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ldr x0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-COMMON-LABEL: agnostic_caller_no_callees:
|
||||
; CHECK-COMMON: // %bb.0:
|
||||
; CHECK-COMMON-NEXT: ldr x0, [x0]
|
||||
; CHECK-COMMON-NEXT: ret
|
||||
%v = load i64, ptr %ptr
|
||||
ret i64 %v
|
||||
}
|
||||
@ -51,6 +51,29 @@ define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state
|
||||
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; CHECK-NEWLOWERING-LABEL: agnostic_caller_private_za_callee:
|
||||
; CHECK-NEWLOWERING: // %bb.0:
|
||||
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: mov x29, sp
|
||||
; CHECK-NEWLOWERING-NEXT: mov x8, x0
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
|
||||
; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x19, sp
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x19
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x8
|
||||
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
|
||||
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
|
||||
; CHECK-NEWLOWERING-NEXT: mov x8, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x19
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x8
|
||||
; CHECK-NEWLOWERING-NEXT: mov sp, x29
|
||||
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ret
|
||||
%res = call i64 @private_za_decl(i64 %v)
|
||||
%res2 = call i64 @private_za_decl(i64 %res)
|
||||
ret i64 %res2
|
||||
@ -60,12 +83,12 @@ define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state
|
||||
;
|
||||
; Should not result in save/restore code.
|
||||
define i64 @agnostic_caller_agnostic_callee(i64 %v) nounwind "aarch64_za_state_agnostic" {
|
||||
; CHECK-LABEL: agnostic_caller_agnostic_callee:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: bl agnostic_decl
|
||||
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-COMMON-LABEL: agnostic_caller_agnostic_callee:
|
||||
; CHECK-COMMON: // %bb.0:
|
||||
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-COMMON-NEXT: bl agnostic_decl
|
||||
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-COMMON-NEXT: ret
|
||||
%res = call i64 @agnostic_decl(i64 %v)
|
||||
ret i64 %res
|
||||
}
|
||||
@ -74,12 +97,12 @@ define i64 @agnostic_caller_agnostic_callee(i64 %v) nounwind "aarch64_za_state_a
|
||||
;
|
||||
; Should not result in lazy-save or save of ZT0
|
||||
define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "aarch64_inout_zt0" {
|
||||
; CHECK-LABEL: shared_caller_agnostic_callee:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: bl agnostic_decl
|
||||
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-COMMON-LABEL: shared_caller_agnostic_callee:
|
||||
; CHECK-COMMON: // %bb.0:
|
||||
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-COMMON-NEXT: bl agnostic_decl
|
||||
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-COMMON-NEXT: ret
|
||||
%res = call i64 @agnostic_decl(i64 %v)
|
||||
ret i64 %res
|
||||
}
|
||||
@ -126,6 +149,41 @@ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nou
|
||||
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; CHECK-NEWLOWERING-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
|
||||
; CHECK-NEWLOWERING: // %bb.0:
|
||||
; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: mov x8, x0
|
||||
; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: add x29, sp, #64
|
||||
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
|
||||
; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x20, sp
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x20
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
|
||||
; CHECK-NEWLOWERING-NEXT: smstop sm
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x8
|
||||
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
|
||||
; CHECK-NEWLOWERING-NEXT: smstart sm
|
||||
; CHECK-NEWLOWERING-NEXT: smstop sm
|
||||
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
|
||||
; CHECK-NEWLOWERING-NEXT: smstart sm
|
||||
; CHECK-NEWLOWERING-NEXT: mov x8, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x20
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x8
|
||||
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ret
|
||||
%res = call i64 @private_za_decl(i64 %v)
|
||||
%res2 = call i64 @private_za_decl(i64 %res)
|
||||
ret i64 %res2
|
||||
@ -187,6 +245,55 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(
|
||||
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; CHECK-NEWLOWERING-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
|
||||
; CHECK-NEWLOWERING: // %bb.0:
|
||||
; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: mov x8, x0
|
||||
; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: add x29, sp, #64
|
||||
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
|
||||
; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x19, sp
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state
|
||||
; CHECK-NEWLOWERING-NEXT: mov x20, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x19
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
|
||||
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_2
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.1:
|
||||
; CHECK-NEWLOWERING-NEXT: smstop sm
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB5_2:
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x8
|
||||
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
|
||||
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_4
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.3:
|
||||
; CHECK-NEWLOWERING-NEXT: smstart sm
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB5_4:
|
||||
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_6
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.5:
|
||||
; CHECK-NEWLOWERING-NEXT: smstop sm
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB5_6:
|
||||
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
|
||||
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_8
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.7:
|
||||
; CHECK-NEWLOWERING-NEXT: smstart sm
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB5_8:
|
||||
; CHECK-NEWLOWERING-NEXT: mov x8, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x19
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x8
|
||||
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ret
|
||||
%res = call i64 @private_za_decl(i64 %v)
|
||||
%res2 = call i64 @private_za_decl(i64 %res)
|
||||
ret i64 %res2
|
||||
@ -223,6 +330,31 @@ define i64 @test_many_callee_arguments(
|
||||
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; CHECK-NEWLOWERING-LABEL: test_many_callee_arguments:
|
||||
; CHECK-NEWLOWERING: // %bb.0:
|
||||
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: mov x29, sp
|
||||
; CHECK-NEWLOWERING-NEXT: mov x8, x0
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
|
||||
; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x19, sp
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x9, x10, [x29, #32]
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x19
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
|
||||
; CHECK-NEWLOWERING-NEXT: stp x9, x10, [sp, #-16]!
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x8
|
||||
; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee
|
||||
; CHECK-NEWLOWERING-NEXT: add sp, sp, #16
|
||||
; CHECK-NEWLOWERING-NEXT: mov x8, x0
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x19
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
|
||||
; CHECK-NEWLOWERING-NEXT: mov x0, x8
|
||||
; CHECK-NEWLOWERING-NEXT: mov sp, x29
|
||||
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ret
|
||||
i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9
|
||||
) nounwind "aarch64_za_state_agnostic" {
|
||||
%ret = call i64 @many_args_private_za_callee(
|
||||
|
||||
71
llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
Normal file
71
llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
Normal file
@ -0,0 +1,71 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=aarch64-windows-msvc -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-windows-msvc -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme -aarch64-new-sme-abi < %s | FileCheck %s --check-prefix=CHECK-NEWLOWERING
|
||||
|
||||
declare void @private_za_callee()
|
||||
declare void @shared_za_callee() "aarch64_inout_za"
|
||||
|
||||
define void @test_lazy_save() nounwind "aarch64_inout_za" {
|
||||
; CHECK-LABEL: test_lazy_save:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: stp x30, x29, [sp, #-32]! // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
|
||||
; CHECK-NEXT: mov x29, sp
|
||||
; CHECK-NEXT: sub sp, sp, #16
|
||||
; CHECK-NEXT: rdsvl x8, #1
|
||||
; CHECK-NEXT: mul x9, x8, x8
|
||||
; CHECK-NEXT: lsr x15, x9, #4
|
||||
; CHECK-NEXT: bl __chkstk
|
||||
; CHECK-NEXT: sub x9, sp, x15, lsl #4
|
||||
; CHECK-NEXT: mov sp, x9
|
||||
; CHECK-NEXT: stur x9, [x29, #-16]
|
||||
; CHECK-NEXT: sub x9, x29, #16
|
||||
; CHECK-NEXT: sturh wzr, [x29, #-6]
|
||||
; CHECK-NEXT: stur wzr, [x29, #-4]
|
||||
; CHECK-NEXT: sturh w8, [x29, #-8]
|
||||
; CHECK-NEXT: msr TPIDR2_EL0, x9
|
||||
; CHECK-NEXT: bl private_za_callee
|
||||
; CHECK-NEXT: smstart za
|
||||
; CHECK-NEXT: mrs x8, TPIDR2_EL0
|
||||
; CHECK-NEXT: sub x0, x29, #16
|
||||
; CHECK-NEXT: cbnz x8, .LBB0_2
|
||||
; CHECK-NEXT: // %bb.1:
|
||||
; CHECK-NEXT: bl __arm_tpidr2_restore
|
||||
; CHECK-NEXT: .LBB0_2:
|
||||
; CHECK-NEXT: msr TPIDR2_EL0, xzr
|
||||
; CHECK-NEXT: mov sp, x29
|
||||
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ldp x30, x29, [sp], #32 // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; CHECK-NEWLOWERING-LABEL: test_lazy_save:
|
||||
; CHECK-NEWLOWERING: // %bb.0:
|
||||
; CHECK-NEWLOWERING-NEXT: stp x30, x29, [sp, #-32]! // 16-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
|
||||
; CHECK-NEWLOWERING-NEXT: mov x29, sp
|
||||
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
|
||||
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
|
||||
; CHECK-NEWLOWERING-NEXT: mul x9, x8, x8
|
||||
; CHECK-NEWLOWERING-NEXT: lsr x15, x9, #4
|
||||
; CHECK-NEWLOWERING-NEXT: bl __chkstk
|
||||
; CHECK-NEWLOWERING-NEXT: sub x9, sp, x15, lsl #4
|
||||
; CHECK-NEWLOWERING-NEXT: mov sp, x9
|
||||
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
|
||||
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
|
||||
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
|
||||
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
|
||||
; CHECK-NEWLOWERING-NEXT: smstart za
|
||||
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
|
||||
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
|
||||
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB0_2
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.1:
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB0_2:
|
||||
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
|
||||
; CHECK-NEWLOWERING-NEXT: mov sp, x29
|
||||
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x30, x29, [sp], #32 // 16-byte Folded Reload
|
||||
; CHECK-NEWLOWERING-NEXT: ret
|
||||
call void @private_za_callee()
|
||||
ret void
|
||||
}
|
||||
@ -103,7 +103,6 @@ exit:
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; FIXME: This is missing stack probes with -aarch64-new-sme-abi.
|
||||
define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float %c) "aarch64_inout_za" "probe-stack"="inline-asm" "stack-probe-size"="65536" {
|
||||
; CHECK-LABEL: multi_bb_stpidr2_save_required_stackprobe:
|
||||
; CHECK: // %bb.0:
|
||||
@ -165,26 +164,35 @@ define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float
|
||||
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
|
||||
; CHECK-NEWLOWERING-NEXT: mov x9, sp
|
||||
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
|
||||
; CHECK-NEWLOWERING-NEXT: cmp sp, x9
|
||||
; CHECK-NEWLOWERING-NEXT: b.le .LBB2_3
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB2_1 Depth=1
|
||||
; CHECK-NEWLOWERING-NEXT: str xzr, [sp]
|
||||
; CHECK-NEWLOWERING-NEXT: b .LBB2_1
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB2_3:
|
||||
; CHECK-NEWLOWERING-NEXT: mov sp, x9
|
||||
; CHECK-NEWLOWERING-NEXT: ldr xzr, [sp]
|
||||
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
|
||||
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
|
||||
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
|
||||
; CHECK-NEWLOWERING-NEXT: cbz w0, .LBB2_2
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %use_b
|
||||
; CHECK-NEWLOWERING-NEXT: cbz w0, .LBB2_5
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %use_b
|
||||
; CHECK-NEWLOWERING-NEXT: fmov s1, #4.00000000
|
||||
; CHECK-NEWLOWERING-NEXT: fadd s0, s0, s1
|
||||
; CHECK-NEWLOWERING-NEXT: b .LBB2_3
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB2_2: // %use_c
|
||||
; CHECK-NEWLOWERING-NEXT: b .LBB2_6
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB2_5: // %use_c
|
||||
; CHECK-NEWLOWERING-NEXT: fmov s0, s1
|
||||
; CHECK-NEWLOWERING-NEXT: bl cosf
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB2_3: // %exit
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB2_6: // %exit
|
||||
; CHECK-NEWLOWERING-NEXT: smstart za
|
||||
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
|
||||
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
|
||||
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB2_5
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
|
||||
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB2_8
|
||||
; CHECK-NEWLOWERING-NEXT: // %bb.7: // %exit
|
||||
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB2_5: // %exit
|
||||
; CHECK-NEWLOWERING-NEXT: .LBB2_8: // %exit
|
||||
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
|
||||
; CHECK-NEWLOWERING-NEXT: mov sp, x29
|
||||
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user