
## Short Summary

This patch adds a new pass `aarch64-machine-sme-abi` to handle the ABI for ZA state (e.g., lazy saves and agnostic ZA functions). This is currently not enabled by default (but aims to be by LLVM 22). The goal is for this new pass to more optimally place ZA saves/restores and to work with exception handling.

## Long Description

This patch reimplements management of ZA state for functions with private and shared ZA state. Agnostic ZA functions will be handled in a later patch. For now, this is under the flag `-aarch64-new-sme-abi`, however, we intend for this to replace the current SelectionDAG implementation once complete. The approach taken here is to mark instructions as needing ZA to be in a specific state ("ACTIVE" or "LOCAL_SAVED"). Machine instructions implicitly defining or using ZA registers (such as $zt0 or $zab0) require the "ACTIVE" state. Function calls may need the "LOCAL_SAVED" or "ACTIVE" state depending on the callee (having shared or private ZA). We already add ZA register uses/definitions to machine instructions, so no extra work is needed to mark these. Calls need to be marked by glueing AArch64ISD::INOUT_ZA_USE or AArch64ISD::REQUIRES_ZA_SAVE to the CALLSEQ_START. These markers are then used by the MachineSMEABIPass to find instructions where there is a transition between required ZA states. These are the points we need to insert code to set up or restore a ZA save (or initialize ZA). To handle control flow between blocks (which may have different ZA state requirements), we bundle the incoming and outgoing edges of blocks. Bundles are formed by assigning each block an incoming and outgoing bundle (initially, all blocks have their own two bundles). Bundles are then combined by joining the outgoing bundle of a block with the incoming bundle of all successors. These bundles are then assigned a ZA state based on the blocks that participate in the bundle.
Blocks whose incoming edges are in a bundle "vote" for a ZA state that matches the state required at the first instruction in the block, and likewise, blocks whose outgoing edges are in a bundle vote for the ZA state that matches the last instruction in the block. The ZA state with the most votes is used, which aims to minimize the number of state transitions.
119 lines
4.2 KiB
C++
119 lines
4.2 KiB
C++
//==-- AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H
|
|
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H
|
|
|
|
#include "AArch64InstrInfo.h"
|
|
#include "AArch64Subtarget.h"
|
|
#include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
|
|
#include "llvm/IR/DataLayout.h"
|
|
#include <optional>
|
|
|
|
namespace llvm {
|
|
|
|
class AArch64TargetMachine : public CodeGenTargetMachineImpl {
|
|
protected:
|
|
std::unique_ptr<TargetLoweringObjectFile> TLOF;
|
|
mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
|
|
|
|
/// Reset internal state.
|
|
void reset() override;
|
|
|
|
public:
|
|
AArch64TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
|
StringRef FS, const TargetOptions &Options,
|
|
std::optional<Reloc::Model> RM,
|
|
std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
|
|
bool JIT, bool IsLittleEndian);
|
|
|
|
~AArch64TargetMachine() override;
|
|
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
|
|
// DO NOT IMPLEMENT: There is no such thing as a valid default subtarget,
|
|
// subtargets are per-function entities based on the target-specific
|
|
// attributes of each function.
|
|
const AArch64Subtarget *getSubtargetImpl() const = delete;
|
|
|
|
// Pass Pipeline Configuration
|
|
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
|
|
|
|
void registerPassBuilderCallbacks(PassBuilder &PB) override;
|
|
|
|
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
|
|
|
|
TargetLoweringObjectFile* getObjFileLowering() const override {
|
|
return TLOF.get();
|
|
}
|
|
|
|
MachineFunctionInfo *
|
|
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
|
|
const TargetSubtargetInfo *STI) const override;
|
|
|
|
yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
|
|
yaml::MachineFunctionInfo *
|
|
convertFuncInfoToYAML(const MachineFunction &MF) const override;
|
|
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
|
|
PerFunctionMIParsingState &PFS,
|
|
SMDiagnostic &Error,
|
|
SMRange &SourceRange) const override;
|
|
|
|
/// Returns true if a cast between SrcAS and DestAS is a noop.
|
|
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
|
|
return getPointerSize(SrcAS) == getPointerSize(DestAS);
|
|
}
|
|
ScheduleDAGInstrs *
|
|
createMachineScheduler(MachineSchedContext *C) const override;
|
|
|
|
ScheduleDAGInstrs *
|
|
createPostMachineScheduler(MachineSchedContext *C) const override;
|
|
|
|
size_t clearLinkerOptimizationHints(
|
|
const SmallPtrSetImpl<MachineInstr *> &MIs) const override;
|
|
|
|
/// Returns true if the new SME ABI lowering should be used.
|
|
bool useNewSMEABILowering() const { return UseNewSMEABILowering; }
|
|
|
|
private:
|
|
bool isLittle;
|
|
bool UseNewSMEABILowering;
|
|
};
|
|
|
|
// AArch64 little endian target machine.
|
|
//
|
|
class AArch64leTargetMachine : public AArch64TargetMachine {
|
|
virtual void anchor();
|
|
|
|
public:
|
|
AArch64leTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
|
StringRef FS, const TargetOptions &Options,
|
|
std::optional<Reloc::Model> RM,
|
|
std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
|
|
bool JIT);
|
|
};
|
|
|
|
// AArch64 big endian target machine.
|
|
//
|
|
class AArch64beTargetMachine : public AArch64TargetMachine {
|
|
virtual void anchor();
|
|
|
|
public:
|
|
AArch64beTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
|
|
StringRef FS, const TargetOptions &Options,
|
|
std::optional<Reloc::Model> RM,
|
|
std::optional<CodeModel::Model> CM, CodeGenOptLevel OL,
|
|
bool JIT);
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif
|