llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h

//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H

#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64PointerAuth.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/TargetParser/Triple.h"

#define GET_SUBTARGETINFO_HEADER
#include "AArch64GenSubtargetInfo.inc"

namespace llvm {
class GlobalValue;
class StringRef;

class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
  /// Processor families known to this subtarget. `Generic` is the first
  /// enumerator; the remaining enumerators are produced by expanding
  /// ARM_PROCESSOR_FAMILY over AArch64TargetParserDef.inc. The underlying
  /// type is uint8_t.
  enum ARMProcFamilyEnum : uint8_t {
    Generic,
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
#undef ARM_PROCESSOR_FAMILY
  };

protected:
  /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
  /// Defaults to Generic and is returned by getProcFamily().
  ARMProcFamilyEnum ARMProcFamily = Generic;

  // Enable 64-bit vectorization in SLP.
  // getMinVectorRegisterBitWidth() returns this value when full SVE or NEON
  // is available, and 0 otherwise.
  unsigned MinVectorRegisterBitWidth = 64;

// Bool members corresponding to the SubtargetFeatures defined in tablegen.
// Every GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) entry in the
// generated .inc file expands here to `bool ATTRIBUTE = DEFAULT;`. The GETTER
// argument is not used by this expansion.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
#include "AArch64GenSubtargetInfo.inc"

  // Tuning parameters. The initializers below are the in-class defaults; most
  // of these members are returned directly by the get*() accessors declared
  // further down in this class.
  // NOTE(review): presumably adjusted per processor family in
  // initializeProperties() — confirm against the .cpp file.
  unsigned EpilogueVectorizationMinVF = 16;
  uint8_t MaxInterleaveFactor = 2;
  uint8_t VectorInsertExtractBaseCost = 2;
  uint16_t CacheLineSize = 0;
  // Default scatter/gather overhead.
  unsigned ScatterOverhead = 10;
  unsigned GatherOverhead = 10;
  uint16_t PrefetchDistance = 0;
  uint16_t MinPrefetchStride = 1;
  unsigned MaxPrefetchIterationsAhead = UINT_MAX;
  // Default-constructed alignments; no explicit value is given here.
  Align PrefFunctionAlignment;
  Align PrefLoopAlignment;
  unsigned MaxBytesForLoopAlignment = 0;
  unsigned MinimumJumpTableEntries = 4;
  unsigned MaxJumpTableSize = 0;

  // ReserveXRegister[i] - X#i is not available as a general purpose register.
  // Queried through isXRegisterReserved().
  BitVector ReserveXRegister;

  // ReserveXRegisterForRA[i] - X#i is not available for register allocator.
  // Queried through isXRegisterReservedForRA().
  BitVector ReserveXRegisterForRA;

  // CustomCallSavedXRegs[i] - X#i call saved.
  // Queried through isXRegCustomCalleeSaved(); hasCustomCallingConv() is true
  // when any bit is set.
  BitVector CustomCallSavedXRegs;

  // Endianness flag returned by isLittleEndian(). It has no in-class
  // initializer. NOTE(review): presumably set from the constructor's
  // LittleEndian argument — confirm in the .cpp file.
  bool IsLittle;

  // Streaming-mode flags returned by isStreaming() and
  // isStreamingCompatible() respectively.
  bool IsStreaming;
  bool IsStreamingCompatible;
  // Optional explicit hazard size. When empty, getStreamingHazardSize()
  // derives a default from the SME/SVE feature bits.
  std::optional<unsigned> StreamingHazardSize;
  // Bounds returned by getMinSVEVectorSizeInBits() and
  // getMaxSVEVectorSizeInBits(). Neither has an in-class initializer.
  unsigned MinSVEVectorSizeInBits;
  unsigned MaxSVEVectorSizeInBits;
  // Returned by getVScaleForTuning().
  unsigned VScaleForTuning = 1;
  // Returned by getSVETailFoldingDefaultOpts().
  TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;

  // Returned by enableSubRegLiveness(). It has no in-class initializer.
  bool EnableSubregLiveness;

  /// TargetTriple - What processor and OS we're targeting.
  Triple TargetTriple;

  // Lowering/info objects held by value. Pointers to them are handed out by
  // getFrameLowering(), getInstrInfo(), getSelectionDAGInfo() and
  // getTargetLowering().
  AArch64FrameLowering FrameLowering;
  AArch64InstrInfo InstrInfo;
  AArch64SelectionDAGInfo TSInfo;
  AArch64TargetLowering TLInfo;

  /// GlobalISel related APIs.
  /// Each component is owned through a std::unique_ptr.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;

private:
  /// initializeSubtargetDependencies - Initializes using CPUString and the
  /// passed in feature string so that we can use initializer lists for
  /// subtarget initialization.
  ///
  /// \param FS            Feature string.
  /// \param CPUString     Name of the CPU.
  /// \param TuneCPUString Name of the CPU to tune for.
  /// \param HasMinSize    NOTE(review): presumably whether the function is
  ///                      optimized for minimum size — confirm in the .cpp.
  /// \returns a reference to an AArch64Subtarget (presumably *this, so that
  ///          the call can appear in a member initializer list — confirm).
  AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
                                                    StringRef CPUString,
                                                    StringRef TuneCPUString,
                                                    bool HasMinSize);

  /// Initialize properties based on the selected processor family.
  /// Declared here; the definition is outside this header.
  void initializeProperties(bool HasMinSize);

public:
  /// This constructor initializes the data members to match that
  /// of the specified triple.
  ///
  /// \param TT           Target triple.
  /// \param CPU          CPU name.
  /// \param TuneCPU      CPU name to tune for.
  /// \param FS           Feature string.
  /// \param TM           Target machine this subtarget belongs to.
  /// \param LittleEndian Endianness selector.
  /// \param MinSVEVectorSizeInBitsOverride, MaxSVEVectorSizeInBitsOverride
  ///        Overrides for the SVE vector size bounds; both default to 0.
  ///        NOTE(review): presumably 0 means "no override" — confirm.
  /// \param IsStreaming, IsStreamingCompatible
  ///        Streaming-mode flags; both default to false.
  /// \param HasMinSize   Defaults to false.
  AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                   StringRef FS, const TargetMachine &TM, bool LittleEndian,
                   unsigned MinSVEVectorSizeInBitsOverride = 0,
                   unsigned MaxSVEVectorSizeInBitsOverride = 0,
                   bool IsStreaming = false, bool IsStreamingCompatible = false,
                   bool HasMinSize = false);

  /// Returns the hardware-mode set for this subtarget. This overrides a
  /// base-class virtual and is defined outside this header. `override`
  /// already implies virtual dispatch, so the `virtual` keyword is omitted.
  unsigned getHwModeSet() const override;

// Getters for SubtargetFeatures defined in tablegen.
// Every GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) entry in the
// generated .inc file expands here to a const member function named GETTER
// that returns the ATTRIBUTE bool. DEFAULT is not used by this expansion.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
#include "AArch64GenSubtargetInfo.inc"

  /// Returns a pointer to the SelectionDAG info object held by this subtarget.
  const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }
  /// Returns a pointer to the frame lowering object held by this subtarget.
  const AArch64FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }
  /// Returns a pointer to the target lowering object held by this subtarget.
  const AArch64TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }
  /// Returns a pointer to the instruction info object held by this subtarget.
  const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
  /// Returns a pointer to the register info, which is obtained from the
  /// instruction info object held by this subtarget.
  const AArch64RegisterInfo *getRegisterInfo() const override {
    const AArch64RegisterInfo &RegInfo = InstrInfo.getRegisterInfo();
    return &RegInfo;
  }
  // Accessors for the GlobalISel components. They are only declared here;
  // the definitions live outside this header.
  const CallLowering *getCallLowering() const override;
  const InlineAsmLowering *getInlineAsmLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;
  /// Returns the target triple (the processor and OS being targeted).
  const Triple &getTargetTriple() const { return TargetTriple; }
  /// Always returns true.
  bool enableMachineScheduler() const override { return true; }
  /// Returns the value of usePostRAScheduler().
  bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
  /// Returns the EnableSubregLiveness member.
  bool enableSubRegLiveness() const override { return EnableSubregLiveness; }

  /// Declared here; defined outside this header.
  bool enableMachinePipeliner() const override;
  /// Always returns false.
  /// NOTE(review): "SMS" presumably refers to swing modulo scheduling in the
  /// machine pipeliner — confirm against the base-class documentation.
  bool useDFAforSMS() const override { return false; }

  /// Reports the ARM processor family recorded for this subtarget.
  ///
  /// Prefer not to call this: CPU-specific decisions should stay inside this
  /// class, ideally expressed as SubtargetFeatures or as properties set up in
  /// initializeProperties().
  ARMProcFamilyEnum getProcFamily() const { return ARMProcFamily; }

  /// Always returns true.
  bool isXRaySupported() const override { return true; }

  /// Returns true if the function has a streaming body.
  /// Simply reports the IsStreaming member.
  bool isStreaming() const { return IsStreaming; }

  /// Returns true if the function has a streaming-compatible body.
  /// Simply reports the IsStreamingCompatible member.
  bool isStreamingCompatible() const { return IsStreamingCompatible; }

  /// Returns the size of the memory region in which accesses made by both the
  /// CPU and the SME unit could result in a hazard. 0 means disabled.
  ///
  /// An explicitly set StreamingHazardSize always takes precedence. Without
  /// one, the size is 1024 when the target has SME and SVE but not SME FA64,
  /// and 0 in every other case.
  unsigned getStreamingHazardSize() const {
    if (StreamingHazardSize.has_value())
      return *StreamingHazardSize;

    const bool HazardPossible = !hasSMEFA64() && hasSME() && hasSVE();
    return HazardPossible ? 1024 : 0;
  }

  /// Returns true when the target has NEON and the function is known to have
  /// NEON enabled at runtime. Streaming-SVE mode disables NEON instructions,
  /// so a streaming or streaming-compatible function may only rely on NEON
  /// when the target has SME FA64.
  bool isNeonAvailable() const {
    if (!hasNEON())
      return false;
    if (hasSMEFA64())
      return true;
    return !(isStreaming() || isStreamingCompatible());
  }

  /// Returns true when the target has SVE and the full range of SVE
  /// instructions may be used: either the function is known not to be in
  /// streaming-SVE mode, or the target has FEAT_FA64 enabled.
  bool isSVEAvailable() const {
    if (!hasSVE())
      return false;
    if (hasSMEFA64())
      return true;
    return !(isStreaming() || isStreamingCompatible());
  }

  /// Returns true if the target has access to the streaming-compatible subset
  /// of SVE instructions. This holds when the target has SME and the function
  /// has a streaming body.
  bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); }

  /// Returns true when the target has SVE, or when the streaming-compatible
  /// subset of SVE instructions is available (see isStreamingSVEAvailable()).
  bool isSVEorStreamingSVEAvailable() const {
    if (hasSVE())
      return true;
    return isStreamingSVEAvailable();
  }

  /// Returns true when the full range of SVE instructions is available, or
  /// when the target has SME2 together with either SVE or the
  /// streaming-compatible subset of SVE instructions.
  bool isNonStreamingSVEorSME2Available() const {
    if (isSVEAvailable())
      return true;
    return isSVEorStreamingSVEAvailable() && hasSME2();
  }

  /// Returns the minimum vector register bit width, or 0 when neither the
  /// full SVE instruction range nor NEON is available.
  unsigned getMinVectorRegisterBitWidth() const {
    // When PSTATE.SM may be non-zero we make no assumption about a minimum
    // vector size: there is not yet streaming-compatible codegen that we
    // trust to be safe for functions that may execute in streaming-SVE mode.
    // Reporting 0 in that situation disables vectorization.
    const bool HasUsableVectorUnit = isSVEAvailable() || isNeonAvailable();
    return HasUsableVectorUnit ? MinVectorRegisterBitWidth : 0;
  }

  /// Returns true if X#i is not available as a general purpose register.
  /// \p i indexes ReserveXRegister directly; this function performs no range
  /// check of its own.
  bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
  /// Returns true if X#i is not available for the register allocator.
  /// \p i indexes ReserveXRegisterForRA directly; this function performs no
  /// range check of its own.
  bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
  unsigned getNumXRegisterReserved() const {
    BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
    AllReservedX |= ReserveXRegister;
    AllReservedX |= ReserveXRegisterForRA;
    return AllReservedX.count();
  }
  /// Returns the ReserveLRForRA flag.
  /// NOTE(review): ReserveLRForRA is not declared explicitly in this header;
  /// presumably it is one of the tablegen-generated feature bools — confirm.
  bool isLRReservedForRA() const { return ReserveLRForRA; }
  /// Returns true if bit \p i is set in CustomCallSavedXRegs, i.e. X#i is
  /// call saved. This function performs no range check of its own.
  bool isXRegCustomCalleeSaved(size_t i) const {
    return CustomCallSavedXRegs[i];
  }
  /// Returns true if any bit of CustomCallSavedXRegs is set.
  bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }

  /// Returns true if at least one of the instruction-fusion features is
  /// enabled for this CPU.
  bool hasFusion() const {
    // Fusion of arithmetic with a following branch.
    if (hasArithmeticBccFusion() || hasArithmeticCbzFusion())
      return true;
    if (hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect())
      return true;
    return hasFuseAdrpAdd() || hasFuseLiterals();
  }

  // Plain accessors for the tuning members of the same name, except where
  // noted.
  unsigned getEpilogueVectorizationMinVF() const {
    return EpilogueVectorizationMinVF;
  }
  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
  // Declared here only; the definition is outside this header, so it may do
  // more than return VectorInsertExtractBaseCost.
  unsigned getVectorInsertExtractBaseCost() const;
  unsigned getCacheLineSize() const override { return CacheLineSize; }
  unsigned getScatterOverhead() const { return ScatterOverhead; }
  unsigned getGatherOverhead() const { return GatherOverhead; }
  unsigned getPrefetchDistance() const override { return PrefetchDistance; }
  /// Returns the MinPrefetchStride member. All four arguments are required by
  /// the overridden base-class signature but are ignored by this
  /// implementation.
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return MinPrefetchStride;
  }
  /// Returns the MaxPrefetchIterationsAhead member.
  unsigned getMaxPrefetchIterationsAhead() const override {
    return MaxPrefetchIterationsAhead;
  }
  /// Returns the preferred function alignment member.
  Align getPrefFunctionAlignment() const {
    return PrefFunctionAlignment;
  }
  /// Returns the preferred loop alignment member.
  Align getPrefLoopAlignment() const { return PrefLoopAlignment; }

  /// Returns the MaxBytesForLoopAlignment member.
  unsigned getMaxBytesForLoopAlignment() const {
    return MaxBytesForLoopAlignment;
  }

  /// Returns the MaxJumpTableSize member.
  unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
  /// Returns the MinimumJumpTableEntries member.
  unsigned getMinimumJumpTableEntries() const {
    return MinimumJumpTableEntries;
  }

  /// CPU has TBI (top byte of addresses is ignored during HW address
  /// translation) and OS enables it.
  /// Declared here; defined outside this header.
  bool supportsAddressTopByteIgnored() const;

  /// Returns the IsLittle member.
  bool isLittleEndian() const { return IsLittle; }

  // Operating-system predicates. Each one forwards to the corresponding
  // query on TargetTriple.
  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetIOS() const { return TargetTriple.isiOS(); }
  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
  bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }

  // Object-file-format predicates. Each one forwards to the corresponding
  // query on TargetTriple.
  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

  /// Returns true when the triple describes a 32-bit architecture or uses
  /// the GNUILP32 environment.
  bool isTargetILP32() const {
    if (TargetTriple.isArch32Bit())
      return true;
    return TargetTriple.getEnvironment() == Triple::GNUILP32;
  }

  /// Declared here; defined outside this header.
  bool useAA() const override;

  /// Returns true when useAA() is true or the target is ILP32.
  bool addrSinkUsingGEPs() const override {
    if (useAA())
      return true;
    // In ILP32 mode, keeping GEPs inbounds matters for making use of the
    // AArch64 addressing modes.
    return isTargetILP32();
  }

  /// Returns true when the target machine's code model is Small or Kernel,
  /// and false for every other code model.
  bool useSmallAddressing() const {
    const auto Model = TLInfo.getTargetMachine().getCodeModel();
    // Kernel is currently allowed only for Fuchsia targets, where it is the
    // same as Small for almost all purposes.
    return Model == CodeModel::Kernel || Model == CodeModel::Small;
  }

  /// Returns whether the operating system makes it safer to store sensitive
  /// values in x16 and x17 as opposed to other registers.
  /// Declared here; defined outside this header.
  bool isX16X17Safer() const;

  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options.  Definition of function is auto generated by tblgen.
  ///
  /// \param CPU     CPU name.
  /// \param TuneCPU CPU name to tune for.
  /// \param FS      Feature string.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// ClassifyGlobalReference - Find the target operand flags that describe
  /// how a global value should be referenced for the current subtarget.
  /// Declared here; defined outside this header.
  unsigned ClassifyGlobalReference(const GlobalValue *GV,
                                   const TargetMachine &TM) const;

  /// Two-argument overload taking the target machine. Declared here; defined
  /// outside this header.
  /// NOTE(review): presumably the function-reference counterpart of
  /// ClassifyGlobalReference — confirm against the definition.
  unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
                                           const TargetMachine &TM) const;

  /// This function is designed to be compatible with the definition in other
  /// targets and to avoid a build error about the virtual function declared
  /// in the base class TargetSubtargetInfo. It always returns 0. Update me if
  /// the AArch64 target needs to use it.
  unsigned char
  classifyGlobalFunctionReference(const GlobalValue *GV) const override {
    return 0;
  }

  // The four hooks below override base-class virtuals. They are only
  // declared here; the definitions are outside this header.

  /// Scheduling-policy hook; \p Policy is passed by non-const reference.
  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           const SchedRegion &Region) const override;

  /// Scheduling-dependency hook; \p Dep is passed by non-const reference.
  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                             SDep &Dep,
                             const TargetSchedModel *SchedModel) const override;

  bool enableEarlyIfConversion() const override;

  std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;

  /// Returns whether calling convention \p CC is to be treated as Win64.
  ///
  /// - Win64 itself: always.
  /// - C, Fast, Swift, SwiftTail: only on Windows targets.
  /// - PreserveNone: only for variadic calls (\p IsVarArg) on Windows targets.
  /// - Anything else: never.
  bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
    if (CC == CallingConv::Win64)
      return true;

    const bool FollowsPlatformDefault =
        CC == CallingConv::C || CC == CallingConv::Fast ||
        CC == CallingConv::Swift || CC == CallingConv::SwiftTail;
    if (FollowsPlatformDefault)
      return isTargetWindows();

    if (CC == CallingConv::PreserveNone)
      return IsVarArg && isTargetWindows();

    return false;
  }

  /// Returns true when the "extended frame present" bit in FP has to be set
  /// based on a symbol in the runtime, and false when FrameLowering should
  /// always set it.
  ///
  /// The decision depends on the OS major version in the target triple:
  /// iOS/tvOS before 15, watchOS before 8, and macOS/Darwin before 12 need
  /// the dynamic check. Every other OS returns false.
  bool swiftAsyncContextIsDynamicallySet() const {
    // Older OS versions (system unwinders in particular) are confused by the
    // Swift extended frame. Code that may run on them therefore has to ask
    // the concurrency library at runtime whether extended frames should be
    // flagged as present.
    const Triple &TT = getTargetTriple();
    const unsigned OSMajor = TT.getOSVersion().getMajor();

    switch (TT.getOS()) {
    case Triple::MacOSX:
    case Triple::Darwin:
      return OSMajor < 12;
    case Triple::IOS:
    case Triple::TvOS:
      return OSMajor < 15;
    case Triple::WatchOS:
      return OSMajor < 8;
    default:
      return false;
    }
  }

  /// Overrides a base-class virtual; declared here and defined outside this
  /// header. \p MF is passed by non-const reference.
  void mirFileLoaded(MachineFunction &MF) const override;

  // Return the known range for the bit length of SVE data registers. A value
  // of 0 means nothing is known about that particular limit beyond what's
  // implied by the architecture.
  // Both accessors assert that SVE or streaming SVE is available before
  // returning the corresponding member.
  unsigned getMaxSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    return MaxSVEVectorSizeInBits;
  }

  // Lower bound counterpart of getMaxSVEVectorSizeInBits().
  unsigned getMinSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    return MinSVEVectorSizeInBits;
  }

  // Returns the exact bit length of SVE data registers when it is known, i.e.
  // when the minimum and maximum bounds coincide. Returns 0 when the length
  // is unknown beyond what the architecture implies. Asserts that SVE or
  // streaming SVE is available.
  unsigned getSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    const bool IsExact = MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits;
    return IsExact ? MaxSVEVectorSizeInBits : 0;
  }

  /// Returns whether SVE should be used for fixed-length vectors at all.
  /// This requires SVE or streaming SVE. NEON is preferred when it is
  /// available, unless the minimum SVE register size is at least 256 bits.
  bool useSVEForFixedLengthVectors() const {
    if (!isSVEorStreamingSVEAvailable())
      return false;

    // Without NEON there is nothing to prefer over SVE.
    if (!isNeonAvailable())
      return true;

    // Only larger SVE registers justify choosing SVE over NEON.
    return getMinSVEVectorSizeInBits() >= 256;
  }

  /// Returns whether SVE should be used for the fixed-length vector type
  /// \p VT. This is false unless SVE is used for fixed-length vectors in
  /// general and \p VT is a fixed-length vector. In that case the answer is
  /// true when \p VT is wider than AArch64::SVEBitsPerBlock or NEON is not
  /// available.
  bool useSVEForFixedLengthVectors(EVT VT) const {
    const bool Candidate =
        useSVEForFixedLengthVectors() && VT.isFixedLengthVector();
    if (!Candidate)
      return false;

    if (VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock)
      return true;
    return !isNeonAvailable();
  }

  /// Returns the VScaleForTuning member.
  unsigned getVScaleForTuning() const { return VScaleForTuning; }

  /// Returns the DefaultSVETFOpts member.
  TailFoldingOpts getSVETailFoldingDefaultOpts() const {
    return DefaultSVETFOpts;
  }

  /// Returns true to use the addvl/inc/dec instructions, as opposed to separate
  /// add + cnt instructions.
  /// Declared here; defined outside this header.
  bool useScalarIncVL() const;

  /// Returns the name of the __chkstk symbol to use: "#__chkstk_arm64ec" on
  /// Windows Arm64EC targets and "__chkstk" on all others.
  const char *getChkStkName() const {
    return isWindowsArm64EC() ? "#__chkstk_arm64ec" : "__chkstk";
  }

  /// Choose a method of checking LR before performing a tail call.
  /// Declared here; defined outside this header.
  AArch64PAuth::AuthCheckMethod
  getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;

  /// Compute the integer discriminator for a given BlockAddress constant, if
  /// blockaddress signing is enabled, or std::nullopt otherwise.
  /// Blockaddress signing is controlled by the function attribute
  /// "ptrauth-indirect-gotos" on the parent function.
  /// Note that this assumes the discriminator is independent of the indirect
  /// goto branch site itself, i.e., it's the same for all BlockAddresses in
  /// a function.
  /// Declared here; defined outside this header.
  std::optional<uint16_t>
  getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
};
} // End llvm namespace

#endif