llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
Ahmed Bougacha 0df1a52852
[AArch64][FastISel] Fallback on atomic stlr/cas with non-reg operands. (#133987)
This has been a latent bug for almost 10 years, but is relatively hard
to trigger, needing an address operand that isn't handled by
getRegForValue (in the test here, constexpr casts). When that happens,
it returns 0, which FastISel happily uses as a register operand, all the
way to asm, where we either get a crash on an invalid register, or a
silently corrupt instruction.

Unfortunately, FastISel is still enabled at -O0 for at least
ILP32/arm64_32.
2025-05-08 14:29:24 -07:00

5209 lines
167 KiB
C++

//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
using namespace llvm;
namespace {
/// AArch64-specific FastISel implementation. The base class is constructed
/// with SkipTargetIndependentISel enabled, so selection is driven by
/// fastSelectInstruction() and the select*/emit* helpers declared below, plus
/// the tablegen-generated routines pulled in from AArch64GenFastISel.inc.
class AArch64FastISel final : public FastISel {
  /// Describes a memory operand while it is being assembled: a base (either a
  /// register or a frame index), an optional offset register together with
  /// its extend type and shift amount, an immediate offset, and an optional
  /// global value.
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    // Only the member selected by Kind is meaningful.
    union {
      unsigned Reg;
      int FI;
    } Base;
    Register OffsetReg;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    /// Starts out as a register-based address with no base register set.
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    /// Base register accessors; only valid while the kind is RegBase.
    void setReg(Register Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg.id();
    }
    Register getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(Register Reg) { OffsetReg = Reg; }
    Register getOffsetReg() const { return OffsetReg; }

    /// Frame index accessors; only valid while the kind is FrameIndexBase.
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    // The remaining getters are const-qualified like the ones above so that
    // they can be used on a const Address as well.
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() const { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() const { return Shift; }
    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() const { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  /// Context of the function being selected; set in the constructor.
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  Register getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
                         Register RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
                         bool SetFlags = false, bool WantResult = true);
  Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
                         Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
                         Register RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, Register SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
                        MachineMemOperand *MMO = nullptr);
  Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
  Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
  Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
  Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
                       bool WantResult = true);
  Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
                            uint64_t Imm);
  Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
                            Register RHSReg, uint64_t ShiftImm);
  Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
  Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
  Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
  Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
  Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
  Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
  Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
  Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  // Constant materialization helpers, dispatched to from
  // fastMaterializeConstant().
  Register materializeInt(const ConstantInt *CI, MVT VT);
  Register materializeFP(const ConstantFP *CFP, MVT VT);
  Register materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  Register fastMaterializeAlloca(const AllocaInst *AI) override;
  Register fastMaterializeConstant(const Constant *C) override;
  Register fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// Caches the AArch64 subtarget and the LLVMContext of the function that is
  /// about to be selected.
  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};
} // end anonymous namespace
/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
"Unexpected integer extend instruction.");
assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
"Unexpected value type.");
bool IsZExt = isa<ZExtInst>(I);
if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
if (LI->hasOneUse())
return true;
if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
return true;
return false;
}
/// Returns the scale factor implicitly applied by a memory operation on a
/// value of type \p VT, or 0 when \p VT is not one of the handled scalar
/// integer/floating-point types.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  case MVT::i1:
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32:
  case MVT::f32:
    return 4;
  case MVT::i64:
  case MVT::f64:
    return 8;
  default:
    // Anything else is reported as invalid.
    return 0;
  }
}
/// Selects the argument-assignment function for calling convention \p CC.
/// GHC and CFGuard_Check are matched explicitly; every other convention falls
/// back to the platform default (Darwin, Windows, or AAPCS).
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::GHC:
    return CC_AArch64_GHC;
  case CallingConv::CFGuard_Check:
    return CC_AArch64_Win64_CFGuard_Check;
  default:
    break;
  }

  if (Subtarget->isTargetDarwin())
    return CC_AArch64_DarwinPCS;
  return Subtarget->isTargetWindows() ? CC_AArch64_Win64PCS : CC_AArch64_AAPCS;
}
/// Materializes the address of a static alloca into a new GPR64sp virtual
/// register by emitting `ADDXri <frame-index>, 0, 0`.
///
/// \returns the result register, or an invalid Register when \p AI has no
/// entry in FuncInfo.StaticAllocaMap (i.e. it is a dynamic alloca).
Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  auto SI = FuncInfo.StaticAllocaMap.find(AI);
  if (SI == FuncInfo.StaticAllocaMap.end())
    return Register();

  // From here on SI is known to be valid, so no second lookup check is needed.
  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
          ResultReg)
      .addFrameIndex(SI->second)
      .addImm(0)
      .addImm(0);
  return ResultReg;
}
/// Materializes the integer constant \p CI of type \p VT in a register.
///
/// Types that compare greater than i64 are rejected. Non-zero values go
/// through fastEmit_i; zero is produced by copying from WZR/XZR.
Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return Register();

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const bool Is64Bit = VT == MVT::i64;
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ZeroReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  Register DstReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          DstReg)
      .addReg(ZeroReg, getKillRegState(true));
  return DstReg;
}
/// Materializes the floating-point constant \p CFP of type \p VT.
///
/// Strategies, in order: the dedicated zero path, an FMOV immediate, a GPR
/// move plus copy (large code model only), and finally an ADRP + load from
/// the constant pool. Only f32 and f64 are handled for non-zero values.
Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  const bool Is64Bit = VT == MVT::f64;
  if (!Is64Bit && VT != MVT::f32)
    return Register();

  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  const APFloat &FPVal = CFP->getValueAPF();
  const int FMovImm = Is64Bit ? AArch64_AM::getFP64Imm(FPVal)
                              : AArch64_AM::getFP32Imm(FPVal);
  if (FMovImm != -1) {
    unsigned FMovOpc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(FMovOpc, TLI.getRegClassFor(VT), FMovImm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned MovOpc = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *GPRClass =
        Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    // Move the raw bit pattern into a GPR first ...
    Register BitsReg = createResultReg(GPRClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovOpc), BitsReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    // ... then copy it into a register of the class used for VT.
    Register FPReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), FPReg)
        .addReg(BitsReg, getKillRegState(true));
    return FPReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align PoolAlign = DL.getPrefTypeAlign(CFP->getType());
  unsigned PoolIdx = MCP.getConstantPoolIndex(cast<Constant>(CFP), PoolAlign);

  Register PageReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          PageReg)
      .addConstantPoolIndex(PoolIdx, 0, AArch64II::MO_PAGE);

  unsigned LoadOpc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register LoadedReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LoadOpc), LoadedReg)
      .addReg(PageReg)
      .addConstantPoolIndex(PoolIdx, 0,
                            AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return LoadedReg;
}
/// Materializes the address of global value \p GV.
///
/// Emits ADRP followed by either a GOT load (with a SUBREG_TO_REG widening on
/// ILP32) or an ADDXri of the page offset, optionally preceded by a MOVK that
/// sets the tag bits. Returns an invalid Register for the cases that are not
/// handled here.
Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return Register();

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!(Subtarget->useSmallAddressing() || Subtarget->isTargetMachO()))
    return Register();

  if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
    return Register();

  const unsigned RefFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  // Only simple value types are supported for the resulting pointer.
  if (!TLI.getValueType(DL, GV->getType(), true).isSimple())
    return Register();

  // Both the GOT and the direct sequence begin with the same ADRP.
  Register PageReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          PageReg)
      .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | RefFlags);

  if (RefFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX (LDRW on ILP32)
    const bool IsILP32 = Subtarget->isTargetILP32();
    Register LoadedReg = createResultReg(IsILP32 ? &AArch64::GPR32RegClass
                                                 : &AArch64::GPR64RegClass);
    unsigned LdrOpc = IsILP32 ? AArch64::LDRWui : AArch64::LDRXui;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            LoadedReg)
        .addReg(PageReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                              AArch64II::MO_NC | RefFlags);
    if (!IsILP32)
      return LoadedReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(LoadedReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  }

  // ADRP + ADDX
  Register BaseReg = PageReg;
  if (RefFlags & AArch64II::MO_TAGGED) {
    // MO_TAGGED on the page indicates a tagged address. Set the tag now.
    // We do so by creating a MOVK that sets bits 48-63 of the register to
    // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
    // the small code model so we can assume a binary size of <= 4GB, which
    // makes the untagged PC relative offset positive. The binary must also be
    // loaded into address range [0, 2^48). Both of these properties need to
    // be ensured at runtime when using tagged addresses.
    //
    // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
    // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
    // are not exactly 1:1 with FastISel so we cannot easily abstract this
    // out. At some point, it would be nice to find a way to not have this
    // duplicate code.
    Register TaggedReg = createResultReg(&AArch64::GPR64commonRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
            TaggedReg)
        .addReg(PageReg)
        .addGlobalAddress(GV, /*Offset=*/0x100000000,
                          AArch64II::MO_PREL | AArch64II::MO_G3)
        .addImm(48);
    BaseReg = TaggedReg;
  }

  Register AddrReg = createResultReg(&AArch64::GPR64spRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
          AddrReg)
      .addReg(BaseReg)
      .addGlobalAddress(GV, 0,
                        AArch64II::MO_PAGEOFF | AArch64II::MO_NC | RefFlags)
      .addImm(0);
  return AddrReg;
}
/// Materializes constant \p C by dispatching on its kind (null pointer,
/// integer, floating point, or global value). Returns an invalid Register for
/// non-simple types and for any other kind of constant.
Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  const EVT ConstEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!ConstEVT.isSimple())
    return Register();
  const MVT VT = ConstEVT.getSimpleVT();

  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *IntC = dyn_cast<ConstantInt>(C))
    return materializeInt(IntC, VT);
  if (const auto *FPC = dyn_cast<ConstantFP>(C))
    return materializeFP(FPC, VT);
  if (const auto *Global = dyn_cast<GlobalValue>(C))
    return materializeGV(Global);

  return Register();
}
/// Materializes a floating-point +0.0 by moving the integer zero register
/// (WZR/XZR) into an FP register with FMOVWSr/FMOVXDr. Only legal f32 and f64
/// types are handled; anything else yields an invalid Register.
Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");

  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return Register();

  const bool Is64Bit = VT == MVT::f64;
  if (!Is64Bit && VT != MVT::f32)
    return Register();

  unsigned ZeroReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned FMovOpc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(FMovOpc, TLI.getRegClassFor(VT), ZeroReg);
}
/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
if (const auto *MI = dyn_cast<MulOperator>(I)) {
if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
if (C->getValue().isPowerOf2())
return true;
if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
if (C->getValue().isPowerOf2())
return true;
}
return false;
}
// Computes the address to get to an object.
//
// Walks the expression that produces Obj and folds as much of it as possible
// into Addr: a base (register or frame index), an immediate offset, and an
// offset register with an optional extend and shift. Ty, when given, is the
// type being accessed; it is only consulted to decide whether a shift/multiply
// or a 32-bit mask can be folded into the addressing mode.
//
// Returns true when Addr was filled in. On failure Addr may have been
// partially modified; only the GEP and the two-operand Add paths restore a
// saved copy before falling through to the generic handling.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
const User *U = nullptr;
// UserOp1 serves as the "nothing to look through" opcode; it matches no case
// label below and therefore takes the default path.
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
// NOTE(review): Obj is cast to AllocaInst without a type check; the pointer
// appears to be used purely as a lookup key for count().
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
Opcode = I->getOpcode();
U = I;
}
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
// Note: this local Ty shadows the Ty parameter within the if statement only.
if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
switch (Opcode) {
default:
break;
case Instruction::BitCast:
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr, Ty);
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
case Instruction::GetElementPtr: {
// Keep a copy so that a failed attempt to fold the GEP can be undone.
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
// Iterate through the GEP folding the constants into offsets where
// we can.
for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
GTI != E; ++GTI) {
const Value *Op = GTI.getOperand();
if (StructType *STy = GTI.getStructTypeOrNull()) {
// Struct field: add the field's byte offset from the struct layout.
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
// Sequential index: scale by the element stride.
uint64_t S = GTI.getSequentialElementStride(DL);
while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
continue;
}
// Unsupported
goto unsupported_gep;
}
}
}
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
if (computeAddress(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
Addr = SavedAddr;
unsupported_gep:
break;
}
case Instruction::Alloca: {
// Static allocas become a frame-index base; anything else falls through to
// the generic handling after the switch.
const AllocaInst *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
Addr.setKind(Address::FrameIndexBase);
Addr.setFI(SI->second);
return true;
}
break;
}
case Instruction::Add: {
// Adds of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
// Canonicalize a constant operand to the RHS.
if (isa<ConstantInt>(LHS))
std::swap(LHS, RHS);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
return computeAddress(LHS, Addr, Ty);
}
// Neither operand is a constant: try to fold both into Addr and undo the
// attempt if either one fails.
Address Backup = Addr;
if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
return true;
Addr = Backup;
break;
}
case Instruction::Sub: {
// Subs of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
return computeAddress(LHS, Addr, Ty);
}
break;
}
case Instruction::Shl: {
// A left shift can only become the shifted offset register, so that slot
// must still be free.
if (Addr.getOffsetReg())
break;
const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
if (!CI)
break;
// Only shift amounts 1..3 are considered.
unsigned Val = CI->getZExtValue();
if (Val < 1 || Val > 3)
break;
// The shift is folded only when it equals log2 of the accessed type's size
// in bytes; types whose bit size is not a power of two never match.
uint64_t NumBytes = 0;
if (Ty && Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (NumBytes != (1ULL << Val))
break;
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
const Value *Src = U->getOperand(0);
if (const auto *I = dyn_cast<Instruction>(Src)) {
if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
// Fold the zext or sext when it won't become a noop.
if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
if (!isIntExtFree(ZE) &&
ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
Src = ZE->getOperand(0);
}
} else if (const auto *SE = dyn_cast<SExtInst>(I)) {
if (!isIntExtFree(SE) &&
SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
Src = SE->getOperand(0);
}
}
}
}
// An 'and' with 0xffffffff is treated like a zero-extend from 32 bits: use
// the low 32-bit subregister of the other operand with UXTW.
if (const auto *AI = dyn_cast<BinaryOperator>(Src))
if (AI->getOpcode() == Instruction::And) {
const Value *LHS = AI->getOperand(0);
const Value *RHS = AI->getOperand(1);
if (const auto *C = dyn_cast<ConstantInt>(LHS))
if (C->getValue() == 0xffffffff)
std::swap(LHS, RHS);
if (const auto *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue() == 0xffffffff) {
Addr.setExtendType(AArch64_AM::UXTW);
Register Reg = getRegForValue(LHS);
if (!Reg)
return false;
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
Addr.setOffsetReg(Reg);
return true;
}
}
Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
case Instruction::Mul: {
// Multiplication by 2, 4 or 8 is handled like the equivalent left shift.
if (Addr.getOffsetReg())
break;
if (!isMulPowOf2(U))
break;
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
// Canonicalize power-of-2 value to the RHS.
if (const auto *C = dyn_cast<ConstantInt>(LHS))
if (C->getValue().isPowerOf2())
std::swap(LHS, RHS);
assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
const auto *C = cast<ConstantInt>(RHS);
unsigned Val = C->getValue().logBase2();
if (Val < 1 || Val > 3)
break;
// Same size check as in the Shl case above.
uint64_t NumBytes = 0;
if (Ty && Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (NumBytes != (1ULL << Val))
break;
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
const Value *Src = LHS;
if (const auto *I = dyn_cast<Instruction>(Src)) {
if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
// Fold the zext or sext when it won't become a noop.
if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
if (!isIntExtFree(ZE) &&
ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
Src = ZE->getOperand(0);
}
} else if (const auto *SE = dyn_cast<SExtInst>(I)) {
if (!isIntExtFree(SE) &&
SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
Src = SE->getOperand(0);
}
}
}
}
Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
case Instruction::And: {
// Only considered for accesses whose type is exactly 8 bits wide, and only
// while the offset register slot is free.
if (Addr.getOffsetReg())
break;
if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
break;
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
if (const auto *C = dyn_cast<ConstantInt>(LHS))
if (C->getValue() == 0xffffffff)
std::swap(LHS, RHS);
if (const auto *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue() == 0xffffffff) {
Addr.setShift(0);
// NOTE(review): the LSL extend type set here is immediately overwritten by
// UXTW on the next line, so the first call looks redundant.
Addr.setExtendType(AArch64_AM::LSL);
Addr.setExtendType(AArch64_AM::UXTW);
Register Reg = getRegForValue(LHS);
if (!Reg)
return false;
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
Addr.setOffsetReg(Reg);
return true;
}
break;
}
case Instruction::SExt:
case Instruction::ZExt: {
// An extend can only be folded as the offset register of an address that
// already has a base register.
// NOTE(review): Addr.getReg() asserts that the base kind is RegBase; confirm
// this case cannot be reached with a frame-index base.
if (!Addr.getReg() || Addr.getOffsetReg())
break;
const Value *Src = nullptr;
// Fold the zext or sext when it won't become a noop.
if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
Src = ZE->getOperand(0);
}
} else if (const auto *SE = dyn_cast<SExtInst>(U)) {
if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
Src = SE->getOperand(0);
}
}
if (!Src)
break;
Addr.setShift(0);
Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
} // end switch
// Generic handling: nothing could be folded, so put Obj itself in a register.
// It becomes the base register if none is set yet ...
if (Addr.isRegBase() && !Addr.getReg()) {
Register Reg = getRegForValue(Obj);
if (!Reg)
return false;
Addr.setReg(Reg);
return true;
}
// ... otherwise the offset register, if that slot is still free.
if (!Addr.getOffsetReg()) {
Register Reg = getRegForValue(Obj);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
// Both register slots are already occupied.
return false;
}
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
bool InMBB = true;
if (const auto *I = dyn_cast<Instruction>(V)) {
Opcode = I->getOpcode();
U = I;
InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
} else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
}
switch (Opcode) {
default: break;
case Instruction::BitCast:
// Look past bitcasts if its operand is in the same BB.
if (InMBB)
return computeCallAddress(U->getOperand(0), Addr);
break;
case Instruction::IntToPtr:
// Look past no-op inttoptrs if its operand is in the same BB.
if (InMBB &&
TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeCallAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints if its operand is in the same BB.
if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeCallAddress(U->getOperand(0), Addr);
break;
}
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
Addr.setGlobalValue(GV);
return true;
}
// If all else fails, try to materialize the value in a register.
if (!Addr.getGlobalValue()) {
Addr.setReg(getRegForValue(V));
return Addr.getReg().isValid();
}
return false;
}
/// Checks whether \p Ty maps to a legal simple value type that fast-isel
/// handles, storing that type in \p VT.
///
/// \p VT is left untouched when the function bails out before the value type
/// is known to be simple (ILP32 pointers, MVT::Other, non-simple types).
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  const EVT ValueTy = TLI.getValueType(DL, Ty, true);

  // Pointers are not handled on ILP32 targets.
  if (Ty->isPointerTy() && Subtarget->isTargetILP32())
    return false;

  // Only handle simple types.
  if (!ValueTy.isSimple() || ValueTy == MVT::Other)
    return false;
  VT = ValueTy.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}
/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. On top of
/// what isTypeLegal() accepts, this also accepts the simple value types i1,
/// i8, and i16. Vector types are rejected unless \p IsVectorAllowed is set.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (!IsVectorAllowed && Ty->isVectorTy())
    return false;

  // Either the type is legal as-is, or it is a small integer that can be sign
  // or zero-extended to a basic operation.
  return isTypeLegal(Ty, VT) || VT == MVT::i1 || VT == MVT::i8 ||
         VT == MVT::i16;
}
bool AArch64FastISel::isValueAvailable(const Value *V) const {
if (!isa<Instruction>(V))
return true;
const auto *I = cast<Instruction>(V);
return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
}
/// Rewrite \p Addr into a form a single load/store of type \p VT can encode,
/// emitting helper instructions where needed.
///
/// \returns false on ILP32 targets, when \p VT has no implicit scale factor,
/// or when emitting one of the helper instructions fails; true otherwise.
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  const unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (ScaleFactor == 0)
    return false;

  const int64_t Offset = Addr.getOffset();

  // Negative or misaligned offsets must fit the signed 9-bit form; positive
  // aligned offsets must fit the unsigned 12-bit form once scaled.
  const bool NeedsUnscaledForm =
      Offset < 0 || (Offset & (ScaleFactor - 1)) != 0;
  bool LowerImmOffset;
  if (NeedsUnscaledForm)
    LowerImmOffset = !isInt<9>(Offset);
  else
    LowerImmOffset = Offset > 0 && !isUInt<12>(Offset / ScaleFactor);

  // Cannot encode an offset register and an immediate offset in the same
  // instruction: the immediate stays in the load/store and an extra add takes
  // care of the offset register. The zero register cannot be the base either.
  const bool LowerRegOffset =
      (!LowerImmOffset && Offset && Addr.getOffsetReg()) ||
      (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg());

  // A frame-index base whose offset needs simplifying is first materialized
  // into a register, and the address is switched back to a register base.
  // This should almost never happen.
  if (Addr.isFIBase() && (LowerImmOffset || Addr.getOffsetReg())) {
    Register FrameAddrReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            FrameAddrReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(FrameAddrReg);
  }

  if (LowerRegOffset) {
    const auto ExtTy = Addr.getExtendType();
    const bool IsWordExtend =
        ExtTy == AArch64_AM::SXTW || ExtTy == AArch64_AM::UXTW;

    Register CombinedReg;
    if (Addr.getReg()) {
      // Base + (extended or shifted) offset register.
      if (IsWordExtend)
        CombinedReg =
            emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                          Addr.getOffsetReg(), ExtTy, Addr.getShift());
      else
        CombinedReg =
            emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                          Addr.getOffsetReg(), AArch64_AM::LSL,
                          Addr.getShift());
    } else if (IsWordExtend) {
      // No base register: only the extended, shifted offset remains.
      CombinedReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(),
                               /*IsZExt=*/ExtTy == AArch64_AM::UXTW);
    } else {
      CombinedReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!CombinedReg)
      return false;

    Addr.setReg(CombinedReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // The offset is too large for the load/store instruction, so get
  // reg+offset into a register.
  if (LowerImmOffset) {
    Register SumReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      SumReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      SumReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
    if (!SumReg)
      return false;

    Addr.setReg(SumReg);
    Addr.setOffset(0);
  }

  return true;
}
/// Append the address operands described by \p Addr to the load/store being
/// built in \p MIB, followed by a memory operand if one is available.
///
/// For a frame-index base, a fresh fixed-stack memory operand replaces
/// \p MMO. For a register base, the base and offset registers are constrained
/// to the instruction's operand classes and written back into \p Addr.
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  const int64_t ScaledOffset = Addr.getOffset() / ScaleFactor;

  if (!Addr.isFIBase()) {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &Desc = MIB->getDesc();

    // Stores skip one extra operand slot before the address operands.
    const unsigned StoreSkip = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(constrainOperandRegClass(Desc, Addr.getReg(),
                                         Desc.getNumDefs() + StoreSkip));
    Addr.setOffsetReg(constrainOperandRegClass(
        Desc, Addr.getOffsetReg(), Desc.getNumDefs() + StoreSkip + 1));

    if (!Addr.getOffsetReg()) {
      // Base register plus immediate offset.
      MIB.addReg(Addr.getReg()).addImm(ScaledOffset);
    } else {
      // Base register plus offset register, with extend and shift flags.
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      const bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                            Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    }
  } else {
    // Frame base works a bit differently, so it is handled separately.
    const int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, ScaledOffset),
        Flags, MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(ScaledOffset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}
/// Emit an integer add (\p UseAdd true) or subtract (\p UseAdd false) of
/// \p LHS and \p RHS.
///
/// i1, i8 and i16 operations are performed as i32; operands are extended
/// according to \p IsZExt. The forms are tried in this order: immediate,
/// extended register, shifted register (folding a multiply by a power of two
/// or a constant shift that feeds RHS), and finally plain register-register.
///
/// \p SetFlags and \p WantResult are forwarded to the emitAddSub_* helpers.
/// \returns the register produced by the selected helper, or an invalid
/// Register if \p RetVT is not handled or an operand cannot be materialized.
Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
const Value *RHS, bool SetFlags,
bool WantResult, bool IsZExt) {
AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
bool NeedExtend = false;
// Classify the requested type: narrow types need their operands extended,
// and i8/i16 additionally have an in-instruction extend that can be used.
switch (RetVT.SimpleTy) {
default:
return Register();
case MVT::i1:
NeedExtend = true;
break;
case MVT::i8:
NeedExtend = true;
ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
break;
case MVT::i16:
NeedExtend = true;
ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
break;
case MVT::i32: // fall-through
case MVT::i64:
break;
}
// Remember the original type; the operation itself is at least i32 wide.
MVT SrcVT = RetVT;
RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
// The canonicalizations below swap operands, so they only apply to adds.
// Canonicalize immediates to the RHS first.
if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
std::swap(LHS, RHS);
// Canonicalize mul by power of 2 to the RHS.
if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
if (isMulPowOf2(LHS))
std::swap(LHS, RHS);
// Canonicalize shift immediate to the RHS.
if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
if (isa<ConstantInt>(SI->getOperand(1)))
if (SI->getOpcode() == Instruction::Shl ||
SI->getOpcode() == Instruction::LShr ||
SI->getOpcode() == Instruction::AShr )
std::swap(LHS, RHS);
Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return Register();
if (NeedExtend)
LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
Register ResultReg;
// Immediate form. A negative constant is handled by flipping add/sub and
// negating the immediate; a failed attempt leaves ResultReg invalid and
// falls through to the register forms.
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
if (C->isNegative())
ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
WantResult);
else
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
WantResult);
} else if (const auto *C = dyn_cast<Constant>(RHS))
// Any other constant that is a null value is treated as immediate 0.
if (C->isNullValue())
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
// Only extend the RHS within the instruction if there is a valid extend type.
if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
isValueAvailable(RHS)) {
Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return Register();
return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
SetFlags, WantResult);
}
// Check if the mul can be folded into the instruction.
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (isMulPowOf2(RHS)) {
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
// Make sure the power-of-two constant ends up in MulRHS.
if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
if (C->getValue().isPowerOf2())
std::swap(MulLHS, MulRHS);
assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
// Multiplying by 2^k is emitted as a left shift by k.
uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
Register RHSReg = getRegForValue(MulLHS);
if (!RHSReg)
return Register();
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
ShiftVal, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
}
}
// Check if the shift can be folded into the instruction.
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
switch (SI->getOpcode()) {
default: break;
case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
}
uint64_t ShiftVal = C->getZExtValue();
if (ShiftType != AArch64_AM::InvalidShiftExtend) {
Register RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return Register();
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
ShiftVal, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
}
}
}
}
// Nothing could be folded: fall back to the plain register-register form.
Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return Register();
if (NeedExtend)
RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}
/// Emit the register-register form of an i32/i64 add or subtract.
///
/// \returns the destination register, or the zero register when
/// \p WantResult is false. An invalid Register is returned when an operand is
/// SP/WSP or when \p RetVT is neither i32 nor i64.
Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
                                        Register RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  // The stack pointer is rejected as an operand of this form.
  const bool LHSIsStackPtr = LHSReg == AArch64::SP || LHSReg == AArch64::WSP;
  if (LHSIsStackPtr || RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return Register();

  const bool Is64Bit = RetVT == MVT::i64;
  if (!Is64Bit && RetVT != MVT::i32)
    return Register();

  // Indexed by [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  const unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];

  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  if (Is64Bit)
    RC = &AArch64::GPR64RegClass;

  // When the result is not wanted, the zero register is the destination.
  Register DstReg;
  if (!WantResult)
    DstReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  else
    DstReg = createResultReg(RC);

  const MCInstrDesc &Desc = TII.get(Opc);
  LHSReg = constrainOperandRegClass(Desc, LHSReg, Desc.getNumDefs());
  RHSReg = constrainOperandRegClass(Desc, RHSReg, Desc.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc, DstReg)
      .addReg(LHSReg)
      .addReg(RHSReg);
  return DstReg;
}
/// Emit the register-immediate form of an i32/i64 add or subtract.
///
/// \p Imm must either fit in 12 bits, or have all of its set bits within
/// bits 12-23 (it is then emitted with a shift of 12). Anything else, or a
/// \p RetVT other than i32/i64, yields an invalid Register.
Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
                                        uint64_t Imm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  const bool Is64Bit = RetVT == MVT::i64;
  if (!Is64Bit && RetVT != MVT::i32)
    return Register();

  // Work out the shift that makes the immediate encodable, if any.
  unsigned ShiftAmt = 0;
  if (!isUInt<12>(Imm)) {
    if ((Imm & 0xfff000) != Imm)
      return Register();
    ShiftAmt = 12;
    Imm >>= 12;
  }

  // Indexed by [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  const unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];

  // The flag-setting variants use the plain GPR classes, the others the
  // "sp" classes.
  const TargetRegisterClass *RC;
  if (Is64Bit)
    RC = SetFlags ? &AArch64::GPR64RegClass : &AArch64::GPR64spRegClass;
  else
    RC = SetFlags ? &AArch64::GPR32RegClass : &AArch64::GPR32spRegClass;

  // When the result is not wanted, the zero register is the destination.
  Register DstReg;
  if (!WantResult)
    DstReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  else
    DstReg = createResultReg(RC);

  const MCInstrDesc &Desc = TII.get(Opc);
  LHSReg = constrainOperandRegClass(Desc, LHSReg, Desc.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc, DstReg)
      .addReg(LHSReg)
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftAmt));
  return DstReg;
}
/// Emit the shifted-register form of an i32/i64 add or subtract, where
/// \p RHSReg is shifted by \p ShiftImm using \p ShiftType.
///
/// \returns an invalid Register when \p RetVT is neither i32 nor i64 or when
/// the shift amount is not smaller than the type's bit width.
Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
                                        Register RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  const bool Is64Bit = RetVT == MVT::i64;
  if (!Is64Bit && RetVT != MVT::i32)
    return Register();

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return Register();

  // Indexed by [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  const unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];

  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  if (Is64Bit)
    RC = &AArch64::GPR64RegClass;

  // When the result is not wanted, the zero register is the destination.
  Register DstReg;
  if (!WantResult)
    DstReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  else
    DstReg = createResultReg(RC);

  const MCInstrDesc &Desc = TII.get(Opc);
  LHSReg = constrainOperandRegClass(Desc, LHSReg, Desc.getNumDefs());
  RHSReg = constrainOperandRegClass(Desc, RHSReg, Desc.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc, DstReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return DstReg;
}
/// Emit the extended-register form of an i32/i64 add or subtract, where
/// \p RHSReg is extended per \p ExtType and shifted left by \p ShiftImm.
///
/// \returns an invalid Register when \p RetVT is neither i32 nor i64 or when
/// \p ShiftImm is 4 or more.
Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
                                        Register RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  const bool Is64Bit = RetVT == MVT::i64;
  if (!Is64Bit && RetVT != MVT::i32)
    return Register();

  if (ShiftImm >= 4)
    return Register();

  // Indexed by [SetFlags][UseAdd][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  const unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];

  // The flag-setting variants use the plain GPR classes, the others the
  // "sp" classes.
  const TargetRegisterClass *RC;
  if (Is64Bit)
    RC = SetFlags ? &AArch64::GPR64RegClass : &AArch64::GPR64spRegClass;
  else
    RC = SetFlags ? &AArch64::GPR32RegClass : &AArch64::GPR32spRegClass;

  // When the result is not wanted, the zero register is the destination.
  Register DstReg;
  if (!WantResult)
    DstReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  else
    DstReg = createResultReg(RC);

  const MCInstrDesc &Desc = TII.get(Opc);
  LHSReg = constrainOperandRegClass(Desc, LHSReg, Desc.getNumDefs());
  RHSReg = constrainOperandRegClass(Desc, RHSReg, Desc.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc, DstReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return DstReg;
}
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
Type *Ty = LHS->getType();
EVT EVT = TLI.getValueType(DL, Ty, true);
if (!EVT.isSimple())
return false;
MVT VT = EVT.getSimpleVT();
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
return emitICmp(VT, LHS, RHS, IsZExt);
case MVT::f32:
case MVT::f64:
return emitFCmp(VT, LHS, RHS);
}
}
/// Emit an integer compare as a flag-setting subtract whose result is not
/// wanted. \returns whether the subtract could be emitted.
bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  const Register CmpReg = emitSub(RetVT, LHS, RHS, /*SetFlags=*/true,
                                  /*WantResult=*/false, IsZExt);
  return CmpReg.isValid();
}
/// Emit an integer compare of \p LHSReg against the immediate \p Imm as a
/// flag-setting subtract whose result is not wanted. \returns whether the
/// subtract could be emitted.
bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
  const Register CmpReg =
      emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
                    /*SetFlags=*/true, /*WantResult=*/false);
  return CmpReg.isValid();
}
/// Emit an f32/f64 floating-point compare of \p LHS and \p RHS.
///
/// A positive-zero constant on the right-hand side is encoded directly in
/// the compare instead of being materialized in a register.
bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  const bool IsDouble = RetVT == MVT::f64;
  if (!IsDouble && RetVT != MVT::f32)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  const auto *CFP = dyn_cast<ConstantFP>(RHS);
  const bool CompareWithZero = CFP && CFP->isZero() && !CFP->isNegative();

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;

  if (!CompareWithZero) {
    Register RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return false;

    const unsigned Opc = IsDouble ? AArch64::FCMPDrr : AArch64::FCMPSrr;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(LHSReg)
        .addReg(RHSReg);
    return true;
  }

  const unsigned Opc = IsDouble ? AArch64::FCMPDri : AArch64::FCMPSri;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(LHSReg);
  return true;
}
/// Emit an integer addition of \p LHS and \p RHS by forwarding to emitAddSub
/// with the add variant selected.
Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  constexpr bool UseAdd = true;
  return emitAddSub(UseAdd, RetVT, LHS, RHS, SetFlags, WantResult, IsZExt);
}
/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri
/// (as a subtract of the magnitude when \p Imm is negative). If that fails,
/// materialize the immediate into a register and use emitAddSub_rr instead.
///
/// \returns the result register, or an invalid Register if the constant
/// cannot be materialized or the register-register add cannot be emitted.
Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
  // Negate in the unsigned domain: "-Imm" on int64_t is undefined behaviour
  // for INT64_MIN, whereas unsigned negation is well defined and yields the
  // same bit pattern for every other negative value.
  const bool IsNegative = Imm < 0;
  const uint64_t Magnitude = IsNegative ? 0 - static_cast<uint64_t>(Imm)
                                        : static_cast<uint64_t>(Imm);

  Register ResultReg = emitAddSub_ri(/*UseAdd=*/!IsNegative, VT, Op0, Magnitude);
  if (ResultReg)
    return ResultReg;

  // The immediate is not encodable; put the original signed value in a
  // register and add that instead.
  Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return Register();

  return emitAddSub_rr(/*UseAdd=*/true, VT, Op0, CReg);
}
/// Emit an integer subtraction of \p RHS from \p LHS by forwarding to
/// emitAddSub with the subtract variant selected.
Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  constexpr bool UseAdd = false;
  return emitAddSub(UseAdd, RetVT, LHS, RHS, SetFlags, WantResult, IsZExt);
}
/// Emit a flag-setting register-register subtract; a thin wrapper around
/// emitAddSub_rr.
Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
                                      Register RHSReg, bool WantResult) {
  constexpr bool UseAdd = false;
  constexpr bool SetFlags = true;
  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}
/// Emit a flag-setting shifted-register subtract; a thin wrapper around
/// emitAddSub_rs.
Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
                                      Register RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  constexpr bool UseAdd = false;
  constexpr bool SetFlags = true;
  return emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType, ShiftImm,
                       SetFlags, WantResult);
}
/// Emit the logical operation \p ISDOpc on \p LHS and \p RHS.
///
/// The forms are tried in this order: immediate, shifted register (folding a
/// multiply by a power of two or a left shift by a constant that feeds RHS),
/// and finally plain register-register. In the register-register form, i8
/// and i16 results are masked to their width.
///
/// \returns the result register, or an invalid Register when an operand
/// cannot be materialized.
Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (!isa<ConstantInt>(RHS) && isa<ConstantInt>(LHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS) && isMulPowOf2(LHS))
    std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS)) {
    const auto *Shl = dyn_cast<ShlOperator>(LHS);
    if (Shl && isa<ConstantInt>(Shl->getOperand(1)))
      std::swap(LHS, RHS);
  }

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return Register();

  // Immediate form.
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    Register ImmResult =
        emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, C->getZExtValue());
    if (ImmResult)
      return ImmResult;
  }

  // Folding is only attempted for a single-use RHS available in this block.
  const bool CanFoldRHS = RHS->hasOneUse() && isValueAvailable(RHS);

  // Check if the mul can be folded into the instruction.
  if (CanFoldRHS && isMulPowOf2(RHS)) {
    const auto *Mul = cast<MulOperator>(RHS);
    const Value *Factor = Mul->getOperand(0);
    const Value *PowerOf2 = Mul->getOperand(1);
    // Make sure the power-of-two constant ends up in PowerOf2.
    if (const auto *C = dyn_cast<ConstantInt>(Factor))
      if (C->getValue().isPowerOf2())
        std::swap(Factor, PowerOf2);

    assert(isa<ConstantInt>(PowerOf2) && "Expected a ConstantInt.");
    const uint64_t ShiftVal = cast<ConstantInt>(PowerOf2)->getValue().logBase2();

    Register FactorReg = getRegForValue(Factor);
    if (!FactorReg)
      return Register();
    Register Folded =
        emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, FactorReg, ShiftVal);
    if (Folded)
      return Folded;
  }

  // Check if the shift can be folded into the instruction.
  if (CanFoldRHS) {
    if (const auto *Shl = dyn_cast<ShlOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(Shl->getOperand(1))) {
        const uint64_t ShiftVal = C->getZExtValue();
        Register ShiftedReg = getRegForValue(Shl->getOperand(0));
        if (!ShiftedReg)
          return Register();
        Register Folded =
            emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, ShiftedReg, ShiftVal);
        if (Folded)
          return Folded;
      }
    }
  }

  // Plain register-register form, performed on at least 32 bits.
  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return Register();

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  Register ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    const uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}
/// Emit the register-immediate form of the logical operation \p ISDOpc.
///
/// \returns an invalid Register when \p RetVT is not an integer type up to
/// i64 or when \p Imm is not a logical immediate for the register size. For
/// i8/i16 OR and XOR, the result is additionally masked to the type's width.
Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           Register LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  // Rows follow ISD::AND, ISD::OR, ISD::XOR; columns are 32-bit and 64-bit.
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };

  const TargetRegisterClass *RC;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  case MVT::i64:
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  default:
    return Register();
  }
  const unsigned Opc = OpcTable[ISDOpc - ISD::AND][RegSize == 64 ? 1 : 0];

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return Register();

  const uint64_t Encoded = AArch64_AM::encodeLogicalImmediate(Imm, RegSize);
  Register ResultReg = fastEmitInst_ri(Opc, RC, LHSReg, Encoded);

  // Mask narrow OR/XOR results to their width; AND results are left as is.
  const bool IsNarrow = RetVT >= MVT::i8 && RetVT <= MVT::i16;
  if (IsNarrow && ISDOpc != ISD::AND) {
    const uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}
/// Emit the shifted-register form of the logical operation \p ISDOpc, with
/// \p RHSReg shifted left by \p ShiftImm.
///
/// \returns an invalid Register when the shift amount is not smaller than
/// the width of \p RetVT or when \p RetVT is not an integer type up to i64.
/// i8 and i16 results are masked to the type's width.
Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           Register LHSReg, Register RHSReg,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  // Rows follow ISD::AND, ISD::OR, ISD::XOR; columns are 32-bit and 64-bit.
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return Register();

  const TargetRegisterClass *RC;
  unsigned Column;
  switch (RetVT.SimpleTy) {
  case MVT::i64:
    RC = &AArch64::GPR64RegClass;
    Column = 1;
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RC = &AArch64::GPR32RegClass;
    Column = 0;
    break;
  default:
    return Register();
  }
  const unsigned Opc = OpcTable[ISDOpc - ISD::AND][Column];

  Register ResultReg =
      fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));

  const bool IsNarrow = RetVT >= MVT::i8 && RetVT <= MVT::i16;
  if (IsNarrow) {
    const uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}
/// Emit a bitwise AND of \p LHSReg with the immediate \p Imm; a thin wrapper
/// around emitLogicalOp_ri.
Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
  constexpr unsigned Opcode = ISD::AND;
  return emitLogicalOp_ri(Opcode, RetVT, LHSReg, Imm);
}
/// Emit a load of memory type \p VT from \p Addr, producing a value of type
/// \p RetVT.
///
/// \p WantZExt selects the zero-extending opcode table instead of the
/// sign-extending one. \p MMO is forwarded to addLoadStoreOperands.
///
/// \returns the register holding the loaded value, or an invalid Register
/// when TLI.allowsMisalignedMemoryAccesses(VT) is false or the address
/// cannot be simplified.
Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
bool WantZExt, MachineMemOperand *MMO) {
if (!TLI.allowsMisalignedMemoryAccesses(VT))
return Register();
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
return Register();
unsigned ScaleFactor = getImplicitScaleFactor(VT);
if (!ScaleFactor)
llvm_unreachable("Unexpected value type.");
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
bool UseScaled = true;
if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
UseScaled = false;
ScaleFactor = 1;
}
// Indexed as [WantZExt][2 * Idx + IsRet64Bit][type column]. Rows come in
// pairs (32-bit result first, then 64-bit result) for each addressing form;
// the columns correspond to i1/i8, i16, i32 and i64.
static const unsigned GPOpcTable[2][8][4] = {
// Sign-extend.
{ { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
AArch64::LDURXi },
{ AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
AArch64::LDURXi },
{ AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
AArch64::LDRXui },
{ AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
AArch64::LDRXui },
{ AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
AArch64::LDRXroX },
{ AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
AArch64::LDRXroX },
{ AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
AArch64::LDRXroW },
{ AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
AArch64::LDRXroW }
},
// Zero-extend.
{ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
AArch64::LDURXi },
{ AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
AArch64::LDURXi },
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
AArch64::LDRXui },
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
AArch64::LDRXui },
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
AArch64::LDRXroX },
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
AArch64::LDRXroX },
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
AArch64::LDRXroW },
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
AArch64::LDRXroW }
}
};
// Indexed as [Idx][f32 / f64].
static const unsigned FPOpcTable[4][2] = {
{ AArch64::LDURSi, AArch64::LDURDi },
{ AArch64::LDRSui, AArch64::LDRDui },
{ AArch64::LDRSroX, AArch64::LDRDroX },
{ AArch64::LDRSroW, AArch64::LDRDroW }
};
unsigned Opc;
const TargetRegisterClass *RC;
// Idx picks the addressing form: 0 = unscaled immediate, 1 = scaled
// immediate, 2 = register offset. It is bumped by one when the offset
// register uses a UXTW/SXTW extend.
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
Addr.getOffsetReg();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
if (Addr.getExtendType() == AArch64_AM::UXTW ||
Addr.getExtendType() == AArch64_AM::SXTW)
Idx++;
bool IsRet64Bit = RetVT == MVT::i64;
// Select the opcode and result register class. For the narrow integer
// types a 64-bit register class is only used for sign-extending loads to
// i64; the zero-extending case is widened after the load (see below).
switch (VT.SimpleTy) {
default:
llvm_unreachable("Unexpected value type.");
case MVT::i1: // Intentional fall-through.
case MVT::i8:
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
RC = (IsRet64Bit && !WantZExt) ?
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i16:
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
RC = (IsRet64Bit && !WantZExt) ?
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i32:
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
RC = (IsRet64Bit && !WantZExt) ?
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i64:
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
RC = &AArch64::GPR64RegClass;
break;
case MVT::f32:
Opc = FPOpcTable[Idx][0];
RC = &AArch64::FPR32RegClass;
break;
case MVT::f64:
Opc = FPOpcTable[Idx][1];
RC = &AArch64::FPR64RegClass;
break;
}
// Create the base instruction, then add the operands.
Register ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(Opc), ResultReg);
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
// Loading an i1 requires special handling: the loaded value is ANDed with 1.
if (VT == MVT::i1) {
Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
ResultReg = ANDReg;
}
// For zero-extending loads to 64bit we emit a 32bit load and then convert
// the 32bit reg to a 64bit reg.
if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(AArch64::SUBREG_TO_REG), Reg64)
.addImm(0)
.addReg(ResultReg, getKillRegState(true))
.addImm(AArch64::sub_32);
ResultReg = Reg64;
}
return ResultReg;
}
/// Select an integer add or sub instruction.
///
/// Vector types are delegated to selectOperator. On success the result
/// register is recorded for \p I in the value map.
bool AArch64FastISel::selectAddSub(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  const unsigned Opcode = I->getOpcode();
  Register ResultReg;
  if (Opcode == Instruction::Add)
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
  else if (Opcode == Instruction::Sub)
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
  else
    llvm_unreachable("Unexpected instruction.");

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
/// Select an and/or/xor instruction.
///
/// Vector types are delegated to selectOperator. On success the result
/// register is recorded for \p I in the value map.
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  // Translate the IR opcode into the matching ISD opcode.
  unsigned ISDOpc;
  switch (I->getOpcode()) {
  case Instruction::And:
    ISDOpc = ISD::AND;
    break;
  case Instruction::Or:
    ISDOpc = ISD::OR;
    break;
  case Instruction::Xor:
    ISDOpc = ISD::XOR;
    break;
  default:
    llvm_unreachable("Unexpected instruction.");
  }

  Register ResultReg =
      emitLogicalOp(ISDOpc, VT, I->getOperand(0), I->getOperand(1));
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}
/// Select a non-atomic load instruction, folding a single sign-/zero-extend
/// user into the load where possible.
///
/// \returns false (leaving selection to the caller's fallback) for
/// unsupported types, atomic loads, swifterror pointers, addresses that
/// cannot be computed, or when emitLoad fails.
bool AArch64FastISel::selectLoad(const Instruction *I) {
MVT VT;
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
cast<LoadInst>(I)->isAtomic())
return false;
const Value *SV = I->getOperand(0);
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
if (Arg->hasSwiftErrorAttr())
return false;
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
if (Alloca->isSwiftError())
return false;
}
}
// See if we can handle this address.
Address Addr;
if (!computeAddress(I->getOperand(0), Addr, I->getType()))
return false;
// Fold the following sign-/zero-extend into the load instruction.
// isTypeSupported writes into RetVT, so RetVT is reset to VT whenever the
// extended type turns out not to be supported.
bool WantZExt = true;
MVT RetVT = VT;
const Value *IntExtVal = nullptr;
if (I->hasOneUse()) {
if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
if (isTypeSupported(ZE->getType(), RetVT))
IntExtVal = ZE;
else
RetVT = VT;
} else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
if (isTypeSupported(SE->getType(), RetVT))
IntExtVal = SE;
else
RetVT = VT;
// Note: WantZExt is cleared for any sext user, supported or not.
WantZExt = false;
}
}
Register ResultReg =
emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
if (!ResultReg)
return false;
// There are a few different cases we have to handle, because the load or the
// sign-/zero-extend might not be selected by FastISel if we fall-back to
// SelectionDAG. There is also an ordering issue when both instructions are in
// different basic blocks.
// 1.) The load instruction is selected by FastISel, but the integer extend
// not. This usually happens when the integer extend is in a different
// basic block and SelectionDAG took over for that basic block.
// 2.) The load instruction is selected before the integer extend. This only
// happens when the integer extend is in a different basic block.
// 3.) The load instruction is selected by SelectionDAG and the integer extend
// by FastISel. This happens if there are instructions between the load
// and the integer extend that couldn't be selected by FastISel.
if (IntExtVal) {
// The integer extend hasn't been emitted yet. FastISel or SelectionDAG
// could select it. Emit a copy to subreg if necessary. FastISel will remove
// it when it selects the integer extend.
Register Reg = lookUpRegForValue(IntExtVal);
auto *MI = MRI.getUniqueVRegDef(Reg);
if (!MI) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
// Note: this local iterator I shadows the Instruction parameter I for
// the rest of this inner block only.
MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
ResultReg = std::prev(I)->getOperand(0).getReg();
removeDeadCode(I, std::next(I));
} else
ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
AArch64::sub_32);
}
updateValueMap(I, ResultReg);
return true;
}
// The integer extend has already been emitted - delete all the instructions
// that have been emitted by the integer extend lowering code and use the
// result from the load instruction directly.
// Each iteration remembers the first register use operand of MI, removes
// MI, and then continues with the unique definition of that register.
while (MI) {
Reg = 0;
for (auto &Opnd : MI->uses()) {
if (Opnd.isReg()) {
Reg = Opnd.getReg();
break;
}
}
MachineBasicBlock::iterator I(MI);
removeDeadCode(I, std::next(I));
MI = nullptr;
if (Reg)
MI = MRI.getUniqueVRegDef(Reg);
}
updateValueMap(IntExtVal, ResultReg);
return true;
}
updateValueMap(I, ResultReg);
return true;
}
/// Emit an STLR-family store of \p SrcReg to the address in \p AddrReg,
/// attaching \p MMO as the memory operand.
///
/// \returns false when \p VT is not one of i8, i16, i32 or i64.
bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
                                       Register AddrReg,
                                       MachineMemOperand *MMO) {
  unsigned Opc;
  switch (VT.SimpleTy) {
  case MVT::i64:
    Opc = AArch64::STLRX;
    break;
  case MVT::i32:
    Opc = AArch64::STLRW;
    break;
  case MVT::i16:
    Opc = AArch64::STLRH;
    break;
  case MVT::i8:
    Opc = AArch64::STLRB;
    break;
  default:
    return false;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  // Operand 0 is the stored value, operand 1 the address.
  SrcReg = constrainOperandRegClass(Desc, SrcReg, 0);
  AddrReg = constrainOperandRegClass(Desc, AddrReg, 1);

  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc);
  MIB.addReg(SrcReg);
  MIB.addReg(AddrReg);
  MIB.addMemOperand(MMO);
  return true;
}
/// Emit a store of \p SrcReg, of memory type \p VT, to \p Addr.
///
/// \returns false when TLI.allowsMisalignedMemoryAccesses(VT) is false or the
/// address cannot be simplified; true once the store has been emitted.
bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
                                MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return false;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (ScaleFactor == 0)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  const bool UseScaled =
      !(Addr.getOffset() < 0 || (Addr.getOffset() & (ScaleFactor - 1)));
  if (!UseScaled)
    ScaleFactor = 1;

  // Rows are addressing forms, columns are i1/i8, i16, i32, i64, f32, f64.
  static const unsigned OpcTable[4][6] = {
    { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
      AArch64::STURSi,   AArch64::STURDi },
    { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
      AArch64::STRSui,   AArch64::STRDui },
    { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
      AArch64::STRSroX,  AArch64::STRDroX },
    { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
      AArch64::STRSroW,  AArch64::STRDroW }
  };

  // Row: 0 = unscaled immediate, 1 = scaled immediate, 2 = register offset,
  // bumped by one when the offset register uses a UXTW/SXTW extend.
  const bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() &&
                            Addr.getReg() && Addr.getOffsetReg();
  unsigned Row = 0;
  if (UseRegOffset)
    Row = 2;
  else if (UseScaled)
    Row = 1;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    ++Row;

  unsigned Column;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1:
  case MVT::i8:
    Column = 0;
    break;
  case MVT::i16:
    Column = 1;
    break;
  case MVT::i32:
    Column = 2;
    break;
  case MVT::i64:
    Column = 3;
    break;
  case MVT::f32:
    Column = 4;
    break;
  case MVT::f64:
    Column = 5;
    break;
  }
  const unsigned Opc = OpcTable[Row][Column];

  // Storing an i1 requires special handling: unless the source is WZR, the
  // value is ANDed with 1 first.
  if (VT == MVT::i1 && SrcReg != AArch64::WZR) {
    Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    SrcReg = ANDReg;
  }

  // Create the base instruction, then add the operands.
  const MCInstrDesc &Desc = TII.get(Opc);
  SrcReg = constrainOperandRegClass(Desc, SrcReg, Desc.getNumDefs());
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc).addReg(SrcReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
  return true;
}
bool AArch64FastISel::selectStore(const Instruction *I) {
MVT VT;
const Value *Op0 = I->getOperand(0);
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
return false;
const Value *PtrV = I->getOperand(1);
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
if (Arg->hasSwiftErrorAttr())
return false;
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
if (Alloca->isSwiftError())
return false;
}
}
// Get the value to be stored into a register. Use the zero register directly
// when possible to avoid an unnecessary copy and a wasted register.
Register SrcReg;
if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
if (CI->isZero())
SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
} else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
if (CF->isZero() && !CF->isNegative()) {
VT = MVT::getIntegerVT(VT.getSizeInBits());
SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
}
}
if (!SrcReg)
SrcReg = getRegForValue(Op0);
if (!SrcReg)
return false;
auto *SI = cast<StoreInst>(I);
// Try to emit a STLR for seq_cst/release.
if (SI->isAtomic()) {
AtomicOrdering Ord = SI->getOrdering();
// The non-atomic instructions are sufficient for relaxed stores.
if (isReleaseOrStronger(Ord)) {
// The STLR addressing mode only supports a base reg; pass that directly.
Register AddrReg = getRegForValue(PtrV);
if (!AddrReg)
return false;
return emitStoreRelease(VT, SrcReg, AddrReg,
createMachineMemOperandFor(I));
}
}
// See if we can handle this address.
Address Addr;
if (!computeAddress(PtrV, Addr, Op0->getType()))
return false;
if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
return false;
return true;
}
/// Map an IR compare predicate to the AArch64 condition code that tests it
/// after a compare has set the flags.
///
/// AArch64CC::AL is returned as the "no single condition code" marker: for
/// FCMP_ONE and FCMP_UEQ (which need more than one compare) and for every
/// predicate not listed here.
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
  switch (Pred) {
  // Equality.
  case CmpInst::ICMP_EQ:
  case CmpInst::FCMP_OEQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_NE:
  case CmpInst::FCMP_UNE:
    return AArch64CC::NE;

  // Predicates that share a condition code between integer and FP forms.
  case CmpInst::ICMP_SGT:
  case CmpInst::FCMP_OGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
  case CmpInst::FCMP_OGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
  case CmpInst::FCMP_ULT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
  case CmpInst::FCMP_ULE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
  case CmpInst::FCMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_ULE:
  case CmpInst::FCMP_OLE:
    return AArch64CC::LS;

  // Integer-only predicates.
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;

  // FP-only predicates.
  case CmpInst::FCMP_OLT:
    return AArch64CC::MI;
  case CmpInst::FCMP_UGE:
    return AArch64CC::PL;
  case CmpInst::FCMP_ORD:
    return AArch64CC::VC;
  case CmpInst::FCMP_UNO:
    return AArch64CC::VS;

  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  default:
    // AL is our "false" for now. The other two need more compares.
    return AArch64CC::AL;
  }
}
/// Try to emit a combined compare-and-branch instruction.
///
/// Recognized shapes of the branch condition (which must be a CmpInst):
///  - EQ/NE against a null constant             -> CB(N)Z
///  - EQ/NE of (and X, power-of-two) against 0   -> TB(N)Z on that bit of X
///  - EQ/NE on an i1                             -> TB(N)Z on bit 0
///  - SLT/SGE against a null constant            -> TB(N)Z on the top bit
///  - SGT/SLE against -1                         -> TB(N)Z on the top bit
/// Returns false when the condition has none of these shapes, when the
/// function has the SpeculativeLoadHardening attribute, when the operand type
/// is unsupported or wider than 64 bits, or when the operand cannot be put in
/// a register.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
  // will not be produced, as they are conditional branch instructions that do
  // not set flags.
  if (FuncInfo.MF->getFunction().hasFnAttribute(
  Attribute::SpeculativeLoadHardening))
    return false;
  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);
  MVT VT;
  if (!isTypeSupported(LHS->getType(), VT))
    return false;
  unsigned BW = VT.getSizeInBits();
  if (BW > 64)
    return false;
  MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
  MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
  // Try to take advantage of fallthrough opportunities: if the taken block is
  // next in layout, branch on the inverse condition to the other block.
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    Predicate = CmpInst::getInversePredicate(Predicate);
  }
  // TestBit stays -1 for a compare-with-zero branch; otherwise it is the bit
  // index tested by TB(N)Z. IsCmpNE selects the "non-zero" opcode variant.
  int TestBit = -1;
  bool IsCmpNE;
  switch (Predicate) {
  default:
    return false;
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_NE:
    // Canonicalize so that a null constant, if any, is on the right.
    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
      std::swap(LHS, RHS);
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;
    // Fold (and X, 2^n) ==/!= 0 into a test of bit n of X.
    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
        const Value *AndLHS = AI->getOperand(0);
        const Value *AndRHS = AI->getOperand(1);
        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
          if (C->getValue().isPowerOf2())
            std::swap(AndLHS, AndRHS);
        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
          if (C->getValue().isPowerOf2()) {
            TestBit = C->getValue().logBase2();
            LHS = AndLHS;
          }
      }
    // An i1 compare is always a test of bit 0.
    if (VT == MVT::i1)
      TestBit = 0;
    IsCmpNE = Predicate == CmpInst::ICMP_NE;
    break;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Signed compare against zero: test the top bit.
    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;
    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
    break;
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SLE:
    // Signed compare against -1: also a test of the top bit.
    if (!isa<ConstantInt>(RHS))
      return false;
    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
      return false;
    TestBit = BW - 1;
    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
    break;
  } // end switch
  // Indexed as [IsBitTest][IsCmpNE][Is64Bit].
  static const unsigned OpcTable[2][2][2] = {
  { {AArch64::CBZW, AArch64::CBZX },
  {AArch64::CBNZW, AArch64::CBNZX} },
  { {AArch64::TBZW, AArch64::TBZX },
  {AArch64::TBNZW, AArch64::TBNZX} }
  };
  bool IsBitTest = TestBit != -1;
  bool Is64Bit = BW == 64;
  // A test of one of the low 32 bits uses the W-register opcode.
  if (TestBit < 32 && TestBit >= 0)
    Is64Bit = false;
  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
  const MCInstrDesc &II = TII.get(Opc);
  Register SrcReg = getRegForValue(LHS);
  if (!SrcReg)
    return false;
  // A 64-bit value used with a W-register opcode needs its low half extracted.
  if (BW == 64 && !Is64Bit)
    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
  // Values narrower than 32 bits are zero-extended to i32 before CB(N)Z.
  if ((BW < 32) && !IsBitTest)
    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
  // Emit the combined compare and branch instruction.
  SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
  MachineInstrBuilder MIB =
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
  .addReg(SrcReg);
  if (IsBitTest)
    MIB.addImm(TestBit);
  MIB.addMBB(TBB);
  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}
/// Select a branch instruction.
///
/// Unconditional branches are emitted directly. For conditional branches the
/// condition is handled, in order of preference, as:
///  - a single-use, available CmpInst (folded constant predicates, combined
///    compare-and-branch, or compare followed by one or two Bcc),
///  - a ConstantInt (unconditional B to the selected successor),
///  - an XALU intrinsic result accepted by foldXALUIntrinsic (Bcc),
///  - anything else: TB(N)Z on bit 0 of the condition register.
/// Returns false when the required compare or register cannot be produced.
bool AArch64FastISel::selectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  if (BI->isUnconditional()) {
    MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
    fastEmitBranch(MSucc, BI->getDebugLoc());
    return true;
  }
  MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
  MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    // A compare that has other users, or is not available, falls through to
    // the generic path at the bottom of the function.
    if (CI->hasOneUse() && isValueAvailable(CI)) {
      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default:
        break;
      case CmpInst::FCMP_FALSE:
        fastEmitBranch(FBB, MIMD.getDL());
        return true;
      case CmpInst::FCMP_TRUE:
        fastEmitBranch(TBB, MIMD.getDL());
        return true;
      }
      // Try to emit a combined compare-and-branch first.
      if (emitCompareAndBranch(BI))
        return true;
      // Try to take advantage of fallthrough opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }
      // Emit the cmp.
      if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;
      // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
      // instruction.
      AArch64CC::CondCode CC = getCompareCC(Predicate);
      AArch64CC::CondCode ExtraCC = AArch64CC::AL;
      switch (Predicate) {
      default:
        break;
      case CmpInst::FCMP_UEQ:
        ExtraCC = AArch64CC::EQ;
        CC = AArch64CC::VS;
        break;
      case CmpInst::FCMP_ONE:
        ExtraCC = AArch64CC::MI;
        CC = AArch64CC::GT;
        break;
      }
      assert((CC != AArch64CC::AL) && "Unexpected condition code.");
      // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
      if (ExtraCC != AArch64CC::AL) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
        .addImm(ExtraCC)
        .addMBB(TBB);
      }
      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
      .addImm(CC)
      .addMBB(TBB);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
    // A constant condition becomes an unconditional branch to the successor
    // it selects.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
    .addMBB(Target);
    // Obtain the branch probability and add the target to the successor list.
    if (FuncInfo.BPI) {
      auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
      BI->getParent(), Target->getBasicBlock());
      FuncInfo.MBB->addSuccessor(Target, BranchProbability);
    } else
      FuncInfo.MBB->addSuccessorWithoutProb(Target);
    return true;
  } else {
    AArch64CC::CondCode CC = AArch64CC::NE;
    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
      // Fake request the condition, otherwise the intrinsic might be completely
      // optimized away.
      Register CondReg = getRegForValue(BI->getCondition());
      if (!CondReg)
        return false;
      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
      .addImm(CC)
      .addMBB(TBB);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  }
  // Generic path: materialize the condition and branch on its low bit.
  Register CondReg = getRegForValue(BI->getCondition());
  if (!CondReg)
    return false;
  // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
  unsigned Opcode = AArch64::TBNZW;
  // If the taken block is next in layout, branch to the other block on the
  // opposite bit value instead.
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    Opcode = AArch64::TBZW;
  }
  const MCInstrDesc &II = TII.get(Opcode);
  Register ConstrainedCondReg
  = constrainOperandRegClass(II, CondReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
  .addReg(ConstrainedCondReg)
  .addImm(0)
  .addMBB(TBB);
  finishCondBranch(BI->getParent(), TBB, FBB);
  return true;
}
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
Register AddrReg = getRegForValue(BI->getOperand(0));
if (!AddrReg)
return false;
// Authenticated indirectbr is not implemented yet.
if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
return false;
// Emit the indirect branch.
const MCInstrDesc &II = TII.get(AArch64::BR);
AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
// Make sure the CFG is up-to-date.
for (const auto *Succ : BI->successors())
FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ));
return true;
}
/// Select an icmp/fcmp instruction, producing its boolean result in a 32-bit
/// GPR.
///
/// Predicates that optimizeCmpPredicate reduces to FCMP_FALSE or FCMP_TRUE
/// are materialized directly without a compare. Otherwise the compare is
/// emitted and the result is formed with CSINC on the inverted condition;
/// FCMP_UEQ and FCMP_ONE need two chained CSINCs.
/// Returns false for vector compares and when emitCmp fails.
bool AArch64FastISel::selectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Vectors of i1 are weird: bail out.
  if (CI->getType()->isVectorTy())
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  Register ResultReg;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_FALSE:
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(AArch64::WZR, getKillRegState(true));
    break;
  case CmpInst::FCMP_TRUE:
    ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
    break;
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  // Emit the cmp.
  if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  ResultReg = createResultReg(&AArch64::GPR32RegClass);

  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
  // condition codes are inverted, because they are used by CSINC.
  // The table is only ever read, so keep it (and the pointer into it) const.
  static const unsigned CondCodeTable[2][2] = {
    { AArch64CC::NE, AArch64CC::VC },
    { AArch64CC::PL, AArch64CC::LE }
  };
  const unsigned *CondCodes = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_UEQ:
    CondCodes = &CondCodeTable[0][0];
    break;
  case CmpInst::FCMP_ONE:
    CondCodes = &CondCodeTable[1][0];
    break;
  }

  if (CondCodes) {
    // The first CSINC produces a partial result from WZR/WZR; the second
    // CSINC combines it with the second condition.
    Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
            TmpReg1)
        .addReg(AArch64::WZR, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[0]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
            ResultReg)
        .addReg(TmpReg1, getKillRegState(true))
        .addReg(AArch64::WZR, getKillRegState(true))
        .addImm(CondCodes[1]);

    updateValueMap(I, ResultReg);
    return true;
  }

  // Now set a register based on the comparison.
  AArch64CC::CondCode CC = getCompareCC(Predicate);
  assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
          ResultReg)
      .addReg(AArch64::WZR, getKillRegState(true))
      .addReg(AArch64::WZR, getKillRegState(true))
      .addImm(invertedCC);

  updateValueMap(I, ResultReg);
  return true;
}
/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
/// value.
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
if (!SI->getType()->isIntegerTy(1))
return false;
const Value *Src1Val, *Src2Val;
unsigned Opc = 0;
bool NeedExtraOp = false;
if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
if (CI->isOne()) {
Src1Val = SI->getCondition();
Src2Val = SI->getFalseValue();
Opc = AArch64::ORRWrr;
} else {
assert(CI->isZero());
Src1Val = SI->getFalseValue();
Src2Val = SI->getCondition();
Opc = AArch64::BICWrr;
}
} else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
if (CI->isOne()) {
Src1Val = SI->getCondition();
Src2Val = SI->getTrueValue();
Opc = AArch64::ORRWrr;
NeedExtraOp = true;
} else {
assert(CI->isZero());
Src1Val = SI->getCondition();
Src2Val = SI->getTrueValue();
Opc = AArch64::ANDWrr;
}
}
if (!Opc)
return false;
Register Src1Reg = getRegForValue(Src1Val);
if (!Src1Reg)
return false;
Register Src2Reg = getRegForValue(Src2Val);
if (!Src2Reg)
return false;
if (NeedExtraOp)
Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
Src2Reg);
updateValueMap(SI, ResultReg);
return true;
}
/// Select a select instruction as CSEL/FCSEL.
///
/// Supported result types are i1/i8/i16/i32, i64, f32 and f64. The flags
/// consumed by the conditional select come from one of three places: an XALU
/// intrinsic accepted by foldXALUIntrinsic, a single-use available compare
/// emitted here, or a TST of bit 0 of the condition register. i1 selects with
/// a constant arm are first offered to optimizeSelect.
/// Returns false for unsupported types and when a needed register or compare
/// cannot be produced.
bool AArch64FastISel::selectSelect(const Instruction *I) {
  assert(isa<SelectInst>(I) && "Expected a select instruction.");
  MVT VT;
  if (!isTypeSupported(I->getType(), VT))
    return false;
  // Pick the conditional-select opcode and result register class by type.
  unsigned Opc;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Opc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = AArch64::FCSELSrrr;
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = AArch64::FCSELDrrr;
    RC = &AArch64::FPR64RegClass;
    break;
  }
  const SelectInst *SI = cast<SelectInst>(I);
  const Value *Cond = SI->getCondition();
  // CC is the condition for the final select; ExtraCC stays AL unless a
  // second select is needed (FCMP_UEQ / FCMP_ONE).
  AArch64CC::CondCode CC = AArch64CC::NE;
  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
  if (optimizeSelect(SI))
    return true;
  // Try to pickup the flags, so we don't have to emit another compare.
  if (foldXALUIntrinsic(CC, I, Cond)) {
    // Fake request the condition to force emission of the XALU intrinsic.
    Register CondReg = getRegForValue(Cond);
    if (!CondReg)
      return false;
  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
  isValueAvailable(Cond)) {
    const auto *Cmp = cast<CmpInst>(Cond);
    // Try to optimize or fold the cmp.
    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
    const Value *FoldSelect = nullptr;
    switch (Predicate) {
    default:
      break;
    case CmpInst::FCMP_FALSE:
      FoldSelect = SI->getFalseValue();
      break;
    case CmpInst::FCMP_TRUE:
      FoldSelect = SI->getTrueValue();
      break;
    }
    // A compare that folds to a constant selects one arm unconditionally.
    if (FoldSelect) {
      Register SrcReg = getRegForValue(FoldSelect);
      if (!SrcReg)
        return false;
      updateValueMap(I, SrcReg);
      return true;
    }
    // Emit the cmp.
    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
      return false;
    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
    CC = getCompareCC(Predicate);
    switch (Predicate) {
    default:
      break;
    case CmpInst::FCMP_UEQ:
      ExtraCC = AArch64CC::EQ;
      CC = AArch64CC::VS;
      break;
    case CmpInst::FCMP_ONE:
      ExtraCC = AArch64CC::MI;
      CC = AArch64CC::GT;
      break;
    }
    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
  } else {
    // Generic condition: set the flags from bit 0 of the condition register;
    // CC keeps its initial value NE.
    Register CondReg = getRegForValue(Cond);
    if (!CondReg)
      return false;
    const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
    CondReg = constrainOperandRegClass(II, CondReg, 1);
    // Emit a TST instruction (ANDS wzr, reg, #imm).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
    AArch64::WZR)
    .addReg(CondReg)
    .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
  }
  Register Src1Reg = getRegForValue(SI->getTrueValue());
  Register Src2Reg = getRegForValue(SI->getFalseValue());
  if (!Src1Reg || !Src2Reg)
    return false;
  // For the two-condition predicates, the first select feeds its result in as
  // the "false" operand of the second one.
  if (ExtraCC != AArch64CC::AL)
    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
  Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
  updateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::selectFPExt(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
return false;
Register Op = getRegForValue(V);
if (Op == 0)
return false;
Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
ResultReg).addReg(Op);
updateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
return false;
Register Op = getRegForValue(V);
if (Op == 0)
return false;
Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
ResultReg).addReg(Op);
updateValueMap(I, ResultReg);
return true;
}
// FPToUI and FPToSI
bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
Register SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
return false;
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
return false;
unsigned Opc;
if (SrcVT == MVT::f64) {
if (Signed)
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
else
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
} else {
if (Signed)
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
else
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
}
Register ResultReg = createResultReg(
DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
.addReg(SrcReg);
updateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
// Let regular ISEL handle FP16
if (DestVT == MVT::f16 || DestVT == MVT::bf16)
return false;
assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
"Unexpected value type.");
Register SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
return false;
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
SrcReg =
emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
if (!SrcReg)
return false;
}
unsigned Opc;
if (SrcVT == MVT::i64) {
if (Signed)
Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
else
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
} else {
if (Signed)
Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
else
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
}
Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
updateValueMap(I, ResultReg);
return true;
}
/// Lower the formal arguments of the current function when all of them fit
/// the simple register-only pattern handled here.
///
/// The first loop only validates: it bails out (returns false) on vararg
/// functions, calling conventions other than C and Swift, custom calling
/// conventions, arguments with any of the listed attributes, struct/array or
/// non-simple types, and when more than 8 GPR or 8 FPR arguments are needed.
/// The second loop then assigns W/X/H/S/D/Q registers in order and copies
/// each live-in into a fresh virtual register.
bool AArch64FastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;
  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;
  CallingConv::ID CC = F->getCallingConv();
  if (CC != CallingConv::C && CC != CallingConv::Swift)
    return false;
  if (Subtarget->hasCustomCallingConv())
    return false;
  // Only handle simple cases of up to 8 GPR and FPR each.
  unsigned GPRCnt = 0;
  unsigned FPRCnt = 0;
  for (auto const &Arg : F->args()) {
    if (Arg.hasAttribute(Attribute::ByVal) ||
    Arg.hasAttribute(Attribute::InReg) ||
    Arg.hasAttribute(Attribute::StructRet) ||
    Arg.hasAttribute(Attribute::SwiftSelf) ||
    Arg.hasAttribute(Attribute::SwiftAsync) ||
    Arg.hasAttribute(Attribute::SwiftError) ||
    Arg.hasAttribute(Attribute::Nest))
      return false;
    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
      return false;
    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple())
      return false;
    MVT VT = ArgVT.getSimpleVT().SimpleTy;
    // FP arguments need FP support; vector arguments need NEON and are only
    // handled in little-endian mode.
    if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
      return false;
    if (VT.isVector() &&
    (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
      return false;
    // Classify the argument as GPR or FPR; anything else is unsupported.
    if (VT >= MVT::i1 && VT <= MVT::i64)
      ++GPRCnt;
    else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
    VT.is128BitVector())
      ++FPRCnt;
    else
      return false;
    if (GPRCnt > 8 || FPRCnt > 8)
      return false;
  }
  // One row per register width: W, X, H, S, D, Q.
  static const MCPhysReg Registers[6][8] = {
  { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
  AArch64::W5, AArch64::W6, AArch64::W7 },
  { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
  AArch64::X5, AArch64::X6, AArch64::X7 },
  { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
  AArch64::H5, AArch64::H6, AArch64::H7 },
  { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
  AArch64::S5, AArch64::S6, AArch64::S7 },
  { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
  AArch64::D5, AArch64::D6, AArch64::D7 },
  { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
  AArch64::Q5, AArch64::Q6, AArch64::Q7 }
  };
  // GPRIdx is shared by the W and X rows, FPRIdx by the H/S/D/Q rows.
  unsigned GPRIdx = 0;
  unsigned FPRIdx = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
    unsigned SrcReg;
    const TargetRegisterClass *RC;
    if (VT >= MVT::i1 && VT <= MVT::i32) {
      SrcReg = Registers[0][GPRIdx++];
      RC = &AArch64::GPR32RegClass;
      VT = MVT::i32;
    } else if (VT == MVT::i64) {
      SrcReg = Registers[1][GPRIdx++];
      RC = &AArch64::GPR64RegClass;
    } else if (VT == MVT::f16 || VT == MVT::bf16) {
      SrcReg = Registers[2][FPRIdx++];
      RC = &AArch64::FPR16RegClass;
    } else if (VT == MVT::f32) {
      SrcReg = Registers[3][FPRIdx++];
      RC = &AArch64::FPR32RegClass;
    } else if ((VT == MVT::f64) || VT.is64BitVector()) {
      SrcReg = Registers[4][FPRIdx++];
      RC = &AArch64::FPR64RegClass;
    } else if (VT.is128BitVector()) {
      SrcReg = Registers[5][FPRIdx++];
      RC = &AArch64::FPR128RegClass;
    } else
      llvm_unreachable("Unexpected value type.");
    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
    TII.get(TargetOpcode::COPY), ResultReg)
    .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}
/// Assign locations to the outgoing call arguments, emit CALLSEQ_START, and
/// copy or store every argument to its assigned register or stack slot.
///
/// \p NumBytes receives the stack size computed by the calling-convention
/// analysis. Registers that receive an argument are appended to CLI.OutRegs.
/// Returns false when an argument cannot be put in a register, cannot be
/// extended, needs custom handling, or cannot be stored.
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
                                      SmallVectorImpl<MVT> &OutVTs,
                                      unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getStackSize();

  // Issue CALLSEQ_START
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(TII.getCallFrameSetupOpcode()))
      .addImm(NumBytes)
      .addImm(0);

  // Process the args.
  for (CCValAssign &Loc : ArgLocs) {
    const unsigned ValNo = Loc.getValNo();
    const Value *ArgVal = CLI.OutVals[ValNo];
    MVT ArgVT = OutVTs[ValNo];

    Register ArgReg = getRegForValue(ArgVal);
    if (!ArgReg)
      return false;

    // Handle arg promotion: SExt, ZExt, AExt. AExt is treated like ZExt.
    switch (Loc.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
    case CCValAssign::AExt:
    case CCValAssign::ZExt: {
      const bool IsZExt = Loc.getLocInfo() != CCValAssign::SExt;
      ArgReg = emitIntExt(ArgVT, ArgReg, Loc.getLocVT(), IsZExt);
      if (!ArgReg)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // FIXME: Handle custom args.
    if (Loc.needsCustom())
      return false;

    // Register argument: copy into the assigned physical register.
    if (Loc.isRegLoc()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), Loc.getLocReg())
          .addReg(ArgReg);
      CLI.OutRegs.push_back(Loc.getLocReg());
      continue;
    }

    assert(Loc.isMemLoc() && "Assuming store on stack.");

    // Don't emit stores for undef values.
    if (isa<UndefValue>(ArgVal))
      continue;

    // Need to store on the stack. On big-endian targets, values smaller than
    // 8 bytes are placed at the high-address end of their 8-byte slot.
    unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
    unsigned BEAlign = 0;
    if (ArgSize < 8 && !Subtarget->isLittleEndian())
      BEAlign = 8 - ArgSize;

    Address StackAddr;
    StackAddr.setKind(Address::RegBase);
    StackAddr.setReg(AArch64::SP);
    StackAddr.setOffset(Loc.getLocMemOffset() + BEAlign);

    Align Alignment = DL.getABITypeAlign(ArgVal->getType());
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getStack(*FuncInfo.MF, StackAddr.getOffset()),
        MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);

    if (!emitStore(ArgVT, ArgReg, StackAddr, MMO))
      return false;
  }
  return true;
}
/// Emit CALLSEQ_END and copy the call's return values out of their physical
/// registers into consecutive virtual registers.
///
/// On success CLI.ResultReg holds the first result register and
/// CLI.NumResultRegs the number of results; each physical return register is
/// appended to CLI.InRegs. Returns false for vector results on big-endian
/// targets.
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
  // Issue CALLSEQ_END
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(TII.getCallFrameDestroyOpcode()))
      .addImm(NumBytes)
      .addImm(0);

  // Now the return values.
  CallingConv::ID CC = CLI.CallConv;
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
  CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));

  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
  const unsigned NumResults = RVLocs.size();
  for (unsigned Idx = 0; Idx != NumResults; ++Idx) {
    CCValAssign &Loc = RVLocs[Idx];

    // TODO: Handle big-endian results
    if (Loc.getValVT().isVector() && !Subtarget->isLittleEndian())
      return false;

    // Copy result out of their specified physreg.
    Register CopyReg = ResultReg + Idx;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
            CopyReg)
        .addReg(Loc.getLocReg());
    CLI.InRegs.push_back(Loc.getLocReg());
  }

  CLI.ResultReg = ResultReg;
  CLI.NumResultRegs = NumResults;
  return true;
}
/// Lower a call described by \p CLI.
///
/// The function first rejects (returns false for) every call shape it does
/// not handle, then processes the arguments, emits either a direct BL or an
/// indirect call through a register, attaches the implicit argument register
/// uses and the call-preserved register mask, and finally hands off to
/// finishCall for the return values.
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  MCSymbol *Symbol = CLI.Symbol;
  // There must be something to call: either an IR callee or a symbol.
  if (!Callee && !Symbol)
    return false;
  // Allow SelectionDAG isel to handle calls to functions like setjmp that need
  // a bti instruction following the call.
  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
  !Subtarget->noBTIAtReturnTwice() &&
  MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    return false;
  // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
  if (CLI.CB && CLI.CB->isIndirectCall() &&
  CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
    return false;
  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;
  // FIXME: we could and should support this, but for now correctness at -O0 is
  // more important.
  if (Subtarget->isTargetILP32())
    return false;
  CodeModel::Model CM = TM.getCodeModel();
  // Only support the small-addressing and large code models.
  if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
    return false;
  // FIXME: Add large code model support for ELF.
  if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
    return false;
  // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
  // attribute. Check "RtLibUseGOT" instead.
  if (MF->getFunction().getParent()->getRtLibUseGOT())
    return false;
  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;
  if (Subtarget->isWindowsArm64EC())
    return false;
  // Arguments carrying any of these flags are not handled here.
  for (auto Flag : CLI.OutFlags)
    if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
    Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
      return false;
  // Set up the argument vectors.
  SmallVector<MVT, 16> OutVTs;
  OutVTs.reserve(CLI.OutVals.size());
  for (auto *Val : CLI.OutVals) {
    MVT VT;
    // Besides legal types, i1/i8/i16 arguments are accepted as well.
    if (!isTypeLegal(Val->getType(), VT) &&
    !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
      return false;
    // We don't handle vector parameters yet.
    if (VT.isVector() || VT.getSizeInBits() > 64)
      return false;
    OutVTs.push_back(VT);
  }
  Address Addr;
  if (Callee && !computeCallAddress(Callee, Addr))
    return false;
  // The weak function target may be zero; in that case we must use indirect
  // addressing via a stub on windows as it may be out of range for a
  // PC-relative jump.
  if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
  Addr.getGlobalValue()->hasExternalWeakLinkage())
    return false;
  // Handle the arguments now that we've gotten them.
  unsigned NumBytes;
  if (!processCallArgs(CLI, OutVTs, NumBytes))
    return false;
  const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  if (RegInfo->isAnyArgRegReserved(*MF))
    RegInfo->emitReservedArgRegCallError(*MF);
  // Issue the call.
  MachineInstrBuilder MIB;
  if (Subtarget->useSmallAddressing()) {
    // Small addressing: BL to a symbol/global, or an indirect call when the
    // callee address is in a register.
    const MCInstrDesc &II =
    TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
    if (Symbol)
      MIB.addSym(Symbol, 0);
    else if (Addr.getGlobalValue())
      MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
    else if (Addr.getReg()) {
      Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
      MIB.addReg(Reg);
    } else
      return false;
  } else {
    // Otherwise the callee address is first brought into a register and the
    // call is made indirectly through it.
    Register CallReg;
    if (Symbol) {
      // Load the symbol's address from the GOT with ADRP + LDR.
      Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
      ADRPReg)
      .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
      CallReg = createResultReg(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
      TII.get(AArch64::LDRXui), CallReg)
      .addReg(ADRPReg)
      .addSym(Symbol,
      AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (Addr.getGlobalValue())
      CallReg = materializeGV(Addr.getGlobalValue());
    else if (Addr.getReg())
      CallReg = Addr.getReg();
    if (!CallReg)
      return false;
    const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
    CallReg = constrainOperandRegClass(II, CallReg, 0);
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
  }
  // Add implicit physical register uses to the call.
  for (auto Reg : CLI.OutRegs)
    MIB.addReg(Reg, RegState::Implicit);
  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
  CLI.Call = MIB;
  // Finish off the call including any return values.
  return finishCall(CLI, NumBytes);
}
/// Decide whether a memcpy of \p Len bytes is small enough to be expanded
/// inline. With a known alignment the quotient Len / alignment must not exceed
/// 4; without one the length must be below 32 bytes.
bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
  if (!Alignment)
    return Len < 32;
  return Len / Alignment->value() <= 4;
}
/// Expand a small memcpy inline as a sequence of load/store pairs.
///
/// Returns false without emitting anything when isMemCpySmall() rejects the
/// copy, and false as soon as a load or a store cannot be emitted. Returns
/// true once all \p Len bytes have been covered.
bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
                                         uint64_t Len, MaybeAlign Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!isMemCpySmall(Len, Alignment))
    return false;

  // Pick the access type for the next chunk given the bytes still to copy.
  auto PickChunkVT = [&Alignment](uint64_t Remaining) -> MVT {
    if (!Alignment || *Alignment >= 8) {
      // Unknown or at least 8-byte alignment: only the length bounds the width.
      if (Remaining >= 8)
        return MVT::i64;
      if (Remaining >= 4)
        return MVT::i32;
      if (Remaining >= 2)
        return MVT::i16;
      return MVT::i8;
    }
    assert(Alignment && "Alignment is set in this branch");
    // Bound based on alignment.
    if (*Alignment == 4 && Remaining >= 4)
      return MVT::i32;
    if (*Alignment == 2 && Remaining >= 2)
      return MVT::i16;
    return MVT::i8;
  };

  // Remember where both addresses started; offsets are rebuilt from these.
  Address DestStart = Dest;
  Address SrcStart = Src;
  int64_t BytesCopied = 0;

  while (Len) {
    MVT VT = PickChunkVT(Len);

    Register ResultReg = emitLoad(VT, VT, Src);
    if (!ResultReg || !emitStore(VT, ResultReg, Dest))
      return false;

    int64_t ChunkBytes = VT.getSizeInBits() / 8;
    Len -= ChunkBytes;
    BytesCopied += ChunkBytes;

    // We need to recompute the unscaled offset for each iteration.
    Dest.setOffset(DestStart.getOffset() + BytesCopied);
    Src.setOffset(SrcStart.getOffset() + BytesCopied);
  }

  return true;
}
/// Try to fold the overflow condition of an XALU (*.with.overflow) intrinsic
/// into the instruction \p I that consumes it through \p Cond.
///
/// \p CC is written only on success, in which case true is returned.
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
                                        const Instruction *I,
                                        const Value *Cond) {
  // The condition must be an extractvalue applied to an intrinsic call.
  const auto *EV = dyn_cast<ExtractValueInst>(Cond);
  if (!EV)
    return false;
  const auto *II = dyn_cast<IntrinsicInst>(EV->getAggregateOperand());
  if (!II)
    return false;

  // Only legal i32/i64 arithmetic results are handled.
  const Function *Callee = II->getCalledFunction();
  Type *RetTy = cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  MVT RetVT;
  if (!isTypeLegal(RetTy, RetVT) || (RetVT != MVT::i32 && RetVT != MVT::i64))
    return false;

  const Value *LHS = II->getArgOperand(0);
  const Value *RHS = II->getArgOperand(1);
  // Canonicalize immediate to the RHS.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
    std::swap(LHS, RHS);

  // Simplify multiplies: a multiply by the constant 2 is treated as the
  // corresponding add.
  Intrinsic::ID IID = II->getIntrinsicID();
  const bool IsSMul = IID == Intrinsic::smul_with_overflow;
  const bool IsUMul = IID == Intrinsic::umul_with_overflow;
  if (IsSMul || IsUMul) {
    const auto *C = dyn_cast<ConstantInt>(RHS);
    if (C && C->getValue() == 2)
      IID = IsSMul ? Intrinsic::sadd_with_overflow
                   : Intrinsic::uadd_with_overflow;
  }

  // Map the (possibly simplified) intrinsic to its overflow condition code.
  AArch64CC::CondCode TmpCC;
  if (IID == Intrinsic::sadd_with_overflow ||
      IID == Intrinsic::ssub_with_overflow)
    TmpCC = AArch64CC::VS;
  else if (IID == Intrinsic::uadd_with_overflow)
    TmpCC = AArch64CC::HS;
  else if (IID == Intrinsic::usub_with_overflow)
    TmpCC = AArch64CC::LO;
  else if (IID == Intrinsic::smul_with_overflow ||
           IID == Intrinsic::umul_with_overflow)
    TmpCC = AArch64CC::NE;
  else
    return false;

  // Check if both instructions are in the same basic block.
  if (!isValueAvailable(II))
    return false;

  // Make sure nothing is in the way: walking backwards from the instruction
  // being selected to the intrinsic, only extractvalue instructions that read
  // the intrinsic's result are tolerated.
  const BasicBlock::const_iterator IntrinsicPos(II);
  BasicBlock::const_iterator Scan(I);
  for (--Scan; Scan != IntrinsicPos; --Scan) {
    const auto *EVI = dyn_cast<ExtractValueInst>(&*Scan);
    if (!EVI || EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}
/// Lower a call to an LLVM intrinsic directly to machine instructions.
///
/// \param II the intrinsic call being selected.
/// \returns true when the intrinsic was handled here (its result, if any, has
/// been recorded with updateValueMap); false for every intrinsic, operand or
/// type combination this function does not cover.
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// FIXME: Handle more intrinsics.
switch (II->getIntrinsicID()) {
default: return false;
// llvm.frameaddress: copy the frame register into a virtual register and then
// chase the saved frame pointers once per requested depth level.
case Intrinsic::frameaddress: {
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
MFI.setFrameAddressIsTaken(true);
const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
// Recursively load frame address
// ldr x0, [fp]
// ldr x0, [x0]
// ldr x0, [x0]
// ...
Register DestReg;
// The depth operand is cast to ConstantInt unconditionally, i.e. it is
// expected to be a compile-time constant.
unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
while (Depth--) {
DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
SrcReg, 0);
assert(DestReg && "Unexpected LDR instruction emission failure.");
SrcReg = DestReg;
}
// With a depth of zero no load is emitted and the copy of the frame register
// itself is the result.
updateValueMap(II, SrcReg);
return true;
}
// llvm.sponentry: materialize the address of a newly created fixed frame
// object with an ADDXri on its frame index.
case Intrinsic::sponentry: {
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
// SP = FP + Fixed Object + 16
int FI = MFI.CreateFixedObject(4, 0, false);
Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(AArch64::ADDXri), ResultReg)
.addFrameIndex(FI)
.addImm(0)
.addImm(0);
updateValueMap(II, ResultReg);
return true;
}
// llvm.memcpy / llvm.memmove: small constant-length memcpys are expanded
// inline; everything else becomes a call to the C library routine.
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const auto *MTI = cast<MemTransferInst>(II);
// Don't handle volatile.
if (MTI->isVolatile())
return false;
// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
// Small memcpy's are common enough that we want to do them without a call
// if possible.
uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
// Use the smaller of the two alignments; a side without a known alignment
// counts as alignment 1. Alignment stays unset only if neither is known.
MaybeAlign Alignment;
if (MTI->getDestAlign() || MTI->getSourceAlign())
Alignment = std::min(MTI->getDestAlign().valueOrOne(),
MTI->getSourceAlign().valueOrOne());
if (isMemCpySmall(Len, Alignment)) {
Address Dest, Src;
if (!computeAddress(MTI->getRawDest(), Dest) ||
!computeAddress(MTI->getRawSource(), Src))
return false;
// If the inline expansion fails we fall through to the library call below.
if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
// The library call path only handles a 64-bit length operand.
if (!MTI->getLength()->getType()->isIntegerTy(64))
return false;
if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
// The last intrinsic argument is not forwarded to the library call.
return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
}
// llvm.memset: always lowered to a call to "memset".
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
// Don't handle volatile.
if (MSI->isVolatile())
return false;
// Only a 64-bit length operand is handled.
if (!MSI->getLength()->getType()->isIntegerTy(64))
return false;
if (MSI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
// The last intrinsic argument is not forwarded to the library call.
return lowerCallTo(II, "memset", II->arg_size() - 1);
}
// llvm.sin / cos / tan / pow on f32 or f64: lowered to the matching runtime
// library call.
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::tan:
case Intrinsic::pow: {
MVT RetVT;
if (!isTypeLegal(II->getType(), RetVT))
return false;
if (RetVT != MVT::f32 && RetVT != MVT::f64)
return false;
// Rows: sin, cos, tan, pow. Columns: f32 (index 0), f64 (index 1).
static const RTLIB::Libcall LibCallTable[4][2] = {
{RTLIB::SIN_F32, RTLIB::SIN_F64},
{RTLIB::COS_F32, RTLIB::COS_F64},
{RTLIB::TAN_F32, RTLIB::TAN_F64},
{RTLIB::POW_F32, RTLIB::POW_F64}};
RTLIB::Libcall LC;
bool Is64Bit = RetVT == MVT::f64;
switch (II->getIntrinsicID()) {
default:
llvm_unreachable("Unexpected intrinsic.");
case Intrinsic::sin:
LC = LibCallTable[0][Is64Bit];
break;
case Intrinsic::cos:
LC = LibCallTable[1][Is64Bit];
break;
case Intrinsic::tan:
LC = LibCallTable[2][Is64Bit];
break;
case Intrinsic::pow:
LC = LibCallTable[3][Is64Bit];
break;
}
ArgListTy Args;
Args.reserve(II->arg_size());
// Populate the argument list.
for (auto &Arg : II->args()) {
ArgListEntry Entry;
Entry.Val = Arg;
Entry.Ty = Arg->getType();
Args.push_back(Entry);
}
// Build the call using the libcall's own calling convention and symbol name.
CallLoweringInfo CLI;
MCContext &Ctx = MF->getContext();
CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
TLI.getLibcallName(LC), std::move(Args));
if (!lowerCallTo(CLI))
return false;
updateValueMap(II, CLI.ResultReg);
return true;
}
// llvm.fabs on f32/f64: a single FABS instruction.
case Intrinsic::fabs: {
MVT VT;
if (!isTypeLegal(II->getType(), VT))
return false;
unsigned Opc;
switch (VT.SimpleTy) {
default:
return false;
case MVT::f32:
Opc = AArch64::FABSSr;
break;
case MVT::f64:
Opc = AArch64::FABSDr;
break;
}
Register SrcReg = getRegForValue(II->getOperand(0));
if (!SrcReg)
return false;
Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
.addReg(SrcReg);
updateValueMap(II, ResultReg);
return true;
}
// llvm.trap: BRK with immediate 1.
case Intrinsic::trap:
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
.addImm(1);
return true;
// llvm.debugtrap: BRK with immediate 0xF000.
case Intrinsic::debugtrap:
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
.addImm(0xF000);
return true;
// llvm.sqrt: delegated to the generated selector for ISD::FSQRT.
case Intrinsic::sqrt: {
Type *RetTy = II->getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
Register Op0Reg = getRegForValue(II->getOperand(0));
if (!Op0Reg)
return false;
Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
if (!ResultReg)
return false;
updateValueMap(II, ResultReg);
return true;
}
// llvm.{s,u}{add,sub,mul}.with.overflow on i32/i64: emit the flag-setting
// arithmetic, then materialize the overflow bit from the condition flags.
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow: {
// This implements the basic lowering of the xalu with overflow intrinsics.
const Function *Callee = II->getCalledFunction();
auto *Ty = cast<StructType>(Callee->getReturnType());
// Element 0 of the returned struct is the arithmetic result type.
Type *RetTy = Ty->getTypeAtIndex(0U);
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
if (VT != MVT::i32 && VT != MVT::i64)
return false;
const Value *LHS = II->getArgOperand(0);
const Value *RHS = II->getArgOperand(1);
// Canonicalize immediate to the RHS.
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
std::swap(LHS, RHS);
// Simplify multiplies.
// A multiply by the constant 2 is rewritten as LHS + LHS.
Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
default:
break;
case Intrinsic::smul_with_overflow:
if (const auto *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue() == 2) {
IID = Intrinsic::sadd_with_overflow;
RHS = LHS;
}
break;
case Intrinsic::umul_with_overflow:
if (const auto *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue() == 2) {
IID = Intrinsic::uadd_with_overflow;
RHS = LHS;
}
break;
}
// ResultReg1 receives the arithmetic result, ResultReg2 the overflow bit.
// MulReg is only set by the multiply cases. CC is the condition that holds
// when overflow occurred.
Register ResultReg1, ResultReg2, MulReg;
AArch64CC::CondCode CC = AArch64CC::Invalid;
switch (IID) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
CC = AArch64CC::VS;
break;
case Intrinsic::uadd_with_overflow:
ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
CC = AArch64CC::HS;
break;
case Intrinsic::ssub_with_overflow:
ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
CC = AArch64CC::VS;
break;
case Intrinsic::usub_with_overflow:
ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
CC = AArch64CC::LO;
break;
case Intrinsic::smul_with_overflow: {
CC = AArch64CC::NE;
Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
if (VT == MVT::i32) {
// Widening 32x32->64 multiply; the flags are set by comparing the full
// 64-bit product with the sign-extension of its low 32 bits.
MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
Register MulSubReg =
fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
// cmp xreg, wreg, sxtw
emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
/*WantResult=*/false);
MulReg = MulSubReg;
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
// LHSReg and RHSReg cannot be killed by this Mul, since they are
// reused in the next instruction.
MulReg = emitMul_rr(VT, LHSReg, RHSReg);
// Compare the high half (MULHS) with the low half shifted right
// arithmetically by 63.
Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
/*WantResult=*/false);
}
break;
}
case Intrinsic::umul_with_overflow: {
CC = AArch64CC::NE;
Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
if (VT == MVT::i32) {
// Widening 32x32->64 multiply; the flags are set by testing the upper 32
// bits of the product.
MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
// tst xreg, #0xffffffff00000000
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(AArch64::ANDSXri), AArch64::XZR)
.addReg(MulReg)
.addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
// LHSReg and RHSReg cannot be killed by this Mul, since they are
// reused in the next instruction.
MulReg = emitMul_rr(VT, LHSReg, RHSReg);
// Set the flags by subtracting the high half (MULHU) from XZR.
Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
}
break;
}
}
// For multiplies, copy the product into a fresh result register so that it is
// created immediately before the overflow register below.
if (MulReg) {
ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
}
if (!ResultReg1)
return false;
// Materialize the overflow bit: CSINC Wd, WZR, WZR with the inverted
// condition.
ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
AArch64::WZR, AArch64::WZR,
getInvertedCondCode(CC));
(void)ResultReg2;
// updateValueMap below registers two consecutive registers starting at
// ResultReg1, so the overflow register must directly follow it.
assert((ResultReg1 + 1) == ResultReg2 &&
"Nonconsecutive result registers.");
updateValueMap(II, ResultReg1, 2);
return true;
}
// llvm.aarch64.crc32*: a single CRC32 instruction; requires the CRC
// subtarget feature.
case Intrinsic::aarch64_crc32b:
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32w:
case Intrinsic::aarch64_crc32x:
case Intrinsic::aarch64_crc32cb:
case Intrinsic::aarch64_crc32ch:
case Intrinsic::aarch64_crc32cw:
case Intrinsic::aarch64_crc32cx: {
if (!Subtarget->hasCRC())
return false;
unsigned Opc;
switch (II->getIntrinsicID()) {
default:
llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::aarch64_crc32b:
Opc = AArch64::CRC32Brr;
break;
case Intrinsic::aarch64_crc32h:
Opc = AArch64::CRC32Hrr;
break;
case Intrinsic::aarch64_crc32w:
Opc = AArch64::CRC32Wrr;
break;
case Intrinsic::aarch64_crc32x:
Opc = AArch64::CRC32Xrr;
break;
case Intrinsic::aarch64_crc32cb:
Opc = AArch64::CRC32CBrr;
break;
case Intrinsic::aarch64_crc32ch:
Opc = AArch64::CRC32CHrr;
break;
case Intrinsic::aarch64_crc32cw:
Opc = AArch64::CRC32CWrr;
break;
case Intrinsic::aarch64_crc32cx:
Opc = AArch64::CRC32CXrr;
break;
}
Register LHSReg = getRegForValue(II->getArgOperand(0));
Register RHSReg = getRegForValue(II->getArgOperand(1));
if (!LHSReg || !RHSReg)
return false;
// The result is always created in the 32-bit GPR class.
Register ResultReg =
fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
updateValueMap(II, ResultReg);
return true;
}
}
return false;
}
bool AArch64FastISel::selectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
if (!FuncInfo.CanLowerReturn)
return false;
if (F.isVarArg())
return false;
if (TLI.supportSwiftError() &&
F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
return false;
if (TLI.supportSplitCSR(FuncInfo.MF))
return false;
// Build a list of return value registers.
SmallVector<Register, 4> RetRegs;
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
// Only handle a single return value for now.
if (ValLocs.size() != 1)
return false;
CCValAssign &VA = ValLocs[0];
const Value *RV = Ret->getOperand(0);
// Don't bother handling odd stuff for now.
if ((VA.getLocInfo() != CCValAssign::Full) &&
(VA.getLocInfo() != CCValAssign::BCvt))
return false;
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
Register Reg = getRegForValue(RV);
if (!Reg)
return false;
Register SrcReg = Reg + VA.getValNo();
Register DestReg = VA.getLocReg();
// Avoid a cross-class copy. This is very unlikely.
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
return false;
EVT RVEVT = TLI.getValueType(DL, RV->getType());
if (!RVEVT.isSimple())
return false;
// Vectors (of > 1 lane) in big endian need tricky handling.
if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
!Subtarget->isLittleEndian())
return false;
MVT RVVT = RVEVT.getSimpleVT();
if (RVVT == MVT::f128)
return false;
MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
return false;
if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
return false;
bool IsZExt = Outs[0].Flags.isZExt();
SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
if (!SrcReg)
return false;
}
// "Callee" (i.e. value producer) zero extends pointers at function
// boundary.
if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
// Make the copy.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
// Add register to return instruction.
RetRegs.push_back(VA.getLocReg());
}
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(AArch64::RET_ReallyLR));
for (Register RetReg : RetRegs)
MIB.addReg(RetReg, RegState::Implicit);
return true;
}
bool AArch64FastISel::selectTrunc(const Instruction *I) {
Type *DestTy = I->getType();
Value *Op = I->getOperand(0);
Type *SrcTy = Op->getType();
EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
EVT DestEVT = TLI.getValueType(DL, DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
SrcVT != MVT::i8)
return false;
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
DestVT != MVT::i1)
return false;
Register SrcReg = getRegForValue(Op);
if (!SrcReg)
return false;
// If we're truncating from i64 to a smaller non-legal type then generate an
// AND. Otherwise, we know the high bits are undefined and a truncate only
// generate a COPY. We cannot mark the source register also as result
// register, because this can incorrectly transfer the kill flag onto the
// source register.
Register ResultReg;
if (SrcVT == MVT::i64) {
uint64_t Mask = 0;
switch (DestVT.SimpleTy) {
default:
// Trunc i64 to i32 is handled by the target-independent fast-isel.
return false;
case MVT::i1:
Mask = 0x1;
break;
case MVT::i8:
Mask = 0xff;
break;
case MVT::i16:
Mask = 0xffff;
break;
}
// Issue an extract_subreg to get the lower 32-bits.
Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
AArch64::sub_32);
// Create the AND instruction which performs the actual truncation.
ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
assert(ResultReg && "Unexpected AND instruction emission failure.");
} else {
ResultReg = createResultReg(&AArch64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SrcReg);
}
updateValueMap(I, ResultReg);
return true;
}
/// Extend the i1 value in \p SrcReg to \p DestVT (i8, i16, i32 or i64).
///
/// Returns the register holding the extended value, or an invalid Register
/// for the unsupported sign-extension to i64.
Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");

  // i8 and i16 are handled as i32, so only the i64 case needs to be told
  // apart.
  const bool Is64Bit = DestVT == MVT::i64;

  if (!IsZExt) {
    // FIXME: We're SExt i1 to i64.
    if (Is64Bit)
      return Register();
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
                            0, 0);
  }

  Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
  assert(ResultReg && "Unexpected AND instruction emission failure.");
  if (!Is64Bit)
    return ResultReg;

  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
  // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
  Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(AArch64::SUBREG_TO_REG), Reg64)
      .addImm(0)
      .addReg(ResultReg)
      .addImm(AArch64::sub_32);
  return Reg64;
}
/// Emit a register-register multiply as a multiply-add with the zero
/// register as addend. i8/i16/i32 use the 32-bit form, i64 the 64-bit form;
/// any other type yields an invalid Register.
Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
  switch (RetVT.SimpleTy) {
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return fastEmitInst_rrr(AArch64::MADDWrrr, &AArch64::GPR32RegClass, Op0,
                            Op1, AArch64::WZR);
  case MVT::i64:
    return fastEmitInst_rrr(AArch64::MADDXrrr, &AArch64::GPR64RegClass, Op0,
                            Op1, AArch64::XZR);
  default:
    return Register();
  }
}
/// Emit SMADDL with XZR as addend, producing a 64-bit result. Only
/// \p RetVT == i64 is accepted; otherwise an invalid Register is returned.
Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
  if (RetVT == MVT::i64)
    return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, Op0,
                            Op1, AArch64::XZR);
  return Register();
}
/// Emit UMADDL with XZR as addend, producing a 64-bit result. Only
/// \p RetVT == i64 is accepted; otherwise an invalid Register is returned.
Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
  if (RetVT == MVT::i64)
    return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, Op0,
                            Op1, AArch64::XZR);
  return Register();
}
/// Emit a left shift by a register amount (LSLV).
///
/// For i8/i16 the shift amount is masked to the type's bit pattern before
/// the shift and the result is masked again afterwards. Types other than
/// i8/i16/i32/i64 yield an invalid Register.
Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
                                     Register Op1Reg) {
  // Non-zero only for the narrow types that need explicit truncation.
  uint64_t NarrowMask = 0;
  switch (RetVT.SimpleTy) {
  case MVT::i8:
    NarrowMask = 0xff;
    break;
  case MVT::i16:
    NarrowMask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  default:
    return Register();
  }

  const bool NeedTrunc = NarrowMask != 0;
  const bool Is64Bit = RetVT == MVT::i64;
  const unsigned Opc = Is64Bit ? AArch64::LSLVXr : AArch64::LSLVWr;
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  if (NeedTrunc)
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, NarrowMask);

  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (!NeedTrunc)
    return ResultReg;
  return emitAnd_ri(MVT::i32, ResultReg, NarrowMask);
}
/// Emit a left shift of \p Op0 by the immediate \p Shift, folding the
/// zero-/sign-extension from \p SrcVT to \p RetVT into the shift.
///
/// A zero shift becomes a COPY (same types) or a plain extension. A shift of
/// at least the width of \p RetVT yields an invalid Register.
Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  const bool Is64Bit = (RetVT == MVT::i64);
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT != SrcVT)
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(Op0);
    return ResultReg;
  }

  // Don't deal with undefined shifts.
  unsigned DstBits = RetVT.getSizeInBits();
  if (Shift >= DstBits)
    return Register();

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s
  //
  // Worked examples for
  //   %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  //
  // %2 = shl i16 %1, 4
  //   Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  //   0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  //   0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  //   0b0000_0000_0000_0000__0000_1010_1010_0000 zext
  //
  // %2 = shl i16 %1, 8
  //   Wd<32+7-24,32-24> = Wn<7:0>
  //   0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  //   0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  //   0b0000_0000_0000_0000__1010_1010_0000_0000 zext
  //
  // %2 = shl i16 %1, 12
  //   Wd<32+3-20,32-20> = Wn<3:0>
  //   0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  //   0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  //   0b0000_0000_0000_0000__1010_0000_0000_0000 zext
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);

  // Unsigned vs. signed bitfield move, in the register width of the result.
  unsigned Opc;
  if (IsZExt)
    Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
  else
    Opc = Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri;

  // A source of at most 32 bits feeding a 64-bit result is first placed into
  // a 64-bit register with SUBREG_TO_REG.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register Wide = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Wide)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = Wide;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
/// Emit a logical right shift by a register amount (LSRV).
///
/// For i8/i16 both the value and the shift amount are masked to the type's
/// bit pattern before the shift and the result is masked again afterwards.
/// Types other than i8/i16/i32/i64 yield an invalid Register.
Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
                                     Register Op1Reg) {
  // Non-zero only for the narrow types that need explicit truncation.
  uint64_t NarrowMask = 0;
  switch (RetVT.SimpleTy) {
  case MVT::i8:
    NarrowMask = 0xff;
    break;
  case MVT::i16:
    NarrowMask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  default:
    return Register();
  }

  const bool NeedTrunc = NarrowMask != 0;
  const bool Is64Bit = RetVT == MVT::i64;
  const unsigned Opc = Is64Bit ? AArch64::LSRVXr : AArch64::LSRVWr;
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  if (NeedTrunc) {
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, NarrowMask);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, NarrowMask);
  }

  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (!NeedTrunc)
    return ResultReg;
  return emitAnd_ri(MVT::i32, ResultReg, NarrowMask);
}
/// Emit a logical right shift of \p Op0 by the immediate \p Shift, folding a
/// zero-extension from \p SrcVT to \p RetVT into the shift. A sign-extension
/// cannot be folded and is emitted separately first.
///
/// A zero shift becomes a COPY (same types) or a plain extension. A shift of
/// at least the width of \p RetVT yields an invalid Register.
Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  const bool Is64Bit = (RetVT == MVT::i64);
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT != SrcVT)
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(Op0);
    return ResultReg;
  }

  // Don't deal with undefined shifts.
  unsigned DstBits = RetVT.getSizeInBits();
  if (Shift >= DstBits)
    return Register();

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s
  //
  // Worked examples for
  //   %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  //
  // %2 = lshr i16 %1, 4
  //   Wd<7-4:0> = Wn<7:4>
  //   0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  //   0b0000_0000_0000_0000__0000_0000_0000_1010 zext
  //
  // %2 = lshr i16 %1, 8
  //   Wd<7-7,0> = Wn<7:7>
  //   0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0000 zext
  //
  // %2 = lshr i16 %1, 12
  //   Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  //   0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0000 zext
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned SrcBits = SrcVT.getSizeInBits();

  // Shifting out every bit of a zero-extended source leaves zero.
  if (IsZExt && Shift >= SrcBits)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend, then continue as if the source already had the
  // result type.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return Register();
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;

  // Unsigned vs. signed bitfield move, in the register width of the result.
  unsigned Opc;
  if (IsZExt)
    Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
  else
    Opc = Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri;

  // A source of at most 32 bits feeding a 64-bit result is first placed into
  // a 64-bit register with SUBREG_TO_REG.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register Wide = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Wide)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = Wide;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
/// Emit an arithmetic right shift by a register amount (ASRV).
///
/// For i8/i16 the value is first sign-extended to i32 and the shift amount is
/// masked to the type's bit pattern; the result is masked again afterwards.
/// Types other than i8/i16/i32/i64 yield an invalid Register.
Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
                                     Register Op1Reg) {
  // Non-zero only for the narrow types that need explicit truncation.
  uint64_t NarrowMask = 0;
  switch (RetVT.SimpleTy) {
  case MVT::i8:
    NarrowMask = 0xff;
    break;
  case MVT::i16:
    NarrowMask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  default:
    return Register();
  }

  const bool NeedTrunc = NarrowMask != 0;
  const bool Is64Bit = RetVT == MVT::i64;
  const unsigned Opc = Is64Bit ? AArch64::ASRVXr : AArch64::ASRVWr;
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  if (NeedTrunc) {
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, NarrowMask);
  }

  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (!NeedTrunc)
    return ResultReg;
  return emitAnd_ri(MVT::i32, ResultReg, NarrowMask);
}
/// Emit an arithmetic right shift of \p Op0 by the immediate \p Shift,
/// folding the zero-/sign-extension from \p SrcVT to \p RetVT into the shift.
///
/// A zero shift becomes a COPY (same types) or a plain extension. A shift of
/// at least the width of \p RetVT yields an invalid Register.
Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  const bool Is64Bit = (RetVT == MVT::i64);
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT != SrcVT)
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(Op0);
    return ResultReg;
  }

  // Don't deal with undefined shifts.
  unsigned DstBits = RetVT.getSizeInBits();
  if (Shift >= DstBits)
    return Register();

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s
  //
  // Worked examples for
  //   %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  //
  // %2 = ashr i16 %1, 4
  //   Wd<7-4:0> = Wn<7:4>
  //   0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  //   0b0000_0000_0000_0000__0000_0000_0000_1010 zext
  //
  // %2 = ashr i16 %1, 8
  //   Wd<7-7,0> = Wn<7:7>
  //   0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0000 zext
  //
  // %2 = ashr i16 %1, 12
  //   Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  //   0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  //   0b0000_0000_0000_0000__0000_0000_0000_0000 zext
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned SrcBits = SrcVT.getSizeInBits();

  // Shifting out every bit of a zero-extended source leaves zero.
  if (IsZExt && Shift >= SrcBits)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;

  // Unsigned vs. signed bitfield move, in the register width of the result.
  unsigned Opc;
  if (IsZExt)
    Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
  else
    Opc = Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri;

  // A source of at most 32 bits feeding a 64-bit result is first placed into
  // a 64-bit register with SUBREG_TO_REG.
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register Wide = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Wide)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = Wide;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
/// Zero- or sign-extend the integer in \p SrcReg from \p SrcVT to \p DestVT
/// using a bitfield move.
///
/// Returns the register holding the extended value, or an invalid Register
/// when the type pair is not supported.
Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  const bool DestSupported = DestVT == MVT::i8 || DestVT == MVT::i16 ||
                             DestVT == MVT::i32 || DestVT == MVT::i64;
  const bool SrcSupported = SrcVT == MVT::i1 || SrcVT == MVT::i8 ||
                            SrcVT == MVT::i16 || SrcVT == MVT::i32;
  if (!DestSupported || !SrcSupported)
    return Register();

  // Pick the unsigned or signed bitfield move in the requested width.
  auto BitfieldMove = [IsZExt](bool Wide) -> unsigned {
    if (Wide)
      return IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    return IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
  };

  const bool Is64Bit = DestVT == MVT::i64;
  unsigned Opc;
  // Index of the most significant source bit taken by the bitfield move.
  unsigned Imm = 0;
  switch (SrcVT.SimpleTy) {
  default:
    return Register();
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    Opc = BitfieldMove(Is64Bit);
    Imm = 7;
    break;
  case MVT::i16:
    Opc = BitfieldMove(Is64Bit);
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = BitfieldMove(/*Wide=*/true);
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32: every non-i64 destination uses a 32-bit
  // register.
  if (!Is64Bit)
    return fastEmitInst_rii(Opc, &AArch64::GPR32RegClass, SrcReg, 0, Imm);

  // For a 64-bit destination, first place the source into a 64-bit register.
  Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(AArch64::SUBREG_TO_REG), Src64)
      .addImm(0)
      .addReg(SrcReg)
      .addImm(AArch64::sub_32);
  return fastEmitInst_rii(Opc, &AArch64::GPR64RegClass, Src64, 0, Imm);
}
/// Return true if the opcode of \p LI is one of the load opcodes that this
/// file treats as zero-extending (listed below, grouped by opcode suffix).
static bool isZExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  // LDUR*i forms.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  // LDR*ui forms.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  // LDR*roX forms.
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRWroX:
  // LDR*roW forms.
  case AArch64::LDRBBroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRWroW:
    return true;
  default:
    return false;
  }
}
/// Return true if the opcode of \p LI is one of the load opcodes that this
/// file treats as sign-extending (listed below, grouped by opcode suffix).
static bool isSExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  // LDURS*i forms.
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSWi:
  // LDRS*ui forms.
  case AArch64::LDRSBWui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSBXui:
  case AArch64::LDRSHXui:
  case AArch64::LDRSWui:
  // LDRS*roX forms.
  case AArch64::LDRSBWroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  // LDRS*roW forms.
  case AArch64::LDRSBWroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
    return true;
  default:
    return false;
  }
}
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
MVT SrcVT) {
const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
if (!LI || !LI->hasOneUse())
return false;
// Check if the load instruction has already been selected.
Register Reg = lookUpRegForValue(LI);
if (!Reg)
return false;
MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
if (!MI)
return false;
// Check if the correct load instruction has been emitted - SelectionDAG might
// have emitted a zero-extending load, but we need a sign-extending load.
bool IsZExt = isa<ZExtInst>(I);
const auto *LoadMI = MI;
if (LoadMI->getOpcode() == TargetOpcode::COPY &&
LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
Register LoadReg = MI->getOperand(1).getReg();
LoadMI = MRI.getUniqueVRegDef(LoadReg);
assert(LoadMI && "Expected valid instruction");
}
if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
return false;
// Nothing to be done.
if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
updateValueMap(I, Reg);
return true;
}
if (IsZExt) {
Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(AArch64::SUBREG_TO_REG), Reg64)
.addImm(0)
.addReg(Reg, getKillRegState(true))
.addImm(AArch64::sub_32);
Reg = Reg64;
} else {
assert((MI->getOpcode() == TargetOpcode::COPY &&
MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
"Expected copy instruction");
Reg = MI->getOperand(1).getReg();
MachineBasicBlock::iterator I(MI);
removeDeadCode(I, std::next(I));
}
updateValueMap(I, Reg);
return true;
}
bool AArch64FastISel::selectIntExt(const Instruction *I) {
assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
"Unexpected integer extend instruction.");
MVT RetVT;
MVT SrcVT;
if (!isTypeSupported(I->getType(), RetVT))
return false;
if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
return false;
// Try to optimize already sign-/zero-extended values from load instructions.
if (optimizeIntExtLoad(I, RetVT, SrcVT))
return true;
Register SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
return false;
// Try to optimize already sign-/zero-extended values from function arguments.
bool IsZExt = isa<ZExtInst>(I);
if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
TII.get(AArch64::SUBREG_TO_REG), ResultReg)
.addImm(0)
.addReg(SrcReg)
.addImm(AArch64::sub_32);
SrcReg = ResultReg;
}
updateValueMap(I, SrcReg);
return true;
}
}
Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
if (!DestEVT.isSimple())
return false;
MVT DestVT = DestEVT.getSimpleVT();
if (DestVT != MVT::i64 && DestVT != MVT::i32)
return false;
unsigned DivOpc;
bool Is64bit = (DestVT == MVT::i64);
switch (ISDOpcode) {
default:
return false;
case ISD::SREM:
DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
break;
case ISD::UREM:
DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
break;
}
unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
Register Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
Register Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
assert(QuotReg && "Unexpected DIV instruction emission failure.");
// The remainder is computed as numerator - (quotient * denominator) using the
// MSUB instruction.
Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
updateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::selectMul(const Instruction *I) {
MVT VT;
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
return false;
if (VT.isVector())
return selectBinaryOp(I, ISD::MUL);
const Value *Src0 = I->getOperand(0);
const Value *Src1 = I->getOperand(1);
if (const auto *C = dyn_cast<ConstantInt>(Src0))
if (C->getValue().isPowerOf2())
std::swap(Src0, Src1);
// Try to simplify to a shift instruction.
if (const auto *C = dyn_cast<ConstantInt>(Src1))
if (C->getValue().isPowerOf2()) {
uint64_t ShiftVal = C->getValue().logBase2();
MVT SrcVT = VT;
bool IsZExt = true;
if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
if (!isIntExtFree(ZExt)) {
MVT VT;
if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
SrcVT = VT;
IsZExt = true;
Src0 = ZExt->getOperand(0);
}
}
} else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
if (!isIntExtFree(SExt)) {
MVT VT;
if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
SrcVT = VT;
IsZExt = false;
Src0 = SExt->getOperand(0);
}
}
}
Register Src0Reg = getRegForValue(Src0);
if (!Src0Reg)
return false;
Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
if (ResultReg) {
updateValueMap(I, ResultReg);
return true;
}
}
Register Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
Register Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::selectShift(const Instruction *I) {
MVT RetVT;
if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
return false;
if (RetVT.isVector())
return selectOperator(I, I->getOpcode());
if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
Register ResultReg;
uint64_t ShiftVal = C->getZExtValue();
MVT SrcVT = RetVT;
bool IsZExt = I->getOpcode() != Instruction::AShr;
const Value *Op0 = I->getOperand(0);
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
if (!isIntExtFree(ZExt)) {
MVT TmpVT;
if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
SrcVT = TmpVT;
IsZExt = true;
Op0 = ZExt->getOperand(0);
}
}
} else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
if (!isIntExtFree(SExt)) {
MVT TmpVT;
if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
SrcVT = TmpVT;
IsZExt = false;
Op0 = SExt->getOperand(0);
}
}
}
Register Op0Reg = getRegForValue(Op0);
if (!Op0Reg)
return false;
switch (I->getOpcode()) {
default: llvm_unreachable("Unexpected instruction.");
case Instruction::Shl:
ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
break;
case Instruction::AShr:
ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
break;
case Instruction::LShr:
ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
break;
}
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
Register Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
Register Op1Reg = getRegForValue(I->getOperand(1));
if (!Op1Reg)
return false;
Register ResultReg;
switch (I->getOpcode()) {
default: llvm_unreachable("Unexpected instruction.");
case Instruction::Shl:
ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
break;
case Instruction::AShr:
ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
break;
case Instruction::LShr:
ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
break;
}
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::selectBitCast(const Instruction *I) {
MVT RetVT, SrcVT;
if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
return false;
if (!isTypeLegal(I->getType(), RetVT))
return false;
unsigned Opc;
if (RetVT == MVT::f32 && SrcVT == MVT::i32)
Opc = AArch64::FMOVWSr;
else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
Opc = AArch64::FMOVXDr;
else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
Opc = AArch64::FMOVSWr;
else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
Opc = AArch64::FMOVDXr;
else
return false;
const TargetRegisterClass *RC = nullptr;
switch (RetVT.SimpleTy) {
default: llvm_unreachable("Unexpected value type.");
case MVT::i32: RC = &AArch64::GPR32RegClass; break;
case MVT::i64: RC = &AArch64::GPR64RegClass; break;
case MVT::f32: RC = &AArch64::FPR32RegClass; break;
case MVT::f64: RC = &AArch64::FPR64RegClass; break;
}
Register Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
/// Select an f32/f64 frem by lowering it to a call to the corresponding
/// REM_F32/REM_F64 runtime library routine. Other types are rejected.
bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  RTLIB::Libcall LC;
  if (RetVT == MVT::f32)
    LC = RTLIB::REM_F32;
  else if (RetVT == MVT::f64)
    LC = RTLIB::REM_F64;
  else
    return false;

  // Every operand of the instruction becomes one call argument.
  ArgListTy Args;
  Args.reserve(I->getNumOperands());
  for (auto &Operand : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Operand;
    Entry.Ty = Operand->getType();
    Args.push_back(Entry);
  }

  CallLoweringInfo CLI;
  MCContext &Ctx = MF->getContext();
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;

  updateValueMap(I, CLI.ResultReg);
  return true;
}
bool AArch64FastISel::selectSDiv(const Instruction *I) {
MVT VT;
if (!isTypeLegal(I->getType(), VT))
return false;
if (!isa<ConstantInt>(I->getOperand(1)))
return selectBinaryOp(I, ISD::SDIV);
const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
!(C.isPowerOf2() || C.isNegatedPowerOf2()))
return selectBinaryOp(I, ISD::SDIV);
unsigned Lg2 = C.countr_zero();
Register Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
if (cast<BinaryOperator>(I)->isExact()) {
Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
if (!AddReg)
return false;
// (Src0 < 0) ? Pow2 - 1 : 0;
if (!emitICmp_ri(VT, Src0Reg, 0))
return false;
unsigned SelectOpc;
const TargetRegisterClass *RC;
if (VT == MVT::i64) {
SelectOpc = AArch64::CSELXr;
RC = &AArch64::GPR64RegClass;
} else {
SelectOpc = AArch64::CSELWr;
RC = &AArch64::GPR32RegClass;
}
Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
AArch64CC::LT);
if (!SelectReg)
return false;
// Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
// negate the result.
Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
Register ResultReg;
if (C.isNegative())
ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
AArch64_AM::ASR, Lg2);
else
ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
return true;
}
/// Return a pointer-width register holding the GEP index \p Idx, or an invalid
/// Register if the index cannot be materialized.
///
/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxReg = getRegForValue(Idx);
  // Unhandled operand. Halt "fast" selection and bail.
  if (!IdxReg)
    return Register();

  const MVT PtrVT = TLI.getPointerTy(DL);
  const EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);

  if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");

  // An index narrower than the pointer type is sign-extended to pointer width.
  if (IdxVT.bitsLT(PtrVT))
    IdxReg = emitIntExt(IdxVT.getSimpleVT(), IdxReg, PtrVT, /*IsZExt=*/false);

  return IdxReg;
}
/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficientily.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
if (Subtarget->isTargetILP32())
return false;
Register N = getRegForValue(I->getOperand(0));
if (!N)
return false;
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
// into a single N = N + TotalOffset.
uint64_t TotalOffs = 0;
MVT VT = TLI.getPointerTy(DL);
for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
if (auto *StTy = GTI.getStructTypeOrNull()) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
// N = N + Offset
if (Field)
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
} else {
// If this is a constant subscript, handle it quickly.
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero())
continue;
// N = N + Offset
TotalOffs += GTI.getSequentialElementStride(DL) *
cast<ConstantInt>(CI)->getSExtValue();
continue;
}
if (TotalOffs) {
N = emitAdd_ri_(VT, N, TotalOffs);
if (!N)
return false;
TotalOffs = 0;
}
// N = N + Idx * ElementSize;
uint64_t ElementSize = GTI.getSequentialElementStride(DL);
Register IdxN = getRegForGEPIndex(Idx);
if (!IdxN)
return false;
if (ElementSize != 1) {
Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
if (!C)
return false;
IdxN = emitMul_rr(VT, IdxN, C);
if (!IdxN)
return false;
}
N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
if (!N)
return false;
}
}
if (TotalOffs) {
N = emitAdd_ri_(VT, N, TotalOffs);
if (!N)
return false;
}
updateValueMap(I, N);
return true;
}
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
assert(TM.getOptLevel() == CodeGenOptLevel::None &&
"cmpxchg survived AtomicExpand at optlevel > -O0");
auto *RetPairTy = cast<StructType>(I->getType());
Type *RetTy = RetPairTy->getTypeAtIndex(0U);
assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
"cmpxchg has a non-i1 status result");
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
const TargetRegisterClass *ResRC;
unsigned Opc, CmpOpc;
// This only supports i32/i64, because i8/i16 aren't legal, and the generic
// extractvalue selection doesn't support that.
if (VT == MVT::i32) {
Opc = AArch64::CMP_SWAP_32;
CmpOpc = AArch64::SUBSWrs;
ResRC = &AArch64::GPR32RegClass;
} else if (VT == MVT::i64) {
Opc = AArch64::CMP_SWAP_64;
CmpOpc = AArch64::SUBSXrs;
ResRC = &AArch64::GPR64RegClass;
} else {
return false;
}
const MCInstrDesc &II = TII.get(Opc);
Register AddrReg = getRegForValue(I->getPointerOperand());
Register DesiredReg = getRegForValue(I->getCompareOperand());
Register NewReg = getRegForValue(I->getNewValOperand());
if (!AddrReg || !DesiredReg || !NewReg)
return false;
AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
const Register ResultReg1 = createResultReg(ResRC);
const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
// FIXME: MachineMemOperand doesn't support cmpxchg yet.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addDef(ResultReg1)
.addDef(ScratchReg)
.addUse(AddrReg)
.addUse(DesiredReg)
.addUse(NewReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
.addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
.addUse(ResultReg1)
.addUse(DesiredReg)
.addImm(0);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
.addDef(ResultReg2)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
.addImm(AArch64CC::NE);
assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
updateValueMap(I, ResultReg1, 2);
return true;
}
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
if (TLI.fallBackToDAGISel(*I))
return false;
switch (I->getOpcode()) {
default:
break;
case Instruction::Add:
case Instruction::Sub:
return selectAddSub(I);
case Instruction::Mul:
return selectMul(I);
case Instruction::SDiv:
return selectSDiv(I);
case Instruction::SRem:
if (!selectBinaryOp(I, ISD::SREM))
return selectRem(I, ISD::SREM);
return true;
case Instruction::URem:
if (!selectBinaryOp(I, ISD::UREM))
return selectRem(I, ISD::UREM);
return true;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
return selectShift(I);
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
return selectLogicalOp(I);
case Instruction::Br:
return selectBranch(I);
case Instruction::IndirectBr:
return selectIndirectBr(I);
case Instruction::BitCast:
if (!FastISel::selectBitCast(I))
return selectBitCast(I);
return true;
case Instruction::FPToSI:
if (!selectCast(I, ISD::FP_TO_SINT))
return selectFPToInt(I, /*Signed=*/true);
return true;
case Instruction::FPToUI:
return selectFPToInt(I, /*Signed=*/false);
case Instruction::ZExt:
case Instruction::SExt:
return selectIntExt(I);
case Instruction::Trunc:
if (!selectCast(I, ISD::TRUNCATE))
return selectTrunc(I);
return true;
case Instruction::FPExt:
return selectFPExt(I);
case Instruction::FPTrunc:
return selectFPTrunc(I);
case Instruction::SIToFP:
if (!selectCast(I, ISD::SINT_TO_FP))
return selectIntToFP(I, /*Signed=*/true);
return true;
case Instruction::UIToFP:
return selectIntToFP(I, /*Signed=*/false);
case Instruction::Load:
return selectLoad(I);
case Instruction::Store:
return selectStore(I);
case Instruction::FCmp:
case Instruction::ICmp:
return selectCmp(I);
case Instruction::Select:
return selectSelect(I);
case Instruction::Ret:
return selectRet(I);
case Instruction::FRem:
return selectFRem(I);
case Instruction::GetElementPtr:
return selectGetElementPtr(I);
case Instruction::AtomicCmpXchg:
return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
}
// fall-back to target-independent instruction selection.
return selectOperator(I, I->getOpcode());
}
/// Create the AArch64 FastISel instance for the function in \p FuncInfo.
///
/// Returns nullptr (no FastISel) when the function has ZA or ZT0 state, a
/// streaming interface or body, a streaming-compatible interface, or an
/// agnostic-ZA interface.
FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {
  SMEAttrs CallerAttrs(*FuncInfo.Fn);

  const bool Unsupported = CallerAttrs.hasZAState() ||
                           CallerAttrs.hasZT0State() ||
                           CallerAttrs.hasStreamingInterfaceOrBody() ||
                           CallerAttrs.hasStreamingCompatibleInterface() ||
                           CallerAttrs.hasAgnosticZAInterface();
  if (Unsupported)
    return nullptr;

  return new AArch64FastISel(FuncInfo, LibInfo);
}