[AMDGPU][NFC] Correct typos in lib/Target/AMDGPU/AMDGPU*.cpp files. Test commit for new contributor.
This commit is contained in:
parent
f9d69a0ab0
commit
dc6e8dfdfe
@ -94,7 +94,7 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
|
||||
getUnderlyingObject(A.Ptr->stripPointerCastsForAliasAnalysis());
|
||||
if (const LoadInst *LI = dyn_cast<LoadInst>(ObjA)) {
|
||||
// If a generic pointer is loaded from the constant address space, it
|
||||
// could only be a GLOBAL or CONSTANT one as that address space is soley
|
||||
// could only be a GLOBAL or CONSTANT one as that address space is solely
|
||||
// prepared on the host side, where only GLOBAL or CONSTANT variables are
|
||||
// visible. Note that this even holds for regular functions.
|
||||
if (LI->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
|
||||
|
||||
@ -70,7 +70,7 @@ recursivelyVisitUsers(GlobalValue &GV,
|
||||
// and just let us hit the error when we can't handle this.
|
||||
//
|
||||
// Unfortunately, clang adds noinline to all functions at -O0. We have
|
||||
// to override this here. until that's fixed.
|
||||
// to override this here until that's fixed.
|
||||
F->removeFnAttr(Attribute::NoInline);
|
||||
|
||||
FuncsToAlwaysInline.insert(F);
|
||||
|
||||
@ -76,7 +76,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
||||
const Function *Callee =
|
||||
dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
|
||||
|
||||
// Note the occurence of indirect call.
|
||||
// Note the occurrence of indirect call.
|
||||
if (!Callee) {
|
||||
if (!CB->isInlineAsm())
|
||||
HaveCall = true;
|
||||
|
||||
@ -541,7 +541,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
|
||||
if (NeedResult)
|
||||
ExclScan = buildShiftRight(B, NewV, Identity);
|
||||
|
||||
// Read the value from the last lane, which has accumlated the values of
|
||||
// Read the value from the last lane, which has accumulated the values of
|
||||
// each active lane in the wavefront. This will be our new value which we
|
||||
// will provide to the atomic operation.
|
||||
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
|
||||
|
||||
@ -236,7 +236,7 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
|
||||
: CallLowering(&TLI) {
|
||||
}
|
||||
|
||||
// FIXME: Compatability shim
|
||||
// FIXME: Compatibility shim
|
||||
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
|
||||
switch (MIOpc) {
|
||||
case TargetOpcode::G_SEXT:
|
||||
|
||||
@ -816,7 +816,7 @@ bool AMDGPUCodeGenPrepare::visitXor(BinaryOperator &I) {
|
||||
if (!RHS || !IntrinsicCall || RHS->getSExtValue() != -1)
|
||||
return visitBinaryOperator(I);
|
||||
|
||||
// Check if the Call is an intrinsic intruction to amdgcn_class intrinsic
|
||||
// Check that the Call is to the amdgcn_class intrinsic and that it
|
||||
// has only one use
|
||||
if (IntrinsicCall->getIntrinsicID() != Intrinsic::amdgcn_class ||
|
||||
!IntrinsicCall->hasOneUse())
|
||||
|
||||
@ -56,7 +56,7 @@ static bool isNullConstantOrUndef(SDValue V) {
|
||||
}
|
||||
|
||||
static bool getConstantValue(SDValue N, uint32_t &Out) {
|
||||
// This is only used for packed vectors, where ussing 0 for undef should
|
||||
// This is only used for packed vectors, where using 0 for undef should
|
||||
// always be good.
|
||||
if (N.isUndef()) {
|
||||
Out = 0;
|
||||
|
||||
@ -1042,7 +1042,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
|
||||
/// In order to correctly lower the arguments we need to know the size of each
|
||||
/// argument. Since Ins[x].VT gives us the size of the register that will
|
||||
/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
|
||||
/// for the orignal function argument so that we can deduce the correct memory
|
||||
/// for the original function argument so that we can deduce the correct memory
|
||||
/// type to use for Ins[x]. In most cases the correct memory type will be
|
||||
/// Ins[x].ArgVT. However, this will not always be the case. If, for example,
|
||||
/// we have a kernel argument of type v8i8, this argument will be split into
|
||||
@ -2428,7 +2428,7 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
|
||||
bool Signed) const {
|
||||
// The regular method coverting a 64-bit integer to float roughly consists of
|
||||
// The regular method converting a 64-bit integer to float roughly consists of
|
||||
// 2 steps: normalization and rounding. In fact, after normalization, the
|
||||
// conversion from a 64-bit integer to a float is essentially the same as the
|
||||
// one from a 32-bit integer. The only difference is that it has more
|
||||
|
||||
@ -439,7 +439,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
||||
if (!CWidth || !COffset)
|
||||
break;
|
||||
|
||||
// The case of Width == 0 is handled above, which makes this tranformation
|
||||
// The case of Width == 0 is handled above, which makes this transformation
|
||||
// safe. If Width == 0, then the ashr and lshr instructions become poison
|
||||
// value since the shift amount would be equal to the bit size.
|
||||
assert(Width != 0);
|
||||
|
||||
@ -928,7 +928,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
|
||||
case Intrinsic::amdgcn_if_break: {
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
|
||||
// FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
|
||||
// FIXME: Manually selecting to avoid dealing with the SReg_1 trick
|
||||
// SelectionDAG uses for wave32 vs wave64.
|
||||
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
|
||||
.add(I.getOperand(0))
|
||||
@ -1242,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
|
||||
// FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
|
||||
// FIXME: Manually selecting to avoid dealing with the SReg_1 trick
|
||||
// SelectionDAG uses for wave32 vs wave64.
|
||||
MachineBasicBlock *BB = MI.getParent();
|
||||
BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
|
||||
@ -2387,7 +2387,7 @@ void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
|
||||
STI.ldsRequiresM0Init()) {
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
|
||||
// If DS instructions require M0 initializtion, insert it before selecting.
|
||||
// If DS instructions require M0 initialization, insert it before selecting.
|
||||
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
|
||||
.addImm(-1);
|
||||
}
|
||||
|
||||
@ -59,7 +59,7 @@ static LLT getPow2ScalarType(LLT Ty) {
|
||||
return LLT::scalar(Pow2Bits);
|
||||
}
|
||||
|
||||
/// \returs true if this is an odd sized vector which should widen by adding an
|
||||
/// \returns true if this is an odd sized vector which should widen by adding an
|
||||
/// additional element. This is mostly to handle <3 x s16> -> <4 x s16>. This
|
||||
/// excludes s1 vectors, which should always be scalarized.
|
||||
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
|
||||
@ -2487,7 +2487,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
||||
buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
|
||||
|
||||
if (Ty.getSizeInBits() == 32) {
|
||||
// Truncate if this is a 32-bit constant adrdess.
|
||||
// Truncate if this is a 32-bit constant address.
|
||||
auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
|
||||
B.buildExtract(DstReg, Load, 0);
|
||||
} else
|
||||
@ -2989,7 +2989,7 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B,
|
||||
B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R);
|
||||
}
|
||||
|
||||
// Build integer reciprocal sequence arounud V_RCP_IFLAG_F32
|
||||
// Build integer reciprocal sequence around V_RCP_IFLAG_F32
|
||||
//
|
||||
// Return lo, hi of result
|
||||
//
|
||||
@ -4322,8 +4322,8 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
|
||||
/// to expose all register repacking to the legalizer/combiners. We also don't
|
||||
/// want a selected instruction entering RegBankSelect. In order to avoid
|
||||
/// defining a multitude of intermediate image instructions, directly hack on
|
||||
/// the intrinsic's arguments. In cases like a16 addreses, this requires padding
|
||||
/// now unnecessary arguments with $noreg.
|
||||
/// the intrinsic's arguments. In cases like a16 addresses, this requires
|
||||
/// padding now unnecessary arguments with $noreg.
|
||||
bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
|
||||
MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer,
|
||||
const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
|
||||
@ -4594,7 +4594,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
|
||||
MI.getOperand(0).setReg(NewResultReg);
|
||||
|
||||
// In the IR, TFE is supposed to be used with a 2 element struct return
|
||||
// type. The intruction really returns these two values in one contiguous
|
||||
// type. The instruction really returns these two values in one contiguous
|
||||
// register, with one additional dword beyond the loaded data. Rewrite the
|
||||
// return type to use a single register result.
|
||||
|
||||
@ -4806,7 +4806,7 @@ bool AMDGPULegalizerInfo::legalizeTrapHsa(
|
||||
|
||||
bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
|
||||
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
|
||||
// Is non-HSA path or trap-handler disabled? then, report a warning
|
||||
// Is non-HSA path or trap-handler disabled? Then, report a warning
|
||||
// accordingly
|
||||
if (!ST.isTrapHandlerEnabled() ||
|
||||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
|
||||
|
||||
@ -54,7 +54,7 @@ private:
|
||||
|
||||
bool useNativeFunc(const StringRef F) const;
|
||||
|
||||
// Return a pointer (pointer expr) to the function if function defintion with
|
||||
// Return a pointer (pointer expr) to the function if function definition with
|
||||
// "FuncName" exists. It may create a new function prototype in pre-link mode.
|
||||
FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
|
||||
|
||||
@ -660,7 +660,7 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
|
||||
if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
|
||||
return true;
|
||||
|
||||
// Specilized optimizations for each function call
|
||||
// Specialized optimizations for each function call
|
||||
switch (FInfo.getId()) {
|
||||
case AMDGPULibFunc::EI_RECIP:
|
||||
// skip vector function
|
||||
@ -1231,7 +1231,7 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get a scalar native builtin signle argument FP function
|
||||
// Get a scalar native builtin single argument FP function
|
||||
FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
|
||||
const FuncInfo &FInfo) {
|
||||
if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
|
||||
|
||||
@ -352,7 +352,7 @@ const unsigned UnmangledFuncInfo::TableSize =
|
||||
static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
|
||||
const AMDGPULibFunc::Param (&Leads)[2]) {
|
||||
AMDGPULibFunc::Param Res = Leads[0];
|
||||
// TBD - This switch may require to be extended for other intriniscs
|
||||
// TBD - This switch may require to be extended for other intrinsics
|
||||
switch (id) {
|
||||
case AMDGPULibFunc::EI_SINCOS:
|
||||
Res.PtrKind = AMDGPULibFunc::BYVALUE;
|
||||
@ -778,7 +778,7 @@ namespace {
|
||||
|
||||
|
||||
class ItaniumMangler {
|
||||
SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substituions
|
||||
SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substitutions
|
||||
bool UseAddrSpace;
|
||||
|
||||
int findSubst(const AMDGPULibFunc::Param& P) const {
|
||||
|
||||
@ -119,7 +119,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
|
||||
// The llvm.amdgcn.module.lds instance is implicitly used by all kernels
|
||||
// that might call a function which accesses a field within it. This is
|
||||
// presently approximated to 'all kernels' if there are any such functions
|
||||
// in the module. This implicit use is reified as an explicit use here so
|
||||
// in the module. This implicit use is reified as an explicit use here so
|
||||
// that later passes, specifically PromoteAlloca, account for the required
|
||||
// memory without any knowledge of this transform.
|
||||
|
||||
|
||||
@ -194,7 +194,7 @@ bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
|
||||
}
|
||||
|
||||
void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
|
||||
assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
|
||||
assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exists");
|
||||
PHISourcesT EmptySet;
|
||||
PHIInfoElementT *NewElement = new PHIInfoElementT();
|
||||
NewElement->DestReg = DestReg;
|
||||
@ -813,7 +813,7 @@ void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
|
||||
} else {
|
||||
LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
|
||||
// We should be limited to only store registers that are live out from the
|
||||
// lineaized region
|
||||
// linearized region
|
||||
for (auto MBBI : SubRegion->MBBs) {
|
||||
storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
|
||||
}
|
||||
@ -896,7 +896,7 @@ void LinearizedRegion::replaceRegister(unsigned Register,
|
||||
assert(Register != NewRegister && "Cannot replace a reg with itself");
|
||||
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "Pepareing to replace register (region): "
|
||||
dbgs() << "Preparing to replace register (region): "
|
||||
<< printReg(Register, MRI->getTargetRegisterInfo()) << " with "
|
||||
<< printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
|
||||
|
||||
@ -1404,7 +1404,7 @@ void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
|
||||
MachineInstr &Instr = *I;
|
||||
if (Instr.isPHI()) {
|
||||
unsigned PHIDestReg = getPHIDestReg(Instr);
|
||||
LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
|
||||
LLVM_DEBUG(dbgs() << "Extracting killed phi:\n");
|
||||
LLVM_DEBUG(Instr.dump());
|
||||
PHIs.insert(&Instr);
|
||||
PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
|
||||
@ -1770,7 +1770,7 @@ static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
|
||||
static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
|
||||
MachineBasicBlock *EndMBB) {
|
||||
|
||||
// We have to check against the StartMBB successor becasuse a
|
||||
// We have to check against the StartMBB successor because a
|
||||
// structurized region with a loop will have the entry block split,
|
||||
// and the backedge will go to the entry successor.
|
||||
DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
|
||||
@ -2018,7 +2018,7 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
|
||||
LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
|
||||
if (!containsDef(CodeBB, InnerRegion, LI) ||
|
||||
(!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
|
||||
// If the register simly lives through the CodeBB, we don't have
|
||||
// If the register simply lives through the CodeBB, we don't have
|
||||
// to rewrite anything since the register is not defined in this
|
||||
// part of the code.
|
||||
LLVM_DEBUG(dbgs() << "- through");
|
||||
@ -2028,14 +2028,14 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
|
||||
unsigned Reg = LI;
|
||||
if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
|
||||
// If the register is live out, we do want to create a phi,
|
||||
// unless it is from the Exit block, becasuse in that case there
|
||||
// unless it is from the Exit block, because in that case there
|
||||
// is already a PHI, and no need to create a new one.
|
||||
|
||||
// If the register is just a live out def and not part of a phi
|
||||
// chain, we need to create a PHI node to handle the if region,
|
||||
// and replace all uses outside of the region with the new dest
|
||||
// register, unless it is the outgoing BB select register. We have
|
||||
// already creaed phi nodes for these.
|
||||
// already created phi nodes for these.
|
||||
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
|
||||
Register PHIDestReg = MRI->createVirtualRegister(RegClass);
|
||||
Register IfSourceReg = MRI->createVirtualRegister(RegClass);
|
||||
|
||||
@ -149,7 +149,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
|
||||
IRBuilder<> Builder(Ctx);
|
||||
Type *I32Ty = Type::getInt32Ty(Ctx);
|
||||
unsigned UniqID = 0;
|
||||
// NB: This is important for this string size to be divizable by 4
|
||||
// NB: This is important for this string size to be divisible by 4
|
||||
const char NonLiteralStr[4] = "???";
|
||||
|
||||
for (auto CI : Printfs) {
|
||||
|
||||
@ -9,7 +9,7 @@
|
||||
/// \file
|
||||
/// \brief This pass propagates attributes from kernels to the non-entry
|
||||
/// functions. Most of the library functions were not compiled for specific ABI,
|
||||
/// yet will be correctly compiled if proper attrbutes are propagated from the
|
||||
/// yet will be correctly compiled if proper attributes are propagated from the
|
||||
/// caller.
|
||||
///
|
||||
/// The pass analyzes call graph and propagates ABI target features through the
|
||||
@ -17,7 +17,7 @@
|
||||
///
|
||||
/// It can run in two modes: as a function or module pass. A function pass
|
||||
/// simply propagates attributes. A module pass clones functions if there are
|
||||
/// callers with different ABI. If a function is clonned all call sites will
|
||||
/// callers with different ABI. If a function is cloned all call sites will
|
||||
/// be updated to use a correct clone.
|
||||
///
|
||||
/// A function pass is limited in functionality but can run early in the
|
||||
@ -149,7 +149,7 @@ public:
|
||||
bool process(Module &M);
|
||||
};
|
||||
|
||||
// Allows to propagate attributes early, but no clonning is allowed as it must
|
||||
// Allows to propagate attributes early, but no cloning is allowed as it must
|
||||
// be a function pass to run before any optimizations.
|
||||
// TODO: We shall only need one instance of module pass, but that needs to be
|
||||
// in the linker pipeline which is currently not possible.
|
||||
@ -168,7 +168,7 @@ public:
|
||||
bool runOnFunction(Function &F) override;
|
||||
};
|
||||
|
||||
// Allows to propagate attributes with clonning but does that late in the
|
||||
// Allows to propagate attributes with cloning but does that late in the
|
||||
// pipeline.
|
||||
class AMDGPUPropagateAttributesLate : public ModulePass {
|
||||
const TargetMachine *TM;
|
||||
@ -273,7 +273,7 @@ bool AMDGPUPropagateAttributes::process() {
|
||||
if (!NewF) {
|
||||
const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
|
||||
if (!AllowClone) {
|
||||
// This may set different features on different iteartions if
|
||||
// This may set different features on different iterations if
|
||||
// there is a contradiction in callers' attributes. In this case
|
||||
// we rely on a second pass running on Module, which is allowed
|
||||
// to clone.
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
///
|
||||
/// We avoid trying to solve this problem in RegBankSelect. Any VALU G_*
|
||||
/// operation should have its source operands all mapped to VGPRs (except for
|
||||
/// VCC), inserting copies from any SGPR operands. This the most trival legal
|
||||
/// VCC), inserting copies from any SGPR operands. This is the most trivial legal
|
||||
/// mapping. Anything beyond the simplest 1:1 instruction selection would be too
|
||||
/// complicated to solve here. Every optimization pattern or instruction
|
||||
/// selected to multiple outputs would have to enforce this rule, and there
|
||||
@ -118,7 +118,7 @@ public:
|
||||
Opc == AMDGPU::G_SEXT) {
|
||||
// LegalizerHelper wants to use the basic legalization artifacts when
|
||||
// widening etc. We don't handle selection with vcc in artifact sources,
|
||||
// so we need to use a sslect instead to handle these properly.
|
||||
// so we need to use a select instead to handle these properly.
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
Register SrcReg = MI.getOperand(1).getReg();
|
||||
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI);
|
||||
@ -282,7 +282,7 @@ AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
|
||||
// VCC-like use.
|
||||
if (TRI->isSGPRClass(&RC)) {
|
||||
// FIXME: This probably came from a copy from a physical register, which
|
||||
// should be inferrrable from the copied to-type. We don't have many boolean
|
||||
// should be inferable from the copied to-type. We don't have many boolean
|
||||
// physical register constraints so just assume a normal SGPR for now.
|
||||
if (!Ty.isValid())
|
||||
return AMDGPU::SGPRRegBank;
|
||||
@ -1430,7 +1430,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
|
||||
OffsetBank == &AMDGPU::SGPRRegBank)
|
||||
return true; // Legal mapping
|
||||
|
||||
// FIXME: 96-bit case was widened during legalize. We neeed to narrow it back
|
||||
// FIXME: 96-bit case was widened during legalize. We need to narrow it back
|
||||
// here but don't have an MMO.
|
||||
|
||||
unsigned LoadSize = Ty.getSizeInBits();
|
||||
@ -1455,7 +1455,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
|
||||
VOffset, SOffset, ImmOffset, Alignment);
|
||||
|
||||
// TODO: 96-bit loads were widened to 128-bit results. Shrink the result if we
|
||||
// can, but we neeed to track an MMO for that.
|
||||
// can, but we need to track an MMO for that.
|
||||
const unsigned MemSize = (Ty.getSizeInBits() + 7) / 8;
|
||||
const Align MemAlign(4); // FIXME: ABI type alignment?
|
||||
MachineMemOperand *BaseMMO = MF.getMachineMemOperand(
|
||||
@ -2153,7 +2153,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||
// The standard handling only considers the result register bank for
|
||||
// phis. For VCC, blindly inserting a copy when the phi is lowered will
|
||||
// produce an invalid copy. We can only copy with some kind of compare to
|
||||
// get a vector boolean result. Insert a regitser bank copy that will be
|
||||
// get a vector boolean result. Insert a register bank copy that will be
|
||||
// correctly lowered to a compare.
|
||||
MachineIRBuilder B(*MI.getParent()->getParent());
|
||||
|
||||
@ -3331,7 +3331,7 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
|
||||
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
|
||||
}
|
||||
|
||||
/// Return the mapping for a pointer arugment.
|
||||
/// Return the mapping for a pointer argument.
|
||||
const RegisterBankInfo::ValueMapping *
|
||||
AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
|
||||
Register PtrReg) const {
|
||||
|
||||
@ -142,7 +142,7 @@ class ReplaceLDSUseImpl {
|
||||
// Returns true if uses of given LDS global within non-kernel functions should
|
||||
// be kept as it is without pointer replacement.
|
||||
bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
|
||||
// LDS whose size is very small and doesn`t exceed pointer size is not worth
|
||||
// LDS whose size is very small and doesn't exceed pointer size is not worth
|
||||
// replacing.
|
||||
if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
|
||||
return true;
|
||||
@ -158,7 +158,7 @@ class ReplaceLDSUseImpl {
|
||||
|
||||
// Insert new global LDS pointer which points to LDS.
|
||||
GlobalVariable *createLDSPointer(GlobalVariable *GV) {
|
||||
// LDS pointer which points to LDS is already created? return it.
|
||||
// LDS pointer which points to LDS is already created? Return it.
|
||||
auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr));
|
||||
if (!PointerEntry.second)
|
||||
return PointerEntry.first->second;
|
||||
@ -185,7 +185,7 @@ class ReplaceLDSUseImpl {
|
||||
// Split entry basic block in such a way that only lane 0 of each wave does
|
||||
// the LDS pointer initialization, and return newly created basic block.
|
||||
BasicBlock *activateLaneZero(Function *K) {
|
||||
// If the entry basic block of kernel K is already splitted, then return
|
||||
// If the entry basic block of kernel K is already split, then return
|
||||
// newly created basic block.
|
||||
auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr));
|
||||
if (!BasicBlockEntry.second)
|
||||
@ -204,7 +204,7 @@ class ReplaceLDSUseImpl {
|
||||
|
||||
BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();
|
||||
|
||||
// Mark that the entry basic block of kernel K is splitted.
|
||||
// Mark that the entry basic block of kernel K is split.
|
||||
KernelToInitBB[K] = NBB;
|
||||
|
||||
return NBB;
|
||||
@ -235,7 +235,7 @@ class ReplaceLDSUseImpl {
|
||||
}
|
||||
|
||||
// We have created an LDS pointer for LDS, and initialized it to point-to LDS
|
||||
// within all relevent kernels. Now replace all the uses of LDS within
|
||||
// within all relevant kernels. Now replace all the uses of LDS within
|
||||
// non-kernel functions by LDS pointer.
|
||||
void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
|
||||
SmallVector<User *, 8> LDSUsers(GV->users());
|
||||
@ -268,8 +268,8 @@ class ReplaceLDSUseImpl {
|
||||
convertConstantExprsToInstructions(I, CE, &UserInsts);
|
||||
}
|
||||
|
||||
// Go through all the user instrutions, if LDS exist within them as an
|
||||
// operand, then replace it by replace instruction.
|
||||
// Go through all the user instructions; if LDS exists within them as
|
||||
// an operand, then replace it with the replacement instruction.
|
||||
for (auto *II : UserInsts) {
|
||||
auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
|
||||
II->replaceUsesOfWith(GV, ReplaceInst);
|
||||
@ -373,7 +373,7 @@ bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
|
||||
return false;
|
||||
|
||||
// We have created an LDS pointer for LDS, and initialized it to point-to LDS
|
||||
// within all relevent kernels. Now replace all the uses of LDS within
|
||||
// within all relevant kernels. Now replace all the uses of LDS within
|
||||
// non-kernel functions by LDS pointer.
|
||||
replaceLDSUseByPointer(GV, LDSPointer);
|
||||
|
||||
|
||||
@ -142,8 +142,8 @@ bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
|
||||
|
||||
// Special case handle structs with single members. It is useful to handle
|
||||
// some casts between structs and non-structs, but we can't bitcast
|
||||
// directly between them. directly bitcast between them. Blender uses
|
||||
// some casts that look like { <3 x float> }* to <4 x float>*
|
||||
// directly between them. Blender uses some casts that look like
|
||||
// { <3 x float> }* to <4 x float>*
|
||||
if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
|
||||
return false;
|
||||
|
||||
@ -259,7 +259,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
|
||||
|
||||
// Keep retrying if we are able to successfully eliminate an argument. This
|
||||
// helps with cases with multiple arguments which may alias, such as in a
|
||||
// sincos implemntation. If we have 2 stores to arguments, on the first
|
||||
// sincos implementation. If we have 2 stores to arguments, on the first
|
||||
// attempt the MDA query will succeed for the second store but not the
|
||||
// first. On the second iteration we've removed that out clobbering argument
|
||||
// (by effectively moving it into another function) and will find the second
|
||||
|
||||
@ -1019,7 +1019,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Link as much SALU intructions in chain as possible. Return the size
|
||||
// Link as many SALU instructions in chain as possible. Return the size
|
||||
// of the chain. Links up to MaxChain instructions.
|
||||
unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
|
||||
SmallPtrSetImpl<SUnit *> &Visited) const {
|
||||
|
||||
@ -879,7 +879,7 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
|
||||
addPass(createLICMPass());
|
||||
addPass(createSeparateConstOffsetFromGEPPass());
|
||||
addPass(createSpeculativeExecutionPass());
|
||||
// ReassociateGEPs exposes more opportunites for SLSR. See
|
||||
// ReassociateGEPs exposes more opportunities for SLSR. See
|
||||
// the example in reassociate-geps-and-slsr.ll.
|
||||
addPass(createStraightLineStrengthReducePass());
|
||||
// SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
|
||||
@ -1277,7 +1277,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
|
||||
// Commit allocated register changes. This is mostly necessary because too
|
||||
// many things rely on the use lists of the physical registers, such as the
|
||||
// verifier. This is only necessary with allocators which use LiveIntervals,
|
||||
// since FastRegAlloc does the replacments itself.
|
||||
// since FastRegAlloc does the replacements itself.
|
||||
addPass(createVirtRegRewriter(false));
|
||||
|
||||
// Equivalent of PEI for SGPRs.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user