[AMDGPU][NFC] Correct typos in lib/Target/AMDGPU/AMDGPU*.cpp files. Test commit for new contributor.

This commit is contained in:
Jacob Lambert 2021-09-20 14:28:10 -07:00
parent f9d69a0ab0
commit dc6e8dfdfe
22 changed files with 61 additions and 61 deletions

View File

@ -94,7 +94,7 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
getUnderlyingObject(A.Ptr->stripPointerCastsForAliasAnalysis());
if (const LoadInst *LI = dyn_cast<LoadInst>(ObjA)) {
// If a generic pointer is loaded from the constant address space, it
// could only be a GLOBAL or CONSTANT one as that address space is soley
// could only be a GLOBAL or CONSTANT one as that address space is solely
// prepared on the host side, where only GLOBAL or CONSTANT variables are
// visible. Note that this even holds for regular functions.
if (LI->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)

View File

@ -70,7 +70,7 @@ recursivelyVisitUsers(GlobalValue &GV,
// and just let us hit the error when we can't handle this.
//
// Unfortunately, clang adds noinline to all functions at -O0. We have
// to override this here. until that's fixed.
// to override this here until that's fixed.
F->removeFnAttr(Attribute::NoInline);
FuncsToAlwaysInline.insert(F);

View File

@ -76,7 +76,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const Function *Callee =
dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
// Note the occurence of indirect call.
// Note the occurrence of indirect call.
if (!Callee) {
if (!CB->isInlineAsm())
HaveCall = true;

View File

@ -541,7 +541,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
if (NeedResult)
ExclScan = buildShiftRight(B, NewV, Identity);
// Read the value from the last lane, which has accumlated the values of
// Read the value from the last lane, which has accumulated the values of
// each active lane in the wavefront. This will be our new value which we
// will provide to the atomic operation.
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);

View File

@ -236,7 +236,7 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
// FIXME: Compatability shim
// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
switch (MIOpc) {
case TargetOpcode::G_SEXT:

View File

@ -816,7 +816,7 @@ bool AMDGPUCodeGenPrepare::visitXor(BinaryOperator &I) {
if (!RHS || !IntrinsicCall || RHS->getSExtValue() != -1)
return visitBinaryOperator(I);
// Check if the Call is an intrinsic intruction to amdgcn_class intrinsic
// Check that the call is to the amdgcn_class intrinsic and that it has only
// one use.
if (IntrinsicCall->getIntrinsicID() != Intrinsic::amdgcn_class ||
!IntrinsicCall->hasOneUse())

View File

@ -56,7 +56,7 @@ static bool isNullConstantOrUndef(SDValue V) {
}
static bool getConstantValue(SDValue N, uint32_t &Out) {
// This is only used for packed vectors, where ussing 0 for undef should
// This is only used for packed vectors, where using 0 for undef should
// always be good.
if (N.isUndef()) {
Out = 0;

View File

@ -1042,7 +1042,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
/// In order to correctly lower the arguments we need to know the size of each
/// argument. Since Ins[x].VT gives us the size of the register that will
/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
/// for the orignal function argument so that we can deduce the correct memory
/// for the original function argument so that we can deduce the correct memory
/// type to use for Ins[x]. In most cases the correct memory type will be
/// Ins[x].ArgVT. However, this will not always be the case. If, for example,
/// we have a kernel argument of type v8i8, this argument will be split into
@ -2428,7 +2428,7 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
// The regular method coverting a 64-bit integer to float roughly consists of
// The regular method converting a 64-bit integer to float roughly consists of
// 2 steps: normalization and rounding. In fact, after normalization, the
// conversion from a 64-bit integer to a float is essentially the same as the
// one from a 32-bit integer. The only difference is that it has more

View File

@ -439,7 +439,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (!CWidth || !COffset)
break;
// The case of Width == 0 is handled above, which makes this tranformation
// The case of Width == 0 is handled above, which makes this transformation
// safe. If Width == 0, then the ashr and lshr instructions become poison
// value since the shift amount would be equal to the bit size.
assert(Width != 0);

View File

@ -928,7 +928,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
case Intrinsic::amdgcn_if_break: {
MachineBasicBlock *BB = I.getParent();
// FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
// FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
.add(I.getOperand(0))
@ -1242,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
}
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
// FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
// FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
MachineBasicBlock *BB = MI.getParent();
BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
@ -2387,7 +2387,7 @@ void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
STI.ldsRequiresM0Init()) {
MachineBasicBlock *BB = I.getParent();
// If DS instructions require M0 initializtion, insert it before selecting.
// If DS instructions require M0 initialization, insert it before selecting.
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addImm(-1);
}

View File

@ -59,7 +59,7 @@ static LLT getPow2ScalarType(LLT Ty) {
return LLT::scalar(Pow2Bits);
}
/// \returs true if this is an odd sized vector which should widen by adding an
/// \returns true if this is an odd sized vector which should widen by adding an
/// additional element. This is mostly to handle <3 x s16> -> <4 x s16>. This
/// excludes s1 vectors, which should always be scalarized.
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
@ -2487,7 +2487,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
if (Ty.getSizeInBits() == 32) {
// Truncate if this is a 32-bit constant adrdess.
// Truncate if this is a 32-bit constant address.
auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
B.buildExtract(DstReg, Load, 0);
} else
@ -2989,7 +2989,7 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B,
B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R);
}
// Build integer reciprocal sequence arounud V_RCP_IFLAG_F32
// Build integer reciprocal sequence around V_RCP_IFLAG_F32
//
// Return lo, hi of result
//
@ -4322,8 +4322,8 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
/// to expose all register repacking to the legalizer/combiners. We also don't
/// want a selected instruction entering RegBankSelect. In order to avoid
/// defining a multitude of intermediate image instructions, directly hack on
/// the intrinsic's arguments. In cases like a16 addreses, this requires padding
/// now unnecessary arguments with $noreg.
/// the intrinsic's arguments. In cases like a16 addresses, this requires
/// padding now unnecessary arguments with $noreg.
bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
@ -4594,7 +4594,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MI.getOperand(0).setReg(NewResultReg);
// In the IR, TFE is supposed to be used with a 2 element struct return
// type. The intruction really returns these two values in one contiguous
// type. The instruction really returns these two values in one contiguous
// register, with one additional dword beyond the loaded data. Rewrite the
// return type to use a single register result.
@ -4806,7 +4806,7 @@ bool AMDGPULegalizerInfo::legalizeTrapHsa(
bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
// Is non-HSA path or trap-handler disabled? then, report a warning
// Is non-HSA path or trap-handler disabled? Then, report a warning
// accordingly
if (!ST.isTrapHandlerEnabled() ||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {

View File

@ -54,7 +54,7 @@ private:
bool useNativeFunc(const StringRef F) const;
// Return a pointer (pointer expr) to the function if function defintion with
// Return a pointer (pointer expr) to the function if function definition with
// "FuncName" exists. It may create a new function prototype in pre-link mode.
FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
@ -660,7 +660,7 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
return true;
// Specilized optimizations for each function call
// Specialized optimizations for each function call
switch (FInfo.getId()) {
case AMDGPULibFunc::EI_RECIP:
// skip vector function
@ -1231,7 +1231,7 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
return false;
}
// Get a scalar native builtin signle argument FP function
// Get a scalar native builtin single argument FP function
FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
const FuncInfo &FInfo) {
if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))

View File

@ -352,7 +352,7 @@ const unsigned UnmangledFuncInfo::TableSize =
static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
const AMDGPULibFunc::Param (&Leads)[2]) {
AMDGPULibFunc::Param Res = Leads[0];
// TBD - This switch may require to be extended for other intriniscs
// TBD - This switch may require to be extended for other intrinsics
switch (id) {
case AMDGPULibFunc::EI_SINCOS:
Res.PtrKind = AMDGPULibFunc::BYVALUE;
@ -778,7 +778,7 @@ namespace {
class ItaniumMangler {
SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substituions
SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substitutions
bool UseAddrSpace;
int findSubst(const AMDGPULibFunc::Param& P) const {

View File

@ -119,7 +119,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
// The llvm.amdgcn.module.lds instance is implicitly used by all kernels
// that might call a function which accesses a field within it. This is
// presently approximated to 'all kernels' if there are any such functions
// in the module. This implicit use is reified as an explicit use here so
// in the module. This implicit use is reified as an explicit use here so
// that later passes, specifically PromoteAlloca, account for the required
// memory without any knowledge of this transform.

View File

@ -194,7 +194,7 @@ bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
}
void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exists");
PHISourcesT EmptySet;
PHIInfoElementT *NewElement = new PHIInfoElementT();
NewElement->DestReg = DestReg;
@ -813,7 +813,7 @@ void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
} else {
LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
// We should be limited to only store registers that are live out from the
// lineaized region
// linearized region
for (auto MBBI : SubRegion->MBBs) {
storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
}
@ -896,7 +896,7 @@ void LinearizedRegion::replaceRegister(unsigned Register,
assert(Register != NewRegister && "Cannot replace a reg with itself");
LLVM_DEBUG(
dbgs() << "Pepareing to replace register (region): "
dbgs() << "Preparing to replace register (region): "
<< printReg(Register, MRI->getTargetRegisterInfo()) << " with "
<< printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
@ -1404,7 +1404,7 @@ void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
MachineInstr &Instr = *I;
if (Instr.isPHI()) {
unsigned PHIDestReg = getPHIDestReg(Instr);
LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
LLVM_DEBUG(dbgs() << "Extracting killed phi:\n");
LLVM_DEBUG(Instr.dump());
PHIs.insert(&Instr);
PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
@ -1770,7 +1770,7 @@ static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
MachineBasicBlock *EndMBB) {
// We have to check against the StartMBB successor becasuse a
// We have to check against the StartMBB successor because a
// structurized region with a loop will have the entry block split,
// and the backedge will go to the entry successor.
DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
@ -2018,7 +2018,7 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
if (!containsDef(CodeBB, InnerRegion, LI) ||
(!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
// If the register simly lives through the CodeBB, we don't have
// If the register simply lives through the CodeBB, we don't have
// to rewrite anything since the register is not defined in this
// part of the code.
LLVM_DEBUG(dbgs() << "- through");
@ -2028,14 +2028,14 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
unsigned Reg = LI;
if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
// If the register is live out, we do want to create a phi,
// unless it is from the Exit block, becasuse in that case there
// unless it is from the Exit block, because in that case there
// is already a PHI, and no need to create a new one.
// If the register is just a live out def and not part of a phi
// chain, we need to create a PHI node to handle the if region,
// and replace all uses outside of the region with the new dest
// register, unless it is the outgoing BB select register. We have
// already creaed phi nodes for these.
// already created phi nodes for these.
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
Register PHIDestReg = MRI->createVirtualRegister(RegClass);
Register IfSourceReg = MRI->createVirtualRegister(RegClass);

View File

@ -149,7 +149,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
IRBuilder<> Builder(Ctx);
Type *I32Ty = Type::getInt32Ty(Ctx);
unsigned UniqID = 0;
// NB: This is important for this string size to be divizable by 4
// NB: This is important for this string size to be divisible by 4
const char NonLiteralStr[4] = "???";
for (auto CI : Printfs) {

View File

@ -9,7 +9,7 @@
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for specific ABI,
/// yet will be correctly compiled if proper attrbutes are propagated from the
/// yet will be correctly compiled if proper attributes are propagated from the
/// caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
@ -17,7 +17,7 @@
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is clonned all call sites will
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
@ -149,7 +149,7 @@ public:
bool process(Module &M);
};
// Allows to propagate attributes early, but no clonning is allowed as it must
// Allows to propagate attributes early, but no cloning is allowed as it must
// be a function pass to run before any optimizations.
// TODO: We shall only need a one instance of module pass, but that needs to be
// in the linker pipeline which is currently not possible.
@ -168,7 +168,7 @@ public:
bool runOnFunction(Function &F) override;
};
// Allows to propagate attributes with clonning but does that late in the
// Allows to propagate attributes with cloning but does that late in the
// pipeline.
class AMDGPUPropagateAttributesLate : public ModulePass {
const TargetMachine *TM;
@ -273,7 +273,7 @@ bool AMDGPUPropagateAttributes::process() {
if (!NewF) {
const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
if (!AllowClone) {
// This may set different features on different iteartions if
// This may set different features on different iterations if
// there is a contradiction in callers' attributes. In this case
// we rely on a second pass running on Module, which is allowed
// to clone.

View File

@ -58,7 +58,7 @@
///
/// We avoid trying to solve this problem in RegBankSelect. Any VALU G_*
/// operation should have its source operands all mapped to VGPRs (except for
/// VCC), inserting copies from any SGPR operands. This the most trival legal
/// VCC), inserting copies from any SGPR operands. This is the most trivial
/// legal mapping. Anything beyond the simplest 1:1 instruction selection would
/// be too complicated to solve here. Every optimization pattern or instruction
/// selected to multiple outputs would have to enforce this rule, and there
@ -118,7 +118,7 @@ public:
Opc == AMDGPU::G_SEXT) {
// LegalizerHelper wants to use the basic legalization artifacts when
// widening etc. We don't handle selection with vcc in artifact sources,
// so we need to use a sslect instead to handle these properly.
// so we need to use a select instead to handle these properly.
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI);
@ -282,7 +282,7 @@ AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
// VCC-like use.
if (TRI->isSGPRClass(&RC)) {
// FIXME: This probably came from a copy from a physical register, which
// should be inferrrable from the copied to-type. We don't have many boolean
// should be inferable from the copied to-type. We don't have many boolean
// physical register constraints so just assume a normal SGPR for now.
if (!Ty.isValid())
return AMDGPU::SGPRRegBank;
@ -1430,7 +1430,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
OffsetBank == &AMDGPU::SGPRRegBank)
return true; // Legal mapping
// FIXME: 96-bit case was widened during legalize. We neeed to narrow it back
// FIXME: 96-bit case was widened during legalize. We need to narrow it back
// here but don't have an MMO.
unsigned LoadSize = Ty.getSizeInBits();
@ -1455,7 +1455,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
VOffset, SOffset, ImmOffset, Alignment);
// TODO: 96-bit loads were widened to 128-bit results. Shrink the result if we
// can, but we neeed to track an MMO for that.
// can, but we need to track an MMO for that.
const unsigned MemSize = (Ty.getSizeInBits() + 7) / 8;
const Align MemAlign(4); // FIXME: ABI type alignment?
MachineMemOperand *BaseMMO = MF.getMachineMemOperand(
@ -2153,7 +2153,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// The standard handling only considers the result register bank for
// phis. For VCC, blindly inserting a copy when the phi is lowered will
// produce an invalid copy. We can only copy with some kind of compare to
// get a vector boolean result. Insert a regitser bank copy that will be
// get a vector boolean result. Insert a register bank copy that will be
// correctly lowered to a compare.
MachineIRBuilder B(*MI.getParent()->getParent());
@ -3331,7 +3331,7 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
/// Return the mapping for a pointer arugment.
/// Return the mapping for a pointer argument.
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
Register PtrReg) const {

View File

@ -142,7 +142,7 @@ class ReplaceLDSUseImpl {
// Returns true if uses of given LDS global within non-kernel functions should
// be kept as is without pointer replacement.
bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
// LDS whose size is very small and doesn`t exceed pointer size is not worth
// LDS whose size is very small and doesn't exceed pointer size is not worth
// replacing.
if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
return true;
@ -158,7 +158,7 @@ class ReplaceLDSUseImpl {
// Insert new global LDS pointer which points to LDS.
GlobalVariable *createLDSPointer(GlobalVariable *GV) {
// LDS pointer which points to LDS is already created? return it.
// LDS pointer which points to LDS is already created? Return it.
auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr));
if (!PointerEntry.second)
return PointerEntry.first->second;
@ -185,7 +185,7 @@ class ReplaceLDSUseImpl {
// Split entry basic block in such a way that only lane 0 of each wave does
// the LDS pointer initialization, and return newly created basic block.
BasicBlock *activateLaneZero(Function *K) {
// If the entry basic block of kernel K is already splitted, then return
// If the entry basic block of kernel K is already split, then return
// newly created basic block.
auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr));
if (!BasicBlockEntry.second)
@ -204,7 +204,7 @@ class ReplaceLDSUseImpl {
BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();
// Mark that the entry basic block of kernel K is splitted.
// Mark that the entry basic block of kernel K is split.
KernelToInitBB[K] = NBB;
return NBB;
@ -235,7 +235,7 @@ class ReplaceLDSUseImpl {
}
// We have created an LDS pointer for LDS, and initialized it to point-to LDS
// within all relevent kernels. Now replace all the uses of LDS within
// within all relevant kernels. Now replace all the uses of LDS within
// non-kernel functions by LDS pointer.
void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
SmallVector<User *, 8> LDSUsers(GV->users());
@ -268,8 +268,8 @@ class ReplaceLDSUseImpl {
convertConstantExprsToInstructions(I, CE, &UserInsts);
}
// Go through all the user instrutions, if LDS exist within them as an
// operand, then replace it by replace instruction.
// Go through all the user instructions, if LDS exist within them as
// an operand, then replace it by replace instruction.
for (auto *II : UserInsts) {
auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
II->replaceUsesOfWith(GV, ReplaceInst);
@ -373,7 +373,7 @@ bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
return false;
// We have created an LDS pointer for LDS, and initialized it to point-to LDS
// within all relevent kernels. Now replace all the uses of LDS within
// within all relevant kernels. Now replace all the uses of LDS within
// non-kernel functions by LDS pointer.
replaceLDSUseByPointer(GV, LDSPointer);

View File

@ -142,8 +142,8 @@ bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
// Special case handle structs with single members. It is useful to handle
// some casts between structs and non-structs, but we can't bitcast
// directly between them. directly bitcast between them. Blender uses
// some casts that look like { <3 x float> }* to <4 x float>*
// directly between them. Blender uses some casts that look like
// { <3 x float> }* to <4 x float>*
if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
return false;
@ -259,7 +259,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
// Keep retrying if we are able to successfully eliminate an argument. This
// helps with cases with multiple arguments which may alias, such as in a
// sincos implemntation. If we have 2 stores to arguments, on the first
// sincos implementation. If we have 2 stores to arguments, on the first
// attempt the MDA query will succeed for the second store but not the
// first. On the second iteration we've removed that out clobbering argument
// (by effectively moving it into another function) and will find the second

View File

@ -1019,7 +1019,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
return true;
}
// Link as much SALU intructions in chain as possible. Return the size
// Link as many SALU instructions in chain as possible. Return the size
// of the chain. Links up to MaxChain instructions.
unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
SmallPtrSetImpl<SUnit *> &Visited) const {

View File

@ -879,7 +879,7 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
addPass(createLICMPass());
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunites for SLSR. See
// ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());
// SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
@ -1277,7 +1277,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
// Commit allocated register changes. This is mostly necessary because too
// many things rely on the use lists of the physical registers, such as the
// verifier. This is only necessary with allocators which use LiveIntervals,
// since FastRegAlloc does the replacments itself.
// since FastRegAlloc does the replacements itself.
addPass(createVirtRegRewriter(false));
// Equivalent of PEI for SGPRs.