[AMDGPU][NFC] Correct typos in lib/Target/AMDGPU/AMDGPU*.cpp files. Test commit for new contributor.

2021-09-20 14:28:10 -07:00 · 2021-09-20 14:28:10 -07:00 · dc6e8dfdfe
commit dc6e8dfdfe
parent f9d69a0ab0
22 changed files with 61 additions and 61 deletions
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@ -94,7 +94,7 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
        getUnderlyingObject(A.Ptr->stripPointerCastsForAliasAnalysis());
    if (const LoadInst *LI = dyn_cast<LoadInst>(ObjA)) {
      // If a generic pointer is loaded from the constant address space, it
-      // could only be a GLOBAL or CONSTANT one as that address space is soley
+      // could only be a GLOBAL or CONSTANT one as that address space is solely
      // prepared on the host side, where only GLOBAL or CONSTANT variables are
      // visible. Note that this even holds for regular functions.
      if (LI->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@ -70,7 +70,7 @@ recursivelyVisitUsers(GlobalValue &GV,
        // and just let us hit the error when we can't handle this.
        //
        // Unfortunately, clang adds noinline to all functions at -O0. We have
-        // to override this here. until that's fixed.
+        // to override this here until that's fixed.
        F->removeFnAttr(Attribute::NoInline);

        FuncsToAlwaysInline.insert(F);
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@ -76,7 +76,7 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
        const Function *Callee =
            dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());

-        // Note the occurence of indirect call.
+        // Note the occurrence of indirect call.
        if (!Callee) {
          if (!CB->isInlineAsm())
            HaveCall = true;
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@ -541,7 +541,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
      if (NeedResult)
        ExclScan = buildShiftRight(B, NewV, Identity);

-      // Read the value from the last lane, which has accumlated the values of
+      // Read the value from the last lane, which has accumulated the values of
      // each active lane in the wavefront. This will be our new value which we
      // will provide to the atomic operation.
      Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@ -236,7 +236,7 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
 }

-// FIXME: Compatability shim
+// FIXME: Compatibility shim
 static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@ -816,7 +816,7 @@ bool AMDGPUCodeGenPrepare::visitXor(BinaryOperator &I) {
  if (!RHS || !IntrinsicCall || RHS->getSExtValue() != -1)
    return visitBinaryOperator(I);

-  // Check if the Call is an intrinsic intruction to amdgcn_class intrinsic
+  // Check if the Call is an intrinsic instruction to amdgcn_class intrinsic
  // has only one use
  if (IntrinsicCall->getIntrinsicID() != Intrinsic::amdgcn_class ||
      !IntrinsicCall->hasOneUse())
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@ -56,7 +56,7 @@ static bool isNullConstantOrUndef(SDValue V) {
 }

 static bool getConstantValue(SDValue N, uint32_t &Out) {
-  // This is only used for packed vectors, where ussing 0 for undef should
+  // This is only used for packed vectors, where using 0 for undef should
  // always be good.
  if (N.isUndef()) {
    Out = 0;
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@ -1042,7 +1042,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
 /// In order to correctly lower the arguments we need to know the size of each
 /// argument.  Since Ins[x].VT gives us the size of the register that will
 /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
-/// for the orignal function argument so that we can deduce the correct memory
+/// for the original function argument so that we can deduce the correct memory
 /// type to use for Ins[x].  In most cases the correct memory type will be
 /// Ins[x].ArgVT.  However, this will not always be the case.  If, for example,
 /// we have a kernel argument of type v8i8, this argument will be split into
@ -2428,7 +2428,7 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons

 SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
                                               bool Signed) const {
-  // The regular method coverting a 64-bit integer to float roughly consists of
+  // The regular method converting a 64-bit integer to float roughly consists of
  // 2 steps: normalization and rounding. In fact, after normalization, the
  // conversion from a 64-bit integer to a float is essentially the same as the
  // one from a 32-bit integer. The only difference is that it has more
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@ -439,7 +439,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    if (!CWidth || !COffset)
      break;

-    // The case of Width == 0 is handled above, which makes this tranformation
+    // The case of Width == 0 is handled above, which makes this transformation
    // safe.  If Width == 0, then the ashr and lshr instructions become poison
    // value since the shift amount would be equal to the bit size.
    assert(Width != 0);
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@ -928,7 +928,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();

-    // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
+    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
@ -1242,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
 }

 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
-  // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
+  // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
  // SelectionDAG uses for wave32 vs wave64.
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
@ -2387,7 +2387,7 @@ void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
      STI.ldsRequiresM0Init()) {
    MachineBasicBlock *BB = I.getParent();

-    // If DS instructions require M0 initializtion, insert it before selecting.
+    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@ -59,7 +59,7 @@ static LLT getPow2ScalarType(LLT Ty) {
  return LLT::scalar(Pow2Bits);
 }

-/// \returs true if this is an odd sized vector which should widen by adding an
+/// \returns true if this is an odd sized vector which should widen by adding an
 /// additional element. This is mostly to handle <3 x s16> -> <4 x s16>. This
 /// excludes s1 vectors, which should always be scalarized.
 static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
@ -2487,7 +2487,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
  buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);

  if (Ty.getSizeInBits() == 32) {
-    // Truncate if this is a 32-bit constant adrdess.
+    // Truncate if this is a 32-bit constant address.
    auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
    B.buildExtract(DstReg, Load, 0);
  } else
@ -2989,7 +2989,7 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B,
    B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R);
 }

-// Build integer reciprocal sequence arounud V_RCP_IFLAG_F32
+// Build integer reciprocal sequence around V_RCP_IFLAG_F32
 //
 // Return lo, hi of result
 //
@ -4322,8 +4322,8 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
 /// to exposes all register repacking to the legalizer/combiners. We also don't
 /// want a selected instrution entering RegBankSelect. In order to avoid
 /// defining a multitude of intermediate image instructions, directly hack on
-/// the intrinsic's arguments. In cases like a16 addreses, this requires padding
-/// now unnecessary arguments with $noreg.
+/// the intrinsic's arguments. In cases like a16 addresses, this requires
+/// padding now unnecessary arguments with $noreg.
 bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
    MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer,
    const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
@ -4594,7 +4594,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
  MI.getOperand(0).setReg(NewResultReg);

  // In the IR, TFE is supposed to be used with a 2 element struct return
-  // type. The intruction really returns these two values in one contiguous
+  // type. The instruction really returns these two values in one contiguous
  // register, with one additional dword beyond the loaded data. Rewrite the
  // return type to use a single register result.

@ -4806,7 +4806,7 @@ bool AMDGPULegalizerInfo::legalizeTrapHsa(

 bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
-  // Is non-HSA path or trap-handler disabled? then, report a warning
+  // Is non-HSA path or trap-handler disabled? Then, report a warning
  // accordingly
  if (!ST.isTrapHandlerEnabled() ||
      ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@ -54,7 +54,7 @@ private:

  bool useNativeFunc(const StringRef F) const;

-  // Return a pointer (pointer expr) to the function if function defintion with
+  // Return a pointer (pointer expr) to the function if function definition with
  // "FuncName" exists. It may create a new function prototype in pre-link mode.
  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);

@ -660,7 +660,7 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
  if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
    return true;

-  // Specilized optimizations for each function call
+  // Specialized optimizations for each function call
  switch (FInfo.getId()) {
  case AMDGPULibFunc::EI_RECIP:
    // skip vector function
@ -1231,7 +1231,7 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
  return false;
 }

-// Get a scalar native builtin signle argument FP function
+// Get a scalar native builtin single argument FP function
 FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
                                                 const FuncInfo &FInfo) {
  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@ -352,7 +352,7 @@ const unsigned UnmangledFuncInfo::TableSize =
 static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
                                       const AMDGPULibFunc::Param (&Leads)[2]) {
  AMDGPULibFunc::Param Res = Leads[0];
-  // TBD - This switch may require to be extended for other intriniscs
+  // TBD - This switch may require to be extended for other intrinsics
  switch (id) {
  case AMDGPULibFunc::EI_SINCOS:
    Res.PtrKind = AMDGPULibFunc::BYVALUE;
@ -778,7 +778,7 @@ namespace {


 class ItaniumMangler {
-  SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substituions
+  SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substitutions
  bool  UseAddrSpace;

  int findSubst(const AMDGPULibFunc::Param& P) const {
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@ -119,7 +119,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
    // The llvm.amdgcn.module.lds instance is implicitly used by all kernels
    // that might call a function which accesses a field within it. This is
    // presently approximated to 'all kernels' if there are any such functions
-    // in the module. This implicit use is reified as an explicit use here so
+    // in the module. This implicit use is reified as an explicit use here so
    // that later passes, specifically PromoteAlloca, account for the required
    // memory without any knowledge of this transform.

--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@ -194,7 +194,7 @@ bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
 }

 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
-  assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
+  assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exists");
  PHISourcesT EmptySet;
  PHIInfoElementT *NewElement = new PHIInfoElementT();
  NewElement->DestReg = DestReg;
@ -813,7 +813,7 @@ void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
    } else {
      LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
      // We should be limited to only store registers that are live out from the
-      // lineaized region
+      // linearized region
      for (auto MBBI : SubRegion->MBBs) {
        storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
      }
@ -896,7 +896,7 @@ void LinearizedRegion::replaceRegister(unsigned Register,
  assert(Register != NewRegister && "Cannot replace a reg with itself");

  LLVM_DEBUG(
-      dbgs() << "Pepareing to replace register (region): "
+      dbgs() << "Preparing to replace register (region): "
             << printReg(Register, MRI->getTargetRegisterInfo()) << " with "
             << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");

@ -1404,7 +1404,7 @@ void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
    MachineInstr &Instr = *I;
    if (Instr.isPHI()) {
      unsigned PHIDestReg = getPHIDestReg(Instr);
-      LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
+      LLVM_DEBUG(dbgs() << "Extracting killed phi:\n");
      LLVM_DEBUG(Instr.dump());
      PHIs.insert(&Instr);
      PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
@ -1770,7 +1770,7 @@ static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
 static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
                                   MachineBasicBlock *EndMBB) {

-  // We have to check against the StartMBB successor becasuse a
+  // We have to check against the StartMBB successor because a
  // structurized region with a loop will have the entry block split,
  // and the backedge will go to the entry successor.
  DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
@ -2018,7 +2018,7 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
    LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
    if (!containsDef(CodeBB, InnerRegion, LI) ||
        (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
-      // If the register simly lives through the CodeBB, we don't have
+      // If the register simply lives through the CodeBB, we don't have
      // to rewrite anything since the register is not defined in this
      // part of the code.
      LLVM_DEBUG(dbgs() << "- through");
@ -2028,14 +2028,14 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
    unsigned Reg = LI;
    if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
      // If the register is live out, we do want to create a phi,
-      // unless it is from the Exit block, becasuse in that case there
+      // unless it is from the Exit block, because in that case there
      // is already a PHI, and no need to create a new one.

      // If the register is just a live out def and not part of a phi
      // chain, we need to create a PHI node to handle the if region,
      // and replace all uses outside of the region with the new dest
      // register, unless it is the outgoing BB select register. We have
-      // already creaed phi nodes for these.
+      // already created phi nodes for these.
      const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
      Register PHIDestReg = MRI->createVirtualRegister(RegClass);
      Register IfSourceReg = MRI->createVirtualRegister(RegClass);
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@ -149,7 +149,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
  IRBuilder<> Builder(Ctx);
  Type *I32Ty = Type::getInt32Ty(Ctx);
  unsigned UniqID = 0;
-  // NB: This is important for this string size to be divizable by 4
+  // NB: This is important for this string size to be divisible by 4
  const char NonLiteralStr[4] = "???";

  for (auto CI : Printfs) {
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@ -9,7 +9,7 @@
 /// \file
 /// \brief This pass propagates attributes from kernels to the non-entry
 /// functions. Most of the library functions were not compiled for specific ABI,
-/// yet will be correctly compiled if proper attrbutes are propagated from the
+/// yet will be correctly compiled if proper attributes are propagated from the
 /// caller.
 ///
 /// The pass analyzes call graph and propagates ABI target features through the
@ -17,7 +17,7 @@
 ///
 /// It can run in two modes: as a function or module pass. A function pass
 /// simply propagates attributes. A module pass clones functions if there are
-/// callers with different ABI. If a function is clonned all call sites will
+/// callers with different ABI. If a function is cloned all call sites will
 /// be updated to use a correct clone.
 ///
 /// A function pass is limited in functionality but can run early in the
@ -149,7 +149,7 @@ public:
  bool process(Module &M);
 };

-// Allows to propagate attributes early, but no clonning is allowed as it must
+// Allows to propagate attributes early, but no cloning is allowed as it must
 // be a function pass to run before any optimizations.
 // TODO: We shall only need a one instance of module pass, but that needs to be
 // in the linker pipeline which is currently not possible.
@ -168,7 +168,7 @@ public:
  bool runOnFunction(Function &F) override;
 };

-// Allows to propagate attributes with clonning but does that late in the
+// Allows to propagate attributes with cloning but does that late in the
 // pipeline.
 class AMDGPUPropagateAttributesLate : public ModulePass {
  const TargetMachine *TM;
@ -273,7 +273,7 @@ bool AMDGPUPropagateAttributes::process() {
        if (!NewF) {
          const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
          if (!AllowClone) {
-            // This may set different features on different iteartions if
+            // This may set different features on different iterations if
            // there is a contradiction in callers' attributes. In this case
            // we rely on a second pass running on Module, which is allowed
            // to clone.
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@ -58,7 +58,7 @@
 ///
 /// We avoid trying to solve this problem in RegBankSelect. Any VALU G_*
 /// operation should have its source operands all mapped to VGPRs (except for
-/// VCC), inserting copies from any SGPR operands. This the most trival legal
+/// VCC), inserting copies from any SGPR operands. This the most trivial legal
 /// mapping. Anything beyond the simplest 1:1 instruction selection would be too
 /// complicated to solve here. Every optimization pattern or instruction
 /// selected to multiple outputs would have to enforce this rule, and there
@ -118,7 +118,7 @@ public:
        Opc == AMDGPU::G_SEXT) {
      // LegalizerHelper wants to use the basic legalization artifacts when
      // widening etc. We don't handle selection with vcc in artifact sources,
-      // so we need to use a sslect instead to handle these properly.
+      // so we need to use a select instead to handle these properly.
      Register DstReg = MI.getOperand(0).getReg();
      Register SrcReg = MI.getOperand(1).getReg();
      const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI);
@ -282,7 +282,7 @@ AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
  // VCC-like use.
  if (TRI->isSGPRClass(&RC)) {
    // FIXME: This probably came from a copy from a physical register, which
-    // should be inferrrable from the copied to-type. We don't have many boolean
+    // should be inferable from the copied to-type. We don't have many boolean
    // physical register constraints so just assume a normal SGPR for now.
    if (!Ty.isValid())
      return AMDGPU::SGPRRegBank;
@ -1430,7 +1430,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
      OffsetBank == &AMDGPU::SGPRRegBank)
    return true; // Legal mapping

-  // FIXME: 96-bit case was widened during legalize. We neeed to narrow it back
+  // FIXME: 96-bit case was widened during legalize. We need to narrow it back
  // here but don't have an MMO.

  unsigned LoadSize = Ty.getSizeInBits();
@ -1455,7 +1455,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
                                        VOffset, SOffset, ImmOffset, Alignment);

  // TODO: 96-bit loads were widened to 128-bit results. Shrink the result if we
-  // can, but we neeed to track an MMO for that.
+  // can, but we need to track an MMO for that.
  const unsigned MemSize = (Ty.getSizeInBits() + 7) / 8;
  const Align MemAlign(4); // FIXME: ABI type alignment?
  MachineMemOperand *BaseMMO = MF.getMachineMemOperand(
@ -2153,7 +2153,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
      // The standard handling only considers the result register bank for
      // phis. For VCC, blindly inserting a copy when the phi is lowered will
      // produce an invalid copy. We can only copy with some kind of compare to
-      // get a vector boolean result. Insert a regitser bank copy that will be
+      // get a vector boolean result. Insert a register bank copy that will be
      // correctly lowered to a compare.
      MachineIRBuilder B(*MI.getParent()->getParent());

@ -3331,7 +3331,7 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
 }

-/// Return the mapping for a pointer arugment.
+/// Return the mapping for a pointer argument.
 const RegisterBankInfo::ValueMapping *
 AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
                                              Register PtrReg) const {
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
@ -142,7 +142,7 @@ class ReplaceLDSUseImpl {
  // Returns true if uses of given LDS global within non-kernel functions should
  // be keep as it is without pointer replacement.
  bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
-    // LDS whose size is very small and doesn`t exceed pointer size is not worth
+    // LDS whose size is very small and doesn't exceed pointer size is not worth
    // replacing.
    if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
      return true;
@ -158,7 +158,7 @@ class ReplaceLDSUseImpl {

  // Insert new global LDS pointer which points to LDS.
  GlobalVariable *createLDSPointer(GlobalVariable *GV) {
-    // LDS pointer which points to LDS is already created? return it.
+    // LDS pointer which points to LDS is already created? Return it.
    auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr));
    if (!PointerEntry.second)
      return PointerEntry.first->second;
@ -185,7 +185,7 @@ class ReplaceLDSUseImpl {
  // Split entry basic block in such a way that only lane 0 of each wave does
  // the LDS pointer initialization, and return newly created basic block.
  BasicBlock *activateLaneZero(Function *K) {
-    // If the entry basic block of kernel K is already splitted, then return
+    // If the entry basic block of kernel K is already split, then return
    // newly created basic block.
    auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr));
    if (!BasicBlockEntry.second)
@ -204,7 +204,7 @@ class ReplaceLDSUseImpl {

    BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();

-    // Mark that the entry basic block of kernel K is splitted.
+    // Mark that the entry basic block of kernel K is split.
    KernelToInitBB[K] = NBB;

    return NBB;
@ -235,7 +235,7 @@ class ReplaceLDSUseImpl {
  }

  // We have created an LDS pointer for LDS, and initialized it to point-to LDS
-  // within all relevent kernels. Now replace all the uses of LDS within
+  // within all relevant kernels. Now replace all the uses of LDS within
  // non-kernel functions by LDS pointer.
  void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
    SmallVector<User *, 8> LDSUsers(GV->users());
@ -268,8 +268,8 @@ class ReplaceLDSUseImpl {
            convertConstantExprsToInstructions(I, CE, &UserInsts);
          }

-          // Go through all the user instrutions, if LDS exist within them as an
-          // operand, then replace it by replace instruction.
+          // Go through all the user instructions; if LDS exists within them as
+          // an operand, then replace it with the replacement instruction.
          for (auto *II : UserInsts) {
            auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
            II->replaceUsesOfWith(GV, ReplaceInst);
@ -373,7 +373,7 @@ bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
    return false;

  // We have created an LDS pointer for LDS, and initialized it to point-to LDS
-  // within all relevent kernels. Now replace all the uses of LDS within
+  // within all relevant kernels. Now replace all the uses of LDS within
  // non-kernel functions by LDS pointer.
  replaceLDSUseByPointer(GV, LDSPointer);

--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@ -142,8 +142,8 @@ bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {

      // Special case handle structs with single members. It is useful to handle
      // some casts between structs and non-structs, but we can't bitcast
-      // directly between them.  directly bitcast between them.  Blender uses
-      // some casts that look like { <3 x float> }* to <4 x float>*
+      // directly between them. Blender uses some casts that look like
+      // { <3 x float> }* to <4 x float>*
      if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
        return false;

@ -259,7 +259,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {

    // Keep retrying if we are able to successfully eliminate an argument. This
    // helps with cases with multiple arguments which may alias, such as in a
-    // sincos implemntation. If we have 2 stores to arguments, on the first
+    // sincos implementation. If we have 2 stores to arguments, on the first
    // attempt the MDA query will succeed for the second store but not the
    // first. On the second iteration we've removed that out clobbering argument
    // (by effectively moving it into another function) and will find the second
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@ -1019,7 +1019,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
    return true;
  }

-  // Link as much SALU intructions in chain as possible. Return the size
+  // Link as many SALU instructions in chain as possible. Return the size
  // of the chain. Links up to MaxChain instructions.
  unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
                         SmallPtrSetImpl<SUnit *> &Visited) const {
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@ -879,7 +879,7 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createLICMPass());
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
-  // ReassociateGEPs exposes more opportunites for SLSR. See
+  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
@ -1277,7 +1277,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
  // Commit allocated register changes. This is mostly necessary because too
  // many things rely on the use lists of the physical registers, such as the
  // verifier. This is only necessary with allocators which use LiveIntervals,
-  // since FastRegAlloc does the replacments itself.
+  // since FastRegAlloc does the replacements itself.
  addPass(createVirtRegRewriter(false));

  // Equivalent of PEI for SGPRs.