[bolt] Fix typos discovered by codespell (#124726)
https://github.com/codespell-project/codespell ```bash codespell bolt --skip="*.yaml,Maintainers.txt" --write-changes \ --ignore-words-list=acount,alledges,ans,archtype,defin,iself,mis,mmaped,othere,outweight,vas ```
This commit is contained in:
parent
b228a18e57
commit
0fc05aa1c6
@ -1,7 +1,7 @@
|
||||
# BOLT-based binary analysis
|
||||
|
||||
As part of post-link-time optimizing, BOLT needs to perform a range of analyses
|
||||
on binaries such as recontructing control flow graphs, and more.
|
||||
on binaries such as reconstructing control flow graphs, and more.
|
||||
|
||||
The `llvm-bolt-binary-analysis` tool enables running requested binary analyses
|
||||
on binaries, and generating reports. It does this by building on top of the
|
||||
|
||||
@ -375,7 +375,7 @@
|
||||
|
||||
- `--use-old-text`
|
||||
|
||||
Re-use space in old .text if possible (relocation mode)
|
||||
Reuse space in old .text if possible (relocation mode)
|
||||
|
||||
- `-v <uint>`
|
||||
|
||||
|
||||
@ -15,7 +15,7 @@ However, this approach quickly becomes awkward if we want to insert a lot of cod
|
||||
Currently, our runtime library is written in C++ and contains code that helps us instrument a binary.
|
||||
|
||||
### Limitations
|
||||
Our library is not written with regular C++ code as it is not linked against any other libraries (this means we cannnot rely on anything defined on libstdc++, glibc, libgcc etc), but is self sufficient. In runtime/CMakeLists.txt, we can see it is built with -ffreestanding, which requires the compiler to avoid using a runtime library by itself.
|
||||
Our library is not written with regular C++ code as it is not linked against any other libraries (this means we cannot rely on anything defined in libstdc++, glibc, libgcc, etc.), but is self-sufficient. In runtime/CMakeLists.txt, we can see it is built with -ffreestanding, which requires the compiler to avoid using a runtime library by itself.
|
||||
|
||||
While this requires us to make our own syscalls, it does simplify our linker a lot, which is very limited and can only do basic function name resolving. However, this is a big improvement in comparison with programmatically generating the code in assembly language using MCInsts.
|
||||
|
||||
|
||||
@ -1070,7 +1070,7 @@ HTML_STYLESHEET =
|
||||
# defined cascading style sheet that is included after the standard style sheets
|
||||
# created by doxygen. Using this option one can overrule certain style aspects.
|
||||
# This is preferred over using HTML_STYLESHEET since it does not replace the
|
||||
# standard style sheet and is therefor more robust against future updates.
|
||||
# standard style sheet and is therefore more robust against future updates.
|
||||
# Doxygen will copy the style sheet file to the output directory. For an example
|
||||
# see the documentation.
|
||||
# This tag requires that the tag GENERATE_HTML is set to YES.
|
||||
|
||||
@ -1336,7 +1336,7 @@ public:
|
||||
ColdCodeSectionName = Name.str();
|
||||
}
|
||||
|
||||
/// Return true iif the function will halt execution on entry.
|
||||
/// Return true if the function will halt execution on entry.
|
||||
bool trapsOnEntry() const { return TrapsOnEntry; }
|
||||
|
||||
/// Make the function always trap on entry. Other than the trap instruction,
|
||||
|
||||
@ -60,7 +60,7 @@ public:
|
||||
uint32_t UnitLength = 0;
|
||||
bool IsConstructed = false;
|
||||
// A map of DIE offsets in original DWARF section to DIE ID.
|
||||
// Whih is used to access DieInfoVector.
|
||||
// Which is used to access DieInfoVector.
|
||||
std::unordered_map<uint64_t, uint32_t> DIEIDMap;
|
||||
|
||||
// Some STL implementations don't have a noexcept move constructor for
|
||||
|
||||
@ -326,8 +326,8 @@ public:
|
||||
/// Write out entries in to .debug_addr section for CUs.
|
||||
virtual std::optional<uint64_t> finalize(const size_t BufferSize);
|
||||
|
||||
/// Return buffer with all the entries in .debug_addr already writen out using
|
||||
/// update(...).
|
||||
/// Return buffer with all the entries in .debug_addr already written out
|
||||
/// using update(...).
|
||||
virtual std::unique_ptr<AddressSectionBuffer> releaseBuffer() {
|
||||
return std::move(Buffer);
|
||||
}
|
||||
@ -409,7 +409,7 @@ protected:
|
||||
std::mutex WriterMutex;
|
||||
std::unique_ptr<AddressSectionBuffer> Buffer;
|
||||
std::unique_ptr<raw_svector_ostream> AddressStream;
|
||||
/// Used to track sections that were not modified so that they can be re-used.
|
||||
/// Used to track sections that were not modified so that they can be reused.
|
||||
static DenseMap<uint64_t, uint64_t> UnmodifiedAddressOffsets;
|
||||
};
|
||||
|
||||
|
||||
@ -65,7 +65,7 @@ public:
|
||||
void setCurrentUnit(DWARFUnit &Unit, const uint64_t UnitStartOffset);
|
||||
/// Emit Accelerator table.
|
||||
void emitAccelTable();
|
||||
/// Returns true if the table was crated.
|
||||
/// Returns true if the table was created.
|
||||
bool isCreated() const { return NeedToCreate; }
|
||||
/// Returns buffer containing the accelerator table.
|
||||
std::unique_ptr<DebugBufferVector> releaseBuffer() {
|
||||
@ -91,7 +91,7 @@ private:
|
||||
uint64_t CurrentUnitOffset = 0;
|
||||
const DWARFUnit *CurrentUnit = nullptr;
|
||||
std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap;
|
||||
/// Contains a map of TU hashes to a Foreign TU indecies.
|
||||
/// Contains a map of TU hashes to a Foreign TU indices.
|
||||
/// This is used to reduce the size of Foreign TU list since there could be
|
||||
/// multiple TUs with the same hash.
|
||||
DenseMap<uint64_t, uint32_t> TUHashToIndexMap;
|
||||
|
||||
@ -432,7 +432,7 @@ public:
|
||||
return Analysis->isConditionalBranch(Inst);
|
||||
}
|
||||
|
||||
/// Returns true if Inst is a condtional move instruction
|
||||
/// Returns true if Inst is a conditional move instruction
|
||||
virtual bool isConditionalMove(const MCInst &Inst) const {
|
||||
llvm_unreachable("not implemented");
|
||||
return false;
|
||||
@ -1564,7 +1564,7 @@ public:
|
||||
}
|
||||
|
||||
/// Get the default def_in and live_out registers for the function
|
||||
/// Currently only used for the Stoke optimzation
|
||||
/// Currently only used for the Stoke optimization
|
||||
virtual void getDefaultDefIn(BitVector &Regs) const {
|
||||
llvm_unreachable("not implemented");
|
||||
}
|
||||
|
||||
@ -37,7 +37,7 @@ struct FrameIndexEntry {
|
||||
int64_t StackOffset;
|
||||
uint8_t Size;
|
||||
|
||||
/// If this is false, we will never atempt to remove or optimize this
|
||||
/// If this is false, we will never attempt to remove or optimize this
|
||||
/// instruction. We just use it to keep track of stores we don't fully
|
||||
/// understand but we know it may write to a frame position.
|
||||
bool IsSimple;
|
||||
|
||||
@ -30,7 +30,7 @@ namespace bolt {
|
||||
/// 64-bit range, we guarantee it can reach any code location.
|
||||
///
|
||||
class LongJmpPass : public BinaryFunctionPass {
|
||||
/// Used to implement stub grouping (re-using a stub from one function into
|
||||
/// Used to implement stub grouping (reusing a stub from one function into
|
||||
/// another)
|
||||
using StubTy = std::pair<uint64_t, BinaryBasicBlock *>;
|
||||
using StubGroupTy = SmallVector<StubTy, 4>;
|
||||
|
||||
@ -49,7 +49,7 @@
|
||||
// aggregates the block gaps into 2 values for the function: "weighted" is the
|
||||
// weighted average of the block conservation gaps, where the weights depend on
|
||||
// each block's execution count and instruction count; "worst" is the worst
|
||||
// (biggest) block gap acorss all basic blocks in the function with an execution
|
||||
// (biggest) block gap across all basic blocks in the function with an execution
|
||||
// count of > 500. The pass then reports the 95th percentile of the weighted and
|
||||
// worst values of the 1000 functions in a single BOLT-INFO line. The smaller
|
||||
// the reported values are, the better the BOLT profile satisfies the function
|
||||
|
||||
@ -26,7 +26,7 @@ namespace bolt {
|
||||
|
||||
/// Objects of this class implement various basic block clustering algorithms.
|
||||
/// Basic block clusters are chains of basic blocks that should be laid out
|
||||
/// in this order to maximize performace. These algorithms group basic blocks
|
||||
/// in this order to maximize performance. These algorithms group basic blocks
|
||||
/// into clusters using execution profile data and various heuristics.
|
||||
class ClusterAlgorithm {
|
||||
public:
|
||||
|
||||
@ -128,7 +128,7 @@ private:
|
||||
CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
|
||||
GDBIndex &GDBIndexSection);
|
||||
|
||||
/// Process and write out CUs that are passsed in.
|
||||
/// Process and write out CUs that are passed in.
|
||||
void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
|
||||
CUOffsetMap &CUMap,
|
||||
const std::list<DWARFUnit *> &CUs,
|
||||
|
||||
@ -844,7 +844,7 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
|
||||
auto isSibling = std::bind(&BinaryContext::areRelatedFragments, this,
|
||||
&Function, std::placeholders::_1);
|
||||
assert(llvm::all_of(JT->Parents, isSibling) &&
|
||||
"cannot re-use jump table of a different function");
|
||||
"cannot reuse jump table of a different function");
|
||||
(void)isSibling;
|
||||
if (opts::Verbosity > 2) {
|
||||
this->outs() << "BOLT-INFO: multiple fragments access the same jump table"
|
||||
@ -860,7 +860,7 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
|
||||
return JT->getFirstLabel();
|
||||
}
|
||||
|
||||
// Re-use the existing symbol if possible.
|
||||
// Reuse the existing symbol if possible.
|
||||
MCSymbol *JTLabel = nullptr;
|
||||
if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
|
||||
if (!isInternalSymbolName(Object->getSymbol()->getName()))
|
||||
|
||||
@ -3875,7 +3875,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
|
||||
if (FunctionSymbol == Symbol)
|
||||
return 0;
|
||||
|
||||
// Check all secondary entries available as either basic blocks or lables.
|
||||
// Check all secondary entries available as either basic blocks or labels.
|
||||
uint64_t NumEntries = 1;
|
||||
for (const BinaryBasicBlock *BB : BasicBlocks) {
|
||||
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
|
||||
|
||||
@ -122,7 +122,7 @@ buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
|
||||
// create a node for a function unless it was the target of a call from
|
||||
// a hot block. The alternative would be to set the count to one or
|
||||
// accumulate the number of calls from the callsite into the function
|
||||
// samples. Results from perfomance testing seem to favor the zero
|
||||
// samples. Results from performance testing seem to favor the zero
|
||||
// count though, so I'm leaving it this way for now.
|
||||
return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
|
||||
}
|
||||
|
||||
@ -137,7 +137,7 @@ void DIEBuilder::updateReferences() {
|
||||
DIEInteger(NewAddr));
|
||||
}
|
||||
|
||||
// Handling referenes in location expressions.
|
||||
// Handling references in location expressions.
|
||||
for (LocWithReference &LocExpr : getState().LocWithReferencesToProcess) {
|
||||
SmallVector<uint8_t, 32> Buffer;
|
||||
DataExtractor Data(StringRef((const char *)LocExpr.BlockData.data(),
|
||||
@ -336,7 +336,7 @@ void DIEBuilder::buildCompileUnits(const bool Init) {
|
||||
registerUnit(*DU, false);
|
||||
}
|
||||
|
||||
// Using DULIst since it can be modified by cross CU refrence resolution.
|
||||
// Using DUList since it can be modified by cross-CU reference resolution.
|
||||
for (DWARFUnit *DU : getState().DUList) {
|
||||
if (DU->isTypeUnit())
|
||||
continue;
|
||||
@ -508,7 +508,7 @@ void DIEBuilder::finish() {
|
||||
UnitStartOffset += CurUnitInfo.UnitLength;
|
||||
};
|
||||
// Computing offsets for .debug_types section.
|
||||
// It's processed first when CU is registered so will be at the begginnig of
|
||||
// It's processed first when CU is registered so will be at the beginning of
|
||||
// the vector.
|
||||
uint64_t TypeUnitStartOffset = 0;
|
||||
for (DWARFUnit *CU : getState().DUList) {
|
||||
|
||||
@ -876,7 +876,7 @@ void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit,
|
||||
DIEValue StrListBaseAttrInfo =
|
||||
Die.findAttribute(dwarf::DW_AT_str_offsets_base);
|
||||
auto RetVal = ProcessedBaseOffsets.find(*Val);
|
||||
// Handling re-use of str-offsets section.
|
||||
// Handling reuse of str-offsets section.
|
||||
if (RetVal == ProcessedBaseOffsets.end() || StrOffsetSectionWasModified) {
|
||||
initialize(Unit);
|
||||
// Update String Offsets that were modified.
|
||||
@ -1167,7 +1167,7 @@ void DwarfLineTable::emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
|
||||
// For functions that we do not modify we output them as raw data.
|
||||
// Re-constructing .debug_line_str so that offsets are correct for those
|
||||
// debug line tables.
|
||||
// Bonus is that when we output a final binary we can re-use .debug_line_str
|
||||
// Bonus is that when we output a final binary we can reuse .debug_line_str
|
||||
// section. So we don't have to do the SHF_ALLOC trick we did with
|
||||
// .debug_line.
|
||||
static void parseAndPopulateDebugLineStr(BinarySection &LineStrSection,
|
||||
|
||||
@ -55,7 +55,7 @@ DWARF5AcceleratorTable::DWARF5AcceleratorTable(
|
||||
llvm::hash_value(llvm::StringRef(CStr)), StrOffset);
|
||||
if (!R.second)
|
||||
BC.errs()
|
||||
<< "BOLT-WARNING: [internal-dwarf-error]: collision occured on "
|
||||
<< "BOLT-WARNING: [internal-dwarf-error]: collision occurred on "
|
||||
<< CStr << " at offset : 0x" << Twine::utohexstr(StrOffset)
|
||||
<< ". Previous string offset is: 0x"
|
||||
<< Twine::utohexstr(R.first->second) << ".\n";
|
||||
@ -86,7 +86,7 @@ void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit,
|
||||
if (Unit.isTypeUnit()) {
|
||||
if (DWOID) {
|
||||
// We are adding an entry for a DWO TU. The DWO CU might not have any entries,
|
||||
// so need to add it to the list pre-emptively.
|
||||
// so need to add it to the list preemptively.
|
||||
auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
|
||||
if (Iter.second)
|
||||
CUList.push_back(BADCUOFFSET);
|
||||
|
||||
@ -60,7 +60,7 @@ namespace llvm {
|
||||
namespace bolt {
|
||||
|
||||
// Align function to the specified byte-boundary (typically, 64) offsetting
|
||||
// the fuction by not more than the corresponding value
|
||||
// the function by not more than the corresponding value
|
||||
static void alignMaxBytes(BinaryFunction &Function) {
|
||||
Function.setAlignment(opts::AlignFunctions);
|
||||
Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
|
||||
@ -68,7 +68,7 @@ static void alignMaxBytes(BinaryFunction &Function) {
|
||||
}
|
||||
|
||||
// Align function to the specified byte-boundary (typically, 64) offsetting
|
||||
// the fuction by not more than the minimum over
|
||||
// the function by not more than the minimum over
|
||||
// -- the size of the function
|
||||
// -- the specified number of bytes
|
||||
static void alignCompact(BinaryFunction &Function,
|
||||
|
||||
@ -145,7 +145,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) {
|
||||
const bool CannotUseREX = BC.MIB->cannotUseREX(Inst);
|
||||
const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode());
|
||||
|
||||
// Disallow substituitions involving regs in implicit uses lists
|
||||
// Disallow substitutions involving regs in implicit uses lists
|
||||
for (MCPhysReg ImplicitUse : Desc.implicit_uses()) {
|
||||
const size_t RegEC =
|
||||
BC.MIB->getAliases(ImplicitUse, false).find_first();
|
||||
@ -153,7 +153,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) {
|
||||
std::numeric_limits<decltype(RegScore)::value_type>::min();
|
||||
}
|
||||
|
||||
// Disallow substituitions involving regs in implicit defs lists
|
||||
// Disallow substitutions involving regs in implicit defs lists
|
||||
for (MCPhysReg ImplicitDef : Desc.implicit_defs()) {
|
||||
const size_t RegEC =
|
||||
BC.MIB->getAliases(ImplicitDef, false).find_first();
|
||||
@ -174,7 +174,7 @@ void RegReAssign::rankRegisters(BinaryFunction &Function) {
|
||||
if (RegEC == 0)
|
||||
continue;
|
||||
|
||||
// Disallow substituitions involving regs in instrs that cannot use REX
|
||||
// Disallow substitutions involving regs in instrs that cannot use REX
|
||||
// The relationship of X86 registers is shown in the diagram. BL and BH
|
||||
// do not have a direct alias relationship. However, if the BH register
|
||||
// cannot be swapped, then the BX/EBX/RBX registers cannot be swapped as
|
||||
|
||||
@ -386,7 +386,7 @@ private:
|
||||
}
|
||||
|
||||
/// Compute sum of scores over jumps within \p BlockOrder given \p SplitIndex.
|
||||
/// Increament Score.LocalScore in place by the sum.
|
||||
/// Increment Score.LocalScore in place by the sum.
|
||||
void computeJumpScore(const BasicBlockOrder &BlockOrder,
|
||||
const size_t SplitIndex, SplitScore &Score) {
|
||||
|
||||
@ -413,7 +413,7 @@ private:
|
||||
}
|
||||
|
||||
/// Compute sum of scores over calls originated in the current function
|
||||
/// given \p SplitIndex. Increament Score.LocalScore in place by the sum.
|
||||
/// given \p SplitIndex. Increment Score.LocalScore in place by the sum.
|
||||
void computeLocalCallScore(const BasicBlockOrder &BlockOrder,
|
||||
const size_t SplitIndex, SplitScore &Score) {
|
||||
if (opts::CallScale == 0)
|
||||
@ -455,7 +455,7 @@ private:
|
||||
}
|
||||
|
||||
/// Compute sum of splitting scores for cover calls of the input function.
|
||||
/// Increament Score.CoverCallScore in place by the sum.
|
||||
/// Increment Score.CoverCallScore in place by the sum.
|
||||
void computeCoverCallScore(const BasicBlockOrder &BlockOrder,
|
||||
const size_t SplitIndex,
|
||||
const std::vector<CallInfo> &CoverCalls,
|
||||
@ -467,7 +467,7 @@ private:
|
||||
assert(CI.Length >= Score.HotSizeReduction &&
|
||||
"Length of cover calls must exceed reduced size of hot fragment.");
|
||||
// Compute the new length of the call, which is shorter than the original
|
||||
// one by the size of the splitted fragment minus the total size increase.
|
||||
// one by the size of the split fragment minus the total size increase.
|
||||
const size_t NewCallLength = CI.Length - Score.HotSizeReduction;
|
||||
Score.CoverCallScore += computeCallScore(CI.Count, NewCallLength);
|
||||
}
|
||||
@ -502,12 +502,12 @@ private:
|
||||
|
||||
// First part of LocalScore is the sum over call edges originated in the
|
||||
// input function. These edges can get shorter or longer depending on
|
||||
// SplitIndex. Score.LocalScore is increamented in place.
|
||||
// SplitIndex. Score.LocalScore is incremented in place.
|
||||
computeLocalCallScore(BlockOrder, SplitIndex, Score);
|
||||
|
||||
// Second part of LocalScore is the sum over jump edges with src basic block
|
||||
// and dst basic block in the current function. Score.LocalScore is
|
||||
// increamented in place.
|
||||
// incremented in place.
|
||||
computeJumpScore(BlockOrder, SplitIndex, Score);
|
||||
|
||||
// Compute CoverCallScore and store in Score in place.
|
||||
|
||||
@ -907,7 +907,7 @@ ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
|
||||
StringRef NumStr = NumStrRes.get();
|
||||
uint64_t Num;
|
||||
if (NumStr.getAsInteger(16, Num)) {
|
||||
reportError("expected hexidecimal number");
|
||||
reportError("expected hexadecimal number");
|
||||
Diag << "Found: " << NumStr << "\n";
|
||||
return make_error_code(llvm::errc::io_error);
|
||||
}
|
||||
|
||||
@ -48,7 +48,7 @@ public:
|
||||
};
|
||||
|
||||
Error BuildIDRewriter::sectionInitializer() {
|
||||
// Typically, build ID will reside in .note.gnu.build-id section. Howerver,
|
||||
// Typically, build ID will reside in .note.gnu.build-id section. However,
|
||||
// a linker script can change the section name and such is the case with
|
||||
// the Linux kernel. Hence, we iterate over all note sections.
|
||||
for (BinarySection &NoteSection : BC.sections()) {
|
||||
|
||||
@ -1723,7 +1723,7 @@ StringRef getSectionName(const SectionRef &Section) {
|
||||
return Name;
|
||||
}
|
||||
|
||||
// Exctracts an appropriate slice if input is DWP.
|
||||
// Extracts an appropriate slice if input is DWP.
|
||||
// Applies patches or overwrites the section.
|
||||
std::optional<StringRef> updateDebugData(
|
||||
DWARFContext &DWCtx, StringRef SectionName, StringRef SectionContents,
|
||||
@ -1759,7 +1759,7 @@ std::optional<StringRef> updateDebugData(
|
||||
auto Iter = OverridenSections.find(Kind);
|
||||
if (Iter == OverridenSections.end()) {
|
||||
errs()
|
||||
<< "BOLT-WARNING: [internal-dwarf-error]: Could not find overriden "
|
||||
<< "BOLT-WARNING: [internal-dwarf-error]: Could not find overridden "
|
||||
"section for: "
|
||||
<< Twine::utohexstr(DWOId) << ".\n";
|
||||
return std::nullopt;
|
||||
@ -1991,7 +1991,7 @@ void DWARFRewriter::convertToRangesPatchDebugInfo(
|
||||
}
|
||||
}
|
||||
|
||||
// HighPC was conveted into DW_AT_ranges.
|
||||
// HighPC was converted into DW_AT_ranges.
|
||||
// For DWARF5 we only access ranges through index.
|
||||
|
||||
DIEBldr.replaceValue(&Die, HighPCAttrInfo.getAttribute(), dwarf::DW_AT_ranges,
|
||||
|
||||
@ -308,7 +308,7 @@ void PseudoProbeRewriter::encodePseudoProbes() {
|
||||
Contents.append(OSE.str().begin(), OSE.str().end());
|
||||
};
|
||||
|
||||
// Emit indiviual pseudo probes in a inline tree node
|
||||
// Emit individual pseudo probes in an inline tree node
|
||||
// Probe index, type, attribute, address type and address are encoded
|
||||
// Address of the first probe is absolute.
|
||||
// Other probes' addresses are represented by deltas
|
||||
|
||||
@ -1087,7 +1087,7 @@ void RewriteInstance::discoverFileObjects() {
|
||||
|
||||
if (SymbolAddress == Section->getAddress() + Section->getSize()) {
|
||||
assert(SymbolSize == 0 &&
|
||||
"unexpect non-zero sized symbol at end of section");
|
||||
"unexpected non-zero sized symbol at end of section");
|
||||
LLVM_DEBUG(
|
||||
dbgs()
|
||||
<< "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
|
||||
@ -2440,7 +2440,7 @@ void RewriteInstance::processDynamicRelocations() {
|
||||
}
|
||||
|
||||
// The rest of dynamic relocations - DT_RELA.
|
||||
// The static executable might have .rela.dyn secion and not have PT_DYNAMIC
|
||||
// The static executable might have .rela.dyn section and not have PT_DYNAMIC
|
||||
if (!DynamicRelocationsSize && BC->IsStaticExecutable) {
|
||||
ErrorOr<BinarySection &> DynamicRelSectionOrErr =
|
||||
BC->getUniqueSectionByName(getRelaDynSectionName());
|
||||
@ -5017,7 +5017,7 @@ void RewriteInstance::updateELFSymbolTable(
|
||||
if (!Section)
|
||||
return false;
|
||||
|
||||
// Remove the section symbol iif the corresponding section was stripped.
|
||||
// Remove the section symbol if the corresponding section was stripped.
|
||||
if (Symbol.getType() == ELF::STT_SECTION) {
|
||||
if (!getNewSectionIndex(Symbol.st_shndx))
|
||||
return true;
|
||||
|
||||
@ -1296,7 +1296,7 @@ public:
|
||||
AArch64_AM::ShiftExtendType ExtendType =
|
||||
AArch64_AM::getArithExtendType(OperandExtension);
|
||||
if (ShiftVal != 2) {
|
||||
// TODO: Handle the patten where ShiftVal != 2.
|
||||
// TODO: Handle the pattern where ShiftVal != 2.
|
||||
// The following code sequence below has no shift amount,
|
||||
// the range could be 0 to 4.
|
||||
// The pattern comes from libc, it occurs when the binary is static.
|
||||
@ -1626,7 +1626,7 @@ public:
|
||||
int getUncondBranchEncodingSize() const override { return 28; }
|
||||
|
||||
// This helper function creates the snippet of code that compares a register
|
||||
// RegNo with an immedaite Imm, and jumps to Target if they are equal.
|
||||
// RegNo with an immediate Imm, and jumps to Target if they are equal.
|
||||
// cmp RegNo, #Imm
|
||||
// b.eq Target
|
||||
// where cmp is an alias for subs, which results in the code below:
|
||||
@ -1648,7 +1648,7 @@ public:
|
||||
}
|
||||
|
||||
// This helper function creates the snippet of code that compares a register
|
||||
// RegNo with an immedaite Imm, and jumps to Target if they are not equal.
|
||||
// RegNo with an immediate Imm, and jumps to Target if they are not equal.
|
||||
// cmp RegNo, #Imm
|
||||
// b.ne Target
|
||||
// where cmp is an alias for subs, which results in the code below:
|
||||
|
||||
@ -2715,7 +2715,7 @@ public:
|
||||
|
||||
bool FoundOne = false;
|
||||
|
||||
// Iterate only through src operands that arent also dest operands
|
||||
// Iterate only through src operands that aren't also dest operands
|
||||
for (unsigned Index = InstDesc.getNumDefs() + (HasLHS ? 1 : 0),
|
||||
E = InstDesc.getNumOperands();
|
||||
Index != E; ++Index) {
|
||||
|
||||
@ -285,7 +285,7 @@ cl::opt<bool> TimeRewrite("time-rewrite",
|
||||
|
||||
cl::opt<bool> UseOldText(
|
||||
"use-old-text",
|
||||
cl::desc("re-use space in old .text if possible (relocation mode)"),
|
||||
cl::desc("reuse space in old .text if possible (relocation mode)"),
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
cl::opt<bool> UpdateDebugSections(
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
{}
|
||||
#endif
|
||||
|
||||
// Function constains trampoline to _start,
|
||||
// Function contains trampoline to _start,
|
||||
// so we can resume regular execution of the function that we hooked.
|
||||
extern void __bolt_hugify_start_program();
|
||||
|
||||
|
||||
@ -214,7 +214,7 @@ private:
|
||||
/// __bolt_instr_setup, our initialization routine.
|
||||
BumpPtrAllocator *GlobalAlloc;
|
||||
|
||||
// Base address which we substract from recorded PC values when searching for
|
||||
// Base address which we subtract from recorded PC values when searching for
|
||||
// indirect call description entries. Needed because indCall descriptions are
|
||||
// mapped read-only and contain static addresses. Initialized in
|
||||
// __bolt_instr_setup.
|
||||
@ -261,7 +261,7 @@ struct SimpleHashTableEntryBase {
|
||||
// Currently we have to do it the ugly way because
|
||||
// we want every message to be printed atomically via a single call to
|
||||
// __write. If we use reportNumber() and others multiple times, we'll get
|
||||
// garbage in mulithreaded environment
|
||||
// garbage in multithreaded environment
|
||||
char Buf[BufSize];
|
||||
char *Ptr = Buf;
|
||||
Ptr = intToStr(Ptr, __getpid(), 10);
|
||||
@ -1585,7 +1585,7 @@ __bolt_instr_data_dump(int FD, const char *LibPath = nullptr,
|
||||
/// at user-specified intervals
|
||||
void watchProcess() {
|
||||
timespec ts, rem;
|
||||
uint64_t Ellapsed = 0ull;
|
||||
uint64_t Elapsed = 0ull;
|
||||
int FD = openProfile();
|
||||
uint64_t ppid;
|
||||
if (__bolt_instr_wait_forks) {
|
||||
@ -1615,10 +1615,10 @@ void watchProcess() {
|
||||
break;
|
||||
}
|
||||
|
||||
if (++Ellapsed < __bolt_instr_sleep_time)
|
||||
if (++Elapsed < __bolt_instr_sleep_time)
|
||||
continue;
|
||||
|
||||
Ellapsed = 0;
|
||||
Elapsed = 0;
|
||||
__bolt_instr_data_dump(FD);
|
||||
if (__bolt_instr_no_counters_clear == false)
|
||||
__bolt_instr_clear_counters();
|
||||
|
||||
@ -41,7 +41,7 @@
|
||||
// Anonymous namespace covering everything but our library entry point
|
||||
namespace {
|
||||
|
||||
// Get the difference between runtime addrress of .text section and
|
||||
// Get the difference between runtime address of .text section and
|
||||
// static address in section header table. Can be extracted from arbitrary
|
||||
// pc value recorded at runtime to get the corresponding static address, which
|
||||
// in turn can be used to search for indirect call description. Needed because
|
||||
|
||||
@ -105,7 +105,7 @@
|
||||
// Anonymous namespace covering everything but our library entry point
|
||||
namespace {
|
||||
|
||||
// Get the difference between runtime addrress of .text section and
|
||||
// Get the difference between runtime address of .text section and
|
||||
// static address in section header table. Can be extracted from arbitrary
|
||||
// pc value recorded at runtime to get the corresponding static address, which
|
||||
// in turn can be used to search for indirect call description. Needed because
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
|
||||
namespace {
|
||||
|
||||
// Get the difference between runtime addrress of .text section and
|
||||
// Get the difference between runtime address of .text section and
|
||||
// static address in section header table. Can be extracted from arbitrary
|
||||
// pc value recorded at runtime to get the corresponding static address, which
|
||||
// in turn can be used to search for indirect call description. Needed because
|
||||
@ -171,8 +171,9 @@ uint64_t __exit(uint64_t code) {
|
||||
#if !defined(__APPLE__)
|
||||
// We use a stack-allocated buffer for string manipulation in many pieces of
|
||||
// this code, including the code that prints each line of the fdata file. This
|
||||
// buffer needs to accomodate large function names, but shouldn't be arbitrarily
|
||||
// large (dynamically allocated) for simplicity of our memory space usage.
|
||||
// buffer needs to accommodate large function names, but shouldn't be
|
||||
// arbitrarily large (dynamically allocated) for simplicity of our memory space
|
||||
// usage.
|
||||
|
||||
// Declare some syscall wrappers we use throughout this code to avoid linking
|
||||
// against system libc.
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# RUN: split-file %s %t
|
||||
|
||||
// For the first test case, in case the nop before .Lci will be removed
|
||||
// the pointer to exit function won't be alinged and the test will fail.
|
||||
// the pointer to exit function won't be aligned and the test will fail.
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
|
||||
# RUN: %t/xword_align.s -o %t_xa.o
|
||||
|
||||
@ -9,7 +9,7 @@
|
||||
// RUN: FileCheck --check-prefix=REL_CHECK %s
|
||||
|
||||
// Non-pie static executable doesn't generate PT_DYNAMIC, check relocation
|
||||
// is readed successfully and IPLT trampoline has been identified by bolt.
|
||||
// is read successfully and IPLT trampoline has been identified by bolt.
|
||||
// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \
|
||||
// RUN: -o %t.O3_nopie.exe -Wl,-q
|
||||
// RUN: llvm-readelf -l %t.O3_nopie.exe | \
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
## Test the control of aggressiveness of 3-way splitting by -call-scale.
|
||||
## When -call-scale=0.0, the tested function is 2-way splitted.
|
||||
## When -call-scale=1.0, the tested function is 3-way splitted with 5 blocks
|
||||
## When -call-scale=0.0, the tested function is 2-way split.
|
||||
## When -call-scale=1.0, the tested function is 3-way split with 5 blocks
|
||||
## in warm because of the increased benefit of shortening the call edges.
|
||||
## When -call-scale=1000.0, the tested function is still 3-way splitted with
|
||||
## When -call-scale=1000.0, the tested function is still 3-way split with
|
||||
## 5 blocks in warm because cdsplit does not allow hot-warm splitting to break
|
||||
## a fall through branch from a basic block to its most likely successor.
|
||||
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt >> %t.txt
|
||||
# RUN: cat %t.txt | FileCheck --check-prefix=CHECK %s
|
||||
|
||||
## This test checks we correclty re-renerate .debug_str_offsets.
|
||||
## This test checks we correctly re-generate .debug_str_offsets.
|
||||
|
||||
# CHECK: .debug_str_offsets contents
|
||||
# CHECK-NEXT: 0x00000000: Contribution size = 52, Format = DWARF32, Version = 5
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.exe | FileCheck -check-prefix=PRE-BOLT %s
|
||||
# RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets %t.bolt | FileCheck -check-prefix=POST-BOLT %s
|
||||
|
||||
## This test checks we correclty re-generate .debug_str_offsets when there are type units that have an offset not shared with CU.
|
||||
## This test checks we correctly re-generate .debug_str_offsets when there are type units that have an offset not shared with CU.
|
||||
|
||||
# PRE-BOLT: .debug_str_offsets contents
|
||||
# PRE-BOLT-NEXT: Contribution size = 24, Format = DWARF32, Version = 5
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
## In this test case, we reproduce the behavior seen in gcc where the
|
||||
## base address of a jump table is decremented by some number and ends up
|
||||
## at the exact addess of a jump table from another function. After
|
||||
## at the exact address of a jump table from another function. After
|
||||
## linking, the instruction references another jump table and that
|
||||
## confuses BOLT.
|
||||
## We repro here the following issue:
|
||||
@ -28,7 +28,7 @@
|
||||
# ----
|
||||
# Func foo contains a jump table whose start is colocated with a
|
||||
# jump table reference in another function. However, the other function
|
||||
# does not use the first entries of it and is merely doing arithmetics
|
||||
# does not use the first entries of it and is merely doing arithmetic
|
||||
# to save the creation of unused first entries.
|
||||
# ----
|
||||
.globl foo
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
## This test reproduces the case where C++ exception handling is used and split
|
||||
## function optimization is enabled. In particular, function foo is splitted
|
||||
## function optimization is enabled. In particular, function foo is split
|
||||
## to two fragments:
|
||||
## foo: contains 2 try blocks, which invokes bar to throw exception
|
||||
## foo.cold.1: contains 2 corresponding catch blocks (landing pad)
|
||||
|
||||
@ -30,7 +30,7 @@
|
||||
* Reconstruct fdata
|
||||
* RUN: link_fdata %t/main.s %t.o %t.fdata.reconst
|
||||
*
|
||||
* XXX: reenable once dumping data is supported
|
||||
* XXX: re-enable once dumping data is supported
|
||||
* Check if reoptimized file produces the same results
|
||||
* dontrun: %t.exe.reopt > %t.result.reopt
|
||||
* dontrun: cmp %t.result %t.result.reopt
|
||||
|
||||
@ -12,7 +12,7 @@ check_file() {
|
||||
|
||||
fuser -s "$file"
|
||||
local ret=$?
|
||||
if [ $ret -eq 1 ]; then # noone has file open
|
||||
if [ $ret -eq 1 ]; then # no one has file open
|
||||
return 0
|
||||
fi
|
||||
if [ $ret -eq 0 ]; then # file open by some processes
|
||||
|
||||
@ -28,7 +28,7 @@
|
||||
#
|
||||
# TIMEOUT_OR_CMD - optional timeout or command on optimized binary command
|
||||
# if the value is a number with an optional trailing letter
|
||||
# [smhd] it is considered a paramter to "timeout",
|
||||
# [smhd] it is considered a parameter to "timeout",
|
||||
# otherwise it's a shell command that wraps the optimized
|
||||
# binary command.
|
||||
#
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user