//===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AlignerPass class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/Aligner.h"
#include "bolt/Core/ParallelUtilities.h"

#define DEBUG_TYPE "bolt-aligner"

using namespace llvm;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bool> AlignBlocks;
extern cl::opt<bool> PreserveBlocksAlignment;
extern cl::opt<unsigned> AlignFunctions;

static cl::opt<unsigned> AlignBlocksMinSize(
    "align-blocks-min-size",
    cl::desc("minimal size of the basic block that should be aligned"),
    cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> AlignBlocksThreshold(
    "align-blocks-threshold",
    cl::desc(
        "align only blocks with frequency larger than containing function "
        "execution frequency specified in percent. E.g. 1000 means aligning "
        "blocks that are 10 times more frequently executed than the "
        "containing function."),
    cl::init(800), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned> AlignFunctionsMaxBytes(
    "align-functions-max-bytes",
    cl::desc("maximum number of bytes to use to align functions"),
    cl::init(32), cl::cat(BoltOptCategory));

static cl::opt<unsigned>
    BlockAlignment("block-alignment",
                   cl::desc("boundary to use for alignment of basic blocks"),
                   cl::init(16), cl::ZeroOrMore, cl::cat(BoltOptCategory));

static cl::opt<bool>
    UseCompactAligner("use-compact-aligner",
                      cl::desc("Use compact approach for aligning functions"),
                      cl::init(true), cl::cat(BoltOptCategory));

} // end namespace opts

namespace llvm {
namespace bolt {

// Align the function to the specified byte boundary (typically, 64), offsetting
// the function by not more than the corresponding number of bytes.
static void alignMaxBytes(BinaryFunction &Function) {
  Function.setAlignment(opts::AlignFunctions);
  Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
  Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
}

// Align the function to the specified byte boundary (typically, 64), offsetting
// the function by not more than the minimum of
//  -- the size of the function
//  -- the specified number of bytes
static void alignCompact(BinaryFunction &Function,
                         const MCCodeEmitter *Emitter) {
  const BinaryContext &BC = Function.getBinaryContext();
  size_t HotSize = 0;
  size_t ColdSize = 0;

  for (const BinaryBasicBlock &BB : Function)
    if (BB.isSplit())
      ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
    else
      HotSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);

  Function.setAlignment(opts::AlignFunctions);
  if (HotSize > 0)
    Function.setMaxAlignmentBytes(
        std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize));

  // Use the same option, align-functions-max-bytes, for both the hot and the
  // cold parts of the function, as aligning cold code typically does not
  // affect performance.
  if (ColdSize > 0)
    Function.setMaxColdAlignmentBytes(
        std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize));
}

void AlignerPass::alignBlocks(BinaryFunction &Function,
                              const MCCodeEmitter *Emitter) {
  if (!Function.hasValidProfile() || !Function.isSimple())
    return;

  const BinaryContext &BC = Function.getBinaryContext();

  const uint64_t FuncCount =
      std::max<uint64_t>(1, Function.getKnownExecutionCount());
  BinaryBasicBlock *PrevBB = nullptr;
  for (BinaryBasicBlock *BB : Function.getLayout().blocks()) {
    uint64_t Count = BB->getKnownExecutionCount();

    if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
      PrevBB = BB;
      continue;
    }

    uint64_t FTCount = 0;
    if (PrevBB && PrevBB->getFallthrough() == BB)
      FTCount = PrevBB->getBranchInfo(*BB).Count;

    PrevBB = BB;

    // Skip the block if most of its executions are reached via fall-through
    // from the previous block: aligning it would insert padding on the hot
    // path.
    if (Count < FTCount * 2)
      continue;

    const uint64_t BlockSize =
        BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
    const uint64_t BytesToUse =
        std::min<uint64_t>(opts::BlockAlignment - 1, BlockSize);

    if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
      continue;

    BB->setAlignment(opts::BlockAlignment);
    BB->setAlignmentMaxBytes(BytesToUse);

    // Update stats.
    LLVM_DEBUG(
      std::unique_lock Lock(AlignHistogramMtx);
      AlignHistogram[BytesToUse]++;
      AlignedBlocksCount += BB->getKnownExecutionCount();
    );
  }
}

Error AlignerPass::runOnFunctions(BinaryContext &BC) {
  if (!BC.HasRelocations)
    return Error::success();

  AlignHistogram.resize(opts::BlockAlignment);

  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
    // Create a separate MCCodeEmitter to allow lock-free execution.
    BinaryContext::IndependentCodeEmitter Emitter =
        BC.createIndependentMCCodeEmitter();

    if (opts::UseCompactAligner)
      alignCompact(BF, Emitter.MCE.get());
    else
      alignMaxBytes(BF);

    if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
      alignBlocks(BF, Emitter.MCE.get());
  };

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun,
      ParallelUtilities::PredicateTy(nullptr), "AlignerPass");

  LLVM_DEBUG(
    dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
    for (unsigned I = 1; I < AlignHistogram.size(); ++I)
      dbgs() << "  " << I << " : " << AlignHistogram[I] << '\n';

    dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
           << AlignedBlocksCount << '\n';
  );

  return Error::success();
}

} // end namespace bolt
} // end namespace llvm