//===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the KernelInfoPrinter class used to emit remarks about
// function properties from a GPU kernel.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Analysis/KernelInfo.h"
|
|
#include "llvm/ADT/SmallString.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/IR/DebugInfo.h"
|
|
#include "llvm/IR/Dominators.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/Metadata.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/IR/PassManager.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "kernel-info"
|
|
|
|
namespace {
|
|
|
|
/// Data structure holding function info for kernels.
|
|
class KernelInfo {
|
|
void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE);
|
|
|
|
public:
|
|
static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
|
|
TargetMachine *TM);
|
|
|
|
/// Whether the function has external linkage and is not a kernel function.
|
|
bool ExternalNotKernel = false;
|
|
|
|
/// Launch bounds.
|
|
SmallVector<std::pair<StringRef, int64_t>> LaunchBounds;
|
|
|
|
/// The number of alloca instructions inside the function, the number of those
|
|
/// with allocation sizes that cannot be determined at compile time, and the
|
|
/// sum of the sizes that can be.
|
|
///
|
|
/// With the current implementation for at least some GPU archs,
|
|
/// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
|
|
/// case the implementation changes.
|
|
int64_t Allocas = 0;
|
|
int64_t AllocasDyn = 0;
|
|
int64_t AllocasStaticSizeSum = 0;
|
|
|
|
/// Number of direct/indirect calls (anything derived from CallBase).
|
|
int64_t DirectCalls = 0;
|
|
int64_t IndirectCalls = 0;
|
|
|
|
/// Number of direct calls made from this function to other functions
|
|
/// defined in this module.
|
|
int64_t DirectCallsToDefinedFunctions = 0;
|
|
|
|
/// Number of direct calls to inline assembly.
|
|
int64_t InlineAssemblyCalls = 0;
|
|
|
|
/// Number of calls of type InvokeInst.
|
|
int64_t Invokes = 0;
|
|
|
|
/// Target-specific flat address space.
|
|
unsigned FlatAddrspace;
|
|
|
|
/// Number of flat address space memory accesses (via load, store, etc.).
|
|
int64_t FlatAddrspaceAccesses = 0;
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
/// Append a quoted description of the callee \p V to remark \p R.
///
/// When \p V is a function with an attached subprogram, the subprogram's name
/// is used (prefixed with "artificial " when the subprogram is marked so).
/// If that yields no name, \p V is printed as an operand in the context of
/// \p M instead. A non-empty \p Kind is emitted, followed by a space, right
/// before the quoted name.
static void identifyCallee(OptimizationRemark &R, const Module *M,
                           const Value *V, StringRef Kind = "") {
  // Holds either a source-level function name or the printed operand (which
  // may be an asm expression).
  SmallString<100> Label;

  const auto *Fn = dyn_cast<Function>(V);
  auto *DebugSP = Fn ? Fn->getSubprogram() : nullptr;
  if (DebugSP) {
    if (DebugSP->isArtificial())
      R << "artificial ";
    Label = DebugSP->getName();
  }

  // No usable debug name: fall back to the IR spelling of the value.
  if (Label.empty()) {
    raw_svector_ostream LabelOS(Label);
    V->printAsOperand(LabelOS, /*PrintType=*/false, M);
  }

  if (!Kind.empty())
    R << Kind << " ";
  R << "'" << Label << "'";
}
|
|
|
|
/// Append "function '<name>'" for \p F to remark \p R, using the same naming
/// rules as identifyCallee().
static void identifyFunction(OptimizationRemark &R, const Function &F) {
  const Module *Parent = F.getParent();
  identifyCallee(R, Parent, &F, /*Kind=*/"function");
}
|
|
|
|
/// Emit an "Alloca" remark describing \p Alloca, which lives in \p Caller.
///
/// The remark names the enclosing function, the alloca's IR operand, the
/// source variable it was declared for (when a debug declare record is
/// attached), and its size. \p StaticSize is the allocation size in bytes;
/// a value of zero is reported as "dynamic size".
///
/// NOTE(review): because zero doubles as the "dynamic" marker, an alloca
/// whose static size really is zero bytes is also worded as dynamic here.
static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
                         const AllocaInst &Alloca,
                         TypeSize::ScalarTy StaticSize) {
  ORE.emit([&] {
    // Take the variable name, location, and artificial flag from the first
    // declare record attached to the alloca, if there is one.
    StringRef VarName;
    DebugLoc VarLoc;
    bool IsArtificial = false;
    auto Declares = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
    if (!Declares.empty()) {
      const DbgVariableRecord &First = **Declares.begin();
      VarName = First.getVariable()->getName();
      VarLoc = First.getDebugLoc();
      IsArtificial = First.Variable->isArtificial();
    }

    OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(VarLoc),
                         Alloca.getParent());
    R << "in ";
    identifyFunction(R, Caller);
    R << ", ";
    if (IsArtificial)
      R << "artificial ";

    // Spell the alloca the way it appears as an operand in the IR.
    SmallString<20> Operand;
    raw_svector_ostream OperandOS(Operand);
    Alloca.printAsOperand(OperandOS, /*PrintType=*/false, Caller.getParent());
    R << "alloca ('" << Operand << "') ";

    if (VarName.empty())
      R << "without debug info ";
    else
      R << "for '" << VarName << "' ";

    R << "with ";
    if (StaticSize == 0)
      R << "dynamic size";
    else
      R << "static size of " << itostr(StaticSize) << " bytes";
    return R;
  });
}
|
|
|
|
/// Emit a remark named \p RemarkKind for the call site \p Call inside
/// \p Caller. The message states the enclosing function, the human-readable
/// \p CallKind, and the callee as rendered by identifyCallee().
static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
                       const CallBase &Call, StringRef CallKind,
                       StringRef RemarkKind) {
  ORE.emit([&] {
    OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
    const Module *ParentModule = Caller.getParent();
    const Value *Target = Call.getCalledOperand();

    R << "in ";
    identifyFunction(R, Caller);
    R << ", " << CallKind << ", callee is ";
    identifyCallee(R, ParentModule, Target);
    return R;
  });
}
|
|
|
|
/// Emit a "FlatAddrspaceAccess" remark for \p Inst, an instruction in
/// \p Caller. Intrinsic calls are identified by the called function's name,
/// all other instructions by their opcode name; instructions with a non-void
/// type additionally have their IR operand spelling appended.
static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
                                      const Function &Caller,
                                      const Instruction &Inst) {
  ORE.emit([&] {
    OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
    R << "in ";
    identifyFunction(R, Caller);

    const auto *Intrinsic = dyn_cast<IntrinsicInst>(&Inst);
    if (!Intrinsic)
      R << ", '" << Inst.getOpcodeName() << "' instruction";
    else
      R << ", '" << Intrinsic->getCalledFunction()->getName() << "' call";

    // Only instructions with a non-void type get an operand spelling.
    const bool HasResult = !Inst.getType()->isVoidTy();
    if (HasResult) {
      SmallString<20> Operand;
      raw_svector_ostream OperandOS(Operand);
      Inst.printAsOperand(OperandOS, /*PrintType=*/false, Caller.getParent());
      R << " ('" << Operand << "')";
    }

    R << " accesses memory in flat address space";
    return R;
  });
}
|
|
|
|
void KernelInfo::updateForBB(const BasicBlock &BB,
|
|
OptimizationRemarkEmitter &ORE) {
|
|
const Function &F = *BB.getParent();
|
|
const Module &M = *F.getParent();
|
|
const DataLayout &DL = M.getDataLayout();
|
|
for (const Instruction &I : BB.instructionsWithoutDebug()) {
|
|
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) {
|
|
++Allocas;
|
|
TypeSize::ScalarTy StaticSize = 0;
|
|
if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
|
|
StaticSize = Size->getFixedValue();
|
|
assert(StaticSize <=
|
|
(TypeSize::ScalarTy)std::numeric_limits<int64_t>::max());
|
|
AllocasStaticSizeSum += StaticSize;
|
|
} else {
|
|
++AllocasDyn;
|
|
}
|
|
remarkAlloca(ORE, F, *Alloca, StaticSize);
|
|
} else if (const CallBase *Call = dyn_cast<CallBase>(&I)) {
|
|
SmallString<40> CallKind;
|
|
SmallString<40> RemarkKind;
|
|
if (Call->isIndirectCall()) {
|
|
++IndirectCalls;
|
|
CallKind += "indirect";
|
|
RemarkKind += "Indirect";
|
|
} else {
|
|
++DirectCalls;
|
|
CallKind += "direct";
|
|
RemarkKind += "Direct";
|
|
}
|
|
if (isa<InvokeInst>(Call)) {
|
|
++Invokes;
|
|
CallKind += " invoke";
|
|
RemarkKind += "Invoke";
|
|
} else {
|
|
CallKind += " call";
|
|
RemarkKind += "Call";
|
|
}
|
|
if (!Call->isIndirectCall()) {
|
|
if (const Function *Callee = Call->getCalledFunction()) {
|
|
if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
|
|
++DirectCallsToDefinedFunctions;
|
|
CallKind += " to defined function";
|
|
RemarkKind += "ToDefinedFunction";
|
|
}
|
|
} else if (Call->isInlineAsm()) {
|
|
++InlineAssemblyCalls;
|
|
CallKind += " to inline assembly";
|
|
RemarkKind += "ToInlineAssembly";
|
|
}
|
|
}
|
|
remarkCall(ORE, F, *Call, CallKind, RemarkKind);
|
|
if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
|
|
if (MI->getDestAddressSpace() == FlatAddrspace) {
|
|
++FlatAddrspaceAccesses;
|
|
remarkFlatAddrspaceAccess(ORE, F, I);
|
|
} else if (const AnyMemTransferInst *MT =
|
|
dyn_cast<AnyMemTransferInst>(MI)) {
|
|
if (MT->getSourceAddressSpace() == FlatAddrspace) {
|
|
++FlatAddrspaceAccesses;
|
|
remarkFlatAddrspaceAccess(ORE, F, I);
|
|
}
|
|
}
|
|
}
|
|
} else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) {
|
|
if (Load->getPointerAddressSpace() == FlatAddrspace) {
|
|
++FlatAddrspaceAccesses;
|
|
remarkFlatAddrspaceAccess(ORE, F, I);
|
|
}
|
|
} else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) {
|
|
if (Store->getPointerAddressSpace() == FlatAddrspace) {
|
|
++FlatAddrspaceAccesses;
|
|
remarkFlatAddrspaceAccess(ORE, F, I);
|
|
}
|
|
} else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) {
|
|
if (At->getPointerAddressSpace() == FlatAddrspace) {
|
|
++FlatAddrspaceAccesses;
|
|
remarkFlatAddrspaceAccess(ORE, F, I);
|
|
}
|
|
} else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) {
|
|
if (At->getPointerAddressSpace() == FlatAddrspace) {
|
|
++FlatAddrspaceAccesses;
|
|
remarkFlatAddrspaceAccess(ORE, F, I);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Emit a remark named \p Name, attached to \p F, whose message reads
/// "in <function>, <Name> = <Value>".
static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
                           StringRef Name, int64_t Value) {
  ORE.emit([&] {
    OptimizationRemark R(DEBUG_TYPE, Name, &F);
    const auto ValueText = itostr(Value);

    R << "in ";
    identifyFunction(R, F);
    R << ", ";
    R << Name;
    R << " = ";
    R << ValueText;
    return R;
  });
}
|
|
|
|
/// Return the value of function attribute \p Name on \p F as produced by
/// Function::getFnAttributeAsParsedInteger(), or std::nullopt when \p F does
/// not carry that attribute at all.
static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
                                                   StringRef Name) {
  std::optional<int64_t> Parsed;
  if (F.hasFnAttribute(Name))
    Parsed = F.getFnAttributeAsParsedInteger(Name);
  return Parsed;
}
|
|
|
|
/// Collect the kernel-relevant properties of \p F and report each one as an
/// optimization remark.
///
/// Function-level properties (linkage/calling convention, launch bounds) are
/// recorded first, then every basic block is scanned with updateForBB(), and
/// finally one remark per property is emitted in a fixed order. \p TM is
/// accepted for interface compatibility and is not used in this function.
void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
                                TargetMachine *TM) {
  KernelInfo Info;
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  Info.FlatAddrspace = TTI.getFlatAddressSpace();

  // Record function properties.
  const bool IsExternal = F.hasExternalLinkage();
  Info.ExternalNotKernel = IsExternal && !F.hasKernelCallingConv();

  // Launch bounds given as function attributes come first; the target may
  // append more through TTI afterwards.
  const char *const BoundAttrs[] = {"omp_target_num_teams",
                                    "omp_target_thread_limit"};
  for (StringRef Attr : BoundAttrs) {
    std::optional<int64_t> Bound = parseFnAttrAsInteger(F, Attr);
    if (Bound)
      Info.LaunchBounds.push_back({Attr, *Bound});
  }
  TTI.collectKernelLaunchBounds(F, Info.LaunchBounds);

  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  for (const BasicBlock &Block : F)
    Info.updateForBB(Block, ORE);

  // Report the properties; each remark is named after the property itself.
  const auto Report = [&](StringRef PropName, int64_t PropValue) {
    remarkProperty(ORE, F, PropName, PropValue);
  };
  Report("ExternalNotKernel", Info.ExternalNotKernel);
  for (const auto &[BoundName, BoundValue] : Info.LaunchBounds)
    Report(BoundName, BoundValue);
  Report("Allocas", Info.Allocas);
  Report("AllocasStaticSizeSum", Info.AllocasStaticSizeSum);
  Report("AllocasDyn", Info.AllocasDyn);
  Report("DirectCalls", Info.DirectCalls);
  Report("IndirectCalls", Info.IndirectCalls);
  Report("DirectCallsToDefinedFunctions", Info.DirectCallsToDefinedFunctions);
  Report("InlineAssemblyCalls", Info.InlineAssemblyCalls);
  Report("Invokes", Info.Invokes);
  Report("FlatAddrspaceAccesses", Info.FlatAddrspaceAccesses);
}
|
|
|
|
/// Pass entry point: emit kernel info remarks for \p F when remarks for this
/// pass are enabled. All analyses are always reported as preserved.
PreservedAnalyses KernelInfoPrinter::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  // Without enabled remarks the analysis would produce nothing visible, so
  // skip the work entirely.
  const auto *Handler = F.getContext().getDiagHandlerPtr();
  const bool RemarksEnabled = Handler->isPassedOptRemarkEnabled(DEBUG_TYPE);
  if (!RemarksEnabled)
    return PreservedAnalyses::all();

  KernelInfo::emitKernelInfo(F, AM, TM);
  return PreservedAnalyses::all();
}
|