Marco Elver 5265adc737 [SanitizerBinaryMetadata] Declare callbacks extern weak
Declare callbacks extern weak (if no existing declaration exists), and
only call if the function address is non-null.

This allows attaching semantic metadata to binaries where no user of
that metadata exists, avoiding the need to link empty stub callbacks.

Once the binary is linked (statically or dynamically) against a tool
runtime that implements the callbacks, the respective callbacks will be
called. This vastly simplifies gradual deployment of tools using the
metadata, esp. avoiding having to recompile large codebases with
different compiler flags (which negatively impacts compiler caches).

Reviewed By: dvyukov, vitalybuka

Differential Revision: https://reviews.llvm.org/D142408
2023-01-24 12:54:20 +01:00

476 lines
17 KiB
C++

//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions performs manipulations on Modules.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/xxhash.h"
using namespace llvm;
#define DEBUG_TYPE "moduleutils"
/// Appends a `{ i32 Priority, void ()* F, i8* Data }` entry to the appending
/// global array named \p ArrayName, creating the array if the module does not
/// have one yet.
///
/// An existing array is erased and re-created one element larger: the entries
/// it already held are kept in order and the new entry is placed last.
///
/// \param ArrayName Name of the global array to extend.
/// \param M         Module that owns (or will own) the array.
/// \param F         Function stored in the entry's second field.
/// \param Priority  Value stored in the entry's first field.
/// \param Data      Optional constant stored (cast to i8*) in the third field;
///                  a null pointer is stored when this is null.
static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
                                int Priority, Constant *Data) {
  IRBuilder<> IRB(M.getContext());
  FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);

  // Get the current set of static global constructors and add the new ctor
  // to the list.
  SmallVector<Constant *, 16> CurrentCtors;
  StructType *EltTy = StructType::get(
      IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
      IRB.getInt8PtrTy());

  if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
    // getInitializer() asserts on a global without an initializer, so only
    // read the existing entries when there is something to read.
    if (GVCtor->hasInitializer()) {
      Constant *Init = GVCtor->getInitializer();
      CurrentCtors.reserve(Init->getNumOperands() + 1);
      for (Use &Op : Init->operands())
        CurrentCtors.push_back(cast<Constant>(Op));
    }
    GVCtor->eraseFromParent();
  }

  // Build a 3 field global_ctor entry. We don't take a comdat key.
  Constant *CSVals[3];
  CSVals[0] = IRB.getInt32(Priority);
  CSVals[1] = F;
  CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
                   : Constant::getNullValue(IRB.getInt8PtrTy());
  Constant *RuntimeCtorInit =
      ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));

  CurrentCtors.push_back(RuntimeCtorInit);

  // Create a new initializer.
  ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
  Constant *NewInit = ConstantArray::get(AT, CurrentCtors);

  // Create the replacement global under the same name. The old array (if any)
  // was erased above, so no use rewriting is performed here.
  (void)new GlobalVariable(M, NewInit->getType(), false,
                           GlobalValue::AppendingLinkage, NewInit, ArrayName);
}
/// Adds an entry for \p F, with the given \p Priority and optional \p Data, to
/// the module's `llvm.global_ctors` array.
void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority,
                               Constant *Data) {
  const StringRef CtorArrayName = "llvm.global_ctors";
  appendToGlobalArray(CtorArrayName, M, F, Priority, Data);
}
/// Adds an entry for \p F, with the given \p Priority and optional \p Data, to
/// the module's `llvm.global_dtors` array.
void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority,
                               Constant *Data) {
  const StringRef DtorArrayName = "llvm.global_dtors";
  appendToGlobalArray(DtorArrayName, M, F, Priority, Data);
}
/// Inserts every element of the constant-array initializer of \p GV into
/// \p Init. Does nothing when \p GV is null or has no initializer.
static void collectUsedGlobals(GlobalVariable *GV,
                               SmallSetVector<Constant *, 16> &Init) {
  const bool HasElements = GV && GV->hasInitializer();
  if (!HasElements)
    return;

  auto *Elements = cast<ConstantArray>(GV->getInitializer());
  for (unsigned Idx = 0, End = Elements->getNumOperands(); Idx != End; ++Idx)
    Init.insert(cast<Constant>(Elements->getOperand(Idx)));
}
/// Rebuilds the global array called \p Name so that it holds its previous
/// elements followed by \p Values (each cast to i8*). Duplicates are dropped
/// by the set-vector. If the resulting list is empty no array is created.
static void appendToUsedList(Module &M, StringRef Name,
                             ArrayRef<GlobalValue *> Values) {
  // Salvage the entries of any existing list, then drop the old global.
  SmallSetVector<Constant *, 16> Entries;
  if (GlobalVariable *OldList = M.getGlobalVariable(Name)) {
    collectUsedGlobals(OldList, Entries);
    OldList->eraseFromParent();
  }

  Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
  for (GlobalValue *V : Values)
    Entries.insert(
        ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, Int8PtrTy));

  if (Entries.empty())
    return;

  ArrayType *ListTy = ArrayType::get(Int8PtrTy, Entries.size());
  Constant *ListInit = ConstantArray::get(ListTy, Entries.getArrayRef());
  auto *NewList = new llvm::GlobalVariable(
      M, ListTy, false, GlobalValue::AppendingLinkage, ListInit, Name);
  NewList->setSection("llvm.metadata");
}
/// Adds \p Values to the module's `llvm.used` list.
void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
  const StringRef UsedListName = "llvm.used";
  appendToUsedList(M, UsedListName, Values);
}
/// Adds \p Values to the module's `llvm.compiler.used` list.
void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
  const StringRef CompilerUsedListName = "llvm.compiler.used";
  appendToUsedList(M, CompilerUsedListName, Values);
}
/// Filters the global array called \p Name, dropping each element for which
/// \p ShouldRemove returns true (the predicate sees the element with pointer
/// casts stripped). The old array is always erased; a replacement carrying
/// the surviving elements is created only when at least one element remains.
static void removeFromUsedList(Module &M, StringRef Name,
                               function_ref<bool(Constant *)> ShouldRemove) {
  GlobalVariable *OldList = M.getNamedGlobal(Name);
  if (!OldList)
    return;

  SmallSetVector<Constant *, 16> Entries;
  collectUsedGlobals(OldList, Entries);

  SmallVector<Constant *, 16> Kept;
  for (Constant *Entry : Entries) {
    if (ShouldRemove(Entry->stripPointerCasts()))
      continue;
    Kept.push_back(Entry);
  }

  // Nothing survived: the list simply disappears.
  if (Kept.empty()) {
    OldList->eraseFromParent();
    return;
  }

  // Create the replacement next to the old list, inheriting its thread-local
  // mode, address space, section and name.
  Type *EltTy = cast<ArrayType>(OldList->getValueType())->getElementType();
  ArrayType *NewTy = ArrayType::get(EltTy, Kept.size());
  auto *NewList = new GlobalVariable(
      M, NewTy, false, GlobalValue::AppendingLinkage,
      ConstantArray::get(NewTy, Kept), "", OldList,
      OldList->getThreadLocalMode(), OldList->getAddressSpace());
  NewList->setSection(OldList->getSection());
  NewList->takeName(OldList);

  OldList->eraseFromParent();
}
/// Applies \p ShouldRemove to both `llvm.used` and `llvm.compiler.used`,
/// removing every element the predicate selects.
void llvm::removeFromUsedLists(Module &M,
                               function_ref<bool(Constant *)> ShouldRemove) {
  for (const char *ListName : {"llvm.used", "llvm.compiler.used"})
    removeFromUsedList(M, ListName, ShouldRemove);
}
/// Attaches `!kcfi_type` metadata to \p F, derived from \p MangledType, when
/// the module carries the "kcfi" module flag; otherwise does nothing.
///
/// The type id is the low 32 bits of xxHash64 over \p MangledType. If the
/// module also has a non-zero "kcfi-offset" flag, that value is propagated to
/// \p F as the "patchable-function-prefix" attribute.
void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
  if (!M.getModuleFlag("kcfi"))
    return;

  LLVMContext &Ctx = M.getContext();

  // Matches CodeGenModule::CreateKCFITypeId in Clang.
  const auto TypeHash = static_cast<uint32_t>(xxHash64(MangledType));
  Constant *TypeId = ConstantInt::get(Type::getInt32Ty(Ctx), TypeHash);
  MDBuilder MDB(Ctx);
  F.setMetadata(LLVMContext::MD_kcfi_type,
                MDNode::get(Ctx, MDB.createConstant(TypeId)));

  // If the module was compiled with -fpatchable-function-entry, ensure
  // we use the same patchable-function-prefix.
  auto *OffsetFlag =
      mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("kcfi-offset"));
  if (!OffsetFlag)
    return;

  const unsigned Offset = OffsetFlag->getZExtValue();
  if (Offset != 0)
    F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
}
/// Gets or inserts the declaration `void InitName(InitArgTypes...)` in \p M.
///
/// When \p Weak is set and the function is only a declaration, its linkage is
/// changed to extern-weak.
///
/// \returns the callee for the init function.
FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
                                                  ArrayRef<Type *> InitArgTypes,
                                                  bool Weak) {
  assert(!InitName.empty() && "Expected init function name");

  FunctionType *InitTy = FunctionType::get(Type::getVoidTy(M.getContext()),
                                           InitArgTypes, /*isVarArg=*/false);
  FunctionCallee Callee = M.getOrInsertFunction(InitName, InitTy);
  auto *InitFn = cast<Function>(Callee.getCallee());

  const bool MakeExternWeak = Weak && InitFn->isDeclaration();
  if (MakeExternWeak)
    InitFn->setLinkage(Function::ExternalWeakLinkage);

  return Callee;
}
/// Creates an internal, nounwind `void()` function named \p CtorName in the
/// program address space of \p M. Its body is a single block that returns
/// immediately, and it is added to `llvm.used`.
///
/// \returns the newly created function.
Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
  LLVMContext &Ctx = M.getContext();
  FunctionType *CtorTy = FunctionType::get(Type::getVoidTy(Ctx), false);
  const unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();

  Function *Ctor = Function::createWithDefaultAttr(
      CtorTy, GlobalValue::InternalLinkage, ProgramAS, CtorName, &M);
  Ctor->addFnAttr(Attribute::NoUnwind);
  setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)

  BasicBlock *Body = BasicBlock::Create(Ctx, "", Ctor);
  ReturnInst::Create(Ctx, Body);

  // Ensure Ctor cannot be discarded, even if in a comdat.
  appendToUsed(M, {Ctor});
  return Ctor;
}
/// Creates the constructor \p CtorName and emits into it a call to the init
/// function \p InitName with \p InitArgs, optionally followed by a call to
/// the `void()` function \p VersionCheckName (skipped when that name is empty).
///
/// \p Weak is forwarded to declareSanitizerInitFunction. When it is set, the
/// constructor is split into `entry`, `callfunc` and `ret` blocks so that the
/// calls are only executed if the address of the init function is non-null.
///
/// \param InitArgTypes Parameter types of the init function; must have the
///                     same length as \p InitArgs.
/// \returns the constructor together with the init function callee.
std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
    Module &M, StringRef CtorName, StringRef InitName,
    ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
    StringRef VersionCheckName, bool Weak) {
  assert(!InitName.empty() && "Expected init function name");
  assert(InitArgs.size() == InitArgTypes.size() &&
         "Sanitizer's init function expects different number of arguments");
  FunctionCallee InitFunction =
      declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
  Function *Ctor = createSanitizerCtor(M, CtorName);
  IRBuilder<> IRB(M.getContext());

  BasicBlock *RetBB = &Ctor->getEntryBlock();
  if (Weak) {
    // The block created by createSanitizerCtor becomes the common exit; the
    // null check and the guarded calls are placed in front of it.
    RetBB->setName("ret");
    auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
    auto *CallInitBB =
        BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
    auto *InitFn = cast<Function>(InitFunction.getCallee());
    IRB.SetInsertPoint(EntryBB);
    // Compare against a null constant of the function's own pointer type so
    // both icmp operands have the same type whether pointers are typed or
    // opaque.
    Value *InitNotNull =
        IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFn->getType()));
    IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
    IRB.SetInsertPoint(CallInitBB);
  } else {
    IRB.SetInsertPoint(RetBB->getTerminator());
  }

  IRB.CreateCall(InitFunction, InitArgs);
  if (!VersionCheckName.empty()) {
    FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
        VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
        AttributeList());
    IRB.CreateCall(VersionCheckFunction, {});
  }

  // The guarded block has no terminator yet; fall through to the exit.
  if (Weak)
    IRB.CreateBr(RetBB);

  return std::make_pair(Ctor, InitFunction);
}
/// Returns the constructor \p CtorName and the init function \p InitName,
/// creating them only if a suitable constructor is not already present.
///
/// If a function named \p CtorName exists and passes the signature check
/// below, it is reused and only the init function is (re)declared. Otherwise
/// both are created via createSanitizerCtorAndInitFunctions and
/// \p FunctionsCreatedCallback is invoked with the new pair.
std::pair<Function *, FunctionCallee>
llvm::getOrCreateSanitizerCtorAndInitFunctions(
    Module &M, StringRef CtorName, StringRef InitName,
    ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
    function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
    StringRef VersionCheckName, bool Weak) {
  assert(!CtorName.empty() && "Expected ctor function name");

  if (Function *Existing = M.getFunction(CtorName)) {
    // FIXME: Sink this logic into the module, similar to the handling of
    // globals. This will make moving to a concurrent model much easier.
    // NOTE(review): either property alone is accepted here; confirm whether
    // both (no arguments AND void return) were meant to be required.
    const bool TakesNoArgs = Existing->arg_empty();
    const bool ReturnsVoid =
        Existing->getReturnType() == Type::getVoidTy(M.getContext());
    if (TakesNoArgs || ReturnsVoid)
      return {Existing,
              declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
  }

  std::pair<Function *, FunctionCallee> Created =
      llvm::createSanitizerCtorAndInitFunctions(M, CtorName, InitName,
                                                InitArgTypes, InitArgs,
                                                VersionCheckName, Weak);
  FunctionsCreatedCallback(Created.first, Created.second);
  return Created;
}
/// Prunes \p DeadComdatFunctions in place so that it only contains functions
/// that either have no comdat, or whose comdat is used exclusively by
/// functions that were in the input list.
void llvm::filterDeadComdatFunctions(
    SmallVectorImpl<Function *> &DeadComdatFunctions) {
  SmallPtrSet<Function *, 32> CandidateFns;
  SmallPtrSet<Comdat *, 32> CandidateComdats;
  for (Function *Fn : DeadComdatFunctions) {
    CandidateFns.insert(Fn);
    Comdat *Group = Fn->getComdat();
    if (Group)
      CandidateComdats.insert(Group);
  }

  // Find comdats for which all users are dead now.
  SmallPtrSet<Comdat *, 32> DeadComdats;
  for (Comdat *Group : CandidateComdats) {
    bool HasLiveUser = false;
    for (GlobalObject *Member : Group->getUsers()) {
      auto *MemberFn = dyn_cast<Function>(Member);
      if (!MemberFn || !CandidateFns.contains(MemberFn)) {
        HasLiveUser = true;
        break;
      }
    }
    if (!HasLiveUser)
      DeadComdats.insert(Group);
  }

  // Only keep functions which have no comdat or a dead comdat.
  erase_if(DeadComdatFunctions, [&DeadComdats](Function *Fn) {
    Comdat *Group = Fn->getComdat();
    return Group != nullptr && !DeadComdats.contains(Group);
  });
}
/// Computes an identifier for \p M from the names of the symbols it exports.
///
/// A global contributes when it is a definition with external linkage, is not
/// in a comdat and its name does not start with "llvm.". Functions, global
/// variables, aliases and ifuncs are visited in that order; each contributing
/// name is fed to MD5 followed by a zero byte.
///
/// \returns "." followed by the stringified MD5 digest, or the empty string
///          when no global contributes.
std::string llvm::getUniqueModuleId(Module *M) {
  MD5 Hasher;
  bool SawExportedSymbol = false;

  auto HashIfExported = [&](GlobalValue &GV) {
    const bool IsExportedDefinition =
        !GV.isDeclaration() && !GV.getName().startswith("llvm.") &&
        GV.hasExternalLinkage() && !GV.hasComdat();
    if (!IsExportedDefinition)
      return;

    SawExportedSymbol = true;
    Hasher.update(GV.getName());
    Hasher.update(ArrayRef<uint8_t>{0});
  };

  for (auto &Fn : *M)
    HashIfExported(Fn);
  for (auto &Var : M->globals())
    HashIfExported(Var);
  for (auto &Alias : M->aliases())
    HashIfExported(Alias);
  for (auto &IFunc : M->ifuncs())
    HashIfExported(IFunc);

  if (!SawExportedSymbol)
    return "";

  MD5::MD5Result Digest;
  Hasher.final(Digest);

  SmallString<32> Hex;
  MD5::stringifyResult(Digest, Hex);
  return ("." + Hex).str();
}
/// Sets the attribute named by MappingsAttrName on \p CI to the
/// comma-separated concatenation of \p VariantMappings. Does nothing when the
/// list is empty.
///
/// In builds with assertions enabled, every mapping must demangle as a VFABI
/// name and its vector function must exist in the module.
void VFABI::setVectorVariantNames(CallInst *CI,
                                  ArrayRef<std::string> VariantMappings) {
  if (VariantMappings.empty())
    return;

  // Join the mappings, emitting the ',' before every element but the first so
  // that no trailing separator has to be trimmed afterwards.
  SmallString<256> Joined;
  llvm::raw_svector_ostream OS(Joined);
  const char *Separator = "";
  for (const std::string &Mapping : VariantMappings) {
    OS << Separator << Mapping;
    Separator = ",";
  }

  Module *M = CI->getModule();
#ifndef NDEBUG
  for (const std::string &Mapping : VariantMappings) {
    LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << Mapping << "'\n");
    std::optional<VFInfo> Info = VFABI::tryDemangleForVFABI(Mapping, *M);
    assert(Info && "Cannot add an invalid VFABI name.");
    assert(M->getNamedValue(Info->VectorName) &&
           "Cannot add variant to attribute: "
           "vector function declaration is missing.");
  }
#endif
  CI->addFnAttr(
      Attribute::get(M->getContext(), MappingsAttrName, Joined.str()));
}
/// Embeds the contents of \p Buf into \p M as a private constant global named
/// "llvm.embedded.object", placed in section \p SectionName with alignment
/// \p Alignment.
///
/// The global is also recorded, together with its section name, in the
/// "llvm.embedded.objects" named metadata, tagged with empty `!exclude`
/// metadata, and added to `llvm.compiler.used`.
void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
                               StringRef SectionName, Align Alignment) {
  LLVMContext &Ctx = M.getContext();

  // Embed the memory buffer into the module.
  Constant *Payload = ConstantDataArray::get(
      Ctx, ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
  auto *Embedded =
      new GlobalVariable(M, Payload->getType(), true,
                         GlobalValue::PrivateLinkage, Payload,
                         "llvm.embedded.object");
  Embedded->setSection(SectionName);
  Embedded->setAlignment(Alignment);

  // Record the (global, section) pair in the module-level registry.
  NamedMDNode *Registry = M.getOrInsertNamedMetadata("llvm.embedded.objects");
  Metadata *Fields[] = {ConstantAsMetadata::get(Embedded),
                        MDString::get(Ctx, SectionName)};
  Registry->addOperand(llvm::MDNode::get(Ctx, Fields));

  Embedded->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
  appendToCompilerUsed(M, Embedded);
}
/// Rewrites instruction uses of ifuncs in \p M into loads from a table of
/// function pointers that a newly created global constructor fills in.
///
/// For every ifunc selected (all ifuncs of the module when
/// \p FilteredIFuncsToLower is empty) whose resolver takes no parameters:
///   * the new constructor calls the resolver and stores the result into the
///     next free slot of an internal global table;
///   * each user that is an Instruction gets a load from that slot inserted
///     in front of it and has its uses of the ifunc replaced by the loaded
///     pointer;
///   * the ifunc is erased if no uses remain afterwards.
///
/// The table and the constructor are created unconditionally, and the
/// constructor is appended to the global ctors with priority 10.
///
/// \returns true if something was left unhandled, i.e. a resolver with
/// parameters was skipped or an ifunc had a user that is not an Instruction;
/// false otherwise.
bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
SmallVector<GlobalIFunc *, 32> AllIFuncs;
ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
for (GlobalIFunc &GI : M.ifuncs())
AllIFuncs.push_back(&GI);
IFuncsToLower = AllIFuncs;
}
bool UnhandledUsers = false;
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
// Table entries are pointers in the program address space: i8* when the
// context uses typed pointers, an opaque pointer otherwise.
PointerType *TableEntryTy =
Ctx.supportsTypedPointers()
? PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace())
: PointerType::get(Ctx, DL.getProgramAddressSpace());
// One slot is reserved per candidate ifunc. Slots are handed out only to
// ifuncs that are actually lowered (see TableIndex below), so skipped
// candidates leave unused slots at the end of the table.
ArrayType *FuncPtrTableTy =
ArrayType::get(TableEntryTy, IFuncsToLower.size());
Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
// Create a global table of function pointers we'll initialize in a global
// constructor.
auto *FuncPtrTable = new GlobalVariable(
M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
PoisonValue::get(FuncPtrTableTy), "", nullptr,
GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
FuncPtrTable->setAlignment(PtrAlign);
// Create a function to initialize the function pointer table.
Function *NewCtor = Function::Create(
FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
DL.getProgramAddressSpace(), "", &M);
BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
IRBuilder<> InitBuilder(BB);
// Index of the next free table slot.
size_t TableIndex = 0;
for (GlobalIFunc *GI : IFuncsToLower) {
Function *ResolvedFunction = GI->getResolverFunction();
// We don't know what to pass to a resolver function taking arguments
//
// FIXME: Is this even valid? clang and gcc don't complain but this
// probably should be invalid IR. We could just pass through undef.
if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
<< ResolvedFunction->getName() << " with parameters\n");
UnhandledUsers = true;
continue;
}
// Initialize the function pointer table.
CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
// The slot address is a constant GEP into the table; it is reused below as
// the load address for every rewritten user of this ifunc.
Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
// Update all users to load a pointer from the global table.
for (User *User : make_early_inc_range(GI->users())) {
Instruction *UserInst = dyn_cast<Instruction>(User);
if (!UserInst) {
// TODO: Should handle constantexpr casts in user instructions. Probably
// can't do much about constant initializers.
UnhandledUsers = true;
continue;
}
// The load and cast are inserted immediately before the using instruction.
// NOTE(review): confirm this insertion point is acceptable when the user is
// a PHI node.
IRBuilder<> UseBuilder(UserInst);
LoadInst *ResolvedTarget =
UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
Value *ResolvedCast =
UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
UserInst->replaceUsesOfWith(GI, ResolvedCast);
}
// If we handled all users, erase the ifunc.
if (GI->use_empty())
GI->eraseFromParent();
}
InitBuilder.CreateRetVoid();
// Type of the null "associated data" pointer passed with the ctor entry
// (address space 0).
PointerType *ConstantDataTy = Ctx.supportsTypedPointers()
? PointerType::get(Type::getInt8Ty(Ctx), 0)
: PointerType::get(Ctx, 0);
// TODO: Is this the right priority? Probably should be before any other
// constructors?
const int Priority = 10;
appendToGlobalCtors(M, NewCtor, Priority,
ConstantPointerNull::get(ConstantDataTy));
return UnhandledUsers;
}