Manuel Carrasco 7377410dda
[FuzzMutate] Properly handle intrinsics and avoid illegal code genertion (#145495)
This PR addresses issues related to the `amdgcn_cs_chain` intrinsic:

1. Ensures the intrinsic's special attribute and calling convention
requirements are not violated by the mutator.
2. Enforces the necessary unreachable statement following this type of
intrinsic, preventing the fuzzer from generating invalid code.
2025-06-24 09:52:55 -07:00

793 lines
27 KiB
C++

//===-- IRMutator.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/FuzzMutate/IRMutator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/FuzzMutate/Operations.h"
#include "llvm/FuzzMutate/Random.h"
#include "llvm/FuzzMutate/RandomIRBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <map>
#include <optional>
using namespace llvm;
void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
auto RS = makeSampler<Function *>(IB.Rand);
for (Function &F : M)
if (!F.isDeclaration())
RS.sample(&F, /*Weight=*/1);
while (RS.totalWeight() < IB.MinFunctionNum) {
Function *F = IB.createFunctionDefinition(M);
RS.sample(F, /*Weight=*/1);
}
mutate(*RS.getSelection(), IB);
}
void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
auto Range = make_filter_range(make_pointer_range(F),
[](BasicBlock *BB) { return !BB->isEHPad(); });
mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
}
void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
}
size_t llvm::IRMutator::getModuleSize(const Module &M) {
return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
}
void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
std::vector<Type *> Types;
for (const auto &Getter : AllowedTypes)
Types.push_back(Getter(M.getContext()));
RandomIRBuilder IB(Seed, Types);
size_t CurSize = IRMutator::getModuleSize(M);
auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
for (const auto &Strategy : Strategies)
RS.sample(Strategy.get(),
Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
if (RS.totalWeight() == 0)
return;
auto Strategy = RS.getSelection();
Strategy->mutate(M, IB);
}
static void eliminateDeadCode(Function &F) {
FunctionPassManager FPM;
FPM.addPass(DCEPass());
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return TargetLibraryAnalysis(); });
FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
FPM.run(F, FAM);
}
void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
IRMutationStrategy::mutate(F, IB);
eliminateDeadCode(F);
}
std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
std::vector<fuzzerop::OpDescriptor> Ops;
describeFuzzerIntOps(Ops);
describeFuzzerFloatOps(Ops);
describeFuzzerControlFlowOps(Ops);
describeFuzzerPointerOps(Ops);
describeFuzzerAggregateOps(Ops);
describeFuzzerVectorOps(Ops);
return Ops;
}
std::optional<fuzzerop::OpDescriptor>
InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
return Op.SourcePreds[0].matches({}, Src);
};
auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
if (RS.isEmpty())
return std::nullopt;
return *RS;
}
static inline Instruction *getEffectiveTerminator(BasicBlock &BB) {
if (Instruction *I = BB.getTerminatingMustTailCall()) {
return I;
} else {
// Certain intrinsics, such as @llvm.amdgcn.cs.chain, must be immediately
// followed by an unreachable instruction..
if (UnreachableInst *UI = dyn_cast<UnreachableInst>(BB.getTerminator())) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI->getPrevNode())) {
return II;
}
}
}
return BB.getTerminator();
}
static inline BasicBlock::iterator getEndIterator(BasicBlock &BB) {
auto End = BB.end();
if (BB.empty()) {
return End;
}
Instruction *EffectiveTerminator = getEffectiveTerminator(BB);
if (EffectiveTerminator != BB.getTerminator()) {
// Adjust range for special cases such as tail call.
End = std::prev(BB.end());
}
return End;
}
static inline iterator_range<BasicBlock::iterator>
getInsertionRange(BasicBlock &BB) {
auto End = getEndIterator(BB);
return make_range(BB.getFirstInsertionPt(), End);
}
void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
SmallVector<Instruction *, 32> Insts(
llvm::make_pointer_range(getInsertionRange(BB)));
if (Insts.size() < 1)
return;
// Choose an insertion point for our new instruction.
size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
auto InstsBefore = ArrayRef(Insts).slice(0, IP);
auto InstsAfter = ArrayRef(Insts).slice(IP);
// Choose a source, which will be used to constrain the operation selection.
SmallVector<Value *, 2> Srcs;
Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
// Choose an operation that's constrained to be valid for the type of the
// source, collect any other sources it needs, and then build it.
auto OpDesc = chooseOperation(Srcs[0], IB);
// Bail if no operation was found
if (!OpDesc)
return;
for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP]->getIterator())) {
// Find a sink and wire up the results of the operation.
IB.connectToSink(BB, InstsAfter, Op);
}
}
uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
uint64_t CurrentWeight) {
// If we have less than 200 bytes, panic and try to always delete.
if (CurrentSize > MaxSize - 200)
return CurrentWeight ? CurrentWeight * 100 : 1;
// Draw a line starting from when we only have 1k left and increasing linearly
// to double the current weight.
int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
(static_cast<int64_t>(MaxSize) -
static_cast<int64_t>(CurrentSize) - 1000) /
1000;
// Clamp negative weights to zero.
if (Line < 0)
return 0;
return Line;
}
void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
auto RS = makeSampler<Instruction *>(IB.Rand);
for (Instruction &Inst : instructions(F)) {
// TODO: We can't handle these instructions.
if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
isa<PHINode>(Inst))
continue;
RS.sample(&Inst, /*Weight=*/1);
}
if (RS.isEmpty())
return;
// Delete the instruction.
mutate(*RS.getSelection(), IB);
// Clean up any dead code that's left over after removing the instruction.
eliminateDeadCode(F);
}
void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
if (Inst.getType()->isVoidTy()) {
// Instructions with void type (ie, store) have no uses to worry about. Just
// erase it and move on.
Inst.eraseFromParent();
return;
}
// Otherwise we need to find some other value with the right type to keep the
// users happy.
auto Pred = fuzzerop::onlyType(Inst.getType());
auto RS = makeSampler<Value *>(IB.Rand);
SmallVector<Instruction *, 32> InstsBefore;
BasicBlock *BB = Inst.getParent();
for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
++I) {
if (Pred.matches({}, &*I))
RS.sample(&*I, /*Weight=*/1);
InstsBefore.push_back(&*I);
}
if (!RS)
RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
Inst.replaceAllUsesWith(RS.getSelection());
Inst.eraseFromParent();
}
void InstModificationIRStrategy::mutate(Instruction &Inst,
RandomIRBuilder &IB) {
SmallVector<std::function<void()>, 8> Modifications;
CmpInst *CI = nullptr;
GetElementPtrInst *GEP = nullptr;
switch (Inst.getOpcode()) {
default:
break;
// Add nsw, nuw flag
case Instruction::Add:
case Instruction::Mul:
case Instruction::Sub:
case Instruction::Shl:
Modifications.push_back(
[&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
Modifications.push_back(
[&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
break;
case Instruction::ICmp:
CI = cast<ICmpInst>(&Inst);
for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
Modifications.push_back(
[CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
}
break;
// Add inbound flag.
case Instruction::GetElementPtr:
GEP = cast<GetElementPtrInst>(&Inst);
Modifications.push_back(
[GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
break;
// Add exact flag.
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::LShr:
case Instruction::AShr:
Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
break;
case Instruction::FCmp:
CI = cast<FCmpInst>(&Inst);
for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
Modifications.push_back(
[CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
}
break;
}
// Add fast math flag if possible.
if (isa<FPMathOperator>(&Inst)) {
// Try setting everything unless they are already on.
Modifications.push_back(
[&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
// Try unsetting everything unless they are already off.
Modifications.push_back(
[&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
// Individual setting by flipping the bit
Modifications.push_back(
[&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
Modifications.push_back(
[&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
Modifications.push_back(
[&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
Modifications.push_back(
[&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
Modifications.push_back(
[&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
}
// Randomly switch operands of instructions
std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
switch (Inst.getOpcode()) {
case Instruction::SDiv:
case Instruction::UDiv:
case Instruction::SRem:
case Instruction::URem:
case Instruction::FDiv:
case Instruction::FRem: {
// Verify that the after shuffle the second operand is not
// constant 0.
Value *Operand = Inst.getOperand(0);
if (Constant *C = dyn_cast<Constant>(Operand)) {
if (!C->isZeroValue()) {
ShuffleItems = {0, 1};
}
}
break;
}
case Instruction::Select:
ShuffleItems = {1, 2};
break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::ShuffleVector:
ShuffleItems = {0, 1};
break;
}
if (ShuffleItems != NoneItem) {
Modifications.push_back([&Inst, &ShuffleItems]() {
Value *Op0 = Inst.getOperand(ShuffleItems.first);
Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
Inst.setOperand(ShuffleItems.second, Op0);
});
}
auto RS = makeSampler(IB.Rand, Modifications);
if (RS)
RS.getSelection()();
}
/// Return a case value that is not already taken to make sure we don't have two
/// cases with same value.
static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
uint64_t MaxValue, RandomIRBuilder &IB) {
uint64_t tmp;
do {
tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
} while (CasesTaken.count(tmp) != 0);
CasesTaken.insert(tmp);
return tmp;
}
/// Determines whether a function is unsupported by the current mutator's
/// implementation. The function returns true if any of the following criteria
/// are met:
/// * The function accepts metadata or token types as arguments.
/// * The function has ABI attributes that could cause UB.
/// * The function uses a non-callable CC that may result in UB.
static bool isUnsupportedFunction(Function *F) {
// Some functions accept metadata type or token type as arguments.
// We don't call those functions for now.
// For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
// https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
auto IsUnsupportedTy = [](Type *T) {
return T->isMetadataTy() || T->isTokenTy();
};
if (IsUnsupportedTy(F->getReturnType()) ||
any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
return true;
}
// ABI attributes must be specified both at the function
// declaration/definition and call-site, otherwise the
// behavior may be undefined.
// We don't call those functions for now to prevent UB from happening.
auto IsABIAttribute = [](AttributeSet A) {
static const Attribute::AttrKind ABIAttrs[] = {
Attribute::StructRet, Attribute::ByVal,
Attribute::InAlloca, Attribute::InReg,
Attribute::StackAlignment, Attribute::SwiftSelf,
Attribute::SwiftAsync, Attribute::SwiftError,
Attribute::Preallocated, Attribute::ByRef,
Attribute::ZExt, Attribute::SExt};
return llvm::any_of(ABIAttrs, [&](Attribute::AttrKind kind) {
return A.hasAttribute(kind);
});
};
auto FuncAttrs = F->getAttributes();
if (IsABIAttribute(FuncAttrs.getRetAttrs())) {
return true;
}
for (size_t i = 0; i < F->arg_size(); i++) {
if (IsABIAttribute(FuncAttrs.getParamAttrs(i))) {
return true;
}
}
// If it is not satisfied, the IR will be invalid.
if (!isCallableCC(F->getCallingConv())) {
return true;
}
// This intrinsic has specific requirements for its parameters and the caller
// must adhere to certain calling conventions.
if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) {
return true;
}
return false;
}
void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
Module *M = BB.getParent()->getParent();
// If nullptr is selected, we will create a new function declaration.
SmallVector<Function *, 32> Functions({nullptr});
for (Function &F : M->functions()) {
Functions.push_back(&F);
}
auto RS = makeSampler(IB.Rand, Functions);
Function *F = RS.getSelection();
if (!F || isUnsupportedFunction(F)) {
F = IB.createFunctionDeclaration(*M);
}
FunctionType *FTy = F->getFunctionType();
SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
if (!F->arg_empty()) {
for (Type *ArgTy : FTy->params()) {
SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
}
}
bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
BasicBlock::iterator InsertPt) {
StringRef Name = isRetVoid ? nullptr : "C";
CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, InsertPt);
Call->setCallingConv(F->getCallingConv());
// Don't return this call inst if it return void as it can't be sinked.
return isRetVoid ? nullptr : Call;
};
SmallVector<Instruction *, 32> Insts(
llvm::make_pointer_range(getInsertionRange(BB)));
if (Insts.size() < 1)
return;
// Choose an insertion point for our new call instruction.
uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
auto InstsBefore = ArrayRef(Insts).slice(0, IP);
auto InstsAfter = ArrayRef(Insts).slice(IP);
// Choose a source, which will be used to constrain the operation selection.
SmallVector<Value *, 2> Srcs;
for (const auto &Pred : ArrayRef(SourcePreds)) {
Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
}
if (Value *Op = BuilderFunc(Srcs, Insts[IP]->getIterator())) {
// Find a sink and wire up the results of the operation.
IB.connectToSink(BB, InstsAfter, Op);
}
}
void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
SmallVector<Instruction *, 32> Insts(
llvm::make_pointer_range(getInsertionRange(BB)));
if (Insts.size() < 1)
return;
// Choose a point where we split the block.
uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
// `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
// directly jumps to `Sink`. Here, we have to create a new terminator for
// `Source`.
BasicBlock *Block = Insts[IP]->getParent();
BasicBlock *Source = Block;
BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
Function *F = BB.getParent();
LLVMContext &C = F->getParent()->getContext();
// A coin decides if it is branch or switch
if (uniform<uint64_t>(IB.Rand, 0, 1)) {
// Branch
BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
Value *Cond =
IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
fuzzerop::onlyType(Type::getInt1Ty(C)), false);
BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
// Remove the old terminator.
ReplaceInstWithInst(Source->getTerminator(), Branch);
// Connect these blocks to `Sink`
connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
} else {
// Switch
// Determine Integer type, it IS possible we use a boolean to switch.
auto RS =
makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
return Ty->isIntegerTy();
}));
assert(RS && "There is no integer type in all allowed types, is the "
"setting correct?");
Type *Ty = RS.getSelection();
IntegerType *IntTy = cast<IntegerType>(Ty);
uint64_t BitSize = IntTy->getBitWidth();
uint64_t MaxCaseVal =
(BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
// Create Switch inst in Block
Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
fuzzerop::onlyType(IntTy), false);
BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
// Remove the old terminator.
ReplaceInstWithInst(Source->getTerminator(), Switch);
// Create blocks, for each block assign a case value.
SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
SmallSet<uint64_t, 4> CasesTaken;
for (uint64_t i = 0; i < NumCases; i++) {
uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
Switch->addCase(OnValue, CaseBlock);
Blocks.push_back(CaseBlock);
}
// Connect these blocks to `Sink`
connectBlocksToSink(Blocks, Sink, IB);
}
}
/// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
/// even have terminator.
void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
BasicBlock *Sink,
RandomIRBuilder &IB) {
uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
for (uint64_t i = 0; i < Blocks.size(); i++) {
// We have at least one block that directly goes to sink.
CFGToSink ToSink = (i == DirectSinkIdx)
? CFGToSink::DirectSink
: static_cast<CFGToSink>(uniform<uint64_t>(
IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
BasicBlock *BB = Blocks[i];
Function *F = BB->getParent();
LLVMContext &C = F->getParent()->getContext();
switch (ToSink) {
case CFGToSink::Return: {
Type *RetTy = F->getReturnType();
Value *RetValue = nullptr;
if (!RetTy->isVoidTy())
RetValue =
IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
ReturnInst::Create(C, RetValue, BB);
break;
}
case CFGToSink::DirectSink: {
BranchInst::Create(Sink, BB);
break;
}
case CFGToSink::SinkOrSelfLoop: {
SmallVector<BasicBlock *, 2> Branches({Sink, BB});
// A coin decides which block is true branch.
uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
Value *Cond = IB.findOrCreateSource(
*BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
break;
}
case CFGToSink::EndOfCFGToLink:
llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
}
}
}
void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
// Can't insert PHI node to entry node.
if (&BB == &BB.getParent()->getEntryBlock())
return;
Type *Ty = IB.randomType();
PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", BB.begin());
// Use a map to make sure the same incoming basic block has the same value.
DenseMap<BasicBlock *, Value *> IncomingValues;
for (BasicBlock *Pred : predecessors(&BB)) {
Value *Src = IncomingValues[Pred];
// If `Pred` is not in the map yet, we'll get a nullptr.
if (!Src) {
SmallVector<Instruction *, 32> Insts;
for (auto I = Pred->begin(); I != Pred->end(); ++I)
Insts.push_back(&*I);
// There is no need to inform IB what previously used values are if we are
// using `onlyType`
Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
IncomingValues[Pred] = Src;
}
PHI->addIncoming(Src, Pred);
}
SmallVector<Instruction *, 32> InstsAfter(
llvm::make_pointer_range(getInsertionRange(BB)));
IB.connectToSink(BB, InstsAfter, PHI);
}
void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
for (BasicBlock &BB : F) {
this->mutate(BB, IB);
}
}
void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
SmallVector<Instruction *, 32> Insts(
llvm::make_pointer_range(getInsertionRange(BB)));
if (Insts.size() < 1)
return;
// Choose an Instruction to mutate.
uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
Instruction *Inst = Insts[Idx];
// `Idx + 1` so we don't sink to ourselves.
auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
Type *Ty = Inst->getType();
// Don't sink terminators, void function calls, token, etc.
if (!Ty->isVoidTy() && !Ty->isTokenTy())
// Find a new sink and wire up the results of the operation.
IB.connectToSink(BB, InstsAfter, Inst);
}
void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
// A deterministic alternative to SmallPtrSet with the same lookup
// performance.
std::map<size_t, Instruction *> AliveInsts;
std::map<Instruction *, size_t> AliveInstsLookup;
size_t InsertIdx = 0;
for (auto &I : make_early_inc_range(
make_range(BB.getFirstInsertionPt(),
getEffectiveTerminator(BB)->getIterator()))) {
// First gather all instructions that can be shuffled. Don't take
// terminator.
AliveInsts.insert({InsertIdx, &I});
AliveInstsLookup.insert({&I, InsertIdx++});
// Then remove these instructions from the block
I.removeFromParent();
}
// Shuffle these instructions using topological sort.
// Returns false if all current instruction's dependencies in this block have
// been shuffled. If so, this instruction can be shuffled too.
auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
for (Value *O : AliveInsts[Index]->operands()) {
Instruction *P = dyn_cast<Instruction>(O);
if (P && AliveInstsLookup.count(P))
return true;
}
return false;
};
// Get all alive instructions that depend on the current instruction.
// Takes Instruction* instead of index because the instruction is already
// shuffled.
auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
SmallSetVector<size_t, 8> Children;
for (Value *U : I->users()) {
if (Instruction *P = dyn_cast<Instruction>(U)) {
auto It = AliveInstsLookup.find(P);
if (It != AliveInstsLookup.end())
Children.insert(It->second);
}
}
return Children;
};
SmallSet<size_t, 8> RootIndices;
SmallVector<Instruction *, 8> Insts;
for (const auto &[Index, Inst] : AliveInsts) {
if (!hasAliveParent(Index))
RootIndices.insert(Index);
}
// Topological sort by randomly selecting a node without a parent, or root.
while (!RootIndices.empty()) {
auto RS = makeSampler<size_t>(IB.Rand);
for (size_t RootIdx : RootIndices)
RS.sample(RootIdx, 1);
size_t RootIdx = RS.getSelection();
RootIndices.erase(RootIdx);
Instruction *Root = AliveInsts[RootIdx];
AliveInsts.erase(RootIdx);
AliveInstsLookup.erase(Root);
Insts.push_back(Root);
for (size_t Child : getAliveChildren(Root)) {
if (!hasAliveParent(Child)) {
RootIndices.insert(Child);
}
}
}
Instruction *Terminator = getEffectiveTerminator(BB);
// Then put instructions back.
for (Instruction *I : Insts) {
I->insertBefore(Terminator->getIterator());
}
}
std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
LLVMContext &Context) {
if (Size <= 1)
// We get bogus data given an empty corpus - just create a new module.
return std::make_unique<Module>("M", Context);
auto Buffer = MemoryBuffer::getMemBuffer(
StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
/*RequiresNullTerminator=*/false);
SMDiagnostic Err;
auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
if (Error E = M.takeError()) {
errs() << toString(std::move(E)) << "\n";
return nullptr;
}
return std::move(M.get());
}
size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
std::string Buf;
{
raw_string_ostream OS(Buf);
WriteBitcodeToFile(M, OS);
}
if (Buf.size() > MaxSize)
return 0;
memcpy(Dest, Buf.data(), Buf.size());
return Buf.size();
}
std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
LLVMContext &Context) {
auto M = parseModule(Data, Size, Context);
if (!M || verifyModule(*M, &errs()))
return nullptr;
return M;
}