diff --git a/llvm/tools/llubi/lib/CMakeLists.txt b/llvm/tools/llubi/lib/CMakeLists.txt
index d3b54d0bd45b..b3c7b60cac50 100644
--- a/llvm/tools/llubi/lib/CMakeLists.txt
+++ b/llvm/tools/llubi/lib/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
 add_llvm_library(LLVMUBAwareInterpreter
   STATIC
   Context.cpp
+  ExecutorBase.cpp
   Interpreter.cpp
   Value.cpp
   )
diff --git a/llvm/tools/llubi/lib/ExecutorBase.cpp b/llvm/tools/llubi/lib/ExecutorBase.cpp
new file mode 100644
index 000000000000..ec66e831908c
--- /dev/null
+++ b/llvm/tools/llubi/lib/ExecutorBase.cpp
@@ -0,0 +1,130 @@
+//===- ExecutorBase.cpp - Non-visitor methods of InstExecutor -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements non-visitor methods of InstExecutor for code reuse.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ExecutorBase.h"
+
+namespace llvm::ubi {
+Frame::Frame(Function &F, CallBase *CallSite, Frame *LastFrame,
+             ArrayRef<AnyValue> Args, AnyValue &RetVal,
+             const TargetLibraryInfoImpl &TLIImpl)
+    : Func(F), LastFrame(LastFrame), CallSite(CallSite), Args(Args),
+      RetVal(RetVal), TLI(TLIImpl, &F) {
+  assert((Args.size() == F.arg_size() ||
+          (F.isVarArg() && Args.size() >= F.arg_size())) &&
+         "Expected enough arguments to call the function.");
+  BB = &Func.getEntryBlock();
+  PC = BB->begin();
+  for (Argument &Arg : F.args())
+    ValueMap[&Arg] = Args[Arg.getArgNo()];
+}
+
+void ExecutorBase::reportImmediateUB(StringRef Msg) {
+  // Check if we have already reported an immediate UB.
+  if (!Status)
+    return;
+  Status = false;
+  // TODO: Provide stack trace information.
+  Handler.onImmediateUB(Msg);
+}
+
+void ExecutorBase::reportError(StringRef Msg) {
+  // Check if we have already reported an error message.
+  if (!Status)
+    return;
+  Status = false;
+  Handler.onError(Msg);
+}
+
+std::optional<uint64_t> ExecutorBase::verifyMemAccess(const MemoryObject &MO,
+                                                      const APInt &Address,
+                                                      uint64_t AccessSize,
+                                                      Align Alignment,
+                                                      bool IsStore) {
+  // Loading from a stack object outside its lifetime is not undefined
+  // behavior and returns a poison value instead. Storing to it is still
+  // undefined behavior.
+  if (IsStore ? MO.getState() != MemoryObjectState::Alive
+              : MO.getState() == MemoryObjectState::Freed) {
+    reportImmediateUB("Try to access a dead memory object.");
+    return std::nullopt;
+  }
+
+  if (Address.countr_zero() < Log2(Alignment)) {
+    reportImmediateUB("Misaligned memory access.");
+    return std::nullopt;
+  }
+
+  // Reject oversized accesses and addresses below the object first, so that
+  // neither subtraction below can wrap around.
+  if (AccessSize > MO.getSize() || Address.ult(MO.getAddress())) {
+    reportImmediateUB("Memory access is out of bounds.");
+    return std::nullopt;
+  }
+
+  APInt Offset = Address - MO.getAddress();
+
+  if (Offset.ugt(MO.getSize() - AccessSize)) {
+    reportImmediateUB("Memory access is out of bounds.");
+    return std::nullopt;
+  }
+
+  return Offset.getZExtValue();
+}
+
+AnyValue ExecutorBase::load(const AnyValue &Ptr, Align Alignment, Type *ValTy) {
+  if (Ptr.isPoison()) {
+    reportImmediateUB("Invalid memory access with a poison pointer.");
+    return AnyValue::getPoisonValue(Ctx, ValTy);
+  }
+  auto &PtrVal = Ptr.asPointer();
+  auto *MO = PtrVal.getMemoryObject();
+  if (!MO) {
+    reportImmediateUB(
+        "Invalid memory access via a pointer with nullary provenance.");
+    return AnyValue::getPoisonValue(Ctx, ValTy);
+  }
+  // TODO: pointer capability check
+  if (auto Offset =
+          verifyMemAccess(*MO, PtrVal.address(),
+                          Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
+                          /*IsStore=*/false)) {
+    // Load from a dead stack object yields poison value.
+    if (MO->getState() == MemoryObjectState::Dead)
+      return AnyValue::getPoisonValue(Ctx, ValTy);
+
+    return Ctx.load(*MO, *Offset, ValTy);
+  }
+  // verifyMemAccess has already reported the UB; fall back to poison.
+  return AnyValue::getPoisonValue(Ctx, ValTy);
+}
+
+void ExecutorBase::store(const AnyValue &Ptr, Align Alignment,
+                         const AnyValue &Val, Type *ValTy) {
+  if (Ptr.isPoison()) {
+    reportImmediateUB("Invalid memory access with a poison pointer.");
+    return;
+  }
+  auto &PtrVal = Ptr.asPointer();
+  auto *MO = PtrVal.getMemoryObject();
+  if (!MO) {
+    reportImmediateUB(
+        "Invalid memory access via a pointer with nullary provenance.");
+    return;
+  }
+  // TODO: pointer capability check
+  if (auto Offset =
+          verifyMemAccess(*MO, PtrVal.address(),
+                          Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
+                          /*IsStore=*/true))
+    Ctx.store(*MO, *Offset, Val, ValTy);
+}
+} // namespace llvm::ubi
diff --git a/llvm/tools/llubi/lib/ExecutorBase.h b/llvm/tools/llubi/lib/ExecutorBase.h
new file mode 100644
index 000000000000..0f80c6a32905
--- /dev/null
+++ b/llvm/tools/llubi/lib/ExecutorBase.h
@@ -0,0 +1,111 @@
+//===--- ExecutorBase.h - Non-visitor methods of InstExecutor -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares non-visitor methods of InstExecutor for code reuse.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLUBI_EXECUTORBASE_H
+#define LLVM_TOOLS_LLUBI_EXECUTORBASE_H
+
+#include "Context.h"
+#include "Value.h"
+
+namespace llvm::ubi {
+
+enum class FrameState {
+  // It is about to enter the function.
+  // Valid transition:
+  //   -> Running
+  Entry,
+  // It is executing instructions inside the function.
+  // Valid transitions:
+  //   -> Pending (on call)
+  //   -> Exit (on return)
+  Running,
+  // It is about to enter a callee or handle return value from the callee.
+  // Valid transitions:
+  //   -> Running (after returning from callee)
+  Pending,
+  // It is about to return the control to the caller.
+  Exit,
+};
+
+/// Context for a function call.
+/// This struct maintains the state during the execution of a function,
+/// including the control flow, values of executed instructions, and stack
+/// objects.
+struct Frame {
+  Function &Func;
+  Frame *LastFrame;
+  CallBase *CallSite;
+  ArrayRef<AnyValue> Args;
+  AnyValue &RetVal;
+
+  TargetLibraryInfo TLI;
+  BasicBlock *BB;
+  BasicBlock::iterator PC;
+  FrameState State = FrameState::Entry;
+  // Stack objects allocated in this frame. They will be automatically freed
+  // when the function returns.
+  SmallVector<IntrusiveRefCntPtr<MemoryObject>> Allocas;
+  // Values of arguments and executed instructions in this function.
+  DenseMap<Value *, AnyValue> ValueMap;
+
+  // Reserved for in-flight subroutines.
+  Function *ResolvedCallee = nullptr;
+  SmallVector<AnyValue> CalleeArgs;
+  AnyValue CalleeRetVal;
+
+  Frame(Function &F, CallBase *CallSite, Frame *LastFrame,
+        ArrayRef<AnyValue> Args, AnyValue &RetVal,
+        const TargetLibraryInfoImpl &TLIImpl);
+};
+
+/// Shared state and helpers (UB/error reporting and checked memory access)
+/// for instruction executors.
+class ExecutorBase {
+protected:
+  Context &Ctx;
+  EventHandler &Handler;
+  // Used to indicate whether the interpreter should continue execution.
+  bool Status;
+  Frame *CurrentFrame = nullptr;
+
+  ExecutorBase(Context &C, EventHandler &H)
+      : Ctx(C), Handler(H), Status(true) {}
+  ~ExecutorBase() = default;
+
+public:
+  /// Clear Status and notify the handler of an immediate UB. Does nothing if
+  /// a UB or an error has already been reported.
+  void reportImmediateUB(StringRef Msg);
+  /// Clear Status and notify the handler of an error. Does nothing if a UB or
+  /// an error has already been reported.
+  void reportError(StringRef Msg);
+
+  /// Check if the upcoming memory access is valid. Returns the offset relative
+  /// to the underlying object if it is valid.
+  std::optional<uint64_t> verifyMemAccess(const MemoryObject &MO,
+                                          const APInt &Address,
+                                          uint64_t AccessSize, Align Alignment,
+                                          bool IsStore);
+
+  /// Load a value of type \p ValTy through \p Ptr. Invalid accesses report
+  /// immediate UB and yield poison; loading from an object in the Dead state
+  /// yields poison without reporting UB.
+  AnyValue load(const AnyValue &Ptr, Align Alignment, Type *ValTy);
+  /// Store \p Val of type \p ValTy through \p Ptr. Invalid accesses report
+  /// immediate UB and store nothing.
+  void store(const AnyValue &Ptr, Align Alignment, const AnyValue &Val,
+             Type *ValTy);
+};
+
+} // namespace llvm::ubi
+
+#endif // LLVM_TOOLS_LLUBI_EXECUTORBASE_H
diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp
index 393391d52c89..e5d15be805e0 100644
--- a/llvm/tools/llubi/lib/Interpreter.cpp
+++ b/llvm/tools/llubi/lib/Interpreter.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Context.h"
+#include "ExecutorBase.h"
 #include "Value.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/InlineAsm.h"
@@ -23,65 +24,6 @@ namespace llvm::ubi {
 
 using namespace PatternMatch;
 
-enum class FrameState {
-  // It is about to enter the function.
-  // Valid transition:
-  //   -> Running
-  Entry,
-  // It is executing instructions inside the function.
-  // Valid transitions:
-  //   -> Pending (on call)
-  //   -> Exit (on return)
-  Running,
-  // It is about to enter a callee or handle return value from the callee.
-  // Valid transitions:
-  //   -> Running (after returning from callee)
-  Pending,
-  // It is about to return the control to the caller.
-  Exit,
-};
-
-/// Context for a function call.
-/// This struct maintains the state during the execution of a function,
-/// including the control flow, values of executed instructions, and stack
-/// objects.
-struct Frame {
-  Function &Func;
-  Frame *LastFrame;
-  CallBase *CallSite;
-  ArrayRef<AnyValue> Args;
-  AnyValue &RetVal;
-
-  TargetLibraryInfo TLI;
-  BasicBlock *BB;
-  BasicBlock::iterator PC;
-  FrameState State = FrameState::Entry;
-  // Stack objects allocated in this frame. They will be automatically freed
-  // when the function returns.
-  SmallVector<IntrusiveRefCntPtr<MemoryObject>> Allocas;
-  // Values of arguments and executed instructions in this function.
-  DenseMap<Value *, AnyValue> ValueMap;
-
-  // Reserved for in-flight subroutines.
-  Function *ResolvedCallee = nullptr;
-  SmallVector<AnyValue> CalleeArgs;
-  AnyValue CalleeRetVal;
-
-  Frame(Function &F, CallBase *CallSite, Frame *LastFrame,
-        ArrayRef<AnyValue> Args, AnyValue &RetVal,
-        const TargetLibraryInfoImpl &TLIImpl)
-      : Func(F), LastFrame(LastFrame), CallSite(CallSite), Args(Args),
-        RetVal(RetVal), TLI(TLIImpl, &F) {
-    assert((Args.size() == F.arg_size() ||
-            (F.isVarArg() && Args.size() >= F.arg_size())) &&
-           "Expected enough arguments to call the function.");
-    BB = &Func.getEntryBlock();
-    PC = BB->begin();
-    for (Argument &Arg : F.args())
-      ValueMap[&Arg] = Args[Arg.getArgNo()];
-  }
-};
-
 static AnyValue addNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
                           bool HasNUW) {
   APInt Res = LHS + RHS;
@@ -121,33 +63,12 @@ static AnyValue mulNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
 /// Instruction executor using the visitor pattern.
 /// Unlike the Context class that manages the global state,
 /// InstExecutor only maintains the state for call frames.
-class InstExecutor : public InstVisitor<InstExecutor, void> {
-  Context &Ctx;
+class InstExecutor : public InstVisitor<InstExecutor, void>,
+                     public ExecutorBase {
   const DataLayout &DL;
-  EventHandler &Handler;
   std::list<Frame> CallStack;
-  // Used to indicate whether the interpreter should continue execution.
-  bool Status;
-  Frame *CurrentFrame = nullptr;
   AnyValue None;
 
-  void reportImmediateUB(StringRef Msg) {
-    // Check if we have already reported an immediate UB.
-    if (!Status)
-      return;
-    Status = false;
-    // TODO: Provide stack trace information.
-    Handler.onImmediateUB(Msg);
-  }
-
-  void reportError(StringRef Msg) {
-    // Check if we have already reported an error message.
-    if (!Status)
-      return;
-    Status = false;
-    Handler.onError(Msg);
-  }
-
   const AnyValue &getValue(Value *V) {
     if (auto *C = dyn_cast<Constant>(V))
       return Ctx.getConstantValue(C);
@@ -251,88 +172,6 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
     return false;
   }
 
-  /// Check if the upcoming memory access is valid. Returns the offset relative
-  /// to the underlying object if it is valid.
-  std::optional<uint64_t> verifyMemAccess(const MemoryObject &MO,
-                                          const APInt &Address,
-                                          uint64_t AccessSize, Align Alignment,
-                                          bool IsStore) {
-    // Loading from a stack object outside its lifetime is not undefined
-    // behavior and returns a poison value instead. Storing to it is still
-    // undefined behavior.
-    if (IsStore ? MO.getState() != MemoryObjectState::Alive
-                : MO.getState() == MemoryObjectState::Freed) {
-      reportImmediateUB("Try to access a dead memory object.");
-      return std::nullopt;
-    }
-
-    if (Address.countr_zero() < Log2(Alignment)) {
-      reportImmediateUB("Misaligned memory access.");
-      return std::nullopt;
-    }
-
-    if (AccessSize > MO.getSize() || Address.ult(MO.getAddress())) {
-      reportImmediateUB("Memory access is out of bounds.");
-      return std::nullopt;
-    }
-
-    APInt Offset = Address - MO.getAddress();
-
-    if (Offset.ugt(MO.getSize() - AccessSize)) {
-      reportImmediateUB("Memory access is out of bounds.");
-      return std::nullopt;
-    }
-
-    return Offset.getZExtValue();
-  }
-
-  AnyValue load(const AnyValue &Ptr, Align Alignment, Type *ValTy) {
-    if (Ptr.isPoison()) {
-      reportImmediateUB("Invalid memory access with a poison pointer.");
-      return AnyValue::getPoisonValue(Ctx, ValTy);
-    }
-    auto &PtrVal = Ptr.asPointer();
-    auto *MO = PtrVal.getMemoryObject();
-    if (!MO) {
-      reportImmediateUB(
-          "Invalid memory access via a pointer with nullary provenance.");
-      return AnyValue::getPoisonValue(Ctx, ValTy);
-    }
-    // TODO: pointer capability check
-    if (auto Offset =
-            verifyMemAccess(*MO, PtrVal.address(),
-                            Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
-                            /*IsStore=*/false)) {
-      // Load from a dead stack object yields poison value.
-      if (MO->getState() == MemoryObjectState::Dead)
-        return AnyValue::getPoisonValue(Ctx, ValTy);
-
-      return Ctx.load(*MO, *Offset, ValTy);
-    }
-    return AnyValue::getPoisonValue(Ctx, ValTy);
-  }
-
-  void store(const AnyValue &Ptr, Align Alignment, const AnyValue &Val,
-             Type *ValTy) {
-    if (Ptr.isPoison()) {
-      reportImmediateUB("Invalid memory access with a poison pointer.");
-      return;
-    }
-    auto &PtrVal = Ptr.asPointer();
-    auto *MO = PtrVal.getMemoryObject();
-    if (!MO) {
-      reportImmediateUB(
-          "Invalid memory access via a pointer with nullary provenance.");
-      return;
-    }
-    // TODO: pointer capability check
-    if (auto Offset =
-            verifyMemAccess(*MO, PtrVal.address(),
-                            Ctx.getEffectiveTypeStoreSize(ValTy), Alignment,
-                            /*IsStore=*/true))
-      Ctx.store(*MO, *Offset, Val, ValTy);
-  }
-
   AnyValue computePtrAdd(const Pointer &Ptr, const APInt &Offset,
                          GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset) {
     if (Offset.isZero())
@@ -418,7 +257,7 @@ class InstExecutor : public InstVisitor<InstExecutor, void> {
 public:
   InstExecutor(Context &C, EventHandler &H, Function &F,
                ArrayRef<AnyValue> Args, AnyValue &RetVal)
-      : Ctx(C), DL(Ctx.getDataLayout()), Handler(H), Status(true) {
+      : ExecutorBase(C, H), DL(Ctx.getDataLayout()) {
     CallStack.emplace_back(F, /*CallSite=*/nullptr, /*LastFrame=*/nullptr, Args,
                            RetVal, Ctx.getTLIImpl());
   }