[clang][bytecode] Implement __builtin_constant_p (#130143)

Use the regular code paths for interpreting.

Add new instructions: `StartSpeculation` will reset the diagnostics
pointers to `nullptr`, which will keep us from reporting any diagnostics
during speculation. `EndSpeculation` will undo this.

The rest depends on what `Emitter` we use.

For `EvalEmitter`, we have no bytecode, so we implement `speculate()` by
simply visiting the first argument of `__builtin_constant_p`. If the
evaluation fails, we push a `0` on the stack, otherwise a `1`.

For `ByteCodeEmitter`, add another instruction called `BCP`, that
interprets all the instructions following it until the next
`EndSpeculation` instruction. If any of those instructions fails, we
jump to the `EndLabel`, which brings us right before the
`EndSpeculation`. We then push the result on the stack.
This commit is contained in:
Timm Baeder 2025-03-08 06:06:14 +01:00 committed by GitHub
parent 7602d781b0
commit d08cf7900d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 306 additions and 83 deletions

View File

@ -367,6 +367,16 @@ bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
return true;
}
bool ByteCodeEmitter::speculate(const CallExpr *E, const LabelTy &EndLabel) {
const Expr *Arg = E->getArg(0);
PrimType T = Ctx.classify(Arg->getType()).value_or(PT_Ptr);
if (!this->emitBCP(getOffset(EndLabel), T, E))
return false;
if (!this->visit(Arg))
return false;
return true;
}
//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

View File

@ -48,12 +48,16 @@ protected:
virtual bool visitFunc(const FunctionDecl *E) = 0;
virtual bool visitExpr(const Expr *E, bool DestroyToplevelScope) = 0;
virtual bool visitDeclAndReturn(const VarDecl *E, bool ConstantContext) = 0;
/// Visits the expression \p E; implemented by the derived class.
/// speculate() uses this to emit the code for the speculated argument.
virtual bool visit(const Expr *E) = 0;
/// Emits the boolean constant \p V; implemented by the derived class.
virtual bool emitBool(bool V, const Expr *E) = 0;
/// Emits jumps.
bool jumpTrue(const LabelTy &Label);
bool jumpFalse(const LabelTy &Label);
bool jump(const LabelTy &Label);
bool fallthrough(const LabelTy &Label);
/// Speculative execution of the first argument of the __builtin_constant_p
/// call \p E: emits a BCP op that refers to \p EndLabel, followed by the
/// code for the argument.
bool speculate(const CallExpr *E, const LabelTy &EndLabel);
/// We're always emitting bytecode.
bool isActive() const { return true; }

View File

@ -4681,6 +4681,28 @@ bool Compiler<Emitter>::visitAPValueInitializer(const APValue &Val,
template <class Emitter>
bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinID) {
if (BuiltinID == Builtin::BI__builtin_constant_p) {
// Void argument is always invalid and harder to handle later.
if (E->getArg(0)->getType()->isVoidType()) {
if (DiscardResult)
return true;
return this->emitConst(0, E);
}
if (!this->emitStartSpeculation(E))
return false;
LabelTy EndLabel = this->getLabel();
if (!this->speculate(E, EndLabel))
return false;
this->fallthrough(EndLabel);
if (!this->emitEndSpeculation(E))
return false;
if (DiscardResult)
return this->emitPop(classifyPrim(E), E);
return true;
}
const Function *Func = getFunction(E->getDirectCallee());
if (!Func)
return false;

View File

@ -274,7 +274,7 @@ protected:
/// Evaluates an expression and places the result on the stack. If the
/// expression is of composite type, a local variable will be created
/// and a pointer to said variable will be placed on the stack.
bool visit(const Expr *E);
bool visit(const Expr *E) override;
/// Compiles an initializer. This is like visit() but it will never
/// create a variable and instead rely on a variable already having
/// been created. visitInitializer() then relies on a pointer to this
@ -342,6 +342,9 @@ private:
/// Emits an integer constant.
template <typename T> bool emitConst(T Value, PrimType Ty, const Expr *E);
template <typename T> bool emitConst(T Value, const Expr *E);
/// Emits the boolean \p V as a constant by forwarding to emitConst().
/// Overrides the hook declared by the emitter base class.
bool emitBool(bool V, const Expr *E) override {
return this->emitConst(V, E);
}
llvm::RoundingMode getRoundingMode(const Expr *E) const {
FPOptions FPO = E->getFPFeaturesInEffect(Ctx.getLangOpts());

View File

@ -127,6 +127,33 @@ bool EvalEmitter::fallthrough(const LabelTy &Label) {
return true;
}
bool EvalEmitter::speculate(const CallExpr *E, const LabelTy &EndLabel) {
size_t StackSizeBefore = S.Stk.size();
const Expr *Arg = E->getArg(0);
if (!this->visit(Arg)) {
S.Stk.clearTo(StackSizeBefore);
if (S.inConstantContext() || Arg->HasSideEffects(S.getASTContext()))
return this->emitBool(false, E);
return Invalid(S, OpPC);
}
PrimType T = Ctx.classify(Arg->getType()).value_or(PT_Ptr);
if (T == PT_Ptr) {
const auto &Ptr = S.Stk.pop<Pointer>();
return this->emitBool(CheckBCPResult(S, Ptr), E);
} else if (T == PT_FnPtr) {
S.Stk.discard<FunctionPointer>();
// Never accepted
return this->emitBool(false, E);
}
// Otherwise, this is fine!
if (!this->emitPop(T, E))
return false;
return this->emitBool(true, E);
}
template <PrimType OpType> bool EvalEmitter::emitRet(const SourceInfo &Info) {
if (!isActive())
return true;

View File

@ -55,12 +55,16 @@ protected:
virtual bool visitExpr(const Expr *E, bool DestroyToplevelScope) = 0;
virtual bool visitDeclAndReturn(const VarDecl *VD, bool ConstantContext) = 0;
virtual bool visitFunc(const FunctionDecl *F) = 0;
/// Visits the expression \p E; implemented by the derived class.
/// speculate() uses this to evaluate the speculated argument.
virtual bool visit(const Expr *E) = 0;
/// Emits the boolean constant \p V; implemented by the derived class.
/// speculate() uses this to emit the result of __builtin_constant_p.
virtual bool emitBool(bool V, const Expr *E) = 0;
/// Emits jumps.
bool jumpTrue(const LabelTy &Label);
bool jumpFalse(const LabelTy &Label);
bool jump(const LabelTy &Label);
bool fallthrough(const LabelTy &Label);
/// Speculative execution: evaluates the first argument of the
/// __builtin_constant_p call \p E right away and emits the boolean result.
/// \p EndLabel is not used by this emitter.
bool speculate(const CallExpr *E, const LabelTy &EndLabel);
/// Since expressions can only jump forward, predicated execution is
/// used to deal with if-else statements.

View File

@ -54,6 +54,79 @@ static bool Jf(InterpState &S, CodePtr &PC, int32_t Offset) {
return true;
}
// https://github.com/llvm/llvm-project/issues/102513
#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
#pragma optimize("", off)
#endif
// FIXME: We have the large switch over all opcodes here again, and in
// Interpret().
/// Custom evaluator for the BCP op (__builtin_constant_p).
///
/// Interprets the instructions following the BCP op until an EndSpeculation
/// opcode is read, then replaces the speculated value on the stack with the
/// builtin's result (an Integral<32, true> holding 0 or 1) and moves
/// \p RealPC to the EndSpeculation op.
///
/// \param S      The interpreter state.
/// \param RealPC The real program counter; only modified by the final jump.
/// \param Offset Jump offset to the end label, as emitted with the op.
/// \param PT     Primitive type of the speculated value.
/// \returns true on success, or the result of Invalid() if the speculation
///          failed outside of a constant context.
static bool BCP(InterpState &S, CodePtr &RealPC, int32_t Offset, PrimType PT) {
[[maybe_unused]] CodePtr PCBefore = RealPC;
size_t StackSizeBefore = S.Stk.size();
// RealPC is captured by value and the lambda advances its own copy, so the
// speculative run leaves the real program counter untouched.
auto SpeculativeInterp = [&S, RealPC]() -> bool {
// NOTE(review): StartFrame, PC and OpPC are presumably referenced by the
// code included from Opcodes.inc below -- confirm before renaming them.
const InterpFrame *StartFrame = S.Current;
CodePtr PC = RealPC;
for (;;) {
auto Op = PC.read<Opcode>();
// Reaching EndSpeculation means the whole speculated sequence succeeded.
if (Op == OP_EndSpeculation)
return true;
CodePtr OpPC = PC;
switch (Op) {
#define GET_INTERP
#include "Opcodes.inc"
#undef GET_INTERP
}
}
llvm_unreachable("We didn't see an EndSpeculation op?");
};
if (SpeculativeInterp()) {
if (PT == PT_Ptr) {
// Pointers only count as constant if CheckBCPResult() accepts them.
const auto &Ptr = S.Stk.pop<Pointer>();
assert(S.Stk.size() == StackSizeBefore);
S.Stk.push<Integral<32, true>>(
Integral<32, true>::from(CheckBCPResult(S, Ptr)));
} else if (PT == PT_FnPtr) {
// Function pointers are never accepted.
S.Stk.discard<FunctionPointer>();
S.Stk.push<Integral<32, true>>(Integral<32, true>::from(0));
} else {
// Pop the result from the stack and return success.
TYPE_SWITCH(PT, S.Stk.pop<T>(););
assert(S.Stk.size() == StackSizeBefore);
S.Stk.push<Integral<32, true>>(Integral<32, true>::from(1));
}
} else {
// NOTE(review): unlike EvalEmitter::speculate(), this path neither clears
// the stack before returning Invalid() nor looks at whether the argument
// has side effects -- confirm that the difference is intended.
if (!S.inConstantContext())
return Invalid(S, RealPC);
// Speculation failed in a constant context: discard whatever it left on
// the stack and report "not constant".
S.Stk.clearTo(StackSizeBefore);
S.Stk.push<Integral<32, true>>(Integral<32, true>::from(0));
}
// RealPC should not have been modified.
assert(*RealPC == *PCBefore);
// Jump to the end label. This is a little trickier than just
// RealPC += Offset because our usual jump instructions don't have any
// additional arguments, so the offset we get is a little too much and we
// need to subtract the (aligned) size of the PrimType argument again.
int32_t ParamSize = align(sizeof(PrimType));
assert(Offset >= ParamSize);
RealPC += Offset - ParamSize;
// Sanity check on a copy, since read() advances the pointer it is called on.
[[maybe_unused]] CodePtr PCCopy = RealPC;
assert(PCCopy.read<Opcode>() == OP_EndSpeculation);
return true;
}
// https://github.com/llvm/llvm-project/issues/102513
#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
#pragma optimize("", on)
#endif
static void diagnoseMissingInitializer(InterpState &S, CodePtr OpPC,
const ValueDecl *VD) {
const SourceInfo &E = S.Current->getSource(OpPC);
@ -290,6 +363,22 @@ void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC,
TYPE_SWITCH(Ty, S.Stk.discard<T>());
}
/// Decides whether the pointer \p Ptr, obtained by evaluating the argument of
/// __builtin_constant_p, makes the builtin return true.
///
/// Dummy pointers are rejected. Zero, integral and typeid pointers are
/// accepted. Any other pointer is accepted only if its declaration descriptor
/// is backed by an expression and that expression is a StringLiteral.
bool CheckBCPResult(InterpState &S, const Pointer &Ptr) {
  if (Ptr.isDummy())
    return false;

  if (Ptr.isZero() || Ptr.isIntegralPointer() || Ptr.isTypeidPointer())
    return true;

  const Expr *SourceExpr = Ptr.getDeclDesc()->asExpr();
  return SourceExpr != nullptr && isa<StringLiteral>(SourceExpr);
}
bool CheckExtern(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
if (!Ptr.isExtern())
return true;

View File

@ -159,6 +159,7 @@ bool CheckLiteralType(InterpState &S, CodePtr OpPC, const Type *T);
bool InvalidShuffleVectorIndex(InterpState &S, CodePtr OpPC, uint32_t Index);
bool CheckBitCast(InterpState &S, CodePtr OpPC, bool HasIndeterminateBits,
bool TargetIsUCharOrByte);
bool CheckBCPResult(InterpState &S, const Pointer &Ptr);
template <typename T>
static bool handleOverflow(InterpState &S, CodePtr OpPC, const T &SrcValue) {
@ -2776,8 +2777,29 @@ inline bool Unsupported(InterpState &S, CodePtr OpPC) {
return false;
}
/// Enters a (possibly nested) speculation. Only the outermost entry saves the
/// current diagnostics pointer in S.PrevDiags and replaces it with nullptr.
/// Always returns true.
inline bool StartSpeculation(InterpState &S, CodePtr OpPC) {
  const bool IsOutermost = (++S.SpeculationDepth == 1);
  if (IsOutermost) {
    assert(S.PrevDiags == nullptr);
    S.PrevDiags = S.getEvalStatus().Diag;
    S.getEvalStatus().Diag = nullptr;
  }
  return true;
}
/// Leaves one level of speculation. When the outermost level is left, the
/// diagnostics pointer saved in S.PrevDiags is put back and S.PrevDiags is
/// reset to nullptr. Always returns true.
inline bool EndSpeculation(InterpState &S, CodePtr OpPC) {
  assert(S.SpeculationDepth != 0);

  // Still inside an enclosing speculation: nothing to restore yet.
  if (--S.SpeculationDepth != 0)
    return true;

  S.getEvalStatus().Diag = S.PrevDiags;
  S.PrevDiags = nullptr;
  return true;
}
/// Do nothing and just abort execution by unconditionally returning false.
inline bool Error(InterpState &S, CodePtr OpPC) { return false; }
/// Forwards to S.noteSideEffect() and returns its result.
inline bool SideEffect(InterpState &S, CodePtr OpPC) {
return S.noteSideEffect();
}

View File

@ -1483,80 +1483,6 @@ static bool interp__builtin_ptrauth_string_discriminator(
return true;
}
// FIXME: This implementation is not complete.
// The Compiler instance we create cannot access the current stack frame, local
// variables, function parameters, etc. We also need protection from
// side-effects, fatal errors, etc.
/// Evaluates __builtin_constant_p by interpreting the first argument of
/// \p Call with a separate Compiler<EvalEmitter> on its own InterpStack while
/// diagnostics are temporarily disabled. The result is pushed as an integer
/// of the call's type; the function itself always returns true.
static bool interp__builtin_constant_p(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const Function *Func,
const CallExpr *Call) {
const Expr *Arg = Call->getArg(0);
QualType ArgType = Arg->getType();
// Pushes the builtin's result and reports success to the caller.
auto returnInt = [&S, Call](bool Value) -> bool {
pushInteger(S, Value, Call->getType());
return true;
};
// __builtin_constant_p always has one operand. The rules which gcc follows
// are not precisely documented, but are as follows:
//
// - If the operand is of integral, floating, complex or enumeration type,
// and can be folded to a known value of that type, it returns 1.
// - If the operand can be folded to a pointer to the first character
// of a string literal (or such a pointer cast to an integral type)
// or to a null pointer or an integer cast to a pointer, it returns 1.
//
// Otherwise, it returns 0.
//
// FIXME: GCC also intends to return 1 for literals of aggregate types, but
// its support for this did not work prior to GCC 9 and is not yet well
// understood.
if (ArgType->isIntegralOrEnumerationType() || ArgType->isFloatingType() ||
ArgType->isAnyComplexType() || ArgType->isPointerType() ||
ArgType->isNullPtrType()) {
// Disable diagnostics while the argument is evaluated and restore them
// right afterwards.
auto PrevDiags = S.getEvalStatus().Diag;
S.getEvalStatus().Diag = nullptr;
InterpStack Stk;
Compiler<EvalEmitter> C(S.Ctx, S.P, S, Stk);
auto Res = C.interpretExpr(Arg, /*ConvertResultToRValue=*/Arg->isGLValue());
S.getEvalStatus().Diag = PrevDiags;
// A failed evaluation means the argument is not constant.
if (Res.isInvalid()) {
C.cleanup();
Stk.clear();
return returnInt(false);
}
if (!Res.empty()) {
const APValue &LV = Res.toAPValue();
if (LV.isLValue()) {
APValue::LValueBase Base = LV.getLValueBase();
if (Base.isNull()) {
// A null base is acceptable.
return returnInt(true);
} else if (const auto *E = Base.dyn_cast<const Expr *>()) {
// Expression bases are only accepted for string literals, and only
// with a zero offset.
if (!isa<StringLiteral>(E))
return returnInt(false);
return returnInt(LV.getLValueOffset().isZero());
} else if (Base.is<TypeInfoLValue>()) {
// Surprisingly, GCC considers __builtin_constant_p(&typeid(int)) to
// evaluate to true.
return returnInt(true);
} else {
// Any other base is not constant enough for GCC.
return returnInt(false);
}
}
}
// Otherwise, any constant value is good enough.
return returnInt(true);
}
// Arguments of any other type are never considered constant.
return returnInt(false);
}
static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const Function *Func,
@ -2468,11 +2394,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
return false;
break;
case Builtin::BI__builtin_constant_p:
if (!interp__builtin_constant_p(S, OpPC, Frame, F, Call))
return false;
break;
case Builtin::BI__noop:
pushInteger(S, 0, Call->getType());
break;

View File

@ -144,6 +144,9 @@ public:
SourceLocation EvalLocation;
/// Declaration we're initializing/evaluating, if any.
const VarDecl *EvaluatingDecl = nullptr;
// Things needed to do speculative execution.
/// Diagnostics pointer saved by the outermost StartSpeculation op (which
/// sets the evaluation status' Diag to nullptr) and restored by the matching
/// EndSpeculation op. nullptr while no speculation is active.
SmallVectorImpl<PartialDiagnosticAt> *PrevDiags = nullptr;
/// Nesting depth of active speculations; incremented by StartSpeculation and
/// decremented by EndSpeculation.
unsigned SpeculationDepth = 0;
llvm::SmallVector<
std::pair<const Expr *, const LifetimeExtendedTemporaryDecl *>>

View File

@ -58,7 +58,7 @@ def ArgRecordField : ArgType { let Name = "const Record::Field *"; }
def ArgFltSemantics : ArgType { let Name = "const llvm::fltSemantics *"; }
def ArgRoundingMode : ArgType { let Name = "llvm::RoundingMode"; }
def ArgLETD: ArgType { let Name = "const LifetimeExtendedTemporaryDecl *"; }
def ArgCastKind : ArgType { let Name = "CastKind"; }
def ArgCastKind : ArgType { let Name = "interp::CastKind"; }
def ArgCallExpr : ArgType { let Name = "const CallExpr *"; }
def ArgExpr : ArgType { let Name = "const Expr *"; }
def ArgOffsetOfExpr : ArgType { let Name = "const OffsetOfExpr *"; }
@ -172,6 +172,14 @@ def Jt : JumpOpcode;
// [Bool] -> [], jumps if false.
def Jf : JumpOpcode;
// Speculative execution, used for __builtin_constant_p.
// StartSpeculation and EndSpeculation bracket the speculated code;
// diagnostics are suppressed between the outermost pair.
def StartSpeculation : Opcode;
def EndSpeculation : Opcode;
// Interprets the instructions that follow until EndSpeculation.
// Args: the jump offset to the end label and the PrimType of the speculated
// value.
def BCP : Opcode {
let ChangesPC = 1;
let HasCustomEval = 1;
let Args = [ArgSint32, ArgPrimType];
}
//===----------------------------------------------------------------------===//
// Returns
//===----------------------------------------------------------------------===//

View File

@ -1,6 +1,7 @@
// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s
// RUN: %clang_cc1 -verify=ref,both %s
// RUN: %clang_cc1 -std=c++20 -fexperimental-new-constant-interpreter -verify=expected,both %s
// RUN: %clang_cc1 -std=c++20 -verify=ref,both %s
using intptr_t = __INTPTR_TYPE__;
static_assert(__builtin_constant_p(12), "");
static_assert(__builtin_constant_p(1.0), "");
@ -18,3 +19,108 @@ constexpr int foo(int &a) {
return __builtin_constant_p(a);
}
static_assert(!foo(z));
/// __builtin_constant_p applied to the result of another
/// __builtin_constant_p call: the outer call should yield true, even when
/// the inner argument is a reference parameter.
static_assert(__builtin_constant_p(__builtin_constant_p(1)));
constexpr bool nested(int& a) {
return __builtin_constant_p(__builtin_constant_p(a));
}
static_assert(nested(z));
/// The value of a local variable initialized with a literal should count as
/// constant.
constexpr bool Local() {
int z = 10;
return __builtin_constant_p(z);
}
static_assert(Local());
/// The address of a local variable should not count as constant.
constexpr bool Local2() {
int z = 10;
return __builtin_constant_p(&z);
}
static_assert(!Local2());
/// A by-value parameter that was passed a literal should count as constant.
constexpr bool Parameter(int a) {
return __builtin_constant_p(a);
}
static_assert(Parameter(10));
/// A pointer to a local variable whose scope has already ended should not
/// count as constant.
constexpr bool InvalidLocal() {
int *z;
{
int b = 10;
z = &b;
}
return __builtin_constant_p(z);
}
static_assert(!InvalidLocal());
/// Helper that passes a by-value argument of any type to
/// __builtin_constant_p.
template<typename T> constexpr bool bcp(T t) {
return __builtin_constant_p(t);
}
/// Casts \p p to intptr_t in either arm of a conditional whose condition is
/// __builtin_constant_p(1).
constexpr intptr_t ptr_to_int(const void *p) {
return __builtin_constant_p(1) ? (intptr_t)p : (intptr_t)p; // expected-note {{cast that performs the conversions of a reinterpret_cast}}
}
/// This is from test/SemaCXX/builtin-constant-p.cpp, but it makes no sense.
/// ptr_to_int is called before bcp(), so it fails. GCC does not accept this either.
static_assert(bcp(ptr_to_int("foo"))); // expected-error {{not an integral constant expression}} \
// expected-note {{in call to}}
/// `a && b` on reference parameters: the cases below accept it when both
/// arguments are literals and reject it when either one is `z` (declared
/// outside this excerpt).
constexpr bool AndFold(const int &a, const int &b) {
return __builtin_constant_p(a && b);
}
static_assert(AndFold(10, 20));
static_assert(!AndFold(z, 10));
static_assert(!AndFold(10, z));
/// Simple aggregate used by the member-access cases. A member of a constexpr
/// global instance should count as constant.
struct F {
int a;
};
constexpr F f{12};
static_assert(__builtin_constant_p(f.a));
/// A member of a local aggregate declared without an initializer should not
/// count as constant.
constexpr bool Member() {
F f;
return __builtin_constant_p(f.a);
}
static_assert(!Member());
/// The result of __builtin_constant_p is discarded via a cast to void; the
/// enclosing function must still evaluate successfully.
constexpr bool Discard() {
(void)__builtin_constant_p(10);
return true;
}
static_assert(Discard());
/// An integer cast to a pointer is accepted, a function is not.
static_assert(__builtin_constant_p((int*)123));
constexpr void func() {}
static_assert(!__builtin_constant_p(func));
/// This is from SemaCXX/builtin-constant-p and GCC agrees with the bytecode interpreter.
/// With the bytecode interpreter the increment inside the argument takes
/// effect and the builtin yields 1, giving 21; the reference evaluator
/// computes 10 instead.
constexpr int mutate1() {
int n = 1;
int m = __builtin_constant_p(++n);
return n * 10 + m;
}
static_assert(mutate1() == 21); // ref-error {{static assertion failed}} \
// ref-note {{evaluates to '10 == 21'}}
/// Similar for this. GCC agrees with the bytecode interpreter.
/// Increments \p param through the reference when \p mutate is true and
/// returns the resulting value.
constexpr int mutate_param(bool mutate, int &param) {
mutate = mutate; // Mutation of internal state is OK
if (mutate)
++param;
return param;
}
/// Calls mutate_param() inside the __builtin_constant_p argument. Without
/// mutation both evaluators compute 11. With mutation the bytecode
/// interpreter computes 21, the reference evaluator 10.
constexpr int mutate6(bool mutate) {
int n = 1;
int m = __builtin_constant_p(mutate_param(mutate, n));
return n * 10 + m;
}
static_assert(mutate6(false) == 11);
static_assert(mutate6(true) == 21); // ref-error {{static assertion failed}} \
// ref-note {{evaluates to '10 == 21'}}

View File

@ -1,5 +1,8 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -disable-llvm-optzns -o - %s -O2 | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -disable-llvm-optzns -o - %s -O0 | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -disable-llvm-optzns -o - %s -O2 -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -disable-llvm-optzns -o - %s -O0 -fexperimental-new-constant-interpreter | FileCheck %s
int a = 42;

View File

@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
// Don't crash if the argument to __builtin_constant_p isn't scalar.
template <typename T>