[clang][analysis][dataflow] Detect goto backedges to trigger Widen (#179546)

Currently, the Clang Dataflow Framework only does Widen on backedges
from structured loops.

Missing some Widen calls (e.g., when there are backedges from gotos)
could cause some analyses to iterate ~forever (until the max visits
limit is hit).

This adds a simple search for backedges, and triggers Widen on the
additional backedge nodes. Fixes [issue 179083.
](https://github.com/llvm/llvm-project/issues/179083)
This commit is contained in:
Jan Voung 2026-02-06 16:30:14 -05:00 committed by GitHub
parent 60ecb37896
commit ec15bddde5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 543 additions and 16 deletions

View File

@ -0,0 +1,54 @@
//===- CFGBackEdges.h - Finds back edges in Clang CFGs -*- C++ ----------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_ANALYSIS_CFG_BACKEDGES_H
#define LLVM_CLANG_ANALYSIS_CFG_BACKEDGES_H
#include "clang/Analysis/CFG.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
namespace clang {
/// Finds and returns back edges in Clang CFGs. The CFG already has some
/// backedge information for structured loops (\c CFGBlock::getLoopTarget).
/// However, unstructured back edges from \c goto statements are not included.
/// This helps find back edges, whether the CFG is reducible or not.
/// This includes CFGBlock::getLoopTarget nodes, but one can filter those out
/// e.g., with \c findNonStructuredLoopBackedgeNodes.
llvm::DenseMap<const CFGBlock *, const CFGBlock *>
findCFGBackEdges(const CFG &CFG);
/// Returns a set of CFG blocks that is the source of a backedge and is not
/// tracked as part of a structured loop (with `CFGBlock::getLoopTarget`).
llvm::SmallDenseSet<const CFGBlock *>
findNonStructuredLoopBackedgeNodes(const CFG &CFG);
/// Given a backedge from B1 to B2, B1 is a "backedge node" in a CFG.
/// It can be:
/// - A block introduced in the CFG exclusively to indicate a structured loop's
/// backedge. They are exactly identified by the presence of a non-null
/// pointer to the entry block of the loop condition. Note that this is not
/// necessarily the block with the loop statement as terminator, because
/// short-circuit operators will result in multiple blocks encoding the loop
/// condition, only one of which will contain the loop statement as
/// terminator.
/// - A block that is part of a backedge in a CFG with unstructured loops
/// (e.g., a CFG with a `goto` statement). Note that this is not necessarily
/// the block with the goto statement as terminator. The choice depends on how
/// blocks and edges are ordered.
///
/// \param NonStructLoopBackedgeNodes is the set of nodes from
/// \c findNonStructuredLoopBackedgeNodes.
bool isBackedgeCFGNode(
const CFGBlock &B,
const llvm::SmallDenseSet<const CFGBlock *> &NonStructLoopBackedgeNodes);
} // namespace clang
#endif // LLVM_CLANG_ANALYSIS_CFG_BACKEDGES_H

View File

@ -0,0 +1,105 @@
//===- CFGBackEdges.cpp - Finds back edges in Clang CFGs ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <stack>
#include <utility>
#include <vector>
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/CFGBackEdges.h"
#include "llvm/ADT/DenseMap.h"
namespace clang {
namespace {
struct VisitClockTimes {
// Timestamp for when the node was visited / discovered.
int Pre = -1;
// Timestamp for when we finished visiting a node's successors.
int Post = -1;
};
} // namespace
// Returns true if the CFG contains any goto statements (direct or indirect).
static bool hasGotoInCFG(const CFG &CFG) {
for (const CFGBlock *Block : CFG) {
const Stmt *Term = Block->getTerminatorStmt();
if (Term == nullptr)
continue;
if (isa<GotoStmt>(Term) || isa<IndirectGotoStmt>(Term))
return true;
}
return false;
}
llvm::DenseMap<const CFGBlock *, const CFGBlock *>
findCFGBackEdges(const CFG &CFG) {
// Do a simple textbook DFS with pre and post numberings to find back edges.
llvm::DenseMap<const CFGBlock *, const CFGBlock *> BackEdges;
std::vector<VisitClockTimes> VisitState;
VisitState.resize(CFG.getNumBlockIDs());
std::stack<std::pair<const CFGBlock *, CFGBlock::const_succ_iterator>>
DFSStack;
int Clock = 0;
const CFGBlock &Entry = CFG.getEntry();
VisitState[Entry.getBlockID()].Pre = Clock++;
DFSStack.push({&Entry, Entry.succ_begin()});
while (!DFSStack.empty()) {
auto &[Block, SuccIt] = DFSStack.top();
if (SuccIt == Block->succ_end()) {
VisitState[Block->getBlockID()].Post = Clock++;
DFSStack.pop();
continue;
}
const CFGBlock::AdjacentBlock &AdjacentSucc = *SuccIt++;
const CFGBlock *Succ = AdjacentSucc.getReachableBlock();
// Skip unreachable blocks.
if (Succ == nullptr)
continue;
VisitClockTimes &SuccVisitState = VisitState[Succ->getBlockID()];
if (SuccVisitState.Pre != -1) {
if (SuccVisitState.Post == -1)
BackEdges.insert({Block, Succ});
} else {
SuccVisitState.Pre = Clock++;
DFSStack.push({Succ, Succ->succ_begin()});
}
}
return BackEdges;
}
// Returns a set of CFG blocks that is the source of a backedge and is not
// tracked as part of a structured loop (with `CFGBlock::getLoopTarget`).
llvm::SmallDenseSet<const CFGBlock *>
findNonStructuredLoopBackedgeNodes(const CFG &CFG) {
llvm::SmallDenseSet<const CFGBlock *> NonStructLoopBackedgeNodes;
// We should only need this if the function has gotos.
if (!hasGotoInCFG(CFG))
return NonStructLoopBackedgeNodes;
llvm::DenseMap<const CFGBlock *, const CFGBlock *> Backedges =
findCFGBackEdges(CFG);
for (const auto &[From, To] : Backedges) {
if (From->getLoopTarget() == nullptr)
NonStructLoopBackedgeNodes.insert(From);
}
return NonStructLoopBackedgeNodes;
}
bool isBackedgeCFGNode(
const CFGBlock &B,
const llvm::SmallDenseSet<const CFGBlock *> &NonStructLoopBackedgeNodes) {
return B.getLoopTarget() != nullptr ||
NonStructLoopBackedgeNodes.contains(&B);
}
} // namespace clang

View File

@ -9,6 +9,7 @@ add_clang_library(clangAnalysis
BodyFarm.cpp
CalledOnceCheck.cpp
CFG.cpp
CFGBackEdges.cpp
CFGReachabilityAnalysis.cpp
CFGStmtMap.cpp
CallGraph.cpp

View File

@ -19,18 +19,23 @@
#include "clang/AST/ASTDumper.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/CFGBackEdges.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
#include "clang/Analysis/FlowSensitive/Transfer.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Basic/LLVM.h"
#include "clang/Support/Compiler.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
@ -64,16 +69,6 @@ static int blockIndexInPredecessor(const CFGBlock &Pred,
return BlockPos - Pred.succ_begin();
}
// A "backedge" node is a block introduced in the CFG exclusively to indicate a
// loop backedge. They are exactly identified by the presence of a non-null
// pointer to the entry block of the loop condition. Note that this is not
// necessarily the block with the loop statement as terminator, because
// short-circuit operators will result in multiple blocks encoding the loop
// condition, only one of which will contain the loop statement as terminator.
static bool isBackedgeNode(const CFGBlock &B) {
return B.getLoopTarget() != nullptr;
}
namespace {
/// Extracts the terminator's condition expression.
@ -503,6 +498,8 @@ runTypeErasedDataflowAnalysis(
const clang::CFG &CFG = ACFG.getCFG();
PostOrderCFGView POV(&CFG);
ForwardDataflowWorklist Worklist(CFG, &POV);
llvm::SmallDenseSet<const CFGBlock *> NonStructLoopBackedgeNodes =
findNonStructuredLoopBackedgeNodes(CFG);
std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates(
CFG.size());
@ -537,7 +534,7 @@ runTypeErasedDataflowAnalysis(
llvm::errs() << "Old Env:\n";
OldBlockState->Env.dump();
});
if (isBackedgeNode(*Block)) {
if (isBackedgeCFGNode(*Block, NonStructLoopBackedgeNodes)) {
LatticeJoinEffect Effect1 = Analysis.widenTypeErased(
NewBlockState.Lattice, OldBlockState->Lattice);
LatticeJoinEffect Effect2 =

View File

@ -0,0 +1,312 @@
//===- unittests/Analysis/CFGBackEdgesTest.cpp - CFG backedges tests ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Analysis/CFGBackEdges.h"
#include "CFGBuildResult.h"
#include "clang/AST/Stmt.h"
#include "clang/Analysis/CFG.h"
#include "clang/Basic/LLVM.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace clang {
namespace analysis {
namespace {
using ::testing::IsNull;
using ::testing::NotNull;
using ::testing::SizeIs;
TEST(CFGBackEdgesTest, NoBackedgesLinear) {
const char *Code = R"cc(
int f(int x) {
l1:
x++;
l2:
x++;
return x;
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_TRUE(BackEdges.empty());
}
TEST(CFGBackEdgesTest, NoBackedgesOnlyCrossEdge) {
const char *Code = R"cc(
int f(int x) {
if (x > 0)
x++;
else
x--;
return x;
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_TRUE(BackEdges.empty());
}
TEST(CFGBackEdgesTest, NoBackedgesWithUnreachableSuccessorForSwitch) {
const char *Code = R"cc(
enum class Kind { A, B };
void f(Kind kind) {
switch(kind) {
case Kind::A: return 0;
case Kind::B: break;
}
return 1;
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_TRUE(BackEdges.empty());
}
TEST(CFGBackEdgesTest, ForLoop) {
const char *Code = R"cc(
void f(int n) {
for (int i = 0; i < n; ++i) {}
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
// Finds one backedge, which is the one looping back to the loop header
// (has a loop target).
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_THAT(BackEdges, SizeIs(1));
EXPECT_THAT(BackEdges.begin()->first->getLoopTarget(), NotNull());
}
TEST(CFGBackEdgesTest, WhileLoop) {
const char *Code = R"cc(
void f(int n) {
int i = 0;
while (i < n) { ++i; }
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_THAT(BackEdges, SizeIs(1));
EXPECT_THAT(BackEdges.begin()->first->getLoopTarget(), NotNull());
}
TEST(CFGBackEdgesTest, DoWhileLoop) {
const char *Code = R"cc(
void f(int n) {
int i = 0;
do { ++i; } while (i < n);
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_THAT(BackEdges, SizeIs(1));
EXPECT_THAT(BackEdges.begin()->first->getLoopTarget(), NotNull());
}
TEST(CFGBackEdgesTest, GotoLoop) {
const char *Code = R"cc(
void f(int n) {
int i = 0;
loop:
if (i < n) {
++i;
goto loop;
}
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
// Finds one backedge, but since it's an unstructured loop, the loop target is
// null. Instead, the node has a goto terminator.
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_THAT(BackEdges, SizeIs(1));
EXPECT_THAT(BackEdges.begin()->first->getLoopTarget(), IsNull());
EXPECT_TRUE(isa<GotoStmt>(BackEdges.begin()->first->getTerminatorStmt()));
}
TEST(CFGBackEdgesTest, WhileWithContinueLoop) {
const char *Code = R"cc(
void f(int n) {
int i = 0;
while (i < n) {
++i;
if (i == 5) continue;
if (i == 10) break;
i *= 2;
}
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_THAT(BackEdges, SizeIs(testing::Gt(0)));
for (const auto &[From, To] : BackEdges)
EXPECT_THAT(From->getLoopTarget(), NotNull());
}
TEST(CFGBackEdgesTest, NestedForLoop) {
const char *Code = R"cc(
void f(int n) {
for (int i = 0; i < n; ++i) {
for (int j = i; j < n; ++j) {}
}
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_THAT(BackEdges, SizeIs(2));
auto It = BackEdges.begin();
auto *FirstLoopTarget = It->first->getLoopTarget();
EXPECT_THAT(FirstLoopTarget, NotNull());
++It;
auto *SecondLoopTarget = It->first->getLoopTarget();
EXPECT_THAT(SecondLoopTarget, NotNull());
EXPECT_NE(FirstLoopTarget, SecondLoopTarget);
}
TEST(CFGBackEdgesTest, IrreducibleCFG) {
const char *Code = R"cc(
void f(int cond) {
if (cond) goto L1;
L0:
goto L1;
L1:
goto L0;
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdges = findCFGBackEdges(*Cfg);
// In an irreducible CFG, we still expect to find a back edge.
EXPECT_THAT(BackEdges, SizeIs(1));
EXPECT_TRUE(isa<GotoStmt>(BackEdges.begin()->first->getTerminatorStmt()));
}
TEST(CFGBackEdgesTest, FirstBackedgeIsNotGoto) {
const char *Code = R"cc(
void f(int x, int y) {
if (x > y) {
} else {
L1:
--x;
if (x == 0) return;
}
goto L1;
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
auto BackEdges = findCFGBackEdges(*Cfg);
EXPECT_THAT(BackEdges, SizeIs(1));
// We might find a backedge where the source block doesn't terminate with
// a `goto`, due to the DFS search order. For example:
//
// B_entry: `if (x > y)`
// \--then--> B1: `<empty>`
// --> B2: `goto L1`
// --> B3: `--x; if (x == 0)`
// \--then--> B4 `return` --> B_exit.
// \--else--> B2: ... (the `if`'s else is a backedge from B3 to B2!)
// \--else--> B3: ...
EXPECT_FALSE(isa<GotoStmt>(BackEdges.begin()->first->getTerminatorStmt()));
}
TEST(CFGBackEdgesTest, FindNonStructuredLoopBackedgeNodes) {
const char *Code = R"cc(
void f(int n) {
for (int i = 0; i < n; ++i) {
int j = 0;
inner_loop:
if (j < n) {
++j;
goto inner_loop;
}
}
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
// Finds just the goto backedge, and not the for-loop backedge.
auto BackEdgeNodes = findNonStructuredLoopBackedgeNodes(*Cfg);
EXPECT_THAT(BackEdgeNodes, SizeIs(1));
const CFGBlock *Node = *BackEdgeNodes.begin();
EXPECT_EQ(Node->getLoopTarget(), nullptr);
EXPECT_TRUE(isa<GotoStmt>(Node->getTerminatorStmt()));
}
TEST(CFGBackEdgesTest, IsBackedgeCFGNode) {
const char *Code = R"cc(
void f(int n) {
for (int i = 0; i < n; ++i) {
int j = 0;
inner_loop:
if (j < n) {
++j;
goto inner_loop;
}
}
})cc";
BuildResult Result = BuildCFG(Code);
EXPECT_EQ(BuildResult::BuiltCFG, Result.getStatus());
CFG *Cfg = Result.getCFG();
ASSERT_THAT(Cfg, NotNull());
auto BackEdgeNodes = findNonStructuredLoopBackedgeNodes(*Cfg);
// `isBackedgeCFGNode` should be true for both the for-loop backedge node and
// goto backedge nodes.
const CFGBlock *ForLoopBackedgeNode = nullptr;
const CFGBlock *GotoBackedgeNode = nullptr;
for (const CFGBlock *Block : *Cfg) {
if (Block->getLoopTarget() != nullptr) {
ForLoopBackedgeNode = Block;
} else if (Block->getTerminatorStmt() != nullptr &&
isa<GotoStmt>(Block->getTerminatorStmt())) {
GotoBackedgeNode = Block;
}
}
ASSERT_THAT(ForLoopBackedgeNode, NotNull());
ASSERT_THAT(GotoBackedgeNode, NotNull());
EXPECT_TRUE(isBackedgeCFGNode(*ForLoopBackedgeNode, BackEdgeNodes));
EXPECT_TRUE(isBackedgeCFGNode(*GotoBackedgeNode, BackEdgeNodes));
}
} // namespace
} // namespace analysis
} // namespace clang

View File

@ -1,4 +1,5 @@
add_clang_unittest(ClangAnalysisTests
CFGBackEdgesTest.cpp
CFGDominatorTree.cpp
CFGTest.cpp
CloneDetectionTest.cpp

View File

@ -1145,6 +1145,34 @@ TEST_F(WideningTest, DistinctValuesWithDifferentPropertiesWidenedToTop) {
});
}
TEST_F(WideningTest,
DistinctValuesWithDifferentPropertiesWidenedToTopGotoInsteadOfWhile) {
std::string Code = R"cc(
void target(bool Cond) {
int *Foo;
int i = 0;
Foo = nullptr;
start:
if (Cond) {
Foo = &i;
goto start;
}
(void)0;
/*[[p]]*/
}
)cc";
runDataflow(
Code,
[](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
ASTContext &ASTCtx) {
const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
const auto &FooVal = getValueForDecl<Value>(ASTCtx, Env, "Foo");
ASSERT_THAT(FooVal.getProperty("is_null"), NotNull());
EXPECT_TRUE(areEquivalentValues(*FooVal.getProperty("is_null"),
Env.makeTopBoolValue()));
});
}
class FlowConditionTest : public Test {
protected:
template <typename Matcher>
@ -1253,18 +1281,47 @@ TEST_F(FlowConditionTest, WhileStmt) {
}
TEST_F(FlowConditionTest, WhileStmtWithAssignmentInCondition) {
std::string Code = R"(
std::string Code = R"cc(
bool getBool();
void target(bool Foo) {
// This test checks whether the analysis preserves the connection between
// the value of `Foo` and the assignment expression, despite widening.
// The equality operator generates a fresh boolean variable on each
// interpretation, which forces use of widening.
while ((Foo = (3 == 4))) {
// The return value of getBool() should have a fresh boolean variable on
// each interpretation, which forces use of widening.
while (Foo = getBool()) {
(void)0;
/*[[p]]*/
}
}
)";
)cc";
runDataflow(
Code,
[](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
ASTContext &ASTCtx) {
const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
auto &FooVal = getValueForDecl<BoolValue>(ASTCtx, Env, "Foo").formula();
EXPECT_TRUE(Env.proves(FooVal));
});
}
TEST_F(FlowConditionTest, GotoLoopWithAssignmentInCondition) {
std::string Code = R"cc(
bool getBool();
void target(bool Foo) {
// This test checks whether the analysis preserves the connection between
// the value of `Foo` and the assignment expression, despite widening.
// The return value of getBool() should have a fresh boolean variable on
// each interpretation, which forces use of widening.
start:
if (Foo = getBool()) {
(void)0;
/*[[p]]*/
goto start;
}
}
)cc";
runDataflow(
Code,
[](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,