[clang-tidy][NFC] Add findTokenTextInRange and reuse it (#183941)

This commit is contained in:
Daniil Dudkin 2026-03-03 00:19:27 +03:00 committed by GitHub
parent 8107c71511
commit 533f16fe89
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 276 additions and 34 deletions

View File

@ -7,10 +7,10 @@
//===----------------------------------------------------------------------===//
#include "ExplicitConstructorCheck.h"
#include "../utils/LexerUtils.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Lex/Lexer.h"
using namespace clang::ast_matchers;
@ -31,32 +31,6 @@ void ExplicitConstructorCheck::registerMatchers(MatchFinder *Finder) {
this);
}
// Raw-lexes the source starting at StartLoc (comments are kept as tokens) and
// returns the range that begins at the first token accepted by Pred and ends
// at the start of the token following it, so whitespace after the match is
// covered. An empty range is returned when either location is a macro
// location or when no token is accepted before EndLoc / end of file.
static SourceRange findToken(const SourceManager &Sources,
                             const LangOptions &LangOpts,
                             SourceLocation StartLoc, SourceLocation EndLoc,
                             bool (*Pred)(const Token &)) {
  if (StartLoc.isMacroID() || EndLoc.isMacroID())
    return {};

  const FileID ContainingFile =
      Sources.getFileID(Sources.getSpellingLoc(StartLoc));
  const StringRef Buffer = Sources.getBufferData(ContainingFile);
  const char *Begin = Sources.getCharacterData(StartLoc);
  Lexer RawLexer(StartLoc, LangOpts, Begin, Begin, Buffer.end());
  RawLexer.SetCommentRetentionState(true);

  for (;;) {
    Token Current;
    RawLexer.LexFromRawLexer(Current);
    if (Pred(Current)) {
      Token Following;
      RawLexer.LexFromRawLexer(Following);
      return {Current.getLocation(), Following.getLocation()};
    }
    // The token just examined is the last candidate once the buffer is
    // exhausted or the token no longer starts before EndLoc.
    if (Current.is(tok::eof) || !(Current.getLocation() < EndLoc))
      return {};
  }
}
static bool declIsStdInitializerList(const NamedDecl *D) {
// First use the fast getName() method to avoid unnecessary calls to the
// slow getQualifiedNameAsString().
@ -113,9 +87,12 @@ void ExplicitConstructorCheck::check(const MatchFinder::MatchResult &Result) {
return Tok.is(tok::raw_identifier) &&
Tok.getRawIdentifier() == "explicit";
};
const SourceRange ExplicitTokenRange =
findToken(*Result.SourceManager, getLangOpts(),
Ctor->getOuterLocStart(), Ctor->getEndLoc(), IsKwExplicit);
const CharSourceRange ConstructorRange = CharSourceRange::getTokenRange(
Ctor->getOuterLocStart(), Ctor->getEndLoc());
const CharSourceRange ExplicitTokenRange =
utils::lexer::findTokenTextInRange(ConstructorRange,
*Result.SourceManager, getLangOpts(),
IsKwExplicit);
StringRef ConstructorDescription;
if (Ctor->isMoveConstructor())
ConstructorDescription = "move";
@ -127,10 +104,8 @@ void ExplicitConstructorCheck::check(const MatchFinder::MatchResult &Result) {
auto Diag = diag(Ctor->getLocation(),
"%0 constructor should not be declared explicit")
<< ConstructorDescription;
if (ExplicitTokenRange.isValid()) {
Diag << FixItHint::CreateRemoval(
CharSourceRange::getCharRange(ExplicitTokenRange));
}
if (ExplicitTokenRange.isValid())
Diag << FixItHint::CreateRemoval(ExplicitTokenRange);
return;
}

View File

@ -179,6 +179,60 @@ getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
CommentCollectionMode::TrailingComments);
}
// Finds the first raw token that starts inside Range and satisfies Pred.
//
// The range is first mapped to a file char range; the file buffer is then
// re-lexed in raw mode with comments retained, so Pred is also called for
// comment tokens. The result starts at the matched token and ends at the start
// of the following token (or at end of file if there is none). An invalid
// range is returned when Range is invalid, cannot be mapped to a single file
// range, its buffer is unavailable, or no token starting before the end of the
// range satisfies Pred.
CharSourceRange
findTokenTextInRange(CharSourceRange Range, const SourceManager &SM,
                     const LangOptions &LangOpts,
                     llvm::function_ref<bool(const Token &)> Pred) {
  if (Range.isInvalid())
    return {};

  // Normalize to a file-based char range so raw lexing can operate on one
  // contiguous buffer and reject unmappable (e.g. macro) ranges.
  const CharSourceRange FileRange =
      Lexer::makeFileCharRange(Range, SM, LangOpts);
  if (FileRange.isInvalid())
    return {};

  const auto [BeginFID, BeginOffset] =
      SM.getDecomposedLoc(FileRange.getBegin());
  const auto [EndFID, EndOffset] = SM.getDecomposedLoc(FileRange.getEnd());
  if (BeginFID != EndFID || BeginOffset > EndOffset)
    return {};

  bool Invalid = false;
  const StringRef Buffer = SM.getBufferData(BeginFID, &Invalid);
  if (Invalid)
    return {};

  const char *LexStart = Buffer.data() + BeginOffset;
  // Re-lex raw tokens in the bounded file buffer while preserving comments so
  // callers can match tokens regardless of interleaved comments.
  Lexer TheLexer(SM.getLocForStartOfFile(BeginFID), LangOpts, Buffer.begin(),
                 LexStart, Buffer.end());
  TheLexer.SetCommentRetentionState(true);

  while (true) {
    Token Tok;
    // The return value of LexFromRawLexer() only says that the read pointer
    // reached the end of the buffer, which is also the case right after a real
    // token that ends the buffer. Rely on tok::eof instead so such a token is
    // still offered to Pred.
    TheLexer.LexFromRawLexer(Tok);
    if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() ||
        SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation()))
      return {};
    if (!Pred(Tok))
      continue;

    // Return a char range ending at the next token start so trailing trivia of
    // the matched token is included (useful for fix-it removals). When the
    // match is the last token, NextTok is the eof token located at the end of
    // the buffer, which still yields a valid range.
    Token NextTok;
    TheLexer.LexFromRawLexer(NextTok);
    return CharSourceRange::getCharRange(Tok.getLocation(),
                                         NextTok.getLocation());
  }
}
std::optional<Token> getQualifyingToken(tok::TokenKind TK,
CharSourceRange Range,
const ASTContext &Context,

View File

@ -12,6 +12,7 @@
#include "clang/AST/ASTContext.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include <optional>
#include <utility>
#include <vector>
@ -131,6 +132,14 @@ std::vector<CommentToken>
getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
const LangOptions &LangOpts);
/// Returns the source range of the first token in \p Range matching \p Pred.
///
/// \p Range is re-lexed in raw mode with comments retained, so \p Pred is also
/// invoked for comment tokens and keywords are seen as raw identifiers. Only
/// tokens that start before the end of \p Range are considered.
///
/// The returned char range starts at the matched token and ends at the start
/// of the next token, so it may extend past the end of \p Range. Returns an
/// invalid range if \p Range is invalid, cannot be mapped to a single file
/// range (e.g. it involves macro locations), or no token matches.
CharSourceRange
findTokenTextInRange(CharSourceRange Range, const SourceManager &SM,
const LangOptions &LangOpts,
llvm::function_ref<bool(const Token &)> Pred);
/// Assuming that ``Range`` spans a CVR-qualified type, returns the
/// token in ``Range`` that is responsible for the qualification. ``Range``
/// must be valid with respect to ``SM``. Returns ``std::nullopt`` if no

View File

@ -8,6 +8,7 @@
#include "../clang-tidy/utils/LexerUtils.h"
#include "clang/AST/DeclCXX.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/ASTUnit.h"
@ -41,6 +42,10 @@ static CharSourceRange rangeFromAnnotations(const llvm::Annotations &A,
return CharSourceRange::getCharRange(Begin, End);
}
static bool isRawIdentifierNamed(const Token &Tok, StringRef Name) {
return Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == Name;
}
namespace {
TEST(LexerUtilsTest, GetCommentsInRangeAdjacentComments) {
@ -162,6 +167,205 @@ TEST(LexerUtilsTest, GetCommentsInRangeInvalidRange) {
EXPECT_TRUE(Comments.empty());
}
// A token matched inside the range is reported from its first character up to
// the start of the token that follows it.
TEST(LexerUtilsTest, FindTokenTextInRangeFindsMatch) {
  llvm::Annotations Code(R"cpp(
struct S {
$range[[explicit ]] S(int);
};
)cpp");
  std::unique_ptr<ASTUnit> Unit = buildAST(Code.code());
  ASSERT_TRUE(Unit);
  const ASTContext &Ctx = Unit->getASTContext();
  const SourceManager &SM = Ctx.getSourceManager();
  const LangOptions &LangOpts = Ctx.getLangOpts();

  const CharSourceRange Annotated =
      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
  auto IsExplicit = [](const Token &Tok) {
    return isRawIdentifierNamed(Tok, "explicit");
  };
  const CharSourceRange Found =
      utils::lexer::findTokenTextInRange(Annotated, SM, LangOpts, IsExplicit);
  ASSERT_TRUE(Found.isValid());

  // Expected begin: the keyword itself; expected end: the constructor name.
  const StringRef Text = Code.code();
  const size_t KeywordPos = Text.find("explicit");
  ASSERT_NE(StringRef::npos, KeywordPos);
  const size_t CtorPos = Text.find("S(int)");
  ASSERT_NE(StringRef::npos, CtorPos);

  EXPECT_EQ(KeywordPos, SM.getFileOffset(Found.getBegin()));
  EXPECT_EQ(CtorPos, SM.getFileOffset(Found.getEnd()));
}
// The keyword exists in the file but not inside the searched range, so the
// search must come back empty.
TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidWhenNotFound) {
  llvm::Annotations Code(R"cpp(
struct S {
$range[[int x = 0;]]
S(int);
};
)cpp");
  std::unique_ptr<ASTUnit> Unit = buildAST(Code.code());
  ASSERT_TRUE(Unit);
  const ASTContext &Ctx = Unit->getASTContext();
  const SourceManager &SM = Ctx.getSourceManager();
  const LangOptions &LangOpts = Ctx.getLangOpts();

  const CharSourceRange Annotated =
      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
  auto IsExplicit = [](const Token &Tok) {
    return isRawIdentifierNamed(Tok, "explicit");
  };
  const CharSourceRange Found =
      utils::lexer::findTokenTextInRange(Annotated, SM, LangOpts, IsExplicit);
  EXPECT_TRUE(Found.isInvalid());
}
// A token that starts exactly where the range ends is outside the (half-open)
// range and must not be matched.
TEST(LexerUtilsTest, FindTokenTextInRangeDoesNotMatchTokenAtEndBoundary) {
  llvm::Annotations Code(R"cpp(
struct S {
$range[[int x = 0; ]]explicit S(int);
};
)cpp");
  std::unique_ptr<ASTUnit> Unit = buildAST(Code.code());
  ASSERT_TRUE(Unit);
  const ASTContext &Ctx = Unit->getASTContext();
  const SourceManager &SM = Ctx.getSourceManager();
  const LangOptions &LangOpts = Ctx.getLangOpts();

  const CharSourceRange Annotated =
      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
  auto IsExplicit = [](const Token &Tok) {
    return isRawIdentifierNamed(Tok, "explicit");
  };
  const CharSourceRange Found =
      utils::lexer::findTokenTextInRange(Annotated, SM, LangOpts, IsExplicit);
  EXPECT_TRUE(Found.isInvalid());
}
// With a predicate that rejects every token, no range can be produced even
// though the searched range contains tokens.
TEST(LexerUtilsTest,
     FindTokenTextInRangeReturnsInvalidWhenPredicateNeverMatches) {
  llvm::Annotations Code(R"cpp(
struct S {
$range[[explicit ]] S(int);
};
)cpp");
  std::unique_ptr<ASTUnit> Unit = buildAST(Code.code());
  ASSERT_TRUE(Unit);
  const ASTContext &Ctx = Unit->getASTContext();
  const SourceManager &SM = Ctx.getSourceManager();
  const LangOptions &LangOpts = Ctx.getLangOpts();

  const CharSourceRange Annotated =
      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
  auto RejectAll = [](const Token &) { return false; };
  const CharSourceRange Found =
      utils::lexer::findTokenTextInRange(Annotated, SM, LangOpts, RejectAll);
  EXPECT_TRUE(Found.isInvalid());
}
// A default-constructed (invalid) search range yields an invalid result, even
// with a predicate that would accept any token.
TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForInvalidRange) {
  std::unique_ptr<ASTUnit> Unit = buildAST("struct S { explicit S(int); };");
  ASSERT_TRUE(Unit);
  const ASTContext &Ctx = Unit->getASTContext();
  const SourceManager &SM = Ctx.getSourceManager();
  const LangOptions &LangOpts = Ctx.getLangOpts();

  const CharSourceRange NoRange;
  auto AcceptAll = [](const Token &) { return true; };
  const CharSourceRange Found =
      utils::lexer::findTokenTextInRange(NoRange, SM, LangOpts, AcceptAll);
  EXPECT_TRUE(Found.isInvalid());
}
// A range whose begin lies after its end must be rejected instead of being
// lexed.
TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForReversedOffsets) {
  llvm::Annotations Code(R"cpp(
struct S {
$a^explicit S(int);$b^
};
)cpp");
  std::unique_ptr<ASTUnit> Unit = buildAST(Code.code());
  ASSERT_TRUE(Unit);
  const ASTContext &Ctx = Unit->getASTContext();
  const SourceManager &SM = Ctx.getSourceManager();
  const LangOptions &LangOpts = Ctx.getLangOpts();

  // Deliberately swap the two annotated points: the range starts at "b" and
  // ends at "a".
  const SourceLocation FileStart = SM.getLocForStartOfFile(SM.getMainFileID());
  const SourceLocation Later = FileStart.getLocWithOffset(Code.point("b"));
  const SourceLocation Earlier = FileStart.getLocWithOffset(Code.point("a"));
  ASSERT_TRUE(SM.isBeforeInTranslationUnit(Earlier, Later));

  const CharSourceRange Backwards =
      CharSourceRange::getCharRange(Later, Earlier);
  auto IsExplicit = [](const Token &Tok) {
    return isRawIdentifierNamed(Tok, "explicit");
  };
  const CharSourceRange Found =
      utils::lexer::findTokenTextInRange(Backwards, SM, LangOpts, IsExplicit);
  EXPECT_TRUE(Found.isInvalid());
}
// A range that begins in the main file and ends in an included header cannot
// be mapped to a single file range, so the search must return invalid.
TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidWhenFileRangeIsInvalid) {
  llvm::Annotations Code(R"cpp(
#include "header.h"
int $begin^main_var = 0;
)cpp");
  const FileContentMappings Mappings = {
      {"header.h", "int header_var = 0;\n"},
  };
  std::unique_ptr<ASTUnit> Unit = buildAST(Code.code(), Mappings);
  ASSERT_TRUE(Unit);
  const ASTContext &Ctx = Unit->getASTContext();
  const SourceManager &SM = Ctx.getSourceManager();
  const LangOptions &LangOpts = Ctx.getLangOpts();

  // Range start: the annotated point in the main file.
  const SourceLocation FileStart = SM.getLocForStartOfFile(SM.getMainFileID());
  const SourceLocation MainLoc =
      FileStart.getLocWithOffset(Code.point("begin"));
  ASSERT_TRUE(MainLoc.isFileID());

  // Range end: the very beginning of the included header.
  auto HeaderRef = Unit->getFileManager().getOptionalFileRef("header.h");
  ASSERT_TRUE(HeaderRef.has_value());
  const FileID HeaderID = SM.translateFile(*HeaderRef);
  ASSERT_TRUE(HeaderID.isValid());
  const SourceLocation HeaderLoc = SM.getLocForStartOfFile(HeaderID);
  ASSERT_TRUE(HeaderLoc.isFileID());

  const CharSourceRange CrossFile =
      CharSourceRange::getCharRange(MainLoc, HeaderLoc);
  // Sanity check that the lexer itself refuses to map this range.
  const CharSourceRange Mapped =
      Lexer::makeFileCharRange(CrossFile, SM, LangOpts);
  EXPECT_TRUE(Mapped.isInvalid());

  auto AcceptAll = [](const Token &) { return true; };
  const CharSourceRange Found =
      utils::lexer::findTokenTextInRange(CrossFile, SM, LangOpts, AcceptAll);
  EXPECT_TRUE(Found.isInvalid());
}
// When the constructor's range starts inside a macro expansion, the search
// must not report the keyword hidden in the macro body.
TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForMacroRange) {
  std::unique_ptr<ASTUnit> Unit = buildAST(R"cpp(
#define EXPLICIT explicit
struct S {
EXPLICIT S(int);
};
)cpp");
  ASSERT_TRUE(Unit);
  const ASTContext &Ctx = Unit->getASTContext();
  const SourceManager &SM = Ctx.getSourceManager();
  const LangOptions &LangOpts = Ctx.getLangOpts();

  // Locate the first user-written constructor of the first record that has
  // one.
  const CXXConstructorDecl *Ctor = nullptr;
  for (const Decl *D : Ctx.getTranslationUnitDecl()->decls()) {
    const auto *Record = dyn_cast<CXXRecordDecl>(D);
    if (!Record)
      continue;
    for (const CXXConstructorDecl *Candidate : Record->ctors()) {
      if (!Candidate->isImplicit()) {
        Ctor = Candidate;
        break;
      }
    }
    if (Ctor)
      break;
  }
  ASSERT_NE(nullptr, Ctor);
  ASSERT_TRUE(Ctor->getOuterLocStart().isMacroID());

  const CharSourceRange CtorRange = CharSourceRange::getTokenRange(
      Ctor->getOuterLocStart(), Ctor->getEndLoc());
  auto IsExplicit = [](const Token &Tok) {
    return isRawIdentifierNamed(Tok, "explicit");
  };
  const CharSourceRange Found =
      utils::lexer::findTokenTextInRange(CtorRange, SM, LangOpts, IsExplicit);
  EXPECT_TRUE(Found.isInvalid());
}
TEST(LexerUtilsTest, GetTrailingCommentsInRangeAdjacentComments) {
llvm::Annotations Code(R"cpp(
void f() {