[clang-tidy][NFC] Add findTokenInRange and reuse it (#183941)

2026-03-03 00:19:27 +03:00 · 2026-03-03 00:19:27 +03:00 · 533f16fe89
commit 533f16fe89
parent 8107c71511
4 changed files with 276 additions and 34 deletions
--- a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
+++ b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//

 #include "ExplicitConstructorCheck.h"
+#include "../utils/LexerUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Lex/Lexer.h"

 using namespace clang::ast_matchers;

@ -31,32 +31,6 @@ void ExplicitConstructorCheck::registerMatchers(MatchFinder *Finder) {
      this);
 }

-// Looks for the token matching the predicate and returns the range of the found
-// token including trailing whitespace.
-static SourceRange findToken(const SourceManager &Sources,
-                             const LangOptions &LangOpts,
-                             SourceLocation StartLoc, SourceLocation EndLoc,
-                             bool (*Pred)(const Token &)) {
-  if (StartLoc.isMacroID() || EndLoc.isMacroID())
-    return {};
-  const FileID File = Sources.getFileID(Sources.getSpellingLoc(StartLoc));
-  const StringRef Buf = Sources.getBufferData(File);
-  const char *StartChar = Sources.getCharacterData(StartLoc);
-  Lexer Lex(StartLoc, LangOpts, StartChar, StartChar, Buf.end());
-  Lex.SetCommentRetentionState(true);
-  Token Tok;
-  do {
-    Lex.LexFromRawLexer(Tok);
-    if (Pred(Tok)) {
-      Token NextTok;
-      Lex.LexFromRawLexer(NextTok);
-      return {Tok.getLocation(), NextTok.getLocation()};
-    }
-  } while (Tok.isNot(tok::eof) && Tok.getLocation() < EndLoc);
-
-  return {};
-}
-
 static bool declIsStdInitializerList(const NamedDecl *D) {
  // First use the fast getName() method to avoid unnecessary calls to the
  // slow getQualifiedNameAsString().
@ -113,9 +87,12 @@ void ExplicitConstructorCheck::check(const MatchFinder::MatchResult &Result) {
      return Tok.is(tok::raw_identifier) &&
             Tok.getRawIdentifier() == "explicit";
    };
-    const SourceRange ExplicitTokenRange =
-        findToken(*Result.SourceManager, getLangOpts(),
-                  Ctor->getOuterLocStart(), Ctor->getEndLoc(), IsKwExplicit);
+    const CharSourceRange ConstructorRange = CharSourceRange::getTokenRange(
+        Ctor->getOuterLocStart(), Ctor->getEndLoc());
+    const CharSourceRange ExplicitTokenRange =
+        utils::lexer::findTokenTextInRange(ConstructorRange,
+                                           *Result.SourceManager, getLangOpts(),
+                                           IsKwExplicit);
    StringRef ConstructorDescription;
    if (Ctor->isMoveConstructor())
      ConstructorDescription = "move";
@ -127,10 +104,8 @@ void ExplicitConstructorCheck::check(const MatchFinder::MatchResult &Result) {
    auto Diag = diag(Ctor->getLocation(),
                     "%0 constructor should not be declared explicit")
                << ConstructorDescription;
-    if (ExplicitTokenRange.isValid()) {
-      Diag << FixItHint::CreateRemoval(
-          CharSourceRange::getCharRange(ExplicitTokenRange));
-    }
+    if (ExplicitTokenRange.isValid())
+      Diag << FixItHint::CreateRemoval(ExplicitTokenRange);
    return;
  }

--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@ -179,6 +179,60 @@ getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
                                CommentCollectionMode::TrailingComments);
 }

+CharSourceRange
+findTokenTextInRange(CharSourceRange Range, const SourceManager &SM,
+                     const LangOptions &LangOpts,
+                     llvm::function_ref<bool(const Token &)> Pred) {
+  if (Range.isInvalid())
+    return {};
+
+  // Normalize to a file-based char range so raw lexing can operate on one
+  // contiguous buffer; ranges that cannot be mapped to a file are rejected.
+  const CharSourceRange FileRange =
+      Lexer::makeFileCharRange(Range, SM, LangOpts);
+  if (FileRange.isInvalid())
+    return {};
+
+  const auto [BeginFID, BeginOffset] =
+      SM.getDecomposedLoc(FileRange.getBegin());
+  const auto [EndFID, EndOffset] = SM.getDecomposedLoc(FileRange.getEnd());
+  if (BeginFID != EndFID || BeginOffset > EndOffset)
+    return {};
+
+  bool Invalid = false;
+  const StringRef Buffer = SM.getBufferData(BeginFID, &Invalid);
+  if (Invalid)
+    return {};
+
+  const char *LexStart = Buffer.data() + BeginOffset;
+  // Re-lex raw tokens in the bounded file buffer while preserving comments so
+  // callers can match tokens regardless of interleaved comments.
+  Lexer TheLexer(SM.getLocForStartOfFile(BeginFID), LangOpts, Buffer.begin(),
+                 LexStart, Buffer.end());
+  TheLexer.SetCommentRetentionState(true);
+
+  while (true) {
+    Token Tok;
+    // Ignore the result: it is also true when the last real token was lexed.
+    TheLexer.LexFromRawLexer(Tok);
+
+    if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() ||
+        SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation()))
+      return {};
+
+    if (!Pred(Tok))
+      continue;
+
+    // Return a char range ending at the next token start so trailing trivia of
+    // the matched token is included (useful for fix-it removals). If the match
+    // is the last token in the buffer, the range ends at the tok::eof token.
+    Token NextTok;
+    TheLexer.LexFromRawLexer(NextTok);
+    return CharSourceRange::getCharRange(Tok.getLocation(),
+                                         NextTok.getLocation());
+  }
+}
+
 std::optional<Token> getQualifyingToken(tok::TokenKind TK,
                                        CharSourceRange Range,
                                        const ASTContext &Context,
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
@ -12,6 +12,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Lexer.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include <optional>
 #include <utility>
 #include <vector>
@ -131,6 +132,14 @@ std::vector<CommentToken>
 getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
                           const LangOptions &LangOpts);

+/// Returns the char range of the first token in \p Range matching \p Pred,
+/// from the start of that token to the start of the next one. Returns an
+/// invalid range if nothing matches or \p Range does not map to one file.
+CharSourceRange
+findTokenTextInRange(CharSourceRange Range, const SourceManager &SM,
+                     const LangOptions &LangOpts,
+                     llvm::function_ref<bool(const Token &)> Pred);
+
 /// Assuming that ``Range`` spans a CVR-qualified type, returns the
 /// token in ``Range`` that is responsible for the qualification. ``Range``
 /// must be valid with respect to ``SM``.  Returns ``std::nullopt`` if no
--- a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
@ -8,6 +8,7 @@

 #include "../clang-tidy/utils/LexerUtils.h"

+#include "clang/AST/DeclCXX.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Frontend/ASTUnit.h"
@ -41,6 +42,10 @@ static CharSourceRange rangeFromAnnotations(const llvm::Annotations &A,
  return CharSourceRange::getCharRange(Begin, End);
 }

+// Returns true if \p Tok is a raw identifier whose spelling equals \p Name.
+static bool isRawIdentifierNamed(const Token &Tok, StringRef Name) {
+  return Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == Name;
+}
 namespace {

 TEST(LexerUtilsTest, GetCommentsInRangeAdjacentComments) {
@ -162,6 +167,205 @@ TEST(LexerUtilsTest, GetCommentsInRangeInvalidRange) {
  EXPECT_TRUE(Comments.empty());
 }

+TEST(LexerUtilsTest, FindTokenTextInRangeFindsMatch) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[explicit   ]] S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  ASSERT_TRUE(MatchedRange.isValid());
+  // The match must run from `explicit` up to the start of the next token.
+  const StringRef CodeText = Code.code();
+  const size_t ExplicitOffset = CodeText.find("explicit");
+  ASSERT_NE(StringRef::npos, ExplicitOffset);
+  const size_t ConstructorOffset = CodeText.find("S(int)");
+  ASSERT_NE(StringRef::npos, ConstructorOffset);
+  EXPECT_EQ(ExplicitOffset, SM.getFileOffset(MatchedRange.getBegin()));
+  EXPECT_EQ(ConstructorOffset, SM.getFileOffset(MatchedRange.getEnd()));
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidWhenNotFound) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[int x = 0;]]
+  S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+  // The annotated range contains no `explicit` token at all.
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeDoesNotMatchTokenAtEndBoundary) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[int x = 0; ]]explicit S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+  // `explicit` begins exactly where the range ends, so it is out of bounds.
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest,
+     FindTokenTextInRangeReturnsInvalidWhenPredicateNeverMatches) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[explicit ]] S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+  // The range holds an `explicit` token, but the predicate rejects everything.
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts, [](const Token &) { return false; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForInvalidRange) {
+  std::unique_ptr<ASTUnit> AST = buildAST("struct S { explicit S(int); };");
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+  // Even an always-true predicate must not match in a default (invalid) range.
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      CharSourceRange(), SM, LangOpts, [](const Token &) { return true; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForReversedOffsets) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $a^explicit S(int);$b^
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+  // Build the range backwards on purpose: it begins at $b and ends at $a.
+  const SourceLocation MainFileStart =
+      SM.getLocForStartOfFile(SM.getMainFileID());
+  const SourceLocation Begin = MainFileStart.getLocWithOffset(Code.point("b"));
+  const SourceLocation End = MainFileStart.getLocWithOffset(Code.point("a"));
+  ASSERT_TRUE(SM.isBeforeInTranslationUnit(End, Begin));
+
+  const CharSourceRange ReversedRange =
+      CharSourceRange::getCharRange(Begin, End);
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      ReversedRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidWhenFileRangeIsInvalid) {
+  llvm::Annotations Code(R"cpp(
+#include "header.h"
+int $begin^main_var = 0;
+)cpp");
+  const FileContentMappings Mappings = {
+      {"header.h", "int header_var = 0;\n"},
+  };
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code(), Mappings);
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const SourceLocation MainFileStart =
+      SM.getLocForStartOfFile(SM.getMainFileID());
+  const SourceLocation Begin =
+      MainFileStart.getLocWithOffset(Code.point("begin"));
+  ASSERT_TRUE(Begin.isFileID());
+  // Look up the FileID of header.h to obtain a location at its start.
+  auto HeaderFile = AST->getFileManager().getOptionalFileRef("header.h");
+  ASSERT_TRUE(HeaderFile.has_value());
+  const FileID HeaderFID = SM.translateFile(*HeaderFile);
+  ASSERT_TRUE(HeaderFID.isValid());
+  const SourceLocation HeaderBegin = SM.getLocForStartOfFile(HeaderFID);
+  ASSERT_TRUE(HeaderBegin.isFileID());
+  // The range starts in the main file but ends in header.h.
+  const CharSourceRange SearchRange =
+      CharSourceRange::getCharRange(Begin, HeaderBegin);
+  const CharSourceRange FileRange =
+      Lexer::makeFileCharRange(SearchRange, SM, LangOpts);
+  EXPECT_TRUE(FileRange.isInvalid());
+
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts, [](const Token &) { return true; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForMacroRange) {
+  std::unique_ptr<ASTUnit> AST = buildAST(R"cpp(
+#define EXPLICIT explicit
+struct S {
+  EXPLICIT S(int);
+};
+)cpp");
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+  // Pick the first non-implicit constructor of a top-level class.
+  const CXXConstructorDecl *Ctor = [&Context] {
+    for (const Decl *D : Context.getTranslationUnitDecl()->decls()) {
+      const auto *RD = dyn_cast<CXXRecordDecl>(D);
+      if (!RD)
+        continue;
+      for (const CXXConstructorDecl *Ctor : RD->ctors())
+        if (!Ctor->isImplicit())
+          return Ctor;
+    }
+    return static_cast<const CXXConstructorDecl *>(nullptr);
+  }();
+  ASSERT_NE(nullptr, Ctor);
+  ASSERT_TRUE(Ctor->getOuterLocStart().isMacroID());
+  // The searched token range therefore begins at a macro location.
+  const CharSourceRange SearchRange = CharSourceRange::getTokenRange(
+      Ctor->getOuterLocStart(), Ctor->getEndLoc());
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
 TEST(LexerUtilsTest, GetTrailingCommentsInRangeAdjacentComments) {
  llvm::Annotations Code(R"cpp(
 void f() {