[clang-reorder-fields] Reorder leading comments (#123740)

Similarly to https://github.com/llvm/llvm-project/pull/122918, leading comments are currently not being moved. ``` struct Foo { // This one is the cool field. int a; int b; }; ``` becomes: ``` struct Foo { // This one is the cool field. int b; int a; }; ``` but should be: ``` struct Foo { int b; // This one is the cool field. int a; }; ```
2025-01-22 13:42:00 +01:00 · 2025-01-22 13:42:00 +01:00 · fbd86d05fe
commit fbd86d05fe
parent c6c647588f
6 changed files with 101 additions and 21 deletions
--- a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
+++ b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
@ -118,6 +118,29 @@ findMembersUsedInInitExpr(const CXXCtorInitializer *Initializer,
  return Results;
 }

+/// Returns the start of the leading comments before `Loc`.
+static SourceLocation getStartOfLeadingComment(SourceLocation Loc,
+                                               const SourceManager &SM,
+                                               const LangOptions &LangOpts) {
+  // We consider any leading comment token that is on the same line or
+  // indented similarly to the first comment to be part of the leading comment.
+  const unsigned Line = SM.getPresumedLineNumber(Loc);
+  const unsigned Column = SM.getPresumedColumnNumber(Loc);
+  std::optional<Token> Tok =
+      Lexer::findPreviousToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
+  while (Tok && Tok->is(tok::comment)) {
+    const SourceLocation CommentLoc =
+        Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts);
+    if (SM.getPresumedLineNumber(CommentLoc) != Line &&
+        SM.getPresumedColumnNumber(CommentLoc) != Column) {
+      break;
+    }
+    Loc = CommentLoc;
+    Tok = Lexer::findPreviousToken(Loc, SM, LangOpts, /*IncludeComments=*/true);
+  }
+  return Loc;
+}
+
 /// Returns the end of the trailing comments after `Loc`.
 static SourceLocation getEndOfTrailingComment(SourceLocation Loc,
                                              const SourceManager &SM,
@ -159,6 +182,7 @@ static SourceRange getFullFieldSourceRange(const FieldDecl &Field,
    if (CurrentToken->is(tok::semi))
      break;
  }
+  Begin = getStartOfLeadingComment(Begin, SM, LangOpts);
  End = getEndOfTrailingComment(End, SM, LangOpts);
  return SourceRange(Begin, End);
 }
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@ -17,25 +17,16 @@ namespace clang::tidy::utils::lexer {
 std::pair<Token, SourceLocation>
 getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
                         const LangOptions &LangOpts, bool SkipComments) {
+  const std::optional<Token> Tok =
+      Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
+
+  if (Tok.has_value()) {
+    return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
+  }
+
  Token Token;
  Token.setKind(tok::unknown);
-
-  Location = Location.getLocWithOffset(-1);
-  if (Location.isInvalid())
-    return {Token, Location};
-
-  const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
-  while (Location != StartOfFile) {
-    Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
-    if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
-        (!SkipComments || !Token.is(tok::comment))) {
-      break;
-    }
-    if (Location == StartOfFile)
-      return {Token, Location};
-    Location = Location.getLocWithOffset(-1);
-  }
-  return {Token, Location};
+  return {Token, SourceLocation()};
 }

 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
--- a/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
+++ b/clang-tools-extra/test/clang-reorder-fields/Comments.cpp
@ -1,4 +1,4 @@
-// RUN: clang-reorder-fields -record-name Foo -fields-order e1,e3,e2,a,c,b %s -- | FileCheck %s
+// RUN: clang-reorder-fields -record-name Foo -fields-order c,e1,e3,e2,a,b %s -- | FileCheck %s

 class Foo {
  int a; // Trailing comment for a.
@ -12,12 +12,15 @@ class Foo {
  int e3 /*c-like*/;
 };

-// CHECK:       /*c-like*/ int e1;
+// Note: the position of the empty line is somewhat arbitrary.
+
+// CHECK:       // Prefix comments for c.
+// CHECK-NEXT:  int c;
+// CHECK-NEXT:  /*c-like*/ int e1;
 // CHECK-NEXT:  int e3 /*c-like*/;
+// CHECK-EMPTY:
 // CHECK-NEXT:  int /*c-like*/ e2;
 // CHECK-NEXT:  int a; // Trailing comment for a.
-// CHECK-NEXT:  // Prefix comments for c.
-// CHECK-NEXT:  int c;
 // CHECK-NEXT:  int b; // Multiline
 // CHECK-NEXT:         // trailing for b.

--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@ -557,6 +557,12 @@ public:
                                            const LangOptions &LangOpts,
                                            bool IncludeComments = false);

+  /// Finds the token that comes before the given location.
+  static std::optional<Token> findPreviousToken(SourceLocation Loc,
+                                                const SourceManager &SM,
+                                                const LangOptions &LangOpts,
+                                                bool IncludeComments);
+
  /// Checks that the given token is the first token that occurs after
  /// the given location (this excludes comments and whitespace). Returns the
  /// location immediately after the specified token. If the token is not found
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@ -1352,6 +1352,27 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
  return Tok;
 }

+std::optional<Token> Lexer::findPreviousToken(SourceLocation Loc,
+                                              const SourceManager &SM,
+                                              const LangOptions &LangOpts,
+                                              bool IncludeComments) {
+  const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Loc));
+  while (Loc != StartOfFile) {
+    Loc = Loc.getLocWithOffset(-1);
+    if (Loc.isInvalid())
+      return std::nullopt;
+
+    Loc = GetBeginningOfToken(Loc, SM, LangOpts);
+    Token Tok;
+    if (getRawToken(Loc, Tok, SM, LangOpts))
+      continue; // Not a token, go to prev location.
+    if (!Tok.is(tok::comment) || IncludeComments) {
+      return Tok;
+    }
+  }
+  return std::nullopt;
+}
+
 /// Checks that the given token is the first token that occurs after the
 /// given location (this excludes comments and whitespace). Returns the location
 /// immediately after the specified token. If the token is not found or the
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@ -640,6 +640,41 @@ TEST_F(LexerTest, FindNextTokenIncludingComments) {
                          "=", "abcd", ";"));
 }

+TEST_F(LexerTest, FindPreviousToken) {
+  Lex("int abcd = 0;\n"
+      "// A comment.\n"
+      "int xyz = abcd;\n");
+  std::vector<std::string> GeneratedByPrevToken;
+  SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
+  while (true) {
+    auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, false);
+    if (!T.has_value())
+      break;
+    GeneratedByPrevToken.push_back(getSourceText(*T, *T));
+    Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
+  }
+  EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";", "abcd", "=", "xyz", "int",
+                                                ";", "0", "=", "abcd", "int"));
+}
+
+TEST_F(LexerTest, FindPreviousTokenIncludingComments) {
+  Lex("int abcd = 0;\n"
+      "// A comment.\n"
+      "int xyz = abcd;\n");
+  std::vector<std::string> GeneratedByPrevToken;
+  SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
+  while (true) {
+    auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, true);
+    if (!T.has_value())
+      break;
+    GeneratedByPrevToken.push_back(getSourceText(*T, *T));
+    Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
+  }
+  EXPECT_THAT(GeneratedByPrevToken,
+              ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";",
+                          "0", "=", "abcd", "int"));
+}
+
 TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("", ModLoader);