[clang] Make -dump-tokens option align tokens (#164894)
When using `-Xclang -dump-tokens`, the lexer dump output is currently difficult to read because its fields are misaligned. The existing implementation simply separates the token name, spelling, flags, and location with `'\t'`, which results in inconsistent spacing. For example, on the example provided in this patch, the current output looks like this **(BEFORE THIS PR)**: <img width="2936" height="632" alt="image" src="https://github.com/user-attachments/assets/ad893958-6d57-4a76-8838-7fc56e37e6a7" /> # Changes This small PR improves the readability of the token dump by: + Adding padding after the token name and after the spelling (the padding amounts were chosen empirically to produce good average alignment). + Swapping the order of location and flags (since flags can take up a lot of space and disrupt alignment). The result is a more readable output **(AFTER THIS PR)**: <img width="1470" height="315" alt="image" src="https://github.com/user-attachments/assets/c24f24e5-a431-42cc-b5b6-232bac5c635e" />
This commit is contained in:
parent
a44c15874d
commit
b9924c76da
@ -61,6 +61,7 @@
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Capacity.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/MemoryBufferRef.h"
|
||||
#include "llvm/Support/SaveAndRestore.h"
|
||||
@ -240,14 +241,59 @@ void Preprocessor::FinalizeForModelFile() {
|
||||
}
|
||||
|
||||
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
|
||||
llvm::errs() << tok::getTokenName(Tok.getKind());
|
||||
std::string TokenStr;
|
||||
llvm::raw_string_ostream OS(TokenStr);
|
||||
|
||||
if (!Tok.isAnnotation())
|
||||
llvm::errs() << " '" << getSpelling(Tok) << "'";
|
||||
// The alignment of 16 is chosen to comfortably fit most identifiers.
|
||||
OS << llvm::formatv("{0,-16} ", tok::getTokenName(Tok.getKind()));
|
||||
|
||||
// Annotation tokens are just markers that don't have a spelling -- they
|
||||
// indicate where something expanded.
|
||||
if (!Tok.isAnnotation()) {
|
||||
OS << "'";
|
||||
// Escape string to prevent token spelling from spanning multiple lines.
|
||||
OS.write_escaped(getSpelling(Tok));
|
||||
OS << "'";
|
||||
}
|
||||
|
||||
// The alignment of 48 (32 characters for the spelling + the 16 for
|
||||
// the identifier name) fits most variable names, keywords and annotations.
|
||||
llvm::errs() << llvm::formatv("{0,-48} ", OS.str());
|
||||
|
||||
if (!DumpFlags) return;
|
||||
|
||||
llvm::errs() << "\t";
|
||||
auto Loc = Tok.getLocation();
|
||||
llvm::errs() << "Loc=<";
|
||||
DumpLocation(Loc);
|
||||
llvm::errs() << ">";
|
||||
|
||||
// If the token points directly to a file location (i.e. not a macro
|
||||
// expansion), then add additional padding so that trailing markers
|
||||
// align, provided the line/column numbers are reasonably sized.
|
||||
//
|
||||
// Otherwise, if it's a macro expansion, don't bother with alignment,
|
||||
// as the line will include multiple locations and be very long.
|
||||
//
|
||||
// NOTE: To keep this stateless, it doesn't account for filename
|
||||
// length, so when a header starts markers will be temporarily misaligned.
|
||||
if (Loc.isFileID()) {
|
||||
PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
|
||||
|
||||
if (!PLoc.isInvalid()) {
|
||||
int LineWidth = llvm::utostr(PLoc.getLine()).size();
|
||||
int ColumnWidth = llvm::utostr(PLoc.getColumn()).size();
|
||||
|
||||
// Reserve space for lines up to 9999 and columns up to 99,
|
||||
// which is 4 + 2 = 6 characters in total.
|
||||
const int ReservedSpace = 6;
|
||||
|
||||
int LeftSpace = ReservedSpace - LineWidth - ColumnWidth;
|
||||
int Padding = std::max<int>(0, LeftSpace);
|
||||
|
||||
llvm::errs().indent(Padding);
|
||||
}
|
||||
}
|
||||
|
||||
if (Tok.isAtStartOfLine())
|
||||
llvm::errs() << " [StartOfLine]";
|
||||
if (Tok.hasLeadingSpace())
|
||||
@ -256,13 +302,8 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
|
||||
llvm::errs() << " [ExpandDisabled]";
|
||||
if (Tok.needsCleaning()) {
|
||||
const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
|
||||
llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
|
||||
<< "']";
|
||||
llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength()) << "']";
|
||||
}
|
||||
|
||||
llvm::errs() << "\tLoc=<";
|
||||
DumpLocation(Tok.getLocation());
|
||||
llvm::errs() << ">";
|
||||
}
|
||||
|
||||
void Preprocessor::DumpLocation(SourceLocation Loc) const {
|
||||
|
||||
37
clang/test/Preprocessor/dump-tokens.cpp
Normal file
37
clang/test/Preprocessor/dump-tokens.cpp
Normal file
@ -0,0 +1,37 @@
|
||||
// RUN: %clang_cc1 -dump-tokens %s 2>&1 | FileCheck %s --strict-whitespace
|
||||
|
||||
// To make location reporting in the test more robust, provide line number and file name explicitly.
|
||||
#line 2 "dump-tokens.cpp"
|
||||
|
||||
// Different kinds of identifiers with different spelling lengths.
|
||||
-> // CHECK: arrow '->' Loc=<{{.*}}:4:1> [StartOfLine]
|
||||
5 // CHECK-NEXT: numeric_constant '5' Loc=<{{.*}}:5:1> [StartOfLine]
|
||||
id // CHECK-NEXT: identifier 'id' Loc=<{{.*}}:6:1> [StartOfLine]
|
||||
& // CHECK-NEXT: amp '&' Loc=<{{.*}}:7:1> [StartOfLine]
|
||||
) // CHECK-NEXT: r_paren ')' Loc=<{{.*}}:8:1> [StartOfLine]
|
||||
unsigned // CHECK-NEXT: unsigned 'unsigned' Loc=<{{.*}}:9:1> [StartOfLine]
|
||||
~ // CHECK-NEXT: tilde '~' Loc=<{{.*}}:10:1> [StartOfLine]
|
||||
long_variable_name_very_long // CHECK-NEXT: identifier 'long_variable_name_very_long' Loc=<{{.*}}:11:1> [StartOfLine]
|
||||
union // CHECK-NEXT: union 'union' Loc=<{{.*}}:12:1> [StartOfLine]
|
||||
42 // CHECK-NEXT: numeric_constant '42' Loc=<{{.*}}:13:1> [StartOfLine]
|
||||
j // CHECK-NEXT: identifier 'j' Loc=<{{.*}}:14:1> [StartOfLine]
|
||||
&= // CHECK-NEXT: ampequal '&=' Loc=<{{.*}}:15:1> [StartOfLine]
|
||||
15 // CHECK-NEXT: numeric_constant '15' Loc=<{{.*}}:16:1> [StartOfLine]
|
||||
|
||||
// Different locations in line and trailing markers.
|
||||
at different locations= in line // CHECK-NEXT: identifier 'at' Loc=<{{.*}}:19:2> [StartOfLine] [LeadingSpace]
|
||||
// CHECK-NEXT: identifier 'different' Loc=<{{.*}}:19:5> [LeadingSpace]
|
||||
// CHECK-NEXT: identifier 'locations' Loc=<{{.*}}:19:15> [LeadingSpace]
|
||||
// CHECK-NEXT: equal '=' Loc=<{{.*}}:19:24>
|
||||
// CHECK-NEXT: identifier 'in' Loc=<{{.*}}:19:26> [LeadingSpace]
|
||||
// CHECK-NEXT: identifier 'line' Loc=<{{.*}}:19:29> [LeadingSpace]
|
||||
|
||||
// Tokens that require escaping & annotations.
|
||||
#pragma clang __debug parser_crash // CHECK-NEXT: annot_pragma_parser_crash Loc=<{{.*}}:27:23>
|
||||
// CHECK-NEXT: eod '\n' Loc=<{{.*}}:27:119> [LeadingSpace]
|
||||
#pragma clang __debug captured // CHECK-NEXT: annot_pragma_captured Loc=<{{.*}}:29:120>
|
||||
#pragma clang __debug dump X // CHECK-NEXT: annot_pragma_dump Loc=<{{.*}}:30:23>
|
||||
// CHECK-NEXT: identifier 'X' Loc=<{{.*}}:30:28> [LeadingSpace]
|
||||
// CHECK-NEXT: eod '\n' Loc=<{{.*}}:30:119> [LeadingSpace]
|
||||
// CHECK-NEXT: eof '' Loc=<{{.*}}:34:1>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user