Richard 88a7508b1f [clang-tidy] Deal with keyword tokens in preprocessor conditions
When a "keyword" token like __restrict was present in a macro condition,
modernize-macro-to-enum would assert in non-release builds.  However,
even for a "keyword" token, calling getIdentifierInfo()->getName() would
retrieve the text of the token, which is what we want.  Our intention is
to scan names that appear in conditional expressions in potential enum
clusters and invalidate those clusters if they contain the name.

Also, guard against "raw identifiers" appearing as potential enums.
This shouldn't happen, but it doesn't hurt to generalize the code.

Differential Revision: https://reviews.llvm.org/D123349

Fixes #54775
2022-04-08 16:06:06 -06:00

486 lines
16 KiB
C++

//===--- MacroToEnumCheck.cpp - clang-tidy --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MacroToEnumCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <string>
namespace clang {
namespace tidy {
namespace modernize {
// Returns true when raw-lexing Text yields no token other than at most one
// '#', and Text never contains two end-of-line sequences back to back.
static bool hasOnlyComments(SourceLocation Loc, const LangOptions &Options,
                            StringRef Text) {
  // Raw-lex a private copy of the text. Any token other than one single hash
  // means something sits between the two macro definitions. The token that
  // accompanies the lexer's end-of-buffer result is not inspected.
  const std::string Buffer = Text.str();
  const char *const BufferBegin = Buffer.c_str();
  Lexer Lex(Loc, Options, BufferBegin, BufferBegin,
            BufferBegin + Buffer.size());
  Token Tok;
  bool SeenHash = false;
  while (!Lex.LexFromRawLexer(Tok)) {
    if (SeenHash || Tok.isNot(tok::hash))
      return false;
    SeenHash = true;
  }

  // No other tokens were found. Now reject the text if it contains a blank
  // line, i.e. two consecutive EOL sequences, where an EOL sequence is one of
  // '\n', '\r' or '\r\n'.
  enum class EolState {
    Nothing, // last character was not part of an EOL sequence
    CR,      // seen "\r"
    LF,      // seen "\n"
    CRLF,    // seen "\r\n"
    CRLFCR,  // seen "\r\n\r"
  };

  EolState Eol = EolState::Nothing;
  for (const char Ch : Text) {
    if (Ch == '\r') {
      if (Eol == EolState::CR)
        return false;
      Eol = (Eol == EolState::CRLF) ? EolState::CRLFCR : EolState::CR;
    } else if (Ch == '\n') {
      if (Eol == EolState::LF || Eol == EolState::CRLFCR)
        return false;
      Eol = (Eol == EolState::CR) ? EolState::CRLF : EolState::LF;
    } else {
      Eol = EolState::Nothing;
    }
  }
  return true;
}
// Validate that this literal token is a valid integer literal. A literal token
// could be a floating-point token, which isn't acceptable as a value for an
// enumeration. A floating-point token must either have a decimal point or an
// exponent ('E' or 'P').
//
// Characters are compared against both letter cases directly instead of going
// through std::toupper: passing a plain char with a negative value to
// std::toupper is undefined behavior, and its result depends on the current
// C locale.
static bool isIntegralConstant(const Token &Token) {
  const char *Begin = Token.getLiteralData();
  const char *End = Begin + Token.getLength();

  // A "0x"/"0X" prefix followed by at least one more character selects the
  // hexadecimal rules, where 'e'/'E' is an ordinary digit and the exponent
  // marker is 'p'/'P'.
  const bool HasHexPrefix = Token.getLength() > 2 && Begin[0] == '0' &&
                            (Begin[1] == 'x' || Begin[1] == 'X');
  if (HasHexPrefix)
    // not a hexadecimal floating-point literal
    return std::none_of(Begin + 2, End, [](char C) {
      return C == '.' || C == 'p' || C == 'P';
    });

  // not a decimal floating-point literal
  return std::none_of(Begin, End, [](char C) {
    return C == '.' || C == 'e' || C == 'E';
  });
}
// Returns the spelling of an identifier-like token. Raw identifiers carry
// their text directly; every other token is asked for its IdentifierInfo.
static StringRef getTokenName(const Token &Tok) {
  if (Tok.is(tok::raw_identifier))
    return Tok.getRawIdentifier();
  return Tok.getIdentifierInfo()->getName();
}
namespace {
struct EnumMacro {
EnumMacro(Token Name, const MacroDirective *Directive)
: Name(Name), Directive(Directive) {}
Token Name;
const MacroDirective *Directive;
};
// A group of enum-like macros that is diagnosed and rewritten together as a
// single enum.
using MacroList = SmallVector<EnumMacro>;
// Progress of the include guard scan for one file:
//   None        - initial state; also the state after any #undef.
//   FileChanged - a file other than the main file was just entered.
//   IfGuard     - a conditional directive was seen while in FileChanged (or
//                 PragmaDirective selected this state).
//   DefineGuard - a macro was defined while in IfGuard; that macro is taken
//                 to be the include guard.
enum class IncludeGuard { None, FileChanged, IfGuard, DefineGuard };
// Per-file scanning state, kept on a stack that follows file entry and exit.
struct FileState {
  FileState() = default;

  // Number of conditional directives currently open in this file.
  int ConditionScopes = 0;
  // Spelling line of the most recently remembered macro; 0 when there is none.
  unsigned int LastLine = 0;
  // How far the include guard scan has progressed.
  IncludeGuard GuardScanner = IncludeGuard::None;
  // End of the most recently remembered macro definition; invalid when none.
  SourceLocation LastMacroLocation;
};
// Preprocessor callbacks that collect groups of adjacent enum-like macro
// definitions while the file is preprocessed and, at the end of the main file,
// emit one warning per macro plus the fix-its that turn each group into an
// enum.
class MacroToEnumCallbacks : public PPCallbacks {
public:
MacroToEnumCallbacks(MacroToEnumCheck *Check, const LangOptions &LangOptions,
const SourceManager &SM)
: Check(Check), LangOpts(LangOptions), SM(SM) {}
// Maintains the per-file state stack and starts a new macro group.
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
FileID PrevFID) override;
// An inclusion directive is handed to clearCurrentEnum(), which decides
// whether the most recent macro group is dropped.
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange, const FileEntry *File,
StringRef SearchPath, StringRef RelativePath,
const Module *Imported,
SrcMgr::CharacteristicKind FileType) override {
clearCurrentEnum(HashLoc);
}
// Keep track of macro definitions that look like enums.
void MacroDefined(const Token &MacroNameTok,
const MacroDirective *MD) override;
// Undefining an enum-like macro results in the enum set being dropped.
void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD,
const MacroDirective *Undef) override;
// Conditional compilation clears any adjacent enum-like macros.
// Macros used in conditional expressions clear any adjacent enum-like
// macros.
// Include guards are either
// #if !defined(GUARD)
// or
// #ifndef GUARD
//
// #if, #ifdef and #ifndef open a new conditional scope via conditionStart();
// the #elif family only checks the names used and opens no scope.
void If(SourceLocation Loc, SourceRange ConditionRange,
ConditionValueKind ConditionValue) override {
conditionStart(Loc);
checkCondition(ConditionRange);
}
void Ifndef(SourceLocation Loc, const Token &MacroNameTok,
const MacroDefinition &MD) override {
conditionStart(Loc);
checkName(MacroNameTok);
}
void Ifdef(SourceLocation Loc, const Token &MacroNameTok,
const MacroDefinition &MD) override {
conditionStart(Loc);
checkName(MacroNameTok);
}
void Elif(SourceLocation Loc, SourceRange ConditionRange,
ConditionValueKind ConditionValue, SourceLocation IfLoc) override {
checkCondition(ConditionRange);
}
void Elifdef(SourceLocation Loc, const Token &MacroNameTok,
const MacroDefinition &MD) override {
checkName(MacroNameTok);
}
// This overload only forwards to the base class implementation.
void Elifdef(SourceLocation Loc, SourceRange ConditionRange,
SourceLocation IfLoc) override {
PPCallbacks::Elifdef(Loc, ConditionRange, IfLoc);
}
void Elifndef(SourceLocation Loc, const Token &MacroNameTok,
const MacroDefinition &MD) override {
checkName(MacroNameTok);
}
// This overload only forwards to the base class implementation.
void Elifndef(SourceLocation Loc, SourceRange ConditionRange,
SourceLocation IfLoc) override {
PPCallbacks::Elifndef(Loc, ConditionRange, IfLoc);
}
// Closes the innermost conditional scope.
void Endif(SourceLocation Loc, SourceLocation IfLoc) override;
// Inspects the pragma text while the include guard scan is still in the
// FileChanged state.
void PragmaDirective(SourceLocation Loc,
PragmaIntroducerKind Introducer) override;
// After we've seen everything, issue warnings and fix-its.
void EndOfMainFile() override;
private:
// Starts a new, empty macro group unless the last group is already empty.
void newEnum() {
if (Enums.empty() || !Enums.back().empty())
Enums.emplace_back();
}
// True when the current file has more open conditional scopes than allowed:
// more than one once the guard scan reached DefineGuard, more than zero
// otherwise.
bool insideConditional() const {
return (CurrentFile->GuardScanner == IncludeGuard::DefineGuard &&
CurrentFile->ConditionScopes > 1) ||
(CurrentFile->GuardScanner != IncludeGuard::DefineGuard &&
CurrentFile->ConditionScopes > 0);
}
// Decides whether MD continues the macro group currently being collected.
bool isConsecutiveMacro(const MacroDirective *MD) const;
// Records the spelling line of MD and the location just past the end of its
// definition, for use when the next macro is examined.
void rememberLastMacroLocation(const MacroDirective *MD) {
CurrentFile->LastLine = SM.getSpellingLineNumber(MD->getLocation());
CurrentFile->LastMacroLocation = Lexer::getLocForEndOfToken(
MD->getMacroInfo()->getDefinitionEndLoc(), 0, SM, LangOpts);
}
// Forgets the last macro; isConsecutiveMacro() then returns false for the
// next definition.
void clearLastMacroLocation() {
CurrentFile->LastLine = 0;
CurrentFile->LastMacroLocation = SourceLocation{};
}
// Drops the most recent group if the directive at Loc is on the line right
// after the last remembered macro, then forgets the last macro.
void clearCurrentEnum(SourceLocation Loc);
// Bookkeeping shared by #if, #ifdef and #ifndef.
void conditionStart(const SourceLocation &Loc);
// Runs checkName() on every identifier in the condition except "defined".
void checkCondition(SourceRange ConditionRange);
// Removes every group that contains a macro with this name.
void checkName(const Token &MacroNameTok);
// Emits the per-macro warning.
void warnMacroEnum(const EnumMacro &Macro) const;
// Emits the diagnostic carrying the fix-its that rewrite one group.
void fixEnumMacro(const MacroList &MacroList) const;
MacroToEnumCheck *Check;
const LangOptions &LangOpts;
const SourceManager &SM;
// Collected macro groups; each non-empty group is reported in
// EndOfMainFile().
SmallVector<MacroList> Enums;
// One FileState per file currently entered; the innermost file is last.
SmallVector<FileState> Files;
// Points at Files.back(); reassigned at the end of every FileChanged() call.
FileState *CurrentFile = nullptr;
};
bool MacroToEnumCallbacks::isConsecutiveMacro(const MacroDirective *MD) const {
if (CurrentFile->LastMacroLocation.isInvalid())
return false;
SourceLocation Loc = MD->getLocation();
if (CurrentFile->LastLine + 1 == SM.getSpellingLineNumber(Loc))
return true;
SourceLocation Define =
SM.translateLineCol(SM.getFileID(Loc), SM.getSpellingLineNumber(Loc), 1);
CharSourceRange BetweenMacros{
SourceRange{CurrentFile->LastMacroLocation, Define}, true};
CharSourceRange CharRange =
Lexer::makeFileCharRange(BetweenMacros, SM, LangOpts);
StringRef BetweenText = Lexer::getSourceText(CharRange, SM, LangOpts);
return hasOnlyComments(Define, LangOpts, BetweenText);
}
// Drops the most recent macro group when the directive at Loc is on the line
// immediately after the last remembered macro; always forgets the last macro.
void MacroToEnumCallbacks::clearCurrentEnum(SourceLocation Loc) {
  const bool HasPendingGroup = !Enums.empty() && !Enums.back().empty();
  if (HasPendingGroup &&
      SM.getSpellingLineNumber(Loc) == CurrentFile->LastLine + 1) {
    Enums.pop_back();
  }
  clearLastMacroLocation();
}
void MacroToEnumCallbacks::conditionStart(const SourceLocation &Loc) {
++CurrentFile->ConditionScopes;
clearCurrentEnum(Loc);
if (CurrentFile->GuardScanner == IncludeGuard::FileChanged)
CurrentFile->GuardScanner = IncludeGuard::IfGuard;
}
// Raw-lexes the source text of a preprocessor condition and passes every
// identifier other than "defined" to checkName(), so that macro groups whose
// names are used in the condition are dropped.
void MacroToEnumCallbacks::checkCondition(SourceRange Range) {
  CharSourceRange CharRange = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(Range), SM, LangOpts);
  // The lexer needs a buffer that outlives it, so keep a private copy.
  std::string Text = Lexer::getSourceText(CharRange, SM, LangOpts).str();
  Lexer Lex(CharRange.getBegin(), LangOpts, Text.data(), Text.data(),
            Text.data() + Text.size());
  Token Tok;
  bool End = false;
  while (!End) {
    // The token delivered together with the end-of-buffer result is still
    // examined, hence the flag instead of testing the call in the loop header.
    End = Lex.LexFromRawLexer(Tok);
    // Compare the StringRef directly; no temporary std::string is needed.
    if (Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() != "defined")
      checkName(Tok);
  }
}
void MacroToEnumCallbacks::checkName(const Token &MacroNameTok) {
StringRef Id = getTokenName(MacroNameTok);
llvm::erase_if(Enums, [&Id](const MacroList &MacroList) {
return llvm::any_of(MacroList, [&Id](const EnumMacro &Macro) {
return getTokenName(Macro.Name) == Id;
});
});
}
// Starts a new macro group on every file change and keeps the FileState stack
// in step with file entry and exit.
void MacroToEnumCallbacks::FileChanged(SourceLocation Loc,
                                       FileChangeReason Reason,
                                       SrcMgr::CharacteristicKind FileType,
                                       FileID PrevFID) {
  newEnum();

  switch (Reason) {
  case EnterFile:
    Files.emplace_back();
    // Only files other than the main file are scanned for an include guard.
    if (!SM.isInMainFile(Loc))
      Files.back().GuardScanner = IncludeGuard::FileChanged;
    break;
  case ExitFile:
    assert(CurrentFile->ConditionScopes == 0);
    Files.pop_back();
    break;
  default:
    // Other reasons leave the stack untouched.
    break;
  }

  // The vector may have reallocated or shrunk, so refresh the pointer.
  CurrentFile = &Files.back();
}
void MacroToEnumCallbacks::MacroDefined(const Token &MacroNameTok,
const MacroDirective *MD) {
// Include guards are never candidates for becoming an enum.
if (CurrentFile->GuardScanner == IncludeGuard::IfGuard) {
CurrentFile->GuardScanner = IncludeGuard::DefineGuard;
return;
}
if (insideConditional())
return;
if (SM.getFilename(MD->getLocation()).empty())
return;
const MacroInfo *Info = MD->getMacroInfo();
if (Info->isFunctionLike() || Info->isBuiltinMacro() ||
Info->tokens().empty() || Info->tokens().size() > 2)
return;
// It can be +Lit, -Lit or just Lit.
Token Tok = Info->tokens().front();
if (Info->tokens().size() == 2) {
if (!Tok.isOneOf(tok::TokenKind::minus, tok::TokenKind::plus,
tok::TokenKind::tilde))
return;
Tok = Info->tokens().back();
}
if (!Tok.isLiteral() || isStringLiteral(Tok.getKind()) ||
!isIntegralConstant(Tok))
return;
if (!isConsecutiveMacro(MD))
newEnum();
Enums.back().emplace_back(MacroNameTok, MD);
rememberLastMacroLocation(MD);
}
// An #undef removes the first collected group that contains the undefined
// name, ends the group in progress and resets the include guard scan.
void MacroToEnumCallbacks::MacroUndefined(const Token &MacroNameTok,
                                          const MacroDefinition &MD,
                                          const MacroDirective *Undef) {
  const auto HasSameName = [&MacroNameTok](const EnumMacro &Entry) {
    return getTokenName(Entry.Name) == getTokenName(MacroNameTok);
  };
  const auto GroupHasName = [HasSameName](const MacroList &Group) {
    return llvm::any_of(Group, HasSameName);
  };

  auto Found = llvm::find_if(Enums, GroupHasName);
  if (Found != Enums.end())
    Enums.erase(Found);

  clearLastMacroLocation();
  CurrentFile->GuardScanner = IncludeGuard::None;
}
// Closes the innermost conditional scope of the current file.
void MacroToEnumCallbacks::Endif(SourceLocation Loc, SourceLocation IfLoc) {
  int &OpenScopes = CurrentFile->ConditionScopes;
  const bool GuardDefined =
      CurrentFile->GuardScanner == IncludeGuard::DefineGuard;

  // With an include guard define recorded and no scope open, there is nothing
  // to close.
  if (OpenScopes == 0 && GuardDefined)
    return;

  // The group in progress needs no clearing here; that already happened when
  // the conditional block was opened.
  assert(OpenScopes > 0);
  --OpenScopes;
}
namespace {
// Returns true when the NUL-terminated text at HayStack begins with the
// string literal Needle (N counts the literal's terminating NUL, so N - 1
// characters are compared).
template <size_t N>
bool textEquals(const char (&Needle)[N], const char *HayStack) {
  // strncmp stops at the haystack's terminating NUL, so a haystack shorter
  // than the needle is never read past its end.
  return std::strncmp(HayStack, Needle, N - 1) == 0;
}
// Length of a string literal, not counting its terminating NUL.
template <size_t N> size_t len(const char (&)[N]) { return N - 1; }
} // namespace
// Examines the source text following the token at Loc while the include guard
// scan of the current file is still in the FileChanged state, and may move the
// scan to IfGuard when the text "once" is found.
void MacroToEnumCallbacks::PragmaDirective(SourceLocation Loc,
PragmaIntroducerKind Introducer) {
// Only relevant before any conditional directive has been seen in this file.
if (CurrentFile->GuardScanner != IncludeGuard::FileChanged)
return;
bool Invalid = false;
// Raw character data starting just past the end of the token at Loc.
const char *Text = SM.getCharacterData(
Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts), &Invalid);
if (Invalid)
return;
// NOTE(review): std::isspace is given a plain char here and below; a negative
// value is undefined behavior. Consider casting to unsigned char.
while (*Text && std::isspace(*Text))
++Text;
// NOTE(review): this returns when the text DOES start with "pragma", so the
// "once" test below is only reached for text that does not start with
// "pragma"; in that case the next statement still skips len("pragma")
// characters without checking that many remain. The condition looks inverted,
// but flipping it would make the next #define be treated as an include guard
// by MacroDefined() - confirm the intended behavior before changing it.
if (textEquals("pragma", Text))
return;
Text += len("pragma");
while (*Text && std::isspace(*Text))
++Text;
if (textEquals("once", Text))
CurrentFile->GuardScanner = IncludeGuard::IfGuard;
}
// Reports every non-empty macro group: one warning per macro, followed by the
// fix-it diagnostic for the whole group.
void MacroToEnumCallbacks::EndOfMainFile() {
  for (const MacroList &Group : Enums) {
    if (!Group.empty()) {
      for (const EnumMacro &Entry : Group)
        warnMacroEnum(Entry);
      fixEnumMacro(Group);
    }
  }
}
void MacroToEnumCallbacks::warnMacroEnum(const EnumMacro &Macro) const {
Check->diag(Macro.Directive->getLocation(),
"macro '%0' defines an integral constant; prefer an enum instead")
<< getTokenName(Macro.Name);
}
void MacroToEnumCallbacks::fixEnumMacro(const MacroList &MacroList) const {
SourceLocation Begin =
MacroList.front().Directive->getMacroInfo()->getDefinitionLoc();
Begin = SM.translateLineCol(SM.getFileID(Begin),
SM.getSpellingLineNumber(Begin), 1);
DiagnosticBuilder Diagnostic =
Check->diag(Begin, "replace macro with enum")
<< FixItHint::CreateInsertion(Begin, "enum {\n");
for (size_t I = 0u; I < MacroList.size(); ++I) {
const EnumMacro &Macro = MacroList[I];
SourceLocation DefineEnd =
Macro.Directive->getMacroInfo()->getDefinitionLoc();
SourceLocation DefineBegin = SM.translateLineCol(
SM.getFileID(DefineEnd), SM.getSpellingLineNumber(DefineEnd), 1);
CharSourceRange DefineRange;
DefineRange.setBegin(DefineBegin);
DefineRange.setEnd(DefineEnd);
Diagnostic << FixItHint::CreateRemoval(DefineRange);
SourceLocation NameEnd = Lexer::getLocForEndOfToken(
Macro.Directive->getMacroInfo()->getDefinitionLoc(), 0, SM, LangOpts);
Diagnostic << FixItHint::CreateInsertion(NameEnd, " =");
SourceLocation ValueEnd = Lexer::getLocForEndOfToken(
Macro.Directive->getMacroInfo()->getDefinitionEndLoc(), 0, SM,
LangOpts);
if (I < MacroList.size() - 1)
Diagnostic << FixItHint::CreateInsertion(ValueEnd, ",");
}
SourceLocation End = Lexer::getLocForEndOfToken(
MacroList.back().Directive->getMacroInfo()->getDefinitionEndLoc(), 0, SM,
LangOpts);
End = SM.translateLineCol(SM.getFileID(End),
SM.getSpellingLineNumber(End) + 1, 1);
Diagnostic << FixItHint::CreateInsertion(End, "};\n");
}
} // namespace
// Installs the callbacks that collect enum-like macros on the preprocessor.
void MacroToEnumCheck::registerPPCallbacks(const SourceManager &SM,
                                           Preprocessor *PP,
                                           Preprocessor *ModuleExpanderPP) {
  auto Callbacks =
      std::make_unique<MacroToEnumCallbacks>(this, getLangOpts(), SM);
  PP->addPPCallbacks(std::move(Callbacks));
}
} // namespace modernize
} // namespace tidy
} // namespace clang