[flang] Don't create bogus tokens from token pasting (##) (#147596)

When blank tokens arise from macro replacement in token sequences that use
token pasting (##), the preprocessor produces some bogus tokens
(e.g., "name(") that can lead to subtle bugs later, because macro names are
then not recognized as such.

The fix is to not paste tokens together when the result would not be a
valid Fortran or C token in the preprocessing context.
This commit is contained in:
Peter Klausler 2025-07-14 11:11:43 -07:00 committed by GitHub
parent e86bd05bdc
commit 4dceb25dd1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 61 additions and 12 deletions

View File

@ -46,6 +46,8 @@ public:
// Returns a reference to the character at offset j from interval_.start().
// No bounds check is performed here.
constexpr const char &operator[](std::size_t j) const {
return interval_.start()[j];
}
// Returns a reference to the first character, i.e. (*this)[0].
// No emptiness check is performed here; callers presumably must ensure the
// block is non-empty -- TODO confirm.
constexpr const char &front() const { return (*this)[0]; }
// Returns a reference to the last character, i.e. (*this)[size() - 1].
// No emptiness check is performed here. NOTE(review): if size() is 0 the
// index size() - 1 presumably wraps around -- callers should ensure the
// block is non-empty; confirm against size()'s return type.
constexpr const char &back() const { return (*this)[size() - 1]; }
bool Contains(const CharBlock &that) const {
return interval_.Contains(that.interval_);

View File

@ -156,23 +156,50 @@ static TokenSequence TokenPasting(TokenSequence &&text) {
}
TokenSequence result;
std::size_t tokens{text.SizeInTokens()};
bool pasting{false};
std::optional<CharBlock> before; // last non-blank token before ##
for (std::size_t j{0}; j < tokens; ++j) {
if (IsTokenPasting(text.TokenAt(j))) {
if (!pasting) {
CharBlock after{text.TokenAt(j)};
if (!before) {
if (IsTokenPasting(after)) {
while (!result.empty() &&
result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
result.pop_back();
}
if (!result.empty()) {
result.ReopenLastToken();
pasting = true;
before = result.TokenAt(result.SizeInTokens() - 1);
}
} else {
result.AppendRange(text, j, 1);
}
} else if (after.IsBlank() || IsTokenPasting(after)) {
// drop it
} else { // pasting before ## after
bool doPaste{false};
char last{before->back()};
char first{after.front()};
// Apply basic sanity checking to pasting to avoid constructing a bogus
// token that might cause macro replacement to fail, like "macro(".
if (IsLegalInIdentifier(last) && IsLegalInIdentifier(first)) {
doPaste = true;
} else if (IsDecimalDigit(first) &&
(last == '.' || last == '+' || last == '-')) {
doPaste = true; // 1. ## 0, - ## 1
} else if (before->size() == 1 && after.size() == 1) {
if (first == last &&
(last == '<' || last == '>' || last == '*' || last == '/' ||
last == '=' || last == '&' || last == '|' || last == ':')) {
// Fortran **, //, ==, ::
// C <<, >>, &&, || for use in #if expressions
doPaste = true;
} else if (first == '=' && (last == '!' || last == '/')) {
doPaste = true; // != and /=
}
}
} else if (pasting && text.TokenAt(j).IsBlank()) {
} else {
if (doPaste) {
result.ReopenLastToken();
}
result.AppendRange(text, j, 1);
pasting = false;
before.reset();
}
}
return result;

View File

@ -778,7 +778,7 @@ public:
}
void Unparse(const SubstringInquiry &x) {
Walk(x.v);
Put(x.source.end()[-1] == 'n' ? "%LEN" : "%KIND");
Put(x.source.back() == 'n' ? "%LEN" : "%KIND");
}
void Unparse(const SubstringRange &x) { // R910
Walk(x.t, ":");

View File

@ -1269,7 +1269,7 @@ MaybeExpr ExpressionAnalyzer::Analyze(
MaybeExpr ExpressionAnalyzer::Analyze(const parser::SubstringInquiry &x) {
if (MaybeExpr substring{Analyze(x.v)}) {
CHECK(x.source.size() >= 8);
int nameLen{x.source.end()[-1] == 'n' ? 3 /*LEN*/ : 4 /*KIND*/};
int nameLen{x.source.back() == 'n' ? 3 /*LEN*/ : 4 /*KIND*/};
parser::CharBlock name{
x.source.end() - nameLen, static_cast<std::size_t>(nameLen)};
CHECK(name == "len" || name == "kind");

View File

@ -2139,7 +2139,7 @@ bool ImplicitRules::isImplicitNoneExternal() const {
const DeclTypeSpec *ImplicitRules::GetType(
SourceName name, bool respectImplicitNoneType) const {
char ch{name.begin()[0]};
char ch{name.front()};
if (isImplicitNoneType_ && respectImplicitNoneType) {
return nullptr;
} else if (auto it{map_.find(ch)}; it != map_.end()) {

View File

@ -1740,7 +1740,7 @@ std::forward_list<std::string> GetOperatorNames(
std::forward_list<std::string> GetAllNames(
const SemanticsContext &context, const SourceName &name) {
std::string str{name.ToString()};
if (!name.empty() && name.end()[-1] == ')' &&
if (!name.empty() && name.back() == ')' &&
name.ToString().rfind("operator(", 0) == 0) {
for (int i{0}; i != common::LogicalOperator_enumSize; ++i) {
auto names{GetOperatorNames(context, common::LogicalOperator{i})};

View File

@ -0,0 +1,20 @@
! RUN: %flang -E %s 2>&1 | FileCheck %s
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define PREFIX(x) prefix ## x
#define NAME(x) PREFIX(foo ## x)
#define AUGMENT(x) NAME(x ## suffix)
! CHECK: subroutine prefixfoosuffix()
! CHECK: print *, "prefixfoosuffix"
! CHECK: end subroutine prefixfoosuffix
subroutine AUGMENT()()
print *, TOSTRING(AUGMENT())
end subroutine AUGMENT()
! CHECK: subroutine prefixfoobarsuffix()
! CHECK: print *, "prefixfoobarsuffix"
! CHECK: end subroutine prefixfoobarsuffix
subroutine AUGMENT(bar)()
print *, TOSTRING(AUGMENT(bar))
end subroutine AUGMENT(bar)