[clang][clang-scan-deps] Add LangOptions::AllowLiteralDigitSeparator to fix #88896 (#184235)

Fixes #88896 by following the approach described in
https://github.com/llvm/llvm-project/pull/95798#discussion_r1649496882.
This adds `LangOptions::AllowLiteralDigitSeparator`, following the
pattern used for `RawStringLiterals` in #88265.

It is enabled by default for C++14 and C23, and the Scanner sets it
explicitly to always allow literals with digit separators in directives.

Originally authored by @tsfn (Yifan Fang, <gatsfn@gmail.com>) in
#158420.

Co-authored-by: Yifan Fang <gatsfn@gmail.com>
This commit is contained in:
Naveen Seth Hanig 2026-03-02 22:57:51 +01:00 committed by GitHub
parent 42a0fbc2c7
commit 4f50a725fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 26 additions and 1 deletions

View File

@ -449,6 +449,8 @@ LANGOPT(CXXAssumptions, 1, 1, NotCompatible, "Enable or disable codegen and comp
LANGOPT(RawStringLiterals, 1, 1, NotCompatible, "Enable or disable raw string literals")
// Controls whether the lexer accepts ' as a digit separator inside numeric
// literals. Off by default here; enabled elsewhere per language standard.
LANGOPT(AllowLiteralDigitSeparator, 1, 0, NotCompatible, "Allow literal digit separator in source")
ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2,
StrictFlexArraysLevelKind::Default, NotCompatible,
"Rely on strict definition of flexible arrays")

View File

@ -143,6 +143,9 @@ public:
return isCPlusPlus11() || (!isCPlusPlus() && isC99() && isGNUMode());
}
/// allowLiteralDigitSeparator - Language supports the literal digit separator
/// (the ' character inside numeric literals, e.g. 1'000). True when either
/// isCPlusPlus14() or isC23() holds for this standard.
bool allowLiteralDigitSeparator() const { return isCPlusPlus14() || isC23(); }
/// isGNUMode - Language includes GNU extensions.
/// Reports whether the GNUMode bit is set in this standard's Flags.
bool isGNUMode() const { return Flags & GNUMode; }

View File

@ -129,6 +129,7 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
Opts.WChar = Std.isCPlusPlus();
Opts.Digraphs = Std.hasDigraphs();
Opts.RawStringLiterals = Std.hasRawStringLiterals();
Opts.AllowLiteralDigitSeparator = Std.allowLiteralDigitSeparator();
Opts.NamedLoops = Std.isC2y();
Opts.HLSL = Lang == Language::HLSL;

View File

@ -4327,6 +4327,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
}
LangOpts.Char8 = SinceCpp20;
LangOpts.AllowLiteralDigitSeparator = LangOpts.CPlusPlus14 || LangOpts.C23;
// Turning on digraphs in standards before C++0x is error-prone, because e.g.
// the sequence "<::" will be unconditionally treated as "[:".
// Cf. Lexer::LexTokenInternal.

View File

@ -74,6 +74,7 @@ struct Scanner {
LangOpts.ObjC = true;
LangOpts.LineComment = true;
LangOpts.RawStringLiterals = true;
LangOpts.AllowLiteralDigitSeparator = true;
// FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"".
return LangOpts;
}

View File

@ -2098,7 +2098,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
}
// If we have a digit separator, continue.
if (C == '\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
if (C == '\'' && LangOpts.AllowLiteralDigitSeparator) {
auto [Next, NextSize] = getCharAndSizeNoWarn(CurPtr + Size, LangOpts);
if (isAsciiIdentifierContinue(Next)) {
if (!isLexingRawMode())

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/DependencyDirectivesScanner.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/SmallString.h"
#include "gtest/gtest.h"
@ -1001,6 +1002,22 @@ int z = 128'78;
EXPECT_STREQ("#include <test.h>\n", Out.data());
}
// Checks that a numeric literal containing a digit separator (1'2) inside a
// preprocessor condition is scanned as one numeric constant, rather than the
// ' being treated as the start of a character literal.
// NOTE(review): despite the test name, the input exercises a digit separator,
// not a character literal.
TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralInPreprocessor) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 8> Tokens;
SmallVector<Directive, 4> Directives;
StringRef Source = R"(
#if 1'2 == 12
#endif
)";
// The scanner reports failure by returning true, so success is ASSERT_FALSE.
ASSERT_FALSE(
minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
// Guard the indexed accesses below.
ASSERT_GE(Tokens.size(), 4u);
// Tokens[2] must be the whole literal and Tokens[3] the `==` that follows it;
// presumably Tokens[0] and Tokens[1] are `#` and `if` -- confirm against the
// scanner's token output.
EXPECT_EQ(Tokens[2].Kind, tok::numeric_constant);
EXPECT_EQ(Tokens[3].Kind, tok::equalequal);
}
TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
SmallVector<char, 128> Out;
SmallVector<dependency_directives_scan::Token, 4> Tokens;