[clang][clang-scan-deps] Add LangOptions::AllowLiteralDigitSeparator to fix #88896 (#184235)

Fixes #88896 by following the approach described in https://github.com/llvm/llvm-project/pull/95798#discussion_r1649496882. This adds `LangOptions::AllowLiteralDigitSeparator`, following the pattern used for `RawStringLiterals` in #88265. It is enabled by default for C++14 and C23, and the Scanner sets it explicitly to always allow literals with digit separators in directives. Originally authored by @tsfn (Yifan Fang, <gatsfn@gmail.com>) in #158420. Co-authored-by: Yifan Fang <gatsfn@gmail.com>
2026-03-02 22:57:51 +01:00 · 2026-03-02 22:57:51 +01:00 · 4f50a725fa
commit 4f50a725fa
parent 42a0fbc2c7
7 changed files with 26 additions and 1 deletions
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@ -449,6 +449,8 @@ LANGOPT(CXXAssumptions, 1, 1, NotCompatible, "Enable or disable codegen and comp

 LANGOPT(RawStringLiterals, 1, 1, NotCompatible, "Enable or disable raw string literals")

+LANGOPT(AllowLiteralDigitSeparator, 1, 0, NotCompatible, "Allow literal digit separator in source")
+
 ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2,
             StrictFlexArraysLevelKind::Default, NotCompatible,
             "Rely on strict definition of flexible arrays")
--- a/clang/include/clang/Basic/LangStandard.h
+++ b/clang/include/clang/Basic/LangStandard.h
@ -143,6 +143,9 @@ public:
    return isCPlusPlus11() || (!isCPlusPlus() && isC99() && isGNUMode());
  }

+  /// allowLiteralDigitSeparator - Language supports literal digit separators
+  bool allowLiteralDigitSeparator() const { return isCPlusPlus14() || isC23(); }
+
  /// isGNUMode - Language includes GNU extensions.
  bool isGNUMode() const { return Flags & GNUMode; }

--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@ -129,6 +129,7 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
  Opts.WChar = Std.isCPlusPlus();
  Opts.Digraphs = Std.hasDigraphs();
  Opts.RawStringLiterals = Std.hasRawStringLiterals();
+  Opts.AllowLiteralDigitSeparator = Std.allowLiteralDigitSeparator();
  Opts.NamedLoops = Std.isC2y();

  Opts.HLSL = Lang == Language::HLSL;
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@ -4327,6 +4327,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
  }

  LangOpts.Char8 = SinceCpp20;
+  LangOpts.AllowLiteralDigitSeparator = LangOpts.CPlusPlus14 || LangOpts.C23;
  // Turning on digraphs in standards before C++0x is error-prone, because e.g.
  // the sequence "<::" will be unconditionally treated as "[:".
  // Cf. Lexer::LexTokenInternal.
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@ -74,6 +74,7 @@ struct Scanner {
    LangOpts.ObjC = true;
    LangOpts.LineComment = true;
    LangOpts.RawStringLiterals = true;
+    LangOpts.AllowLiteralDigitSeparator = true;
    // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"".
    return LangOpts;
  }
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@ -2098,7 +2098,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
  }

  // If we have a digit separator, continue.
-  if (C == '\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
+  if (C == '\'' && LangOpts.AllowLiteralDigitSeparator) {
    auto [Next, NextSize] = getCharAndSizeNoWarn(CurPtr + Size, LangOpts);
    if (isAsciiIdentifierContinue(Next)) {
      if (!isLexingRawMode())
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//

 #include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/SmallString.h"
 #include "gtest/gtest.h"

@ -1001,6 +1002,22 @@ int z = 128'78;
  EXPECT_STREQ("#include <test.h>\n", Out.data());
 }

+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralInPreprocessor) {
+  SmallVector<char, 128> Out; // Not inspected by this test.
+  SmallVector<dependency_directives_scan::Token, 8> Tokens;
+  SmallVector<Directive, 4> Directives; // Not inspected by this test.
+
+  StringRef Source = R"(
+    #if 1'2 == 12
+    #endif
+    )";
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
+  ASSERT_GE(Tokens.size(), 4u); // Bounds guard for the indexing below.
+  EXPECT_EQ(Tokens[2].Kind, tok::numeric_constant); // Separator stays in number.
+  EXPECT_EQ(Tokens[3].Kind, tok::equalequal); // Next token is the == operator.
+}
+
 TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) {
  SmallVector<char, 128> Out;
  SmallVector<dependency_directives_scan::Token, 4> Tokens;