yronglin 1da403937e
Reapply "[C++20][Modules] Implement P1857R3 Modules Dependency Discovery" (#173130)" (#173789)
This patch reapplies https://github.com/llvm/llvm-project/pull/173130.

This patch implements the following papers and core issue:
[P1857R3 Modules Dependency Discovery](https://wg21.link/p1857r3).
[P3034R1 Module Declarations Shouldn’t be
Macros](https://wg21.link/P3034R1).
[CWG2947](https://cplusplus.github.io/CWG/issues/2947.html).

At the start of phase 4, an import or module token is treated as starting
a directive and is converted to its respective keyword iff it:

- After skipping horizontal whitespace, is
    - at the start of a logical line, or
    - preceded by an export at the start of the logical line.
- Is followed by an identifier pp token (before macro expansion), or
    - <, ", or : (but not ::) pp tokens for import, or
    - ; for module

Otherwise the token is treated as an identifier.

Additionally:

- The entire import or module directive (including the closing ;) must
be on a single logical line and for module must not come from an
#include.
- The expansion of macros must not result in an import or module
directive introducer that was not there prior to macro expansion.
- A module directive may only appear as the first preprocessing tokens
in a file (excluding the global module fragment.)
- Preprocessor conditionals shall not span a module declaration.

After this patch, we handle C++ module-import and module-declaration as
real pp-directives in the preprocessor. Additionally, we refactor module
name lexing: the complex state machine is removed and the full module name
is read during module/import directive handling. We could possibly introduce
a tok::annot_module_name token in the future to avoid parsing the module
name twice (in both the preprocessor and the parser), but that makes error
recovery much more difficult (e.g. import a; import b; on the same line).

This patch also introduces 2 new keywords, `__preprocessed_module` and
`__preprocessed_import`. These 2 keywords are generated in `-E` mode.
This is useful to avoid confusion with the `module` and `import` keywords in
preprocessed output:
```cpp
export module m;
struct import {};
#define EMPTY
EMPTY import foo;
```

Fixes https://github.com/llvm/llvm-project/issues/54047

The previous patch had a use-after-free issue in the
Lexer::LexTokenInternal function. Since C++20, the `export`, `import`
and `module` identifiers may be the introducer of a C++ module
declaration/import directive, and the directive will be handled in
`LexIdentifierContinue`. Unfortunately, EOF may be encountered in
`LexIdentifierContinue` and `CurLexer` might be destroyed in
`HandleEndOfFile`. If the code after `LexIdentifierContinue` tries to
access `LangOpts` or other class members of this Lexer, it will hit
undefined behavior.

This patch also fixes the use-after-free issue in Lexer by introducing a
mechanism to delay the destruction of `CurLexer` in the `Preprocessor`
class.

---------

Signed-off-by: yronglin <yronglin777@gmail.com>
2026-01-20 17:42:46 +08:00

208 lines
8.2 KiB
C++

// Regression test for treating C++20 `module` / `import` lines as
// preprocessing directives. The file is split into one input per test case
// (each section below starting with a `//---` marker) and every case is
// compiled on its own, so the diagnostics expected in one case cannot leak
// into another.

// Start from a clean scratch directory and split the cases into it.
// RUN: rm -rf %t
// RUN: mkdir %t
// RUN: split-file %s %t
// RUN: %clang_cc1 -std=c++20 %t/hash.cpp -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/module.cpp -fsyntax-only -verify

// Build the module interfaces and header units that import.cpp is given via
// -fmodule-file, then check import.cpp itself.
// RUN: %clang_cc1 -std=c++20 %t/rightpad.cppm -emit-module-interface -o %t/rightpad.pcm
// RUN: %clang_cc1 -std=c++20 %t/M_part.cppm -emit-module-interface -o %t/M_part.pcm
// RUN: %clang_cc1 -std=c++20 -xc++-system-header %t/string -emit-header-unit -o %t/string.pcm
// RUN: %clang_cc1 -std=c++20 -xc++-user-header %t/squee -emit-header-unit -o %t/squee.pcm
// RUN: %clang_cc1 -std=c++20 %t/import.cpp -isystem %t \
// RUN: -fmodule-file=rightpad=%t/rightpad.pcm \
// RUN: -fmodule-file=M:part=%t/M_part.pcm \
// RUN: -fmodule-file=%t/string.pcm \
// RUN: -fmodule-file=%t/squee.pcm \
// RUN: -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/module_decl_not_in_same_line.cpp -fsyntax-only -verify

// foo.pcm is passed to import_decl_not_in_same_line.cpp as module `foo`.
// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-module-interface -o %t/foo.pcm
// RUN: %clang_cc1 -std=c++20 %t/import_decl_not_in_same_line.cpp -fmodule-file=foo=%t/foo.pcm -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/not_import.cpp -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/import_spaceship.cpp -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/leading_empty_macro.cpp -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/operator_keyword_and.cpp -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/operator_keyword_and2.cpp -fsyntax-only -verify

// The cases below define their macros on the command line (-D) rather than in
// the source file.
// RUN: %clang_cc1 -std=c++20 %t/macro_in_module_decl_suffix.cpp -D'ATTR(X)=[[X]]' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/macro_in_module_decl_suffix2.cpp -D'ATTR(X)=[[X]]' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/extra_tokens_after_module_decl1.cpp -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/extra_tokens_after_module_decl2.cpp -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/object_like_macro_in_module_name.cpp -Dm=x -Dn=y -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/object_like_macro_in_partition_name.cpp -Dm=x -Dn=y -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/unexpected_character_in_pp_module_suffix.cpp -D'm(x)=x' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/semi_in_same_line.cpp -fsyntax-only -verify

// This case checks the preprocessed (-E) output with FileCheck instead of
// verifying diagnostics; the check lines live in the input file itself.
// RUN: %clang_cc1 -std=c++20 %t/preprocessed_module_file.cpp -E | FileCheck %t/preprocessed_module_file.cpp
// RUN: %clang_cc1 -std=c++20 %t/pedantic-errors.cpp -pedantic-errors -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/xcpp-output.cpp -fsyntax-only -verify -xc++-cpp-output
// RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
//--- hash.cpp
// expected-no-diagnostics
# // preprocessing directive
//--- module.cpp
// expected-no-diagnostics
module ; // preprocessing directive
export module leftpad; // preprocessing directive
//--- string
#ifndef STRING_H
#define STRING_H
#endif // STRING_H
//--- squee
#ifndef SQUEE_H
#define SQUEE_H
#endif
//--- rightpad.cppm
export module rightpad;
//--- M_part.cppm
export module M:part;
//--- import.cpp
export module M;
import <string>; // expected-warning {{the implementation of header units is in an experimental phase}}
export import "squee"; // expected-warning {{the implementation of header units is in an experimental phase}}
import rightpad; // preprocessing directive
import :part; // preprocessing directive
//--- module_decl_not_in_same_line.cpp
module // expected-error {{a type specifier is required for all declarations}}
;export module M; // expected-error {{export declaration can only be used within a module interface}} \
// expected-error {{unknown type name 'module'}}
//--- foo.cppm
export module foo;
//--- import_decl_not_in_same_line.cpp
export module M;
export
import // expected-error {{unknown type name 'import'}}
foo;
export
import foo; // expected-error {{unknown type name 'import'}}
//--- not_import.cpp
export module M;
import :: // expected-error {{use of undeclared identifier 'import'}}
import -> // expected-error {{cannot use arrow operator on a type}}
//--- import_spaceship.cpp
export module M;
import <=>; // expected-error {{'=' file not found}}
//--- leading_empty_macro.cpp
// expected-no-diagnostics
export module M;
typedef int import;
#define EMP
EMP import m; // The phase 7 grammar should see import as a typedef-name.
//--- operator_keyword_and.cpp
// expected-no-diagnostics
typedef int import;
extern
import and x;
//--- operator_keyword_and2.cpp
// expected-no-diagnostics
typedef int module;
extern
module and x;
//--- macro_in_module_decl_suffix.cpp
export module m ATTR(x); // expected-warning {{unknown attribute 'x' ignored}}
//--- macro_in_module_decl_suffix2.cpp
export module m [[y]] ATTR(x); // expected-warning {{unknown attribute 'y' ignored}} \
// expected-warning {{unknown attribute 'x' ignored}}
//--- extra_tokens_after_module_decl1.cpp
module; int n; // expected-warning {{extra tokens after semicolon in 'module' directive}}
import foo; int n1; // expected-warning {{extra tokens after semicolon in 'import' directive}}
// expected-error@-1 {{module 'foo' not found}}
const int *p1 = &n1;
//--- extra_tokens_after_module_decl2.cpp
export module m; int n2 // expected-warning {{extra tokens after semicolon in 'module' directive}}
;
const int *p2 = &n2;
//--- object_like_macro_in_module_name.cpp
export module m.n;
// expected-error@-1 {{module name component 'm' cannot be a object-like macro}}
// expected-note@* {{macro 'm' defined here}}
// expected-error@-3 {{module name component 'n' cannot be a object-like macro}}
// expected-note@* {{macro 'n' defined here}}
//--- object_like_macro_in_partition_name.cpp
export module m:n;
// expected-error@-1 {{module name component 'm' cannot be a object-like macro}}
// expected-note@* {{macro 'm' defined here}}
// expected-error@-3 {{partition name component 'n' cannot be a object-like macro}}
// expected-note@* {{macro 'n' defined here}}
//--- unexpected_character_in_pp_module_suffix.cpp
export module m();
// expected-error@-1 {{unexpected preprocessing token '(' after module name, only ';' and '[' (start of attribute specifier sequence) are allowed}}
//--- semi_in_same_line.cpp
export module m // OK
[[]];
import foo // expected-error {{module 'foo' not found}}
;
//--- preprocessed_module_file.cpp
// CHECK: __preprocessed_module;
// CHECK-NEXT: export __preprocessed_module M;
// CHECK-NEXT: __preprocessed_import std;
// CHECK-NEXT: export __preprocessed_import bar;
// CHECK-NEXT: struct import {};
// CHECK-EMPTY:
// CHECK-NEXT: import foo;
module;
export module M;
import std;
export import bar;
struct import {};
#define EMPTY
EMPTY import foo;
//--- pedantic-errors.cpp
export module m; int n; // expected-warning {{extra tokens after semicolon in 'module' directive}}
//--- xcpp-output.cpp
// expected-no-diagnostics
typedef int module;
module x;
//--- func_like_macro.cpp
// #define m(x) x
export module m
(foo); // expected-error {{unexpected preprocessing token '(' after module name, only ';' and '[' (start of attribute specifier sequence) are allowed}}
//--- lparen.cpp
// #define m(x) x
// #define LPAREN (
export module m
LPAREN foo); // expected-error {{unexpected preprocessing token 'LPAREN' after module name, only ';' and '[' (start of attribute specifier sequence) are allowed}}
//--- control_line.cpp
#if 0 // #1
export module m; // expected-error {{module directive lines are not allowed on lines controlled by preprocessor conditionals}}
#else
export module m; // expected-error {{module directive lines are not allowed on lines controlled by preprocessor conditionals}} \
// expected-error {{module declaration must occur at the start of the translation unit}} \
// expected-note@#1 {{add 'module;'}}
#endif