Corentin Jabot 31f4859c3e [Clang] Allow additional mathematical symbols in identifiers.
Implement the proposed UAX Profile
"Mathematical notation profile for default identifiers".

This implements a not-yet-approved Unicode proposal for a vetted
UAX31 identifier profile
https://www.unicode.org/L2/L2022/22230-math-profile.pdf

This change mitigates the reported disruption caused
by the implementation of UAX31 in C++ and C2x,
as these mathematical symbols are commonly used in the
scientific community.

Fixes #54732

Reviewed By: tahonermann, #clang-language-wg

Differential Revision: https://reviews.llvm.org/D137051
2022-12-16 10:20:49 +01:00

107 lines
4.9 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// RUN: %clang_cc1 -fsyntax-only -verify -x c -std=c11 %s
// RUN: %clang_cc1 -fsyntax-only -verify=expected,c2x -x c -std=c2x %s
// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ -std=c++11 %s
// RUN: %clang_cc1 -std=c99 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace
// RUN: %clang_cc1 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace
// This file contains Unicode characters; please do not "fix" them!
extern int x; // expected-warning {{treating Unicode character as whitespace}}
extern int x; // expected-warning {{treating Unicode character as whitespace}}
// CHECK: extern int {{x}}
// CHECK: extern int {{x}}
// Non-ASCII characters used outside of identifiers: in a pragma and in a
// macro replacement list.
#pragma mark ¡Unicode!
#define COPYRIGHT Copyright © 2012
// Two-step stringification: STR() lets its argument be macro-expanded before
// XSTR() applies the # operator, so STR(COPYRIGHT) produces the expanded
// replacement text as a string literal instead of the macro's own name.
#define XSTR(X) #X
#define STR(X) XSTR(X)
// The stringified text must still contain the copyright sign unchanged; the
// pattern on the line after the declaration pins the preprocessed output.
static const char *copyright = STR(COPYRIGHT); // no-warning
// CHECK: static const char *copyright = "Copyright © {{2012}}";
#if PP_ONLY
COPYRIGHT
// CHECK: Copyright © {{2012}}
CHECK : The preprocessor should not complain about Unicode characters like ©.
#endif
// A lone underscore used as an identifier; no diagnostic is listed for it.
int _;
// The 8-digit escape used on the next two lines has the value 0xAAAAAAAA,
// which lies beyond the last Unicode code point (0x10FFFF). It is rejected
// both as part of an identifier and inside a character literal.
extern int X\UAAAAAAAA; // expected-error {{not allowed in an identifier}}
int Y = '\UAAAAAAAA'; // expected-error {{invalid universal character}}
#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L)
extern int ;
extern int 𑩐;
extern int 𐠈;
extern int ;
// Identifiers spelled with escapes instead of literal characters: 4-digit and
// 8-digit universal character names for letters added in recent Unicode
// versions. No diagnostics are listed for these three declarations.
extern int \u1B4C; // BALINESE LETTER ARCHAIC JNYA - Added in Unicode 14
extern int \U00016AA2; // TANGSA LETTER GA - Added in Unicode 14
extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
// Named escape with the character name spelled exactly; no diagnostic listed.
extern int _\N{TANGSA LETTER GA};
// Same name with the spaces removed: the name lookup fails, and the
// declaration then fails to parse as well.
extern int _\N{TANGSALETTERGA}; // expected-error {{'TANGSALETTERGA' is not a valid Unicode character name}} \
// expected-error {{expected ';' after top level declarator}} \
// expected-note {{characters names in Unicode escape sequences are sensitive to case and whitespace}}
extern int 𝛛; // expected-warning {{mathematical notation character <U+1D6DB> in an identifier is a Clang extension}}
extern int ; // expected-error {{character <U+2089> not allowed at the start of an identifier}} \\
expected-warning {{declaration does not declare anything}}
int a¹b; // expected-warning 6{{mathematical notation character}}
// Mathematical notation characters written as escapes (delimited escape,
// named escape at the start of an identifier, named escape after a letter).
// Each declaration is paired with one "mathematical notation character"
// warning and no error.
int \u{221E} = 1; // expected-warning {{mathematical notation character}}
int \N{MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL} = 1;
// expected-warning@-1 {{mathematical notation character}}
int a\N{SUBSCRIPT EQUALS SIGN} = 1; // expected-warning {{mathematical notation character}}
// This character doesn't have the XID_Start property, so the declaration has
// no usable declarator name. The C++ run and the C2x run word the resulting
// error differently, hence the two run-specific prefixes below.
extern int \U00016AC0; // TANGSA DIGIT ZERO // cxx-error {{expected unqualified-id}} \
// c2x-error {{expected identifier or '('}}
// In this branch of the surrounding conditional (C++ or C2x) emoji are not
// identifier characters. Each one is reported as an unexpected character,
// which leaves a declaration that declares nothing.
extern int 🌹; // expected-error {{unexpected character <U+1F339>}} \
expected-warning {{declaration does not declare anything}}
extern int 🫎; // MOOSE (Unicode 15) \
// expected-error {{unexpected character <U+1FACE>}} \
expected-warning {{declaration does not declare anything}}
extern int 👷; // expected-error {{unexpected character <U+1F477>}} \
expected-warning {{declaration does not declare anything}}
extern int 👷; // expected-warning {{declaration does not declare anything}} \
expected-error {{unexpected character <U+1F477>}} \
expected-error {{unexpected character <U+200D>}} \
expected-error {{unexpected character <U+2640>}}
#else
// A 🌹 by any other name....
// In this branch (not C++ and not C2x) the same emoji that is rejected in the
// other branch is used as an ordinary identifier; no diagnostics are listed
// for these declarations.
extern int 🌹;
// Function with an emoji name and an emoji parameter; returns the parameter
// plus one.
int 🌵(int 🌻) { return 🌻+ 1; }
// Uses the emoji-named function and variables together: passes the extern
// variable to the function and returns the result through a local.
int main (void) {
int 🌷 = 🌵(🌹);
return 🌷;
}
int n; = 3; // expected-warning {{treating Unicode character <U+037E> as an identifier character rather than as ';' symbol}}
int *nv = &n;; // expected-warning 2{{treating Unicode character <U+A789> as an identifier character rather than as ':' symbol}}
// expected-warning@-1 {{treating Unicode character <U+037E> as an identifier character rather than as ';' symbol}}
int vautoreturnx; // expected-warning 12{{treating Unicode character}}
int xx;
// expected-warning@-1 {{identifier contains Unicode character <U+2060> that is invisible in some environments}}
// expected-warning@-2 {{identifier contains Unicode character <U+FEFF> that is invisible in some environments}}
// expected-warning@-3 {{identifier contains Unicode character <U+200D> that is invisible in some environments}}
int foobar = 0; // expected-warning {{identifier contains Unicode character <U+200B> that is invisible in some environments}}
int x = foobar; // expected-error {{undeclared identifier}}
int foo; // expected-error {{unexpected character <U+2223>}}
#ifndef PP_ONLY
#define x // expected-error {{macro name must be an identifier}}
#endif
#endif