[ELF] Add VersionNode lexer state for better version script parsing
... so that `local:*` (as in `local:*;`) will be lexed as three tokens (`local`, `:`, `*`) instead of a single one in a version node. This is used by both version scripts and dynamic lists. Fix #174363. In addition, clean up special code for space-separated `local :` and `global :`. This patch brings our lexer behavior closer to GNU ld. While GNU ld additionally rejects more characters like `~/+,=`, we don't implement this additional validation. Pull Request: https://github.com/llvm/llvm-project/pull/174530
This commit is contained in:
parent
5a63367b15
commit
16be2c0555
@ -124,37 +124,62 @@ void ScriptLexer::lex() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Some operators form separate tokens.
|
||||
if (s.starts_with("<<=") || s.starts_with(">>=")) {
|
||||
curTok = s.substr(0, 3);
|
||||
s = s.substr(3);
|
||||
return;
|
||||
}
|
||||
if (s.size() > 1 && (s[1] == '=' && strchr("+-*/!&^|", s[0]))) {
|
||||
curTok = s.substr(0, 2);
|
||||
s = s.substr(2);
|
||||
return;
|
||||
}
|
||||
// In Script and Expr states, recognize compound assignment operators.
|
||||
auto recognizeAssign = [&]() -> bool {
|
||||
if (s.starts_with("<<=") || s.starts_with(">>=")) {
|
||||
curTok = s.substr(0, 3);
|
||||
s = s.substr(3);
|
||||
return true;
|
||||
}
|
||||
if (s.size() > 1 && (s[1] == '=' && strchr("+-*/!&^|", s[0]))) {
|
||||
curTok = s.substr(0, 2);
|
||||
s = s.substr(2);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Unquoted token. The non-expression token is more relaxed than tokens in
|
||||
// C-like languages, so that you can write "file-name.cpp" as one bare
|
||||
// token.
|
||||
size_t pos;
|
||||
constexpr StringRef scriptAndVersionChars =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789_.$/\\~=+[]*?-!^:";
|
||||
constexpr StringRef exprChars =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789_.$";
|
||||
switch (lexState) {
|
||||
case State::Script:
|
||||
pos = s.find_first_not_of(
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789_.$/\\~=+[]*?-!^:");
|
||||
if (recognizeAssign())
|
||||
return;
|
||||
pos = s.find_first_not_of(scriptAndVersionChars);
|
||||
break;
|
||||
case State::Expr:
|
||||
pos = s.find_first_not_of(
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789_.$");
|
||||
if (recognizeAssign())
|
||||
return;
|
||||
pos = s.find_first_not_of(exprChars);
|
||||
if (pos == 0 && s.size() >= 2 &&
|
||||
((s[0] == s[1] && strchr("<>&|", s[0])) ||
|
||||
is_contained({"==", "!=", "<=", ">=", "<<", ">>"}, s.substr(0, 2))))
|
||||
pos = 2;
|
||||
break;
|
||||
case State::VersionNode:
|
||||
// Treat `:` as a token boundary unless it's part of a scope operator `::`
|
||||
// (for extern "C++"). This behavior resembles GNU ld and allows proper
|
||||
// tokenization of patterns like `local:*`.
|
||||
pos = 0;
|
||||
for (; pos != s.size(); ++pos) {
|
||||
if (s[pos] == ':') {
|
||||
if (pos + 1 != s.size() && s[pos + 1] == ':') {
|
||||
++pos;
|
||||
continue;
|
||||
}
|
||||
} else if (scriptAndVersionChars.contains(s[pos]))
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (pos == 0)
|
||||
|
||||
@ -44,6 +44,8 @@ protected:
|
||||
// Lexer states. The current state selects which characters may form an
// unquoted token.
enum class State {
|
||||
// Relaxed tokens: besides alphanumerics and `_.$`, characters such as
// `/`, `*`, `-`, `=`, and `:` may be part of a bare token.
Script,
|
||||
// Expression tokens: only alphanumerics and `_.$` form a bare token, so
// operators are lexed as separate tokens.
Expr,
|
||||
// Used by version node and dynamic list parsing. Same characters as Script,
// except that `:` ends a token unless it is part of a `::` scope operator,
// so `local:*` is lexed as `local`, `:`, `*`.
VersionNode,
|
||||
};
|
||||
|
||||
struct Token {
|
||||
|
||||
@ -179,6 +179,7 @@ static ExprValue bitOr(LinkerScript &s, ExprValue a, ExprValue b) {
|
||||
}
|
||||
|
||||
void ScriptParser::readDynamicList() {
|
||||
SaveAndRestore saved(lexState, State::VersionNode);
|
||||
expect("{");
|
||||
SmallVector<SymbolVersion, 0> locals;
|
||||
SmallVector<SymbolVersion, 0> globals;
|
||||
@ -207,6 +208,7 @@ void ScriptParser::readVersionScript() {
|
||||
}
|
||||
|
||||
void ScriptParser::readVersionScriptCommand() {
|
||||
SaveAndRestore saved(lexState, State::VersionNode);
|
||||
if (consume("{")) {
|
||||
readAnonymousDeclaration();
|
||||
return;
|
||||
@ -1779,11 +1781,11 @@ ScriptParser::readSymbols() {
|
||||
SmallVector<SymbolVersion, 0> ext = readVersionExtern();
|
||||
v->insert(v->end(), ext.begin(), ext.end());
|
||||
} else {
|
||||
if (tok == "local:" || (tok == "local" && consume(":"))) {
|
||||
if (tok == "local" && consume(":")) {
|
||||
v = &locals;
|
||||
continue;
|
||||
}
|
||||
if (tok == "global:" || (tok == "global" && consume(":"))) {
|
||||
if (tok == "global" && consume(":")) {
|
||||
v = &globals;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
|
||||
# Test that we can parse multiple externs.
|
||||
|
||||
# RUN: rm -rf %t && mkdir %t && cd %t
|
||||
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
|
||||
|
||||
# RUN: echo '{ extern "C" { foo; }; extern "C++" { bar; }; };' > %t.list
|
||||
@ -9,3 +10,12 @@
|
||||
|
||||
# RUN: echo '{ extern "C" { foo }; extern "C++" { bar }; };' > %t.list
|
||||
# RUN: ld.lld --dynamic-list %t.list %t.o -shared -o %t.so
|
||||
|
||||
# RUN: echo '{ extern "C++" { std::foo; }; };' > %t.list
|
||||
# RUN: ld.lld --dynamic-list %t.list %t.o -shared -o %t.so
|
||||
|
||||
# RUN: echo '{ extern "C++" { std:foo; }; };' > a.list
|
||||
# RUN: not ld.lld --dynamic-list a.list %t.o -shared 2>&1 | FileCheck %s --check-prefix=ERR-COLON
|
||||
# RUN: echo '{ extern "C++" { std:::foo; }; };' > a.list
|
||||
# RUN: not ld.lld --dynamic-list a.list %t.o -shared 2>&1 | FileCheck %s --check-prefix=ERR-COLON
|
||||
# ERR-COLON: error: a.list:1: ; expected, but got :
|
||||
|
||||
@ -6,7 +6,8 @@
|
||||
# RUN: llvm-readobj -V %t.so | FileCheck %s
|
||||
|
||||
# RUN: echo "SECTIONS { .text : { bar = foo; *(.text) } }" > %t.script
|
||||
# RUN: echo "VERSION { V { global: foo; bar; local: *; }; }" >> %t.script
|
||||
## `:` in `local:*` is lexed as a separate token.
|
||||
# RUN: echo "VERSION { V { global: foo; bar; local:*; }; }" >> %t.script
|
||||
# RUN: ld.lld -T %t.script -shared --no-undefined-version %t.o -o %t.so
|
||||
# RUN: llvm-readobj -V %t.so | FileCheck %s
|
||||
|
||||
|
||||
@ -8,8 +8,9 @@
|
||||
# RUN: ld.lld --version-script %t.script -shared %t.o %t2.so -o %t.so --fatal-warnings
|
||||
# RUN: llvm-readelf --dyn-syms %t.so | FileCheck --check-prefix=DSO %s
|
||||
|
||||
## `:` in `local:*` is lexed as a separate token.
|
||||
# RUN: echo "# comment" > %t3.script
|
||||
# RUN: echo "{ local: *; # comment" >> %t3.script
|
||||
# RUN: echo "{ local:*; # comment" >> %t3.script
|
||||
# RUN: echo -n "}; # comment" >> %t3.script
|
||||
# RUN: ld.lld --version-script %t3.script -shared %t.o %t2.so -o %t3.so
|
||||
# RUN: llvm-readelf --dyn-syms %t3.so | FileCheck --check-prefix=DSO2 %s
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user