
I was doing this API conversion to use std::string_view top-down in D149104, but this exposed issues in individual demanglers that needed to get fixed first. There's no issue with the conversion for the Rust demangler, so convert it first. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D149784
1260 lines
28 KiB
C++
1260 lines
28 KiB
C++
//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines a demangler for Rust v0 mangled symbols as specified in
|
|
// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Demangle/Demangle.h"
|
|
#include "llvm/Demangle/StringViewExtras.h"
|
|
#include "llvm/Demangle/Utility.h"
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <limits>
|
|
#include <string_view>
|
|
|
|
using namespace llvm;
|
|
|
|
using llvm::itanium_demangle::OutputBuffer;
|
|
using llvm::itanium_demangle::ScopedOverride;
|
|
using llvm::itanium_demangle::starts_with;
|
|
|
|
namespace {
|
|
|
|
// A parsed identifier: the raw name bytes taken from the mangled input and a
// flag telling whether the identifier carried the punycode marker.
struct Identifier {
  std::string_view Name;
  bool Punycode;

  // True when the identifier has no name bytes at all.
  bool empty() const { return Name.size() == 0; }
};
|
|
|
|
// Basic types recognised by parseBasicType and printed by printBasicType.
enum class BasicType {
  // Boolean and character.
  Bool,
  Char,
  // Signed integers.
  I8,
  I16,
  I32,
  I64,
  I128,
  ISize,
  // Unsigned integers.
  U8,
  U16,
  U32,
  U64,
  U128,
  USize,
  // Floating point.
  F32,
  F64,
  // Remaining basic types: str, `_`, `()`, `...` and `!`.
  Str,
  Placeholder,
  Unit,
  Variadic,
  Never,
};
|
|
|
|
// Tells path demangling whether the path occurs inside a type.
enum class IsInType { No, Yes };
|
|
|
|
// Tells path demangling whether the closing `>` of generic arguments should
// be left out of the output.
enum class LeaveGenericsOpen { No, Yes };
|
|
|
|
class Demangler {
|
|
// Maximum recursion level. Used to avoid stack overflow.
|
|
size_t MaxRecursionLevel;
|
|
// Current recursion level.
|
|
size_t RecursionLevel;
|
|
size_t BoundLifetimes;
|
|
// Input string that is being demangled with "_R" prefix removed.
|
|
std::string_view Input;
|
|
// Position in the input string.
|
|
size_t Position;
|
|
// When true, print methods append the output to the stream.
|
|
// When false, the output is suppressed.
|
|
bool Print;
|
|
// True if an error occurred.
|
|
bool Error;
|
|
|
|
public:
|
|
// Demangled output.
|
|
OutputBuffer Output;
|
|
|
|
Demangler(size_t MaxRecursionLevel = 500);
|
|
|
|
bool demangle(std::string_view MangledName);
|
|
|
|
private:
|
|
bool demanglePath(IsInType Type,
|
|
LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
|
|
void demangleImplPath(IsInType InType);
|
|
void demangleGenericArg();
|
|
void demangleType();
|
|
void demangleFnSig();
|
|
void demangleDynBounds();
|
|
void demangleDynTrait();
|
|
void demangleOptionalBinder();
|
|
void demangleConst();
|
|
void demangleConstInt();
|
|
void demangleConstBool();
|
|
void demangleConstChar();
|
|
|
|
template <typename Callable> void demangleBackref(Callable Demangler) {
|
|
uint64_t Backref = parseBase62Number();
|
|
if (Error || Backref >= Position) {
|
|
Error = true;
|
|
return;
|
|
}
|
|
|
|
if (!Print)
|
|
return;
|
|
|
|
ScopedOverride<size_t> SavePosition(Position, Position);
|
|
Position = Backref;
|
|
Demangler();
|
|
}
|
|
|
|
Identifier parseIdentifier();
|
|
uint64_t parseOptionalBase62Number(char Tag);
|
|
uint64_t parseBase62Number();
|
|
uint64_t parseDecimalNumber();
|
|
uint64_t parseHexNumber(std::string_view &HexDigits);
|
|
|
|
void print(char C);
|
|
void print(std::string_view S);
|
|
void printDecimalNumber(uint64_t N);
|
|
void printBasicType(BasicType);
|
|
void printLifetime(uint64_t Index);
|
|
void printIdentifier(Identifier Ident);
|
|
|
|
char look() const;
|
|
char consume();
|
|
bool consumeIf(char Prefix);
|
|
|
|
bool addAssign(uint64_t &A, uint64_t B);
|
|
bool mulAssign(uint64_t &A, uint64_t B);
|
|
};
|
|
|
|
} // namespace
|
|
|
|
char *llvm::rustDemangle(std::string_view MangledName) {
|
|
// Return early if mangled name doesn't look like a Rust symbol.
|
|
if (MangledName.empty() || !starts_with(MangledName, "_R"))
|
|
return nullptr;
|
|
|
|
Demangler D;
|
|
if (!D.demangle(MangledName)) {
|
|
std::free(D.Output.getBuffer());
|
|
return nullptr;
|
|
}
|
|
|
|
D.Output += '\0';
|
|
|
|
return D.Output.getBuffer();
|
|
}
|
|
|
|
// Creates a demangler that allows at most MaxRecursionLevel nested demangle
// calls. Every member gets a defined starting value here, so no member is
// ever read while indeterminate; demangle() resets the parsing state again
// at the start of each run.
Demangler::Demangler(size_t MaxRecursionLevel)
    : MaxRecursionLevel(MaxRecursionLevel), RecursionLevel(0),
      BoundLifetimes(0), Input(), Position(0), Print(true), Error(false) {}
|
|
|
|
// Returns true if C is a decimal digit <0-9>.
static inline bool isDigit(const char C) {
  return C >= '0' && C <= '9';
}
|
|
|
|
// Returns true if C is a lowercase hexadecimal digit <0-9a-f>.
static inline bool isHexDigit(const char C) {
  if (C >= '0' && C <= '9')
    return true;
  return C >= 'a' && C <= 'f';
}
|
|
|
|
// Returns true if C is a lowercase ASCII letter <a-z>.
static inline bool isLower(const char C) {
  return C >= 'a' && C <= 'z';
}
|
|
|
|
// Returns true if C is an uppercase ASCII letter <A-Z>.
static inline bool isUpper(const char C) {
  return C >= 'A' && C <= 'Z';
}
|
|
|
|
/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
|
|
static inline bool isValid(const char C) {
|
|
return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
|
|
}
|
|
|
|
// Demangles Rust v0 mangled symbol. Returns true when successful, and false
|
|
// otherwise. The demangled symbol is stored in Output field. It is
|
|
// responsibility of the caller to free the memory behind the output stream.
|
|
//
|
|
// <symbol-name> = "_R" <path> [<instantiating-crate>]
|
|
bool Demangler::demangle(std::string_view Mangled) {
|
|
Position = 0;
|
|
Error = false;
|
|
Print = true;
|
|
RecursionLevel = 0;
|
|
BoundLifetimes = 0;
|
|
|
|
if (!starts_with(Mangled, "_R")) {
|
|
Error = true;
|
|
return false;
|
|
}
|
|
Mangled.remove_prefix(2);
|
|
size_t Dot = Mangled.find('.');
|
|
Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
|
|
|
|
demanglePath(IsInType::No);
|
|
|
|
if (Position != Input.size()) {
|
|
ScopedOverride<bool> SavePrint(Print, false);
|
|
demanglePath(IsInType::No);
|
|
}
|
|
|
|
if (Position != Input.size())
|
|
Error = true;
|
|
|
|
if (Dot != std::string_view::npos) {
|
|
print(" (");
|
|
print(Mangled.substr(Dot));
|
|
print(")");
|
|
}
|
|
|
|
return !Error;
|
|
}
|
|
|
|
// Demangles a path. InType indicates whether a path is inside a type. When
|
|
// LeaveOpen is true, a closing `>` after generic arguments is omitted from the
|
|
// output. Return value indicates whether generics arguments have been left
|
|
// open.
|
|
//
|
|
// <path> = "C" <identifier> // crate root
|
|
// | "M" <impl-path> <type> // <T> (inherent impl)
|
|
// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
|
|
// | "Y" <type> <path> // <T as Trait> (trait definition)
|
|
// | "N" <ns> <path> <identifier> // ...::ident (nested path)
|
|
// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
|
|
// | <backref>
|
|
// <identifier> = [<disambiguator>] <undisambiguated-identifier>
|
|
// <ns> = "C" // closure
|
|
// | "S" // shim
|
|
// | <A-Z> // other special namespaces
|
|
// | <a-z> // internal namespaces
|
|
bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
|
|
if (Error || RecursionLevel >= MaxRecursionLevel) {
|
|
Error = true;
|
|
return false;
|
|
}
|
|
ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
|
|
|
|
switch (consume()) {
|
|
case 'C': {
|
|
parseOptionalBase62Number('s');
|
|
printIdentifier(parseIdentifier());
|
|
break;
|
|
}
|
|
case 'M': {
|
|
demangleImplPath(InType);
|
|
print("<");
|
|
demangleType();
|
|
print(">");
|
|
break;
|
|
}
|
|
case 'X': {
|
|
demangleImplPath(InType);
|
|
print("<");
|
|
demangleType();
|
|
print(" as ");
|
|
demanglePath(IsInType::Yes);
|
|
print(">");
|
|
break;
|
|
}
|
|
case 'Y': {
|
|
print("<");
|
|
demangleType();
|
|
print(" as ");
|
|
demanglePath(IsInType::Yes);
|
|
print(">");
|
|
break;
|
|
}
|
|
case 'N': {
|
|
char NS = consume();
|
|
if (!isLower(NS) && !isUpper(NS)) {
|
|
Error = true;
|
|
break;
|
|
}
|
|
demanglePath(InType);
|
|
|
|
uint64_t Disambiguator = parseOptionalBase62Number('s');
|
|
Identifier Ident = parseIdentifier();
|
|
|
|
if (isUpper(NS)) {
|
|
// Special namespaces
|
|
print("::{");
|
|
if (NS == 'C')
|
|
print("closure");
|
|
else if (NS == 'S')
|
|
print("shim");
|
|
else
|
|
print(NS);
|
|
if (!Ident.empty()) {
|
|
print(":");
|
|
printIdentifier(Ident);
|
|
}
|
|
print('#');
|
|
printDecimalNumber(Disambiguator);
|
|
print('}');
|
|
} else {
|
|
// Implementation internal namespaces.
|
|
if (!Ident.empty()) {
|
|
print("::");
|
|
printIdentifier(Ident);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case 'I': {
|
|
demanglePath(InType);
|
|
// Omit "::" when in a type, where it is optional.
|
|
if (InType == IsInType::No)
|
|
print("::");
|
|
print("<");
|
|
for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
|
|
if (I > 0)
|
|
print(", ");
|
|
demangleGenericArg();
|
|
}
|
|
if (LeaveOpen == LeaveGenericsOpen::Yes)
|
|
return true;
|
|
else
|
|
print(">");
|
|
break;
|
|
}
|
|
case 'B': {
|
|
bool IsOpen = false;
|
|
demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
|
|
return IsOpen;
|
|
}
|
|
default:
|
|
Error = true;
|
|
break;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// <impl-path> = [<disambiguator>] <path>
|
|
// <disambiguator> = "s" <base-62-number>
|
|
// Parses an impl path with output suppressed: the optional disambiguator and
// the path are consumed but nothing is printed.
void Demangler::demangleImplPath(IsInType InType) {
  ScopedOverride<bool> Mute(Print, false);
  (void)parseOptionalBase62Number('s');
  (void)demanglePath(InType);
}
|
|
|
|
// <generic-arg> = <lifetime>
|
|
// | <type>
|
|
// | "K" <const>
|
|
// <lifetime> = "L" <base-62-number>
|
|
void Demangler::demangleGenericArg() {
|
|
if (consumeIf('L'))
|
|
printLifetime(parseBase62Number());
|
|
else if (consumeIf('K'))
|
|
demangleConst();
|
|
else
|
|
demangleType();
|
|
}
|
|
|
|
// <basic-type> = "a" // i8
|
|
// | "b" // bool
|
|
// | "c" // char
|
|
// | "d" // f64
|
|
// | "e" // str
|
|
// | "f" // f32
|
|
// | "h" // u8
|
|
// | "i" // isize
|
|
// | "j" // usize
|
|
// | "l" // i32
|
|
// | "m" // u32
|
|
// | "n" // i128
|
|
// | "o" // u128
|
|
// | "s" // i16
|
|
// | "t" // u16
|
|
// | "u" // ()
|
|
// | "v" // ...
|
|
// | "x" // i64
|
|
// | "y" // u64
|
|
// | "z" // !
|
|
// | "p" // placeholder (e.g. for generic params), shown as _
|
|
// Maps a basic-type tag character to its BasicType. On a match stores the
// result in Type and returns true; otherwise leaves Type untouched and
// returns false.
static bool parseBasicType(char C, BasicType &Type) {
  switch (C) {
  case 'a':
    Type = BasicType::I8;
    break;
  case 'b':
    Type = BasicType::Bool;
    break;
  case 'c':
    Type = BasicType::Char;
    break;
  case 'd':
    Type = BasicType::F64;
    break;
  case 'e':
    Type = BasicType::Str;
    break;
  case 'f':
    Type = BasicType::F32;
    break;
  case 'h':
    Type = BasicType::U8;
    break;
  case 'i':
    Type = BasicType::ISize;
    break;
  case 'j':
    Type = BasicType::USize;
    break;
  case 'l':
    Type = BasicType::I32;
    break;
  case 'm':
    Type = BasicType::U32;
    break;
  case 'n':
    Type = BasicType::I128;
    break;
  case 'o':
    Type = BasicType::U128;
    break;
  case 'p':
    Type = BasicType::Placeholder;
    break;
  case 's':
    Type = BasicType::I16;
    break;
  case 't':
    Type = BasicType::U16;
    break;
  case 'u':
    Type = BasicType::Unit;
    break;
  case 'v':
    Type = BasicType::Variadic;
    break;
  case 'x':
    Type = BasicType::I64;
    break;
  case 'y':
    Type = BasicType::U64;
    break;
  case 'z':
    Type = BasicType::Never;
    break;
  default:
    // Not a basic-type tag.
    return false;
  }
  return true;
}
|
|
|
|
// Prints the Rust spelling of a basic type.
void Demangler::printBasicType(BasicType Type) {
  switch (Type) {
  case BasicType::Bool:
    return print("bool");
  case BasicType::Char:
    return print("char");
  case BasicType::I8:
    return print("i8");
  case BasicType::I16:
    return print("i16");
  case BasicType::I32:
    return print("i32");
  case BasicType::I64:
    return print("i64");
  case BasicType::I128:
    return print("i128");
  case BasicType::ISize:
    return print("isize");
  case BasicType::U8:
    return print("u8");
  case BasicType::U16:
    return print("u16");
  case BasicType::U32:
    return print("u32");
  case BasicType::U64:
    return print("u64");
  case BasicType::U128:
    return print("u128");
  case BasicType::USize:
    return print("usize");
  case BasicType::F32:
    return print("f32");
  case BasicType::F64:
    return print("f64");
  case BasicType::Str:
    return print("str");
  case BasicType::Placeholder:
    return print("_");
  case BasicType::Unit:
    return print("()");
  case BasicType::Variadic:
    return print("...");
  case BasicType::Never:
    return print("!");
  }
}
|
|
|
|
// <type> = | <basic-type>
|
|
// | <path> // named type
|
|
// | "A" <type> <const> // [T; N]
|
|
// | "S" <type> // [T]
|
|
// | "T" {<type>} "E" // (T1, T2, T3, ...)
|
|
// | "R" [<lifetime>] <type> // &T
|
|
// | "Q" [<lifetime>] <type> // &mut T
|
|
// | "P" <type> // *const T
|
|
// | "O" <type> // *mut T
|
|
// | "F" <fn-sig> // fn(...) -> ...
|
|
// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
|
|
// | <backref> // backref
|
|
void Demangler::demangleType() {
|
|
if (Error || RecursionLevel >= MaxRecursionLevel) {
|
|
Error = true;
|
|
return;
|
|
}
|
|
ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
|
|
|
|
size_t Start = Position;
|
|
char C = consume();
|
|
BasicType Type;
|
|
if (parseBasicType(C, Type))
|
|
return printBasicType(Type);
|
|
|
|
switch (C) {
|
|
case 'A':
|
|
print("[");
|
|
demangleType();
|
|
print("; ");
|
|
demangleConst();
|
|
print("]");
|
|
break;
|
|
case 'S':
|
|
print("[");
|
|
demangleType();
|
|
print("]");
|
|
break;
|
|
case 'T': {
|
|
print("(");
|
|
size_t I = 0;
|
|
for (; !Error && !consumeIf('E'); ++I) {
|
|
if (I > 0)
|
|
print(", ");
|
|
demangleType();
|
|
}
|
|
if (I == 1)
|
|
print(",");
|
|
print(")");
|
|
break;
|
|
}
|
|
case 'R':
|
|
case 'Q':
|
|
print('&');
|
|
if (consumeIf('L')) {
|
|
if (auto Lifetime = parseBase62Number()) {
|
|
printLifetime(Lifetime);
|
|
print(' ');
|
|
}
|
|
}
|
|
if (C == 'Q')
|
|
print("mut ");
|
|
demangleType();
|
|
break;
|
|
case 'P':
|
|
print("*const ");
|
|
demangleType();
|
|
break;
|
|
case 'O':
|
|
print("*mut ");
|
|
demangleType();
|
|
break;
|
|
case 'F':
|
|
demangleFnSig();
|
|
break;
|
|
case 'D':
|
|
demangleDynBounds();
|
|
if (consumeIf('L')) {
|
|
if (auto Lifetime = parseBase62Number()) {
|
|
print(" + ");
|
|
printLifetime(Lifetime);
|
|
}
|
|
} else {
|
|
Error = true;
|
|
}
|
|
break;
|
|
case 'B':
|
|
demangleBackref([&] { demangleType(); });
|
|
break;
|
|
default:
|
|
Position = Start;
|
|
demanglePath(IsInType::Yes);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
|
|
// <abi> = "C"
|
|
// | <undisambiguated-identifier>
|
|
void Demangler::demangleFnSig() {
|
|
ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
|
|
demangleOptionalBinder();
|
|
|
|
if (consumeIf('U'))
|
|
print("unsafe ");
|
|
|
|
if (consumeIf('K')) {
|
|
print("extern \"");
|
|
if (consumeIf('C')) {
|
|
print("C");
|
|
} else {
|
|
Identifier Ident = parseIdentifier();
|
|
if (Ident.Punycode)
|
|
Error = true;
|
|
for (char C : Ident.Name) {
|
|
// When mangling ABI string, the "-" is replaced with "_".
|
|
if (C == '_')
|
|
C = '-';
|
|
print(C);
|
|
}
|
|
}
|
|
print("\" ");
|
|
}
|
|
|
|
print("fn(");
|
|
for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
|
|
if (I > 0)
|
|
print(", ");
|
|
demangleType();
|
|
}
|
|
print(")");
|
|
|
|
if (consumeIf('u')) {
|
|
// Skip the unit type from the output.
|
|
} else {
|
|
print(" -> ");
|
|
demangleType();
|
|
}
|
|
}
|
|
|
|
// <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
|
|
void Demangler::demangleDynBounds() {
|
|
ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
|
|
print("dyn ");
|
|
demangleOptionalBinder();
|
|
for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
|
|
if (I > 0)
|
|
print(" + ");
|
|
demangleDynTrait();
|
|
}
|
|
}
|
|
|
|
// <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
|
|
// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
|
|
void Demangler::demangleDynTrait() {
|
|
bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
|
|
while (!Error && consumeIf('p')) {
|
|
if (!IsOpen) {
|
|
IsOpen = true;
|
|
print('<');
|
|
} else {
|
|
print(", ");
|
|
}
|
|
print(parseIdentifier().Name);
|
|
print(" = ");
|
|
demangleType();
|
|
}
|
|
if (IsOpen)
|
|
print(">");
|
|
}
|
|
|
|
// Demangles optional binder and updates the number of bound lifetimes.
|
|
//
|
|
// <binder> = "G" <base-62-number>
|
|
void Demangler::demangleOptionalBinder() {
|
|
uint64_t Binder = parseOptionalBase62Number('G');
|
|
if (Error || Binder == 0)
|
|
return;
|
|
|
|
// In valid inputs each bound lifetime is referenced later. Referencing a
|
|
// lifetime requires at least one byte of input. Reject inputs that are too
|
|
// short to reference all bound lifetimes. Otherwise demangling of invalid
|
|
// binders could generate excessive amounts of output.
|
|
if (Binder >= Input.size() - BoundLifetimes) {
|
|
Error = true;
|
|
return;
|
|
}
|
|
|
|
print("for<");
|
|
for (size_t I = 0; I != Binder; ++I) {
|
|
BoundLifetimes += 1;
|
|
if (I > 0)
|
|
print(", ");
|
|
printLifetime(1);
|
|
}
|
|
print("> ");
|
|
}
|
|
|
|
// <const> = <basic-type> <const-data>
|
|
// | "p" // placeholder
|
|
// | <backref>
|
|
void Demangler::demangleConst() {
|
|
if (Error || RecursionLevel >= MaxRecursionLevel) {
|
|
Error = true;
|
|
return;
|
|
}
|
|
ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
|
|
|
|
char C = consume();
|
|
BasicType Type;
|
|
if (parseBasicType(C, Type)) {
|
|
switch (Type) {
|
|
case BasicType::I8:
|
|
case BasicType::I16:
|
|
case BasicType::I32:
|
|
case BasicType::I64:
|
|
case BasicType::I128:
|
|
case BasicType::ISize:
|
|
case BasicType::U8:
|
|
case BasicType::U16:
|
|
case BasicType::U32:
|
|
case BasicType::U64:
|
|
case BasicType::U128:
|
|
case BasicType::USize:
|
|
demangleConstInt();
|
|
break;
|
|
case BasicType::Bool:
|
|
demangleConstBool();
|
|
break;
|
|
case BasicType::Char:
|
|
demangleConstChar();
|
|
break;
|
|
case BasicType::Placeholder:
|
|
print('_');
|
|
break;
|
|
default:
|
|
Error = true;
|
|
break;
|
|
}
|
|
} else if (C == 'B') {
|
|
demangleBackref([&] { demangleConst(); });
|
|
} else {
|
|
Error = true;
|
|
}
|
|
}
|
|
|
|
// <const-data> = ["n"] <hex-number>
|
|
void Demangler::demangleConstInt() {
|
|
if (consumeIf('n'))
|
|
print('-');
|
|
|
|
std::string_view HexDigits;
|
|
uint64_t Value = parseHexNumber(HexDigits);
|
|
if (HexDigits.size() <= 16) {
|
|
printDecimalNumber(Value);
|
|
} else {
|
|
print("0x");
|
|
print(HexDigits);
|
|
}
|
|
}
|
|
|
|
// <const-data> = "0_" // false
|
|
// | "1_" // true
|
|
void Demangler::demangleConstBool() {
|
|
std::string_view HexDigits;
|
|
parseHexNumber(HexDigits);
|
|
if (HexDigits == "0")
|
|
print("false");
|
|
else if (HexDigits == "1")
|
|
print("true");
|
|
else
|
|
Error = true;
|
|
}
|
|
|
|
/// Returns true if CodePoint is a printable ASCII character, i.e. lies in the
/// range 0x20..0x7e inclusive.
static bool isAsciiPrintable(uint64_t CodePoint) {
  return !(CodePoint < 0x20 || CodePoint > 0x7e);
}
|
|
|
|
// <const-data> = <hex-number>
|
|
void Demangler::demangleConstChar() {
|
|
std::string_view HexDigits;
|
|
uint64_t CodePoint = parseHexNumber(HexDigits);
|
|
if (Error || HexDigits.size() > 6) {
|
|
Error = true;
|
|
return;
|
|
}
|
|
|
|
print("'");
|
|
switch (CodePoint) {
|
|
case '\t':
|
|
print(R"(\t)");
|
|
break;
|
|
case '\r':
|
|
print(R"(\r)");
|
|
break;
|
|
case '\n':
|
|
print(R"(\n)");
|
|
break;
|
|
case '\\':
|
|
print(R"(\\)");
|
|
break;
|
|
case '"':
|
|
print(R"(")");
|
|
break;
|
|
case '\'':
|
|
print(R"(\')");
|
|
break;
|
|
default:
|
|
if (isAsciiPrintable(CodePoint)) {
|
|
char C = CodePoint;
|
|
print(C);
|
|
} else {
|
|
print(R"(\u{)");
|
|
print(HexDigits);
|
|
print('}');
|
|
}
|
|
break;
|
|
}
|
|
print('\'');
|
|
}
|
|
|
|
// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
|
|
// Parses an identifier: optional 'u' punycode marker, decimal byte count,
// optional '_' separator, then that many bytes. Returns an empty identifier
// and sets Error if the count exceeds the remaining input or a byte is not a
// valid mangled character.
Identifier Demangler::parseIdentifier() {
  const bool IsPunycode = consumeIf('u');
  const uint64_t Length = parseDecimalNumber();

  // Underscore resolves the ambiguity when identifier starts with a decimal
  // digit or another underscore.
  consumeIf('_');

  if (Error || Length > Input.size() - Position) {
    Error = true;
    return {};
  }
  const std::string_view Bytes = Input.substr(Position, Length);
  Position += Length;

  for (const char Ch : Bytes) {
    if (!isValid(Ch)) {
      Error = true;
      return {};
    }
  }

  return {Bytes, IsPunycode};
}
|
|
|
|
// Parses optional base 62 number. The presence of a number is determined using
|
|
// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
|
|
//
|
|
// This function is indended for parsing disambiguators and binders which when
|
|
// not present have their value interpreted as 0, and otherwise as decoded
|
|
// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
|
|
// 2. When "G" is absent value is 0.
|
|
uint64_t Demangler::parseOptionalBase62Number(char Tag) {
|
|
if (!consumeIf(Tag))
|
|
return 0;
|
|
|
|
uint64_t N = parseBase62Number();
|
|
if (Error || !addAssign(N, 1))
|
|
return 0;
|
|
|
|
return N;
|
|
}
|
|
|
|
// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
|
|
// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
|
|
// "1_" encodes 2, etc.
|
|
//
|
|
// <base-62-number> = {<0-9a-zA-Z>} "_"
|
|
uint64_t Demangler::parseBase62Number() {
|
|
if (consumeIf('_'))
|
|
return 0;
|
|
|
|
uint64_t Value = 0;
|
|
|
|
while (true) {
|
|
uint64_t Digit;
|
|
char C = consume();
|
|
|
|
if (C == '_') {
|
|
break;
|
|
} else if (isDigit(C)) {
|
|
Digit = C - '0';
|
|
} else if (isLower(C)) {
|
|
Digit = 10 + (C - 'a');
|
|
} else if (isUpper(C)) {
|
|
Digit = 10 + 26 + (C - 'A');
|
|
} else {
|
|
Error = true;
|
|
return 0;
|
|
}
|
|
|
|
if (!mulAssign(Value, 62))
|
|
return 0;
|
|
|
|
if (!addAssign(Value, Digit))
|
|
return 0;
|
|
}
|
|
|
|
if (!addAssign(Value, 1))
|
|
return 0;
|
|
|
|
return Value;
|
|
}
|
|
|
|
// Parses a decimal number that had been encoded without any leading zeros.
|
|
//
|
|
// <decimal-number> = "0"
|
|
// | <1-9> {<0-9>}
|
|
uint64_t Demangler::parseDecimalNumber() {
|
|
char C = look();
|
|
if (!isDigit(C)) {
|
|
Error = true;
|
|
return 0;
|
|
}
|
|
|
|
if (C == '0') {
|
|
consume();
|
|
return 0;
|
|
}
|
|
|
|
uint64_t Value = 0;
|
|
|
|
while (isDigit(look())) {
|
|
if (!mulAssign(Value, 10)) {
|
|
Error = true;
|
|
return 0;
|
|
}
|
|
|
|
uint64_t D = consume() - '0';
|
|
if (!addAssign(Value, D))
|
|
return 0;
|
|
}
|
|
|
|
return Value;
|
|
}
|
|
|
|
// Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
|
|
// value and stores hex digits in HexDigits. The return value is unspecified if
|
|
// HexDigits.size() > 16.
|
|
//
|
|
// <hex-number> = "0_"
|
|
// | <1-9a-f> {<0-9a-f>} "_"
|
|
uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
|
|
size_t Start = Position;
|
|
uint64_t Value = 0;
|
|
|
|
if (!isHexDigit(look()))
|
|
Error = true;
|
|
|
|
if (consumeIf('0')) {
|
|
if (!consumeIf('_'))
|
|
Error = true;
|
|
} else {
|
|
while (!Error && !consumeIf('_')) {
|
|
char C = consume();
|
|
Value *= 16;
|
|
if (isDigit(C))
|
|
Value += C - '0';
|
|
else if ('a' <= C && C <= 'f')
|
|
Value += 10 + (C - 'a');
|
|
else
|
|
Error = true;
|
|
}
|
|
}
|
|
|
|
if (Error) {
|
|
HexDigits = std::string_view();
|
|
return 0;
|
|
}
|
|
|
|
size_t End = Position - 1;
|
|
assert(Start < End);
|
|
HexDigits = Input.substr(Start, End - Start);
|
|
return Value;
|
|
}
|
|
|
|
void Demangler::print(char C) {
|
|
if (Error || !Print)
|
|
return;
|
|
|
|
Output += C;
|
|
}
|
|
|
|
void Demangler::print(std::string_view S) {
|
|
if (Error || !Print)
|
|
return;
|
|
|
|
Output += S;
|
|
}
|
|
|
|
void Demangler::printDecimalNumber(uint64_t N) {
|
|
if (Error || !Print)
|
|
return;
|
|
|
|
Output << N;
|
|
}
|
|
|
|
// Prints a lifetime. An index 0 always represents an erased lifetime. Indices
|
|
// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
|
|
// bound by one of the enclosing binders.
|
|
void Demangler::printLifetime(uint64_t Index) {
|
|
if (Index == 0) {
|
|
print("'_");
|
|
return;
|
|
}
|
|
|
|
if (Index - 1 >= BoundLifetimes) {
|
|
Error = true;
|
|
return;
|
|
}
|
|
|
|
uint64_t Depth = BoundLifetimes - Index;
|
|
print('\'');
|
|
if (Depth < 26) {
|
|
char C = 'a' + Depth;
|
|
print(C);
|
|
} else {
|
|
print('z');
|
|
printDecimalNumber(Depth - 26 + 1);
|
|
}
|
|
}
|
|
|
|
static inline bool decodePunycodeDigit(char C, size_t &Value) {
|
|
if (isLower(C)) {
|
|
Value = C - 'a';
|
|
return true;
|
|
}
|
|
|
|
if (isDigit(C)) {
|
|
Value = 26 + (C - '0');
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Removes every NUL byte from the output at or after StartIdx, moving the
// remaining bytes down and shrinking the current position to match.
static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
  char *const Base = Output.getBuffer();
  char *const OldEnd = Base + Output.getCurrentPosition();
  char *const NewEnd = std::remove(Base + StartIdx, OldEnd, '\0');
  Output.setCurrentPosition(NewEnd - Base);
}
|
|
|
|
// Encodes CodePoint as UTF-8 into Output, writing one to four bytes and
// leaving the rest of Output untouched. Returns false, writing nothing, if
// CodePoint is not a valid unicode scalar value (a surrogate or a value above
// 0x10FFFF).
static inline bool encodeUTF8(size_t CodePoint, char *Output) {
  const bool IsSurrogate = CodePoint >= 0xD800 && CodePoint <= 0xDFFF;
  if (IsSurrogate || CodePoint > 0x10FFFF)
    return false;

  // Continuation byte holding the six bits that start at bit Shift.
  const auto Continuation = [CodePoint](unsigned Shift) -> char {
    return static_cast<char>(0x80 | ((CodePoint >> Shift) & 0x3F));
  };

  if (CodePoint <= 0x7F) {
    Output[0] = static_cast<char>(CodePoint);
  } else if (CodePoint <= 0x7FF) {
    Output[0] = static_cast<char>(0xC0 | ((CodePoint >> 6) & 0x3F));
    Output[1] = Continuation(0);
  } else if (CodePoint <= 0xFFFF) {
    Output[0] = static_cast<char>(0xE0 | (CodePoint >> 12));
    Output[1] = Continuation(6);
    Output[2] = Continuation(0);
  } else {
    Output[0] = static_cast<char>(0xF0 | (CodePoint >> 18));
    Output[1] = Continuation(12);
    Output[2] = Continuation(6);
    Output[3] = Continuation(0);
  }
  return true;
}
|
|
|
|
// Decodes string encoded using punycode and appends results to Output.
|
|
// Returns true if decoding was successful.
|
|
static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
|
|
size_t OutputSize = Output.getCurrentPosition();
|
|
size_t InputIdx = 0;
|
|
|
|
// Rust uses an underscore as a delimiter.
|
|
size_t DelimiterPos = std::string_view::npos;
|
|
for (size_t I = 0; I != Input.size(); ++I)
|
|
if (Input[I] == '_')
|
|
DelimiterPos = I;
|
|
|
|
if (DelimiterPos != std::string_view::npos) {
|
|
// Copy basic code points before the last delimiter to the output.
|
|
for (; InputIdx != DelimiterPos; ++InputIdx) {
|
|
char C = Input[InputIdx];
|
|
if (!isValid(C))
|
|
return false;
|
|
// Code points are padded with zeros while decoding is in progress.
|
|
char UTF8[4] = {C};
|
|
Output += std::string_view(UTF8, 4);
|
|
}
|
|
// Skip over the delimiter.
|
|
++InputIdx;
|
|
}
|
|
|
|
size_t Base = 36;
|
|
size_t Skew = 38;
|
|
size_t Bias = 72;
|
|
size_t N = 0x80;
|
|
size_t TMin = 1;
|
|
size_t TMax = 26;
|
|
size_t Damp = 700;
|
|
|
|
auto Adapt = [&](size_t Delta, size_t NumPoints) {
|
|
Delta /= Damp;
|
|
Delta += Delta / NumPoints;
|
|
Damp = 2;
|
|
|
|
size_t K = 0;
|
|
while (Delta > (Base - TMin) * TMax / 2) {
|
|
Delta /= Base - TMin;
|
|
K += Base;
|
|
}
|
|
return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
|
|
};
|
|
|
|
// Main decoding loop.
|
|
for (size_t I = 0; InputIdx != Input.size(); I += 1) {
|
|
size_t OldI = I;
|
|
size_t W = 1;
|
|
size_t Max = std::numeric_limits<size_t>::max();
|
|
for (size_t K = Base; true; K += Base) {
|
|
if (InputIdx == Input.size())
|
|
return false;
|
|
char C = Input[InputIdx++];
|
|
size_t Digit = 0;
|
|
if (!decodePunycodeDigit(C, Digit))
|
|
return false;
|
|
|
|
if (Digit > (Max - I) / W)
|
|
return false;
|
|
I += Digit * W;
|
|
|
|
size_t T;
|
|
if (K <= Bias)
|
|
T = TMin;
|
|
else if (K >= Bias + TMax)
|
|
T = TMax;
|
|
else
|
|
T = K - Bias;
|
|
|
|
if (Digit < T)
|
|
break;
|
|
|
|
if (W > Max / (Base - T))
|
|
return false;
|
|
W *= (Base - T);
|
|
}
|
|
size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
|
|
Bias = Adapt(I - OldI, NumPoints);
|
|
|
|
if (I / NumPoints > Max - N)
|
|
return false;
|
|
N += I / NumPoints;
|
|
I = I % NumPoints;
|
|
|
|
// Insert N at position I in the output.
|
|
char UTF8[4] = {};
|
|
if (!encodeUTF8(N, UTF8))
|
|
return false;
|
|
Output.insert(OutputSize + I * 4, UTF8, 4);
|
|
}
|
|
|
|
removeNullBytes(Output, OutputSize);
|
|
return true;
|
|
}
|
|
|
|
// Prints an identifier unless an error occurred or printing is suppressed.
// Punycode identifiers are decoded straight into the output, and a decoding
// failure sets Error; other identifiers are printed as is.
void Demangler::printIdentifier(Identifier Ident) {
  if (Error || !Print)
    return;

  if (!Ident.Punycode) {
    print(Ident.Name);
    return;
  }

  if (!decodePunycode(Ident.Name, Output))
    Error = true;
}
|
|
|
|
char Demangler::look() const {
|
|
if (Error || Position >= Input.size())
|
|
return 0;
|
|
|
|
return Input[Position];
|
|
}
|
|
|
|
char Demangler::consume() {
|
|
if (Error || Position >= Input.size()) {
|
|
Error = true;
|
|
return 0;
|
|
}
|
|
|
|
return Input[Position++];
|
|
}
|
|
|
|
bool Demangler::consumeIf(char Prefix) {
|
|
if (Error || Position >= Input.size() || Input[Position] != Prefix)
|
|
return false;
|
|
|
|
Position += 1;
|
|
return true;
|
|
}
|
|
|
|
/// Computes A + B. When computation wraps around sets the error and returns
|
|
/// false. Otherwise assigns the result to A and returns true.
|
|
bool Demangler::addAssign(uint64_t &A, uint64_t B) {
|
|
if (A > std::numeric_limits<uint64_t>::max() - B) {
|
|
Error = true;
|
|
return false;
|
|
}
|
|
|
|
A += B;
|
|
return true;
|
|
}
|
|
|
|
/// Computes A * B. When computation wraps around sets the error and returns
|
|
/// false. Otherwise assigns the result to A and returns true.
|
|
bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
|
|
if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
|
|
Error = true;
|
|
return false;
|
|
}
|
|
|
|
A *= B;
|
|
return true;
|
|
}
|