[llvm-pdbutil] Dump and parse unknown records (#180761)

When new record types or leaf kinds are added to CodeView, we should
still dump all the information we have on these records, even if we
cannot decode them yet.

- If a type or symbol is unknown, its raw data is now always shown.
Previously, you had to pass `--sym-data` or `--type-data`, which shows
the raw data for every record, not only the unknown ones.
- When converting to/from YAML, unknown records are now included.
Previously, the conversion failed on them.

I stumbled upon this when checking PDBs from C++ 20 coroutines compiled
with MSVC. These contain the symbol records `0x1171` and `0x117c`, which
we don't handle yet.
This commit is contained in:
Nerixyz 2026-02-20 12:28:41 +01:00 committed by GitHub
parent b82c7fc652
commit 5f953c157c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 176 additions and 16 deletions

View File

@ -81,6 +81,7 @@ void ScalarEnumerationTraits<SymbolKind>::enumeration(IO &io,
auto SymbolNames = getSymbolTypeNames();
for (const auto &E : SymbolNames)
io.enumCase(Value, E.Name, E.Value);
io.enumFallback<yaml::Hex16>(Value);
}
void ScalarBitSetTraits<CompileSym2Flags>::bitset(IO &io,

View File

@ -24,6 +24,7 @@
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
@ -86,6 +87,31 @@ struct LeafRecordBase {
virtual Error fromCodeViewRecord(CVType Type) = 0;
};
/// YAML representation of a type record whose leaf kind has no dedicated
/// record class. The payload is carried around as an opaque byte blob so the
/// record can be round-tripped without being understood.
struct UnknownLeafRecord : public LeafRecordBase {
  explicit UnknownLeafRecord(TypeLeafKind K) : LeafRecordBase(K) {}

  /// Maps the raw payload to/from YAML; defined out of line.
  void map(yaml::IO &IO) override;

  /// Serializes the record as a RecordPrefix followed by the raw payload,
  /// appends it to \p TS and returns the appended record.
  CVType toCodeViewRecord(AppendingTypeTableBuilder &TS) const override {
    const uint32_t TotalLen = sizeof(RecordPrefix) + Data.size();

    RecordPrefix Prefix;
    Prefix.RecordKind = Kind;
    // RecordLen covers everything that follows the 2-byte length field.
    Prefix.RecordLen = TotalLen - 2;

    std::vector<uint8_t> Bytes(TotalLen);
    ::memcpy(Bytes.data(), &Prefix, sizeof(RecordPrefix));
    // An empty vector may hand out a null data() pointer, and memcpy must not
    // be given a null pointer even for a zero-length copy.
    if (!Data.empty())
      ::memcpy(Bytes.data() + sizeof(RecordPrefix), Data.data(), Data.size());

    // Register the record with the builder instead of only borrowing its
    // allocator, so that consumers walking TS.records() also see it.
    // insertRecordBytes() copies the bytes into the builder's storage and
    // rebinds Stable to that copy.
    // NOTE(review): relies on AppendingTypeTableBuilder::insertRecordBytes()
    // and records(), which are declared outside this view - confirm.
    ArrayRef<uint8_t> Stable(Bytes);
    TS.insertRecordBytes(Stable);
    return CVType(TS.records().back());
  }

  /// Captures the kind and the payload (the bytes returned by content()) of
  /// \p Type. Never fails.
  Error fromCodeViewRecord(CVType Type) override {
    this->Kind = Type.kind();
    Data = Type.content();
    return Error::success();
  }

  /// Raw record payload, without the RecordPrefix.
  std::vector<uint8_t> Data;
};
template <typename T> struct LeafRecordImpl : public LeafRecordBase {
explicit LeafRecordImpl(TypeLeafKind K)
: LeafRecordBase(K), Record(static_cast<TypeRecordKind>(K)) {}
@ -200,6 +226,7 @@ void ScalarEnumerationTraits<TypeLeafKind>::enumeration(IO &io,
#define CV_TYPE(name, val) io.enumCase(Value, #name, name);
#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
#undef CV_TYPE
io.enumFallback<Hex16>(Value);
}
void ScalarEnumerationTraits<PointerToMemberRepresentation>::enumeration(
@ -397,6 +424,19 @@ namespace llvm {
namespace CodeViewYAML {
namespace detail {
/// Maps the payload of an unknown leaf record to/from the required YAML key
/// "Data" using yaml::BinaryRef.
void UnknownLeafRecord::map(IO &IO) {
  const bool Writing = IO.outputting();

  yaml::BinaryRef Blob;
  if (Writing)
    Blob = yaml::BinaryRef(Data);

  IO.mapRequired("Data", Blob);
  if (Writing)
    return;

  // Reading: turn the parsed value back into raw bytes and keep our own copy
  // of them in Data.
  std::string Decoded;
  raw_string_ostream Sink(Decoded);
  Blob.writeAsBinary(Sink);
  Data.assign(Decoded.begin(), Decoded.end());
}
template <> void LeafRecordImpl<ModifierRecord>::map(IO &IO) {
IO.mapRequired("ModifiedType", Record.ModifiedType);
IO.mapRequired("Modifiers", Record.Modifiers);
@ -678,7 +718,7 @@ template <typename T>
static inline Expected<LeafRecord> fromCodeViewRecordImpl(CVType Type) {
LeafRecord Result;
auto Impl = std::make_shared<LeafRecordImpl<T>>(Type.kind());
auto Impl = std::make_shared<T>(Type.kind());
if (auto EC = Impl->fromCodeViewRecord(Type))
return std::move(EC);
Result.Leaf = std::move(Impl);
@ -688,7 +728,7 @@ static inline Expected<LeafRecord> fromCodeViewRecordImpl(CVType Type) {
Expected<LeafRecord> LeafRecord::fromCodeViewRecord(CVType Type) {
#define TYPE_RECORD(EnumName, EnumVal, ClassName) \
case EnumName: \
return fromCodeViewRecordImpl<ClassName##Record>(Type);
return fromCodeViewRecordImpl<LeafRecordImpl<ClassName##Record>>(Type);
#define TYPE_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \
TYPE_RECORD(EnumName, EnumVal, ClassName)
#define MEMBER_RECORD(EnumName, EnumVal, ClassName)
@ -696,9 +736,8 @@ Expected<LeafRecord> LeafRecord::fromCodeViewRecord(CVType Type) {
switch (Type.kind()) {
#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
default:
llvm_unreachable("Unknown leaf kind!");
return fromCodeViewRecordImpl<UnknownLeafRecord>(Type);
}
return make_error<CodeViewError>(cv_error_code::corrupt_record);
}
CVType
@ -724,7 +763,7 @@ template <typename ConcreteType>
static void mapLeafRecordImpl(IO &IO, const char *Class, TypeLeafKind Kind,
LeafRecord &Obj) {
if (!IO.outputting())
Obj.Leaf = std::make_shared<LeafRecordImpl<ConcreteType>>(Kind);
Obj.Leaf = std::make_shared<ConcreteType>(Kind);
if (Kind == LF_FIELDLIST)
Obj.Leaf->map(IO);
@ -740,7 +779,8 @@ void MappingTraits<LeafRecord>::mapping(IO &IO, LeafRecord &Obj) {
#define TYPE_RECORD(EnumName, EnumVal, ClassName) \
case EnumName: \
mapLeafRecordImpl<ClassName##Record>(IO, #ClassName, Kind, Obj); \
mapLeafRecordImpl<LeafRecordImpl<ClassName##Record>>(IO, #ClassName, Kind, \
Obj); \
break;
#define TYPE_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \
TYPE_RECORD(EnumName, EnumVal, ClassName)
@ -748,7 +788,8 @@ void MappingTraits<LeafRecord>::mapping(IO &IO, LeafRecord &Obj) {
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName)
switch (Kind) {
#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
default: { llvm_unreachable("Unknown leaf kind!"); }
default:
mapLeafRecordImpl<UnknownLeafRecord>(IO, "UnknownLeaf", Kind, Obj);
}
}

View File

@ -0,0 +1,50 @@
# RUN: llvm-pdbutil yaml2pdb %s --pdb=%t.pdb
# RUN: llvm-pdbutil dump --symbols %t.pdb | FileCheck --check-prefix=CHECK-YAML2PDB %s
# RUN: llvm-pdbutil dump --symbols --sym-data %t.pdb | FileCheck --check-prefix=CHECK-YAML2PDB,CHECK-YAML2PDB-SYMDATA %s
# RUN: llvm-pdbutil pdb2yaml --module-syms %t.pdb > %t.yaml
# RUN: FileCheck --input-file=%t.yaml --check-prefix=CHECK-PDB2YAML %s
# CHECK-YAML2PDB: Symbols
# CHECK-YAML2PDB: ============================================================
# CHECK-YAML2PDB: Mod 0000 | `F:\Dev\testing\test.obj`:
# CHECK-YAML2PDB: 4 | S_OBJNAME [size = 32] sig=0, `F:\Dev\testing\test.obj`
# CHECK-YAML2PDB-SYMDATA: bytes (
# CHECK-YAML2PDB-SYMDATA: 0000: 00000000 463A5C44 65765C74 65737469 6E675C74 6573742E 6F626A00 |....F:\Dev\testing\test.obj.|
# CHECK-YAML2PDB-SYMDATA: )
# CHECK-YAML2PDB: 36 | unknown (64188) [size = 36]
# CHECK-YAML2PDB: bytes (
# CHECK-YAML2PDB: 0000: 20000000 12100000 00000000 4F015F5F 636F726F 5F667261 6D655F70 74720000 | ...........O.__coro_frame_ptr..|
# CHECK-YAML2PDB: )
# CHECK-PDB2YAML: DbiStream:
# CHECK-PDB2YAML: Modules:
# CHECK-PDB2YAML: - Module: 'F:\Dev\testing\test.obj'
# CHECK-PDB2YAML: ObjFile: 'F:\Dev\testing\test.obj'
# CHECK-PDB2YAML: Modi:
# CHECK-PDB2YAML: Signature: 4
# CHECK-PDB2YAML: Records:
# CHECK-PDB2YAML: - Kind: S_OBJNAME
# CHECK-PDB2YAML: ObjNameSym:
# CHECK-PDB2YAML: Signature: 0
# CHECK-PDB2YAML: ObjectName: 'F:\Dev\testing\test.obj'
# CHECK-PDB2YAML: - Kind: 0xFABC
# CHECK-PDB2YAML: UnknownSym:
# CHECK-PDB2YAML: Data: 2000000012100000000000004F015F5F636F726F5F6672616D655F7074720000
---
DbiStream:
Modules:
- Module: 'F:\Dev\testing\test.obj'
ObjFile: 'F:\Dev\testing\test.obj'
Modi:
Signature: 4
Records:
- Kind: S_OBJNAME
ObjNameSym:
Signature: 0
ObjectName: 'F:\Dev\testing\test.obj'
- Kind: 0xFABC
UnknownSym:
Data: 2000000012100000000000004F015F5F636F726F5F6672616D655F7074720000
...

View File

@ -0,0 +1,41 @@
# RUN: llvm-pdbutil yaml2pdb %s --pdb=%t.pdb
# RUN: llvm-pdbutil dump --types %t.pdb | FileCheck --check-prefix=CHECK-YAML2PDB %s
# RUN: llvm-pdbutil dump --types --type-data %t.pdb | FileCheck --check-prefix=CHECK-YAML2PDB,CHECK-YAML2PDB-TYPE-DATA %s
# RUN: llvm-pdbutil pdb2yaml --tpi-stream %t.pdb > %t.yaml
# RUN: FileCheck --input-file=%t.yaml --check-prefix=CHECK-PDB2YAML %s
# CHECK-YAML2PDB: Types (TPI Stream)
# CHECK-YAML2PDB: ============================================================
# CHECK-YAML2PDB: Showing 2 records
# CHECK-YAML2PDB: 0x1000 | UNKNOWN RECORD (0xF123) [size = 12]
# CHECK-YAML2PDB: bytes (
# CHECK-YAML2PDB: 0000: 0A0023F1 01234567 89ABCDEF |..#..#Eg....|
# CHECK-YAML2PDB: )
# CHECK-YAML2PDB: 0x1001 | LF_ARGLIST [size = 12]
# CHECK-YAML2PDB: 0x0103 (std::nullptr_t): `std::nullptr_t`
# CHECK-YAML2PDB-TYPE-DATA: bytes (
# CHECK-YAML2PDB-TYPE-DATA: 0000: 0A000112 01000000 03010000 |............|
# CHECK-YAML2PDB-TYPE-DATA: )
# CHECK-PDB2YAML: TpiStream:
# CHECK-PDB2YAML: Version: VC80
# CHECK-PDB2YAML: Records:
# CHECK-PDB2YAML: - Kind: 0xF123
# CHECK-PDB2YAML: UnknownLeaf:
# CHECK-PDB2YAML: Data: 0123456789ABCDEF
# CHECK-PDB2YAML: - Kind: LF_ARGLIST
# CHECK-PDB2YAML: ArgList:
# CHECK-PDB2YAML: ArgIndices: [ 259 ]
---
TpiStream:
Version: VC80
Records:
- Kind: 0xF123
UnknownLeaf:
Data: 0123456789ABCDEF
- Kind: LF_ARGLIST
ArgList:
ArgIndices: [ 259 ]
...

View File

@ -396,14 +396,23 @@ Error MinimalSymbolDumper::visitSymbolBegin(codeview::CVSymbol &Record,
}
Error MinimalSymbolDumper::visitSymbolEnd(CVSymbol &Record) {
if (RecordBytes) {
AutoIndent Indent(P, 7);
P.formatBinary("bytes", Record.content(), 0);
}
if (RecordBytes)
printSymbolBytes(Record);
P.Unindent();
return Error::success();
}
/// Prints the raw bytes of a symbol record that has no known-record handler,
/// unless RecordBytes is set. Always succeeds.
Error MinimalSymbolDumper::visitUnknownSymbol(CVSymbol &Record) {
  // With RecordBytes set the bytes are printed for every symbol elsewhere, so
  // printing them here as well would duplicate the dump.
  if (RecordBytes)
    return Error::success();

  printSymbolBytes(Record);
  return Error::success();
}
/// Dumps the content of \p Record as a "bytes" block, indented by 7 columns
/// for the duration of the call.
void MinimalSymbolDumper::printSymbolBytes(CVSymbol &Record) const {
  AutoIndent Indent(P, 7);
  const auto Payload = Record.content();
  P.formatBinary("bytes", Payload, 0);
}
std::string MinimalSymbolDumper::typeOrIdIndex(codeview::TypeIndex TI,
bool IsType) const {
if (TI.isSimple() || TI.isDecoratedItemId())

View File

@ -36,6 +36,7 @@ public:
Error visitSymbolBegin(codeview::CVSymbol &Record) override;
Error visitSymbolBegin(codeview::CVSymbol &Record, uint32_t Offset) override;
Error visitSymbolEnd(codeview::CVSymbol &Record) override;
Error visitUnknownSymbol(codeview::CVSymbol &Record) override;
void setSymbolGroup(const SymbolGroup *Group) { SymGroup = Group; }
@ -51,6 +52,8 @@ private:
std::string typeIndex(codeview::TypeIndex TI) const;
std::string idIndex(codeview::TypeIndex TI) const;
void printSymbolBytes(codeview::CVSymbol &Record) const;
LinePrinter &P;
/// Dumping certain records requires knowing what machine this is. The

View File

@ -261,11 +261,15 @@ Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
}
Error MinimalTypeDumpVisitor::visitTypeEnd(CVType &Record) {
if (RecordBytes)
P.formatBinary("bytes", Record.RecordData, 0);
P.Unindent(Width + 3);
if (RecordBytes) {
AutoIndent Indent(P, 9);
P.formatBinary("Bytes", Record.RecordData, 0);
}
return Error::success();
}
/// Prints the full record data of a type record with an unknown leaf kind,
/// unless RecordBytes is set. Always succeeds.
Error MinimalTypeDumpVisitor::visitUnknownType(CVType &Record) {
  // With RecordBytes set the bytes are printed for every type elsewhere, so
  // printing them here as well would duplicate the dump.
  if (RecordBytes)
    return Error::success();

  P.formatBinary("bytes", Record.RecordData, 0);
  return Error::success();
}
@ -277,7 +281,15 @@ Error MinimalTypeDumpVisitor::visitMemberBegin(CVMemberRecord &Record) {
Error MinimalTypeDumpVisitor::visitMemberEnd(CVMemberRecord &Record) {
if (RecordBytes) {
AutoIndent Indent(P, 2);
P.formatBinary("Bytes", Record.Data, 0);
P.formatBinary("bytes", Record.Data, 0);
}
return Error::success();
}
/// Prints the data of a member record of unknown kind, indented by 2 columns,
/// unless RecordBytes is set. Always succeeds.
Error MinimalTypeDumpVisitor::visitUnknownMember(CVMemberRecord &Record) {
  // With RecordBytes set the bytes are printed for every member elsewhere, so
  // printing them here as well would duplicate the dump.
  if (RecordBytes)
    return Error::success();

  {
    AutoIndent Indent(P, 2);
    P.formatBinary("bytes", Record.Data, 0);
  }
  return Error::success();
}

View File

@ -37,8 +37,11 @@ public:
Error visitTypeBegin(codeview::CVType &Record,
codeview::TypeIndex Index) override;
Error visitTypeEnd(codeview::CVType &Record) override;
Error visitUnknownType(codeview::CVType &Record) override;
Error visitMemberBegin(codeview::CVMemberRecord &Record) override;
Error visitMemberEnd(codeview::CVMemberRecord &Record) override;
Error visitUnknownMember(codeview::CVMemberRecord &Record) override;
#define TYPE_RECORD(EnumName, EnumVal, Name) \
Error visitKnownRecord(codeview::CVType &CVR, \