llvm-project/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
Tobias Stadler dfbd76bda0
[Remarks] Restructure bitstream remarks to be fully standalone (#156715)
Currently there are two serialization modes for bitstream Remarks:
standalone and separate. The separate mode splits remark metadata (e.g.
the string table) from actual remark data. The metadata is written into
the object file by the AsmPrinter, while the remark data is stored in a
separate remarks file. This means we can't use bitstream remarks with
tools like opt that don't generate an object file. Also, it is confusing
to post-process bitstream remarks files, because only the standalone
files can be read by llvm-remarkutil. We always need to use dsymutil
to convert the separate files to standalone files, which only works for
MachO. It is not possible for clang/opt to directly emit bitstream
remark files in standalone mode, because the string table can only be
serialized after all remarks were emitted.

Therefore, this change completely removes the separate serialization
mode. Instead, the remark string table is now always written to the end
of the remarks file. This requires us to tell the serializer when to
finalize remark serialization. This automatically happens when the
serializer goes out of scope. However, often the remark file goes out of
scope before the serializer is destroyed. To diagnose this, I have added
an assert to alert users that they need to explicitly call
finalizeLLVMOptimizationRemarks.

This change paves the way for further improvements to the remark
infrastructure, including more tooling (e.g. #159784), size optimizations
for bitstream remarks, and more.

Pull Request: https://github.com/llvm/llvm-project/pull/156715
2025-09-22 16:41:39 +01:00

193 lines
6.5 KiB
C++

//===- YAMLRemarkSerializer.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the implementation of the YAML remark serializer using
// LLVM's YAMLTraits.
//
//===----------------------------------------------------------------------===//
#include "llvm/Remarks/YAMLRemarkSerializer.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Support/FileSystem.h"
#include <optional>
using namespace llvm;
using namespace llvm::remarks;
/// Maps the fields shared by every remark onto \p io, in this fixed order:
/// "Pass", "Name", "DebugLoc", "Function", "Hotness", "Args".
///
/// "Pass", "Name" and "Function" are mapped with mapRequired; "DebugLoc",
/// "Hotness" and "Args" are mapped with mapOptional. Every argument is taken
/// by value, so the mapping calls are handed this function's own parameter
/// objects rather than the caller's.
static void
mapRemarkHeader(yaml::IO &io, StringRef PassName, StringRef RemarkName,
std::optional<RemarkLocation> RL, StringRef FunctionName,
std::optional<uint64_t> Hotness, ArrayRef<Argument> Args) {
io.mapRequired("Pass", PassName);
io.mapRequired("Name", RemarkName);
io.mapOptional("DebugLoc", RL);
io.mapRequired("Function", FunctionName);
io.mapOptional("Hotness", Hotness);
io.mapOptional("Args", Args);
}
namespace llvm {
namespace yaml {
/// YAML mapping for a pointer to a remark (output only).
///
/// The remark's type selects which tag is passed to io.mapTag; the common
/// header fields are then mapped through mapRemarkHeader.
template <> struct MappingTraits<remarks::Remark *> {
  static void mapping(IO &io, remarks::Remark *&Remark) {
    assert(io.outputting() && "input not yet implemented");

    // Offer each tag in turn. The || chain short-circuits, so mapTag is called
    // in exactly this order and no further tag is offered once a call returns
    // true.
    const auto Kind = Remark->RemarkType;
    const bool Tagged =
        io.mapTag("!Passed", (Kind == Type::Passed)) ||
        io.mapTag("!Missed", (Kind == Type::Missed)) ||
        io.mapTag("!Analysis", (Kind == Type::Analysis)) ||
        io.mapTag("!AnalysisFPCommute", (Kind == Type::AnalysisFPCommute)) ||
        io.mapTag("!AnalysisAliasing", (Kind == Type::AnalysisAliasing)) ||
        io.mapTag("!Failure", (Kind == Type::Failure));
    if (!Tagged)
      llvm_unreachable("Unknown remark type");

    mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc,
                    Remark->FunctionName, Remark->Hotness, Remark->Args);
  }
};
template <> struct MappingTraits<RemarkLocation> {
static void mapping(IO &io, RemarkLocation &RL) {
assert(io.outputting() && "input not yet implemented");
StringRef File = RL.SourceFilePath;
unsigned Line = RL.SourceLine;
unsigned Col = RL.SourceColumn;
io.mapRequired("File", File);
io.mapRequired("Line", Line);
io.mapRequired("Column", Col);
}
static const bool flow = true;
};
/// Helper struct for multiline string block literals. Use this type to preserve
/// newlines in strings.
struct StringBlockVal {
StringRef Value;
StringBlockVal(StringRef R) : Value(R) {}
};
/// Block-scalar traits for StringBlockVal. Both directions forward to the
/// StringRef scalar traits, operating on the wrapped Value.
template <> struct BlockScalarTraits<StringBlockVal> {
  /// Writes S.Value to \p OS through ScalarTraits<StringRef>::output.
  static void output(const StringBlockVal &S, void *Ctx, raw_ostream &OS) {
    ScalarTraits<StringRef>::output(S.Value, Ctx, OS);
  }

  /// Reads \p Scalar into S.Value through ScalarTraits<StringRef>::input and
  /// returns whatever that call returns.
  static StringRef input(StringRef Scalar, void *Ctx, StringBlockVal &S) {
    StringRef Result = ScalarTraits<StringRef>::input(Scalar, Ctx, S.Value);
    return Result;
  }
};
/// ArrayRef is not really compatible with the YAMLTraits. Everything should be
/// immutable in an ArrayRef, while the SequenceTraits expect a mutable version
/// for inputting, but we're only using the outputting capabilities here.
/// This is a hack, but still nicer than having to manually call the YAMLIO
/// internal methods.
/// Keep this in this file so that it doesn't get misused from YAMLTraits.h.
template <typename T> struct SequenceTraits<ArrayRef<T>> {
  /// Returns the number of elements in \p seq.
  static size_t size(IO &io, ArrayRef<T> &seq) { return seq.size(); }

  /// Returns a mutable reference to element \p index of \p seq.
  ///
  /// The return type is `T &`, matching both the template parameter and the
  /// const_cast below. A hard-coded `Argument &` would make this template
  /// ill-formed for any element type other than Argument.
  static T &element(IO &io, ArrayRef<T> &seq, size_t index) {
    assert(io.outputting() && "input not yet implemented");
    // The assert above should make this "safer" to satisfy the YAMLTraits:
    // constness is cast away only because the traits signature wants a
    // mutable reference, and only the output path is permitted to reach here.
    return const_cast<T &>(seq[index]);
  }
};
/// Implement this as a mapping for now to get proper quotation for the value.
template <> struct MappingTraits<Argument> {
static void mapping(IO &io, Argument &A) {
assert(io.outputting() && "input not yet implemented");
if (StringRef(A.Val).count('\n') > 1) {
StringBlockVal S(A.Val);
io.mapRequired(A.Key.data(), S);
} else {
io.mapRequired(A.Key.data(), A.Val);
}
io.mapOptional("DebugLoc", A.Loc);
}
};
} // end namespace yaml
} // end namespace llvm
// Declares Argument to the YAML I/O traits via LLVM_YAML_IS_SEQUENCE_VECTOR.
// NOTE(review): presumably this is what lets vector-like containers of
// Argument be mapped as YAML sequences - confirm against the macro definition.
LLVM_YAML_IS_SEQUENCE_VECTOR(Argument)
/// Creates a YAML remark serializer that writes to \p OS.
///
/// The base class is initialized with Format::YAML, and this object's address
/// is handed to YAMLOutput as its second (void *) constructor argument.
YAMLRemarkSerializer::YAMLRemarkSerializer(raw_ostream &OS)
    : RemarkSerializer(Format::YAML, OS),
      YAMLOutput(OS, static_cast<void *>(this)) {}
/// Creates a YAML remark serializer that writes to \p OS by delegating to the
/// single-argument constructor, then moves \p StrTabIn into StrTab.
YAMLRemarkSerializer::YAMLRemarkSerializer(raw_ostream &OS,
StringTable StrTabIn)
: YAMLRemarkSerializer(OS) {
// StrTabIn was received by value, so moving out of it does not touch the
// caller's object.
StrTab = std::move(StrTabIn);
}
void YAMLRemarkSerializer::emit(const Remark &Remark) {
// Again, YAMLTraits expect a non-const object for inputting, but we're not
// using that here.
auto *R = const_cast<remarks::Remark *>(&Remark);
YAMLOutput << R;
}
/// Creates a YAMLMetaSerializer constructed from \p OS and
/// \p ExternalFilename, returned through the MetaSerializer base pointer.
std::unique_ptr<MetaSerializer>
YAMLRemarkSerializer::metaSerializer(raw_ostream &OS,
                                     StringRef ExternalFilename) {
  std::unique_ptr<MetaSerializer> Meta =
      std::make_unique<YAMLMetaSerializer>(OS, ExternalFilename);
  return Meta;
}
/// Writes remarks::Magic to \p OS, followed by a single '\0' byte.
static void emitMagic(raw_ostream &OS) {
  constexpr char Terminator = '\0';
  OS << remarks::Magic;
  // The terminator is written explicitly, as its own byte.
  OS.write(Terminator);
}
/// Writes remarks::CurrentRemarkVersion to \p OS as a little-endian uint64_t
/// (8 bytes).
static void emitVersion(raw_ostream &OS) {
  // Encode into a fixed 8-byte scratch buffer, then write the whole buffer.
  std::array<char, sizeof(uint64_t)> Bytes;
  support::endian::write64le(Bytes.data(), remarks::CurrentRemarkVersion);
  OS.write(Bytes.data(), Bytes.size());
}
/// Writes the absolute form of \p Filename (the path to the remark file) to
/// \p OS, followed by a '\0' terminator.
static void emitExternalFile(raw_ostream &OS, StringRef Filename) {
  // Work on a private copy so the path can be made absolute in place.
  SmallString<128> AbsPath = Filename;
  sys::fs::make_absolute(AbsPath);
  // NOTE(review): this checks the buffer after make_absolute has run, not
  // Filename as it was passed in.
  assert(!AbsPath.empty() && "The filename can't be empty.");

  OS.write(AbsPath.data(), AbsPath.size());
  OS.write('\0');
}
/// Writes the YAML remark metadata to OS: the magic, the version, a string
/// table size of zero, and the external remark file path, in that order.
void YAMLMetaSerializer::emit() {
  emitMagic(OS);
  emitVersion(OS);

  // Emit a StringTable size of 0. This field is left over from when the YAML
  // format supported a string table. For now, don't unnecessarily change how
  // the metadata is serialized. When changing the format, we should think
  // about just reusing the bitstream remark meta for this.
  const uint64_t EmptyStrTabSize = 0;
  std::array<char, sizeof(uint64_t)> SizeBytes;
  support::endian::write64le(SizeBytes.data(), EmptyStrTabSize);
  OS.write(SizeBytes.data(), SizeBytes.size());

  emitExternalFile(OS, ExternalFilename);
}