llvm-project/llvm/lib/MC/MCSymbol.cpp
Fangrui Song dc3a4c0fcf
MC: Restructure MCFragment as a fixed part and a variable tail
Refactor the fragment representation of `push rax; jmp foo; nop; jmp foo`,
previously encoded as
`MCDataFragment(push rax); MCRelaxableFragment(jmp foo); MCDataFragment(nop); MCRelaxableFragment(jmp foo)`,

to

```
MCFragment(fixed: push rax, variable: jmp foo)
MCFragment(fixed: nop, variable: jmp foo)
```

Changes:

* Eliminate MCEncodedFragment, moving content and fixup storage to MCFragment.
* The new MCFragment contains a fixed-size content (similar to previous
  MCDataFragment) and an optional variable-size tail.
* The variable-size tail supports FT_Relaxable, FT_LEB, FT_Dwarf, and
  FT_DwarfFrame, with plans to extend to other fragment types.
  dyn_cast/isa should be avoided for the converted fragment subclasses.
* In `setVarFixups`, source fixup offsets are relative to the variable part's start.
  Stored fixup (in `FixupStorage`) offsets are relative to the fixed part's start.
  A lot of code does `getFragmentOffset(Frag) + Fixup.getOffset()`,
  expecting the fixup offset to be relative to the fixed part's start.
* HexagonAsmBackend::fixupNeedsRelaxationAdvanced needs to know the
  associated instruction for a fixup. We have to add a `const MCFragment &` parameter.
* In MCObjectStreamer, extend `absoluteSymbolDiff` to apply to
  FT_Relaxable as otherwise there would be many more FT_DwarfFrame
  fragments in -g compilations.

https://llvm-compile-time-tracker.com/compare.php?from=28e1473e8e523150914e8c7ea50b44fb0d2a8d65&to=778d68ad1d48e7f111ea853dd249912c601bee89&stat=instructions:u

```
stage2-O0-g instructions:u geomean (-0.07%)
stage1-ReleaseLTO-g (link only) max-rss geomean (-0.39%)
```

```
% /t/clang-old -g -c sqlite3.i -w -mllvm -debug-only=mc-dump &| awk '/^[0-9]+/{s[$2]++;tot++} END{print "Total",tot; n=asorti(s, si); for(i=1;i<=n;i++) print si[i],s[si[i]]}'
Total 59675
Align 2215
Data 29700
Dwarf 12044
DwarfCallFrame 4216
Fill 92
LEB 12
Relaxable 11396
% /t/clang-new -g -c sqlite3.i -w -mllvm -debug-only=mc-dump &| awk '/^[0-9]+/{s[$2]++;tot++} END{print "Total",tot; n=asorti(s, si); for(i=1;i<=n;i++) print si[i],s[si[i]]}'
Total 32287
Align 2215
Data 2312
Dwarf 12044
DwarfCallFrame 4216
Fill 92
LEB 12
Relaxable 11396
```

Pull Request: https://github.com/llvm/llvm-project/pull/148544
2025-07-15 21:56:55 -07:00

88 lines
2.9 KiB
C++

//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
using namespace llvm;
// File-local placeholder fragment. Only the address of this fragment is ever
// actually used; its contents are never inspected in this file.
static MCFragment SentinelFragment;
// Sentinel value for the absolute pseudo fragment: the static member is
// initialized to the address of the placeholder defined just above.
// NOTE(review): presumably compared by pointer identity elsewhere to recognize
// absolute symbols -- confirm against MCSymbol.h.
MCFragment *MCSymbol::AbsolutePseudoFragment = &SentinelFragment;
/// Allocate storage for an MCSymbol from \p Ctx, optionally reserving one
/// NameEntryStorageTy slot in front of the symbol when \p Name is non-null.
///
/// \param s    Size in bytes requested for the symbol object itself.
/// \param Name Symbol table entry for the name, or null for an unnamed symbol.
/// \param Ctx  Context whose allocator provides the memory.
/// \returns    The address at which the symbol object is to be constructed:
///             the start of the allocation for unnamed symbols, or one
///             NameEntryStorageTy past the start for named ones.
void *MCSymbol::operator new(size_t s, const MCSymbolTableEntry *Name,
                             MCContext &Ctx) {
  // A pointer-sized slot alone might not satisfy the symbol's alignment, so
  // the name is budgeted as a whole NameEntryStorageTy. The check below
  // guarantees that slot is aligned at least as strictly as MCSymbol, which
  // also means no padding is needed between the name slot and the symbol.
  static_assert((unsigned)alignof(MCSymbol) <= alignof(NameEntryStorageTy),
                "Bad alignment of MCSymbol");

  const bool HasName = Name != nullptr;

  // Total request: the symbol, plus one leading name slot when named.
  size_t TotalBytes = s;
  if (HasName)
    TotalBytes += sizeof(NameEntryStorageTy);

  auto *Base = static_cast<NameEntryStorageTy *>(
      Ctx.allocate(TotalBytes, alignof(NameEntryStorageTy)));

  // Skip over the name slot (if any) so the symbol starts right after it.
  return HasName ? Base + 1 : Base;
}
/// Turn this symbol into a variable whose value is the expression \p Value.
///
/// \param Value The expression to assign; must not be null.
///
/// The symbol must currently have either no contents or variable contents;
/// both preconditions are enforced with assertions only.
void MCSymbol::setVariableValue(const MCExpr *Value) {
  // A null expression is never an acceptable variable value.
  assert(Value && "Invalid variable value!");
  // Symbols whose contents are already of another kind may not be retargeted
  // to a variable value.
  assert((SymbolContents == SymContentsVariable ||
          SymbolContents == SymContentsUnset) &&
         "Cannot give common/offset symbol a variable value");

  // Record the new contents kind together with the expression itself.
  SymbolContents = SymContentsVariable;
  this->Value = Value;

  // Finally mark the symbol as undefined, as the original sequence does.
  setUndefined();
}
/// Write this symbol's name to \p OS, quoting and escaping it when the target
/// described by \p MAI does not accept the name unquoted.
///
/// \param OS  Stream that receives the (possibly quoted) name.
/// \param MAI Target assembly info used to validate the name; when null the
///            name is always printed verbatim.
///
/// If the name needs quoting but the target does not support quoted names,
/// this reports a fatal error.
void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
  StringRef Name = getName();

  // Fast path: with no target info, or with a name the target accepts as-is,
  // the name is emitted unchanged.
  const bool NeedsQuoting = MAI && !MAI->isValidUnquotedName(Name);
  if (!NeedsQuoting) {
    OS << Name;
    return;
  }

  // MAI is known to be non-null from here on (NeedsQuoting implies it).
  if (!MAI->supportsNameQuoting())
    report_fatal_error("Symbol name with unsupported characters");

  // Emit the name between double quotes, escaping newline, double quote and
  // backslash; every other character passes through untouched.
  OS << '"';
  for (char Ch : Name) {
    switch (Ch) {
    case '\n':
      OS << "\\n";
      break;
    case '"':
      OS << "\\\"";
      break;
    case '\\':
      OS << "\\\\";
      break;
    default:
      OS << Ch;
      break;
    }
  }
  OS << '"';
}
// dump() is only compiled when NDEBUG is not defined or when
// LLVM_ENABLE_DUMP is defined.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Debugging aid: stream this symbol to dbgs() using the MCSymbol
/// operator<< overload (declared outside this file).
LLVM_DUMP_METHOD void MCSymbol::dump() const {
dbgs() << *this;
}
#endif