llvm-project/llvm/test/TableGen/CompressWriteLatencyEntry.td
Reid Kleckner bb7242477c
[MC] Use StringTable to reduce dynamic relocations (#144202)
Dynamic relocations are expensive on ELF/Linux platforms because they
are applied in userspace on process startup. Therefore, it is worth
optimizing them to make PIE and PIC dylib builds faster. In +asserts
builds (non-NDEBUG), nikic identified these schedule class name string
pointers as the leading source of dynamic relocations. [1]

This change uses llvm::StringTable and the StringToOffsetTable TableGen
helper to turn the string pointers into 32-bit offsets into a separate
character array.

The number of dynamic relocations is reduced by ~60%:
❯ llvm-readelf --dyn-relocations lib/libLLVM.so | wc -l
381376 # before
155156 # after

The test suite time is modestly affected, but I'm running on a shared
noisy workstation VM with a ton of cores:
https://gist.github.com/rnk/f38882c2fe2e63d0eb58b8fffeab69de
Testing Time: 100.88s  # before
Testing Time: 78.50s   # after
Testing Time: 96.25s   # before again

I haven't used any fancy hyperfine/denoising tools, but I think the
result is clearly visible and we should ship it.

[1] https://gist.github.com/nikic/554f0a544ca15d5219788f1030f78c5a
2025-06-25 05:23:11 -07:00

55 lines
1.9 KiB
TableGen

// RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s
// Make sure that ReadAdvance entries are correctly processed.
// Not all ProcReadAdvance definitions implicitly inherit from SchedRead:
// some are subclasses of ReadAdvance, like the anonymous ReadAdvance record
// defined for SchedModel_A at the end of this file.
include "llvm/Target/Target.td"
// Minimal target record for this test. Its name shows up as the prefix of the
// emitted tables that the FileCheck expectations below match
// (MyTargetWriteLatencyTable, MyTargetReadAdvanceTable).
def MyTarget : Target;
// Three placeholder instructions with empty output and input operand lists.
// They exist only so that the InstRW records further down have something to
// attach scheduling information to; each one appears by name in the expected
// SchedModel_ASchedClasses table.
let OutOperandList = (outs), InOperandList = (ins) in {
def Inst_A : Instruction;
def Inst_B : Instruction;
def Inst_C : Instruction;
}
// The scheduling model under test. All write resources, InstRW mappings and
// the ReadAdvance record later in the file are bound to it.
// NOTE(review): CompleteModel is cleared here, presumably so that tblgen does
// not require scheduling information for every instruction of the target --
// confirm against llvm/Target/TargetSchedule.td.
let CompleteModel = 0 in {
def SchedModel_A: SchedMachineModel;
}
// Scheduling read used twice below: as the second entry of Inst_C's InstRW
// list and as the read that the anonymous ReadAdvance record applies to.
def Read_D : SchedRead;
// CHECK: extern const llvm::MCWriteLatencyEntry MyTargetWriteLatencyTable[] = {
// CHECK-NEXT: { 0, 0}, // Invalid
// CHECK-NEXT: { 1, 0}, // #1 Write_A_Write_C
// CHECK-NEXT: { 1, 2} // #2 Write_B
// CHECK-NEXT: }; // MyTargetWriteLatencyTable
// CHECK: extern const llvm::MCReadAdvanceEntry MyTargetReadAdvanceTable[] = {
// CHECK-NEXT: {0, 0, 0}, // Invalid
// CHECK-NEXT: {0, 2, 1} // #1
// CHECK-NEXT: }; // MyTargetReadAdvanceTable
// CHECK: static const llvm::MCSchedClassDesc SchedModel_ASchedClasses[] = {
// CHECK-NEXT: {DBGFIELD(1) 8191, false, false, false, 0, 0, 0, 0, 0, 0},
// CHECK-NEXT: {DBGFIELD(/*Inst_A*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 1, 1, 0, 0}, // #1
// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 2, 1, 0, 0}, // #2
// CHECK-NEXT: {DBGFIELD(/*Inst_C*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 1, 1, 1, 1}, // #3
// CHECK-NEXT: }; // SchedModel_ASchedClasses
// Scheduling definitions for SchedModel_A. Every record in this block gets
// its SchedModel field set to SchedModel_A by the enclosing let.
let SchedModel = SchedModel_A in {
// Three write resources, each with an empty processor-resource list and no
// fields overridden. The expected write latency table above lists Write_A and
// Write_C together under one entry (#1 Write_A_Write_C) while Write_B gets its
// own entry (#2) whose second field is 2 instead of 0.
// NOTE(review): Write_B is the only write named by the ReadAdvance record
// below, which is presumably why it cannot share an entry with the other
// two -- confirm against the subtarget emitter.
def Write_A : SchedWriteRes<[]>;
def Write_B : SchedWriteRes<[]>;
def Write_C : SchedWriteRes<[]>;
// One InstRW per instruction. Inst_A and Inst_C use different writes but are
// expected to reference the same write latency entry; Inst_C additionally
// lists Read_D, and it is the only scheduling class whose expected row ends
// in "1, 1" rather than "0, 0".
def : InstRW<[Write_A], (instrs Inst_A)>;
def : InstRW<[Write_B], (instrs Inst_B)>;
def : InstRW<[Write_C, Read_D], (instrs Inst_C)>;
// Anonymous ReadAdvance record: this is the kind of definition the comment at
// the top of the file is about. It applies to Read_D, passes 1 as its second
// argument (presumably the advance in cycles -- confirm in TargetSchedule.td)
// and is restricted to Write_B. The expected read advance table contains
// exactly one non-invalid entry for it: {0, 2, 1}.
def : ReadAdvance<Read_D, 1, [Write_B]>;
}
// Processor definition that selects SchedModel_A as its scheduling model; the
// third argument is an empty list.
// NOTE(review): presumably a processor must reference the model for its
// tables to be emitted -- confirm against the subtarget emitter.
def ProcessorA: ProcessorModel<"ProcessorA", SchedModel_A, []>;