
Dynamic relocations are expensive on ELF/Linux platforms because they are applied in userspace on process startup. Therefore, it is worth optimizing them to make PIE and PIC dylib builds faster. In +asserts builds (non-NDEBUG), nikic identified these schedule class name string pointers as the leading source of dynamic relocations. [1]

This change uses llvm::StringTable and the StringToOffsetTable TableGen helper to turn the string pointers into 32-bit offsets into a separate character array.

The number of dynamic relocations is reduced by ~60%:

  ❯ llvm-readelf --dyn-relocations lib/libLLVM.so | wc -l
  381376 # before
  155156 # after

The test suite time is modestly affected, but I'm running on a shared, noisy workstation VM with a ton of cores: https://gist.github.com/rnk/f38882c2fe2e63d0eb58b8fffeab69de

  Testing Time: 100.88s # before
  Testing Time: 78.50s # after
  Testing Time: 96.25s # before again

I haven't used any fancy hyperfine/denoising tools, but I think the result is clearly visible and we should ship it.

[1] https://gist.github.com/nikic/554f0a544ca15d5219788f1030f78c5a
55 lines
1.9 KiB
TableGen
55 lines
1.9 KiB
TableGen
// RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s
|
|
|
|
// Make sure that ReadAdvance entries are correctly processed.
|
|
// Not all ProcReadAdvance definitions implicitly inherit from SchedRead.
|
|
// Some ProcReadAdvances are subclasses of ReadAdvance.
|
|
|
|
include "llvm/Target/Target.td"
|
|
|
|
// Minimal target record; its name is the "MyTarget" prefix seen in the
// expected table names (MyTargetWriteLatencyTable, MyTargetReadAdvanceTable).
def MyTarget : Target;
|
|
|
|
// Three bare instructions declared under one shared `let`, so each of them
// gets an empty (outs) and an empty (ins) operand list. The InstRW
// definitions further down refer to these records by name, and the expected
// scheduling-class rows are labelled Inst_A, Inst_B and Inst_C.
let OutOperandList = (outs), InOperandList = (ins) in {
|
|
def Inst_A : Instruction;
|
|
def Inst_B : Instruction;
|
|
def Inst_C : Instruction;
|
|
|
|
}
|
|
|
|
// The single scheduling model used by this test, declared with
// CompleteModel = 0.
// NOTE(review): presumably this lets the model leave instructions without
// scheduling information -- confirm against SchedMachineModel in
// llvm/Target/TargetSchedule.td.
let CompleteModel = 0 in {
|
|
def SchedModel_A: SchedMachineModel;
|
|
}
|
|
|
|
// A plain SchedRead declared outside the `let SchedModel = ...` block below.
// It is used as an operand read for Inst_C and is the read that the
// ReadAdvance definition near the end of the file is attached to.
def Read_D : SchedRead;
|
|
|
|
// CHECK: extern const llvm::MCWriteLatencyEntry MyTargetWriteLatencyTable[] = {
|
|
// CHECK-NEXT: { 0, 0}, // Invalid
|
|
// CHECK-NEXT: { 1, 0}, // #1 Write_A_Write_C
|
|
// CHECK-NEXT: { 1, 2} // #2 Write_B
|
|
// CHECK-NEXT: }; // MyTargetWriteLatencyTable
|
|
|
|
// CHECK: extern const llvm::MCReadAdvanceEntry MyTargetReadAdvanceTable[] = {
|
|
// CHECK-NEXT: {0, 0, 0}, // Invalid
|
|
// CHECK-NEXT: {0, 2, 1} // #1
|
|
// CHECK-NEXT: }; // MyTargetReadAdvanceTable
|
|
|
|
// CHECK: static const llvm::MCSchedClassDesc SchedModel_ASchedClasses[] = {
|
|
// CHECK-NEXT: {DBGFIELD(1) 8191, false, false, false, 0, 0, 0, 0, 0, 0},
|
|
// CHECK-NEXT: {DBGFIELD(/*Inst_A*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 1, 1, 0, 0}, // #1
|
|
// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 2, 1, 0, 0}, // #2
|
|
// CHECK-NEXT: {DBGFIELD(/*Inst_C*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 1, 1, 1, 1}, // #3
|
|
// CHECK-NEXT: }; // SchedModel_ASchedClasses
|
|
|
|
// Everything inside this block is bound to SchedModel_A: three write
// resources, the per-instruction read/write mappings, and one ReadAdvance.
// These records are what the expected tables earlier in the file describe.
let SchedModel = SchedModel_A in {
|
|
// Write resources with an empty processor-resource list.
// The expected latency table labels entry #1 "Write_A_Write_C" and entry #2
// "Write_B".
def Write_A : SchedWriteRes<[]>;
|
|
def Write_B : SchedWriteRes<[]>;
|
|
def Write_C : SchedWriteRes<[]>;
|
|
|
|
// Map each instruction to its scheduling resources. Only Inst_C also lists a
// read (Read_D); its expected scheduling-class row is the only one whose last
// two fields are non-zero.
def : InstRW<[Write_A], (instrs Inst_A)>;
|
|
def : InstRW<[Write_B], (instrs Inst_B)>;
|
|
def : InstRW<[Write_C, Read_D], (instrs Inst_C)>;
|
|
|
|
// ReadAdvance on Read_D with value 1, listing only Write_B.
// NOTE(review): presumably "1" is the advance in cycles and [Write_B] limits
// it to that write; the expected row {0, 2, 1} is consistent with Write_B
// being entry #2 of the latency table -- confirm the field order against
// MCReadAdvanceEntry.
def : ReadAdvance<Read_D, 1, [Write_B]>;
|
|
}
|
|
|
|
// Processor named "ProcessorA" that uses SchedModel_A; the third argument is
// an empty list.
// NOTE(review): presumably that list holds subtarget features -- confirm
// against ProcessorModel in llvm/Target/Target.td.
def ProcessorA: ProcessorModel<"ProcessorA", SchedModel_A, []>;
|