[LLD][MachO] Option to emit separate cstring sections (#158720)

Add the `--{no-}separate-cstring-literal-sections` option to emit
cstring literals into sections defined by their section name. This
allows for changes like https://github.com/swiftlang/swift/pull/84300
and https://github.com/swiftlang/swift/pull/84236 to actually have an
effect. The default behavior has not changed.

This is useful because strings in different sections might
have different access patterns at runtime. By splitting these strings
into separate sections, we may reduce the number of page faults during
startup. For example, the ObjC runtime accesses all strings in
`__objc_classname` before main.
This commit is contained in:
Ellis Hoag 2025-09-22 11:23:58 -07:00 committed by GitHub
parent 8843111d38
commit fc8f54d496
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 82 additions and 20 deletions

View File

@ -222,6 +222,7 @@ struct Configuration {
bool pgoWarnMismatch;
bool warnThinArchiveMissingMembers;
bool disableVerify;
bool separateCstringLiteralSections;
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;

View File

@ -1522,8 +1522,8 @@ static void foldIdenticalLiterals() {
// We always create a cStringSection, regardless of whether dedupLiterals is
// true. If it isn't, we simply create a non-deduplicating CStringSection.
// Either way, we must unconditionally finalize it here.
in.cStringSection->finalizeContents();
in.objcMethnameSection->finalizeContents();
for (auto *sec : in.cStringSections)
sec->finalizeContents();
in.wordLiteralSection->finalizeContents();
}
@ -1711,7 +1711,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
firstTLVDataSection = nullptr;
tar = nullptr;
memset(&in, 0, sizeof(in));
in = InStruct();
resetLoadedDylibs();
resetOutputSegments();
@ -1983,6 +1983,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
OPT_no_warn_thin_archive_missing_members, true);
config->generateUuid = !args.hasArg(OPT_no_uuid);
config->disableVerify = args.hasArg(OPT_disable_verify);
config->separateCstringLiteralSections =
args.hasFlag(OPT_separate_cstring_literal_sections,
OPT_no_separate_cstring_literal_sections, false);
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
// Throw an error only if --call-graph-profile-sort is explicitly specified

View File

@ -63,15 +63,13 @@ void lld::macho::addInputSection(InputSection *inputSection) {
isec->parent = osec;
inputSections.push_back(isec);
} else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) {
if (isec->getName() == section_names::objcMethname) {
if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
in.objcMethnameSection->inputOrder = inputSectionsOrder++;
in.objcMethnameSection->addInput(isec);
} else {
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
in.cStringSection->inputOrder = inputSectionsOrder++;
in.cStringSection->addInput(isec);
}
bool useSectionName = config->separateCstringLiteralSections ||
isec->getName() == section_names::objcMethname;
auto *osec = in.getOrCreateCStringSection(
useSectionName ? isec->getName() : section_names::cString);
if (osec->inputOrder == UnspecifiedInputOrder)
osec->inputOrder = inputSectionsOrder++;
osec->addInput(isec);
} else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
in.wordLiteralSection->inputOrder = inputSectionsOrder++;

View File

@ -239,7 +239,7 @@ void macho::writeMapFile() {
printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
printIsecArrSyms(concatOsec->inputs);
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
} else if (is_contained(in.cStringSections, osec)) {
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
uint64_t lastAddr = 0; // strings will never start at address 0, so this
// is a sentinel value

View File

@ -1084,6 +1084,13 @@ def dyld_env : Separate<["-"], "dyld_env">,
def ignore_auto_link : Flag<["-"], "ignore_auto_link">,
HelpText<"Ignore LC_LINKER_OPTIONs">,
Group<grp_rare>;
// Linker flag controlling how cstring literal input sections are grouped in
// the output. The first help string describes the enabling spelling
// (--separate-cstring-literal-sections); the second describes the disabling
// spelling, which is marked as the default.
// NOTE(review): presumably BB<> expands to both the plain and the "no-"
// prefixed option -- confirm against its multiclass definition.
defm separate_cstring_literal_sections
: BB<"separate-cstring-literal-sections",
"Emit all cstring literals into their respective sections defined by "
"their section names.",
"Emit all cstring literals into the __cstring section. As a special "
"case, the __objc_methname section will still be emitted. (default)">,
Group<grp_rare>;
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;

View File

@ -843,6 +843,9 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
struct InStruct {
const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr;
/// The list of cstring sections. Note that this includes \p cStringSection
/// and \p objcMethnameSection already.
llvm::SmallVector<CStringSection *> cStringSections;
CStringSection *cStringSection = nullptr;
DeduplicatedCStringSection *objcMethnameSection = nullptr;
WordLiteralSection *wordLiteralSection = nullptr;
@ -863,6 +866,26 @@ struct InStruct {
InitOffsetsSection *initOffsets = nullptr;
ObjCMethListSection *objcMethList = nullptr;
ChainedFixupsSection *chainedFixups = nullptr;
/// Returns the cstring output section registered under \p name, creating and
/// registering it on first request.
///
/// \param name Section name used as the lookup key in cStringSectionMap.
/// \param forceDedupStrings When true, a newly created section is a
///        DeduplicatedCStringSection even if config->dedupStrings is off.
///        It has no effect when a section for \p name already exists.
/// \returns The existing or newly created section; new sections are also
///          appended to cStringSections.
CStringSection *getOrCreateCStringSection(StringRef name,
                                          bool forceDedupStrings = false) {
  // The map stores the index the section has (or will have) in
  // cStringSections, so the candidate index is the current list size.
  auto lookup = cStringSectionMap.try_emplace(name, cStringSections.size());
  const bool alreadyRegistered = !lookup.second;
  if (alreadyRegistered)
    return cStringSections[lookup.first->getValue()];

  // Copy the name into a std::string obtained from make<> and hand its
  // C string to the section constructor.
  std::string &ownedName = *make<std::string>(name);
  const bool wantDedup = config->dedupStrings || forceDedupStrings;
  CStringSection *created = nullptr;
  if (wantDedup)
    created = make<DeduplicatedCStringSection>(ownedName.c_str());
  else
    created = make<CStringSection>(ownedName.c_str());

  cStringSections.push_back(created);
  return created;
}
private:
llvm::StringMap<unsigned> cStringSectionMap;
};
extern InStruct in;

View File

@ -1377,13 +1377,11 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); }
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
if (config->dedupStrings)
in.cStringSection =
make<DeduplicatedCStringSection>(section_names::cString);
else
in.cStringSection = make<CStringSection>(section_names::cString);
in.objcMethnameSection =
make<DeduplicatedCStringSection>(section_names::objcMethname);
// Materialize cstring and objcMethname sections
in.cStringSection = in.getOrCreateCStringSection(section_names::cString);
in.objcMethnameSection = cast<DeduplicatedCStringSection>(
in.getOrCreateCStringSection(section_names::objcMethname,
/*forceDedupStrings=*/true));
in.wordLiteralSection = make<WordLiteralSection>();
if (config->emitChainedFixups) {
in.chainedFixups = make<ChainedFixupsSection>();

32
lld/test/MachO/cstring.ll Normal file
View File

@ -0,0 +1,32 @@
; Verifies which cstring literal sections appear in the linked output with and
; without --separate-cstring-literal-sections. Without the flag (or with the
; --no- form) only __cstring and __objc_methname are expected.
; REQUIRES: aarch64
; RUN: llvm-as %s -o %t.o
; RUN: %lld -dylib --separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s
; RUN: %lld -dylib --no-separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
; RUN: %lld -dylib %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
; CHECK-DAG: __cstring
; CHECK-DAG: __new_sec
; CHECK-DAG: __objc_classname
; CHECK-DAG: __objc_methname
; CHECK-DAG: __objc_methtype
; CSTR-DAG: __cstring
; CSTR-DAG: __objc_methname
; The triple has to agree with the aarch64 requirement above and with the
; AArch64 Darwin datalayout below.
target triple = "arm64-apple-darwin"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
@.str = private unnamed_addr constant [10 x i8] c"my string\00", align 1
@.str1 = private unnamed_addr constant [16 x i8] c"my other string\00", section "__TEXT,__new_sec,cstring_literals", align 1
@OBJC_CLASS_NAME_ = private unnamed_addr constant [4 x i8] c"foo\00", section "__TEXT,__objc_classname,cstring_literals", align 1
@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [4 x i8] c"bar\00", section "__TEXT,__objc_methname,cstring_literals", align 1
@OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"goo\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
; Keep every string alive so that none of the sections is dropped.
@llvm.compiler.used = appending global [5 x ptr] [
ptr @.str,
ptr @.str1,
ptr @OBJC_METH_VAR_NAME_,
ptr @OBJC_CLASS_NAME_,
ptr @OBJC_METH_VAR_TYPE_
]