From fc8f54d4961e8f15abc7b4736dd5285569285f59 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 22 Sep 2025 11:23:58 -0700 Subject: [PATCH] [LLD][MachO] Option to emit separate cstring sections (#158720) Add the `--{no-}separate-cstring-literal-sections` option to emit cstring literals into sections defined by their section name. This allows for changes like https://github.com/swiftlang/swift/pull/84300 and https://github.com/swiftlang/swift/pull/84236 to actually have an effect. The default behavior has not changed. This is useful because strings in different sections might have different access patterns at runtime. By splitting these strings into separate sections, we may reduce the number of page faults during startup. For example, the ObjC runtime accesses all strings in `__objc_classname` before main. --- lld/MachO/Config.h | 1 + lld/MachO/Driver.cpp | 9 ++++++--- lld/MachO/InputSection.cpp | 16 +++++++--------- lld/MachO/MapFile.cpp | 2 +- lld/MachO/Options.td | 7 +++++++ lld/MachO/SyntheticSections.h | 23 +++++++++++++++++++++++ lld/MachO/Writer.cpp | 12 +++++------- lld/test/MachO/cstring.ll | 32 ++++++++++++++++++++++++++++++++ 8 files changed, 82 insertions(+), 20 deletions(-) create mode 100644 lld/test/MachO/cstring.ll diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 19dba790c1c7..51b1363d8761 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -222,6 +222,7 @@ struct Configuration { bool pgoWarnMismatch; bool warnThinArchiveMissingMembers; bool disableVerify; + bool separateCstringLiteralSections; bool callGraphProfileSort = false; llvm::StringRef printSymbolOrder; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 5a9b5b66d01b..7ce987e400a2 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1522,8 +1522,8 @@ static void foldIdenticalLiterals() { // We always create a cStringSection, regardless of whether dedupLiterals is // true. 
If it isn't, we simply create a non-deduplicating CStringSection. // Either way, we must unconditionally finalize it here. - in.cStringSection->finalizeContents(); - in.objcMethnameSection->finalizeContents(); + for (auto *sec : in.cStringSections) + sec->finalizeContents(); in.wordLiteralSection->finalizeContents(); } @@ -1711,7 +1711,7 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, firstTLVDataSection = nullptr; tar = nullptr; - memset(&in, 0, sizeof(in)); + in = InStruct(); resetLoadedDylibs(); resetOutputSegments(); @@ -1983,6 +1983,9 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, OPT_no_warn_thin_archive_missing_members, true); config->generateUuid = !args.hasArg(OPT_no_uuid); config->disableVerify = args.hasArg(OPT_disable_verify); + config->separateCstringLiteralSections = + args.hasFlag(OPT_separate_cstring_literal_sections, + OPT_no_separate_cstring_literal_sections, false); auto IncompatWithCGSort = [&](StringRef firstArgStr) { // Throw an error only if --call-graph-profile-sort is explicitly specified diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 18b3ff961085..b173e14cc86a 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -63,15 +63,13 @@ void lld::macho::addInputSection(InputSection *inputSection) { isec->parent = osec; inputSections.push_back(isec); } else if (auto *isec = dyn_cast(inputSection)) { - if (isec->getName() == section_names::objcMethname) { - if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder) - in.objcMethnameSection->inputOrder = inputSectionsOrder++; - in.objcMethnameSection->addInput(isec); - } else { - if (in.cStringSection->inputOrder == UnspecifiedInputOrder) - in.cStringSection->inputOrder = inputSectionsOrder++; - in.cStringSection->addInput(isec); - } + bool useSectionName = config->separateCstringLiteralSections || + isec->getName() == section_names::objcMethname; + auto *osec = in.getOrCreateCStringSection( + useSectionName ? 
isec->getName() : section_names::cString); + if (osec->inputOrder == UnspecifiedInputOrder) + osec->inputOrder = inputSectionsOrder++; + osec->addInput(isec); } else if (auto *isec = dyn_cast(inputSection)) { if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) in.wordLiteralSection->inputOrder = inputSectionsOrder++; diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp index f3e221a700b1..29ebcdcf9a83 100644 --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -239,7 +239,7 @@ void macho::writeMapFile() { printIsecArrSyms(textOsec->inputs, textOsec->getThunks()); } else if (auto *concatOsec = dyn_cast(osec)) { printIsecArrSyms(concatOsec->inputs); - } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { + } else if (is_contained(in.cStringSections, osec)) { const auto &liveCStrings = info.liveCStringsForSection.lookup(osec); uint64_t lastAddr = 0; // strings will never start at address 0, so this // is a sentinel value diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 8ae50f380741..4eeb8fbe1112 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -1084,6 +1084,13 @@ def dyld_env : Separate<["-"], "dyld_env">, def ignore_auto_link : Flag<["-"], "ignore_auto_link">, HelpText<"Ignore LC_LINKER_OPTIONs">, Group; +defm separate_cstring_literal_sections + : BB<"separate-cstring-literal-sections", + "Emit all cstring literals into their respective sections defined by " + "their section names.", + "Emit all cstring literals into the __cstring section. As a special " + "case, the __objc_methname section will still be emitted. 
(default)">, + Group; def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">; diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 5796b0790c83..1abf3c210a64 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -843,6 +843,9 @@ void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend); struct InStruct { const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; + /// The list of cstring sections. Note that this includes \p cStringSection + /// and \p objcMethnameSection already. + llvm::SmallVector cStringSections; CStringSection *cStringSection = nullptr; DeduplicatedCStringSection *objcMethnameSection = nullptr; WordLiteralSection *wordLiteralSection = nullptr; @@ -863,6 +866,26 @@ struct InStruct { InitOffsetsSection *initOffsets = nullptr; ObjCMethListSection *objcMethList = nullptr; ChainedFixupsSection *chainedFixups = nullptr; + + CStringSection *getOrCreateCStringSection(StringRef name, + bool forceDedupStrings = false) { + auto [it, didEmplace] = + cStringSectionMap.try_emplace(name, cStringSections.size()); + if (!didEmplace) + return cStringSections[it->getValue()]; + + std::string &nameData = *make(name); + CStringSection *sec; + if (config->dedupStrings || forceDedupStrings) + sec = make(nameData.c_str()); + else + sec = make(nameData.c_str()); + cStringSections.push_back(sec); + return sec; + } + +private: + llvm::StringMap cStringSectionMap; }; extern InStruct in; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index f288fadc0d14..995792be4174 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1377,13 +1377,11 @@ void macho::resetWriter() { LCDylib::resetInstanceCount(); } void macho::createSyntheticSections() { in.header = make(); - if (config->dedupStrings) - in.cStringSection = - make(section_names::cString); - else - in.cStringSection = make(section_names::cString); - in.objcMethnameSection = - 
make(section_names::objcMethname); + // Materialize cstring and objcMethname sections + in.cStringSection = in.getOrCreateCStringSection(section_names::cString); + in.objcMethnameSection = cast( + in.getOrCreateCStringSection(section_names::objcMethname, + /*forceDedupStrings=*/true)); in.wordLiteralSection = make(); if (config->emitChainedFixups) { in.chainedFixups = make(); diff --git a/lld/test/MachO/cstring.ll b/lld/test/MachO/cstring.ll new file mode 100644 index 000000000000..4f82736b0a5f --- /dev/null +++ b/lld/test/MachO/cstring.ll @@ -0,0 +1,32 @@ +; REQUIRES: aarch64 +; RUN: llvm-as %s -o %t.o + +; RUN: %lld -dylib --separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s +; RUN: %lld -dylib --no-separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR +; RUN: %lld -dylib %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR + +; CHECK-DAG: __cstring +; CHECK-DAG: __new_sec +; CHECK-DAG: __objc_classname +; CHECK-DAG: __objc_methname +; CHECK-DAG: __objc_methtype + +; CSTR-DAG: __cstring +; CSTR-DAG: __objc_methname + +target triple = "x86_64-apple-darwin" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" + +@.str = private unnamed_addr constant [10 x i8] c"my string\00", align 1 +@.str1 = private unnamed_addr constant [16 x i8] c"my other string\00", section "__TEXT,__new_sec,cstring_literals", align 1 +@OBJC_CLASS_NAME_ = private unnamed_addr constant [4 x i8] c"foo\00", section "__TEXT,__objc_classname,cstring_literals", align 1 +@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [4 x i8] c"bar\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"goo\00", section "__TEXT,__objc_methtype,cstring_literals", align 1 + +@llvm.compiler.used = appending global [5 x ptr] [ + ptr @.str, + ptr @.str1, + ptr @OBJC_METH_VAR_NAME_, + 
ptr @OBJC_CLASS_NAME_, + ptr @OBJC_METH_VAR_TYPE_ +]