From a8874cf50bb676facb4429234dff7774e579faef Mon Sep 17 00:00:00 2001 From: hev Date: Tue, 5 Dec 2023 10:42:53 +0900 Subject: [PATCH] [llvm][IR] Add per-global code model attribute (#72077) This adds a per-global code model attribute, which can override the target's code model to access global variables. Suggested-by: Arthur Eubanks Link: https://discourse.llvm.org/t/how-to-best-implement-code-model-overriding-for-certain-values/71816 Link: https://discourse.llvm.org/t/rfc-add-per-global-code-model-attribute/74944 --- llvm/docs/LangRef.rst | 15 ++++++++++ llvm/include/llvm/AsmParser/LLParser.h | 1 + llvm/include/llvm/AsmParser/LLToken.h | 1 + llvm/include/llvm/IR/GlobalObject.h | 1 + llvm/include/llvm/IR/GlobalVariable.h | 27 +++++++++++++++++ llvm/lib/AsmParser/LLLexer.cpp | 1 + llvm/lib/AsmParser/LLParser.cpp | 29 +++++++++++++++++++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 25 ++++++++++++++++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 5 ++-- llvm/lib/IR/AsmWriter.cpp | 21 ++++++++++++++ llvm/lib/IR/Globals.cpp | 11 +++++++ .../Assembler/globalvariable-attributes.ll | 10 +++++++ .../GlobalOpt/globalvar-code-model.ll | 11 +++++++ 13 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/globalvar-code-model.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 4bd19a332d8d..cf9b33a30eab 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -705,6 +705,13 @@ information. Attaching section information to an external declaration is an assertion that its definition is located in the specified section. If the definition is located in a different section, the behavior is undefined. +LLVM allows an explicit code model to be specified for globals. If the +target supports it, it will emit globals in the code model specified, +overriding the code model used to compile the translation unit. +The allowed values are "tiny", "small", "kernel", "medium", "large". 
+This may be extended in the future to specify global data layout that +doesn't cleanly fit into a specific code model. + By default, global initializers are optimized by assuming that global variables defined within the module are not modified from their initial values before the start of the global initializer. This is @@ -761,6 +768,7 @@ Syntax:: <global | constant> <Type> [<InitializerConstant>] [, section "name"] [, partition "name"] [, comdat [($name)]] [, align <Alignment>] + [, code_model "model"] [, no_sanitize_address] [, no_sanitize_hwaddress] [, sanitize_address_dyninit] [, sanitize_memtag] (, !name !N)* @@ -778,6 +786,13 @@ The following example just declares a global variable @G = external global i32 +The following example defines a global variable with the +``large`` code model: + +.. code-block:: llvm + + @G = internal global i32 0, code_model "large" + The following example defines a thread-local global with the ``initialexec`` TLS model: diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 810f3668d05d..793287c772b5 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -290,6 +290,7 @@ namespace llvm { bool parseOptionalCallingConv(unsigned &CC); bool parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens = false); + bool parseOptionalCodeModel(CodeModel::Model &model); bool parseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); bool parseOptionalUWTableKind(UWTableKind &Kind); bool parseAllocKind(AllocFnKind &Kind); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 0683291faae7..0aa0093e8efb 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -116,6 +116,7 @@ enum Kind { kw_addrspace, kw_section, kw_partition, + kw_code_model, kw_alias, kw_ifunc, kw_module, diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index 889bd3a28e12..ae8e61682444 100644 --- 
a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -51,6 +51,7 @@ protected: Comdat *ObjComdat = nullptr; enum { LastAlignmentBit = 5, + LastCodeModelBit = 8, HasSectionHashEntryBit, GlobalObjectBits, diff --git a/llvm/include/llvm/IR/GlobalVariable.h b/llvm/include/llvm/IR/GlobalVariable.h index 5ddffd16acc6..bcaf8e91432b 100644 --- a/llvm/include/llvm/IR/GlobalVariable.h +++ b/llvm/include/llvm/IR/GlobalVariable.h @@ -47,6 +47,11 @@ class GlobalVariable : public GlobalObject, public ilist_node<GlobalVariable> { // global initializers are run? bool isExternallyInitializedConstant : 1; +private: + static const unsigned CodeModelBits = LastCodeModelBit - LastAlignmentBit; + static const unsigned CodeModelMask = (1 << CodeModelBits) - 1; + static const unsigned CodeModelShift = LastAlignmentBit + 1; + public: /// GlobalVariable ctor - If a parent module is specified, the global is /// automatically inserted into the end of the specified modules global list. @@ -247,6 +252,28 @@ public: getAttributes().hasAttribute("rodata-section"); } + /// Get the custom code model raw value of this global. + /// + unsigned getCodeModelRaw() const { + unsigned Data = getGlobalValueSubClassData(); + return (Data >> CodeModelShift) & CodeModelMask; + } + + /// Get the custom code model of this global if it has one. + /// + /// If this global does not have a custom code model, the empty instance + /// will be returned. + std::optional<CodeModel::Model> getCodeModel() const { + unsigned CodeModelData = getCodeModelRaw(); + if (CodeModelData > 0) + return static_cast<CodeModel::Model>(CodeModelData - 1); + return {}; + } + + /// Change the code model for this global. 
+ /// + void setCodeModel(CodeModel::Model CM); + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { return V->getValueID() == Value::GlobalVariableVal; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 09a205c445db..bf01b39e6f97 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -571,6 +571,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(addrspace); KEYWORD(section); KEYWORD(partition); + KEYWORD(code_model); KEYWORD(alias); KEYWORD(ifunc); KEYWORD(module); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index d236b6cfa900..0e962e7f6daa 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1286,6 +1286,11 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc, return true; if (Alignment) GV->setAlignment(*Alignment); + } else if (Lex.getKind() == lltok::kw_code_model) { + CodeModel::Model CodeModel; + if (parseOptionalCodeModel(CodeModel)) + return true; + GV->setCodeModel(CodeModel); } else if (Lex.getKind() == lltok::MetadataVar) { if (parseGlobalObjectMetadataAttachment(*GV)) return true; @@ -2168,6 +2173,30 @@ bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) { return false; } +/// parseOptionalCodeModel +/// ::= /* empty */ +/// ::= 'code_model' "large" +bool LLParser::parseOptionalCodeModel(CodeModel::Model &model) { + Lex.Lex(); + auto StrVal = Lex.getStrVal(); + auto ErrMsg = "expected global code model string"; + if (StrVal == "tiny") + model = CodeModel::Tiny; + else if (StrVal == "small") + model = CodeModel::Small; + else if (StrVal == "kernel") + model = CodeModel::Kernel; + else if (StrVal == "medium") + model = CodeModel::Medium; + else if (StrVal == "large") + model = CodeModel::Large; + else + return tokError(ErrMsg); + if (parseToken(lltok::StringConstant, ErrMsg)) + return true; + return false; +} + /// 
parseOptionalDerefAttrBytes /// ::= /* empty */ /// ::= AttrKind '(' 4 ')' diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index e4c3770946b3..71417bf5086c 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1144,6 +1144,23 @@ static bool getDecodedDSOLocal(unsigned Val) { } } +static std::optional<CodeModel::Model> getDecodedCodeModel(unsigned Val) { + switch (Val) { + case 1: + return CodeModel::Tiny; + case 2: + return CodeModel::Small; + case 3: + return CodeModel::Kernel; + case 4: + return CodeModel::Medium; + case 5: + return CodeModel::Large; + } + + return {}; +} + static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) { switch (Val) { case 0: return GlobalVariable::NotThreadLocal; @@ -3805,6 +3822,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { // dllstorageclass, comdat, attributes, preemption specifier, // partition strtab offset, partition strtab size] (name in VST) // v2: [strtab_offset, strtab_size, v1] + // v3: [v2, code_model] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); @@ -3913,6 +3931,13 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) { NewGV->setSanitizerMetadata(Meta); } + if (Record.size() > 17 && Record[17]) { + if (auto CM = getDecodedCodeModel(Record[17])) + NewGV->setCodeModel(*CM); + else + return error("Invalid global variable code model"); + } + return Error::success(); } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 8239775d0486..4969809b8586 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1403,7 +1403,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, - // comdat, attributes, 
DSO_Local, GlobalSanitizer] + // comdat, attributes, DSO_Local, GlobalSanitizer, code_model] Vals.push_back(addToStrtab(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); @@ -1420,7 +1420,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { GV.isExternallyInitialized() || GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass || GV.hasComdat() || GV.hasAttributes() || GV.isDSOLocal() || - GV.hasPartition() || GV.hasSanitizerMetadata()) { + GV.hasPartition() || GV.hasSanitizerMetadata() || GV.getCodeModel()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(getEncodedUnnamedAddr(GV)); @@ -1438,6 +1438,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { Vals.push_back((GV.hasSanitizerMetadata() ? serializeSanitizerMetadata( GV.getSanitizerMetadata()) : 0)); + Vals.push_back(GV.getCodeModelRaw()); } else { AbbrevToUse = SimpleGVarAbbrev; } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index fabc79adbd33..c67c32a28e0d 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -3672,6 +3672,27 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { printEscapedString(GV->getPartition(), Out); Out << '"'; } + if (auto CM = GV->getCodeModel()) { + Out << ", code_model \""; + switch (*CM) { + case CodeModel::Tiny: + Out << "tiny"; + break; + case CodeModel::Small: + Out << "small"; + break; + case CodeModel::Kernel: + Out << "kernel"; + break; + case CodeModel::Medium: + Out << "medium"; + break; + case CodeModel::Large: + Out << "large"; + break; + } + Out << '"'; + } using SanitizerMetadata = llvm::GlobalValue::SanitizerMetadata; if (GV->hasSanitizerMetadata()) { diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 7bd4503a689e..51bdbeb0abf2 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -482,6 +482,8 @@ void GlobalVariable::copyAttributesFrom(const GlobalVariable *Src) { 
GlobalObject::copyAttributesFrom(Src); setExternallyInitialized(Src->isExternallyInitialized()); setAttributes(Src->getAttributes()); + if (auto CM = Src->getCodeModel()) + setCodeModel(*CM); } void GlobalVariable::dropAllReferences() { @@ -489,6 +491,15 @@ void GlobalVariable::dropAllReferences() { clearMetadata(); } +void GlobalVariable::setCodeModel(CodeModel::Model CM) { + unsigned CodeModelData = static_cast<unsigned>(CM) + 1; + unsigned OldData = getGlobalValueSubClassData(); + unsigned NewData = (OldData & ~(CodeModelMask << CodeModelShift)) | + (CodeModelData << CodeModelShift); + setGlobalValueSubClassData(NewData); + assert(getCodeModel() == CM && "Code model representation error!"); +} + //===----------------------------------------------------------------------===// // GlobalAlias Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/test/Assembler/globalvariable-attributes.ll b/llvm/test/Assembler/globalvariable-attributes.ll index 544f9bdb270e..4882b447973c 100644 --- a/llvm/test/Assembler/globalvariable-attributes.ll +++ b/llvm/test/Assembler/globalvariable-attributes.ll @@ -9,6 +9,11 @@ @g7 = global i32 2, sanitize_address_dyninit, align 4 @g8 = global i32 2, sanitize_memtag, align 4 @g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, sanitize_memtag, align 4 +@g10 = global i32 2, code_model "tiny" +@g11 = global i32 2, code_model "small" +@g12 = global i32 2, code_model "kernel" +@g13 = global i32 2, code_model "medium" +@g14 = global i32 2, code_model "large" attributes #0 = { "string" = "value" nobuiltin norecurse } @@ -21,6 +26,11 @@ attributes #0 = { "string" = "value" nobuiltin norecurse } ; CHECK: @g7 = global i32 2, sanitize_address_dyninit, align 4 ; CHECK: @g8 = global i32 2, sanitize_memtag, align 4 ; CHECK: @g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, sanitize_memtag, align 4 +; CHECK: @g10 = global i32 2, code_model "tiny" +; CHECK: @g11 = global i32 2, 
code_model "small" +; CHECK: @g12 = global i32 2, code_model "kernel" +; CHECK: @g13 = global i32 2, code_model "medium" +; CHECK: @g14 = global i32 2, code_model "large" ; CHECK: attributes #0 = { "key"="value" "key2"="value2" } ; CHECK: attributes #1 = { "key3"="value3" } diff --git a/llvm/test/Transforms/GlobalOpt/globalvar-code-model.ll b/llvm/test/Transforms/GlobalOpt/globalvar-code-model.ll new file mode 100644 index 000000000000..276a49474f73 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/globalvar-code-model.ll @@ -0,0 +1,11 @@ +; RUN: opt -passes=globalopt -S < %s | FileCheck %s + +@G = internal global i32 5, code_model "large" + +define i32 @test() norecurse { + %a = load i32, ptr @G + store i32 4, ptr @G + ret i32 %a +} + +; CHECK: @G = internal unnamed_addr global i1 false, code_model "large"