[clang][llvm][aarch64][win] Add a clang flag and module attribute for import call optimization, and remove LLVM flag (#122831)

Switches import call optimization from being enabled by an LLVM
command-line flag to being enabled by a module attribute (the
`import-call-optimization` module flag), and adds a new Clang cc1 flag
(`-import-call-optimization`) that sets that attribute. This addresses the
concern raised in the original PR:
<https://github.com/llvm/llvm-project/pull/121516#discussion_r1911763991>

This change also creates the Called Global info only when the module
attribute is present, addressing this concern:
<https://github.com/llvm/llvm-project/pull/122762#pullrequestreview-2547595934>
This commit is contained in:
Daniel Paoliello 2025-01-30 09:51:43 -08:00 committed by GitHub
parent 0e43b9547d
commit 845cc968e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 63 additions and 41 deletions

View File

@ -465,6 +465,10 @@ ENUM_CODEGENOPT(ZeroCallUsedRegs, llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind,
/// non-deleting destructors. (No effect on Microsoft ABI.)
CODEGENOPT(CtorDtorReturnThis, 1, 0)
/// Enables emitting Import Call sections on supported targets that can be used
/// by the Windows kernel to enable import call optimization.
CODEGENOPT(ImportCallOptimization, 1, 0)
/// FIXME: Make DebugOptions its own top-level .def file.
#include "DebugOptions.def"

View File

@ -7587,6 +7587,11 @@ def fexperimental_assignment_tracking_EQ : Joined<["-"], "fexperimental-assignme
def enable_tlsdesc : Flag<["-"], "enable-tlsdesc">,
MarshallingInfoFlag<CodeGenOpts<"EnableTLSDESC">>;
def import_call_optimization : Flag<["-"], "import-call-optimization">,
HelpText<"Emit Import Call sections on supported targets that can be used "
"by the Windows kernel to enable import call optimization">,
MarshallingInfoFlag<CodeGenOpts<"ImportCallOptimization">>;
} // let Visibility = [CC1Option]
//===----------------------------------------------------------------------===//

View File

@ -1293,6 +1293,11 @@ void CodeGenModule::Release() {
if (LangOpts.EHAsynch)
getModule().addModuleFlag(llvm::Module::Warning, "eh-asynch", 1);
// Emit Import Call section.
if (CodeGenOpts.ImportCallOptimization)
getModule().addModuleFlag(llvm::Module::Warning, "import-call-optimization",
1);
// Indicate whether this Module was compiled with -fopenmp
if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd)
getModule().addModuleFlag(llvm::Module::Max, "openmp", LangOpts.OpenMP);

View File

@ -0,0 +1,5 @@
// Checks that passing -import-call-optimization to cc1 results in an
// "import-call-optimization" module flag with value 1 in the emitted LLVM IR.
// RUN: %clang_cc1 -import-call-optimization -emit-llvm %s -o - | FileCheck %s
void f(void) {}
// CHECK: !"import-call-optimization", i32 1}

View File

@ -78,11 +78,6 @@ static cl::opt<PtrauthCheckMode> PtrauthAuthChecks(
cl::desc("Check pointer authentication auth/resign failures"),
cl::init(Default));
static cl::opt<bool> EnableImportCallOptimization(
"aarch64-win-import-call-optimization", cl::Hidden,
cl::desc("Enable import call optimization for AArch64 Windows"),
cl::init(false));
#define DEBUG_TYPE "asm-printer"
namespace {
@ -95,6 +90,7 @@ class AArch64AsmPrinter : public AsmPrinter {
#ifndef NDEBUG
unsigned InstsEmitted;
#endif
bool EnableImportCallOptimization = false;
DenseMap<MCSection *, std::vector<std::pair<MCSymbol *, MCSymbol *>>>
SectionToImportedFunctionCalls;
@ -344,6 +340,9 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
OutStreamer->emitSymbolAttribute(S, MCSA_Global);
OutStreamer->emitAssignment(
S, MCConstantExpr::create(Feat00Value, MMI->getContext()));
if (M.getModuleFlag("import-call-optimization"))
EnableImportCallOptimization = true;
}
if (!TT.isOSBinFormatELF())
@ -3172,8 +3171,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
void AArch64AsmPrinter::recordIfImportCall(
const llvm::MachineInstr *BranchInst) {
if (!EnableImportCallOptimization ||
!TM.getTargetTriple().isOSBinFormatCOFF())
if (!EnableImportCallOptimization)
return;
auto [GV, OpFlags] = BranchInst->getMF()->tryGetCalledGlobal(BranchInst);

View File

@ -9573,7 +9573,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
if (CalledGlobal)
if (CalledGlobal &&
MF.getFunction().getParent()->getModuleFlag("import-call-optimization"))
DAG.addCalledGlobal(Ret.getNode(), CalledGlobal, OpFlags);
return Ret;
}
@ -9586,7 +9587,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InGlue = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
if (CalledGlobal)
if (CalledGlobal &&
MF.getFunction().getParent()->getModuleFlag("import-call-optimization"))
DAG.addCalledGlobal(Chain.getNode(), CalledGlobal, OpFlags);
uint64_t CalleePopBytes =

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=aarch64-pc-windows-msvc -aarch64-win-import-call-optimization < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s
define dso_local void @normal_call() local_unnamed_addr {
entry:
@ -16,3 +16,6 @@ declare void @a() local_unnamed_addr
; CHECK-LABEL .section .impcall,"yi"
; CHECK-NEXT .asciz "Imp_Call_V1"
; CHECK-NOT .secnum
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"import-call-optimization", i32 1}

View File

@ -1,7 +1,4 @@
; RUN: llc -mtriple=aarch64-pc-windows-msvc -aarch64-win-import-call-optimization < %s | FileCheck %s --check-prefix=CHECK-ENABLED
; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-DISABLED
; CHECK-DISABLED-NOT: .section .impcall
; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
define dso_local void @normal_call() local_unnamed_addr section "nc_sect" {
entry:
@ -9,40 +6,43 @@ entry:
call void @a()
ret void
}
; CHECK-ENABLED-LABEL: normal_call:
; CHECK-ENABLED: adrp [[ADRPREG:x[0-9]+]], __imp_a
; CHECK-ENABLED-NEXT: ldr [[LDRREG:x[0-9]+]], [[[ADRPREG]], :lo12:__imp_a]
; CHECK-ENABLED-NEXT: .Limpcall0:
; CHECK-ENABLED-NEXT: blr [[LDRREG]]
; CHECK-ENABLED-NEXT: .Limpcall1:
; CHECK-ENABLED-NEXT: blr [[LDRREG]]
; CHECK-LABEL: normal_call:
; CHECK: adrp [[ADRPREG:x[0-9]+]], __imp_a
; CHECK-NEXT: ldr [[LDRREG:x[0-9]+]], [[[ADRPREG]], :lo12:__imp_a]
; CHECK-NEXT: .Limpcall0:
; CHECK-NEXT: blr [[LDRREG]]
; CHECK-NEXT: .Limpcall1:
; CHECK-NEXT: blr [[LDRREG]]
define dso_local void @tail_call() local_unnamed_addr section "tc_sect" {
entry:
tail call void @b()
ret void
}
; CHECK-ENABLED-LABEL: tail_call:
; CHECK-ENABLED: adrp [[ADRPREG:x[0-9]+]], __imp_b
; CHECK-ENABLED-NEXT: ldr [[LDRREG:x[0-9]+]], [[[ADRPREG]], :lo12:__imp_b]
; CHECK-ENABLED-NEXT: .Limpcall2:
; CHECK-ENABLED-NEXT: br [[LDRREG]]
; CHECK-LABEL: tail_call:
; CHECK: adrp [[ADRPREG:x[0-9]+]], __imp_b
; CHECK-NEXT: ldr [[LDRREG:x[0-9]+]], [[[ADRPREG]], :lo12:__imp_b]
; CHECK-NEXT: .Limpcall2:
; CHECK-NEXT: br [[LDRREG]]
declare dllimport void @a() local_unnamed_addr
declare dllimport void @b() local_unnamed_addr
; CHECK-ENABLED-LABEL .section .impcall,"yi"
; CHECK-ENABLED-NEXT .asciz "Imp_Call_V1"
; CHECK-ENABLED-NEXT .word 32
; CHECK-ENABLED-NEXT .secnum nc_sect
; CHECK-ENABLED-NEXT .word 19
; CHECK-ENABLED-NEXT .secoffset .Limpcall0
; CHECK-ENABLED-NEXT .symidx __imp_a
; CHECK-ENABLED-NEXT .word 19
; CHECK-ENABLED-NEXT .secoffset .Limpcall1
; CHECK-ENABLED-NEXT .symidx __imp_a
; CHECK-ENABLED-NEXT .word 20
; CHECK-ENABLED-NEXT .secnum tc_sect
; CHECK-ENABLED-NEXT .word 19
; CHECK-ENABLED-NEXT .secoffset .Limpcall2
; CHECK-ENABLED-NEXT .symidx __imp_b
; CHECK-LABEL .section .impcall,"yi"
; CHECK-NEXT .asciz "Imp_Call_V1"
; CHECK-NEXT .word 32
; CHECK-NEXT .secnum nc_sect
; CHECK-NEXT .word 19
; CHECK-NEXT .secoffset .Limpcall0
; CHECK-NEXT .symidx __imp_a
; CHECK-NEXT .word 19
; CHECK-NEXT .secoffset .Limpcall1
; CHECK-NEXT .symidx __imp_a
; CHECK-NEXT .word 20
; CHECK-NEXT .secnum tc_sect
; CHECK-NEXT .word 19
; CHECK-NEXT .secoffset .Limpcall2
; CHECK-NEXT .symidx __imp_b
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"import-call-optimization", i32 1}