Flang currently lowers internal procedures passed as actual arguments using LLVM's `llvm.init.trampoline` / `llvm.adjust.trampoline` intrinsics, which require an executable stack. On modern Linux toolchains and security-hardened kernels that enforce W^X (Write XOR Execute), this causes link-time failures (`ld.lld: error: ... requires an executable stack`) or runtime `SEGV` from NX violations. This patch introduces a runtime trampoline pool that allocates trampolines from a dedicated `mmap`'d region instead of the stack. The pool toggles page permissions between writable (for patching) and executable (for dispatch), so the stack stays non-executable throughout. On macOS, MAP_JIT and `pthread_jit_write_protect_np` are used for the same effect. An i-cache flush (`__builtin___clear_cache` on Linux, `sys_icache_invalidate` on macOS) is performed after each write→exec transition. The feature is gated behind a new driver flag, `-fsafe-trampoline` (off by default), which threads through the frontend into the `BoxedProcedurePass`. When enabled, the pass emits calls to `_FortranATrampolineInit`, `_FortranATrampolineAdjust`, and `_FortranATrampolineFree` instead of the legacy intrinsics. The legacy path is completely untouched when the flag is off. The pool is a singleton with a fixed capacity (default 1024 slots, overridable via `FLANG_TRAMPOLINE_POOL_SIZE`). Slot size varies by target (32 bytes on x86-64/AArch64, 48 on PPC64, 64 fallback). Each slot holds a small architecture-specific stub, currently x86-64 (17 bytes, using `r10` as the nest/static-chain register) and AArch64 (24 bytes, using `x15`). The implementation compiles on all architectures but will crash at runtime with a clear diagnostic if trampoline emission is actually attempted on an unsupported target. This avoids breaking the flang-rt build on e.g. RISC-V or PPC64. 
Freed slots are poisoned (the callee pointer is overwritten with a sentinel) and recycled into a freelist, so the pool can sustain long-running programs that repeatedly create and destroy closures.

A few design choices worth calling out:

- The runtime avoids all C++ runtime dependencies: no `std::mutex`, no `operator new`, no function-local statics with hidden guard variables. Locking is via flang-rt's own `Lock` / `CriticalSection`, memory is via `AllocateMemoryOrCrash` / `FreeMemory`, and the singleton uses explicit double-checked locking with a raw pointer. This was done so the trampoline pool links cleanly in minimal / freestanding flang-rt configurations.
- `_FortranATrampolineFree` calls are inserted immediately before every `func.return` in the enclosing host function. This is a conservative but correct strategy: the trampoline handle cannot outlive the host's stack frame, since the closure captures the host's local variables by reference.
- The `GNU_STACK` note is verified via a dedicated integration test (`safe-trampoline-gnustack.f90`) that compiles and links a Fortran program using the runtime path, then inspects the ELF with `llvm-readelf` to confirm the stack segment is `RW` (not `RWE`).

**Test coverage:**

- `flang/test/Driver/fsafe-trampoline.f90` — flag forwarding (on, off, default)
- `flang/test/Fir/boxproc-safe-trampoline.fir` — FIR-level FileCheck for emitted runtime calls
- `flang/test/Lower/safe-trampoline.f90` — end-to-end lowering
- `flang-rt/test/Driver/safe-trampoline-gnustack.f90` — GNU_STACK ELF verification

Closes #182813

Co-authored-by: Sairudra More <moresair@pe31.hpc.amslabs.hpecorp.net>
120 lines
5.3 KiB
TableGen
120 lines
5.3 KiB
TableGen
//===-- CGPasses.td - code gen pass definition file --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions for passes within the Optimizer/CodeGen/
// directory.
//
//===----------------------------------------------------------------------===//

// Include guard: prevents the pass definitions in this file from being
// processed more than once when the file is included repeatedly.
#ifndef FORTRAN_OPTIMIZER_CODEGEN_FIR_PASSES
#define FORTRAN_OPTIMIZER_CODEGEN_FIR_PASSES

// NOTE(review): the `Pass`, `Option` and `Statistic` classes used by the
// definitions in this file are presumably provided by this include.
include "mlir/Pass/PassBase.td"

// FIR-to-LLVM lowering pass, registered under the argument `fir-to-llvm-ir`
// and declared on `mlir::ModuleOp`. Instances are created through
// `::fir::createFIRToLLVMPass()`.
//
// The `forced*` string options all default to "" and, per their descriptions,
// override the corresponding property of the module (target triple, data
// layout, target CPU, tune CPU, target features). The two boolean options
// default to false.
def FIRToLLVMLowering : Pass<"fir-to-llvm-ir", "mlir::ModuleOp"> {
  let summary = "Convert FIR dialect to LLVM-IR dialect";
  let description = [{
    Convert the FIR dialect to the LLVM-IR dialect of MLIR. This conversion
    will also convert ops in the standard and FIRCG dialects.
  }];
  let constructor = "::fir::createFIRToLLVMPass()";
  let dependentDialects = ["mlir::LLVM::LLVMDialect", "mlir::DLTIDialect"];
  let options = [
    Option<"forcedTargetTriple", "target", "std::string", /*default=*/"",
           "Override module's target triple.">,
    Option<"forcedDataLayout", "datalayout", "std::string", /*default=*/"",
           "Override module's data layout.">,
    Option<"forcedTargetCPU", "target-cpu", "std::string", /*default=*/"",
           "Override module's target CPU.">,
    Option<"forcedTuneCPU", "tune-cpu", "std::string", /*default=*/"",
           "Override module's tune CPU.">,
    Option<"forcedTargetFeatures", "target-features", "std::string",
           /*default=*/"", "Override module's target features.">,
    Option<"applyTBAA", "apply-tbaa", "bool", /*default=*/"false",
           "Attach TBAA tags to memory accessing operations.">,
    Option<"typeDescriptorsRenamedForAssembly",
           "type-descriptors-renamed-for-assembly", "bool", /*default=*/"false",
           "Global variables created to describe derived types "
           "have been renamed to avoid special symbols in their names.">
  ];
}

// Pre-codegen rewrite pass, registered under the argument `cg-rewrite` and
// declared on `mlir::ModuleOp`. It fuses specific subgraphs of FIR ops into
// single code-gen ops.
//
// Option `preserve-declare` (default false) keeps DeclareOp during the
// rewrite. The `numDCE` statistic counts the operations eliminated.
def CodeGenRewrite : Pass<"cg-rewrite", "mlir::ModuleOp"> {
  let summary = "Rewrite some FIR ops into their code-gen forms.";
  let description = [{
    Fuse specific subgraphs into single Ops for code generation.
  }];
  let dependentDialects = [
    "fir::FIROpsDialect", "fir::FIRCodeGenDialect"
  ];
  let options = [
    Option<"preserveDeclare", "preserve-declare", "bool", /*default=*/"false",
           "Preserve DeclareOp during pre codegen re-write.">
  ];
  let statistics = [
    Statistic<"numDCE", "num-dce'd", "Number of operations eliminated">
  ];
}

// Target-specific rewrite pass, registered under the argument
// `target-rewrite` and declared on `mlir::ModuleOp`. It rewrites FIR
// abstractions whose representation may differ based on the target machine.
//
// The `forced*` string options default to "" and, per their descriptions,
// override the module's target triple, CPU, tune CPU and target features.
// The `no*Conversion` booleans default to false; setting one disables the
// target-specific conversion of CHARACTER, COMPLEX, or derived type values.
def TargetRewritePass : Pass<"target-rewrite", "mlir::ModuleOp"> {
  let summary = "Rewrite some FIR dialect into target specific forms.";
  let description = [{
    Certain abstractions in the FIR dialect need to be rewritten to reflect
    representations that may differ based on the target machine.
  }];
  let dependentDialects = [ "fir::FIROpsDialect", "mlir::func::FuncDialect",
                            "mlir::DLTIDialect", "mlir::LLVM::LLVMDialect" ];
  let options = [
    Option<"forcedTargetTriple", "target", "std::string", /*default=*/"",
           "Override module's target triple.">,
    Option<"forcedTargetCPU", "target-cpu", "std::string", /*default=*/"",
           "Override module's target CPU.">,
    Option<"forcedTuneCPU", "tune-cpu", "std::string", /*default=*/"",
           "Override module's tune CPU.">,
    Option<"forcedTargetFeatures", "target-features", "std::string",
           /*default=*/"", "Override module's target features.">,
    Option<"noCharacterConversion", "no-character-conversion",
           "bool", /*default=*/"false",
           "Disable target-specific conversion of CHARACTER.">,
    Option<"noComplexConversion", "no-complex-conversion",
           "bool", /*default=*/"false",
           "Disable target-specific conversion of COMPLEX.">,
    Option<"noStructConversion", "no-struct-conversion",
           "bool", /*default=*/"false",
           "Disable target-specific conversion of derived type value.">
  ];
}

// Boxed-procedure pass, registered under the argument `boxed-procedure` and
// declared on `mlir::ModuleOp`.
//
// Options:
//   - `use-thunks` (default true): convert procedure pointer abstractions to
//     a single code pointer, deploying thunks wherever required.
//   - `use-safe-trampoline` (default false): use the runtime trampoline pool
//     instead of stack-based trampolines, so no executable stack is needed
//     (W^X compliance).
//
// A summary and description are provided so this pass is documented like the
// other passes defined in this file.
def BoxedProcedurePass : Pass<"boxed-procedure", "mlir::ModuleOp"> {
  let summary = "Convert procedure pointer abstractions to code pointers";
  let description = [{
    Convert procedure pointer abstractions to a single code pointer,
    deploying thunks wherever required (see `use-thunks`). When
    `use-safe-trampoline` is enabled, internal procedure pointers use a
    runtime-managed pool of executable trampolines instead of stack-based
    trampolines, avoiding the need for an executable stack.
  }];
  let options = [
    Option<"useThunks", "use-thunks", "bool", /*default=*/"true",
           "Convert procedure pointer abstractions to a single code pointer, "
           "deploying thunks wherever required.">,
    Option<"useSafeTrampoline", "use-safe-trampoline", "bool",
           /*default=*/"false",
           "Use runtime trampoline pool instead of stack-based trampolines "
           "for W^X compliance. When enabled, internal procedure pointers "
           "use a runtime-managed pool of executable trampolines with "
           "separate data region, avoiding the need for an executable stack.">
  ];
}

// Array repacking lowering pass, registered under the argument
// `lower-repack-arrays` and declared on `mlir::ModuleOp`. It converts
// fir.pack_array / fir.unpack_array into other FIR operations and Fortran
// runtime calls. The pass declares no options or statistics.
def LowerRepackArraysPass : Pass<"lower-repack-arrays", "mlir::ModuleOp"> {
  let summary = "Convert fir.pack/unpack_array to other FIR operations";
  let description = [{
    Convert fir.pack/unpack_array operations to other FIR operations
    and Fortran runtime calls that implement the semantics
    of packing/unpacking.
  }];
  let dependentDialects = ["fir::FIROpsDialect", "mlir::arith::ArithDialect",
                           "mlir::func::FuncDialect"];
}

#endif // FORTRAN_OPTIMIZER_CODEGEN_FIR_PASSES