[MLIR] Split ExecutionEngine Initialization out of ctor into an explicit method call (#153524)
Retry landing https://github.com/llvm/llvm-project/pull/153373.

## Major changes from previous attempt

- Remove the test in CAPI because no existing tests in CAPI deal with sanitizer exemptions.
- Update `mlir/docs/Dialects/GPU.md` to reflect the new behavior: GPU binaries are loaded in global ctors instead of being loaded at the call site.
- Skip the test on AArch64 since we have an issue with initialization there.

---------

Co-authored-by: Mehdi Amini <joker.eph@gmail.com>
This commit is contained in:
parent
40833eea21
commit
7610b13729
@ -193,10 +193,25 @@ llvm.func @foo() {
|
||||
// mlir-translate --mlir-to-llvmir:
|
||||
@binary_bin_cst = internal constant [6 x i8] c"AMDGPU", align 8
|
||||
@binary_func_kernel_name = private unnamed_addr constant [7 x i8] c"func\00", align 1
|
||||
@binary_module = internal global ptr null
|
||||
@llvm.global_ctors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_load, ptr null}]
|
||||
@llvm.global_dtors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_unload, ptr null}]
|
||||
define internal void @binary_load() section ".text.startup" {
|
||||
entry:
|
||||
%0 = call ptr @mgpuModuleLoad(ptr @binary_bin_cst)
|
||||
store ptr %0, ptr @binary_module
|
||||
...
|
||||
}
|
||||
define internal void @binary_unload() section ".text.startup" {
|
||||
entry:
|
||||
%0 = load ptr, ptr @binary_module, align 8
|
||||
call void @mgpuModuleUnload(ptr %0)
|
||||
...
|
||||
}
|
||||
...
|
||||
define void @foo() {
|
||||
...
|
||||
%module = call ptr @mgpuModuleLoad(ptr @binary_bin_cst)
|
||||
%module = load ptr, ptr @binary_module, align 8
|
||||
%kernel = call ptr @mgpuModuleGetFunction(ptr %module, ptr @binary_func_kernel_name)
|
||||
call void @mgpuLaunchKernel(ptr %kernel, ...) ; Launch the kernel
|
||||
...
|
||||
|
@ -46,6 +46,13 @@ MLIR_CAPI_EXPORTED MlirExecutionEngine mlirExecutionEngineCreate(
|
||||
MlirModule op, int optLevel, int numPaths,
|
||||
const MlirStringRef *sharedLibPaths, bool enableObjectDump);
|
||||
|
||||
/// Initialize the ExecutionEngine. Global constructors specified by
|
||||
/// `llvm.mlir.global_ctors` will be run. One common scenario is that kernel
|
||||
/// binary compiled from `gpu.module` gets loaded during initialization. Make
|
||||
/// sure all symbols are resolvable before initialization by calling
|
||||
/// `mlirExecutionEngineRegisterSymbol` or including shared libraries.
|
||||
MLIR_CAPI_EXPORTED void mlirExecutionEngineInitialize(MlirExecutionEngine jit);
|
||||
|
||||
/// Destroy an ExecutionEngine instance.
|
||||
MLIR_CAPI_EXPORTED void mlirExecutionEngineDestroy(MlirExecutionEngine jit);
|
||||
|
||||
|
@ -227,6 +227,13 @@ public:
|
||||
llvm::function_ref<llvm::orc::SymbolMap(llvm::orc::MangleAndInterner)>
|
||||
symbolMap);
|
||||
|
||||
/// Initialize the ExecutionEngine. Global constructors specified by
|
||||
/// `llvm.mlir.global_ctors` will be run. One common scenario is that kernel
|
||||
/// binary compiled from `gpu.module` gets loaded during initialization. Make
|
||||
/// sure all symbols are resolvable before initialization by calling
|
||||
/// `registerSymbols` or including shared libraries.
|
||||
void initialize();
|
||||
|
||||
private:
|
||||
/// Ordering of llvmContext and jit is important for destruction purposes: the
|
||||
/// jit must be destroyed before the context.
|
||||
@ -250,6 +257,8 @@ private:
|
||||
/// Destroy functions in the libraries loaded by the ExecutionEngine that are
|
||||
/// called when this ExecutionEngine is destructed.
|
||||
SmallVector<LibraryDestroyFn> destroyFns;
|
||||
|
||||
bool isInitialized = false;
|
||||
};
|
||||
|
||||
} // namespace mlir
|
||||
|
@ -7,8 +7,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir-c/ExecutionEngine.h"
|
||||
#include "mlir/Bindings/Python/NanobindAdaptors.h"
|
||||
#include "mlir/Bindings/Python/Nanobind.h"
|
||||
#include "mlir/Bindings/Python/NanobindAdaptors.h"
|
||||
|
||||
namespace nb = nanobind;
|
||||
using namespace mlir;
|
||||
@ -124,6 +124,17 @@ NB_MODULE(_mlirExecutionEngine, m) {
|
||||
},
|
||||
nb::arg("name"), nb::arg("callback"),
|
||||
"Register `callback` as the runtime symbol `name`.")
|
||||
.def(
|
||||
"initialize",
|
||||
[](PyExecutionEngine &executionEngine) {
|
||||
mlirExecutionEngineInitialize(executionEngine.get());
|
||||
},
|
||||
"Initialize the ExecutionEngine. Global constructors specified by "
|
||||
"`llvm.mlir.global_ctors` will be run. One common scenario is that "
|
||||
"kernel binary compiled from `gpu.module` gets loaded during "
|
||||
"initialization. Make sure all symbols are resolvable before "
|
||||
"initialization by calling `register_runtime` or including "
|
||||
"shared libraries.")
|
||||
.def(
|
||||
"dump_to_object_file",
|
||||
[](PyExecutionEngine &executionEngine, const std::string &fileName) {
|
||||
|
@ -68,6 +68,10 @@ mlirExecutionEngineCreate(MlirModule op, int optLevel, int numPaths,
|
||||
return wrap(jitOrError->release());
|
||||
}
|
||||
|
||||
/// C API entry point: forwards to `initialize()` on the engine wrapped by
/// `jit`.
extern "C" void mlirExecutionEngineInitialize(MlirExecutionEngine jit) {
  auto *engine = unwrap(jit);
  engine->initialize();
}
|
||||
|
||||
/// C API entry point: deletes the engine object wrapped by `jit`.
extern "C" void mlirExecutionEngineDestroy(MlirExecutionEngine jit) {
  auto *engine = unwrap(jit);
  delete engine;
}
|
||||
@ -106,9 +110,8 @@ extern "C" void mlirExecutionEngineRegisterSymbol(MlirExecutionEngine jit,
|
||||
void *sym) {
|
||||
unwrap(jit)->registerSymbols([&](llvm::orc::MangleAndInterner interner) {
|
||||
llvm::orc::SymbolMap symbolMap;
|
||||
symbolMap[interner(unwrap(name))] =
|
||||
{ llvm::orc::ExecutorAddr::fromPtr(sym),
|
||||
llvm::JITSymbolFlags::Exported };
|
||||
symbolMap[interner(unwrap(name))] = {llvm::orc::ExecutorAddr::fromPtr(sym),
|
||||
llvm::JITSymbolFlags::Exported};
|
||||
return symbolMap;
|
||||
});
|
||||
}
|
||||
|
@ -106,7 +106,7 @@ void ExecutionEngine::dumpToObjectFile(StringRef filename) {
|
||||
}
|
||||
// Compilation is lazy and it doesn't populate object cache unless requested.
|
||||
// In case object dump is requested before cache is populated, we need to
|
||||
// force compilation manually.
|
||||
// force compilation manually.
|
||||
if (cache->isEmpty()) {
|
||||
for (std::string &functionName : functionNames) {
|
||||
auto result = lookupPacked(functionName);
|
||||
@ -400,13 +400,6 @@ ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options,
|
||||
return symbolMap;
|
||||
};
|
||||
engine->registerSymbols(runtimeSymbolMap);
|
||||
|
||||
// Execute the global constructors from the module being processed.
|
||||
// TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a
|
||||
// crash for AArch64 see related issue #71963.
|
||||
if (!engine->jit->getTargetTriple().isAArch64())
|
||||
cantFail(engine->jit->initialize(engine->jit->getMainJITDylib()));
|
||||
|
||||
return std::move(engine);
|
||||
}
|
||||
|
||||
@ -442,6 +435,7 @@ Expected<void *> ExecutionEngine::lookup(StringRef name) const {
|
||||
|
||||
Error ExecutionEngine::invokePacked(StringRef name,
|
||||
MutableArrayRef<void *> args) {
|
||||
initialize();
|
||||
auto expectedFPtr = lookupPacked(name);
|
||||
if (!expectedFPtr)
|
||||
return expectedFPtr.takeError();
|
||||
@ -451,3 +445,13 @@ Error ExecutionEngine::invokePacked(StringRef name,
|
||||
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
void ExecutionEngine::initialize() {
|
||||
if (isInitialized)
|
||||
return;
|
||||
// TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a
|
||||
// crash for AArch64 see related issue #71963.
|
||||
if (!jit->getTargetTriple().isAArch64())
|
||||
cantFail(jit->initialize(jit->getMainJITDylib()));
|
||||
isInitialized = true;
|
||||
}
|
||||
|
@ -202,6 +202,8 @@ compileAndExecute(Options &options, Operation *module, StringRef entryPoint,
|
||||
|
||||
auto engine = std::move(*expectedEngine);
|
||||
|
||||
engine->initialize();
|
||||
|
||||
auto expectedFPtr = engine->lookupPacked(entryPoint);
|
||||
if (!expectedFPtr)
|
||||
return expectedFPtr.takeError();
|
||||
|
@ -19,5 +19,6 @@ class ExecutionEngine:
|
||||
def dump_to_object_file(self, file_name: str) -> None: ...
|
||||
def raw_lookup(self, func_name: str) -> int: ...
|
||||
def raw_register_runtime(self, name: str, callback: object) -> None: ...
|
||||
# Matches the "initialize" method registered by the extension-module binding.
def initialize(self) -> None: ...
|
||||
@property
|
||||
def _CAPIPtr(self) -> object: ...
|
||||
|
@ -30,6 +30,13 @@ if(MLIR_ENABLE_EXECUTION_ENGINE)
|
||||
MLIRCAPIConversion
|
||||
MLIRCAPIExecutionEngine
|
||||
MLIRCAPIRegisterEverything
|
||||
)
|
||||
_add_capi_test_executable(mlir-capi-global-constructors-test
|
||||
global_constructors.c
|
||||
LINK_LIBS PRIVATE
|
||||
MLIRCAPIConversion
|
||||
MLIRCAPIExecutionEngine
|
||||
MLIRCAPIRegisterEverything
|
||||
)
|
||||
endif()
|
||||
|
||||
|
113
mlir/test/CAPI/global_constructors.c
Normal file
113
mlir/test/CAPI/global_constructors.c
Normal file
@ -0,0 +1,113 @@
|
||||
//===- global_constructors.c - Test JIT with the global constructors ------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
|
||||
// Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// UNSUPPORTED: target=aarch64{{.*}}, target=arm64{{.*}}
|
||||
/* RUN: mlir-capi-global-constructors-test 2>&1 | FileCheck %s
|
||||
*/
|
||||
/* REQUIRES: host-supports-jit
|
||||
*/
|
||||
|
||||
#include "mlir-c/Conversion.h"
|
||||
#include "mlir-c/ExecutionEngine.h"
|
||||
#include "mlir-c/IR.h"
|
||||
#include "mlir-c/RegisterEverything.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Fills a temporary dialect registry via mlirRegisterAllDialects, appends it
// to `ctx`, and then destroys the registry.
static void registerAllUpstreamDialects(MlirContext ctx) {
  MlirDialectRegistry allDialects = mlirDialectRegistryCreate();
  mlirRegisterAllDialects(allDialects);
  mlirContextAppendDialectRegistry(ctx, allDialects);
  mlirDialectRegistryDestroy(allDialects);
}
|
||||
|
||||
// Runs a pass pipeline on `module`: the arith-to-LLVM conversion nested under
// `func.func`, plus the func-to-LLVM conversion on the top-level manager.
// Prints a message and exits with status 2 if the pipeline fails.
void lowerModuleToLLVM(MlirContext ctx, MlirModule module) {
  MlirPassManager passManager = mlirPassManagerCreate(ctx);
  MlirStringRef funcOpName = mlirStringRefCreateFromCString("func.func");
  MlirOpPassManager funcPasses =
      mlirPassManagerGetNestedUnder(passManager, funcOpName);
  mlirPassManagerAddOwnedPass(passManager,
                              mlirCreateConversionConvertFuncToLLVMPass());
  mlirOpPassManagerAddOwnedPass(
      funcPasses, mlirCreateConversionArithToLLVMConversionPass());

  MlirOperation moduleOp = mlirModuleGetOperation(module);
  MlirLogicalResult status = mlirPassManagerRunOnOp(passManager, moduleOp);
  if (mlirLogicalResultIsFailure(status)) {
    fprintf(stderr, "Unexpected failure running pass pipeline\n");
    exit(2);
  }
  mlirPassManagerDestroy(passManager);
}
|
||||
|
||||
// Number of times the callback below has been invoked.
static int initCnt = 0;

// Callback that will be called during JIT initialization; each call bumps the
// counter by one.
static void initCallback(void) { ++initCnt; }
|
||||
|
||||
// CHECK-LABEL: Running test 'testGlobalCtorJitCallback'
|
||||
void testGlobalCtorJitCallback(void) {
|
||||
MlirContext ctx = mlirContextCreate();
|
||||
registerAllUpstreamDialects(ctx);
|
||||
|
||||
// Create module with global constructor that calls our callback
|
||||
MlirModule module = mlirModuleCreateParse(
|
||||
ctx, mlirStringRefCreateFromCString(
|
||||
// clang-format off
|
||||
"module { \n"
|
||||
" llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero] \n"
|
||||
" llvm.func @ctor() { \n"
|
||||
" func.call @init_callback() : () -> () \n"
|
||||
" llvm.return \n"
|
||||
" } \n"
|
||||
" func.func private @init_callback() attributes { llvm.emit_c_interface } \n"
|
||||
"} \n"
|
||||
// clang-format on
|
||||
));
|
||||
|
||||
lowerModuleToLLVM(ctx, module);
|
||||
mlirRegisterAllLLVMTranslations(ctx);
|
||||
|
||||
// Create execution engine with initialization disabled
|
||||
MlirExecutionEngine jit = mlirExecutionEngineCreate(
|
||||
module, /*optLevel=*/2, /*numPaths=*/0, /*sharedLibPaths=*/NULL,
|
||||
/*enableObjectDump=*/false);
|
||||
|
||||
if (mlirExecutionEngineIsNull(jit)) {
|
||||
fprintf(stderr, "Execution engine creation failed");
|
||||
exit(2);
|
||||
}
|
||||
|
||||
// Register callback symbol before initialization
|
||||
mlirExecutionEngineRegisterSymbol(
|
||||
jit, mlirStringRefCreateFromCString("_mlir_ciface_init_callback"),
|
||||
(void *)(uintptr_t)initCallback);
|
||||
|
||||
mlirExecutionEngineInitialize(jit);
|
||||
|
||||
// CHECK: Init count: 1
|
||||
printf("Init count: %d\n", initCnt);
|
||||
|
||||
mlirExecutionEngineDestroy(jit);
|
||||
mlirModuleDestroy(module);
|
||||
mlirContextDestroy(ctx);
|
||||
}
|
||||
|
||||
// Test driver: prints a "Running test '<name>'" line before each test case
// and then runs it.
int main(void) {

// Two-level stringification so the macro argument is expanded before being
// turned into a string literal. The helper name avoids the reserved
// underscore-plus-uppercase identifier space.
#define STRINGIFY_IMPL(x) #x
#define STRINGIFY(x) STRINGIFY_IMPL(x)
// Wrapped in do/while so the macro expands to a single statement.
#define TEST(test)                                                             \
  do {                                                                         \
    printf("Running test '" STRINGIFY(test) "'\n");                            \
    test();                                                                    \
  } while (0)

  TEST(testGlobalCtorJitCallback);
  return 0;
}
|
@ -141,6 +141,7 @@ if(LLVM_ENABLE_PIC AND TARGET ${LLVM_NATIVE_ARCH})
|
||||
llc
|
||||
mlir_async_runtime
|
||||
mlir-capi-execution-engine-test
|
||||
mlir-capi-global-constructors-test
|
||||
mlir_c_runner_utils
|
||||
mlir_runner_utils
|
||||
mlir_float16_utils
|
||||
|
@ -190,6 +190,7 @@ tools = [
|
||||
"mlir-translate",
|
||||
"mlir-lsp-server",
|
||||
"mlir-capi-execution-engine-test",
|
||||
"mlir-capi-global-constructors-test",
|
||||
"mlir-capi-ir-test",
|
||||
"mlir-capi-irdl-test",
|
||||
"mlir-capi-llvm-test",
|
||||
|
72
mlir/test/python/global_constructors.py
Normal file
72
mlir/test/python/global_constructors.py
Normal file
@ -0,0 +1,72 @@
|
||||
# UNSUPPORTED: target=aarch64{{.*}}, target=arm64{{.*}}
|
||||
# RUN: %PYTHON %s 2>&1 | FileCheck %s
|
||||
# REQUIRES: host-supports-jit
|
||||
import gc, sys, os, tempfile
|
||||
from mlir.ir import *
|
||||
from mlir.passmanager import *
|
||||
from mlir.execution_engine import *
|
||||
from mlir.runtime import *
|
||||
|
||||
|
||||
def log(*args):
    """Print ``args`` to stderr and flush right away.

    Everything goes to one flushed stream so the output can be matched
    together with the errors/info that MLIR emits to stderr.
    """
    print(*args, file=sys.stderr, flush=True)
|
||||
|
||||
|
||||
def run(f):
    """Announce test ``f`` on the log, run it, then check for leaked contexts.

    After the test returns, a garbage collection is forced and the live
    ``Context`` count is asserted to be zero.
    """
    test_name = f.__name__
    log("\nTEST:", test_name)
    f()
    gc.collect()
    live_contexts = Context._get_live_count()
    assert live_contexts == 0
|
||||
|
||||
|
||||
def lowerToLLVM(module):
    """Run the func-to-LLVM pass pipeline on ``module`` and return it.

    The pipeline is applied to ``module.operation``; the same ``module``
    object that was passed in is returned.
    """
    pipeline = "builtin.module(convert-func-to-llvm,reconcile-unrealized-casts)"
    pass_manager = PassManager.parse(pipeline)
    pass_manager.run(module.operation)
    return module
|
||||
|
||||
|
||||
# Test JIT callback in global constructor
# CHECK-LABEL: TEST: testJITCallbackInGlobalCtor
def testJITCallbackInGlobalCtor():
    """Check that the module's global constructor runs on ``initialize()``.

    The constructor calls back into Python. The counter must stay at zero
    after the engine is created, become one after the first ``initialize()``,
    and stay at one after a second ``initialize()``.
    """
    # Imported locally so the test does not rely on `ctypes` being re-exported
    # by one of the wildcard imports at the top of the file.
    import ctypes

    init_cnt = 0

    @ctypes.CFUNCTYPE(None)
    def initCallback():
        nonlocal init_cnt
        init_cnt += 1

    with Context():
        module = Module.parse(
            r"""
llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero]
llvm.func @ctor() {
func.call @init_callback() : () -> ()
llvm.return
}
func.func private @init_callback() attributes { llvm.emit_c_interface }
"""
        )

        # Setup execution engine
        execution_engine = ExecutionEngine(lowerToLLVM(module))

        # Validate initialization hasn't run yet
        assert init_cnt == 0

        # Register callback
        execution_engine.register_runtime("init_callback", initCallback)

        # Initialize and verify
        execution_engine.initialize()
        assert init_cnt == 1

        # Second initialization should be no-op
        execution_engine.initialize()
        assert init_cnt == 1
|
||||
|
||||
|
||||
run(testJITCallbackInGlobalCtor)
|
@ -322,4 +322,55 @@ TEST(NativeMemRefJit, MAYBE_JITCallback) {
|
||||
ASSERT_EQ(elt, coefficient * count++);
|
||||
}
|
||||
|
||||
// Number of times the helper below has been invoked.
static int initCnt = 0;

// A helper function that will be called during the JIT's initialization.
// Each call bumps the counter by one.
static void initCallback() { ++initCnt; }
|
||||
|
||||
// Checks that the module's global constructor is not run when the engine is
// created, runs on the first explicit initialize() call, and is not run again
// by a second initialize() call.
TEST(MLIRExecutionEngine, MAYBE_JITCallbackInGlobalCtor) {
  auto tmBuilderOrError = llvm::orc::JITTargetMachineBuilder::detectHost();
  ASSERT_TRUE(!!tmBuilderOrError);
  if (tmBuilderOrError->getTargetTriple().isAArch64()) {
    // GTEST_SKIP() returns from the test body, so no explicit return follows.
    GTEST_SKIP() << "Skipping global ctor initialization test on Aarch64 "
                    "because of bug #71963";
  }

  // Start from a known count so the expectations below also hold when this
  // test body runs more than once in the same process.
  initCnt = 0;

  std::string moduleStr = R"mlir(
llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero]
llvm.func @ctor() {
func.call @init_callback() : () -> ()
llvm.return
}
func.func private @init_callback() attributes { llvm.emit_c_interface }
)mlir";

  DialectRegistry registry;
  registerAllDialects(registry);
  registerBuiltinDialectTranslation(registry);
  registerLLVMDialectTranslation(registry);
  MLIRContext context(registry);
  auto module = parseSourceString<ModuleOp>(moduleStr, &context);
  ASSERT_TRUE(!!module);
  ASSERT_TRUE(succeeded(lowerToLLVMDialect(*module)));
  ExecutionEngineOptions jitOptions;
  auto jitOrError = ExecutionEngine::create(*module, jitOptions);
  ASSERT_TRUE(!!jitOrError);
  // validate initialization is not run on construction
  ASSERT_EQ(initCnt, 0);
  auto jit = std::move(jitOrError.get());
  // Define any extra symbols so they're available at initialization.
  jit->registerSymbols([&](llvm::orc::MangleAndInterner interner) {
    llvm::orc::SymbolMap symbolMap;
    symbolMap[interner("_mlir_ciface_init_callback")] = {
        llvm::orc::ExecutorAddr::fromPtr(initCallback),
        llvm::JITSymbolFlags::Exported};
    return symbolMap;
  });
  jit->initialize();
  // validate the side effect of initialization
  ASSERT_EQ(initCnt, 1);
  // next initialization should be noop
  jit->initialize();
  ASSERT_EQ(initCnt, 1);
}
|
||||
|
||||
#endif // _WIN32
|
||||
|
Loading…
x
Reference in New Issue
Block a user