//===-- lib/runtime/io-api-server.cpp ---------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Implements the RPC server-side handlling of the I/O statement API needed for // basic list-directed output (PRINT *) of intrinsic types for the GPU. #include "array.h" #include "io-api-gpu.h" #include "flang-rt/runtime/memory.h" #include "flang-rt/runtime/terminator.h" #include "flang/Runtime/io-api.h" #include #include #include #include #include namespace Fortran::runtime::io { namespace { // Context used to chain the IO operations once run. struct IOContext { Cookie cookie = nullptr; enum Iostat result = IostatOk; }; // The base class to store deferred execution of a function. Uses function // pointers for type erasure to avoid virtual dispatch. struct DeferredFunctionBase { using ExecuteFn = void (*)(void *, IOContext &); using DestroyFn = void (*)(void *); DeferredFunctionBase(void *impl, ExecuteFn exec, DestroyFn dtor) : impl_(impl), execute_(exec), destroy_(dtor) {} DeferredFunctionBase(const DeferredFunctionBase &) = delete; DeferredFunctionBase &operator=(const DeferredFunctionBase &) = delete; DeferredFunctionBase(DeferredFunctionBase &&other) : impl_(other.impl_), execute_(other.execute_), destroy_(other.destroy_) { other.impl_ = nullptr; } DeferredFunctionBase &operator=(DeferredFunctionBase &&other) { if (this != &other) { reset(); impl_ = other.impl_; execute_ = other.execute_; destroy_ = other.destroy_; other.impl_ = nullptr; } return *this; } ~DeferredFunctionBase() { reset(); } void execute(IOContext &ctx) { execute_(impl_, ctx); } static OwningPtr TempString(const char *str, std::size_t size) { if (!str) { return {}; } OwningPtr temp = SizedNew{Terminator{__FILE__, __LINE__}}(size); std::memcpy(temp.get(), str, size); return OwningPtr(temp.release()); } static OwningPtr TempString(const char *str) { if (!str) { return {}; } return TempString(str, std::strlen(str) + 1); } private: void reset() { if (impl_) { destroy_(impl_); FreeMemory(impl_); impl_ = nullptr; } } void *impl_ = nullptr; ExecuteFn execute_ = nullptr; DestroyFn destroy_ = nullptr; }; // Fortran does not support nested or recursive I/O, which is problematic for // parallel execution on a GPU. To support this, we defer execution of runtime // functions coming from the GPU's client until the end of that sequence is // reached. This allows us to finish them in a single pass. template struct DeferredFunction { FnTy fn_; std::tuple...> args_; DeferredFunction(FnTy &&fn, Args &&...args) : fn_(std::forward(fn)), args_(std::forward(args)...) {} // When executing the final command queue we need to replace the temporary // values obtained from the GPU with the returned values from the actual // runtime functions. void execute(IOContext &ctx) { auto caller = [&](auto &&...args) { return fn_(Rewrite(args, ctx)...); }; using RetTy = std::invoke_result_t(), ctx))...>; if constexpr (std::is_same_v) { ctx.cookie = std::apply(caller, args_); } else if constexpr (std::is_same_v) { ctx.result = std::apply(caller, args_); } else { std::apply(caller, args_); } } private: template T &Rewrite(T &v, IOContext &) { return v; } const char *Rewrite(OwningPtr &p, IOContext &) { return p.get(); } Cookie Rewrite(Cookie, IOContext &ctx) { return ctx.cookie; } }; template DeferredFunctionBase MakeDeferred(Fn &&fn, Args &&...args) { Terminator terminator{__FILE__, __LINE__}; using Ty = DeferredFunction; auto ptr = SizedNew{terminator}( sizeof(Ty), std::forward(fn), std::forward(args)...); void *raw = ptr.release(); return DeferredFunctionBase( raw, [](void *self, IOContext &ctx) { static_cast(self)->execute(ctx); }, [](void *self) { static_cast(self)->~Ty(); }); } // The context associated with the queue of deferred functions. This serves as // our cookie object while executing this on the GPU. struct DeferredContext { IOContext ioCtx; DynamicArray commands; }; template bool EnqueueDeferred(FnTy &&fn, Cookie cookie, Args &&...args) { DeferredContext *ctx = reinterpret_cast(cookie); ctx->commands.emplace_back( MakeDeferred(fn, cookie, std::forward(args)...)); return true; } template rpc::RPCStatus HandleOpcodesImpl(rpc::Server::Port &port) { switch (port.get_opcode()) { case BeginExternalListOutput_Opcode: rpc::invoke(port, [](ExternalUnit unitNumber, const char *sourceFile, int sourceLine) -> Cookie { DeferredContext *ctx = new (AllocateMemoryOrCrash( Terminator{__FILE__, __LINE__}, sizeof(DeferredContext))) DeferredContext; ctx->commands.emplace_back( MakeDeferred(IONAME(BeginExternalListOutput), unitNumber, DeferredFunctionBase::TempString(sourceFile), sourceLine)); return reinterpret_cast(ctx); }); break; case BeginExternalFormattedOutput_Opcode: rpc::invoke(port, [](const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) -> Cookie { Terminator terminator{__FILE__, __LINE__}; if (formatDescriptor) terminator.Crash("Non-trivial format descriptors are unsupported"); DeferredContext *ctx = new (AllocateMemoryOrCrash(terminator, sizeof(DeferredContext))) DeferredContext; ctx->commands.emplace_back( MakeDeferred(IONAME(BeginExternalFormattedOutput), DeferredFunctionBase::TempString(format, formatLength), formatLength, formatDescriptor, unitNumber, DeferredFunctionBase::TempString(sourceFile), sourceLine)); return reinterpret_cast(ctx); }); break; case EnableHandlers_Opcode: rpc::invoke(port, [](Cookie cookie, bool hasIoStat, bool hasErr, bool hasEnd, bool hasEor, bool hasIoMsg) -> void { EnqueueDeferred(IONAME(EnableHandlers), cookie, hasIoStat, hasErr, hasEnd, hasEor, hasIoMsg); }); break; case EndIoStatement_Opcode: rpc::invoke(port, [](Cookie cookie) -> Iostat { DeferredContext *ctx = reinterpret_cast(cookie); ctx->commands.emplace_back(MakeDeferred(IONAME(EndIoStatement), cookie)); for (auto &fn : ctx->commands) { fn.execute(ctx->ioCtx); } Iostat result = ctx->ioCtx.result; ctx->~DeferredContext(); FreeMemory(ctx); return result; }); break; case OutputInteger8_Opcode: rpc::invoke(port, [](Cookie cookie, std::int8_t n) -> bool { return EnqueueDeferred(IONAME(OutputInteger8), cookie, n); }); break; case OutputInteger16_Opcode: rpc::invoke(port, [](Cookie cookie, std::int16_t n) -> bool { return EnqueueDeferred(IONAME(OutputInteger16), cookie, n); }); break; case OutputInteger32_Opcode: rpc::invoke(port, [](Cookie cookie, std::int32_t n) -> bool { return EnqueueDeferred(IONAME(OutputInteger32), cookie, n); }); break; case OutputInteger64_Opcode: rpc::invoke(port, [](Cookie cookie, std::int64_t n) -> bool { return EnqueueDeferred(IONAME(OutputInteger64), cookie, n); }); break; #ifdef __SIZEOF_INT128__ case OutputInteger128_Opcode: rpc::invoke(port, [](Cookie cookie, common::int128_t n) -> bool { return EnqueueDeferred(IONAME(OutputInteger128), cookie, n); }); break; #endif case OutputReal32_Opcode: rpc::invoke(port, [](Cookie cookie, float x) -> bool { return EnqueueDeferred(IONAME(OutputReal32), cookie, x); }); break; case OutputReal64_Opcode: rpc::invoke(port, [](Cookie cookie, double x) -> bool { return EnqueueDeferred(IONAME(OutputReal64), cookie, x); }); break; case OutputComplex32_Opcode: rpc::invoke(port, [](Cookie cookie, float re, float im) -> bool { return EnqueueDeferred(IONAME(OutputComplex32), cookie, re, im); }); break; case OutputComplex64_Opcode: rpc::invoke( port, [](Cookie cookie, double re, double im) -> bool { return EnqueueDeferred(IONAME(OutputComplex64), cookie, re, im); }); break; case OutputAscii_Opcode: rpc::invoke( port, [](Cookie cookie, const char *x, std::size_t length) -> bool { return EnqueueDeferred(IONAME(OutputAscii), cookie, DeferredFunctionBase::TempString(x, length), length); }); break; case OutputCharacter_Opcode: rpc::invoke(port, [](Cookie cookie, const char *x, std::size_t length, int kind) -> bool { return EnqueueDeferred(IONAME(OutputCharacter), cookie, DeferredFunctionBase::TempString(x, length * kind), length, kind); }); break; case OutputLogical_Opcode: rpc::invoke(port, [](Cookie cookie, bool truth) -> bool { return EnqueueDeferred(IONAME(OutputLogical), cookie, truth); }); break; default: return rpc::RPC_UNHANDLED_OPCODE; } return rpc::RPC_SUCCESS; } } // namespace RT_EXT_API_GROUP_BEGIN std::uint32_t IODEF(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes) { rpc::Server::Port &port = *reinterpret_cast(raw); switch (numLanes) { case 1: return HandleOpcodesImpl<1>(port); case 32: return HandleOpcodesImpl<32>(port); case 64: return HandleOpcodesImpl<64>(port); default: return rpc::RPC_ERROR; } } RT_EXT_API_GROUP_END } // namespace Fortran::runtime::io