[flang-rt] Enable more runtime functions for the GPU target (#183649)
Summary: This primarily enables `stop.cpp` and `descriptor.cpp`, which required a little wrangling to get them to compile. Unlike the CUDA build, this build uses an in-tree libc++ configured for the GPU. That libc++ is configured without thread, environment, or filesystem support, and it is not POSIX at all, so there are no mutexes, pthreads, or getenv/setenv. I tested stop, but I don't know whether it is actually legal to exit from OpenMP offloading.
This commit is contained in:
parent
67a51ea34d
commit
c49460bae7
@ -16,7 +16,7 @@
|
||||
|
||||
// Avoid <mutex> if possible to avoid introduction of C++ runtime
|
||||
// library dependence.
|
||||
#ifndef _WIN32
|
||||
#if !defined(_WIN32) && !RT_GPU_TARGET
|
||||
#define USE_PTHREADS 1
|
||||
#else
|
||||
#undef USE_PTHREADS
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
#define RT_PRETTY_FUNCTION __func__
|
||||
#endif
|
||||
|
||||
#if defined(RT_DEVICE_COMPILATION)
|
||||
#if defined(RT_DEVICE_COMPILATION) || RT_GPU_TARGET
|
||||
// Use the pseudo lock and pseudo file unit implementations
|
||||
// for the device.
|
||||
#define RT_USE_PSEUDO_LOCK 1
|
||||
|
||||
@ -109,9 +109,11 @@ set(gpu_sources
|
||||
copy.cpp
|
||||
derived-api.cpp
|
||||
derived.cpp
|
||||
descriptor.cpp
|
||||
dot-product.cpp
|
||||
edit-output.cpp
|
||||
extrema.cpp
|
||||
environment.cpp
|
||||
findloc.cpp
|
||||
format.cpp
|
||||
inquiry.cpp
|
||||
@ -127,6 +129,7 @@ set(gpu_sources
|
||||
product.cpp
|
||||
ragged.cpp
|
||||
stat.cpp
|
||||
stop.cpp
|
||||
sum.cpp
|
||||
support.cpp
|
||||
terminator.cpp
|
||||
|
||||
@ -8,10 +8,10 @@
|
||||
|
||||
#include "flang-rt/runtime/descriptor.h"
|
||||
#include "ISO_Fortran_util.h"
|
||||
#include "memory.h"
|
||||
#include "flang-rt/runtime/allocator-registry.h"
|
||||
#include "flang-rt/runtime/derived.h"
|
||||
#include "flang-rt/runtime/environment.h"
|
||||
#include "flang-rt/runtime/memory.h"
|
||||
#include "flang-rt/runtime/stat.h"
|
||||
#include "flang-rt/runtime/terminator.h"
|
||||
#include "flang-rt/runtime/type-info.h"
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
|
||||
#include "flang-rt/runtime/environment.h"
|
||||
#include "environment-default-list.h"
|
||||
#include "memory.h"
|
||||
#include "flang-rt/runtime/memory.h"
|
||||
#include "flang-rt/runtime/tools.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
@ -19,10 +19,11 @@
|
||||
#ifdef _MSC_VER
|
||||
extern char **_environ;
|
||||
#endif
|
||||
#elif defined(__FreeBSD__)
|
||||
#elif defined(__FreeBSD__) || RT_GPU_TARGET
|
||||
// FreeBSD has environ in crt rather than libc. Using "extern char** environ"
|
||||
// in the code of a shared library makes it fail to link with -Wl,--no-undefined
|
||||
// See https://reviews.freebsd.org/D30842#840642
|
||||
// GPU targets do not provide environ.
|
||||
#else
|
||||
extern char **environ;
|
||||
#endif
|
||||
@ -51,6 +52,8 @@ static void (*PostConfigEnvCallback[ExecutionEnvironment::nConfigEnvCallback])(
|
||||
int, const char *[], const char *[], const EnvironmentDefaultList *){
|
||||
nullptr};
|
||||
|
||||
// No environment support on the GPU.
|
||||
#if !RT_GPU_TARGET
|
||||
static void SetEnvironmentDefaults(const EnvironmentDefaultList *envDefaults) {
|
||||
if (!envDefaults) {
|
||||
return;
|
||||
@ -314,6 +317,7 @@ std::int32_t ExecutionEnvironment::UnsetEnv(
|
||||
|
||||
return status;
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
||||
@ -24,10 +24,14 @@
|
||||
extern "C" {
|
||||
|
||||
[[maybe_unused]] static void DescribeIEEESignaledExceptions() {
|
||||
#if defined(RT_DEVICE_COMPILATION) || RT_GPU_TARGET
|
||||
unsigned excepts{}; // No fenv support on the device.
|
||||
#else
|
||||
#ifdef fetestexcept // a macro in some environments; omit std::
|
||||
auto excepts{fetestexcept(FE_ALL_EXCEPT)};
|
||||
#else
|
||||
auto excepts{std::fetestexcept(FE_ALL_EXCEPT)};
|
||||
#endif
|
||||
#endif
|
||||
if (excepts) {
|
||||
std::fputs("IEEE arithmetic exceptions signaled:", stderr);
|
||||
@ -61,8 +65,10 @@ extern "C" {
|
||||
}
|
||||
|
||||
static void CloseAllExternalUnits(const char *why) {
|
||||
#if !RT_GPU_TARGET
|
||||
Fortran::runtime::io::IoErrorHandler handler{why};
|
||||
Fortran::runtime::io::ExternalFileUnit::CloseAll(handler);
|
||||
#endif
|
||||
}
|
||||
|
||||
[[noreturn]] RT_API_ATTRS void RTNAME(StopStatement)(
|
||||
@ -134,6 +140,7 @@ static void CloseAllExternalUnits(const char *why) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !RT_GPU_TARGET
|
||||
static bool StartPause() {
|
||||
if (Fortran::runtime::io::IsATerminal(0)) {
|
||||
Fortran::runtime::io::IoErrorHandler handler{"PAUSE statement"};
|
||||
@ -173,6 +180,7 @@ void RTNAME(PauseStatementText)(const char *code, std::size_t length) {
|
||||
EndPause();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
[[noreturn]] void RTNAME(FailImageStatement)() {
|
||||
CloseAllExternalUnits("FAIL IMAGE statement");
|
||||
|
||||
@ -70,8 +70,11 @@ RT_API_ATTRS void Terminator::CrashHeader() const {
|
||||
std::printf("\n");
|
||||
#else
|
||||
fputc('\n', stderr);
|
||||
// TODO: This should flush the buffers through the RPC interface.
|
||||
#if !RT_GPU_TARGET
|
||||
// FIXME: re-enable the flush along with the IO enabling.
|
||||
io::FlushOutputOnCrash(*this);
|
||||
#endif
|
||||
#endif
|
||||
NotifyOtherImagesOfErrorTermination(EXIT_FAILURE);
|
||||
#if defined(RT_DEVICE_COMPILATION)
|
||||
|
||||
@ -133,6 +133,18 @@
|
||||
#undef RT_DEVICE_COMPILATION
|
||||
#endif
|
||||
|
||||
/*
|
||||
* RT_GPU_TARGET is defined when compiling natively for a GPU
|
||||
* target (AMDGPU or NVPTX) using a GPU-hosted libc/libc++. This is
|
||||
* distinct from RT_DEVICE_COMPILATION which covers CUDA and OpenMP
|
||||
* offload paths that use separate host/device compilation.
|
||||
*/
|
||||
#if defined(__AMDGPU__) || defined(__NVPTX__)
|
||||
#define RT_GPU_TARGET 1
|
||||
#else
|
||||
#undef RT_GPU_TARGET
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Recurrence in the call graph prevents computing minimal stack size
|
||||
* required for a kernel execution. This macro can be used to disable
|
||||
|
||||
50
offload/test/offloading/fortran/target-descriptor-ops.f90
Normal file
50
offload/test/offloading/fortran/target-descriptor-ops.f90
Normal file
@ -0,0 +1,50 @@
|
||||
! REQUIRES: flang, amdgpu
|
||||
|
||||
! RUN: %libomptarget-compile-fortran-run-and-check-generic
|
||||
program main
|
||||
implicit none
|
||||
integer :: result
|
||||
|
||||
! CHECK: 100
|
||||
result = 0
|
||||
!$omp target map(from: result)
|
||||
block
|
||||
integer, allocatable :: arr(:)
|
||||
integer :: i
|
||||
allocate(arr(4))
|
||||
do i = 1, 4
|
||||
arr(i) = i * 10
|
||||
end do
|
||||
result = arr(1) + arr(2) + arr(3) + arr(4)
|
||||
deallocate(arr)
|
||||
end block
|
||||
!$omp end target
|
||||
print *, result
|
||||
|
||||
! CHECK: 21
|
||||
result = 0
|
||||
!$omp target map(from: result)
|
||||
block
|
||||
integer, allocatable :: mat(:,:)
|
||||
allocate(mat(2, 3))
|
||||
mat(1,1) = 1; mat(2,1) = 2
|
||||
mat(1,2) = 3; mat(2,2) = 4
|
||||
mat(1,3) = 5; mat(2,3) = 6
|
||||
result = mat(1,1) + mat(2,1) + mat(1,2) + mat(2,2) + mat(1,3) + mat(2,3)
|
||||
deallocate(mat)
|
||||
end block
|
||||
!$omp end target
|
||||
print *, result
|
||||
|
||||
! CHECK: 17
|
||||
result = 0
|
||||
!$omp target map(from: result)
|
||||
block
|
||||
integer, allocatable :: arr(:)
|
||||
allocate(arr(8))
|
||||
result = size(arr) + lbound(arr, 1) + ubound(arr, 1)
|
||||
deallocate(arr)
|
||||
end block
|
||||
!$omp end target
|
||||
print *, result
|
||||
end program main
|
||||
Loading…
x
Reference in New Issue
Block a user