flang-rt: build descriptor, environment and stop support for native GPU targets

Introduce RT_GPU_TARGET (defined to 1 when __AMDGPU__ or __NVPTX__ is
defined) and use it to:
  - avoid defining USE_PTHREADS in lock.h and select the pseudo lock in
    tools.h,
  - compile out the code from SetEnvironmentDefaults through
    ExecutionEnvironment::UnsetEnv in environment.cpp and skip the
    `environ` declaration,
  - report no IEEE exceptions, make CloseAllExternalUnits a no-op and
    drop the PAUSE helpers in stop.cpp,
  - skip io::FlushOutputOnCrash in terminator.cpp.
descriptor.cpp, environment.cpp and stop.cpp are added to gpu_sources, and
an offload test exercising allocatable arrays inside `omp target` is added.

Review notes:
  - gpu_sources is kept in alphabetical order: environment.cpp is listed
    before extrema.cpp.
  - NOTE(review): the `<mutex>` token in the lock.h comment and the
    `<cstdio>` / `<cstdlib>` header names in the environment.cpp context
    lines were reconstructed; confirm against the tree before applying.
  - NOTE(review): index hashes are carried over unchanged; regenerate the
    patch if exact blob hashes matter.

diff --git a/flang-rt/include/flang-rt/runtime/lock.h b/flang-rt/include/flang-rt/runtime/lock.h
index 7c8853424573..7147c3a6b940 100644
--- a/flang-rt/include/flang-rt/runtime/lock.h
+++ b/flang-rt/include/flang-rt/runtime/lock.h
@@ -16,7 +16,7 @@
 
 // Avoid <mutex> if possible to avoid introduction of C++ runtime
 // library dependence.
-#ifndef _WIN32
+#if !defined(_WIN32) && !RT_GPU_TARGET
 #define USE_PTHREADS 1
 #else
 #undef USE_PTHREADS
diff --git a/flang-rt/include/flang-rt/runtime/tools.h b/flang-rt/include/flang-rt/runtime/tools.h
index 1939c4d907be..a45c2ac98f2f 100644
--- a/flang-rt/include/flang-rt/runtime/tools.h
+++ b/flang-rt/include/flang-rt/runtime/tools.h
@@ -35,7 +35,7 @@
 #define RT_PRETTY_FUNCTION __func__
 #endif
 
-#if defined(RT_DEVICE_COMPILATION)
+#if defined(RT_DEVICE_COMPILATION) || RT_GPU_TARGET
 // Use the pseudo lock and pseudo file unit implementations
 // for the device.
 #define RT_USE_PSEUDO_LOCK 1
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index 9fa8376e9b99..d18ce6caccaa 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -109,9 +109,11 @@ set(gpu_sources
   copy.cpp
   derived-api.cpp
   derived.cpp
+  descriptor.cpp
   dot-product.cpp
   edit-output.cpp
+  environment.cpp
   extrema.cpp
   findloc.cpp
   format.cpp
   inquiry.cpp
@@ -127,6 +129,7 @@ set(gpu_sources
   product.cpp
   ragged.cpp
   stat.cpp
+  stop.cpp
   sum.cpp
   support.cpp
   terminator.cpp
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index 04bbb3877a0d..6c9e76afb117 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -8,10 +8,10 @@
 
 #include "flang-rt/runtime/descriptor.h"
 #include "ISO_Fortran_util.h"
-#include "memory.h"
 #include "flang-rt/runtime/allocator-registry.h"
 #include "flang-rt/runtime/derived.h"
 #include "flang-rt/runtime/environment.h"
+#include "flang-rt/runtime/memory.h"
 #include "flang-rt/runtime/stat.h"
 #include "flang-rt/runtime/terminator.h"
 #include "flang-rt/runtime/type-info.h"
diff --git a/flang-rt/lib/runtime/environment.cpp b/flang-rt/lib/runtime/environment.cpp
index ae4d6d305f40..53e13cd929bf 100644
--- a/flang-rt/lib/runtime/environment.cpp
+++ b/flang-rt/lib/runtime/environment.cpp
@@ -8,7 +8,7 @@
 
 #include "flang-rt/runtime/environment.h"
 #include "environment-default-list.h"
-#include "memory.h"
+#include "flang-rt/runtime/memory.h"
 #include "flang-rt/runtime/tools.h"
 #include <cstdio>
 #include <cstdlib>
@@ -19,10 +19,11 @@
 #ifdef _MSC_VER
 extern char **_environ;
 #endif
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || RT_GPU_TARGET
 // FreeBSD has environ in crt rather than libc. Using "extern char** environ"
 // in the code of a shared library makes it fail to link with -Wl,--no-undefined
 // See https://reviews.freebsd.org/D30842#840642
+// GPU targets do not provide environ.
 #else
 extern char **environ;
 #endif
@@ -51,6 +52,8 @@ static void (*PostConfigEnvCallback[ExecutionEnvironment::nConfigEnvCallback])(
     int, const char *[], const char *[], const EnvironmentDefaultList *){
     nullptr};
 
+// No environment support on the GPU.
+#if !RT_GPU_TARGET
 static void SetEnvironmentDefaults(const EnvironmentDefaultList *envDefaults) {
   if (!envDefaults) {
     return;
@@ -314,6 +317,7 @@ std::int32_t ExecutionEnvironment::UnsetEnv(
 
   return status;
 }
+#endif
 
 extern "C" {
 
diff --git a/flang-rt/lib/runtime/stop.cpp b/flang-rt/lib/runtime/stop.cpp
index 75fa64c4c003..5abb80af7e66 100644
--- a/flang-rt/lib/runtime/stop.cpp
+++ b/flang-rt/lib/runtime/stop.cpp
@@ -24,10 +24,14 @@
 extern "C" {
 
 [[maybe_unused]] static void DescribeIEEESignaledExceptions() {
+#if defined(RT_DEVICE_COMPILATION) || RT_GPU_TARGET
+  unsigned excepts{}; // No fenv support on the device.
+#else
 #ifdef fetestexcept // a macro in some environments; omit std::
   auto excepts{fetestexcept(FE_ALL_EXCEPT)};
 #else
   auto excepts{std::fetestexcept(FE_ALL_EXCEPT)};
+#endif
 #endif
   if (excepts) {
     std::fputs("IEEE arithmetic exceptions signaled:", stderr);
@@ -61,8 +65,10 @@ extern "C" {
 }
 
 static void CloseAllExternalUnits(const char *why) {
+#if !RT_GPU_TARGET
   Fortran::runtime::io::IoErrorHandler handler{why};
   Fortran::runtime::io::ExternalFileUnit::CloseAll(handler);
+#endif
 }
 
 [[noreturn]] RT_API_ATTRS void RTNAME(StopStatement)(
@@ -134,6 +140,7 @@ static void CloseAllExternalUnits(const char *why) {
 #endif
 }
 
+#if !RT_GPU_TARGET
 static bool StartPause() {
   if (Fortran::runtime::io::IsATerminal(0)) {
     Fortran::runtime::io::IoErrorHandler handler{"PAUSE statement"};
@@ -173,6 +180,7 @@ void RTNAME(PauseStatementText)(const char *code, std::size_t length) {
     EndPause();
   }
 }
+#endif
 
 [[noreturn]] void RTNAME(FailImageStatement)() {
   CloseAllExternalUnits("FAIL IMAGE statement");
diff --git a/flang-rt/lib/runtime/terminator.cpp b/flang-rt/lib/runtime/terminator.cpp
index e8d64223919e..2c06c8de74d0 100644
--- a/flang-rt/lib/runtime/terminator.cpp
+++ b/flang-rt/lib/runtime/terminator.cpp
@@ -70,8 +70,11 @@ RT_API_ATTRS void Terminator::CrashHeader() const {
   std::printf("\n");
 #else
   fputc('\n', stderr);
+  // TODO: This should flush the buffers through the RPC interface.
+#if !RT_GPU_TARGET
   // FIXME: re-enable the flush along with the IO enabling.
   io::FlushOutputOnCrash(*this);
+#endif
 #endif
   NotifyOtherImagesOfErrorTermination(EXIT_FAILURE);
 #if defined(RT_DEVICE_COMPILATION)
diff --git a/flang/include/flang/Common/api-attrs.h b/flang/include/flang/Common/api-attrs.h
index fd524ee34ccf..efb495e7f132 100644
--- a/flang/include/flang/Common/api-attrs.h
+++ b/flang/include/flang/Common/api-attrs.h
@@ -133,6 +133,18 @@
 #undef RT_DEVICE_COMPILATION
 #endif
 
+/*
+ * RT_GPU_TARGET is defined when compiling natively for a GPU
+ * target (AMDGPU or NVPTX) using a GPU-hosted libc/libc++. This is
+ * distinct from RT_DEVICE_COMPILATION which covers CUDA and OpenMP
+ * offload paths that use separate host/device compilation.
+ */
+#if defined(__AMDGPU__) || defined(__NVPTX__)
+#define RT_GPU_TARGET 1
+#else
+#undef RT_GPU_TARGET
+#endif
+
 /*
  * Recurrence in the call graph prevents computing minimal stack size
  * required for a kernel execution. This macro can be used to disable
diff --git a/offload/test/offloading/fortran/target-descriptor-ops.f90 b/offload/test/offloading/fortran/target-descriptor-ops.f90
new file mode 100644
index 000000000000..43dae03b3399
--- /dev/null
+++ b/offload/test/offloading/fortran/target-descriptor-ops.f90
@@ -0,0 +1,50 @@
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+  implicit none
+  integer :: result
+
+  ! CHECK: 100
+  result = 0
+  !$omp target map(from: result)
+  block
+    integer, allocatable :: arr(:)
+    integer :: i
+    allocate(arr(4))
+    do i = 1, 4
+      arr(i) = i * 10
+    end do
+    result = arr(1) + arr(2) + arr(3) + arr(4)
+    deallocate(arr)
+  end block
+  !$omp end target
+  print *, result
+
+  ! CHECK: 21
+  result = 0
+  !$omp target map(from: result)
+  block
+    integer, allocatable :: mat(:,:)
+    allocate(mat(2, 3))
+    mat(1,1) = 1; mat(2,1) = 2
+    mat(1,2) = 3; mat(2,2) = 4
+    mat(1,3) = 5; mat(2,3) = 6
+    result = mat(1,1) + mat(2,1) + mat(1,2) + mat(2,2) + mat(1,3) + mat(2,3)
+    deallocate(mat)
+  end block
+  !$omp end target
+  print *, result
+
+  ! CHECK: 17
+  result = 0
+  !$omp target map(from: result)
+  block
+    integer, allocatable :: arr(:)
+    allocate(arr(8))
+    result = size(arr) + lbound(arr, 1) + ubound(arr, 1)
+    deallocate(arr)
+  end block
+  !$omp end target
+  print *, result
+end program main