[flang-rt] Enable more runtime functions for the GPU target (#183649)

Summary:
This enables primarily `stop.cpp` and `descriptor.cpp`. Requires a
little bit of wrangling to get it to compile. Unlike the CUDA build,
this build uses an in-tree libc++ configured for the GPU. This is
configured without thread support, environment, or filesystem, and it is
not POSIX at all. So, no mutexes, pthreads, or get/setenv.

I tested stop, but I don't know if it's actually legal to exit from
OpenMP offloading.
This commit is contained in:
Joseph Huber 2026-02-27 12:27:39 -06:00 committed by GitHub
parent 67a51ea34d
commit c49460bae7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 85 additions and 5 deletions

View File

@ -16,7 +16,7 @@
// Avoid <mutex> if possible to avoid introduction of C++ runtime
// library dependence.
#ifndef _WIN32
#if !defined(_WIN32) && !RT_GPU_TARGET
#define USE_PTHREADS 1
#else
#undef USE_PTHREADS

View File

@ -35,7 +35,7 @@
#define RT_PRETTY_FUNCTION __func__
#endif
#if defined(RT_DEVICE_COMPILATION)
#if defined(RT_DEVICE_COMPILATION) || RT_GPU_TARGET
// Use the pseudo lock and pseudo file unit implementations
// for the device.
#define RT_USE_PSEUDO_LOCK 1

View File

@ -109,9 +109,11 @@ set(gpu_sources
copy.cpp
derived-api.cpp
derived.cpp
descriptor.cpp
dot-product.cpp
edit-output.cpp
extrema.cpp
environment.cpp
findloc.cpp
format.cpp
inquiry.cpp
@ -127,6 +129,7 @@ set(gpu_sources
product.cpp
ragged.cpp
stat.cpp
stop.cpp
sum.cpp
support.cpp
terminator.cpp

View File

@ -8,10 +8,10 @@
#include "flang-rt/runtime/descriptor.h"
#include "ISO_Fortran_util.h"
#include "memory.h"
#include "flang-rt/runtime/allocator-registry.h"
#include "flang-rt/runtime/derived.h"
#include "flang-rt/runtime/environment.h"
#include "flang-rt/runtime/memory.h"
#include "flang-rt/runtime/stat.h"
#include "flang-rt/runtime/terminator.h"
#include "flang-rt/runtime/type-info.h"

View File

@ -8,7 +8,7 @@
#include "flang-rt/runtime/environment.h"
#include "environment-default-list.h"
#include "memory.h"
#include "flang-rt/runtime/memory.h"
#include "flang-rt/runtime/tools.h"
#include <cstdio>
#include <cstdlib>
@ -19,10 +19,11 @@
#ifdef _MSC_VER
extern char **_environ;
#endif
#elif defined(__FreeBSD__)
#elif defined(__FreeBSD__) || RT_GPU_TARGET
// FreeBSD has environ in crt rather than libc. Using "extern char** environ"
// in the code of a shared library makes it fail to link with -Wl,--no-undefined
// See https://reviews.freebsd.org/D30842#840642
// GPU targets do not provide environ.
#else
extern char **environ;
#endif
@ -51,6 +52,8 @@ static void (*PostConfigEnvCallback[ExecutionEnvironment::nConfigEnvCallback])(
int, const char *[], const char *[], const EnvironmentDefaultList *){
nullptr};
// No environment support on the GPU.
#if !RT_GPU_TARGET
static void SetEnvironmentDefaults(const EnvironmentDefaultList *envDefaults) {
if (!envDefaults) {
return;
@ -314,6 +317,7 @@ std::int32_t ExecutionEnvironment::UnsetEnv(
return status;
}
#endif
extern "C" {

View File

@ -24,10 +24,14 @@
extern "C" {
[[maybe_unused]] static void DescribeIEEESignaledExceptions() {
#if defined(RT_DEVICE_COMPILATION) || RT_GPU_TARGET
unsigned excepts{}; // No fenv support on the device.
#else
#ifdef fetestexcept // a macro in some environments; omit std::
auto excepts{fetestexcept(FE_ALL_EXCEPT)};
#else
auto excepts{std::fetestexcept(FE_ALL_EXCEPT)};
#endif
#endif
if (excepts) {
std::fputs("IEEE arithmetic exceptions signaled:", stderr);
@ -61,8 +65,10 @@ extern "C" {
}
static void CloseAllExternalUnits(const char *why) {
#if !RT_GPU_TARGET
Fortran::runtime::io::IoErrorHandler handler{why};
Fortran::runtime::io::ExternalFileUnit::CloseAll(handler);
#endif
}
[[noreturn]] RT_API_ATTRS void RTNAME(StopStatement)(
@ -134,6 +140,7 @@ static void CloseAllExternalUnits(const char *why) {
#endif
}
#if !RT_GPU_TARGET
static bool StartPause() {
if (Fortran::runtime::io::IsATerminal(0)) {
Fortran::runtime::io::IoErrorHandler handler{"PAUSE statement"};
@ -173,6 +180,7 @@ void RTNAME(PauseStatementText)(const char *code, std::size_t length) {
EndPause();
}
}
#endif
[[noreturn]] void RTNAME(FailImageStatement)() {
CloseAllExternalUnits("FAIL IMAGE statement");

View File

@ -70,8 +70,11 @@ RT_API_ATTRS void Terminator::CrashHeader() const {
std::printf("\n");
#else
fputc('\n', stderr);
// TODO: This should flush the buffers through the RPC interface.
#if !RT_GPU_TARGET
// FIXME: re-enable the flush along with the IO enabling.
io::FlushOutputOnCrash(*this);
#endif
#endif
NotifyOtherImagesOfErrorTermination(EXIT_FAILURE);
#if defined(RT_DEVICE_COMPILATION)

View File

@ -133,6 +133,18 @@
#undef RT_DEVICE_COMPILATION
#endif
/*
* RT_GPU_TARGET is defined when compiling natively for a GPU
* target (AMDGPU or NVPTX) using a GPU-hosted libc/libc++. This is
* distinct from RT_DEVICE_COMPILATION which covers CUDA and OpenMP
* offload paths that use separate host/device compilation.
*/
#if defined(__AMDGPU__) || defined(__NVPTX__)
#define RT_GPU_TARGET 1
#else
#undef RT_GPU_TARGET
#endif
/*
* Recurrence in the call graph prevents computing minimal stack size
* required for a kernel execution. This macro can be used to disable

View File

@ -0,0 +1,50 @@
! REQUIRES: flang, amdgpu
! RUN: %libomptarget-compile-fortran-run-and-check-generic
! Offloading test: each OpenMP target region below allocates a local
! allocatable array, uses it, and deallocates it, then hands a single integer
! back to the host through map(from: result). The host prints that integer,
! and the expected-output directive preceding each region pins its value.
! NOTE(review): presumably this exercises the allocatable/descriptor runtime
! support when built for the GPU target - confirm against the runtime sources.
program main
implicit none
integer :: result
! CHECK: 100
! Case 1: rank-1 allocatable of 4 elements filled with 10, 20, 30, 40;
! the sum of the elements is 100.
result = 0
!$omp target map(from: result)
block
integer, allocatable :: arr(:)
integer :: i
allocate(arr(4))
do i = 1, 4
arr(i) = i * 10
end do
result = arr(1) + arr(2) + arr(3) + arr(4)
deallocate(arr)
end block
!$omp end target
! Printed outside the target region, after result has been mapped back.
print *, result
! CHECK: 21
! Case 2: rank-2 allocatable of shape (2, 3) holding the values 1 through 6;
! the sum of all six elements is 21.
result = 0
!$omp target map(from: result)
block
integer, allocatable :: mat(:,:)
allocate(mat(2, 3))
mat(1,1) = 1; mat(2,1) = 2
mat(1,2) = 3; mat(2,2) = 4
mat(1,3) = 5; mat(2,3) = 6
result = mat(1,1) + mat(2,1) + mat(1,2) + mat(2,2) + mat(1,3) + mat(2,3)
deallocate(mat)
end block
!$omp end target
print *, result
! CHECK: 17
! Case 3: array inquiry intrinsics on an 8-element allocatable. The elements
! are never assigned; only size (8), lower bound (1) and upper bound (8) are
! read, giving 8 + 1 + 8 = 17.
result = 0
!$omp target map(from: result)
block
integer, allocatable :: arr(:)
allocate(arr(8))
result = size(arr) + lbound(arr, 1) + ubound(arr, 1)
deallocate(arr)
end block
!$omp end target
print *, result
end program main