Currently, when we generate OpenMP offloading code, we always emit fallback code for the CPU. This is necessary for implementing features like conditional offloading and for ensuring that unhandled pragmas don't result in missing symbols. However, this is problematic in a few cases. In offloading tests, execution can silently fall back to the host without anyone realizing that offloading failed. Additionally, this makes it impossible to provide interoperability with other offloading schemes like HIP or CUDA, because those methods do not provide any such host fallback guarantee. This patch adds the `-fopenmp-offload-mandatory` flag to prevent generating the fallback symbol on the CPU; instead, it replaces the function with a dummy global and the failed branch with 'unreachable'. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D120353
91 lines
4.2 KiB
C++
91 lines
4.2 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+"
|
|
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-offload-mandatory -emit-llvm %s -o - | FileCheck %s --check-prefix=MANDATORY
|
|
// expected-no-diagnostics
|
|
|
|
void foo() {} // Empty function; named by the `declare target` directive that follows.
|
|
#pragma omp declare target(foo)
|
|
|
|
void bar() {} // Empty function; declared `device_type(nohost)` below and called from each target region in this file.
|
|
#pragma omp declare target device_type(nohost) to(bar)
|
|
|
|
void host() { // Contains a target region with no `if` or `device` clause.
|
|
#pragma omp target
|
|
{ bar(); } // Region body: a single call to bar().
|
|
}
|
|
|
|
void host_if(bool cond) { // Contains a target region guarded by `if(cond)`; the checks further down expect `unreachable` in the omp_if.else block.
|
|
#pragma omp target if(cond)
|
|
{ bar(); } // Region body: a single call to bar().
|
|
}
|
|
|
|
void host_dev(int device) { // Contains a target region with a `device(device)` clause; the checks further down expect the sign-extended value as the i64 argument of the offload call.
|
|
#pragma omp target device(device)
|
|
{ bar(); } // Region body: a single call to bar().
|
|
}
|
|
// MANDATORY-LABEL: define {{[^@]+}}@_Z3foov
|
|
// MANDATORY-SAME: () #[[ATTR0:[0-9]+]] {
|
|
// MANDATORY-NEXT: entry:
|
|
// MANDATORY-NEXT: ret void
|
|
//
|
|
//
|
|
// MANDATORY-LABEL: define {{[^@]+}}@_Z4hostv
|
|
// MANDATORY-SAME: () #[[ATTR0]] {
|
|
// MANDATORY-NEXT: entry:
|
|
// MANDATORY-NEXT: [[TMP0:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4hostv_l12.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
|
|
// MANDATORY-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
|
|
// MANDATORY-NEXT: br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// MANDATORY: omp_offload.failed:
|
|
// MANDATORY-NEXT: unreachable
|
|
// MANDATORY: omp_offload.cont:
|
|
// MANDATORY-NEXT: ret void
|
|
//
|
|
//
|
|
// MANDATORY-LABEL: define {{[^@]+}}@_Z7host_ifb
|
|
// MANDATORY-SAME: (i1 noundef zeroext [[COND:%.*]]) #[[ATTR0]] {
|
|
// MANDATORY-NEXT: entry:
|
|
// MANDATORY-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1
|
|
// MANDATORY-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND]] to i8
|
|
// MANDATORY-NEXT: store i8 [[FROMBOOL]], i8* [[COND_ADDR]], align 1
|
|
// MANDATORY-NEXT: [[TMP0:%.*]] = load i8, i8* [[COND_ADDR]], align 1
|
|
// MANDATORY-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
|
|
// MANDATORY-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
|
|
// MANDATORY: omp_if.then:
|
|
// MANDATORY-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7host_ifb_l17.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
|
|
// MANDATORY-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
|
|
// MANDATORY-NEXT: br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// MANDATORY: omp_offload.failed:
|
|
// MANDATORY-NEXT: unreachable
|
|
// MANDATORY: omp_offload.cont:
|
|
// MANDATORY-NEXT: br label [[OMP_IF_END:%.*]]
|
|
// MANDATORY: omp_if.else:
|
|
// MANDATORY-NEXT: unreachable
|
|
// MANDATORY: omp_if.end:
|
|
// MANDATORY-NEXT: ret void
|
|
//
|
|
//
|
|
// MANDATORY-LABEL: define {{[^@]+}}@_Z8host_devi
|
|
// MANDATORY-SAME: (i32 noundef signext [[DEVICE:%.*]]) #[[ATTR0]] {
|
|
// MANDATORY-NEXT: entry:
|
|
// MANDATORY-NEXT: [[DEVICE_ADDR:%.*]] = alloca i32, align 4
|
|
// MANDATORY-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
|
|
// MANDATORY-NEXT: store i32 [[DEVICE]], i32* [[DEVICE_ADDR]], align 4
|
|
// MANDATORY-NEXT: [[TMP0:%.*]] = load i32, i32* [[DEVICE_ADDR]], align 4
|
|
// MANDATORY-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// MANDATORY-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
|
|
// MANDATORY-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
|
|
// MANDATORY-NEXT: [[TMP3:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 [[TMP2]], i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8host_devi_l22.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
|
|
// MANDATORY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
|
|
// MANDATORY-NEXT: br i1 [[TMP4]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
|
|
// MANDATORY: omp_offload.failed:
|
|
// MANDATORY-NEXT: unreachable
|
|
// MANDATORY: omp_offload.cont:
|
|
// MANDATORY-NEXT: ret void
|
|
//
|
|
//
|
|
// MANDATORY-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
|
|
// MANDATORY-SAME: () #[[ATTR3:[0-9]+]] {
|
|
// MANDATORY-NEXT: entry:
|
|
// MANDATORY-NEXT: call void @__tgt_register_requires(i64 1)
|
|
// MANDATORY-NEXT: ret void
|
|
//
|