[openmp] Add support for arm64ec to libomp (#176151)
This patch adds arm64ec support to libomp. Note that this support isn't entirely usable on Windows hosts, as libomp requires LLVM_PER_TARGET_RUNTIME_DIR=On to work correctly when multiple runtimes are built, which is unsupported on Windows. A following patch will add arm64x support to the build to rectify this.
This commit is contained in:
parent
f14b865ff5
commit
161c53f66e
@ -105,6 +105,8 @@ function(libomp_get_legal_arch return_arch_string)
|
||||
set(${return_arch_string} "AARCH64_32" PARENT_SCOPE)
|
||||
elseif(AARCH64_A64FX)
|
||||
set(${return_arch_string} "AARCH64_A64FX" PARENT_SCOPE)
|
||||
elseif(ARM64EC)
|
||||
set(${return_arch_string} "ARM64EC" PARENT_SCOPE)
|
||||
elseif(MIPS)
|
||||
set(${return_arch_string} "MIPS" PARENT_SCOPE)
|
||||
elseif(MIPS64)
|
||||
|
||||
@ -42,6 +42,8 @@ elseif(LIBOMP_NATIVE_ARCH MATCHES "aarch64_32")
|
||||
set(LIBOMP_ARCH aarch64_32)
|
||||
elseif(LIBOMP_NATIVE_ARCH MATCHES "aarch64")
|
||||
set(LIBOMP_ARCH aarch64)
|
||||
elseif(LIBOMP_NATIVE_ARCH MATCHES "arm64ec")
|
||||
set(LIBOMP_ARCH arm64ec)
|
||||
elseif(LIBOMP_NATIVE_ARCH MATCHES "arm64")
|
||||
set(LIBOMP_ARCH aarch64)
|
||||
elseif(LIBOMP_NATIVE_ARCH MATCHES "arm")
|
||||
@ -79,7 +81,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_32 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve s390x sparc sparcv9 wasm32)
|
||||
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc ppc64 ppc64le aarch64 aarch64_32 aarch64_a64fx arm64ec mic mips mips64 riscv64 loongarch64 ve s390x sparc sparcv9 wasm32)
|
||||
|
||||
set(LIBOMP_LIB_TYPE normal CACHE STRING
|
||||
"Performance,Profiling,Stubs library (normal/profile/stubs)")
|
||||
@ -198,6 +200,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "aarch64_32") # AARCH64_32 architecture
|
||||
set(AARCH64_32 TRUE)
|
||||
elseif("${LIBOMP_ARCH}" STREQUAL "aarch64_a64fx") # AARCH64_A64FX architecture
|
||||
set(AARCH64_A64FX TRUE)
|
||||
elseif("${LIBOMP_ARCH}" STREQUAL "arm64ec") # ARM64EC architecture
|
||||
set(ARM64EC TRUE)
|
||||
elseif("${LIBOMP_ARCH}" STREQUAL "mic") # Intel(R) Many Integrated Core Architecture
|
||||
set(MIC TRUE)
|
||||
elseif("${LIBOMP_ARCH}" STREQUAL "mips") # MIPS architecture
|
||||
|
||||
@ -17,6 +17,9 @@ function(libomp_get_architecture return_arch)
|
||||
set(detect_arch_src_txt "
|
||||
#if defined(__KNC__)
|
||||
#error ARCHITECTURE=mic
|
||||
// arm64ec also defines _M_AMD64 so this needs to be checked before that
|
||||
#elif defined(_M_ARM64EC) || defined(__arm64ec__)
|
||||
#error ARCHITECTURE=arm64ec
|
||||
#elif defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
|
||||
#error ARCHITECTURE=x86_64
|
||||
#elif defined(__i386) || defined(__i386__) || defined(__IA32__) || defined(_M_I86) || defined(_M_IX86) || defined(__X86__) || defined(_X86_)
|
||||
|
||||
@ -86,6 +86,7 @@ else()
|
||||
kmp_csupport.cpp
|
||||
kmp_debug.cpp
|
||||
kmp_itt.cpp
|
||||
kmp_invoke_microtask.cpp
|
||||
kmp_environment.cpp
|
||||
kmp_error.cpp
|
||||
kmp_global.cpp
|
||||
|
||||
@ -3902,7 +3902,8 @@ extern void __kmp_check_stack_overlap(kmp_info_t *thr);
|
||||
extern void __kmp_expand_host_name(char *buffer, size_t size);
|
||||
extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
|
||||
|
||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM))
|
||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || \
|
||||
(KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM || KMP_ARCH_ARM64EC))
|
||||
extern void
|
||||
__kmp_initialize_system_tick(void); /* Initialize timer tick value */
|
||||
#endif
|
||||
|
||||
@ -832,7 +832,7 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
|
||||
// end of the first part of the workaround for C78287
|
||||
#endif // USE_CMPXCHG_FIX
|
||||
|
||||
#if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
|
||||
#if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM || KMP_ARCH_ARM64EC)
|
||||
// Undo explicit type casts to get MSVC ARM64 to build. Uses
|
||||
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
|
||||
#undef OP_CMPXCHG
|
||||
|
||||
140
openmp/runtime/src/kmp_invoke_microtask.cpp
Normal file
140
openmp/runtime/src/kmp_invoke_microtask.cpp
Normal file
@ -0,0 +1,140 @@
|
||||
#include "kmp.h"
|
||||
|
||||
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || KMP_ARCH_AARCH64 || \
|
||||
KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
|
||||
KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF || \
|
||||
KMP_ARCH_AARCH64_32)
|
||||
|
||||
// Because WebAssembly will use `call_indirect` to invoke the microtask and
|
||||
// WebAssembly indirect calls check that the called signature is a precise
|
||||
// match, we need to cast each microtask function pointer back from `void *` to
|
||||
// its original type.
|
||||
typedef void (*microtask_t0)(int *, int *);
|
||||
typedef void (*microtask_t1)(int *, int *, void *);
|
||||
typedef void (*microtask_t2)(int *, int *, void *, void *);
|
||||
typedef void (*microtask_t3)(int *, int *, void *, void *, void *);
|
||||
typedef void (*microtask_t4)(int *, int *, void *, void *, void *, void *);
|
||||
typedef void (*microtask_t5)(int *, int *, void *, void *, void *, void *,
|
||||
void *);
|
||||
typedef void (*microtask_t6)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *);
|
||||
typedef void (*microtask_t7)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *);
|
||||
typedef void (*microtask_t8)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *);
|
||||
typedef void (*microtask_t9)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *);
|
||||
typedef void (*microtask_t10)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *);
|
||||
typedef void (*microtask_t11)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *);
|
||||
typedef void (*microtask_t12)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *, void *);
|
||||
typedef void (*microtask_t13)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *, void *, void *);
|
||||
typedef void (*microtask_t14)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *);
|
||||
typedef void (*microtask_t15)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *);
|
||||
|
||||
// we really only need the case with 1 argument, because CLANG always build
|
||||
// a struct of pointers to shared variables referenced in the outlined function
|
||||
int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
|
||||
void *p_argv[]
|
||||
#if OMPT_SUPPORT
|
||||
,
|
||||
void **exit_frame_ptr
|
||||
#endif
|
||||
) {
|
||||
#if OMPT_SUPPORT
|
||||
*exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
#endif
|
||||
|
||||
switch (argc) {
|
||||
default:
|
||||
fprintf(stderr, "Too many args to microtask: %d!\n", argc);
|
||||
fflush(stderr);
|
||||
exit(-1);
|
||||
case 0:
|
||||
(*(microtask_t0)pkfn)(>id, &tid);
|
||||
break;
|
||||
case 1:
|
||||
(*(microtask_t1)pkfn)(>id, &tid, p_argv[0]);
|
||||
break;
|
||||
case 2:
|
||||
(*(microtask_t2)pkfn)(>id, &tid, p_argv[0], p_argv[1]);
|
||||
break;
|
||||
case 3:
|
||||
(*(microtask_t3)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2]);
|
||||
break;
|
||||
case 4:
|
||||
(*(microtask_t4)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3]);
|
||||
break;
|
||||
case 5:
|
||||
(*(microtask_t5)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4]);
|
||||
break;
|
||||
case 6:
|
||||
(*(microtask_t6)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5]);
|
||||
break;
|
||||
case 7:
|
||||
(*(microtask_t7)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6]);
|
||||
break;
|
||||
case 8:
|
||||
(*(microtask_t8)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7]);
|
||||
break;
|
||||
case 9:
|
||||
(*(microtask_t9)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7],
|
||||
p_argv[8]);
|
||||
break;
|
||||
case 10:
|
||||
(*(microtask_t10)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9]);
|
||||
break;
|
||||
case 11:
|
||||
(*(microtask_t11)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
|
||||
break;
|
||||
case 12:
|
||||
(*(microtask_t12)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10],
|
||||
p_argv[11]);
|
||||
break;
|
||||
case 13:
|
||||
(*(microtask_t13)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10],
|
||||
p_argv[11], p_argv[12]);
|
||||
break;
|
||||
case 14:
|
||||
(*(microtask_t14)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10],
|
||||
p_argv[11], p_argv[12], p_argv[13]);
|
||||
break;
|
||||
case 15:
|
||||
(*(microtask_t15)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10],
|
||||
p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -153,7 +153,7 @@ typedef struct kmp_struct64 kmp_uint64;
|
||||
#undef KMP_USE_X87CONTROL
|
||||
#define KMP_USE_X87CONTROL 1
|
||||
#endif
|
||||
#if KMP_ARCH_X86_64 || KMP_ARCH_AARCH64
|
||||
#if KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM64EC
|
||||
#define KMP_INTPTR 1
|
||||
typedef __int64 kmp_intptr_t;
|
||||
typedef unsigned __int64 kmp_uintptr_t;
|
||||
@ -183,7 +183,7 @@ typedef unsigned long long kmp_uint64;
|
||||
#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
|
||||
#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
|
||||
KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_SPARC64
|
||||
KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_SPARC64 || KMP_ARCH_ARM64EC
|
||||
#define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
|
||||
#else
|
||||
#error "Can't determine size_t printf format specifier."
|
||||
@ -464,7 +464,9 @@ enum kmp_mem_fence_type {
|
||||
|
||||
// Synchronization primitives
|
||||
|
||||
#if KMP_ASM_INTRINS && KMP_OS_WINDOWS && !((KMP_ARCH_AARCH64 || KMP_ARCH_ARM) && (KMP_COMPILER_CLANG || KMP_COMPILER_GCC))
|
||||
#if KMP_ASM_INTRINS && KMP_OS_WINDOWS && \
|
||||
!((KMP_ARCH_AARCH64 || KMP_ARCH_ARM || KMP_ARCH_ARM64EC) && \
|
||||
(KMP_COMPILER_CLANG || KMP_COMPILER_GCC))
|
||||
|
||||
#if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG
|
||||
#pragma intrinsic(InterlockedExchangeAdd)
|
||||
@ -1053,7 +1055,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
|
||||
#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
|
||||
KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || \
|
||||
KMP_ARCH_SPARC
|
||||
KMP_ARCH_SPARC || KMP_ARCH_ARM64EC
|
||||
#if KMP_OS_WINDOWS
|
||||
#undef KMP_MB
|
||||
#define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst)
|
||||
|
||||
@ -119,6 +119,7 @@
|
||||
#define KMP_ARCH_X86_64 0
|
||||
#define KMP_ARCH_AARCH64 0
|
||||
#define KMP_ARCH_AARCH64_32 0
|
||||
#define KMP_ARCH_ARM64EC 0
|
||||
#define KMP_ARCH_PPC64_ELFv1 0
|
||||
#define KMP_ARCH_PPC64_ELFv2 0
|
||||
#define KMP_ARCH_PPC64_XCOFF 0
|
||||
@ -133,7 +134,10 @@
|
||||
#define KMP_ARCH_SPARC 0
|
||||
|
||||
#if KMP_OS_WINDOWS
|
||||
#if defined(_M_AMD64) || defined(__x86_64)
|
||||
#if defined(_M_ARM64EC) || defined(__arm64ec__)
|
||||
#undef KMP_ARCH_ARM64EC
|
||||
#define KMP_ARCH_ARM64EC 1
|
||||
#elif defined(_M_AMD64) || defined(__x86_64)
|
||||
#undef KMP_ARCH_X86_64
|
||||
#define KMP_ARCH_X86_64 1
|
||||
#elif defined(__aarch64__) || defined(_M_ARM64)
|
||||
@ -291,7 +295,7 @@
|
||||
KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \
|
||||
KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE + \
|
||||
KMP_ARCH_S390X + KMP_ARCH_WASM + KMP_ARCH_PPC + \
|
||||
KMP_ARCH_AARCH64_32 + KMP_ARCH_SPARC)
|
||||
KMP_ARCH_AARCH64_32 + KMP_ARCH_SPARC + KMP_ARCH_ARM64EC)
|
||||
#error Unknown or unsupported architecture
|
||||
#endif
|
||||
|
||||
|
||||
@ -8924,7 +8924,7 @@ __kmp_determine_reduction_method(
|
||||
|
||||
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
|
||||
KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
|
||||
KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
|
||||
KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM || KMP_ARCH_ARM64EC
|
||||
|
||||
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
|
||||
KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HAIKU || \
|
||||
|
||||
@ -173,6 +173,8 @@
|
||||
#ifndef ITT_ARCH
|
||||
#if defined _M_IX86 || defined __i386__
|
||||
#define ITT_ARCH ITT_ARCH_IA32
|
||||
#elif defined _M_ARM64EC || defined __arm64ec__
|
||||
#define ITT_ARCH ITT_ARCH_ARM64
|
||||
#elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__
|
||||
#define ITT_ARCH ITT_ARCH_IA32E
|
||||
#elif defined _M_IA64 || defined __ia64__
|
||||
|
||||
@ -121,7 +121,7 @@ KMP_PREFIX_UNDERSCORE(\proc):
|
||||
# endif // KMP_OS_DARWIN
|
||||
#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64
|
||||
|
||||
#if KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM
|
||||
#if KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM || KMP_ARCH_ARM64EC
|
||||
|
||||
# if KMP_OS_DARWIN
|
||||
# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
|
||||
|
||||
@ -2736,145 +2736,6 @@ finish: // Clean up and exit.
|
||||
|
||||
#endif // USE_LOAD_BALANCE
|
||||
|
||||
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || KMP_ARCH_AARCH64 || \
|
||||
KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
|
||||
KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF || \
|
||||
KMP_ARCH_AARCH64_32)
|
||||
|
||||
// Because WebAssembly will use `call_indirect` to invoke the microtask and
|
||||
// WebAssembly indirect calls check that the called signature is a precise
|
||||
// match, we need to cast each microtask function pointer back from `void *` to
|
||||
// its original type.
|
||||
typedef void (*microtask_t0)(int *, int *);
|
||||
typedef void (*microtask_t1)(int *, int *, void *);
|
||||
typedef void (*microtask_t2)(int *, int *, void *, void *);
|
||||
typedef void (*microtask_t3)(int *, int *, void *, void *, void *);
|
||||
typedef void (*microtask_t4)(int *, int *, void *, void *, void *, void *);
|
||||
typedef void (*microtask_t5)(int *, int *, void *, void *, void *, void *,
|
||||
void *);
|
||||
typedef void (*microtask_t6)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *);
|
||||
typedef void (*microtask_t7)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *);
|
||||
typedef void (*microtask_t8)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *);
|
||||
typedef void (*microtask_t9)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *);
|
||||
typedef void (*microtask_t10)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *);
|
||||
typedef void (*microtask_t11)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *);
|
||||
typedef void (*microtask_t12)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *, void *);
|
||||
typedef void (*microtask_t13)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *, void *, void *);
|
||||
typedef void (*microtask_t14)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *);
|
||||
typedef void (*microtask_t15)(int *, int *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *, void *,
|
||||
void *, void *, void *, void *, void *);
|
||||
|
||||
// we really only need the case with 1 argument, because CLANG always build
|
||||
// a struct of pointers to shared variables referenced in the outlined function
|
||||
int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
|
||||
void *p_argv[]
|
||||
#if OMPT_SUPPORT
|
||||
,
|
||||
void **exit_frame_ptr
|
||||
#endif
|
||||
) {
|
||||
#if OMPT_SUPPORT
|
||||
*exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
#endif
|
||||
|
||||
switch (argc) {
|
||||
default:
|
||||
fprintf(stderr, "Too many args to microtask: %d!\n", argc);
|
||||
fflush(stderr);
|
||||
exit(-1);
|
||||
case 0:
|
||||
(*(microtask_t0)pkfn)(>id, &tid);
|
||||
break;
|
||||
case 1:
|
||||
(*(microtask_t1)pkfn)(>id, &tid, p_argv[0]);
|
||||
break;
|
||||
case 2:
|
||||
(*(microtask_t2)pkfn)(>id, &tid, p_argv[0], p_argv[1]);
|
||||
break;
|
||||
case 3:
|
||||
(*(microtask_t3)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2]);
|
||||
break;
|
||||
case 4:
|
||||
(*(microtask_t4)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3]);
|
||||
break;
|
||||
case 5:
|
||||
(*(microtask_t5)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4]);
|
||||
break;
|
||||
case 6:
|
||||
(*(microtask_t6)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5]);
|
||||
break;
|
||||
case 7:
|
||||
(*(microtask_t7)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6]);
|
||||
break;
|
||||
case 8:
|
||||
(*(microtask_t8)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7]);
|
||||
break;
|
||||
case 9:
|
||||
(*(microtask_t9)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7],
|
||||
p_argv[8]);
|
||||
break;
|
||||
case 10:
|
||||
(*(microtask_t10)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9]);
|
||||
break;
|
||||
case 11:
|
||||
(*(microtask_t11)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
|
||||
break;
|
||||
case 12:
|
||||
(*(microtask_t12)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10],
|
||||
p_argv[11]);
|
||||
break;
|
||||
case 13:
|
||||
(*(microtask_t13)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10],
|
||||
p_argv[11], p_argv[12]);
|
||||
break;
|
||||
case 14:
|
||||
(*(microtask_t14)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10],
|
||||
p_argv[11], p_argv[12], p_argv[13]);
|
||||
break;
|
||||
case 15:
|
||||
(*(microtask_t15)pkfn)(>id, &tid, p_argv[0], p_argv[1], p_argv[2],
|
||||
p_argv[3], p_argv[4], p_argv[5], p_argv[6],
|
||||
p_argv[7], p_argv[8], p_argv[9], p_argv[10],
|
||||
p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if KMP_OS_LINUX
|
||||
// Functions for hidden helper task
|
||||
namespace {
|
||||
|
||||
@ -12,7 +12,8 @@
|
||||
|
||||
#include "kmp.h"
|
||||
|
||||
#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
|
||||
#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM || \
|
||||
KMP_ARCH_ARM64EC)
|
||||
/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to
|
||||
use compare_and_store for these routines */
|
||||
|
||||
@ -189,4 +190,5 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM */
|
||||
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_ARM \
|
||||
|| KMP_ARCH_ARM64EC */
|
||||
|
||||
@ -1,6 +1,10 @@
|
||||
// RUN: %libomp-compile-and-run
|
||||
#include <stdio.h>
|
||||
|
||||
// This test fails on Arm64EC as __kmp_invoke_microtask doesn't support more
|
||||
// than 16 arguments on that target.
|
||||
// XFAIL: arm64ec
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user