[flang][rt] Add noinline attributes for CUDA compile path for successful compilation (#161760)
NVCC inlines more aggressively than Clang/GCC, causing the exported functions in extrema.cpp and findloc.cpp to become extremely large from function specializations, which leads to compilation timeouts. Marking the 2 functions in this change as noinline for NVCC alleviates this problem, as it removes the worst of the cross-matrix argument specializations. Also remove the workaround in https://github.com/llvm/llvm-project/pull/156542 that opted findloc.cpp out of the CUDA flang-rt build. Testing: `ninja flang-rt` builds in ~30 minutes; these 2 files build in ~3 minutes.
This commit is contained in:
parent
d0e98909d2
commit
74180eb024
@ -178,9 +178,6 @@ endif ()
|
||||
if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
|
||||
set(sources ${gpu_sources})
|
||||
elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA")
|
||||
# findloc.cpp has some issues with higher compute capability. Remove it
|
||||
# from CUDA build until we can lower its memory footprint.
|
||||
list(REMOVE_ITEM supported_sources findloc.cpp)
|
||||
set(sources ${supported_sources})
|
||||
else ()
|
||||
set(sources ${supported_sources} ${host_sources} ${f128_sources})
|
||||
|
||||
@ -397,9 +397,12 @@ template <TypeCategory CAT, bool IS_MAX,
|
||||
template <typename, bool, bool> class COMPARE>
|
||||
struct DoPartialMaxOrMinLocHelper {
|
||||
template <int KIND> struct Functor {
|
||||
RT_API_ATTRS void operator()(const char *intrinsic, Descriptor &result,
|
||||
const Descriptor &x, int kind, int dim, const Descriptor *mask,
|
||||
bool back, Terminator &terminator) const {
|
||||
// NVCC inlines more aggressively which causes too many specializations of
|
||||
// this function to be inlined causing compiler timeouts. Set as
|
||||
// noinline to allow compilation to complete.
|
||||
RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(const char *intrinsic,
|
||||
Descriptor &result, const Descriptor &x, int kind, int dim,
|
||||
const Descriptor *mask, bool back, Terminator &terminator) const {
|
||||
DoPartialMaxOrMinLoc<CAT, KIND, IS_MAX, COMPARE>(
|
||||
intrinsic, result, x, kind, dim, mask, back, terminator);
|
||||
}
|
||||
|
||||
@ -153,10 +153,13 @@ template <TypeCategory CAT,
|
||||
class HELPER>
|
||||
struct NumericFindlocHelper {
|
||||
template <int KIND> struct Functor {
|
||||
RT_API_ATTRS void operator()(TypeCategory targetCat, int targetKind,
|
||||
Descriptor &result, const Descriptor &x, const Descriptor &target,
|
||||
int kind, int dim, const Descriptor *mask, bool back,
|
||||
Terminator &terminator) const {
|
||||
// NVCC inlines more aggressively which causes too many specializations of
|
||||
// this function to be inlined causing compiler timeouts. Set as
|
||||
// noinline to allow compilation to complete.
|
||||
RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(TypeCategory targetCat,
|
||||
int targetKind, Descriptor &result, const Descriptor &x,
|
||||
const Descriptor &target, int kind, int dim, const Descriptor *mask,
|
||||
bool back, Terminator &terminator) const {
|
||||
switch (targetCat) {
|
||||
case TypeCategory::Integer:
|
||||
case TypeCategory::Unsigned:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user