In most cases, hidden helper task behave similar as detached tasks. That means, for example, if we have to wait for detached tasks, we have to do the same thing for hidden helper tasks as well. This patch adds the missing condition for hidden helper task accordingly along with detached task. Reviewed By: AndreyChurbanov Differential Revision: https://reviews.llvm.org/D107316
878 lines
33 KiB
C++
878 lines
33 KiB
C++
/*
|
|
* kmp_taskdeps.cpp
|
|
*/
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//#define KMP_SUPPORT_GRAPH_OUTPUT 1
|
|
|
|
#include "kmp.h"
|
|
#include "kmp_io.h"
|
|
#include "kmp_wait_release.h"
|
|
#include "kmp_taskdeps.h"
|
|
#if OMPT_SUPPORT
|
|
#include "ompt-specific.h"
|
|
#endif
|
|
|
|
// TODO: Improve memory allocation? keep a list of pre-allocated structures?
|
|
// allocate in blocks? re-use list finished list entries?
|
|
// TODO: don't use atomic ref counters for stack-allocated nodes.
|
|
// TODO: find an alternate to atomic refs for heap-allocated nodes?
|
|
// TODO: Finish graph output support
|
|
// TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other
|
|
// runtime locks
|
|
// TODO: Any ITT support needed?
|
|
|
|
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
|
|
static std::atomic<kmp_int32> kmp_node_id_seed = ATOMIC_VAR_INIT(0);
|
|
#endif
|
|
|
|
static void __kmp_init_node(kmp_depnode_t *node) {
|
|
node->dn.successors = NULL;
|
|
node->dn.task = NULL; // will point to the right task
|
|
// once dependences have been processed
|
|
for (int i = 0; i < MAX_MTX_DEPS; ++i)
|
|
node->dn.mtx_locks[i] = NULL;
|
|
node->dn.mtx_num_locks = 0;
|
|
__kmp_init_lock(&node->dn.lock);
|
|
KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
|
|
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
|
|
node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
|
|
#endif
|
|
}
|
|
|
|
static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
|
|
KMP_ATOMIC_INC(&node->dn.nrefs);
|
|
return node;
|
|
}
|
|
|
|
enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
|
|
|
|
size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
|
|
const size_t MAX_GEN = 8;
|
|
|
|
static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
|
|
// TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
|
|
// m_num_sets );
|
|
return ((addr >> 6) ^ (addr >> 2)) % hsize;
|
|
}
|
|
|
|
static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
|
|
kmp_dephash_t *current_dephash) {
|
|
kmp_dephash_t *h;
|
|
|
|
size_t gen = current_dephash->generation + 1;
|
|
if (gen >= MAX_GEN)
|
|
return current_dephash;
|
|
size_t new_size = sizes[gen];
|
|
|
|
size_t size_to_allocate =
|
|
new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
|
|
|
|
#if USE_FAST_MEMORY
|
|
h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
|
|
#else
|
|
h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
|
|
#endif
|
|
|
|
h->size = new_size;
|
|
h->nelements = current_dephash->nelements;
|
|
h->buckets = (kmp_dephash_entry **)(h + 1);
|
|
h->generation = gen;
|
|
h->nconflicts = 0;
|
|
h->last_all = current_dephash->last_all;
|
|
|
|
// make sure buckets are properly initialized
|
|
for (size_t i = 0; i < new_size; i++) {
|
|
h->buckets[i] = NULL;
|
|
}
|
|
|
|
// insert existing elements in the new table
|
|
for (size_t i = 0; i < current_dephash->size; i++) {
|
|
kmp_dephash_entry_t *next, *entry;
|
|
for (entry = current_dephash->buckets[i]; entry; entry = next) {
|
|
next = entry->next_in_bucket;
|
|
// Compute the new hash using the new size, and insert the entry in
|
|
// the new bucket.
|
|
size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size);
|
|
entry->next_in_bucket = h->buckets[new_bucket];
|
|
if (entry->next_in_bucket) {
|
|
h->nconflicts++;
|
|
}
|
|
h->buckets[new_bucket] = entry;
|
|
}
|
|
}
|
|
|
|
// Free old hash table
|
|
#if USE_FAST_MEMORY
|
|
__kmp_fast_free(thread, current_dephash);
|
|
#else
|
|
__kmp_thread_free(thread, current_dephash);
|
|
#endif
|
|
|
|
return h;
|
|
}
|
|
|
|
static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
|
|
kmp_taskdata_t *current_task) {
|
|
kmp_dephash_t *h;
|
|
|
|
size_t h_size;
|
|
|
|
if (current_task->td_flags.tasktype == TASK_IMPLICIT)
|
|
h_size = KMP_DEPHASH_MASTER_SIZE;
|
|
else
|
|
h_size = KMP_DEPHASH_OTHER_SIZE;
|
|
|
|
size_t size = h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
|
|
|
|
#if USE_FAST_MEMORY
|
|
h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
|
|
#else
|
|
h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
|
|
#endif
|
|
h->size = h_size;
|
|
|
|
h->generation = 0;
|
|
h->nelements = 0;
|
|
h->nconflicts = 0;
|
|
h->buckets = (kmp_dephash_entry **)(h + 1);
|
|
h->last_all = NULL;
|
|
|
|
for (size_t i = 0; i < h_size; i++)
|
|
h->buckets[i] = 0;
|
|
|
|
return h;
|
|
}
|
|
|
|
static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
|
|
kmp_dephash_t **hash,
|
|
kmp_intptr_t addr) {
|
|
kmp_dephash_t *h = *hash;
|
|
if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
|
|
*hash = __kmp_dephash_extend(thread, h);
|
|
h = *hash;
|
|
}
|
|
size_t bucket = __kmp_dephash_hash(addr, h->size);
|
|
|
|
kmp_dephash_entry_t *entry;
|
|
for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
|
|
if (entry->addr == addr)
|
|
break;
|
|
|
|
if (entry == NULL) {
|
|
// create entry. This is only done by one thread so no locking required
|
|
#if USE_FAST_MEMORY
|
|
entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
|
|
thread, sizeof(kmp_dephash_entry_t));
|
|
#else
|
|
entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
|
|
thread, sizeof(kmp_dephash_entry_t));
|
|
#endif
|
|
entry->addr = addr;
|
|
if (!h->last_all) // no predecessor task with omp_all_memory dependence
|
|
entry->last_out = NULL;
|
|
else // else link the omp_all_memory depnode to the new entry
|
|
entry->last_out = __kmp_node_ref(h->last_all);
|
|
entry->last_set = NULL;
|
|
entry->prev_set = NULL;
|
|
entry->last_flag = 0;
|
|
entry->mtx_lock = NULL;
|
|
entry->next_in_bucket = h->buckets[bucket];
|
|
h->buckets[bucket] = entry;
|
|
h->nelements++;
|
|
if (entry->next_in_bucket)
|
|
h->nconflicts++;
|
|
}
|
|
return entry;
|
|
}
|
|
|
|
static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
|
|
kmp_depnode_list_t *list,
|
|
kmp_depnode_t *node) {
|
|
kmp_depnode_list_t *new_head;
|
|
|
|
#if USE_FAST_MEMORY
|
|
new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
|
|
thread, sizeof(kmp_depnode_list_t));
|
|
#else
|
|
new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
|
|
thread, sizeof(kmp_depnode_list_t));
|
|
#endif
|
|
|
|
new_head->node = __kmp_node_ref(node);
|
|
new_head->next = list;
|
|
|
|
return new_head;
|
|
}
|
|
|
|
static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
|
|
kmp_depnode_t *sink,
|
|
kmp_task_t *sink_task) {
|
|
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
|
|
kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
|
|
// do not use sink->dn.task as that is only filled after the dependences
|
|
// are already processed!
|
|
kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
|
|
|
|
__kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
|
|
task_source->td_ident->psource, sink->dn.id,
|
|
task_sink->td_ident->psource);
|
|
#endif
|
|
#if OMPT_SUPPORT && OMPT_OPTIONAL
|
|
/* OMPT tracks dependences between task (a=source, b=sink) in which
|
|
task a blocks the execution of b through the ompt_new_dependence_callback
|
|
*/
|
|
if (ompt_enabled.ompt_callback_task_dependence) {
|
|
kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
|
|
ompt_data_t *sink_data;
|
|
if (sink_task)
|
|
sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
|
|
else
|
|
sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;
|
|
|
|
ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
|
|
&(task_source->ompt_task_info.task_data), sink_data);
|
|
}
|
|
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
|
|
}
|
|
|
|
static inline kmp_int32
|
|
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
|
|
kmp_task_t *task, kmp_depnode_t *node,
|
|
kmp_depnode_list_t *plist) {
|
|
if (!plist)
|
|
return 0;
|
|
kmp_int32 npredecessors = 0;
|
|
// link node as successor of list elements
|
|
for (kmp_depnode_list_t *p = plist; p; p = p->next) {
|
|
kmp_depnode_t *dep = p->node;
|
|
if (dep->dn.task) {
|
|
KMP_ACQUIRE_DEPNODE(gtid, dep);
|
|
if (dep->dn.task) {
|
|
__kmp_track_dependence(gtid, dep, node, task);
|
|
dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
|
|
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
|
|
"%p\n",
|
|
gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
|
|
KMP_TASK_TO_TASKDATA(task)));
|
|
npredecessors++;
|
|
}
|
|
KMP_RELEASE_DEPNODE(gtid, dep);
|
|
}
|
|
}
|
|
return npredecessors;
|
|
}
|
|
|
|
static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
|
|
kmp_info_t *thread,
|
|
kmp_task_t *task,
|
|
kmp_depnode_t *source,
|
|
kmp_depnode_t *sink) {
|
|
if (!sink)
|
|
return 0;
|
|
kmp_int32 npredecessors = 0;
|
|
if (sink->dn.task) {
|
|
// synchronously add source to sink' list of successors
|
|
KMP_ACQUIRE_DEPNODE(gtid, sink);
|
|
if (sink->dn.task) {
|
|
__kmp_track_dependence(gtid, sink, source, task);
|
|
sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
|
|
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
|
|
"%p\n",
|
|
gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
|
|
KMP_TASK_TO_TASKDATA(task)));
|
|
npredecessors++;
|
|
}
|
|
KMP_RELEASE_DEPNODE(gtid, sink);
|
|
}
|
|
return npredecessors;
|
|
}
|
|
|
|
static inline kmp_int32
|
|
__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
|
|
bool dep_barrier, kmp_task_t *task) {
|
|
KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
|
|
"dep_barrier = %d\n",
|
|
gtid, dep_barrier));
|
|
kmp_info_t *thread = __kmp_threads[gtid];
|
|
kmp_int32 npredecessors = 0;
|
|
|
|
// process previous omp_all_memory node if any
|
|
npredecessors +=
|
|
__kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
|
|
__kmp_node_deref(thread, h->last_all);
|
|
if (!dep_barrier) {
|
|
h->last_all = __kmp_node_ref(node);
|
|
} else {
|
|
// if this is a sync point in the serial sequence, then the previous
|
|
// outputs are guaranteed to be completed after the execution of this
|
|
// task so the previous output nodes can be cleared.
|
|
h->last_all = NULL;
|
|
}
|
|
|
|
// process all regular dependences
|
|
for (size_t i = 0; i < h->size; i++) {
|
|
kmp_dephash_entry_t *info = h->buckets[i];
|
|
if (!info) // skip empty slots in dephash
|
|
continue;
|
|
for (; info; info = info->next_in_bucket) {
|
|
// for each entry the omp_all_memory works as OUT dependence
|
|
kmp_depnode_t *last_out = info->last_out;
|
|
kmp_depnode_list_t *last_set = info->last_set;
|
|
kmp_depnode_list_t *prev_set = info->prev_set;
|
|
if (last_set) {
|
|
npredecessors +=
|
|
__kmp_depnode_link_successor(gtid, thread, task, node, last_set);
|
|
__kmp_depnode_list_free(thread, last_set);
|
|
__kmp_depnode_list_free(thread, prev_set);
|
|
info->last_set = NULL;
|
|
info->prev_set = NULL;
|
|
info->last_flag = 0; // no sets in this dephash entry
|
|
} else {
|
|
npredecessors +=
|
|
__kmp_depnode_link_successor(gtid, thread, task, node, last_out);
|
|
}
|
|
__kmp_node_deref(thread, last_out);
|
|
if (!dep_barrier) {
|
|
info->last_out = __kmp_node_ref(node);
|
|
} else {
|
|
info->last_out = NULL;
|
|
}
|
|
}
|
|
}
|
|
KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
|
|
npredecessors));
|
|
return npredecessors;
|
|
}
|
|
|
|
template <bool filter>
|
|
static inline kmp_int32
|
|
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
|
|
bool dep_barrier, kmp_int32 ndeps,
|
|
kmp_depend_info_t *dep_list, kmp_task_t *task) {
|
|
KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "
|
|
"dep_barrier = %d\n",
|
|
filter, gtid, ndeps, dep_barrier));
|
|
|
|
kmp_info_t *thread = __kmp_threads[gtid];
|
|
kmp_int32 npredecessors = 0;
|
|
for (kmp_int32 i = 0; i < ndeps; i++) {
|
|
const kmp_depend_info_t *dep = &dep_list[i];
|
|
|
|
if (filter && dep->base_addr == 0)
|
|
continue; // skip filtered entries
|
|
|
|
kmp_dephash_entry_t *info =
|
|
__kmp_dephash_find(thread, hash, dep->base_addr);
|
|
kmp_depnode_t *last_out = info->last_out;
|
|
kmp_depnode_list_t *last_set = info->last_set;
|
|
kmp_depnode_list_t *prev_set = info->prev_set;
|
|
|
|
if (dep->flags.out) { // out or inout --> clean lists if any
|
|
if (last_set) {
|
|
npredecessors +=
|
|
__kmp_depnode_link_successor(gtid, thread, task, node, last_set);
|
|
__kmp_depnode_list_free(thread, last_set);
|
|
__kmp_depnode_list_free(thread, prev_set);
|
|
info->last_set = NULL;
|
|
info->prev_set = NULL;
|
|
info->last_flag = 0; // no sets in this dephash entry
|
|
} else {
|
|
npredecessors +=
|
|
__kmp_depnode_link_successor(gtid, thread, task, node, last_out);
|
|
}
|
|
__kmp_node_deref(thread, last_out);
|
|
if (!dep_barrier) {
|
|
info->last_out = __kmp_node_ref(node);
|
|
} else {
|
|
// if this is a sync point in the serial sequence, then the previous
|
|
// outputs are guaranteed to be completed after the execution of this
|
|
// task so the previous output nodes can be cleared.
|
|
info->last_out = NULL;
|
|
}
|
|
} else { // either IN or MTX or SET
|
|
if (info->last_flag == 0 || info->last_flag == dep->flag) {
|
|
// last_set either didn't exist or of same dep kind
|
|
// link node as successor of the last_out if any
|
|
npredecessors +=
|
|
__kmp_depnode_link_successor(gtid, thread, task, node, last_out);
|
|
// link node as successor of all nodes in the prev_set if any
|
|
npredecessors +=
|
|
__kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
|
|
if (dep_barrier) {
|
|
// clean last_out and prev_set if any; don't touch last_set
|
|
__kmp_node_deref(thread, last_out);
|
|
info->last_out = NULL;
|
|
__kmp_depnode_list_free(thread, prev_set);
|
|
info->prev_set = NULL;
|
|
}
|
|
} else { // last_set is of different dep kind, make it prev_set
|
|
// link node as successor of all nodes in the last_set
|
|
npredecessors +=
|
|
__kmp_depnode_link_successor(gtid, thread, task, node, last_set);
|
|
// clean last_out if any
|
|
__kmp_node_deref(thread, last_out);
|
|
info->last_out = NULL;
|
|
// clean prev_set if any
|
|
__kmp_depnode_list_free(thread, prev_set);
|
|
if (!dep_barrier) {
|
|
// move last_set to prev_set, new last_set will be allocated
|
|
info->prev_set = last_set;
|
|
} else {
|
|
info->prev_set = NULL;
|
|
info->last_flag = 0;
|
|
}
|
|
info->last_set = NULL;
|
|
}
|
|
// for dep_barrier last_flag value should remain:
|
|
// 0 if last_set is empty, unchanged otherwise
|
|
if (!dep_barrier) {
|
|
info->last_flag = dep->flag; // store dep kind of the last_set
|
|
info->last_set = __kmp_add_node(thread, info->last_set, node);
|
|
}
|
|
// check if we are processing MTX dependency
|
|
if (dep->flag == KMP_DEP_MTX) {
|
|
if (info->mtx_lock == NULL) {
|
|
info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
|
|
__kmp_init_lock(info->mtx_lock);
|
|
}
|
|
KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
|
|
kmp_int32 m;
|
|
// Save lock in node's array
|
|
for (m = 0; m < MAX_MTX_DEPS; ++m) {
|
|
// sort pointers in decreasing order to avoid potential livelock
|
|
if (node->dn.mtx_locks[m] < info->mtx_lock) {
|
|
KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
|
|
for (int n = node->dn.mtx_num_locks; n > m; --n) {
|
|
// shift right all lesser non-NULL pointers
|
|
KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
|
|
node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
|
|
}
|
|
node->dn.mtx_locks[m] = info->mtx_lock;
|
|
break;
|
|
}
|
|
}
|
|
KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
|
|
node->dn.mtx_num_locks++;
|
|
}
|
|
}
|
|
}
|
|
KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
|
|
gtid, npredecessors));
|
|
return npredecessors;
|
|
}
|
|
|
|
#define NO_DEP_BARRIER (false)
|
|
#define DEP_BARRIER (true)
|
|
|
|
// returns true if the task has any outstanding dependence
|
|
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
|
|
kmp_task_t *task, kmp_dephash_t **hash,
|
|
bool dep_barrier, kmp_int32 ndeps,
|
|
kmp_depend_info_t *dep_list,
|
|
kmp_int32 ndeps_noalias,
|
|
kmp_depend_info_t *noalias_dep_list) {
|
|
int i, n_mtxs = 0, dep_all = 0;
|
|
#if KMP_DEBUG
|
|
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
|
|
#endif
|
|
KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "
|
|
"possibly aliased dependences, %d non-aliased dependences : "
|
|
"dep_barrier=%d .\n",
|
|
gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));
|
|
|
|
// Filter deps in dep_list
|
|
// TODO: Different algorithm for large dep_list ( > 10 ? )
|
|
for (i = 0; i < ndeps; i++) {
|
|
if (dep_list[i].base_addr != 0 &&
|
|
dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
|
|
KMP_DEBUG_ASSERT(
|
|
dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
|
|
dep_list[i].flag == KMP_DEP_INOUT ||
|
|
dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
|
|
for (int j = i + 1; j < ndeps; j++) {
|
|
if (dep_list[i].base_addr == dep_list[j].base_addr) {
|
|
if (dep_list[i].flag != dep_list[j].flag) {
|
|
// two different dependences on same address work identical to OUT
|
|
dep_list[i].flag = KMP_DEP_OUT;
|
|
}
|
|
dep_list[j].base_addr = 0; // Mark j element as void
|
|
}
|
|
}
|
|
if (dep_list[i].flag == KMP_DEP_MTX) {
|
|
// limit number of mtx deps to MAX_MTX_DEPS per node
|
|
if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
|
|
++n_mtxs;
|
|
} else {
|
|
dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
|
|
}
|
|
}
|
|
} else if (dep_list[i].flag == KMP_DEP_ALL ||
|
|
dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX) {
|
|
// omp_all_memory dependence can be marked by compiler by either
|
|
// (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
|
|
// omp_all_memory overrides all other dependences if any
|
|
dep_all = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// doesn't need to be atomic as no other thread is going to be accessing this
|
|
// node just yet.
|
|
// npredecessors is set -1 to ensure that none of the releasing tasks queues
|
|
// this task before we have finished processing all the dependences
|
|
node->dn.npredecessors = -1;
|
|
|
|
// used to pack all npredecessors additions into a single atomic operation at
|
|
// the end
|
|
int npredecessors;
|
|
|
|
if (!dep_all) { // regular dependences
|
|
npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
|
|
ndeps, dep_list, task);
|
|
npredecessors += __kmp_process_deps<false>(
|
|
gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
|
|
} else { // omp_all_memory dependence
|
|
npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
|
|
}
|
|
|
|
node->dn.task = task;
|
|
KMP_MB();
|
|
|
|
// Account for our initial fake value
|
|
npredecessors++;
|
|
|
|
// Update predecessors and obtain current value to check if there are still
|
|
// any outstanding dependences (some tasks may have finished while we
|
|
// processed the dependences)
|
|
npredecessors =
|
|
node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;
|
|
|
|
KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
|
|
gtid, npredecessors, taskdata));
|
|
|
|
// beyond this point the task could be queued (and executed) by a releasing
|
|
// task...
|
|
return npredecessors > 0 ? true : false;
|
|
}
|
|
|
|
/*!
|
|
@ingroup TASKING
|
|
@param loc_ref location of the original task directive
|
|
@param gtid Global Thread ID of encountering thread
|
|
@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
|
|
task''
|
|
@param ndeps Number of depend items with possible aliasing
|
|
@param dep_list List of depend items with possible aliasing
|
|
@param ndeps_noalias Number of depend items with no aliasing
|
|
@param noalias_dep_list List of depend items with no aliasing
|
|
|
|
@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
|
|
suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
|
|
|
|
Schedule a non-thread-switchable task with dependences for execution
|
|
*/
|
|
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
|
|
kmp_task_t *new_task, kmp_int32 ndeps,
|
|
kmp_depend_info_t *dep_list,
|
|
kmp_int32 ndeps_noalias,
|
|
kmp_depend_info_t *noalias_dep_list) {
|
|
|
|
kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
|
|
KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
|
|
loc_ref, new_taskdata));
|
|
__kmp_assert_valid_gtid(gtid);
|
|
kmp_info_t *thread = __kmp_threads[gtid];
|
|
kmp_taskdata_t *current_task = thread->th.th_current_task;
|
|
|
|
#if OMPT_SUPPORT
|
|
if (ompt_enabled.enabled) {
|
|
if (!current_task->ompt_task_info.frame.enter_frame.ptr)
|
|
current_task->ompt_task_info.frame.enter_frame.ptr =
|
|
OMPT_GET_FRAME_ADDRESS(0);
|
|
if (ompt_enabled.ompt_callback_task_create) {
|
|
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
|
|
&(current_task->ompt_task_info.task_data),
|
|
&(current_task->ompt_task_info.frame),
|
|
&(new_taskdata->ompt_task_info.task_data),
|
|
ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
|
|
OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
|
|
}
|
|
|
|
new_taskdata->ompt_task_info.frame.enter_frame.ptr =
|
|
OMPT_GET_FRAME_ADDRESS(0);
|
|
}
|
|
|
|
#if OMPT_OPTIONAL
|
|
/* OMPT grab all dependences if requested by the tool */
|
|
if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
|
|
kmp_int32 i;
|
|
|
|
int ompt_ndeps = ndeps + ndeps_noalias;
|
|
ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
|
|
thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
|
|
|
|
KMP_ASSERT(ompt_deps != NULL);
|
|
|
|
for (i = 0; i < ndeps; i++) {
|
|
ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
|
|
if (dep_list[i].flags.in && dep_list[i].flags.out)
|
|
ompt_deps[i].dependence_type = ompt_dependence_type_inout;
|
|
else if (dep_list[i].flags.out)
|
|
ompt_deps[i].dependence_type = ompt_dependence_type_out;
|
|
else if (dep_list[i].flags.in)
|
|
ompt_deps[i].dependence_type = ompt_dependence_type_in;
|
|
else if (dep_list[i].flags.mtx)
|
|
ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
|
|
else if (dep_list[i].flags.set)
|
|
ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
|
|
}
|
|
for (i = 0; i < ndeps_noalias; i++) {
|
|
ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
|
|
if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
|
|
else if (noalias_dep_list[i].flags.out)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
|
|
else if (noalias_dep_list[i].flags.in)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
|
|
else if (noalias_dep_list[i].flags.mtx)
|
|
ompt_deps[ndeps + i].dependence_type =
|
|
ompt_dependence_type_mutexinoutset;
|
|
else if (noalias_dep_list[i].flags.set)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
|
|
}
|
|
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
|
|
&(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
|
|
/* We can now free the allocated memory for the dependences */
|
|
/* For OMPD we might want to delay the free until end of this function */
|
|
KMP_OMPT_DEPS_FREE(thread, ompt_deps);
|
|
}
|
|
#endif /* OMPT_OPTIONAL */
|
|
#endif /* OMPT_SUPPORT */
|
|
|
|
bool serial = current_task->td_flags.team_serial ||
|
|
current_task->td_flags.tasking_ser ||
|
|
current_task->td_flags.final;
|
|
kmp_task_team_t *task_team = thread->th.th_task_team;
|
|
serial = serial &&
|
|
!(task_team && (task_team->tt.tt_found_proxy_tasks ||
|
|
task_team->tt.tt_hidden_helper_task_encountered));
|
|
|
|
if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
|
|
/* if no dependences have been tracked yet, create the dependence hash */
|
|
if (current_task->td_dephash == NULL)
|
|
current_task->td_dephash = __kmp_dephash_create(thread, current_task);
|
|
|
|
#if USE_FAST_MEMORY
|
|
kmp_depnode_t *node =
|
|
(kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
|
|
#else
|
|
kmp_depnode_t *node =
|
|
(kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
|
|
#endif
|
|
|
|
__kmp_init_node(node);
|
|
new_taskdata->td_depnode = node;
|
|
|
|
if (__kmp_check_deps(gtid, node, new_task, ¤t_task->td_dephash,
|
|
NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
|
|
noalias_dep_list)) {
|
|
KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
|
|
"dependences: "
|
|
"loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
|
|
gtid, loc_ref, new_taskdata));
|
|
#if OMPT_SUPPORT
|
|
if (ompt_enabled.enabled) {
|
|
current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
|
|
}
|
|
#endif
|
|
return TASK_CURRENT_NOT_QUEUED;
|
|
}
|
|
} else {
|
|
KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
|
|
"for task (serialized) loc=%p task=%p\n",
|
|
gtid, loc_ref, new_taskdata));
|
|
}
|
|
|
|
KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
|
|
"dependences : "
|
|
"loc=%p task=%p, transferring to __kmp_omp_task\n",
|
|
gtid, loc_ref, new_taskdata));
|
|
|
|
kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
|
|
#if OMPT_SUPPORT
|
|
if (ompt_enabled.enabled) {
|
|
current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
#if OMPT_SUPPORT
|
|
void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
|
|
ompt_data_t *taskwait_task_data) {
|
|
if (ompt_enabled.ompt_callback_task_schedule) {
|
|
ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
|
|
taskwait_task_data, ompt_taskwait_complete, NULL);
|
|
}
|
|
current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
|
|
*taskwait_task_data = ompt_data_none;
|
|
}
|
|
#endif /* OMPT_SUPPORT */
|
|
|
|
/*!
|
|
@ingroup TASKING
|
|
@param loc_ref location of the original task directive
|
|
@param gtid Global Thread ID of encountering thread
|
|
@param ndeps Number of depend items with possible aliasing
|
|
@param dep_list List of depend items with possible aliasing
|
|
@param ndeps_noalias Number of depend items with no aliasing
|
|
@param noalias_dep_list List of depend items with no aliasing
|
|
|
|
Blocks the current task until all specifies dependences have been fulfilled.
|
|
*/
|
|
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
|
|
kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
|
|
kmp_depend_info_t *noalias_dep_list) {
|
|
KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));
|
|
|
|
if (ndeps == 0 && ndeps_noalias == 0) {
|
|
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependences to "
|
|
"wait upon : loc=%p\n",
|
|
gtid, loc_ref));
|
|
return;
|
|
}
|
|
__kmp_assert_valid_gtid(gtid);
|
|
kmp_info_t *thread = __kmp_threads[gtid];
|
|
kmp_taskdata_t *current_task = thread->th.th_current_task;
|
|
|
|
#if OMPT_SUPPORT
|
|
// this function represents a taskwait construct with depend clause
|
|
// We signal 4 events:
|
|
// - creation of the taskwait task
|
|
// - dependences of the taskwait task
|
|
// - schedule and finish of the taskwait task
|
|
ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
|
|
KMP_ASSERT(taskwait_task_data->ptr == NULL);
|
|
if (ompt_enabled.enabled) {
|
|
if (!current_task->ompt_task_info.frame.enter_frame.ptr)
|
|
current_task->ompt_task_info.frame.enter_frame.ptr =
|
|
OMPT_GET_FRAME_ADDRESS(0);
|
|
if (ompt_enabled.ompt_callback_task_create) {
|
|
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
|
|
&(current_task->ompt_task_info.task_data),
|
|
&(current_task->ompt_task_info.frame), taskwait_task_data,
|
|
ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
|
|
OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
|
|
}
|
|
}
|
|
|
|
#if OMPT_OPTIONAL
|
|
/* OMPT grab all dependences if requested by the tool */
|
|
if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
|
|
kmp_int32 i;
|
|
|
|
int ompt_ndeps = ndeps + ndeps_noalias;
|
|
ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
|
|
thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
|
|
|
|
KMP_ASSERT(ompt_deps != NULL);
|
|
|
|
for (i = 0; i < ndeps; i++) {
|
|
ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
|
|
if (dep_list[i].flags.in && dep_list[i].flags.out)
|
|
ompt_deps[i].dependence_type = ompt_dependence_type_inout;
|
|
else if (dep_list[i].flags.out)
|
|
ompt_deps[i].dependence_type = ompt_dependence_type_out;
|
|
else if (dep_list[i].flags.in)
|
|
ompt_deps[i].dependence_type = ompt_dependence_type_in;
|
|
else if (dep_list[i].flags.mtx)
|
|
ompt_deps[ndeps + i].dependence_type =
|
|
ompt_dependence_type_mutexinoutset;
|
|
else if (dep_list[i].flags.set)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
|
|
}
|
|
for (i = 0; i < ndeps_noalias; i++) {
|
|
ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
|
|
if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
|
|
else if (noalias_dep_list[i].flags.out)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
|
|
else if (noalias_dep_list[i].flags.in)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
|
|
else if (noalias_dep_list[i].flags.mtx)
|
|
ompt_deps[ndeps + i].dependence_type =
|
|
ompt_dependence_type_mutexinoutset;
|
|
else if (noalias_dep_list[i].flags.set)
|
|
ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
|
|
}
|
|
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
|
|
taskwait_task_data, ompt_deps, ompt_ndeps);
|
|
/* We can now free the allocated memory for the dependences */
|
|
/* For OMPD we might want to delay the free until end of this function */
|
|
KMP_OMPT_DEPS_FREE(thread, ompt_deps);
|
|
ompt_deps = NULL;
|
|
}
|
|
#endif /* OMPT_OPTIONAL */
|
|
#endif /* OMPT_SUPPORT */
|
|
|
|
// We can return immediately as:
|
|
// - dependences are not computed in serial teams (except with proxy tasks)
|
|
// - if the dephash is not yet created it means we have nothing to wait for
|
|
bool ignore = current_task->td_flags.team_serial ||
|
|
current_task->td_flags.tasking_ser ||
|
|
current_task->td_flags.final;
|
|
ignore =
|
|
ignore && thread->th.th_task_team != NULL &&
|
|
thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE &&
|
|
thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE;
|
|
ignore = ignore || current_task->td_dephash == NULL;
|
|
|
|
if (ignore) {
|
|
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
|
|
"dependences : loc=%p\n",
|
|
gtid, loc_ref));
|
|
#if OMPT_SUPPORT
|
|
__ompt_taskwait_dep_finish(current_task, taskwait_task_data);
|
|
#endif /* OMPT_SUPPORT */
|
|
return;
|
|
}
|
|
|
|
kmp_depnode_t node = {0};
|
|
__kmp_init_node(&node);
|
|
|
|
if (!__kmp_check_deps(gtid, &node, NULL, ¤t_task->td_dephash,
|
|
DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
|
|
noalias_dep_list)) {
|
|
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
|
|
"dependences : loc=%p\n",
|
|
gtid, loc_ref));
|
|
#if OMPT_SUPPORT
|
|
__ompt_taskwait_dep_finish(current_task, taskwait_task_data);
|
|
#endif /* OMPT_SUPPORT */
|
|
return;
|
|
}
|
|
|
|
int thread_finished = FALSE;
|
|
kmp_flag_32<false, false> flag(
|
|
(std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
|
|
while (node.dn.npredecessors > 0) {
|
|
flag.execute_tasks(thread, gtid, FALSE,
|
|
&thread_finished USE_ITT_BUILD_ARG(NULL),
|
|
__kmp_task_stealing_constraint);
|
|
}
|
|
|
|
#if OMPT_SUPPORT
|
|
__ompt_taskwait_dep_finish(current_task, taskwait_task_data);
|
|
#endif /* OMPT_SUPPORT */
|
|
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
|
|
gtid, loc_ref));
|
|
}
|