/* __ompc_add_task_to_pool_default:
 * Adds a task to the task pool. The task will be added to the current
 * thread's queue.
 */
int __ompc_add_task_to_pool_default(omp_task_pool_t *pool, omp_task_t *task)
{
  int success;
  int myid = __omp_myid;
  omp_task_queue_level_t *per_thread;

  Is_True(pool != NULL, ("__ompc_add_task_to_pool: task pool is uninitialized"));
  Is_True(task != NULL, ("__ompc_add_task_to_pool: tried to add NULL task to pool"));

  /* num_pending_tasks tracks not just tasks entered into the task pool, but
   * also tasks marked as deferred that could not fit into the task pool */
  if (__ompc_atomic_inc(&pool->num_pending_tasks) == 1) {
    pthread_mutex_lock(&pool->pool_lock);
    pthread_cond_broadcast(&pool->pool_cond);
    pthread_mutex_unlock(&pool->pool_lock);
  }

  per_thread = &pool->level[PER_THREAD];

  if (__ompc_task_is_tied(task))
    /* For tied tasks, we don't use the task_queue API. We explicitly put to
     * the tail. */
    success = __ompc_queue_put_tail(&per_thread->task_queue[TIED_IDX(myid)],
                                    task);
  else
    success = __ompc_task_queue_put(&per_thread->task_queue[UNTIED_IDX(myid)],
                                    task);

  return success;
}
/* __ompc_create_task_pool_simple:
 * Initializes a task pool, to which tasks may be added and from which they
 * may be taken. The task pool will be single-level, with 1 task queue
 * allotted per thread.
 */
omp_task_pool_t * __ompc_create_task_pool_simple(int team_size)
{
  int i;
  omp_task_pool_t *new_pool;
  omp_task_queue_level_t *per_thread;

  new_pool = (omp_task_pool_t *) aligned_malloc(sizeof(omp_task_pool_t),
                                                CACHE_LINE_SIZE);
  Is_True(new_pool != NULL, ("__ompc_create_task_pool: couldn't malloc new_pool"));

  new_pool->team_size = team_size;
  new_pool->num_levels = 1;
  new_pool->num_pending_tasks = 0;
  new_pool->level = aligned_malloc(sizeof(omp_task_queue_level_t),
                                   CACHE_LINE_SIZE);
  pthread_mutex_init(&(new_pool->pool_lock), NULL);
  pthread_cond_init(&(new_pool->pool_cond), NULL);
  Is_True(new_pool->level != NULL, ("__ompc_create_task_pool: couldn't malloc level"));

  per_thread = &new_pool->level[PER_THREAD];
  per_thread->num_queues = team_size;
  per_thread->task_queue = aligned_malloc(sizeof(omp_queue_t) * team_size,
                                          CACHE_LINE_SIZE);
  Is_True(per_thread->task_queue != NULL,
          ("__ompc_create_task_pool: couldn't malloc per-thread task queue"));

  for (i = 0; i < team_size; i++)
    __ompc_queue_init(&per_thread->task_queue[i], __omp_task_queue_num_slots);

  return new_pool;
}
/* __ompc_add_task_to_pool_simple_2level:
 * Adds a task to the task pool. The task will be added to the current
 * thread's queue or, if that queue is full, donated to the community queue.
 */
int __ompc_add_task_to_pool_simple_2level(omp_task_pool_t *pool, omp_task_t *task)
{
  int success;
  int myid = __omp_myid;
  omp_task_queue_level_t *per_thread;
  omp_task_queue_level_t *community;

  Is_True(pool != NULL, ("__ompc_add_task_to_pool: task pool is uninitialized"));
  Is_True(task != NULL, ("__ompc_add_task_to_pool: tried to add NULL task to pool"));

  success = 0;
  per_thread = &pool->level[PER_THREAD];
  community = &pool->level[COMMUNITY];

  /* num_pending_tasks tracks not just tasks entered into the task pool, but
   * also tasks marked as deferred that could not fit into the task pool */
  if (__ompc_atomic_inc(&pool->num_pending_tasks) == 1) {
    pthread_mutex_lock(&pool->pool_lock);
    pthread_cond_broadcast(&pool->pool_cond);
    pthread_mutex_unlock(&pool->pool_lock);
  }

  /* don't try to place it in the per-thread queue if it looks to be full,
   * because we have the community queue to use instead */
  if (!__ompc_task_queue_is_full(&per_thread->task_queue[myid]))
    success = __ompc_task_queue_put(&per_thread->task_queue[myid], task);

  if (!success)
    success = __ompc_task_queue_donate(community->task_queue, task);

  return success;
}
/* __ompc_list_add_slots
 * q: the queue to add slots to
 * num_slots: number of additional slots to allocate for queue
 *            (not contiguous)
 */
static inline void __ompc_list_add_slots(omp_queue_t *q, int num_slots)
{
  omp_queue_slot_t *new_slots, *tail, *head;
  int old_num_slots = q->num_slots;

  tail = q->tail;
  head = q->head;

  new_slots = aligned_malloc(sizeof(omp_queue_slot_t) * num_slots,
                             CACHE_LINE_SIZE);
  Is_True(new_slots != NULL, ("couldn't resize the queue"));

  /* think about if we can avoid this initialization */
  memset(new_slots, 0, num_slots * sizeof(omp_queue_slot_t));

  /* link in the newly allocated slots */
  if (tail->next)
    tail->next->prev = NULL;
  if (head->prev)
    head->prev->next = NULL;

  tail->next = new_slots;
  new_slots[0].prev = tail;
  head->prev = &new_slots[num_slots-1];
  new_slots[num_slots-1].next = head;

  q->num_slots = old_num_slots + num_slots;
}
/* ====================================================================
 * Copy_option
 * Underlying routine to copy odesc's to and from memory.
 * Returns number of bytes copied.
 * ====================================================================*/
static INT Copy_option(OPTION_DESC *odesc, char *container, BOOL save)
{
  void *var = ODESC_variable(odesc);
  size_t sz = 0;

  Is_True(ODESC_can_change_by_pragma(odesc),
          ("Copy_option, trying to copy option that cannot change"));

  switch (ODESC_kind(odesc)) {
    case OVK_NONE:
    case OVK_BOOL:
      sz = sizeof(BOOL);
      break;
    case OVK_INT32:
    case OVK_UINT32:
      sz = sizeof(INT32);
      break;
    case OVK_INT64:
    case OVK_UINT64:
      sz = sizeof(INT64);
      break;
    case OVK_NAME:
    case OVK_SELF:
    case OVK_LIST:
      sz = sizeof(void *);
      break;
  }

  if (sz > 0) {
    if (save)
      memcpy(container, var, sz);
    else
      memcpy(var, container, sz);
  }

  return (sz);
}
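/* Illustrative sketch only (not part of the compiler source): a simplified,
 * self-contained analogue of the save/restore round trip that Copy_option
 * implements. The names here (toy_option_t, toy_copy_option, toy_round_trip)
 * are hypothetical stand-ins for OPTION_DESC and its ODESC_* accessors. */
#include <string.h>

typedef enum { TOY_BOOL, TOY_INT32, TOY_PTR } toy_kind_t;

typedef struct {
  toy_kind_t kind;
  void *variable;   /* address of the live option variable */
} toy_option_t;

/* Copy the option's value into 'container' when save is nonzero,
 * otherwise restore the value from 'container'. Returns bytes copied. */
static size_t toy_copy_option(toy_option_t *opt, char *container, int save)
{
  size_t sz = 0;
  switch (opt->kind) {
    case TOY_BOOL:  sz = sizeof(int);    break;
    case TOY_INT32: sz = sizeof(int);    break;
    case TOY_PTR:   sz = sizeof(void *); break;
  }
  if (sz > 0) {
    if (save) memcpy(container, opt->variable, sz);
    else      memcpy(opt->variable, container, sz);
  }
  return sz;
}

/* Usage: save before a pragma overrides the option, restore afterwards. */
static void toy_round_trip(void)
{
  int opt_level = 2;
  toy_option_t desc = { TOY_INT32, &opt_level };
  char saved[sizeof(int)];

  toy_copy_option(&desc, saved, 1);  /* save */
  opt_level = 3;                     /* pragma override */
  toy_copy_option(&desc, saved, 0);  /* restore: opt_level == 2 again */
}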
int __ompc_queue_dyn_array_put_head(omp_queue_t *q, omp_queue_item_t item)
{
  unsigned int head_index;
  unsigned int num_slots;

  Is_True(q != NULL, ("tried to put to head on NULL queue"));

  __ompc_lock(&q->lock1);

  if (__ompc_queue_array_is_full(q)) {
    __ompc_dyn_array_resize(q, 2*q->num_slots);
  }

  head_index = q->head_index;
  num_slots = q->num_slots;

  q->slots[head_index].item = item;
  q->head_index = head_index ? (head_index - 1) % num_slots : num_slots - 1;
  ++q->used_slots;
  q->is_empty = 0;

  __ompc_unlock(&q->lock1);

  return 1;
}
int __ompc_queue_cfifo_dyn_array_put(omp_queue_t *q, omp_queue_item_t item)
{
  unsigned int new_tail_index;
  unsigned int head_index;

  Is_True(q != NULL, ("tried to put to tail on NULL queue"));

  head_index = q->head_index;

  __ompc_lock(&q->lock2);

  new_tail_index = (q->tail_index + 1) % q->num_slots;

  if (new_tail_index == head_index) {
    /* lock2 must be acquired after lock1 to prevent potential deadlock with
     * the __ompc_queue_cfifo_array_transfer_chunk_to_empty routine */
    __ompc_unlock(&q->lock2);
    __ompc_lock(&q->lock1);
    __ompc_lock(&q->lock2);

    new_tail_index = (q->tail_index + 1) % q->num_slots;
    if (new_tail_index == head_index) {
      __ompc_dyn_array_resize(q, 2*q->num_slots);
      new_tail_index = (q->tail_index + 1) % q->num_slots;
    }

    __ompc_unlock(&q->lock1);
  }

  q->slots[new_tail_index].item = item;
  q->tail_index = new_tail_index;
  q->is_empty = 0;

  __ompc_unlock(&q->lock2);

  return 1;
}
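/* Minimal self-contained sketch (plain pthreads, hypothetical names) of the
 * lock-ordering discipline used above: a thread holding lock2 that discovers
 * it also needs lock1 must release lock2 first, take lock1, retake lock2, and
 * then re-check the condition that motivated the upgrade, since it may have
 * changed while no lock was held. */
#include <pthread.h>

static pthread_mutex_t sketch_lock1 = PTHREAD_MUTEX_INITIALIZER; /* resize lock */
static pthread_mutex_t sketch_lock2 = PTHREAD_MUTEX_INITIALIZER; /* tail lock   */
static int sketch_queue_looks_full = 1;                          /* stand-in state */

static void put_with_lock_upgrade_sketch(void)
{
  pthread_mutex_lock(&sketch_lock2);
  if (sketch_queue_looks_full) {
    /* Upgrade path: never take lock1 while holding lock2. */
    pthread_mutex_unlock(&sketch_lock2);
    pthread_mutex_lock(&sketch_lock1);
    pthread_mutex_lock(&sketch_lock2);
    if (sketch_queue_looks_full)       /* re-check after reacquiring */
      sketch_queue_looks_full = 0;     /* stand-in for the resize */
    pthread_mutex_unlock(&sketch_lock1);
  }
  /* ... perform the put while still holding lock2 ... */
  pthread_mutex_unlock(&sketch_lock2);
}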
/* __ompc_expand_task_pool_default
 * Expand the task pool for a new team size. Simply a matter of adding extra
 * task queues (tied and untied) for each additional thread.
 */
omp_task_pool_t * __ompc_expand_task_pool_default(omp_task_pool_t *pool,
                                                  int new_team_size)
{
  int i;
  int old_team_size;
  omp_task_queue_level_t *per_thread;

  if (pool == NULL)
    return __ompc_create_task_pool(new_team_size);

  per_thread = &pool->level[PER_THREAD];
  old_team_size = pool->team_size;

  per_thread->num_queues = new_team_size * 2;
  per_thread->task_queue = aligned_realloc((void *) per_thread->task_queue,
                                           sizeof(omp_queue_t) * old_team_size * 2,
                                           sizeof(omp_queue_t) * new_team_size * 2,
                                           CACHE_LINE_SIZE);
  Is_True(per_thread->task_queue != NULL,
          ("__ompc_expand_task_pool: couldn't expand the task pool"));

  for (i = old_team_size; i < new_team_size; i++) {
    __ompc_queue_init(&per_thread->task_queue[TIED_IDX(i)],
                      __omp_task_queue_num_slots);
    __ompc_queue_init(&per_thread->task_queue[UNTIED_IDX(i)],
                      __omp_task_queue_num_slots);
  }

  return pool;
}
/* ====================================================================
 *
 *  TI_Initialize
 *
 *  See interface description
 *
 * ====================================================================
 */
void TI_Initialize(ABI_PROPERTIES_ABI tabi, ISA_SUBSET tisa, PROCESSOR tproc)
{
  static BOOL initialized;

  if ( !initialized ) {
#ifndef TARG_NVISA /* no scheduling info for NVISA */
    INT i;
    BOOL found_targ = FALSE;
    const char *targ_name = PROCESSOR_Name(tproc);

    for (i = 0; i < (sizeof(si_machines) / sizeof(si_machines[0])); i++) {
      if (strcmp(targ_name, si_machines[i].name) == 0) {
        si_current_machine = i;
        found_targ = TRUE;
        break;
      }
    }
    Is_True(found_targ, ("Scheduling info missing for target %s", targ_name));
#endif

    ISA_SUBSET_Value = tisa;
    PROCESSOR_Value = tproc;
    ABI_PROPERTIES_ABI_Value = tabi;

    ABI_PROPERTIES_Initialize();
    ISA_HAZARD_Initialize();
    ISA_REGISTER_Initialize();

    initialized = TRUE;
  }
}
omp_queue_item_t __ompc_queue_lockless_get_tail(omp_queue_t *q)
{
  unsigned int tail_index, head_index, num_slots;
  int used_slots;
  omp_queue_item_t item;

  Is_True(q != NULL, ("tried to get tail from NULL queue"));

  tail_index = q->tail_index;
  tail_index--;
  q->tail_index = tail_index;

  __ompc_mfence();

  head_index = q->head_index;
  num_slots = q->num_slots;
  used_slots = tail_index - head_index;

  if (used_slots < 0) {
    q->tail_index = q->head_index;
    return NULL;
  }

  item = q->slots[tail_index % num_slots].item;

  if (used_slots > 0) {
    return item;
  }

  if (!__ompc_cas(&q->head_index, head_index, head_index + 1))
    item = NULL;

  __ompc_mfence();
  q->tail_index = q->head_index;

  return item;
}
omp_queue_item_t __ompc_queue_cfifo_array_get(omp_queue_t *q)
{
  unsigned int new_head_index;
  omp_queue_item_t item;

  Is_True(q != NULL, ("tried to get head from NULL queue"));

  __ompc_lock(&q->lock1);

  if (__ompc_queue_is_empty(q)) {
    /* queue is empty */
    __ompc_unlock(&q->lock1);
    return NULL;
  }

  new_head_index = (q->head_index + 1) % q->num_slots;
  q->head_index = new_head_index;
  item = q->slots[q->head_index].item;

  /* only acquire the lock for setting is_empty if it looks like the queue is
   * actually empty */
  if (new_head_index == q->tail_index) {
    __ompc_lock(&q->lock2);
    if (new_head_index == q->tail_index) {
      q->is_empty = 1;
    }
    __ompc_unlock(&q->lock2);
  }

  __ompc_unlock(&q->lock1);

  return item;
}
/* __ompc_create_task_pool_simple_2level:
 * Initializes a task pool, to which tasks may be added and from which they
 * may be taken.
 */
omp_task_pool_t * __ompc_create_task_pool_simple_2level(int team_size)
{
  int i;
  omp_task_pool_t *new_pool;
  omp_task_queue_level_t *per_thread;
  omp_task_queue_level_t *community;

  new_pool = (omp_task_pool_t *) aligned_malloc(sizeof(omp_task_pool_t),
                                                CACHE_LINE_SIZE);
  Is_True(new_pool != NULL, ("__ompc_create_task_pool: couldn't malloc new_pool"));

  new_pool->team_size = team_size;
  new_pool->num_levels = 2;
  new_pool->num_pending_tasks = 0;
  new_pool->level = aligned_malloc(sizeof(omp_task_queue_level_t) * 2,
                                   CACHE_LINE_SIZE);
  pthread_mutex_init(&(new_pool->pool_lock), NULL);
  pthread_cond_init(&(new_pool->pool_cond), NULL);
  Is_True(new_pool->level != NULL, ("__ompc_create_task_pool: couldn't malloc level"));

  per_thread = &new_pool->level[PER_THREAD];
  community = &new_pool->level[COMMUNITY];

  per_thread->num_queues = team_size;
  per_thread->task_queue = aligned_malloc(sizeof(omp_queue_t) * team_size,
                                          CACHE_LINE_SIZE);
  community->num_queues = 1;
  community->task_queue = aligned_malloc(sizeof(omp_queue_t), CACHE_LINE_SIZE);

  Is_True(per_thread->task_queue != NULL,
          ("__ompc_create_task_pool: couldn't malloc per-thread task queues"));
  Is_True(community->task_queue != NULL,
          ("__ompc_create_task_pool: couldn't malloc community task queue"));

  for (i = 0; i < team_size; i++)
    __ompc_queue_init(&per_thread->task_queue[i], __omp_task_queue_num_slots);

  /* What's a good size for the community queue, as a function of the local
   * queue sizes and the team size? Just going to make it 2 * local queue size
   * for now. */
  __ompc_queue_init(community->task_queue, __omp_task_queue_num_slots * 2);

  return new_pool;
}
omp_queue_item_t __ompc_queue_list_steal_tail(omp_queue_t *q)
{
  omp_queue_slot_t *tail;
  omp_queue_item_t item;

  Is_True(q != NULL, ("tried to get tail from NULL queue"));

  if (__ompc_queue_is_empty(q)) {
    return NULL;
  }

  if (__ompc_test_lock(&q->lock1) == 0)
    return NULL;

  if (__ompc_queue_is_empty(q)) {
    __ompc_unlock(&q->lock1);
    return NULL;
  }

  tail = q->tail;
  item = tail->item;

  if (tail->prev) {
    q->tail = tail->prev;
  } else {
    q->tail = tail - 1; /* previous slot in memory */
  }

  if (--q->used_slots == 0)
    q->is_empty = 1;

  if (q->used_slots != 0) {
    Is_True(q->head != q->tail, ("queue overflow"));
  }

  __ompc_unlock(&q->lock1);

  return item;
}
omp_queue_item_t __ompc_queue_list_steal_head(omp_queue_t *q)
{
  omp_queue_slot_t *head, *new_head;
  omp_queue_item_t item;

  Is_True(q != NULL, ("tried to get head from NULL queue"));

  if (__ompc_queue_is_empty(q)) {
    return NULL;
  }

  if (__ompc_test_lock(&q->lock1) == 0)
    return NULL;

  if (__ompc_queue_is_empty(q)) {
    __ompc_unlock(&q->lock1);
    return NULL;
  }

  head = q->head;

  if (head->next) {
    new_head = head->next;
  } else {
    new_head = head + 1; /* the next slot in memory */
  }

  item = new_head->item;
  q->head = new_head;

  if (--q->used_slots == 0)
    q->is_empty = 1;

  if (q->used_slots != 0) {
    Is_True(q->head != q->tail, ("queue overflow"));
  }

  __ompc_unlock(&q->lock1);

  return item;
}
void __ompc_queue_lockless_init(omp_queue_t *q, int num_slots)
{
  q->slots = aligned_malloc(num_slots * sizeof(omp_queue_slot_t),
                            CACHE_LINE_SIZE);
  Is_True(q->slots != NULL,
          ("__ompc_queue_init: couldn't malloc slots for queue"));
  memset(q->slots, 0, num_slots * sizeof(omp_queue_slot_t));

  q->head = q->tail = q->slots;
  q->num_slots = num_slots;
  q->is_empty = 1;
  q->head_index = q->tail_index = q->used_slots = q->reject = 0;

  __ompc_init_lock(&q->lock1);
  __ompc_init_lock(&q->lock2);
}
/* __ompc_add_task_to_pool_simple:
 * Adds a task to the task pool. The task will be added to the current
 * thread's queue.
 */
int __ompc_add_task_to_pool_simple(omp_task_pool_t *pool, omp_task_t *task)
{
  int success;
  int myid = __omp_myid;

  Is_True(pool != NULL, ("__ompc_add_task_to_pool: task pool is uninitialized"));
  Is_True(task != NULL, ("__ompc_add_task_to_pool: tried to add NULL task to pool"));

  /* num_pending_tasks tracks not just tasks entered into the task pool, but
   * also tasks marked as deferred that could not fit into the task pool */
  if (__ompc_atomic_inc(&pool->num_pending_tasks) == 1) {
    pthread_mutex_lock(&pool->pool_lock);
    pthread_cond_broadcast(&pool->pool_cond);
    pthread_mutex_unlock(&pool->pool_lock);
  }

  success = __ompc_task_queue_put(&pool->level[PER_THREAD].task_queue[myid],
                                  task);

  return success;
}
omp_queue_item_t __ompc_queue_array_steal_tail(omp_queue_t *q)
{
  unsigned int tail_index;
  unsigned int num_slots;
  omp_queue_item_t item;

  Is_True(q != NULL, ("tried to get tail from NULL queue"));

  if (__ompc_queue_is_empty(q)) {
    return NULL;
  }

  if (__ompc_test_lock(&q->lock1) == 0)
    return NULL;

  if (__ompc_queue_is_empty(q)) {
    __ompc_unlock(&q->lock1);
    return NULL;
  }

  tail_index = q->tail_index;
  num_slots = q->num_slots;
  item = q->slots[tail_index].item;

  q->tail_index = tail_index ? (tail_index - 1) % num_slots : num_slots - 1;

  if (--q->used_slots == 0)
    q->is_empty = 1;

  if (q->used_slots != 0) {
    Is_True(q->head_index != q->tail_index, ("queue overflow"));
  }

  __ompc_unlock(&q->lock1);

  return item;
}
int __ompc_queue_lockless_put_tail(omp_queue_t *q, omp_queue_item_t item)
{
  unsigned int tail_index, num_slots;

  Is_True(q != NULL, ("tried to put to tail on NULL queue"));

  if (__ompc_queue_lockless_is_full(q)) {
    return 0;
  }

  tail_index = q->tail_index;
  num_slots = q->num_slots;

  q->slots[tail_index % num_slots].item = item;
  q->tail_index++;

  return 1;
}
void ARY_Init_List ( LNK_LST_ARY *ary, INT32 n_elems )
{
  register INT32 i;
  register LNK_LST *lst;

  Is_True (n_elems >= 1,
           ("ARY_Init_List: attempt to allocate array of size %d", n_elems) );

  if ((lst=(LNK_LST *)lnk_lst_malloc(sizeof(LNK_LST)*n_elems)) == NULL)
    ErrMsg ( EC_No_Mem, "ARY_Init_List" );

  LST_lists(ary) = lst;
  ARY_LST_n_elems(ary) = n_elems;

  for (i=0; i<n_elems; ++i, ++lst)
    Init_List( lst );
}
/* __ompc_destroy_task_pool_simple:
 */
void __ompc_destroy_task_pool_simple(omp_task_pool_t *pool)
{
  int i;
  omp_task_queue_level_t *per_thread;

  Is_True(pool != NULL, ("__ompc_destroy_task_pool: pool is NULL"));

  per_thread = &pool->level[PER_THREAD];

  for (i = 0; i < pool->team_size; i++) {
    __ompc_queue_free_slots(&per_thread->task_queue[i]);
  }

  pthread_mutex_destroy(&pool->pool_lock);
  aligned_free(per_thread->task_queue); /* free queues in level 0 */
  aligned_free(pool->level);            /* free the level array */
  aligned_free(pool);                   /* free the pool itself */
}
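/* Illustrative usage sketch only (not part of the runtime source): how the
 * simple single-level pool shown above fits together. Assumes the internal
 * declarations (omp_task_pool_t, omp_task_t, the __ompc_* functions in this
 * file, and __omp_myid) are visible via the runtime's private headers;
 * make_dummy_task() is a hypothetical helper standing in for real task
 * construction. */
extern omp_task_t *make_dummy_task(void);   /* hypothetical */

static void task_pool_simple_lifecycle_sketch(int team_size)
{
  omp_task_pool_t *pool;
  omp_task_t *task = make_dummy_task();

  pool = __ompc_create_task_pool_simple(team_size);

  /* Enqueue onto the calling thread's per-thread queue; a return value of 0
   * means the queue was full and the caller must run the task immediately. */
  if (__ompc_add_task_to_pool_simple(pool, task) == 0) {
    /* execute the task in place instead of deferring it */
  }

  /* ... worker threads remove and execute tasks ... */

  __ompc_destroy_task_pool_simple(pool);
}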
static LST_ITM *
list_malloc ( void )
{
  MEM_PTR mem_block;
  BLK_LST_ITMS *blk;
  register LST_ITM *itm;
  register INT32 i;

  /*
   * The free list had better be empty.
   */
  Is_True (list_items == NULL, ("list_malloc: free list is not empty"));

  if ( (mem_block = lnk_lst_malloc(M_BLOCK_SIZE)) == NULL )
    ErrMsg ( EC_No_Mem, "list_malloc" );

  /*
   * Link this block up with any previously allocated blocks of
   * list items.
   */
  blk = (BLK_LST_ITMS *) mem_block;
  BLK_block(blk) = mem_block;   /* it points to itself! */
  BLK_next(blk) = block_item_hdr;
  block_item_hdr = blk;

  /*
   * Link (N_LIST_BLOCK-1) elements together and place them on the free
   * list, making sure the last one on the free list points to NULL.
   * Take the one remaining item, NULL out its pointer, and return it.
   */
  list_items = (LST_ITM *) ++blk;
  for (itm=list_items, i=0; i<(N_LIST_BLOCK-2); ++i, ++itm) {
    LST_val(itm) = -1;
    LST_next(itm) = itm+1;
  }
  LST_next(itm) = NULL;   /* "ground" the end of the free list */

  ++itm;                  /* 'itm' now points to the one remaining item */
  LST_next(itm) = NULL;

  return itm;
}
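/* Minimal self-contained sketch (hypothetical item_t / carve_block names) of
 * the carving pattern used by list_malloc above: allocate one big block,
 * chain all but one of its items onto a free list, and hand the remaining
 * item back to the caller. */
#include <stdlib.h>

typedef struct carve_item { struct carve_item *next; int val; } carve_item_t;

static carve_item_t *carve_free_list = NULL;

static carve_item_t *carve_block(size_t n_items)   /* n_items >= 2 assumed */
{
  carve_item_t *block = malloc(n_items * sizeof(carve_item_t));
  size_t i;

  if (block == NULL)
    return NULL;

  /* chain items 0 .. n_items-2 together and put them on the free list */
  for (i = 0; i + 2 < n_items; i++)
    block[i].next = &block[i + 1];
  block[i].next = NULL;              /* ground the end of the free list */
  carve_free_list = &block[0];

  block[n_items - 1].next = NULL;    /* the one item returned to the caller */
  return &block[n_items - 1];
}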
int __ompc_queue_dyn_array_put_tail(omp_queue_t *q, omp_queue_item_t item)
{
  Is_True(q != NULL, ("tried to put to tail on NULL queue"));

  __ompc_lock(&q->lock1);

  if (__ompc_queue_array_is_full(q)) {
    __ompc_dyn_array_resize(q, 2*q->num_slots);
  }

  q->tail_index = (q->tail_index + 1) % q->num_slots;
  q->slots[q->tail_index].item = item;
  ++q->used_slots;
  q->is_empty = 0;

  __ompc_unlock(&q->lock1);

  return 1;
}
static inline void __ompc_dyn_array_resize(omp_queue_t *q, int new_num_slots)
{
  unsigned int old_tail_index = q->tail_index;
  unsigned int head_index = q->head_index;
  int old_num_slots = q->num_slots;

  q->head = q->tail = q->slots =
      aligned_realloc((void *) q->slots,
                      sizeof(omp_queue_slot_t) * old_num_slots,
                      sizeof(omp_queue_slot_t) * new_num_slots,
                      CACHE_LINE_SIZE);
  Is_True(q->slots != NULL, ("couldn't resize the queue"));

  if (old_tail_index < head_index) {
    /* the occupied region wrapped around the end of the old array; move the
     * wrapped portion so it follows the old end */
    memcpy(&q->slots[old_num_slots], &q->slots[0],
           (old_tail_index + 1) * sizeof(omp_queue_slot_t));
    q->tail_index = old_tail_index + old_num_slots;
  }

  q->num_slots = new_num_slots;
}
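/* Minimal self-contained sketch (plain C, hypothetical ring_t type) of the
 * wrap-around fix-up performed by __ompc_dyn_array_resize: when the occupied
 * region wraps past the end of the old array, the wrapped prefix [0 .. tail]
 * is copied to just after the old end so that head..tail is contiguous again
 * in the doubled array. Error handling is elided for brevity. */
#include <stdlib.h>
#include <string.h>

typedef struct {
  int *slots;
  unsigned int head, tail;   /* same convention as above: tail may wrap below head */
  unsigned int num_slots;
} ring_t;

static void ring_resize_double(ring_t *r)
{
  unsigned int old_n = r->num_slots;
  unsigned int new_n = 2 * old_n;

  r->slots = realloc(r->slots, new_n * sizeof(int));

  if (r->tail < r->head) {
    /* e.g. old_n = 4, head = 2, tail = 1: slots 0..1 hold the wrapped part,
     * so copy them to slots 4..5 and set tail = 1 + 4 = 5 */
    memcpy(&r->slots[old_n], &r->slots[0], (r->tail + 1) * sizeof(int));
    r->tail = r->tail + old_n;
  }
  r->num_slots = new_n;
}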
int __ompc_queue_cfifo_array_put(omp_queue_t *q, omp_queue_item_t item)
{
  unsigned int new_tail_index;

  Is_True(q != NULL, ("tried to put to tail on NULL queue"));

  __ompc_lock(&q->lock2);

  new_tail_index = (q->tail_index + 1) % q->num_slots;

  if (new_tail_index == q->head_index) {
    /* queue is full */
    __ompc_unlock(&q->lock2);
    return 0;
  }

  q->slots[new_tail_index].item = item;
  q->tail_index = new_tail_index;
  q->is_empty = 0;

  __ompc_unlock(&q->lock2);

  return 1;
}
omp_queue_item_t __ompc_queue_lockless_get_head(omp_queue_t *q)
{
  unsigned int head_index, num_slots, tail_index;
  int used_slots;
  omp_queue_item_t item;

  Is_True(q != NULL, ("tried to get head from NULL queue"));

  head_index = q->head_index;
  num_slots = q->num_slots;
  tail_index = q->tail_index;

  item = q->slots[head_index % num_slots].item;
  used_slots = tail_index - head_index;

  if (used_slots <= 0) {
    return NULL;
  }

  if (__ompc_cas(&q->head_index, head_index, head_index + 1)) {
    return item;
  }

  return NULL;
}
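/* Illustrative sketch only (not part of the runtime source): how the three
 * lockless-deque operations above are intended to be paired. The queue's
 * owner pushes and pops at the tail (put_tail / get_tail), while thieves take
 * from the head (get_head); the race on the final remaining item is resolved
 * by the compare-and-swap on head_index. This single-threaded smoke sketch
 * assumes the internal declarations from this file are visible and that
 * omp_queue_item_t is a pointer-sized item type. */
static void lockless_deque_usage_sketch(void)
{
  omp_queue_t q;
  int a = 1, b = 2;
  omp_queue_item_t stolen, own;

  __ompc_queue_lockless_init(&q, 8);

  /* owner side: push two items at the tail */
  __ompc_queue_lockless_put_tail(&q, (omp_queue_item_t) &a);
  __ompc_queue_lockless_put_tail(&q, (omp_queue_item_t) &b);

  /* a thief (normally another thread) takes the oldest item from the head */
  stolen = __ompc_queue_lockless_get_head(&q);   /* &a */

  /* the owner takes the newest item from the tail */
  own = __ompc_queue_lockless_get_tail(&q);      /* &b */

  (void) stolen; (void) own;
}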
void Add_Ordered_Item_Dupl ( LNK_LST *lst, tlst_val val )
{
  register LST_ITM *p, *last;
  register tlst_val this_val;

  if (LST_Empty(lst)) {
    LST_first(lst) = p = item_alloc();
    LST_val(p) = val;
    incr_LST_len(lst);
    return;
  }

  p = LST_first(lst);
  this_val = LST_val(p);
  if ( val <= this_val ) {
    /* insert at beginning of the list */
    register LST_ITM *new = item_alloc();
    LST_next(new) = p;
    LST_first(lst) = new;
    LST_val(new) = val;
    incr_LST_len(lst);
    return;
  }

  last = p;
  for ( p=LST_next(p); p!=NULL; p=LST_next(p)) {
#ifdef LNK_LST_CHECK
    Is_True ( this_val <= LST_val(p),
              ("ordered list not sorted: elems %d and %d", this_val, LST_val(p)));
#endif /* LNK_LST_CHECK */
    this_val = LST_val(p);
    if ( val <= this_val ) {
      /* insert here */
      register LST_ITM *new = item_alloc();
      LST_next(new) = p;
      LST_next(last) = new;
      LST_val(new) = val;
      incr_LST_len(lst);
      return;
    }
    last = p;
  }

  /*
   * If we get to here, we went through the list without finding a
   * matching item, and all items in the list have values less than
   * the new value.  Append a new item to the end of the list.
   */
  LST_next(last) = p = item_alloc();
  LST_val(p) = val;
  incr_LST_len(lst);

  /*
   * If the pointer to the next item in the list is NULL, i.e. when
   * stepping through the list the end was reached, then make the next
   * item be this new item.
   */
  if (LST_nxt(lst) == NULL)
    LST_nxt(lst) = p;
}
/* __ompc_remove_task_from_pool_default:
 * Takes a task from the task pool. First tries to get a task from the current
 * thread's task queue. If that doesn't work, then it will attempt to steal a
 * task from another task queue (so long as there are no other tasks, not in a
 * barrier, that are tied to the current thread).
 */
omp_task_t *__ompc_remove_task_from_pool_default(omp_task_pool_t *pool)
{
  omp_task_t *task, *current_task;
  omp_v_thread_t *current_thread;
  omp_task_queue_level_t *per_thread;
  int myid = __omp_myid;

  Is_True(pool != NULL,
          ("__ompc_remove_task_from_pool: task pool is uninitialized"));

  current_task = __omp_current_task;
  current_thread = __omp_current_v_thread;
  per_thread = &pool->level[PER_THREAD];

  /* We get only from the tail for tied tasks. This is necessary to guarantee
   * that tied tasks are only scheduled if they are descendants of every
   * suspended tied task not at a barrier. */
  task = __ompc_queue_get_tail(&per_thread->task_queue[TIED_IDX(myid)]);

  /* for untied tasks, we can get from the head or tail, depending on what
   * O64_OMP_TASK_QUEUE is set to */
  if (task == NULL)
    task = __ompc_task_queue_get(&per_thread->task_queue[UNTIED_IDX(myid)]);

  /* check if there are any untied tasks available in the other task queues */
  if (task == NULL) {
    int first_victim, victim = 0;
    int team_size = pool->team_size;

    if (team_size < 2)
      return NULL;

    victim = (rand_r(&__omp_seed) % (team_size - 1));
    if (victim >= myid) victim++;

    /* cycle through to find a queue with work to steal */
    first_victim = victim;
    while (1) {
      while (__ompc_queue_lockless_is_empty(
                 &per_thread->task_queue[UNTIED_IDX(victim)])) {
        victim++;
        if (victim == myid) victim++;
        if (victim == team_size) victim = 0;
        if (victim == first_victim) goto CHECK_TIED_TASK_QUEUES;
      }
      task = __ompc_task_queue_steal(
                 &per_thread->task_queue[UNTIED_IDX(victim)]);
      if ( task != NULL ) {
        /*
        if (!__ompc_task_state_is_unscheduled(task)) {
          // Is_True(0, ("state of task from queue was not unscheduled"));
          printf("\n... (1) skipping over a task with state %s; queue size is %d \n",
                 __ompc_task_get_state_string(task),
                 __ompc_queue_num_used_slots(&per_thread->task_queue[UNTIED_IDX(victim)]));
          task = NULL;
        }
        */
        return task;
      }
    }
  }

  /* if no task in the local queue and no available untied tasks, we will look
   * in another queue so long as there are no suspended tasks tied to this
   * thread and the current task is either in a barrier or it's not tied */
CHECK_TIED_TASK_QUEUES:
  if (task == NULL && !current_thread->num_suspended_tied_tasks &&
      (__ompc_task_state_is_in_barrier(current_task) ||
       !__ompc_task_is_tied(current_task))) {
    int first_victim, victim = 0;
    int team_size = pool->team_size;

    victim = (rand_r(&__omp_seed) % (team_size - 1));
    if (victim >= myid) victim++;

    /* cycle through to find a queue with work to steal */
    first_victim = victim;
    while (1) {
      while (__ompc_queue_is_empty(
                 &per_thread->task_queue[TIED_IDX(victim)])) {
        victim++;
        if (victim == myid) victim++;
        if (victim == team_size) victim = 0;
        if (victim == first_victim) return NULL;
      }
      /* Always steal from the head for tied tasks. Note also that by not
       * using the task_queue API, the CFIFO implementation will not be used. */
      task = __ompc_queue_steal_head(
                 &per_thread->task_queue[TIED_IDX(victim)]);
      if ( task != NULL ) {
        /*
        if (!__ompc_task_state_is_unscheduled(task)) {
          // Is_True(0, ("state of task from queue was not unscheduled"));
          printf("\n... (2) skipping over a task with state %s; queue size is %d \n",
                 __ompc_task_get_state_string(task),
                 __ompc_queue_num_used_slots(&per_thread->task_queue[TIED_IDX(victim)]));
          task = NULL;
        }
        */
        return task;
      }
    }
  }

  /*
  if ( task != NULL ) {
    if (!__ompc_task_state_is_unscheduled(task)) {
      // Is_True(0, ("state of task from queue was not unscheduled"));
      printf("\n... (3) skipping over a task with state %s; queue size is %d \n",
             __ompc_task_get_state_string(task),
             __ompc_queue_num_used_slots(&per_thread->task_queue[UNTIED_IDX(myid)]));
      task = NULL;
    }
  }
  */
  return task;
}
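/* Minimal self-contained sketch (plain C, hypothetical names) mirroring the
 * victim-selection loop used above: start from a random victim other than the
 * calling thread, walk the other threads in order with wrap-around, and give
 * up once the starting victim comes around again. queue_has_work() is a
 * stand-in for the per-victim emptiness checks. */
#include <stdlib.h>

static int queue_has_work(int victim)   /* stand-in predicate */
{
  (void) victim;
  return 1;
}

static int pick_victim_with_work(int myid, int team_size, unsigned int *seed)
{
  int victim, first_victim;

  if (team_size < 2)
    return -1;

  /* random victim in [0, team_size) excluding myid */
  victim = rand_r(seed) % (team_size - 1);
  if (victim >= myid) victim++;

  first_victim = victim;
  while (!queue_has_work(victim)) {
    victim++;
    if (victim == myid) victim++;
    if (victim >= team_size) victim = 0;
    if (victim == first_victim) return -1;   /* full cycle, nothing to steal */
  }
  return victim;
}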
void __ompc_task_create(omp_task_func taskfunc, void *frame_pointer,
                        void *firstprivates, int may_delay,
                        int is_tied, int blocks_parent)
{
  int myid;
  omp_team_t *team;
  omp_task_t *current_task, *new_task, *orig_task;
  omp_v_thread_t *current_thread;

  current_task = __omp_current_task;

  if (__ompc_task_cutoff()) {
    //__omp_task_cutoffs++;
    orig_task = current_task;
    __omp_current_task = NULL;
    taskfunc(firstprivates, frame_pointer);
    __omp_current_task = orig_task;
    return;
  }

  myid = __omp_myid;
  current_thread = __omp_current_v_thread;
  team = current_thread->team;

#ifdef USE_COLLECTOR_TASK
  OMP_COLLECTOR_API_THR_STATE temp_state =
      (OMP_COLLECTOR_API_THR_STATE)current_thread->state;
  __ompc_set_state(THR_TASK_CREATE_STATE);
  __ompc_event_callback(OMP_EVENT_THR_BEGIN_CREATE_TASK);
#ifndef OMPT
  int new_id = __ompc_atomic_inc(&team->collector_task_id);
#endif
#endif

  if (may_delay) {
    new_task = __ompc_task_new();
    __ompc_task_set_function(new_task, taskfunc);
    __ompc_task_set_frame_pointer(new_task, frame_pointer);
    __ompc_task_set_firstprivates(new_task, firstprivates);
    new_task->creating_thread_id = myid;
    new_task->parent = current_task;
    new_task->depth = current_task->depth + 1;

#ifdef USE_COLLECTOR_TASK
#ifndef OMPT
    new_task->task_id = new_id;
#endif
    __omp_collector_task = new_task;
    __ompc_event_callback(OMP_EVENT_THR_END_CREATE_TASK_DEL);
    __ompc_set_state(temp_state);
#endif

    __ompc_task_set_flags(new_task, OMP_TASK_IS_DEFERRED);

    if (is_tied)
      __ompc_task_set_flags(new_task, OMP_TASK_IS_TIED);

    __ompc_atomic_inc(&current_task->num_children);

    if (blocks_parent) {
      __ompc_task_set_flags(new_task, OMP_TASK_BLOCKS_PARENT);
      __ompc_atomic_inc(&current_task->num_blocking_children);
    }

#ifdef OMPT
    __ompt_event_callback(ompt_event_task_begin);
#endif

    if (__ompc_add_task_to_pool(team->task_pool, new_task) == 0) {
      /* couldn't add to task pool, so execute it immediately */
      __ompc_task_set_state(current_task, OMP_TASK_READY);
      __ompc_task_switch(new_task);
      __ompc_task_set_state(current_task, OMP_TASK_RUNNING);
    }

  } else {
    omp_task_t new_immediate_task;
    new_task = &new_immediate_task;
    memset(new_task, 0, sizeof(omp_task_t));
    __ompc_task_set_function(new_task, taskfunc);
    __ompc_task_set_frame_pointer(new_task, frame_pointer);

    /* firstprivates will be NULL, so don't need to set it */
    Is_True(firstprivates == NULL, ("firstprivates should always be NULL"));

    new_task->creating_thread_id = myid;
    new_task->parent = current_task;
    new_task->depth = current_task->depth + 1;

#ifdef USE_COLLECTOR_TASK
#ifndef OMPT
    new_task->task_id = new_id;
#endif
    __omp_collector_task = new_task;
    __ompc_event_callback(OMP_EVENT_THR_END_CREATE_TASK_IMM);
#endif

    if (is_tied)
      __ompc_task_set_flags(new_task, OMP_TASK_IS_TIED);

#ifdef OMPT
    __ompt_event_callback(ompt_event_task_begin);
#endif

    __ompc_task_set_state(current_task, OMP_TASK_READY);
    if (__ompc_task_is_tied(current_task)) {
      /* if current task is tied, it should not go back into the task pool */
      orig_task = current_task;
      ++(current_thread->num_suspended_tied_tasks);
      __omp_current_task = new_task;
      taskfunc(NULL, frame_pointer);
      __omp_current_task = orig_task;
      --(current_thread->num_suspended_tied_tasks);
    } else {
      /* if current task is untied, it can go back into the task pool, but
       * this isn't currently supported. */
      orig_task = current_task;
      __omp_current_task = new_task;
      taskfunc(NULL, frame_pointer);
      __omp_current_task = orig_task;
    }
    __ompc_task_set_state(current_task, OMP_TASK_RUNNING);
  }
}
void __ompc_task_switch(omp_task_t *new_task)
{
  omp_v_thread_t *current_thread = __omp_current_v_thread;
  omp_task_t *orig_task = __omp_current_task;

  __ompc_task_set_state(new_task, OMP_TASK_RUNNING);
  __omp_current_task = new_task;
  new_task->sdepth = orig_task->sdepth + 1;

#ifdef OMPT
  __ompt_suspended_task_id = orig_task->task_id;
  __ompt_resumed_task_id = new_task->task_id;
  __ompt_event_callback(ompt_event_task_switch);
#endif

#ifdef USE_COLLECTOR_TASK
  __omp_collector_task = __omp_current_task;
  omp_v_thread_t *p_vthread = __ompc_get_v_thread_by_num(__omp_myid);
  OMP_COLLECTOR_API_THR_STATE temp_state =
      (OMP_COLLECTOR_API_THR_STATE)p_vthread->state;
  __ompc_ompt_set_state(THR_WORK_STATE, ompt_state_work_parallel, 0);
  __ompc_event_callback(OMP_EVENT_THR_BEGIN_EXEC_TASK);
#endif

#ifdef OMPT
  new_task->frame_s.exit_runtime_frame = __builtin_frame_address(0);
  if (__ompc_task_is_implicit(new_task))
    __ompt_event_callback(ompt_event_implicit_task_begin);
#endif

  if (__ompc_task_is_tied(orig_task) &&
      !__ompc_task_state_is_in_barrier(orig_task)) {
    ++(current_thread->num_suspended_tied_tasks);
    new_task->t.func(new_task->firstprivates, new_task->frame_pointer);
    --(current_thread->num_suspended_tied_tasks);
  } else {
    new_task->t.func(new_task->firstprivates, new_task->frame_pointer);
  }

#ifdef OMPT
  new_task->frame_s.reenter_runtime_frame = __builtin_frame_address(0);
  if (__ompc_task_is_implicit(new_task))
    __ompt_event_callback(ompt_event_implicit_task_end);
#endif

#ifdef USE_COLLECTOR_TASK
  __ompc_set_state(temp_state);
#endif

  Is_True(__ompc_task_state_is_exiting(new_task),
          ("__ompc_task_switch: task returned but not in EXITING state"));

  if (new_task->num_children == 0) {
    __ompc_task_delete(new_task);
  } else {
    __ompc_task_set_state(new_task, OMP_TASK_FINISHED);
    __ompc_unlock(&new_task->lock);
  }

  __omp_current_task = orig_task;

#ifdef USE_COLLECTOR_TASK
  __omp_collector_task = __omp_current_task;
#endif
}
void __ompc_task_exit()
{
  omp_task_flag_t flags;
  int num_siblings = 0;
  omp_team_t *team;
  omp_task_t *current_task, *next_task;
  omp_v_thread_t *current_thread;

  current_task = __omp_current_task;

  /* If no task object is assigned for the current task, we assume all
   * descendants were executed in work-first order. */
  if (current_task == NULL)
    return;

#ifdef USE_COLLECTOR_TASK
  __omp_collector_task = current_task;
  // __ompc_ompt_set_state(THR_TASK_FINISH_STATE, ompt_state_work_parallel, 0);
  // __ompc_ompt_event_callback(OMP_EVENT_THR_BEGIN_FINISH_TASK, ompt_event_task_end);
#endif

  current_thread = __omp_current_v_thread;
  team = current_thread->team;

  __ompc_task_set_state(current_task, OMP_TASK_EXITING);

  if (__ompc_task_is_deferred(current_task)) {
    Is_True(current_task->parent != NULL,
            ("a deferred task should not have a NULL parent"));
    __ompc_atomic_dec(&team->task_pool->num_pending_tasks);
    num_siblings = __ompc_atomic_dec(&current_task->parent->num_children);
  }

  /* only try to free the parent or put it back on a queue if this was a
   * deferred task and the parent has no more children (since child tasks may
   * attempt to update the num_children field of the parent when they exit) */
  if (current_task->parent &&
      __ompc_task_is_deferred(current_task->parent) &&
      num_siblings == 0 &&
      __ompc_task_state_is_finished(current_task->parent)) {
    __ompc_task_delete(current_task->parent);
  }

  /* should not immediately return if descendant tasks may potentially still
   * need access to the current call stack. Instead, we look for other tasks
   * to execute from this point. */
  while (current_task->num_blocking_children) {
    next_task = __ompc_remove_task_from_pool(team->task_pool);
    if (next_task != NULL) {
      __ompc_task_switch(next_task);
    }
  }

#ifdef USE_COLLECTOR_TASK
  __ompc_set_state(THR_TASK_FINISH_STATE);
#endif

  /* need to decrement num_blocking_children for the parent if this is a
   * deferred task. We put it at the end, to ensure all blocking child tasks
   * have first completed. */
  flags = OMP_TASK_IS_DEFERRED | OMP_TASK_BLOCKS_PARENT;
  if (__ompc_task_get_flags(current_task, flags) == flags)
    __ompc_atomic_dec(&current_task->parent->num_blocking_children);

#ifdef USE_COLLECTOR_TASK
  __omp_collector_task = current_task;
  __ompc_event_callback(OMP_EVENT_THR_END_FINISH_TASK);
#endif
}