/**
 * Publish the calling worker's `bot` task pointer.
 *
 * Called when a worker spawns its first task, so that other workers can
 * steal tasks from it. The store happens between LOCK/UNLOCK on the
 * worker's own lock, bracketed by the Tweed locking trace events.
 *
 * @param val  task descriptor to record as this worker's `bot`
 */
static inline void set_bot(struct generic_task_desc * val)
{
    trace_event(TRACE_SUBSYS_TWEED, TRACE_EVENT_TWEED_LOCKING, 0);

    /* the worker descriptor of the calling thread lives in its TLS slot */
    struct worker_desc *self = thread_get_tls();

    LOCK(self->lock);
    trace_event(TRACE_SUBSYS_TWEED, TRACE_EVENT_TWEED_LOCKING_END, 0);
    self->bot = val;
    UNLOCK(self->lock);
}
/** Check if syncing task really is stolen */ int sync_stolen(struct generic_task_desc * _tweed_top_) { #ifndef TWEED_LOCK_FREE struct worker_desc * tls = (struct worker_desc *) thread_get_tls(); #endif LOCK(tls->lock); int ret = ((_tweed_top_->balarm & TWEED_TASK_STOLEN) != 0); UNLOCK(tls->lock); return ret; }
/**
 * Reset the calling worker's `bot` to the start of its task block.
 *
 * The start of the block is taken from the `workers` table entry indexed
 * by this worker's id; it is stored into `bot` between LOCK/UNLOCK on the
 * worker's lock.
 *
 * @return pointer to the start of this worker's task descriptor stack,
 *         to be used as the initial `_tweed_top_`
 */
struct generic_task_desc * set_top(void)
{
    trace_event(TRACE_SUBSYS_TWEED, TRACE_EVENT_TWEED_LOCKING, 0);

    struct worker_desc *self = thread_get_tls();

    LOCK(self->lock);
    trace_event(TRACE_SUBSYS_TWEED, TRACE_EVENT_TWEED_LOCKING_END, 0);

    struct generic_task_desc *stack_base = workers[self->id].task_desc_stack;
    self->bot = stack_base;

    UNLOCK(self->lock);

    return stack_base;
}
/** Steal work from another worker's task stack */ static int steal(struct generic_task_desc * _tweed_top_, struct worker_desc * victim) { struct generic_task_desc * stolenTask; struct worker_desc * me = (struct worker_desc *) thread_get_tls(); LOCK(victim->lock); stolenTask = victim->bot; // check if there is actually work to steal if (stolenTask != NULL && stolenTask->balarm == TWEED_TASK_NEW) { // try to steal task tweed_task_func_t func = steal_task(stolenTask, me); if (func == NULL) { // we didn't succeed in the steal, back off #ifndef TWEED_USE_CAS stolenTask->balarm = TWEED_TASK_INLINED; stolenTask->thief = NULL; #endif UNLOCK(victim->lock); return 0; // didn't steal anything } else { // we have stolen the task, update bot atomic_inc(&(victim->bot), stolenTask->size); UNLOCK(victim->lock); // and run task trace_event(TRACE_SUBSYS_TWEED, TRACE_EVENT_TWEED_STEAL, victim->core_id); func(_tweed_top_, stolenTask); trace_event(TRACE_SUBSYS_TWEED, TRACE_EVENT_TWEED_STEAL_END, victim->core_id); // signal task completion stolenTask->balarm |= TWEED_TASK_COMPLETE; return 1; } } else { UNLOCK(victim->lock); return 0; // didn't steal anything } }
static void execute__rx(struct bomp_binding *_binding, uint64_t fn, uint64_t arg, uint32_t tid, uint64_t icv_task) { struct bomp_thread *t = _binding->st; struct bomp_tls *tls = thread_get_tls(); BOMP_DEBUG_THREAD("execute__rx: %p %p, %lx\n", t, tls, icv_task); assert(t == &tls->r.thread); struct omp_icv_task icvt; memcpy(&icvt, (void *)icv_task, sizeof(struct omp_icv_task)); bomp_icv_set_task(&icvt); tls->thread_id = tid; bomp_thread_fn_t func= (bomp_thread_fn_t)fn; // calling the function func((void *)arg); bomp_icv_set_task(NULL); tls->thread_id = -1; struct txq_msg_st *msg_st = txq_msg_st_alloc(&t->txq); if (msg_st == NULL) { BOMP_ERROR("allocation of message state failed: %" PRIu32 "\n", tid); return; } msg_st->send = done__tx; msg_st->err = SYS_ERR_OK; txq_send(msg_st); }
void bomp_end_processing(void) { debug_printf("bomp_end_processing\n"); struct bomp_tls *tls = thread_get_tls(); struct waitset *ws = get_default_waitset(); if (tls->role == BOMP_THREAD_ROLE_MASTER) { struct bomp_node *node = &tls->r.master.local; struct bomp_master *master = &tls->r.master; while(master->nodes_active != 1 || node->threads_active != 1) { event_dispatch(ws); } } else if (tls->role == BOMP_THREAD_ROLE_NODE) { struct bomp_node *node = &tls->r.node; while(node->threads_active != 0) { event_dispatch(ws); } } free(tls->icv.task); tls->icv.task = NULL; debug_printf("bomp_end_processing: done\n"); }
/**
 * Start executing a function on the threads of a parallel region.
 *
 * Must only be called by the program master or a node master (asserted).
 * The program master first hands surplus threads to the remote nodes when
 * more threads are requested than its local node provides; both roles then
 * start the remaining threads on their own node and finally set the
 * caller's thread ID to 0.
 *
 * @param fn         function each thread executes
 * @param data       argument passed to `fn`
 * @param tid_start  thread ID base; locally started threads are numbered
 *                   from `tid_start + 1`
 * @param nthreads   number of threads requested for the region
 */
void bomp_start_processing(void (*fn)(void *),
                           void *data,
                           coreid_t tid_start,
                           coreid_t nthreads)
{
    struct bomp_tls *tls = thread_get_tls();

    debug_printf("bomp_start_processing(%p, %p, %u, %u)\n", fn, data, tid_start, nthreads);

    /* this function must only be called by the program and node masters */
    assert(tls->role == BOMP_THREAD_ROLE_MASTER
           || tls->role == BOMP_THREAD_ROLE_NODE);

    /* add one to the tid_start as this will be our ID */
    coreid_t tid_current = tid_start + 1;

    struct bomp_node *node = NULL;

    if (tls->role == BOMP_THREAD_ROLE_MASTER) {
        node = &tls->r.master.local;

        if (nthreads > (node->threads_max + 1)) {
            /* send the requests to the node masters */
            nthreads -= (node->threads_max + 1);
            for (nodeid_t i = 0; i < tls->r.master.num_nodes; ++i) {
                /*
                 * NOTE(review): every node is handed the same tid_start
                 * rather than the running tid_current -- confirm against
                 * bomp_node_exec() that this is intended.
                 */
                coreid_t num = bomp_node_exec(&tls->r.master.nodes[i], fn,
                                              data, tid_start, nthreads);
                assert(num <= nthreads);
                tls->r.master.nodes_active++;
                nthreads -= num;
                tid_current += num;
                if (nthreads == 0) {
                    break;
                }
            }
            nthreads += (node->threads_max);
        }
    } else if (tls->role == BOMP_THREAD_ROLE_NODE) {
        node = &tls->r.node;
    }

    if (node == NULL) {
        /*
         * Only reachable when assertions are compiled out and the role is
         * invalid; without a node there is nothing to start, and going on
         * would dereference an indeterminate pointer.
         */
        return;
    }

    debug_printf("nthreads=%u, max_threads=%u\n", nthreads, node->threads_max);

    assert((node->threads_max + 1) >= nthreads);

    struct omp_icv_task *icv = bomp_icv_get()->task;

    /* slot 0 is the calling thread itself, so workers start at index 1 */
    for (coreid_t i = 1; i < nthreads; ++i) {
        node->threads[i].icvt = icv;
        node->threads_active++;
        bomp_thread_exec(&node->threads[i], fn, data, tid_current);
        tid_current++;
    }

    /* set the local thread ID */
    tls->thread_id = 0;
}