/* The data must be released by calling starpu_data_release later on */
int starpu_data_acquire_cb(starpu_data_handle handle,
			   starpu_access_mode mode, void (*callback)(void *), void *arg)
{
	STARPU_ASSERT(handle);

	struct user_interaction_wrapper *wrapper = malloc(sizeof(struct user_interaction_wrapper));
	STARPU_ASSERT(wrapper);

	wrapper->handle = handle;
	wrapper->mode = mode;
	wrapper->callback = callback;
	wrapper->callback_arg = arg;
	PTHREAD_COND_INIT(&wrapper->cond, NULL);
	PTHREAD_MUTEX_INIT(&wrapper->lock, NULL);
	wrapper->finished = 0;

	/* TODO: instead of having the is_prefetch argument, _starpu_fetch_data
	 * should consider two flags: async and detached */
	_starpu_spin_lock(&handle->header_lock);
	handle->per_node[0].refcnt++;
	_starpu_spin_unlock(&handle->header_lock);

	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
	int sequential_consistency = handle->sequential_consistency;
	if (sequential_consistency)
	{
		wrapper->pre_sync_task = starpu_task_create();
		wrapper->pre_sync_task->callback_func = starpu_data_acquire_cb_pre_sync_callback;
		wrapper->pre_sync_task->callback_arg = wrapper;

		wrapper->post_sync_task = starpu_task_create();

#ifdef STARPU_USE_FXT
		starpu_job_t job = _starpu_get_job_associated_to_task(wrapper->pre_sync_task);
		job->model_name = "acquire_cb_pre";
		job = _starpu_get_job_associated_to_task(wrapper->post_sync_task);
		job->model_name = "acquire_cb_post";
#endif

		_starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, wrapper->post_sync_task, handle, mode);
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

		/* TODO: detect if this is superfluous */
		int ret = starpu_task_submit(wrapper->pre_sync_task, NULL);
		STARPU_ASSERT(!ret);
	}
	else
	{
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

		starpu_data_acquire_cb_pre_sync_callback(wrapper);
	}

	return 0;
}
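/* A minimal usage sketch (not part of this file, and an assumption on our
 * part): acquiring a handle asynchronously with starpu_data_acquire_cb and
 * releasing it from the callback, as the comment above requires. The
 * my_acquired_cb/acquire_example names are hypothetical; signatures follow
 * the API used in this file. */
static void my_acquired_cb(void *arg)
{
	starpu_data_handle handle = arg;
	/* The caller now holds the data in the requested mode */
	starpu_data_release(handle);
}

static void acquire_example(starpu_data_handle handle)
{
	/* Does not block: my_acquired_cb runs once the data is available */
	int ret = starpu_data_acquire_cb(handle, STARPU_RW, my_acquired_cb, handle);
	STARPU_ASSERT(!ret);
}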
static int ws_push_task(struct starpu_task *task)
{
	unsigned sched_ctx_id = task->sched_ctx;
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	struct _starpu_deque_jobq *deque_queue;
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	int workerid = starpu_worker_get_id();

	unsigned worker = 0;
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
	struct starpu_sched_ctx_iterator it;
	workers->init_iterator(workers, &it);

	/* !! It is clumsy to lock everything here! */
	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		starpu_pthread_mutex_t *sched_mutex;
		starpu_pthread_cond_t *sched_cond;
		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
		STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
	}

	/* If the current thread is not a worker but the main thread (-1),
	 * we find the best worker to put the task on its queue */
	if (workerid == -1)
		workerid = select_worker(sched_ctx_id);

	deque_queue = ws->queue_array[workerid];

#ifdef HAVE_AYUDAME_H
	if (AYU_event)
	{
		intptr_t id = workerid;
		AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id);
	}
#endif
	_starpu_job_list_push_back(&deque_queue->jobq, j);
	deque_queue->njobs++;

	starpu_push_task_end(task);

	/* Restart the iterator: the first loop left it exhausted, so the
	 * unlock loop below would otherwise never run */
	workers->init_iterator(workers, &it);
	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		starpu_pthread_mutex_t *sched_mutex;
		starpu_pthread_cond_t *sched_cond;
		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
#ifndef STARPU_NON_BLOCKING_DRIVERS
		STARPU_PTHREAD_COND_SIGNAL(sched_cond);
#endif
		STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
	}

	return 0;
}
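/* Hedged sketch (not from this file): how a push handler like ws_push_task
 * is typically exposed, via the public struct starpu_sched_policy. The
 * ws_init/ws_deinit/ws_pop_task helpers are assumed to be defined elsewhere
 * in the policy file; field names follow the public scheduling-policy API. */
struct starpu_sched_policy _starpu_sched_ws_policy =
{
	.init_sched = ws_init,
	.deinit_sched = ws_deinit,
	.push_task = ws_push_task,
	.pop_task = ws_pop_task,
	.policy_name = "ws",
	.policy_description = "work stealing"
};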
int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle,
		     int asynchronous, void (*callback_func)(void*), void *callback_arg,
		     int reduction, struct starpu_task *reduction_dep_task)
{
	struct starpu_task *task = starpu_task_create();
	STARPU_ASSERT(task);
	task->name = "data_cpy";

	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	if (reduction)
	{
		j->reduction_task = reduction;
		if (reduction_dep_task)
			starpu_task_declare_deps_array(task, 1, &reduction_dep_task);
	}

	task->cl = &copy_cl;

	unsigned *interface_id = malloc(sizeof(*interface_id));
	*interface_id = dst_handle->ops->interfaceid;
	task->cl_arg = interface_id;
	task->cl_arg_size = sizeof(*interface_id);
	task->cl_arg_free = 1;

	task->callback_func = callback_func;
	task->callback_arg = callback_arg;

	STARPU_TASK_SET_HANDLE(task, dst_handle, 0);
	STARPU_TASK_SET_HANDLE(task, src_handle, 1);

	task->synchronous = !asynchronous;

	int ret = _starpu_task_submit_internally(task);
	STARPU_ASSERT(!ret);

	return 0;
}
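/* Hedged sketch (not from this file): the public entry point is expected to
 * forward to _starpu_data_cpy with reduction disabled, along these lines. */
int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle,
		    int asynchronous, void (*callback_func)(void*), void *callback_arg)
{
	return _starpu_data_cpy(dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
}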
/* NB: handle->sequential_consistency_mutex must be held by the caller */
void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task,
						   starpu_data_handle handle, starpu_access_mode mode)
{
	STARPU_ASSERT(!(mode & STARPU_SCRATCH));

	if (handle->sequential_consistency)
	{
#ifdef STARPU_USE_FXT
		/* In case we are generating the DAG, we add an implicit
		 * dependency between the pre and the post sync tasks in case
		 * they are not the same. */
		if (pre_sync_task != post_sync_task)
		{
			starpu_job_t pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
			starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);
			STARPU_TRACE_GHOST_TASK_DEPS(pre_sync_job->job_id, post_sync_job->job_id);
		}
#endif

		starpu_access_mode previous_mode = handle->last_submitted_mode;

		if (mode & STARPU_W)
		{
			_STARPU_DEP_DEBUG("W %p\n", handle);
			if (previous_mode & STARPU_W)
			{
				_STARPU_DEP_DEBUG("WAW %p\n", handle);
				/* (Read) Write */
				/* This task depends on the previous writer */
				if (handle->last_submitted_writer)
				{
					starpu_job_t job = _starpu_get_job_associated_to_task(handle->last_submitted_writer);
					starpu_task_declare_deps_array(pre_sync_task, 1, &job->event);
				}

#ifdef STARPU_USE_FXT
				/* If there is a ghost writer instead, we
				 * should declare a ghost dependency here, and
				 * invalidate the ghost value. */
				if (handle->last_submitted_ghost_writer_id_is_valid)
				{
					starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);
					STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, post_sync_job->job_id);
					handle->last_submitted_ghost_writer_id_is_valid = 0;
				}
#endif

				handle->last_submitted_writer = post_sync_task;
			}
			else
			{
				/* The tasks submitted previously were in read-only
				 * mode: this task must depend on all those read-only
				 * tasks, and we get rid of the list of readers */
				_STARPU_DEP_DEBUG("WAR %p\n", handle);

				/* Count the readers */
				unsigned nreaders = 0;
				struct starpu_task_wrapper_list *l;
				l = handle->last_submitted_readers;
				while (l)
				{
					nreaders++;
					l = l->next;
				}
				_STARPU_DEP_DEBUG("%u readers\n", nreaders);

				starpu_event events[nreaders];

				unsigned i = 0;
				l = handle->last_submitted_readers;
				while (l)
				{
					STARPU_ASSERT(l->task);
					starpu_job_t job = _starpu_get_job_associated_to_task(l->task);
					events[i++] = job->event;

					struct starpu_task_wrapper_list *prev = l;
					l = l->next;
					free(prev);
				}
#ifdef STARPU_USE_FXT
				/* Declare all dependencies with ghost readers */
				starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);

				struct starpu_jobid_list *ghost_readers_id = handle->last_submitted_ghost_readers_id;
				while (ghost_readers_id)
				{
					unsigned long id = ghost_readers_id->id;
					STARPU_TRACE_GHOST_TASK_DEPS(id, post_sync_job->job_id);

					struct starpu_jobid_list *prev = ghost_readers_id;
					ghost_readers_id = ghost_readers_id->next;
					free(prev);
				}
				handle->last_submitted_ghost_readers_id = NULL;
#endif

				handle->last_submitted_readers = NULL;
				handle->last_submitted_writer = post_sync_task;

				starpu_task_declare_deps_array(pre_sync_task, nreaders, events);
			}
		}
		else
		{
			_STARPU_DEP_DEBUG("R %p\n", handle);
			/* Add a reader */
			STARPU_ASSERT(pre_sync_task);
			STARPU_ASSERT(post_sync_task);

			/* Add this task to the list of readers */
			struct starpu_task_wrapper_list *link = malloc(sizeof(struct starpu_task_wrapper_list));
			link->task = post_sync_task;
			link->next = handle->last_submitted_readers;
			handle->last_submitted_readers = link;

			/* This task depends on the previous writer if any */
			if (handle->last_submitted_writer)
			{
				_STARPU_DEP_DEBUG("RAW %p\n", handle);
				starpu_job_t job = _starpu_get_job_associated_to_task(handle->last_submitted_writer);
				starpu_task_declare_deps_array(pre_sync_task, 1, &job->event);
			}

#ifdef STARPU_USE_FXT
			/* There was perhaps no last submitted writer but a
			 * ghost one; we should report that here, and keep the
			 * ghost writer valid */
			if (handle->last_submitted_ghost_writer_id_is_valid)
			{
				starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);
				STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, post_sync_job->job_id);
			}
#endif
		}

		handle->last_submitted_mode = mode;
	}
}
/* NB: We maintain a list of "ghost deps" in case FXT is enabled. Ghost
 * dependencies are the dependencies that are implicitly enforced by StarPU
 * even if they do not imply a real dependency. For instance in the sequence
 * f(Ar) g(Ar) h(Aw), we expect h to depend on both f and g, but if h is
 * submitted after the termination of f or g, StarPU will not create the
 * dependency, as it is not needed anymore. */
void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, starpu_data_handle handle)
{
	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);

	if (handle->sequential_consistency)
	{
		/* If this is the last writer, there is no point in adding
		 * extra deps to a task that does not exist anymore */
		if (task == handle->last_submitted_writer)
		{
			handle->last_submitted_writer = NULL;

#ifdef STARPU_USE_FXT
			/* Save the previous writer as the ghost last writer */
			handle->last_submitted_ghost_writer_id_is_valid = 1;
			starpu_job_t ghost_job = _starpu_get_job_associated_to_task(task);
			handle->last_submitted_ghost_writer_id = ghost_job->job_id;
#endif
		}

		/* XXX can a task be both the last writer associated to a data
		 * and be in its list of readers ? If not, we should not go
		 * through the entire list once we have detected it was the
		 * last writer. */

		/* Same if this is one of the readers: we go through the list
		 * of readers and remove the task if it is found. */
		struct starpu_task_wrapper_list *l;
		l = handle->last_submitted_readers;
		struct starpu_task_wrapper_list *prev = NULL;
		while (l)
		{
			struct starpu_task_wrapper_list *next = l->next;

			if (l->task == task)
			{
				/* We found the task in the reader list */
				free(l);

#ifdef STARPU_USE_FXT
				/* Save the job id of the reader task in the ghost reader linked list */
				starpu_job_t ghost_reader_job = _starpu_get_job_associated_to_task(task);
				struct starpu_jobid_list *link = malloc(sizeof(struct starpu_jobid_list));
				STARPU_ASSERT(link);
				link->next = handle->last_submitted_ghost_readers_id;
				link->id = ghost_reader_job->job_id;
				handle->last_submitted_ghost_readers_id = link;
#endif

				if (prev)
				{
					prev->next = next;
				}
				else
				{
					/* This is the first element of the list */
					handle->last_submitted_readers = next;
				}

				/* XXX can we really find the same task again
				 * once we have found it ? Otherwise, we should
				 * avoid going through the entire list and stop
				 * as soon as we find the task. TODO: check how
				 * duplicate dependencies are treated. */
			}
			else
			{
				prev = l;
			}

			l = next;
		}
	}

	PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
}
/* task depends on the tasks in task_array */
void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check)
{
	if (ndeps == 0)
		return;

	struct _starpu_job *job;

	job = _starpu_get_job_associated_to_task(task);

	STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex);
	if (check)
		STARPU_ASSERT_MSG(!job->submitted || !task->destroy || task->detach
#ifdef STARPU_OPENMP
				  || job->continuation
#endif
				  , "Task dependencies have to be set before submission (submitted %u destroy %d detach %d)",
				  job->submitted, task->destroy, task->detach);
	else
		STARPU_ASSERT_MSG(job->terminated <= 1, "Task dependencies have to be set before termination (terminated %u)",
				  job->terminated);

	struct _starpu_cg *cg = create_cg_task(ndeps, job);
	STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex);

	unsigned i;
	for (i = 0; i < ndeps; i++)
	{
		struct starpu_task *dep_task = task_array[i];

		struct _starpu_job *dep_job;
		struct _starpu_cg *back_cg = NULL;

		dep_job = _starpu_get_job_associated_to_task(dep_task);

		STARPU_ASSERT_MSG(dep_job != job, "A task must not depend on itself.");

		STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex);
		if (check)
		{
			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || dep_job->task->detach,
					  "Unless a task is not to be destroyed automatically, its dependencies have to be set before submission");
			STARPU_ASSERT_MSG(dep_job->submitted != 2,
					  "For resubmitted tasks, dependencies have to be set before the first re-submission");
			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate,
					  "For regenerated tasks, dependencies have to be set before the first submission");
		}
		else
			STARPU_ASSERT_MSG(dep_job->terminated <= 1,
					  "Task dependencies have to be set before termination (terminated %u)", dep_job->terminated);

		if (dep_job->task->regenerate)
		{
			/* Make sure we don't regenerate the dependency before this task is finished */
			back_cg = create_cg_task(1, dep_job);
			/* Just do not take that dependency into account for the first submission */
			dep_job->job_successors.ndeps_completed++;
		}
		STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex);

		_STARPU_TRACE_TASK_DEPS(dep_job, job);
		_starpu_bound_task_dep(job, dep_job);
#ifdef HAVE_AYUDAME_H
		if (AYU_event && check)
		{
			uintptr_t AYU_data[3] = {dep_job->job_id, 0, 0};
			AYU_event(AYU_ADDDEPENDENCY, job->job_id, AYU_data);
		}
#endif

		_starpu_task_add_succ(dep_job, cg);
		if (dep_job->task->regenerate)
			_starpu_task_add_succ(job, back_cg);
	}
}
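/* Hedged sketch (not from this file): the public wrapper
 * starpu_task_declare_deps_array() is the usual way to reach this code. Here
 * task c is made to wait for a and b before it can run; "cl" is an assumed
 * codelet with no buffers, and starpu_task_submit is used in its
 * single-argument public form. */
static void explicit_deps_example(struct starpu_codelet *cl)
{
	struct starpu_task *a = starpu_task_create();
	struct starpu_task *b = starpu_task_create();
	struct starpu_task *c = starpu_task_create();
	a->cl = b->cl = c->cl = cl;

	/* Dependencies must be declared before c is submitted */
	struct starpu_task *deps[] = { a, b };
	starpu_task_declare_deps_array(c, 2, deps);

	starpu_task_submit(a);
	starpu_task_submit(b);
	starpu_task_submit(c); /* runs only after a and b have terminated */
}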
/* The data must be released by calling starpu_data_release later on */
int starpu_data_acquire(starpu_data_handle handle, starpu_access_mode mode)
{
	STARPU_ASSERT(handle);

	/* it is forbidden to call this function from a callback or a codelet */
	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
		return -EDEADLK;

	struct user_interaction_wrapper wrapper =
	{
		.handle = handle,
		.mode = mode,
		.node = 0, // unused
		.cond = PTHREAD_COND_INITIALIZER,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.finished = 0
	};

//	_STARPU_DEBUG("TAKE sequential_consistency_mutex starpu_data_acquire\n");
	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
	int sequential_consistency = handle->sequential_consistency;
	if (sequential_consistency)
	{
		wrapper.pre_sync_task = starpu_task_create();
		wrapper.post_sync_task = starpu_task_create();

#ifdef STARPU_USE_FXT
		starpu_job_t job = _starpu_get_job_associated_to_task(wrapper.pre_sync_task);
		job->model_name = "acquire_pre";
		job = _starpu_get_job_associated_to_task(wrapper.post_sync_task);
		job->model_name = "acquire_post";
#endif

		_starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, wrapper.post_sync_task, handle, mode);
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

		/* TODO: detect if this is superfluous */
		wrapper.pre_sync_task->synchronous = 1;
		int ret = starpu_task_submit(wrapper.pre_sync_task, NULL);
		STARPU_ASSERT(!ret);
		/*
		starpu_event event;
		int ret = starpu_task_submit(wrapper.pre_sync_task, &event);
		STARPU_ASSERT(!ret);
		starpu_event_wait(event);
		starpu_event_release(event);
		*/
	}
	else
	{
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
	}

	/* we try to get the data; if we do not succeed immediately, we set a
	 * callback function that will be executed automatically when the data is
	 * available again, otherwise we fetch the data directly */
	if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode, _starpu_data_acquire_continuation, &wrapper))
	{
		/* no one has locked this data yet, so we proceed immediately */
		int ret = _starpu_fetch_data_on_node(handle, 0, mode, 0, NULL, NULL);
		STARPU_ASSERT(!ret);
	}
	else
	{
		PTHREAD_MUTEX_LOCK(&wrapper.lock);
		while (!wrapper.finished)
			PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
		PTHREAD_MUTEX_UNLOCK(&wrapper.lock);
	}

	/* At that moment, the caller holds a reference to the piece of data.
	 * We enqueue the "post" sync task in the list associated to the handle
	 * so that it is submitted by the starpu_data_release function. */
	_starpu_add_post_sync_tasks(wrapper.post_sync_task, handle);

	return 0;
}

/* This function must be called after starpu_data_acquire so that the
 * application releases the data */
void starpu_data_release(starpu_data_handle handle)
{
	STARPU_ASSERT(handle);

	/* The application can now release the rw-lock */
	_starpu_release_data_on_node(handle, 0, 0);

	/* In case there are some implicit dependencies, unlock the "post sync" tasks */
	_starpu_unlock_post_sync_tasks(handle);
}
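/* Hedged usage sketch (not from this file): a typical blocking
 * acquire/release section around direct access to registered data. The
 * inspect_vector name is hypothetical; starpu_vector_data_register and
 * starpu_data_unregister follow the public API of the same era as this
 * file, with home node 0 assumed to be main memory. */
static void inspect_vector(float *v, unsigned n)
{
	starpu_data_handle handle;
	starpu_vector_data_register(&handle, 0, (uintptr_t)v, n, sizeof(v[0]));

	/* Blocks until all previously submitted tasks on "handle" are done */
	if (starpu_data_acquire(handle, STARPU_R) == 0)
	{
		/* Safe to read v directly here */
		starpu_data_release(handle);
	}

	starpu_data_unregister(handle);
}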
void *_starpu_cpu_worker(void *arg)
{
	struct starpu_worker_s *cpu_arg = arg;

	unsigned memnode = cpu_arg->memory_node;
	int workerid = cpu_arg->workerid;
	int devid = cpu_arg->devid;

#ifdef STARPU_USE_FXT
	_starpu_fxt_register_thread(cpu_arg->bindid);
#endif
	STARPU_TRACE_WORKER_INIT_START(STARPU_FUT_CPU_KEY, devid, memnode);

	_starpu_bind_thread_on_cpu(cpu_arg->config, cpu_arg->bindid);

	_STARPU_DEBUG("cpu worker %d is ready on logical cpu %d\n", devid, cpu_arg->bindid);

	_starpu_set_local_memory_node_key(&memnode);

	_starpu_set_local_worker_key(cpu_arg);

	snprintf(cpu_arg->name, 32, "CPU %d", devid);

	cpu_arg->status = STATUS_UNKNOWN;

	STARPU_TRACE_WORKER_INIT_END

	/* tell the main thread that we are ready */
	PTHREAD_MUTEX_LOCK(&cpu_arg->mutex);
	cpu_arg->worker_is_initialized = 1;
	PTHREAD_COND_SIGNAL(&cpu_arg->ready_cond);
	PTHREAD_MUTEX_UNLOCK(&cpu_arg->mutex);

	starpu_job_t j;
	int res;

	while (_starpu_machine_is_running())
	{
		STARPU_TRACE_START_PROGRESS(memnode);
		_starpu_datawizard_progress(memnode, 1);
		STARPU_TRACE_END_PROGRESS(memnode);

		_starpu_execute_registered_progression_hooks();

		PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex);

		/* perhaps there is some local task to be executed first */
		j = _starpu_pop_local_task(cpu_arg);

		/* otherwise ask the scheduler for a task */
		if (!j)
		{
			struct starpu_task *task = _starpu_pop_task();
			if (task)
				j = _starpu_get_job_associated_to_task(task);
		}

		if (j == NULL)
		{
			if (_starpu_worker_can_block(memnode))
				_starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex);

			PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);
			continue;
		}

		PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);

		/* can a cpu perform that task ? */
		if (!STARPU_CPU_MAY_PERFORM(j))
		{
			/* put it at the end of the queue ... XXX */
			_starpu_push_task(j, 0);
			continue;
		}

		_starpu_set_current_task(j->task);

		res = execute_job_on_cpu(j, cpu_arg);

		_starpu_set_current_task(NULL);

		if (res)
		{
			switch (res)
			{
				case -EAGAIN:
					_starpu_push_task(j, 0);
					continue;
				default:
					assert(0);
			}
		}

		_starpu_handle_job_termination(j, 0);
	}

	STARPU_TRACE_WORKER_DEINIT_START

	/* In case there remains some memory that was automatically
	 * allocated by StarPU, we release it now. Note that data
	 * coherency is not maintained anymore at that point ! */
	_starpu_free_all_automatically_allocated_buffers(memnode);

	STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY);

	pthread_exit(NULL);
}
int _starpu_push_task_to_workers(struct starpu_task *task)
{
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
	unsigned nworkers = 0;

	_STARPU_TRACE_JOB_PUSH(task, task->priority > 0);

	/* if the context still does not have workers, put the task back
	 * in the empty ctx list */
	if(!sched_ctx->is_initial_sched)
	{
		/* if no worker in the ctx is able to execute the task, we
		 * consider the ctx empty */
		nworkers = _starpu_nworkers_able_to_execute_task(task, sched_ctx);

		if (nworkers == 0)
		{
			STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->empty_ctx_mutex);
			starpu_task_list_push_back(&sched_ctx->empty_ctx_tasks, task);
			STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->empty_ctx_mutex);
#ifdef STARPU_USE_SC_HYPERVISOR
			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->perf_counters != NULL
			   && sched_ctx->perf_counters->notify_empty_ctx)
			{
				_STARPU_TRACE_HYPERVISOR_BEGIN();
				sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task);
				_STARPU_TRACE_HYPERVISOR_END();
			}
#endif
			return -EAGAIN;
		}
	}

	_starpu_profiling_set_task_push_start_time(task);

	int ret = 0;
	if (STARPU_UNLIKELY(task->execute_on_a_specific_worker))
	{
		unsigned node = starpu_worker_get_memory_node(task->workerid);
		if (starpu_get_prefetch_flag())
			starpu_prefetch_task_input_on_node(task, node);

		ret = _starpu_push_task_on_specific_worker(task, task->workerid);
	}
	else
	{
		struct _starpu_machine_config *config = _starpu_get_machine_config();

		/* When a task can only be executed on a given arch and we have
		 * only one memory node for that arch, we can systematically
		 * prefetch before the scheduling decision. */
		if (starpu_get_prefetch_flag())
		{
			if (task->cl->where == STARPU_CPU && config->cpus_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);
			else if (task->cl->where == STARPU_CUDA && config->cuda_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->cuda_nodeid);
			else if (task->cl->where == STARPU_OPENCL && config->opencl_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->opencl_nodeid);
			else if (task->cl->where == STARPU_MIC && config->mic_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->mic_nodeid);
			else if (task->cl->where == STARPU_SCC && config->scc_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->scc_nodeid);
		}

		if(!sched_ctx->sched_policy)
		{
			/* Note: we have to call that early, or else the task may have
			 * disappeared already */
			starpu_push_task_end(task);
			if(!sched_ctx->awake_workers)
				ret = _starpu_push_task_on_specific_worker(task, sched_ctx->main_master);
			else
			{
				struct starpu_worker_collection *workers = sched_ctx->workers;

				struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
				job->task_size = workers->nworkers;
				job->combined_workerid = -1; /* it is a ctx, not a combined worker */
				job->active_task_alias_count = 0;

				STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, workers->nworkers);
				STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, workers->nworkers);
				job->after_work_busy_barrier = workers->nworkers;

				unsigned workerid;
				struct starpu_sched_ctx_iterator it;
				if(workers->init_iterator)
					workers->init_iterator(workers, &it);

				while(workers->has_next(workers, &it))
				{
					workerid = workers->get_next(workers, &it);
					struct starpu_task *alias = starpu_task_dup(task);
					alias->destroy = 1;
					ret |= _starpu_push_task_on_specific_worker(alias, workerid);
				}
			}
		}
		else
		{
			STARPU_ASSERT(sched_ctx->sched_policy->push_task);

			/* check whether there are any workers in the context */
			starpu_pthread_rwlock_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);
			STARPU_PTHREAD_RWLOCK_RDLOCK(changing_ctx_mutex);
			nworkers = starpu_sched_ctx_get_nworkers(sched_ctx->id);
			if (nworkers == 0)
				ret = -1;
			else
			{
				_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
				ret = sched_ctx->sched_policy->push_task(task);
				_STARPU_TRACE_WORKER_SCHEDULING_POP;
			}
			STARPU_PTHREAD_RWLOCK_UNLOCK(changing_ctx_mutex);
		}

		if(ret == -1)
		{
			fprintf(stderr, "repush task\n");
			_STARPU_TRACE_JOB_POP(task, task->priority > 0);
			ret = _starpu_push_task_to_workers(task);
		}
	}

	/* Note: from here, the task might have been destroyed already! */
	_STARPU_LOG_OUT();
	return ret;
}
/* Enqueue a task into the list of tasks explicitly attached to a worker. In
 * case workerid identifies a combined worker, a task will be enqueued into
 * each worker of the combination. */
static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int workerid)
{
	int nbasic_workers = (int)starpu_worker_get_count();

	/* Is this a basic worker or a combined worker ? */
	int is_basic_worker = (workerid < nbasic_workers);

	unsigned memory_node;
	struct _starpu_worker *worker = NULL;
	struct _starpu_combined_worker *combined_worker = NULL;

	if (is_basic_worker)
	{
		worker = _starpu_get_worker_struct(workerid);
		memory_node = worker->memory_node;
	}
	else
	{
		combined_worker = _starpu_get_combined_worker_struct(workerid);
		memory_node = combined_worker->memory_node;
	}

	if (use_prefetch)
		starpu_prefetch_task_input_on_node(task, memory_node);

	if (is_basic_worker)
		_starpu_push_task_on_specific_worker_notify_sched(task, worker, workerid, workerid);
	else
	{
		/* Notify all workers of the combined worker */
		int worker_size = combined_worker->worker_size;
		int *combined_workerid = combined_worker->combined_workerid;

		int j;
		for (j = 0; j < worker_size; j++)
		{
			int subworkerid = combined_workerid[j];
			_starpu_push_task_on_specific_worker_notify_sched(task, _starpu_get_worker_struct(subworkerid), subworkerid, workerid);
		}
	}

#ifdef STARPU_USE_SC_HYPERVISOR
	starpu_sched_ctx_call_pushed_task_cb(workerid, task->sched_ctx);
#endif //STARPU_USE_SC_HYPERVISOR

	unsigned i;
	if (is_basic_worker)
	{
		unsigned node = starpu_worker_get_memory_node(workerid);
		if (_starpu_task_uses_multiformat_handles(task))
		{
			unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
			for (i = 0; i < nbuffers; i++)
			{
				struct starpu_task *conversion_task;
				starpu_data_handle_t handle;

				handle = STARPU_TASK_GET_HANDLE(task, i);
				if (!_starpu_handle_needs_conversion_task(handle, node))
					continue;

				conversion_task = _starpu_create_conversion_task(handle, node);
				conversion_task->mf_skip = 1;
				conversion_task->execute_on_a_specific_worker = 1;
				conversion_task->workerid = workerid;
				_starpu_task_submit_conversion_task(conversion_task, workerid);
				//_STARPU_DEBUG("Pushing a conversion task\n");
			}

			for (i = 0; i < nbuffers; i++)
			{
				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
				handle->mf_node = node;
			}
		}
//		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)

		if(task->priority > 0)
			return _starpu_push_local_task(worker, task, 1);
		else
			return _starpu_push_local_task(worker, task, 0);
	}
	else
	{
		/* This is a combined worker so we create task aliases */
		int worker_size = combined_worker->worker_size;
		int *combined_workerid = combined_worker->combined_workerid;

		int ret = 0;

		struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
		job->task_size = worker_size;
		job->combined_workerid = workerid;
		job->active_task_alias_count = 0;

		STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, worker_size);
		STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, worker_size);
		job->after_work_busy_barrier = worker_size;

		/* Note: we have to call that early, or else the task may have
		 * disappeared already */
		starpu_push_task_end(task);

		int j;
		for (j = 0; j < worker_size; j++)
		{
			struct starpu_task *alias = starpu_task_dup(task);
			alias->destroy = 1;

			worker = _starpu_get_worker_struct(combined_workerid[j]);
			ret |= _starpu_push_local_task(worker, alias, 0);
		}

		return ret;
	}
}
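/* Hedged usage sketch (not from this file): how an application reaches the
 * path above, by pinning a task to one worker through the public task
 * fields execute_on_a_specific_worker and workerid. The pin_task_to_worker
 * name is hypothetical and "cl" is an assumed codelet; starpu_task_submit
 * is used in its single-argument public form. */
static void pin_task_to_worker(struct starpu_codelet *cl, int workerid)
{
	struct starpu_task *task = starpu_task_create();
	task->cl = cl;
	task->execute_on_a_specific_worker = 1;
	task->workerid = workerid;
	starpu_task_submit(task);
}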