/* Generic entry point used to push a ready job: it marks the task as ready and
 * dispatches it to the proper underlying implementation.
 *
 *  - A task without codelet is a control task: its job is terminated at once
 *    (callback execution and dependency enforcement are delegated to
 *    _starpu_handle_job_termination()) and 0 is returned.
 *  - A task explicitly bound to a worker goes to that worker's local queue,
 *    after an optional prefetch of its input on the worker's memory node.
 *  - Any other task is handed over to the scheduling policy.
 *
 * In the last two cases the value of the underlying push function is
 * returned. */
int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
{
	struct starpu_task *task = j->task;

	task->status = STARPU_TASK_READY;

	/* No codelet: nothing to execute, terminate the job directly. */
	if (task->cl == NULL)
	{
		_starpu_handle_job_termination(j, job_is_already_locked);
		return 0;
	}

	if (STARPU_UNLIKELY(task->execute_on_a_specific_worker))
	{
		unsigned target = task->workerid;
		struct starpu_worker_s *target_worker = _starpu_get_worker_struct(target);

		/* Start fetching the input data as early as possible. */
		if (use_prefetch)
			_starpu_prefetch_task_input_on_node(task, starpu_worker_get_memory_node(target));

		return _starpu_push_local_task(target_worker, j);
	}

	/* General case: let the scheduling policy pick a worker. */
	STARPU_ASSERT(policy.push_task);
	return policy.push_task(task);
}
static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers) { struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; int synthesize_arity = starpu_get_env_number("STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER"); int min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (min < 2) min = 2; int max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (max == -1) max = INT_MAX; if (synthesize_arity == -1) synthesize_arity = 2; /* First, mark nodes which contain CPU workers, simply by setting their userdata field */ int i; for (i = 0; i < nworkers; i++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); if (worker->perf_arch.devices[0].type == STARPU_CPU_WORKER && worker->perf_arch.devices[0].ncores == 1) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid); obj = obj->parent; while (obj) { obj->userdata = (void*) -1; obj = obj->parent; } } } find_and_assign_combinations(hwloc_get_root_obj(topology->hwtopology), min, max, synthesize_arity); }
static void combine_all_cpu_workers(int *workerids, int nworkers) { unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); int cpu_workers[STARPU_NMAXWORKERS]; int ncpus = 0; struct _starpu_worker *worker; int i; int min; int max; for (i = 0; i < nworkers; i++) { worker = _starpu_get_worker_struct(workerids[i]); if (worker->arch == STARPU_CPU_WORKER) cpu_workers[ncpus++] = workerids[i]; } min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (min < 1) min = 1; max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (max == -1 || max > ncpus) max = ncpus; for (i = min; i <= max; i++) { int newworkerid; newworkerid = starpu_combined_worker_assign_workerid(i, cpu_workers); STARPU_ASSERT(newworkerid >= 0); workers->add(workers, newworkerid); } }
static void find_and_assign_combinations_without_hwloc(int *workerids, int nworkers) { int i; unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; int min, max; #ifdef STARPU_USE_MIC unsigned j; int mic_min, mic_max; #endif struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); /* We put the id of all CPU workers in this array */ int cpu_workers[STARPU_NMAXWORKERS]; unsigned ncpus = 0; #ifdef STARPU_USE_MIC unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices; unsigned * nmics_table; int * mic_id; int ** mic_workers; mic_id = malloc(sizeof(int)*nb_mics); nmics_table = malloc(sizeof(unsigned)*nb_mics); mic_workers = malloc(sizeof(int*)*nb_mics); for(j=0; j<nb_mics; j++) { mic_id[j] = -1; nmics_table[j] = 0; mic_workers[j] = malloc(sizeof(int)*STARPU_NMAXWORKERS); } #endif /* STARPU_USE_MIC */ struct _starpu_worker *worker; for (i = 0; i < nworkers; i++) { worker = _starpu_get_worker_struct(workerids[i]); if (worker->arch == STARPU_CPU_WORKER) cpu_workers[ncpus++] = i; #ifdef STARPU_USE_MIC else if(worker->arch == STARPU_MIC_WORKER) { for(j=0; mic_id[j] != worker->devid && mic_id[j] != -1 && j<nb_mics; j++); if(j<nb_mics) { if(mic_id[j] == -1) { mic_id[j] = worker->devid; } mic_workers[j][nmics_table[j]++] = i; } } #endif /* STARPU_USE_MIC */ } min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (min < 2) min = 2; max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (max == -1 || max > (int) ncpus) max = ncpus; assign_combinations_without_hwloc(workers,cpu_workers,ncpus,min,max); #ifdef STARPU_USE_MIC mic_min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (mic_min < 2) mic_min = 2; for(j=0; j<nb_mics; j++) { mic_max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (mic_max == -1 || mic_max > (int) nmics_table[j]) mic_max = nmics_table[j]; assign_combinations_without_hwloc(workers,mic_workers[j],nmics_table[j],mic_min,mic_max); 
free(mic_workers[j]); } free(mic_id); free(nmics_table); free(mic_workers); #endif /* STARPU_USE_MIC */ }
/* Enqueue a task into the list of tasks explicitely attached to a worker. In * case workerid identifies a combined worker, a task will be enqueued into * each worker of the combination. */ static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int workerid) { int nbasic_workers = (int)starpu_worker_get_count(); /* Is this a basic worker or a combined worker ? */ int is_basic_worker = (workerid < nbasic_workers); unsigned memory_node; struct _starpu_worker *worker = NULL; struct _starpu_combined_worker *combined_worker = NULL; if (is_basic_worker) { worker = _starpu_get_worker_struct(workerid); memory_node = worker->memory_node; } else { combined_worker = _starpu_get_combined_worker_struct(workerid); memory_node = combined_worker->memory_node; } if (use_prefetch) starpu_prefetch_task_input_on_node(task, memory_node); if (is_basic_worker) _starpu_push_task_on_specific_worker_notify_sched(task, worker, workerid, workerid); else { /* Notify all workers of the combined worker */ int worker_size = combined_worker->worker_size; int *combined_workerid = combined_worker->combined_workerid; int j; for (j = 0; j < worker_size; j++) { int subworkerid = combined_workerid[j]; _starpu_push_task_on_specific_worker_notify_sched(task, _starpu_get_worker_struct(subworkerid), subworkerid, workerid); } } #ifdef STARPU_USE_SC_HYPERVISOR starpu_sched_ctx_call_pushed_task_cb(workerid, task->sched_ctx); #endif //STARPU_USE_SC_HYPERVISOR unsigned i; if (is_basic_worker) { unsigned node = starpu_worker_get_memory_node(workerid); if (_starpu_task_uses_multiformat_handles(task)) { unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task); for (i = 0; i < nbuffers; i++) { struct starpu_task *conversion_task; starpu_data_handle_t handle; handle = STARPU_TASK_GET_HANDLE(task, i); if (!_starpu_handle_needs_conversion_task(handle, node)) continue; conversion_task = _starpu_create_conversion_task(handle, node); conversion_task->mf_skip = 1; conversion_task->execute_on_a_specific_worker 
= 1; conversion_task->workerid = workerid; _starpu_task_submit_conversion_task(conversion_task, workerid); //_STARPU_DEBUG("Pushing a conversion task\n"); } for (i = 0; i < nbuffers; i++) { starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i); handle->mf_node = node; } } // if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id) if(task->priority > 0) return _starpu_push_local_task(worker, task, 1); else return _starpu_push_local_task(worker, task, 0); } else { /* This is a combined worker so we create task aliases */ int worker_size = combined_worker->worker_size; int *combined_workerid = combined_worker->combined_workerid; int ret = 0; struct _starpu_job *job = _starpu_get_job_associated_to_task(task); job->task_size = worker_size; job->combined_workerid = workerid; job->active_task_alias_count = 0; STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, worker_size); STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, worker_size); job->after_work_busy_barrier = worker_size; /* Note: we have to call that early, or else the task may have * disappeared already */ starpu_push_task_end(task); int j; for (j = 0; j < worker_size; j++) { struct starpu_task *alias = starpu_task_dup(task); alias->destroy = 1; worker = _starpu_get_worker_struct(combined_workerid[j]); ret |= _starpu_push_local_task(worker, alias, 0); } return ret; } }
/* The scheduling policy may put tasks directly into a worker's local queue so
 * that it is not always necessary to create its own queue when the local queue
 * is sufficient.
 *
 * workerid identifies the worker whose local queue receives the task. prio is
 * forwarded unchanged to _starpu_push_local_task(), whose return value is
 * returned as is.
 *
 * NOTE(review): earlier documentation named the last parameter "back" and
 * stated that a non-zero value puts the task at the end of the queue the
 * worker pops from first, 0 giving a FIFO ordering -- to be confirmed against
 * _starpu_push_local_task(). */
int starpu_push_local_task(int workerid, struct starpu_task *task, int prio)
{
	return _starpu_push_local_task(_starpu_get_worker_struct(workerid), task, prio);
}