/** * Return the most suitable worker to whom add a task. * The number of previously processed tasks, total and local, * and the number of tasks currently awaiting to be processed * by the tasks are taken into account to select the most suitable * worker to add a task to. */ static unsigned select_worker_overload(unsigned sched_ctx_id) { unsigned worker; float worker_ratio; unsigned best_worker = 0; float best_ratio = FLT_MAX; /* Don't try to play smart until we get * enough informations. */ if (performed_total < calibration_value) return select_worker_round_robin(sched_ctx_id); struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); worker_ratio = overload_metric(sched_ctx_id, worker); if (worker_ratio < best_ratio) { best_worker = worker; best_ratio = worker_ratio; } } return best_worker; }
static int ws_push_task(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); struct _starpu_deque_jobq *deque_queue; struct _starpu_job *j = _starpu_get_job_associated_to_task(task); int workerid = starpu_worker_get_id(); unsigned worker = 0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); /* !! C'est ballot de tout locker! */ while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); starpu_pthread_mutex_t *sched_mutex; starpu_pthread_cond_t *sched_cond; starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond); STARPU_PTHREAD_MUTEX_LOCK(sched_mutex); } /* If the current thread is not a worker but * the main thread (-1), we find the better one to * put task on its queue */ if (workerid == -1) workerid = select_worker(sched_ctx_id); deque_queue = ws->queue_array[workerid]; #ifdef HAVE_AYUDAME_H if (AYU_event) { intptr_t id = workerid; AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id); } #endif _starpu_job_list_push_back(&deque_queue->jobq, j); deque_queue->njobs++; starpu_push_task_end(task); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); starpu_pthread_mutex_t *sched_mutex; starpu_pthread_cond_t *sched_cond; starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond); #ifndef STARPU_NON_BLOCKING_DRIVERS STARPU_PTHREAD_COND_SIGNAL(sched_cond); #endif STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex); } return 0; }
/**
 * Synthesize intermediate combined workers below one topology level.
 *
 * The children that carry CPU workers (non-NULL userdata) are grouped, in
 * order, into chunks of ceil(n / synthesize_arity) such children. For every
 * completed chunk whose worker count lies within [min, max], a combined
 * worker is registered and added to the worker collection of the current
 * scheduling context (context 0 when starpu_sched_ctx_get_context() returns
 * STARPU_NMAX_SCHED_CTXS). Every chunk is then subdivided recursively.
 *
 * children:         array of child objects to group
 * min, max:         inclusive bounds on the size of a combined worker
 * arity:            number of entries in children
 * n:                number of entries in children that carry CPU workers
 * synthesize_arity: number of chunks a level is split into
 */
static void synthesize_intermediate_workers(hwloc_obj_t *children, unsigned min, unsigned max, unsigned arity, unsigned n, unsigned synthesize_arity)
{
	unsigned nworkers, i, j;
	unsigned chunk_size;
	unsigned chunk_start;
	int cpu_workers[STARPU_NMAXWORKERS];
	int ret;

	if (synthesize_arity < 2)
		/* Nothing to subdivide: an arity of 0 would divide by zero
		 * below, and with an arity of 1 each chunk is as large as
		 * its parent, so the recursion could never shrink. */
		return;

	if (n <= synthesize_arity)
		/* Not too many children, do not synthesize */
		return;

	chunk_size = (n + synthesize_arity-1) / synthesize_arity;

	_STARPU_DEBUG("%u children > %u, synthesizing intermediate combined workers of size %u\n", n, synthesize_arity, chunk_size);

	n = 0;
	j = 0;
	nworkers = 0;
	chunk_start = 0;
	for (i = 0 ; i < arity; i++)
	{
		if (children[i]->userdata)
		{
			n++;
			_STARPU_DEBUG("child %u\n", i);
			find_workers(children[i], cpu_workers, &nworkers);
			j++;
		}
		/* Completed a chunk, or last bit (but not if it's just 1 subobject) */
		if (j == chunk_size || (i == arity-1 && j > 1))
		{
			if (nworkers >= min && nworkers <= max)
			{
				unsigned sched_ctx_id = starpu_sched_ctx_get_context();
				if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
					sched_ctx_id = 0;
				struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);

				_STARPU_DEBUG("Adding it\n");
				ret = starpu_combined_worker_assign_workerid(nworkers, cpu_workers);
				STARPU_ASSERT(ret >= 0);
				workers->add(workers,ret);
			}
			/* Recurse there. The chunk covers children[chunk_start..i]
			 * inclusive, hence the +1: without it the last child of
			 * the chunk is counted in n but never visited by the
			 * recursion, and its sub-combinations are lost. */
			synthesize_intermediate_workers(children+chunk_start, min, max, i - chunk_start + 1, n, synthesize_arity);
			/* And restart another one */
			n = 0;
			j = 0;
			nworkers = 0;
			chunk_start = i+1;
		}
	}
}
/**
 * Walk the topology below obj and register combined workers for it.
 *
 * - If exactly one child of obj carries CPU workers (non-NULL userdata),
 *   obj adds nothing over that child, so the walk continues directly in
 *   that child.
 * - Otherwise, when the number of workers found below obj lies within
 *   [min, max], a combined worker covering them is registered and added to
 *   the worker collection of the current scheduling context (context 0 when
 *   starpu_sched_ctx_get_context() returns STARPU_NMAX_SCHED_CTXS).
 *   Intermediate combined workers are then synthesized among the children,
 *   and the walk recurses into every child whose userdata is (void*) -1.
 *
 * obj:              topology object to examine
 * min, max:         inclusive bounds on the size of a combined worker
 * synthesize_arity: forwarded to synthesize_intermediate_workers()
 */
static void find_and_assign_combinations(hwloc_obj_t obj, unsigned min, unsigned max, unsigned synthesize_arity)
{
	char name[64];
	unsigned i, n, nworkers;
	unsigned populated_child = 0;
	int cpu_workers[STARPU_NMAXWORKERS];

	struct _starpu_machine_config *config = _starpu_get_machine_config();
	struct _starpu_machine_topology *topology = &config->topology;

	hwloc_obj_snprintf(name, sizeof(name), topology->hwtopology, obj, "#", 0);
	_STARPU_DEBUG("Looking at %s\n", name);

	for (n = 0, i = 0; i < obj->arity; i++)
		if (obj->children[i]->userdata)
		{
			/* it has a CPU worker */
			n++;
			populated_child = i;
		}

	if (n == 1)
	{
		/* If there is only one child with CPU workers, we go to the
		 * next level right away. That child is not necessarily
		 * children[0], so descend into the one that was counted. */
		find_and_assign_combinations(obj->children[populated_child], min, max, synthesize_arity);
		return;
	}

	/* Add this object */
	nworkers = 0;
	find_workers(obj, cpu_workers, &nworkers);

	if (nworkers >= min && nworkers <= max)
	{
		_STARPU_DEBUG("Adding it\n");
		unsigned sched_ctx_id = starpu_sched_ctx_get_context();
		if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
			sched_ctx_id = 0;

		struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);

		int newworkerid = starpu_combined_worker_assign_workerid(nworkers, cpu_workers);
		STARPU_ASSERT(newworkerid >= 0);
		workers->add(workers,newworkerid);
	}

	/* Add artificial intermediate objects recursively */
	synthesize_intermediate_workers(obj->children, min, max, obj->arity, n, synthesize_arity);

	/* And recurse */
	for (i = 0; i < obj->arity; i++)
		if (obj->children[i]->userdata == (void*) -1)
			find_and_assign_combinations(obj->children[i], min, max, synthesize_arity);
}
static int push_task_dummy(struct starpu_task *task) { unsigned sched_ctx_id = task->sched_ctx; struct dummy_sched_data *data = (struct dummy_sched_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id); /* NB: In this simplistic strategy, we assume that the context in which we push task has at least one worker*/ /* lock all workers when pushing tasks on a list where all of them would pop for tasks */ starpu_pthread_mutex_lock(&data->policy_mutex); starpu_task_list_push_front(&data->sched_list, task); starpu_push_task_end(task); starpu_pthread_mutex_unlock(&data->policy_mutex); /*if there are no tasks block */ /* wake people waiting for a task */ unsigned worker = 0; struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); struct starpu_sched_ctx_iterator it; workers->init_iterator(workers, &it); while(workers->has_next(workers, &it)) { worker = workers->get_next(workers, &it); starpu_pthread_mutex_t *sched_mutex; starpu_pthread_cond_t *sched_cond; starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond); starpu_pthread_mutex_lock(sched_mutex); starpu_pthread_cond_signal(sched_cond); starpu_pthread_mutex_unlock(sched_mutex); } return 0; }
static void combine_all_cpu_workers(int *workerids, int nworkers) { unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); int cpu_workers[STARPU_NMAXWORKERS]; int ncpus = 0; struct _starpu_worker *worker; int i; int min; int max; for (i = 0; i < nworkers; i++) { worker = _starpu_get_worker_struct(workerids[i]); if (worker->arch == STARPU_CPU_WORKER) cpu_workers[ncpus++] = workerids[i]; } min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (min < 1) min = 1; max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (max == -1 || max > ncpus) max = ncpus; for (i = min; i <= max; i++) { int newworkerid; newworkerid = starpu_combined_worker_assign_workerid(i, cpu_workers); STARPU_ASSERT(newworkerid >= 0); workers->add(workers, newworkerid); } }
static void find_and_assign_combinations_without_hwloc(int *workerids, int nworkers) { int i; unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; int min, max; #ifdef STARPU_USE_MIC unsigned j; int mic_min, mic_max; #endif struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); /* We put the id of all CPU workers in this array */ int cpu_workers[STARPU_NMAXWORKERS]; unsigned ncpus = 0; #ifdef STARPU_USE_MIC unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices; unsigned * nmics_table; int * mic_id; int ** mic_workers; mic_id = malloc(sizeof(int)*nb_mics); nmics_table = malloc(sizeof(unsigned)*nb_mics); mic_workers = malloc(sizeof(int*)*nb_mics); for(j=0; j<nb_mics; j++) { mic_id[j] = -1; nmics_table[j] = 0; mic_workers[j] = malloc(sizeof(int)*STARPU_NMAXWORKERS); } #endif /* STARPU_USE_MIC */ struct _starpu_worker *worker; for (i = 0; i < nworkers; i++) { worker = _starpu_get_worker_struct(workerids[i]); if (worker->arch == STARPU_CPU_WORKER) cpu_workers[ncpus++] = i; #ifdef STARPU_USE_MIC else if(worker->arch == STARPU_MIC_WORKER) { for(j=0; mic_id[j] != worker->devid && mic_id[j] != -1 && j<nb_mics; j++); if(j<nb_mics) { if(mic_id[j] == -1) { mic_id[j] = worker->devid; } mic_workers[j][nmics_table[j]++] = i; } } #endif /* STARPU_USE_MIC */ } min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (min < 2) min = 2; max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (max == -1 || max > (int) ncpus) max = ncpus; assign_combinations_without_hwloc(workers,cpu_workers,ncpus,min,max); #ifdef STARPU_USE_MIC mic_min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (mic_min < 2) mic_min = 2; for(j=0; j<nb_mics; j++) { mic_max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (mic_max == -1 || mic_max > (int) nmics_table[j]) mic_max = nmics_table[j]; assign_combinations_without_hwloc(workers,mic_workers[j],nmics_table[j],mic_min,mic_max); 
free(mic_workers[j]); } free(mic_id); free(nmics_table); free(mic_workers); #endif /* STARPU_USE_MIC */ }