void _starpu_init_sched_policy(struct starpu_machine_config_s *config) { /* Perhaps we have to display some help */ display_sched_help_message(); /* Prefetch is activated by default */ use_prefetch = starpu_get_env_number("STARPU_PREFETCH"); if (use_prefetch == -1) use_prefetch = 1; /* By default, we don't calibrate */ unsigned do_calibrate = 0; if (config->user_conf && (config->user_conf->calibrate != -1)) { do_calibrate = config->user_conf->calibrate; } else { int res = starpu_get_env_number("STARPU_CALIBRATE"); do_calibrate = (res < 0)?0:(unsigned)res; } _starpu_set_calibrate_flag(do_calibrate); struct starpu_sched_policy_s *selected_policy; selected_policy = select_sched_policy(config); load_sched_policy(selected_policy); policy.init_sched(&config->topology, &policy); }
static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers) { struct _starpu_machine_config *config = _starpu_get_machine_config(); struct _starpu_machine_topology *topology = &config->topology; int synthesize_arity = starpu_get_env_number("STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER"); int min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (min < 2) min = 2; int max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (max == -1) max = INT_MAX; if (synthesize_arity == -1) synthesize_arity = 2; /* First, mark nodes which contain CPU workers, simply by setting their userdata field */ int i; for (i = 0; i < nworkers; i++) { struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]); if (worker->perf_arch.devices[0].type == STARPU_CPU_WORKER && worker->perf_arch.devices[0].ncores == 1) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid); obj = obj->parent; while (obj) { obj->userdata = (void*) -1; obj = obj->parent; } } } find_and_assign_combinations(hwloc_get_root_obj(topology->hwtopology), min, max, synthesize_arity); }
static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid) { starpu_ssize_t limit; size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0; size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0; char name[30]; #ifdef STARPU_SIMGRID totalGlobalMem = _starpu_simgrid_get_memsize("OpenCL", devid); #elif defined(STARPU_USE_OPENCL) /* Request the size of the current device's memory */ cl_int err; cl_ulong size; err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size), &size, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); totalGlobalMem = size; #endif limit = starpu_get_env_number("STARPU_LIMIT_OPENCL_MEM"); if (limit == -1) { sprintf(name, "STARPU_LIMIT_OPENCL_%u_MEM", devid); limit = starpu_get_env_number(name); } #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID) if (limit == -1) { /* Use 90% of the available memory by default. */ limit = totalGlobalMem / (1024*1024) * 0.9; } #endif global_mem[devid] = limit * 1024*1024; #ifdef STARPU_USE_OPENCL /* How much memory to waste ? */ to_waste = totalGlobalMem - global_mem[devid]; #endif _STARPU_DEBUG("OpenCL device %d: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n", devid, (long)to_waste/(1024*1024), (long) limit, (long)totalGlobalMem/(1024*1024), (long)(totalGlobalMem - to_waste)/(1024*1024)); }
static void combine_all_cpu_workers(int *workerids, int nworkers) { unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); int cpu_workers[STARPU_NMAXWORKERS]; int ncpus = 0; struct _starpu_worker *worker; int i; int min; int max; for (i = 0; i < nworkers; i++) { worker = _starpu_get_worker_struct(workerids[i]); if (worker->arch == STARPU_CPU_WORKER) cpu_workers[ncpus++] = workerids[i]; } min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (min < 1) min = 1; max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (max == -1 || max > ncpus) max = ncpus; for (i = min; i <= max; i++) { int newworkerid; newworkerid = starpu_combined_worker_assign_workerid(i, cpu_workers); STARPU_ASSERT(newworkerid >= 0); workers->add(workers, newworkerid); } }
void _starpu_mpi_cache_stats_init(MPI_Comm comm) { stats_enabled = starpu_get_env_number("STARPU_MPI_CACHE_STATS"); if (stats_enabled == -1) { stats_enabled = 0; } if (stats_enabled == 0) return; if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU is executed with STARPU_MPI_CACHE_STATS=1, which slows down a bit\n"); starpu_mpi_comm_size(comm, &world_size); _STARPU_MPI_DEBUG(1, "allocating for %d nodes\n", world_size); comm_cache_amount = (size_t *) calloc(world_size, sizeof(size_t)); }
void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _starpu_sched_ctx *sched_ctx, struct starpu_sched_policy *selected_policy) { /* Perhaps we have to display some help */ display_sched_help_message(); /* Prefetch is activated by default */ use_prefetch = starpu_get_env_number("STARPU_PREFETCH"); if (use_prefetch == -1) use_prefetch = 1; /* Set calibrate flag */ _starpu_set_calibrate_flag(config->conf.calibrate); load_sched_policy(selected_policy, sched_ctx); _STARPU_TRACE_WORKER_SCHEDULING_PUSH; sched_ctx->sched_policy->init_sched(sched_ctx->id); _STARPU_TRACE_WORKER_SCHEDULING_POP; }
static int test_cpu(void) { int ret, var = 0; static starpu_pthread_t driver_thread; struct starpu_conf conf; struct starpu_driver d = { .type = STARPU_CPU_WORKER, .id.cpu_id = 0 }; starpu_conf_init(&conf); conf.n_not_launched_drivers = 1; conf.not_launched_drivers = &d; conf.ncpus = 1; ret = starpu_init(&conf); if (ret == -ENODEV || starpu_cpu_worker_get_count() == 0) { FPRINTF(stderr, "WARNING: No CPU worker found\n"); if (ret == 0) starpu_shutdown(); return STARPU_TEST_SKIPPED; } ret = starpu_pthread_create(&driver_thread, NULL, run_driver, &d); if (ret != 0) { ret = 1; goto out2; } struct starpu_task *task; task = starpu_task_create(); cl.where = STARPU_CPU; task->cl = &cl; task->cl_arg = &var; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) { FPRINTF(stderr, "WARNING: No worker can execute this task\n"); ret = STARPU_TEST_SKIPPED; goto out; } FPRINTF(stderr, "[CPU] Var = %d (expected value: 1)\n", var); ret = !!(var != 1); out: starpu_drivers_request_termination(); if (starpu_pthread_join(driver_thread, NULL) != 0) return 1; out2: starpu_shutdown(); return ret; } #endif /* STARPU_USE_CPU */ #ifdef STARPU_USE_CUDA static int test_cuda(void) { int ret, var = 0; static starpu_pthread_t driver_thread; struct starpu_conf conf; struct starpu_driver d = { .type = STARPU_CUDA_WORKER, .id.cuda_id = 0 }; starpu_conf_init(&conf); conf.n_not_launched_drivers = 1; conf.not_launched_drivers = &d; conf.ncuda = 1; ret = starpu_init(&conf); if (ret == -ENODEV || starpu_cuda_worker_get_count() == 0) { FPRINTF(stderr, "WARNING: No CUDA worker found\n"); if (ret == 0) starpu_shutdown(); return STARPU_TEST_SKIPPED; } ret = starpu_pthread_create(&driver_thread, NULL, run_driver, &d); if (ret == -1) { ret = 1; goto out; } struct starpu_task *task; task = starpu_task_create(); cl.where = STARPU_CUDA; task->cl = &cl; task->cl_arg = &var; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) { FPRINTF(stderr, "WARNING: No worker can execute this task\n"); ret = STARPU_TEST_SKIPPED; goto out; } out: starpu_drivers_request_termination(); if (starpu_pthread_join(driver_thread, NULL) != 0) return 1; starpu_shutdown(); FPRINTF(stderr, "[CUDA] Var = %d (expected value: 1)\n", var); ret = !!(var != 1); return ret; } #endif /* STARPU_USE_CUDA */ #ifdef STARPU_USE_OPENCL static int test_opencl(void) { int ret, var = 0; static starpu_pthread_t driver_thread; struct starpu_conf conf; cl_int err; cl_uint pdummy; cl_platform_id platform; err = clGetPlatformIDs(1, &platform, &pdummy); if (err != CL_SUCCESS) { FPRINTF(stderr, "WARNING: No OpenCL platform found\n"); return STARPU_TEST_SKIPPED; } cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR; if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0) device_type |= CL_DEVICE_TYPE_CPU; if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0) device_type = CL_DEVICE_TYPE_CPU; cl_device_id device_id; err = clGetDeviceIDs(platform, device_type, 1, &device_id, NULL); if (err != CL_SUCCESS) { FPRINTF(stderr, "WARNING: No GPU devices found on OpenCL platform\n"); return STARPU_TEST_SKIPPED; } struct starpu_driver d = { .type = STARPU_OPENCL_WORKER, .id.opencl_id = device_id }; starpu_conf_init(&conf); conf.n_not_launched_drivers = 1; conf.not_launched_drivers = &d; conf.ncuda = 0; conf.nopencl = 1; ret = starpu_init(&conf); if (ret == -ENODEV || starpu_opencl_worker_get_count() == 0) { FPRINTF(stderr, "WARNING: No OpenCL workers found\n"); if (ret == 0) starpu_shutdown(); return STARPU_TEST_SKIPPED; } ret = starpu_pthread_create(&driver_thread, NULL, run_driver, &d); if (ret == -1) { ret = 1; goto out; } struct starpu_task *task; task = starpu_task_create(); cl.where = STARPU_OPENCL; task->cl = &cl; task->cl_arg = &var; task->synchronous = 1; ret = starpu_task_submit(task); if (ret == -ENODEV) { FPRINTF(stderr, "WARNING: No worker can execute the task\n"); ret = STARPU_TEST_SKIPPED; goto out; } out: starpu_drivers_request_termination(); if (starpu_pthread_join(driver_thread, NULL) != 0) return 1; starpu_shutdown(); FPRINTF(stderr, "[OpenCL] Var = %d (expected value: 1)\n", var); ret = !!(var != 1); return ret; } #endif /* STARPU_USE_OPENCL */ int main(void) { int ret = STARPU_TEST_SKIPPED; #ifdef STARPU_USE_CPU ret = test_cpu(); if (ret == 1) return 1; #endif #ifdef STARPU_USE_CUDA ret = test_cuda(); if (ret == 1) return 1; #endif #ifdef STARPU_USE_OPENCL ret = test_opencl(); if (ret == 1) return 1; #endif return ret; }
static void find_and_assign_combinations_without_hwloc(int *workerids, int nworkers) { int i; unsigned sched_ctx_id = starpu_sched_ctx_get_context(); if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS) sched_ctx_id = 0; int min, max; #ifdef STARPU_USE_MIC unsigned j; int mic_min, mic_max; #endif struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id); /* We put the id of all CPU workers in this array */ int cpu_workers[STARPU_NMAXWORKERS]; unsigned ncpus = 0; #ifdef STARPU_USE_MIC unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices; unsigned * nmics_table; int * mic_id; int ** mic_workers; mic_id = malloc(sizeof(int)*nb_mics); nmics_table = malloc(sizeof(unsigned)*nb_mics); mic_workers = malloc(sizeof(int*)*nb_mics); for(j=0; j<nb_mics; j++) { mic_id[j] = -1; nmics_table[j] = 0; mic_workers[j] = malloc(sizeof(int)*STARPU_NMAXWORKERS); } #endif /* STARPU_USE_MIC */ struct _starpu_worker *worker; for (i = 0; i < nworkers; i++) { worker = _starpu_get_worker_struct(workerids[i]); if (worker->arch == STARPU_CPU_WORKER) cpu_workers[ncpus++] = i; #ifdef STARPU_USE_MIC else if(worker->arch == STARPU_MIC_WORKER) { for(j=0; mic_id[j] != worker->devid && mic_id[j] != -1 && j<nb_mics; j++); if(j<nb_mics) { if(mic_id[j] == -1) { mic_id[j] = worker->devid; } mic_workers[j][nmics_table[j]++] = i; } } #endif /* STARPU_USE_MIC */ } min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (min < 2) min = 2; max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (max == -1 || max > (int) ncpus) max = ncpus; assign_combinations_without_hwloc(workers,cpu_workers,ncpus,min,max); #ifdef STARPU_USE_MIC mic_min = starpu_get_env_number("STARPU_MIN_WORKERSIZE"); if (mic_min < 2) mic_min = 2; for(j=0; j<nb_mics; j++) { mic_max = starpu_get_env_number("STARPU_MAX_WORKERSIZE"); if (mic_max == -1 || mic_max > (int) nmics_table[j]) mic_max = nmics_table[j]; assign_combinations_without_hwloc(workers,mic_workers[j],nmics_table[j],mic_min,mic_max); free(mic_workers[j]); } free(mic_id); free(nmics_table); free(mic_workers); #endif /* STARPU_USE_MIC */ }