Example #1
0
/*
 * Set up the scheduling policy for the given machine configuration.
 *
 * Steps, in order: show the scheduler help message if needed, read the
 * prefetch setting, decide whether calibration is enabled, select and load
 * the scheduling policy, then call the init hook of the loaded policy.
 */
void _starpu_init_sched_policy(struct starpu_machine_config_s *config)
{
	/* The helper decides on its own whether a help message is displayed */
	display_sched_help_message();

	/* STARPU_PREFETCH unset (-1) means prefetching stays enabled */
	use_prefetch = starpu_get_env_number("STARPU_PREFETCH");
	if (use_prefetch == -1)
	{
		use_prefetch = 1;
	}

	/*
	 * Calibration is disabled unless requested.  An explicit value in the
	 * user configuration wins; otherwise STARPU_CALIBRATE is consulted and
	 * negative results are treated as "disabled".
	 */
	unsigned calibrate = 0;
	if (!config->user_conf || config->user_conf->calibrate == -1)
	{
		int from_env = starpu_get_env_number("STARPU_CALIBRATE");
		if (from_env >= 0)
		{
			calibrate = (unsigned) from_env;
		}
	}
	else
	{
		calibrate = config->user_conf->calibrate;
	}
	_starpu_set_calibrate_flag(calibrate);

	/* Pick the policy to use, then install it */
	struct starpu_sched_policy_s *chosen = select_sched_policy(config);
	load_sched_policy(chosen);

	/* Let the freshly loaded policy initialise itself */
	policy.init_sched(&config->topology, &policy);
}
/*
 * Build combined workers by walking the hwloc topology.
 *
 * workerids: identifiers of the workers to consider.
 * nworkers:  number of entries in workerids.
 *
 * The minimum and maximum combined-worker sizes come from
 * STARPU_MIN_WORKERSIZE (clamped to at least 2) and STARPU_MAX_WORKERSIZE
 * (INT_MAX when unset); the synthesis arity comes from
 * STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER (2 when unset).
 *
 * Every ancestor of a single-core CPU worker's processing unit is tagged by
 * setting its userdata field to (void*) -1 before the topology root is handed
 * to find_and_assign_combinations().
 */
static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	struct _starpu_machine_topology *topology = &config->topology;
	int synthesize_arity = starpu_get_env_number("STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER");

	int min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (min < 2)
		min = 2;
	int max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
	if (max == -1)
		max = INT_MAX;

	if (synthesize_arity == -1)
		synthesize_arity = 2;

	/* First, mark nodes which contain CPU workers, simply by setting their userdata field */
	int i;
	for (i = 0; i < nworkers; i++)
	{
		struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]);
		if (worker->perf_arch.devices[0].type == STARPU_CPU_WORKER && worker->perf_arch.devices[0].ncores == 1)
		{
			hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid);
			/* hwloc returns NULL when no object exists at that depth/index:
			 * there is nothing to mark for this worker in that case */
			if (!obj)
				continue;
			for (obj = obj->parent; obj; obj = obj->parent)
				obj->userdata = (void*) -1;
		}
	}
	find_and_assign_combinations(hwloc_get_root_obj(topology->hwtopology), min, max, synthesize_arity);
}
/*
 * Compute the amount of memory StarPU may use on OpenCL device devid and
 * store it (in bytes) in global_mem[devid].
 *
 * The limit, expressed in MB, is looked up in this order:
 *   1. STARPU_LIMIT_OPENCL_MEM
 *   2. STARPU_LIMIT_OPENCL_<devid>_MEM
 *   3. 90% of the device's total memory (only when built with OpenCL or
 *      SimGrid support, which is when the total size is known).
 */
static void _starpu_opencl_limit_gpu_mem_if_needed(unsigned devid)
{
	starpu_ssize_t limit;
	size_t STARPU_ATTRIBUTE_UNUSED totalGlobalMem = 0;
	size_t STARPU_ATTRIBUTE_UNUSED to_waste = 0;
	/* 24 fixed characters + up to 20 digits for devid + terminating NUL */
	char name[48];

#ifdef STARPU_SIMGRID
	totalGlobalMem = _starpu_simgrid_get_memsize("OpenCL", devid);
#elif defined(STARPU_USE_OPENCL)
	/* Request the size of the current device's memory */
	cl_int err;
	cl_ulong size;
	err = clGetDeviceInfo(devices[devid], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size), &size, NULL);
	if (STARPU_UNLIKELY(err != CL_SUCCESS))
		STARPU_OPENCL_REPORT_ERROR(err);
	totalGlobalMem = size;
#endif

	limit = starpu_get_env_number("STARPU_LIMIT_OPENCL_MEM");
	if (limit == -1)
	{
		/* No global limit: fall back to the per-device variable.
		 * snprintf bounds the write to the buffer size. */
		snprintf(name, sizeof(name), "STARPU_LIMIT_OPENCL_%u_MEM", devid);
		limit = starpu_get_env_number(name);
	}
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
	if (limit == -1)
	{
		/* Use 90% of the available memory by default.  */
		limit = totalGlobalMem / (1024*1024) * 0.9;
	}
#endif

	/* The limit is in MB, global_mem[] is in bytes */
	global_mem[devid] = limit * 1024*1024;

#ifdef STARPU_USE_OPENCL
	/* How much memory to waste ? */
	to_waste = totalGlobalMem - global_mem[devid];
#endif

	_STARPU_DEBUG("OpenCL device %u: Wasting %ld MB / Limit %ld MB / Total %ld MB / Remains %ld MB\n",
			devid, (long)to_waste/(1024*1024), (long) limit, (long)totalGlobalMem/(1024*1024),
			(long)(totalGlobalMem - to_waste)/(1024*1024));

}
/*
 * Create combined workers made of the CPU workers found in workerids.
 *
 * workerids: identifiers of the workers to consider.
 * nworkers:  number of entries in workerids.
 *
 * One combined worker is created for each size between
 * STARPU_MIN_WORKERSIZE (at least 1) and STARPU_MAX_WORKERSIZE (at most the
 * number of CPU workers found); each one is added to the worker collection of
 * the current scheduling context (context 0 when none is set).
 */
static void combine_all_cpu_workers(int *workerids, int nworkers)
{
	unsigned ctx = starpu_sched_ctx_get_context();
	if (ctx == STARPU_NMAX_SCHED_CTXS)
	{
		ctx = 0;
	}
	struct starpu_worker_collection *collection = starpu_sched_ctx_get_worker_collection(ctx);

	/* Gather the identifiers of the CPU workers only */
	int cpu_ids[STARPU_NMAXWORKERS];
	int cpu_count = 0;
	int idx;
	for (idx = 0; idx < nworkers; idx++)
	{
		struct _starpu_worker *current = _starpu_get_worker_struct(workerids[idx]);
		if (current->arch != STARPU_CPU_WORKER)
			continue;

		cpu_ids[cpu_count] = workerids[idx];
		cpu_count++;
	}

	/* Size bounds, clamped to [1, cpu_count] */
	int smallest = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (smallest < 1)
	{
		smallest = 1;
	}
	int largest = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
	if (largest == -1 || largest > cpu_count)
	{
		largest = cpu_count;
	}

	/* One combined worker per size, always built from the head of cpu_ids */
	int size = smallest;
	while (size <= largest)
	{
		int newworkerid = starpu_combined_worker_assign_workerid(size, cpu_ids);
		STARPU_ASSERT(newworkerid >= 0);
		collection->add(collection, newworkerid);
		size++;
	}
}
/*
 * Initialise the MPI cache statistics.
 *
 * Statistics are controlled by STARPU_MPI_CACHE_STATS and are off when the
 * variable is unset.  When they are on, a warning is printed (unless
 * STARPU_SILENT is set), the size of comm is stored in world_size and a
 * zero-filled counter array with one entry per node is allocated.
 */
void _starpu_mpi_cache_stats_init(MPI_Comm comm)
{
	stats_enabled = starpu_get_env_number("STARPU_MPI_CACHE_STATS");
	if (stats_enabled == -1)
		stats_enabled = 0;

	if (stats_enabled != 0)
	{
		if (getenv("STARPU_SILENT") == NULL)
		{
			fprintf(stderr,"Warning: StarPU is executed with STARPU_MPI_CACHE_STATS=1, which slows down a bit\n");
		}

		starpu_mpi_comm_size(comm, &world_size);
		_STARPU_MPI_DEBUG(1, "allocating for %d nodes\n", world_size);

		/* One counter per node, all starting at zero */
		comm_cache_amount = (size_t *) calloc(world_size, sizeof(size_t));
	}
}
/*
 * Install selected_policy on the scheduling context sched_ctx.
 *
 * config:          machine configuration; only its conf.calibrate field is
 *                  read here.
 * sched_ctx:       scheduling context that receives the policy.
 * selected_policy: policy handed to load_sched_policy().
 *
 * After loading, the init_sched hook of the context's policy is called with
 * the context id, bracketed by the worker-scheduling trace macros.
 */
void _starpu_init_sched_policy(struct _starpu_machine_config *config, struct _starpu_sched_ctx *sched_ctx, struct starpu_sched_policy *selected_policy)
{
	/* Perhaps we have to display some help */
	display_sched_help_message();

	/* Prefetch is activated by default: an unset STARPU_PREFETCH (-1) is
	 * turned into 1 */
	use_prefetch = starpu_get_env_number("STARPU_PREFETCH");
	if (use_prefetch == -1)
		use_prefetch = 1;

	/* Set calibrate flag from the configuration */
	_starpu_set_calibrate_flag(config->conf.calibrate);

	load_sched_policy(selected_policy, sched_ctx);

	/* Run the policy's own initialisation inside a scheduling trace section */
	_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
	sched_ctx->sched_policy->init_sched(sched_ctx->id);
	_STARPU_TRACE_WORKER_SCHEDULING_POP;
}
/*
 * Run one synchronous task on a CPU worker whose driver is not launched by
 * StarPU but run by a thread created here.
 *
 * Returns 0 when the task set var to 1, 1 on failure, and
 * STARPU_TEST_SKIPPED when no CPU worker is available or no worker can
 * execute the task.
 */
static int
test_cpu(void)
{
	int ret;
	int var = 0;
	static starpu_pthread_t driver_thread;
	struct starpu_conf conf;
	struct starpu_driver d =
	{
		.type = STARPU_CPU_WORKER,
		.id.cpu_id = 0
	};
	struct starpu_task *task;

	/* Ask StarPU for a single CPU whose driver it must not launch itself */
	starpu_conf_init(&conf);
	conf.ncpus = 1;
	conf.not_launched_drivers = &d;
	conf.n_not_launched_drivers = 1;

	ret = starpu_init(&conf);
	if (ret == -ENODEV || starpu_cpu_worker_get_count() == 0)
	{
		FPRINTF(stderr, "WARNING: No CPU worker found\n");
		/* StarPU did start: shut it down before skipping */
		if (ret == 0)
		{
			starpu_shutdown();
		}
		return STARPU_TEST_SKIPPED;
	}

	/* Run the driver in a thread created by the test */
	if (starpu_pthread_create(&driver_thread, NULL, run_driver, &d) != 0)
	{
		/* No thread to stop or join: only StarPU has to be shut down */
		ret = 1;
		goto shutdown_only;
	}

	task = starpu_task_create();
	cl.where = STARPU_CPU;
	task->cl = &cl;
	task->cl_arg = &var;
	task->synchronous = 1;

	ret = starpu_task_submit(task);
	if (ret == -ENODEV)
	{
		FPRINTF(stderr, "WARNING: No worker can execute this task\n");
		ret = STARPU_TEST_SKIPPED;
	}
	else
	{
		FPRINTF(stderr, "[CPU] Var = %d (expected value: 1)\n", var);
		ret = (var != 1);
	}

	/* Stop the driver thread and wait for it */
	starpu_drivers_request_termination();
	if (starpu_pthread_join(driver_thread, NULL) != 0)
		return 1;

shutdown_only:
	starpu_shutdown();
	return ret;
}
#endif /* STARPU_USE_CPU */

#ifdef STARPU_USE_CUDA
/*
 * Run one synchronous task on a CUDA worker whose driver is not launched by
 * StarPU but run by a thread created here.
 *
 * Returns 0 when the task set var to 1, 1 on failure, and
 * STARPU_TEST_SKIPPED when no CUDA worker is available or no worker can
 * execute the task.
 */
static int
test_cuda(void)
{
	int ret, var = 0;
	static starpu_pthread_t driver_thread;
	struct starpu_conf conf;
	struct starpu_driver d =
	{
		.type = STARPU_CUDA_WORKER,
		.id.cuda_id = 0
	};

	starpu_conf_init(&conf);
	conf.n_not_launched_drivers = 1;
	conf.not_launched_drivers = &d;
	conf.ncuda = 1;
	ret = starpu_init(&conf);
	if (ret == -ENODEV || starpu_cuda_worker_get_count() == 0)
	{
		FPRINTF(stderr, "WARNING: No CUDA worker found\n");
		if (ret == 0)
			starpu_shutdown();
		return STARPU_TEST_SKIPPED;
	}

	/* Thread creation reports failure with a non-zero error number, not -1 */
	ret = starpu_pthread_create(&driver_thread, NULL, run_driver, &d);
	if (ret != 0)
	{
		/* No driver thread exists: skip termination/join, only shut down */
		ret = 1;
		goto out2;
	}

	struct starpu_task *task;
	task = starpu_task_create();
	cl.where = STARPU_CUDA;
	task->cl = &cl;
	task->cl_arg = &var;
	task->synchronous = 1;

	ret = starpu_task_submit(task);
	if (ret == -ENODEV)
	{
		FPRINTF(stderr, "WARNING: No worker can execute this task\n");
		ret = STARPU_TEST_SKIPPED;
		goto out;
	}

	/* Evaluate the result before the cleanup labels so that the SKIPPED
	 * status set above is not overwritten */
	FPRINTF(stderr, "[CUDA] Var = %d (expected value: 1)\n", var);
	ret = !!(var != 1);
out:
	starpu_drivers_request_termination();
	if (starpu_pthread_join(driver_thread, NULL) != 0)
		return 1;
out2:
	starpu_shutdown();
	return ret;
}
#endif /* STARPU_USE_CUDA */

#ifdef STARPU_USE_OPENCL
/*
 * Run one synchronous task on an OpenCL worker whose driver is not launched
 * by StarPU but run by a thread created here.
 *
 * The OpenCL device is looked up directly through the OpenCL API: GPU and
 * accelerator devices by default, plus CPU devices when
 * STARPU_OPENCL_ON_CPUS is positive, or CPU devices only when
 * STARPU_OPENCL_ONLY_ON_CPUS is positive.
 *
 * Returns 0 when the task set var to 1, 1 on failure, and
 * STARPU_TEST_SKIPPED when no platform, device or OpenCL worker is available
 * or no worker can execute the task.
 */
static int
test_opencl(void)
{
	int ret, var = 0;
	static starpu_pthread_t driver_thread;
	struct starpu_conf conf;

	cl_int err;
	cl_uint pdummy;
	cl_platform_id platform;
	err = clGetPlatformIDs(1, &platform, &pdummy);
	if (err != CL_SUCCESS)
	{
		FPRINTF(stderr, "WARNING: No OpenCL platform found\n");
		return STARPU_TEST_SKIPPED;
	}

	cl_device_type device_type = CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
	if (starpu_get_env_number("STARPU_OPENCL_ON_CPUS") > 0)
		device_type |= CL_DEVICE_TYPE_CPU;
	if (starpu_get_env_number("STARPU_OPENCL_ONLY_ON_CPUS") > 0)
		device_type = CL_DEVICE_TYPE_CPU;

	cl_device_id device_id;
	err = clGetDeviceIDs(platform, device_type, 1, &device_id, NULL);
	if (err != CL_SUCCESS)
	{
		FPRINTF(stderr, "WARNING: No GPU devices found on OpenCL platform\n");
		return STARPU_TEST_SKIPPED;
	}

	struct starpu_driver d =
	{
		.type = STARPU_OPENCL_WORKER,
		.id.opencl_id = device_id
	};

	starpu_conf_init(&conf);
	conf.n_not_launched_drivers = 1;
	conf.not_launched_drivers = &d;
	conf.ncuda = 0;
	conf.nopencl = 1;
	ret = starpu_init(&conf);
	if (ret == -ENODEV || starpu_opencl_worker_get_count() == 0)
	{
		FPRINTF(stderr, "WARNING: No OpenCL workers found\n");
		if (ret == 0)
			starpu_shutdown();
		return STARPU_TEST_SKIPPED;
	}

	/* Thread creation reports failure with a non-zero error number, not -1 */
	ret = starpu_pthread_create(&driver_thread, NULL, run_driver, &d);
	if (ret != 0)
	{
		/* No driver thread exists: skip termination/join, only shut down */
		ret = 1;
		goto out2;
	}

	struct starpu_task *task;
	task = starpu_task_create();
	cl.where = STARPU_OPENCL;
	task->cl = &cl;
	task->cl_arg = &var;
	task->synchronous = 1;

	ret = starpu_task_submit(task);
	if (ret == -ENODEV)
	{
		FPRINTF(stderr, "WARNING: No worker can execute the task\n");
		ret = STARPU_TEST_SKIPPED;
		goto out;
	}

	/* Evaluate the result before the cleanup labels so that the SKIPPED
	 * status set above is not overwritten */
	FPRINTF(stderr, "[OpenCL] Var = %d (expected value: 1)\n", var);
	ret = !!(var != 1);
out:
	starpu_drivers_request_termination();
	if (starpu_pthread_join(driver_thread, NULL) != 0)
		return 1;
out2:
	starpu_shutdown();
	return ret;
}
#endif /* STARPU_USE_OPENCL */

/*
 * Run the driver test for every backend enabled at build time, in the order
 * CPU, CUDA, OpenCL.  The first test that returns 1 aborts the run with
 * exit status 1; otherwise the status of the last test executed is returned
 * (STARPU_TEST_SKIPPED when no backend is enabled).
 */
int
main(void)
{
	int status = STARPU_TEST_SKIPPED;

#ifdef STARPU_USE_CPU
	status = test_cpu();
	if (status == 1)
	{
		return 1;
	}
#endif

#ifdef STARPU_USE_CUDA
	status = test_cuda();
	if (status == 1)
	{
		return 1;
	}
#endif

#ifdef STARPU_USE_OPENCL
	status = test_opencl();
	if (status == 1)
	{
		return 1;
	}
#endif

	return status;
}
/*
 * Build combined workers without topology information.
 *
 * workerids: identifiers of the workers to consider.
 * nworkers:  number of entries in workerids.
 *
 * CPU workers are collected into one array and, when MIC support is built
 * in, MIC workers are grouped per device.  Each group is then handed to
 * assign_combinations_without_hwloc() together with the worker collection of
 * the current scheduling context (context 0 when none is set) and size
 * bounds taken from STARPU_MIN_WORKERSIZE (at least 2) and
 * STARPU_MAX_WORKERSIZE (at most the size of the group).
 */
static void find_and_assign_combinations_without_hwloc(int *workerids, int nworkers)
{
	int i;
	unsigned sched_ctx_id  = starpu_sched_ctx_get_context();
	if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
		sched_ctx_id = 0;
	int min, max;
#ifdef STARPU_USE_MIC
	unsigned j;
	int mic_min, mic_max;
#endif

	struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);

	/* We put the id of all CPU workers in this array */
	int cpu_workers[STARPU_NMAXWORKERS];
	unsigned ncpus = 0;
#ifdef STARPU_USE_MIC
	/* Per-MIC-device tables: device id (-1 = free slot), number of workers
	 * recorded so far, and the list of those workers */
	unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices;
	unsigned * nmics_table;
	int * mic_id;
	int ** mic_workers;
	mic_id = malloc(sizeof(int)*nb_mics);
	nmics_table = malloc(sizeof(unsigned)*nb_mics);
	mic_workers = malloc(sizeof(int*)*nb_mics);
	for(j=0; j<nb_mics; j++)
	{
		mic_id[j] = -1;
		nmics_table[j] = 0;
		mic_workers[j] = malloc(sizeof(int)*STARPU_NMAXWORKERS);
	}
#endif /* STARPU_USE_MIC */

	struct _starpu_worker *worker;
	for (i = 0; i < nworkers; i++)
	{
		worker = _starpu_get_worker_struct(workerids[i]);
		/* NOTE(review): the position i is stored here, not workerids[i];
		 * confirm this is what assign_combinations_without_hwloc() expects */
		if (worker->arch == STARPU_CPU_WORKER)
			cpu_workers[ncpus++] = i;
#ifdef STARPU_USE_MIC
		else if(worker->arch == STARPU_MIC_WORKER)
		{
			/* Find the slot of this device, or the first free slot.  The
			 * bound is tested first so mic_id[] is never read at index
			 * nb_mics. */
			for(j=0; j<nb_mics && mic_id[j] != worker->devid && mic_id[j] != -1; j++)
				;
			if(j<nb_mics)
			{
				if(mic_id[j] == -1)
				{
					mic_id[j] = worker->devid;
				}
				mic_workers[j][nmics_table[j]++] = i;
			}
		}
#endif /* STARPU_USE_MIC */

	}


	min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (min < 2)
		min = 2;
	max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
	if (max == -1 || max > (int) ncpus)
		max = ncpus;

	assign_combinations_without_hwloc(workers,cpu_workers,ncpus,min,max);
#ifdef STARPU_USE_MIC
	mic_min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (mic_min < 2)
		mic_min = 2;
	for(j=0; j<nb_mics; j++)
	{
		/* The upper bound is capped by the number of workers of this device */
		mic_max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
		if (mic_max == -1 || mic_max > (int) nmics_table[j])
			mic_max = nmics_table[j];
		assign_combinations_without_hwloc(workers,mic_workers[j],nmics_table[j],mic_min,mic_max);
		free(mic_workers[j]);
	}
	free(mic_id);
	free(nmics_table);
	free(mic_workers);
#endif /* STARPU_USE_MIC */
}