Пример #1
0
/**
 * Captures a snapshot of build and runtime properties at construction.
 *
 * Always recorded: the construction time (std::time), the id of the
 * constructing thread, and the CAFFE_VERSION string.
 *
 * When CPU_ONLY is not defined, additionally recorded:
 *  - one entry per CUDA device in compute_capabilities_, encoded as
 *    major * 100 + minor;
 *  - the cuDNN version string built from the CUDNN_* macros, or a
 *    placeholder message when USE_CUDNN is not defined;
 *  - the cuBLAS, CUDA runtime and CUDA driver versions, each stored as the
 *    decimal string of the integer reported by the respective query call.
 */
Caffe::Properties::Properties() :
      init_time_(std::time(nullptr)),
      main_thread_id_(std::this_thread::get_id()),
      caffe_version_(AS_STRING(CAFFE_VERSION)) {
#ifndef CPU_ONLY
  int count = 0;
  CUDA_CHECK(cudaGetDeviceCount(&count));
  compute_capabilities_.resize(count);
  cudaDeviceProp device_prop;
  // Bound the loop with the signed device count the vector was just resized
  // to; this avoids comparing a signed index against an unsigned size().
  for (int gpu = 0; gpu < count; ++gpu) {
    CUDA_CHECK(cudaGetDeviceProperties(&device_prop, gpu));
    compute_capabilities_[gpu] = device_prop.major * 100 + device_prop.minor;
    DLOG(INFO) << "GPU " << gpu << " '" << device_prop.name << "' has compute capability "
        << device_prop.major << "." << device_prop.minor;
  }
#ifdef USE_CUDNN
  // Adjacent string literals are concatenated at compile time.
  cudnn_version_ =
      AS_STRING(CUDNN_MAJOR) "." AS_STRING(CUDNN_MINOR) "." AS_STRING(CUDNN_PATCHLEVEL);
#else
  cudnn_version_ = "USE_CUDNN is not defined";
#endif
  int cublas_version = 0;
  CUBLAS_CHECK(cublasGetVersion(Caffe::cublas_handle(), &cublas_version));
  cublas_version_ = std::to_string(cublas_version);

  int cuda_version = 0;
  CUDA_CHECK(cudaRuntimeGetVersion(&cuda_version));
  cuda_version_ = std::to_string(cuda_version);

  int cuda_driver_version = 0;
  CUDA_CHECK(cudaDriverGetVersion(&cuda_driver_version));
  cuda_driver_version_ = std::to_string(cuda_driver_version);
#endif
}
/*
 * Driver for the dot-product reduction example.
 *
 * Fills two float vectors of _nblocks * _entries_per_block elements with
 * pseudo-random values, computes a reference dot product on the host, then
 * submits one dot_codelet task per block. Each task is given the block's x
 * and y handles plus the shared _dot_handle, whose reduction methods are set
 * to redux_codelet / init_codelet.
 *
 * Returns EXIT_SUCCESS when |reference - _dot| is below reference * 1e-6,
 * EXIT_FAILURE otherwise, and 77 when the example cannot run
 * (STARPU_GLOBAL_ARBITER is set, or starpu_init / starpu_task_submit reports
 * -ENODEV). 77 is presumably the test harness's "skipped" code -- confirm.
 *
 * argc and argv are not used.
 */
int main(int argc, char **argv)
{
	int ret;

	/* Not supported yet */
	if (starpu_get_env_number_default("STARPU_GLOBAL_ARBITER", 0) > 0)
		return 77;

	ret = starpu_init(NULL);
	if (ret == -ENODEV)
		return 77;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_load_opencl_from_file("examples/reductions/dot_product_opencl_kernels.cl",
						  &_opencl_program, NULL);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
#endif

#ifdef STARPU_USE_CUDA
	/* cublasSdot has synchronization issues when using a non-blocking stream */
	cublasGetVersion(&cublas_version);
	if (cublas_version >= 7050)
		starpu_cublas_init();
#endif

	unsigned long nelems = _nblocks*_entries_per_block;
	size_t size = nelems*sizeof(float);

	_x = (float *) malloc(size);
	_y = (float *) malloc(size);

	_x_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t));
	_y_handles = (starpu_data_handle_t *) calloc(_nblocks, sizeof(starpu_data_handle_t));

	/* Check all four allocations: the handle arrays are indexed in the
	 * registration loop below just like the data vectors are filled here. */
	assert(_x && _y && _x_handles && _y_handles);

	/* Fixed seed so every run generates the same input vectors. */
	starpu_srand48(0);

	DOT_TYPE reference_dot = 0.0;

	/* Fill the inputs and accumulate the host-side reference result. */
	unsigned long i;
	for (i = 0; i < nelems; i++)
	{
		_x[i] = (float)starpu_drand48();
		_y[i] = (float)starpu_drand48();

		reference_dot += (DOT_TYPE)_x[i]*(DOT_TYPE)_y[i];
	}

	/* Register each block of _entries_per_block floats as its own vector handle. */
	unsigned block;
	for (block = 0; block < _nblocks; block++)
	{
		starpu_vector_data_register(&_x_handles[block], STARPU_MAIN_RAM,
			(uintptr_t)&_x[_entries_per_block*block], _entries_per_block, sizeof(float));
		starpu_vector_data_register(&_y_handles[block], STARPU_MAIN_RAM,
			(uintptr_t)&_y[_entries_per_block*block], _entries_per_block, sizeof(float));
	}

	starpu_variable_data_register(&_dot_handle, STARPU_MAIN_RAM, (uintptr_t)&_dot, sizeof(DOT_TYPE));

	/*
	 *	Compute dot product with StarPU
	 */
	starpu_data_set_reduction_methods(_dot_handle, &redux_codelet, &init_codelet);

	for (block = 0; block < _nblocks; block++)
	{
		struct starpu_task *task = starpu_task_create();

		task->cl = &dot_codelet;
		task->destroy = 1;

		task->handles[0] = _x_handles[block];
		task->handles[1] = _y_handles[block];
		task->handles[2] = _dot_handle;

		ret = starpu_task_submit(task);
		if (ret == -ENODEV) goto enodev;
		STARPU_ASSERT(!ret);
	}

	/* NOTE(review): no explicit wait is issued before reading _dot; this
	 * appears to rely on starpu_data_unregister blocking until the submitted
	 * tasks have completed -- confirm against the StarPU documentation. */
	for (block = 0; block < _nblocks; block++)
	{
		starpu_data_unregister(_x_handles[block]);
		starpu_data_unregister(_y_handles[block]);
	}
	starpu_data_unregister(_dot_handle);

	FPRINTF(stderr, "Reference : %e vs. %e (Delta %e)\n", reference_dot, _dot, reference_dot - _dot);

#ifdef STARPU_USE_CUDA
	if (cublas_version >= 7050)
		starpu_cublas_shutdown();
#endif

#ifdef STARPU_USE_OPENCL
	ret = starpu_opencl_unload_opencl(&_opencl_program);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");
#endif
	starpu_shutdown();

	free(_x);
	free(_y);
	free(_x_handles);
	free(_y_handles);

	/* Relative tolerance check against the host-side reference. */
	if (fabs(reference_dot - _dot) < reference_dot * 1e-6)
		return EXIT_SUCCESS;
	else
		return EXIT_FAILURE;

enodev:
	/* NOTE(review): this path returns without unregistering data, shutting
	 * StarPU down or freeing the buffers; the process exits right after, but
	 * confirm that skipping the teardown is intended. */
	fprintf(stderr, "WARNING: No one can execute this task\n");
	/* yes, we do not perform the computation but we did detect that no one
	 * could perform the kernel, so this is not an error from StarPU */
	return 77;
}