Example No. 1
		cuda_texture::cuda_texture(
			cuda_linear_buffer_device::const_ptr dev_smart_ptr,
			int vector_size)
			: tex(0)
			, dev_smart_ptr(dev_smart_ptr)
		{
			struct cudaResourceDesc res_desc;
			memset(&res_desc, 0, sizeof(res_desc));
			res_desc.resType = cudaResourceTypeLinear;
			res_desc.res.linear.devPtr = const_cast<void *>((const void *)(*dev_smart_ptr));
			switch (vector_size)
			{
			case 1:
				res_desc.res.linear.desc = cudaCreateChannelDesc<float>();
				break;
			case 2:
				res_desc.res.linear.desc = cudaCreateChannelDesc<float2>();
				break;
			case 4:
				res_desc.res.linear.desc = cudaCreateChannelDesc<float4>();
				break;
			default:
				throw neural_network_exception((boost::format("Invalid vector_size %1% for cuda_texture") % vector_size).str());
			}
			res_desc.res.linear.sizeInBytes = dev_smart_ptr->get_size();

			struct cudaTextureDesc tex_desc;
			memset(&tex_desc, 0, sizeof(tex_desc));
			tex_desc.addressMode[0] = cudaAddressModeBorder;
			tex_desc.readMode = cudaReadModeElementType;
			tex_desc.normalizedCoords = 0;

			cuda_safe_call(cudaCreateTextureObject(&tex, &res_desc, &tex_desc, 0));
		}
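A minimal kernel-side sketch of reading through a texture object created this way, assuming the single-float channel format (the kernel name and launch details are illustrative, not part of nnForge):

		// Hypothetical kernel: tex1Dfetch<T> is the CUDA intrinsic for reading
		// a texture object bound to linear memory by its element index.
		__global__ void read_through_texture(cudaTextureObject_t tex, float * out, int count)
		{
			int i = blockIdx.x * blockDim.x + threadIdx.x;
			if (i < count)
				out[i] = tex1Dfetch<float>(tex, i);
		}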
Example No. 2
		size_t cuda_texture::get_size() const
		{
			struct cudaResourceDesc res_desc;
			cuda_safe_call(cudaGetTextureObjectResourceDesc(&res_desc, tex));

			return res_desc.res.linear.sizeInBytes;
		}
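cuda_safe_call itself is not shown in these snippets; a minimal sketch of such a wrapper, written as an assumption about what it does (the real nnForge macro may differ in name and exception type), would turn a failing cudaError_t into an exception:

		#include <cuda_runtime.h>
		#include <stdexcept>

		// Hypothetical error-checking helper standing in for cuda_safe_call.
		inline void check_cuda_call(cudaError_t err)
		{
			if (err != cudaSuccess)
				throw std::runtime_error(cudaGetErrorString(err));
		}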
Example No. 3
		void cuda_running_configuration::update_parameters()
		{
			cuda_safe_call(cudaDriverGetVersion(&driver_version));
			cuda_safe_call(cudaRuntimeGetVersion(&runtime_version));

			int device_count;
			cuda_safe_call(cudaGetDeviceCount(&device_count));
			if (device_count <= 0)
				throw neural_network_exception("No CUDA capable devices are found");

			if (device_id >= device_count)
				throw neural_network_exception((boost::format("Device ID %1% specified while %2% devices are available") % device_id % device_count).str());

			cudaDeviceProp device_prop;
			cuda_safe_call(cudaGetDeviceProperties(&device_prop, device_id));
			device_name = device_prop.name;
			compute_capability_major = device_prop.major;
			compute_capability_minor = device_prop.minor;
			clock_rate = device_prop.clockRate;
			memory_clock_rate = device_prop.memoryClockRate;
			memory_bus_width = device_prop.memoryBusWidth;
			global_memory_size = device_prop.totalGlobalMem;
			ecc_enabled = (device_prop.ECCEnabled != 0);
			l2_cache_size = device_prop.l2CacheSize;
			multiprocessor_count = device_prop.multiProcessorCount;
			smem_per_block = device_prop.sharedMemPerBlock;
			max_threads_per_multiprocessor = device_prop.maxThreadsPerMultiProcessor;
			max_threads_per_block = device_prop.maxThreadsPerBlock;
			for(int i = 0; i < sizeof(max_threads_dim) / sizeof(max_threads_dim[0]); ++i)
				max_threads_dim[i] = device_prop.maxThreadsDim[i];
			for(int i = 0; i < sizeof(max_grid_size) / sizeof(max_grid_size[0]); ++i)
				max_grid_size[i] = device_prop.maxGridSize[i];
			max_texture_1d_linear = device_prop.maxTexture1DLinear;
			texture_alignment = device_prop.textureAlignment;
			pci_bus_id = device_prop.pciBusID;
			pci_device_id = device_prop.pciDeviceID;
		#ifdef _WIN32
			tcc_mode = (device_prop.tccDriver != 0);
		#endif

			cuda_safe_call(cudaSetDevice(device_id));

			cublas_safe_call(cublasCreate(&cublas_handle));

			cusparse_safe_call(cusparseCreate(&cusparse_handle));
		}
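The cuBLAS and cuSPARSE handles created at the end of update_parameters() need matching destroy calls when the configuration is torn down; the corresponding destructor is not shown here, but a sketch of that cleanup would be:

		// Hypothetical teardown mirroring update_parameters(): release the
		// library handles created above.
		cublas_safe_call(cublasDestroy(cublas_handle));
		cusparse_safe_call(cusparseDestroy(cusparse_handle));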
Example No. 4
		void layer_updater_cuda::get_data_from_device(const std::vector<cuda_linear_buffer_device::ptr>& device_data, layer_data::ptr host_data) const
		{
			unsigned int part_id = 0;
			for(layer_data::iterator it = host_data->begin(); it != host_data->end(); ++it, ++part_id)
			{
				cuda_linear_buffer_device::const_ptr src = device_data[part_id];
				cuda_safe_call(cudaMemcpy(&(*it->begin()), *src, it->size() * sizeof(float), cudaMemcpyDeviceToHost));
			}
		}
Example No. 5
		unsigned int supervised_data_reader_functor::operator()()
		{
			unsigned int entries_read_count = 0;
			try
			{
				PUSH_RANGE("Reading supervised data", 0);
				unsigned int input_neuron_count = reader->get_input_configuration().get_neuron_count();
				unsigned int output_neuron_count = reader->get_output_configuration().get_neuron_count();
				size_t input_neuron_elem_size = reader->get_input_neuron_elem_size();
				while(entries_read_count < entries_to_read_count)
				{
					bool entry_read = reader->read(
						input + (input_neuron_count * entries_read_count * input_neuron_elem_size),
						output + (output_neuron_count * entries_read_count));

					if (!entry_read)
						break;

					entries_read_count++;
				}
				POP_RANGE;

				cuda_safe_call(cudaMemcpyAsync(
					d_input,
					input,
					entries_read_count * input_neuron_count * input_neuron_elem_size,
					cudaMemcpyHostToDevice,
					stream));
				cuda_safe_call(cudaMemcpyAsync(
					d_output,
					output,
					entries_read_count * output_neuron_count * sizeof(float),
					cudaMemcpyHostToDevice,
					stream));
			}
			catch (std::runtime_error& e)
			{
				*error = e.what();
			}

			return entries_read_count;
		}
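The cudaMemcpyAsync calls above only overlap with other work when input and output point to page-locked host memory; with pageable buffers they fall back to synchronous behaviour. A sketch of allocating pinned staging buffers (buffer sizes and variable names here are illustrative, not taken from the reader code):

		// Hypothetical pinned allocation for the host staging buffers used by
		// the asynchronous copies; released with cudaFreeHost when done.
		unsigned char * input;
		float * output;
		cuda_safe_call(cudaHostAlloc((void **)&input, input_buffer_size_in_bytes, cudaHostAllocDefault));
		cuda_safe_call(cudaHostAlloc((void **)&output, output_buffer_size_in_bytes, cudaHostAllocDefault));
		// ... fill the buffers and launch the copies as in operator() above ...
		cuda_safe_call(cudaFreeHost(input));
		cuda_safe_call(cudaFreeHost(output));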
Example No. 6
		std::vector<const_cuda_linear_buffer_device_smart_ptr> layer_hessian_cuda::get_data_squared(const_layer_data_smart_ptr host_data) const
		{
			std::vector<const_cuda_linear_buffer_device_smart_ptr> res;

			for(std::vector<std::vector<float> >::const_iterator it = host_data->begin(); it != host_data->end(); ++it)
			{
				size_t buffer_size = it->size() * sizeof(float);
				cuda_linear_buffer_device_smart_ptr new_buf(new cuda_linear_buffer_device(buffer_size));
				cuda_safe_call(cudaMemcpy(*new_buf, &(*it->begin()), buffer_size, cudaMemcpyHostToDevice));
				cuda_util::multiply_by_itself(
					*cuda_config,
					*new_buf,
					*new_buf,
					new_buf->get_size() / sizeof(float),
					0);
				cuda_safe_call(cudaStreamSynchronize(0));
				res.push_back(new_buf);
			}

			return res;
		}
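cuda_util::multiply_by_itself squares each element of the buffer in place; its implementation is not shown in these examples, but an element-wise kernel of that shape, sketched here as an assumption, would look like:

		// Hypothetical element-wise squaring kernel; nnForge's actual
		// cuda_util::multiply_by_itself may be organised differently.
		__global__ void square_elements(const float * in, float * out, int count)
		{
			int i = blockIdx.x * blockDim.x + threadIdx.x;
			if (i < count)
				out[i] = in[i] * in[i];
		}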
Example No. 7
		std::ostream& operator<< (std::ostream& out, const cuda_running_configuration& running_configuration)
		{
			out << "--- CUDA versions ---" << std::endl;
			out << "Driver version = " << running_configuration.driver_version / 1000 << "." << (running_configuration.driver_version % 100) / 10 << std::endl;
			out << "Runtime version = " << running_configuration.runtime_version / 1000 << "." << (running_configuration.runtime_version % 100) / 10 << std::endl;

			out << "--- Device ---" << std::endl;

			out << "Device Id = " << running_configuration.device_id << std::endl;
			out << "Device name = " << running_configuration.device_name << std::endl;
			out << "Compute capability = " << running_configuration.compute_capability_major << "." << running_configuration.compute_capability_minor << std::endl;
			out << "Clock rate = " << (running_configuration.clock_rate / 1000) << " MHz" << std::endl;
			out << "Memory clock rate = " << (running_configuration.memory_clock_rate / 1000) << " MHz" << std::endl;
			out << "Memory bus width = " << running_configuration.memory_bus_width << " bits" << std::endl;
			out << "Global memory size = " << running_configuration.global_memory_size / (1024 * 1024) << " MB" << std::endl;
			out << "ECC support = " << (running_configuration.ecc_enabled ? "Enabled" : "Disabled") << std::endl;
			out << "L2 cache size = " << running_configuration.l2_cache_size << " bytes" << std::endl;
			out << "Multiprocessor count = " << running_configuration.multiprocessor_count << std::endl;
			out << "Shared memory per block size = " << running_configuration.smem_per_block << " bytes" << std::endl;
			out << "Maximum number of threads per multiprocessor = " << running_configuration.max_threads_per_multiprocessor << std::endl;
			out << "Maximum number of threads per block = " << running_configuration.max_threads_per_block << std::endl;
			out << "Maximum sizes of each dimension of a block = "
				<< running_configuration.max_threads_dim[0] << " x "
				<< running_configuration.max_threads_dim[1] << " x "
				<< running_configuration.max_threads_dim[2] << std::endl;
			out << "Maximum sizes of each dimension of a grid = "
				<< running_configuration.max_grid_size[0] << " x "
				<< running_configuration.max_grid_size[1] << " x "
				<< running_configuration.max_grid_size[2] << std::endl;
			out << "Maximum size of 1D texture bound to linear memory = " << running_configuration.max_texture_1d_linear << std::endl;
			out << "Texture alignment = " << running_configuration.texture_alignment << " bytes" << std::endl;
			out << "PCI Bus ID = " << running_configuration.pci_bus_id << std::endl;
			out << "PCI Location ID = " << running_configuration.pci_device_id << std::endl;
			#ifdef _WIN32
				out << "Driver mode = " << (running_configuration.tcc_mode ? "TCC" : "WDDM") << std::endl;
			#endif

			out << "--- Settings ---" << std::endl;

			out << "Max global memory usage ratio = " << running_configuration.max_global_memory_usage_ratio << std::endl;

			out << "--- Status ---" << std::endl;

			size_t free_memory;
			size_t total_memory;
			cuda_safe_call(cudaMemGetInfo(&free_memory, &total_memory));

			out << "Free memory = " << free_memory / (1024 * 1024) << " MB" << std::endl;
			out << "Total memory = " << total_memory / (1024 * 1024) << " MB" << std::endl;

			return out;
		}
Example No. 8
		std::vector<cuda_linear_buffer_device::ptr> layer_updater_cuda::get_data(layer_data::const_ptr host_data) const
		{
			std::vector<cuda_linear_buffer_device::ptr> res;

			for(std::vector<std::vector<float> >::const_iterator it = host_data->begin(); it != host_data->end(); ++it)
			{
				size_t buffer_size = it->size() * sizeof(float);
				cuda_linear_buffer_device::ptr new_buf(new cuda_linear_buffer_device(buffer_size));
				cuda_safe_call(cudaMemcpy(*new_buf, &(*it->begin()), buffer_size, cudaMemcpyHostToDevice));
				res.push_back(new_buf);
			}

			return res;
		}
Example No. 9
		std::vector<cuda_linear_buffer_device::const_ptr> layer_updater_cuda::set_get_data_custom(layer_data_custom::const_ptr host_data_custom)
		{
			notify_data_custom(host_data_custom);

			std::vector<cuda_linear_buffer_device::const_ptr> res;

			for(std::vector<std::vector<int> >::const_iterator it = host_data_custom->begin(); it != host_data_custom->end(); ++it)
			{
				size_t buffer_size = it->size() * sizeof(int);
				cuda_linear_buffer_device::ptr new_buf(new cuda_linear_buffer_device(buffer_size));
				cuda_safe_call(cudaMemcpy(*new_buf, &(*it->begin()), buffer_size, cudaMemcpyHostToDevice));
				res.push_back(new_buf);
			}

			return res;
		}
Example No. 10
		void cuda_running_configuration::set_device() const
		{
			cuda_safe_call(cudaSetDevice(device_id));
		}