Example #1
	void mem_copy_to(device_memory& mem)
	{
		cuda_push_context();
		if(mem.device_pointer)
			cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()))
		cuda_pop_context();
	}
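Every snippet below leans on a few helpers from the surrounding device class: cuda_push_context()/cuda_pop_context() to make the device's CUcontext current on the calling thread, a cuda_assert() macro that checks the returned CUresult (the calls carry no trailing semicolon, so the macro must supply its own block), and cuda_device_ptr() to turn the generic device_ptr handle back into a CUdeviceptr. Their implementations are not part of these examples; the following is only a minimal sketch, assuming plain fprintf error reporting and an integer handle type.

	#include <cuda.h>
	#include <cstdio>

	/* sketch only: the real code base reports errors through its own logging */
	#define cuda_assert(stmt) \
		{ \
			CUresult result_ = (stmt); \
			if(result_ != CUDA_SUCCESS) \
				fprintf(stderr, "CUDA error %d in %s\n", (int)result_, #stmt); \
		}

	typedef unsigned long long device_ptr;	/* assumption: an integer handle type */

	static CUcontext cuContext = NULL;	/* a member of the device class in the real code */

	/* make the device context current on this thread before issuing driver calls */
	static void cuda_push_context()
	{
		cuda_assert(cuCtxPushCurrent(cuContext))
	}

	/* pop it again once the driver calls are done */
	static void cuda_pop_context()
	{
		CUcontext dummy;
		cuda_assert(cuCtxPopCurrent(&dummy))
	}

	/* reinterpret the opaque device_ptr handle as a CUdeviceptr */
	static CUdeviceptr cuda_device_ptr(device_ptr mem)
	{
		return (CUdeviceptr)mem;
	}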
Example #2
	~CUDADevice()
	{
		task_pool.stop();

		cuda_push_context();
		cuda_assert(cuCtxDetach(cuContext))
	}
Example #3
	bool load_kernels(bool experimental)
	{
		/* check if cuda init succeeded */
		if(cuContext == 0)
			return false;
		
		/* check if GPU is supported with current feature set */
		if(!support_device(experimental))
			return false;

		/* get kernel */
		string cubin = compile_kernel();

		if(cubin == "")
			return false;

		/* open module */
		cuda_push_context();

		CUresult result = cuModuleLoad(&cuModule, cubin.c_str());
		if(cuda_error_(result, "cuModuleLoad"))
			cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str()));

		cuda_pop_context();

		return (result == CUDA_SUCCESS);
	}
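load_kernels() only gets the module into memory; the kernels themselves are looked up by name and launched through the driver API elsewhere, which these examples do not show. The sketch below is purely illustrative: the kernel name "my_kernel", its single pointer argument and the launch dimensions are made up.

	/* sketch only: kernel name, argument and launch dimensions are hypothetical */
	void launch_example()
	{
		cuda_push_context();

		/* look up a __global__ function compiled into the loaded cubin */
		CUfunction cuFunc;
		cuda_assert(cuModuleGetFunction(&cuFunc, cuModule, "my_kernel"))

		/* launch a 1D grid of 256 blocks x 128 threads, passing one device pointer */
		CUdeviceptr d_buffer = 0;
		void *args[] = { &d_buffer };
		cuda_assert(cuLaunchKernel(cuFunc,
			256, 1, 1,	/* grid dimensions */
			128, 1, 1,	/* block dimensions */
			0, 0,		/* shared memory bytes, stream */
			args, NULL))

		cuda_pop_context();
	}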
Example #4
	void mem_zero(device_memory& mem)
	{
		/* clear the host-side copy; the device copy is cleared below */
		memset((void*)mem.data_pointer, 0, mem.memory_size());

		cuda_push_context();
		cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()))
		cuda_pop_context();
	}
Example #5
	void mem_alloc(device_memory& mem, MemoryType type)
	{
		cuda_push_context();
		CUdeviceptr device_pointer;
		cuda_assert(cuMemAlloc(&device_pointer, mem.memory_size()))
		mem.device_pointer = (device_ptr)device_pointer;
		cuda_pop_context();
	}
Example #6
	void mem_free(device_memory& mem)
	{
		if(mem.device_pointer) {
			cuda_push_context();
			cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)))
			cuda_pop_context();

			mem.device_pointer = 0;
		}
	}
Example #7
	void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
	{
		/* copy back rows [y, y+h) of a 2D buffer laid out with elem bytes per pixel */
		size_t offset = elem*y*w;
		size_t size = elem*w*h;

		cuda_push_context();
		cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
			(CUdeviceptr)((uchar*)mem.device_pointer + offset), size))
		cuda_pop_context();
	}
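Examples #1 and #5-#7 are thin wrappers around the usual allocate / upload / download / free round trip. Stripped of the device_memory bookkeeping, and reusing the cuda_assert sketch from above, the same sequence against the raw driver API looks roughly like this (buffer size and contents are made up):

	#include <vector>

	void round_trip_example(CUcontext context)
	{
		std::vector<float> host(1024, 1.0f);
		size_t size = host.size() * sizeof(float);

		cuda_assert(cuCtxPushCurrent(context))

		/* mem_alloc */
		CUdeviceptr device_pointer;
		cuda_assert(cuMemAlloc(&device_pointer, size))

		/* mem_copy_to */
		cuda_assert(cuMemcpyHtoD(device_pointer, host.data(), size))

		/* mem_copy_from (here the whole buffer rather than a row range) */
		cuda_assert(cuMemcpyDtoH(host.data(), device_pointer, size))

		/* mem_free */
		cuda_assert(cuMemFree(device_pointer))

		CUcontext dummy;
		cuda_assert(cuCtxPopCurrent(&dummy))
	}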
Example #8
	void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
	{
		/* determine format */
		CUarray_format_enum format;
		size_t dsize = datatype_size(mem.data_type);
		size_t size = mem.memory_size();

		switch(mem.data_type) {
			case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
			case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
			case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
			case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
			default: assert(0); return;
		}

		CUtexref texref = NULL;

		cuda_push_context();
		cuda_assert(cuModuleGetTexRef(&texref, cuModule, name))

		if(!texref) {
			cuda_pop_context();
			return;
		}

		if(interpolation) {
			CUarray handle = NULL;
			CUDA_ARRAY_DESCRIPTOR desc;

			desc.Width = mem.data_width;
			desc.Height = mem.data_height;
			desc.Format = format;
			desc.NumChannels = mem.data_elements;

			cuda_assert(cuArrayCreate(&handle, &desc))

			if(!handle) {
				cuda_pop_context();
				return;
			}

			if(mem.data_height > 1) {
				CUDA_MEMCPY2D param;
				memset(&param, 0, sizeof(param));
				param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
				param.dstArray = handle;
				param.srcMemoryType = CU_MEMORYTYPE_HOST;
				param.srcHost = (void*)mem.data_pointer;
				param.srcPitch = mem.data_width*dsize*mem.data_elements;
				param.WidthInBytes = param.srcPitch;
				param.Height = mem.data_height;

				cuda_assert(cuMemcpy2D(&param))
			}
			else
				/* 1D data: copy host memory straight into the CUDA array
				   (the example is cut off here in the source) */
				cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size))
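The rest of tex_alloc() is missing from the source listing. For orientation only, binding the freshly created CUarray to the texture reference with the legacy cuTexRef driver calls typically continues along these lines; the filter, flag and address-mode choices below are assumptions, not the original code:

			/* sketch of the binding step that would follow (illustrative, not the original) */
			cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT))
			cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR))
			cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES))
			cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements))
			cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP))
			cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP))
			cuda_pop_context();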
Example #9
	void const_copy_to(const char *name, void *host, size_t size)
	{
		CUdeviceptr mem;
		size_t bytes;

		cuda_push_context();
		cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name))
		//assert(bytes == size);
		cuda_assert(cuMemcpyHtoD(mem, host, size))
		cuda_pop_context();
	}
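cuModuleGetGlobal() resolves the device address and size of a __constant__ (or __device__) symbol compiled into the module, and that address can then be overwritten with an ordinary cuMemcpyHtoD(). A minimal sketch of the two sides, where the symbol name and layout are hypothetical:

	/* device side, in the .cu file that was compiled into the cubin (hypothetical symbol) */
	__constant__ float params[8];

	/* host side: roughly what const_copy_to("params", host_params, sizeof(host_params)) does */
	float host_params[8] = { 1.0f };
	CUdeviceptr dptr;
	size_t bytes;
	cuda_assert(cuModuleGetGlobal(&dptr, &bytes, cuModule, "params"))
	cuda_assert(cuMemcpyHtoD(dptr, host_params, sizeof(host_params)))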
Example #10
	~CUDADevice()
	{
		cuda_push_context();
		cuda_assert(cuCtxDetach(cuContext))
	}