Code example #1
File: gdev_nvidia.c  Project: GoelDeepak/gdev
/* add a new VAS object into the device VAS list. */
static void __gdev_vas_list_add(struct gdev_vas *vas)
{
	struct gdev_device *gdev = vas->gdev;
	unsigned long flags;
	
	gdev_lock_save(&gdev->vas_lock, &flags);
	gdev_list_add(&vas->list_entry, &gdev->vas_list);
	gdev_unlock_restore(&gdev->vas_lock, &flags);
}
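For symmetry, removing a VAS from the device list takes the same IRQ-saving lock around the deletion. The helper below is a sketch of that counterpart, assuming gdev's gdev_list_del() primitive; it is not copied from the project tree.

/* remove a VAS object from the device VAS list (sketch, not from the tree). */
static void __gdev_vas_list_del(struct gdev_vas *vas)
{
	struct gdev_device *gdev = vas->gdev;
	unsigned long flags;

	gdev_lock_save(&gdev->vas_lock, &flags);
	gdev_list_del(&vas->list_entry);
	gdev_unlock_restore(&gdev->vas_lock, &flags);
}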
Code example #2
File: execution.c  Project: GoelDeepak/gdev
/**
 * Invokes the kernel f on a grid_width x grid_height grid of blocks. Each 
 * block contains the number of threads specified by a previous call to 
 * cuFuncSetBlockShape().
 *
 * Parameters:
 * f - Kernel to launch
 * grid_width - Width of grid in blocks
 * grid_height - Height of grid in blocks
 *
 * Returns:
 * CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, 
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE, 
 * CUDA_ERROR_LAUNCH_FAILED, CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, 
 * CUDA_ERROR_LAUNCH_TIMEOUT, CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING 
 */
CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height)
{
	struct CUfunc_st *func = f;
	struct CUmod_st *mod;
	struct CUctx_st *ctx;
	struct gdev_kernel *k;
	struct gdev_cuda_fence *fence;
	Ghandle handle;

	if (!gdev_initialized)
		return CUDA_ERROR_NOT_INITIALIZED;
	if (!func || grid_width <= 0 || grid_height <= 0)
		return CUDA_ERROR_INVALID_VALUE;
	/* dereference the function only after it has been NULL-checked. */
	mod = func->mod;
	ctx = mod->ctx;
	if (!ctx || ctx != gdev_ctx_current)
		return CUDA_ERROR_INVALID_CONTEXT;
	if (!(fence = (struct gdev_cuda_fence *)MALLOC(sizeof(*fence))))
		return CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES;

	k = &func->kernel;
	k->grid_x = grid_width;
	k->grid_y = grid_height;
	k->grid_z = 1;
	k->grid_id = ++ctx->launch_id;

	k->smem_base = gdev_cuda_align_base(0);
	k->lmem_base = k->smem_base + gdev_cuda_align_base(k->smem_size);

	handle = gdev_ctx_current->gdev_handle;

	if (glaunch(handle, k, &fence->id)) {
		FREE(fence); /* don't leak the fence if the launch fails. */
		return CUDA_ERROR_LAUNCH_FAILED;
	}
	fence->addr_ref = 0; /* no address to unreference later. */
	gdev_list_init(&fence->list_entry, fence);
	gdev_list_add(&fence->list_entry, &ctx->sync_list);

	return CUDA_SUCCESS;
}
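A rough host-side usage sketch for cuLaunchGrid() (not taken from the gdev tree): the block shape must be set with cuFuncSetBlockShape() before the launch, and cuCtxSynchronize() waits for completion. The module path and kernel name below are placeholders.

CUmodule mod;
CUfunction f;
CUresult res;

res = cuModuleLoad(&mod, "./kernel.cubin");
if (res != CUDA_SUCCESS)
	return res;
res = cuModuleGetFunction(&f, mod, "vecAdd");
if (res != CUDA_SUCCESS)
	return res;

/* 256 threads per block, launched on a 16x1 grid of blocks. */
res = cuFuncSetBlockShape(f, 256, 1, 1);
if (res != CUDA_SUCCESS)
	return res;
res = cuLaunchGrid(f, 16, 1);
if (res != CUDA_SUCCESS)
	return res;

/* the launch is asynchronous; wait for the kernel to finish. */
res = cuCtxSynchronize();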
Code example #3
File: memory.c  Project: Aeternam/gdev
/**
 * Copies from device to host memory. dstHost and srcDevice specify the base 
 * pointers of the destination and source, respectively. ByteCount specifies 
 * the number of bytes to copy.
 *
 * cuMemcpyDtoHAsync() is asynchronous and can optionally be associated with a 
 * stream by passing a non-zero hStream argument. It only works on page-locked 
 * memory and returns an error if a pointer to pageable memory is passed as 
 * input.
 *
 * Parameters:
 * dstHost - Destination host pointer
 * srcDevice - Source device pointer
 * ByteCount - Size of memory copy in bytes
 * hStream - Stream identifier
 *
 * Returns:
 * CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, 
 * CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE 
 */
CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream)
{
	CUresult res;
	struct CUctx_st *ctx;
	Ghandle handle, handle_r;
	struct CUstream_st *stream = hStream;
	void *dst_buf = dstHost;
	uint64_t src_addr = srcDevice;
	uint64_t src_addr_r, dst_addr_r, dst_addr;
	uint32_t size = ByteCount;
	struct gdev_cuda_fence *fence;
	uint32_t id;

	if (!stream)
		return cuMemcpyDtoH(dst_buf, src_addr, size);

	if (!gdev_initialized)
		return CUDA_ERROR_NOT_INITIALIZED;
	if (!dst_buf || !src_addr || !size)
		return CUDA_ERROR_INVALID_VALUE;

	res = cuCtxGetCurrent(&ctx);
	if (res != CUDA_SUCCESS)
		return res;
	if (ctx != stream->ctx)
		return CUDA_ERROR_INVALID_CONTEXT;

	fence = (struct gdev_cuda_fence *)MALLOC(sizeof(*fence));
	if (!fence)
		return CUDA_ERROR_OUT_OF_MEMORY; /* this API shouldn't return it... */

	handle = ctx->gdev_handle;
	handle_r = stream->gdev_handle;

	/* reference the device memory address. */
	if (!(src_addr_r = gref(handle, src_addr, size, handle_r)))
		goto fail_gref;

	/* translate from buffer to address. */
	if (!(dst_addr = gvirtget(handle, dst_buf)))
		goto fail_gvirtget;

	/* reference the host memory address. */
	if (!(dst_addr_r = gref(handle, dst_addr, size, handle_r)))
		goto fail_gref_dma;

	/* now we can just copy data in the global address space. */
	if (gmemcpy_async(handle_r, dst_addr_r, src_addr_r, size, &id))
		goto fail_gmemcpy;

	fence->id = id;
	fence->addr_ref = src_addr_r;
	gdev_list_init(&fence->list_entry, fence);
	gdev_list_add(&fence->list_entry, &stream->sync_list);

	return CUDA_SUCCESS;

fail_gmemcpy:
	gunref(handle_r, dst_addr_r);
fail_gref_dma:
fail_gvirtget:
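	/* in both cases above, the source reference taken by the first gref() is dropped. */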
	gunref(handle_r, src_addr_r);
fail_gref:
	FREE(fence);

	return CUDA_ERROR_UNKNOWN;
}
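A minimal caller sketch for the asynchronous copy, assuming the host buffer is page-locked via cuMemAllocHost() and the copy is completed with cuStreamSynchronize(); the buffer size and names are illustrative only.

CUstream stream;
CUdeviceptr d_buf;
void *h_buf;
unsigned int bytes = 1024 * 1024;
CUresult res;

res = cuStreamCreate(&stream, 0);
if (res != CUDA_SUCCESS)
	return res;
res = cuMemAlloc(&d_buf, bytes);
if (res != CUDA_SUCCESS)
	return res;
/* the asynchronous copy requires page-locked host memory. */
res = cuMemAllocHost(&h_buf, bytes);
if (res != CUDA_SUCCESS)
	return res;

res = cuMemcpyDtoHAsync(h_buf, d_buf, bytes, stream);
if (res != CUDA_SUCCESS)
	return res;

/* wait until all operations queued on the stream have completed. */
res = cuStreamSynchronize(stream);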