Esempio n. 1
0
/// Initializes the parallel sum object to sum num_element entries from a cl_mem buffer.
/// allocate_temp_buffers: if true will automatically allocate/deallocate buffers. Otherwise you need to do this elsewhere
void CRoutine_Sum_NVidia::Init(int n)
{
	int status = CL_SUCCESS;

	mInputSize = n;
	mBufferSize = n;

	// The NVidia SDK kernel on which this routine is based is designed only for power-of-two
	// sized buffers. Because of this, we'll create internal buffers that round up to the
	// next highest power of two.
	if(!isPow2(mBufferSize))
		mBufferSize = nextPow2(mBufferSize);

	// TODO: Workaround for issue 32 in which kernel fails to compute sums for N = [33 - 64]
	// https://github.com/bkloppenborg/liboi/issues/32
	if(mBufferSize < 128)
		mBufferSize = 128;

	BuildKernels();

	if(mTempBuffer1 == NULL)
	{
		mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status);
		CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.");
	}

	if(mTempBuffer2 == NULL)
	{
		mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status);
		CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.");
	}
}
Esempio n. 2
0
/// Initializes the parallel sum object to sum n entries from a cl_mem buffer.
///
/// Records the input size, computes the internal buffer size (n padded up to a power of
/// two, with a minimum of 128 elements), builds the kernels and, when mTempBuffer1 is still
/// NULL, allocates both temporary buffers. Each allocation's status is checked on its own.
///
/// \param n number of elements that will be summed.
void CRoutine_Sum::Init(int n)
{
	int err = CL_SUCCESS;

	mInputSize = n;
	mBufferSize = n;

	// The NVidia SDK kernel on which this routine is based is designed only for power-of-two
	// sized buffers. Because of this, we'll create internal buffers that round up to the
	// next highest power of two.
	if(!isPow2(mBufferSize))
		mBufferSize = nextPow2(mBufferSize);

	// TODO: Workaround for issue 32
	// https://github.com/bkloppenborg/liboi/issues/32
	if(mBufferSize < 128)
		mBufferSize = 128;

	BuildKernels();

	// NOTE(review): mTempBuffer2 is only allocated together with mTempBuffer1; this assumes
	// the two buffers are always set up (or left NULL) as a pair -- confirm against callers.
	if(mTempBuffer1 == NULL)
	{
		// Check the status after EACH allocation: reusing err for the second call without
		// checking first would hide a failure of the first clCreateBuffer.
		mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err);
		COpenCL::CheckOCLError("Could not create parallel sum temporary buffer.", err);

		mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err);
		COpenCL::CheckOCLError("Could not create parallel sum temporary buffer.", err);
	}
}