/// Initializes the parallel sum object to sum num_element entries from a cl_mem buffer. /// allocate_temp_buffers: if true will automatically allocate/deallocate buffers. Otherwise you need to do this elsewhere void CRoutine_Sum_NVidia::Init(int n) { int status = CL_SUCCESS; mInputSize = n; mBufferSize = n; // The NVidia SDK kernel on which this routine is based is designed only for power-of-two // sized buffers. Because of this, we'll create internal buffers that round up to the // next highest power of two. if(!isPow2(mBufferSize)) mBufferSize = nextPow2(mBufferSize); // TODO: Workaround for issue 32 in which kernel fails to compute sums for N = [33 - 64] // https://github.com/bkloppenborg/liboi/issues/32 if(mBufferSize < 128) mBufferSize = 128; BuildKernels(); if(mTempBuffer1 == NULL) { mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed."); } if(mTempBuffer2 == NULL) { mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed."); } }
/// Initializes the parallel sum object to sum num_element entries from a cl_mem buffer. /// allocate_temp_buffers: if true will automatically allocate/deallocate buffers. Otherwise you need to do this elsewhere void CRoutine_Sum::Init(int n) { int err = CL_SUCCESS; mInputSize = n; mBufferSize = n; // The NVidia SDK kernel on which this routine is based is designed only for power-of-two // sized buffers. Because of this, we'll create internal buffers that round up to the // next highest power of two. if(!isPow2(mBufferSize)) mBufferSize = nextPow2(mBufferSize); // TODO: Workaround for issue 32 // https://github.com/bkloppenborg/liboi/issues/32 if(mBufferSize < 128) mBufferSize = 128; BuildKernels(); if(mTempBuffer1 == NULL) { mTempBuffer1 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err); mTempBuffer2 = clCreateBuffer(mContext, CL_MEM_READ_WRITE, mBufferSize * sizeof(cl_float), NULL, &err); COpenCL::CheckOCLError("Could not create parallel sum temporary buffer.", err); } }