/**
 * Construct the nonbonded utilities for an OpenCL context.
 *
 * All array pointers start out NULL and the cutoff starts out disabled.  The
 * constructor body then selects, based on the device, how the force kernel is
 * launched: the number of thread blocks (numForceThreadBlocks), the size of
 * each block (forceThreadBlockSize) and the number of force buffers
 * (numForceBuffers).
 *
 * @param context    the context these utilities belong to
 */
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) :
        context(context), cutoff(-1.0), useCutoff(false), anyExclusions(false), usePadding(true), numForceBuffers(0),
        exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL),
        interactingTiles(NULL), interactingAtoms(NULL), interactionCount(NULL),
        blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
        oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), nonbondedForceGroup(0) {
    // Pick the launch configuration (thread blocks, block size, force buffers) for this device.

    deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
    if (deviceIsCpu) {
        // CPU device: blocks of a single work-item, with one force buffer per block.

        numForceThreadBlocks = context.getNumThreadBlocks();
        forceThreadBlockSize = 1;
        numForceBuffers = numForceThreadBlocks;
        return;
    }

    if (context.getSIMDWidth() == 32) {
        // SIMD width of exactly 32: size the launch from the number of compute units,
        // using 4 blocks per unit with 64 bit atomics and 3 blocks per unit without.

        const bool hasLongAtomics = context.getSupports64BitGlobalAtomics();
        numForceThreadBlocks = (hasLongAtomics ? 4 : 3)*context.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
        forceThreadBlockSize = 256;
        if (hasLongAtomics) {
            // Forces are accumulated in longForceBuffer, but the reduceForces kernel still
            // needs one forceBuffer in which to store the long results converted to float4
            // for use by later kernels.

            numForceBuffers = 1;
        }
        else
            numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize;
        return;
    }

    // Any other SIMD width: use the context's own block count.

    numForceThreadBlocks = context.getNumThreadBlocks();
    if (context.getSIMDWidth() >= 32)
        forceThreadBlockSize = OpenCLContext::ThreadBlockSize;
    else
        forceThreadBlockSize = 32;
    if (context.getSupports64BitGlobalAtomics()) {
        // As above: a single forceBuffer is kept so reduceForces can convert the long
        // results into float4 for later kernels.

        numForceBuffers = 1;
    }
    else
        numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize;
}
/**
 * Construct the compaction helper for an OpenCL context.
 *
 * Compiles the "compact" kernel source, looks up the "countElts" and
 * "moveValidElementsStaged" kernels, and allocates the dgBlockCounts array with
 * one cl_uint element per thread block of the context.
 *
 * @param context    the context used to compile the kernels and allocate the array
 */
OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context), dgBlockCounts(NULL) {
    // Build the program and kernels before allocating dgBlockCounts.  If this constructor
    // throws, the destructor of OpenCLCompact is not run, so anything already stored in
    // dgBlockCounts would never be released (NOTE(review): assumes dgBlockCounts is a raw
    // owning pointer freed by the destructor - confirm against the class declaration).
    // Allocating it last means a failure while compiling or looking up a kernel leaves
    // nothing behind; the kernel members clean up after themselves.

    cl::Program program = context.createProgram(OpenCLKernelSources::compact);
    countKernel = cl::Kernel(program, "countElts");
    moveValidKernel = cl::Kernel(program, "moveValidElementsStaged");
    dgBlockCounts = OpenCLArray::create<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
}