コード例 #1
0
ファイル: CudaFFT3D.cpp プロジェクト: PauloLira/openmm
/**
 * Launch the sequence of kernels that make up one 3D FFT on a pair of device arrays.
 *
 * Three per-axis kernels are launched in the order z, x, y.  Successive launches
 * swap the order of the two device pointers, so the two arrays alternate roles
 * from one launch to the next (presumably source/destination - confirm against
 * the kernel signatures).  When packRealAsComplex is set, a pack kernel is run
 * before and an unpack kernel after the three passes, and every launch uses
 * half as many threads.
 *
 * @param in       array whose device pointer is the first argument of the first launch
 * @param out      array whose device pointer is the second argument of the first launch
 * @param forward  true to use the forward kernels, false to use the inverse kernels
 */
void CudaFFT3D::execFFT(CudaArray& in, CudaArray& out, bool forward) {
    // Start from the forward kernels and switch to the inverse set if requested.
    CUfunction zPass = zkernel;
    CUfunction xPass = xkernel;
    CUfunction yPass = ykernel;
    if (!forward) {
        zPass = invzkernel;
        xPass = invxkernel;
        yPass = invykernel;
    }

    // The same two device pointers, listed in both possible orders.
    void* inThenOut[] = {&in.getDevicePointer(), &out.getDevicePointer()};
    void* outThenIn[] = {&out.getDevicePointer(), &in.getDevicePointer()};

    if (!packRealAsComplex) {
        // Plain transform: three passes over the full grid.
        const int numElements = xsize*ysize*zsize;
        context.executeKernel(zPass, inThenOut, numElements, zthreads);
        context.executeKernel(xPass, outThenIn, numElements, xthreads);
        context.executeKernel(yPass, inThenOut, numElements, ythreads);
        return;
    }

    // Packed transform: the work is done on a grid of half the size.
    int halfGrid = xsize*ysize*zsize/2;
    CUfunction packer = packForwardKernel;
    CUfunction unpacker = unpackForwardKernel;
    if (!forward) {
        packer = packBackwardKernel;
        unpacker = unpackBackwardKernel;
    }

    // Pack, run the three FFT passes, then unpack.  The argument order
    // flips on every launch.
    context.executeKernel(packer, inThenOut, halfGrid, 128);
    context.executeKernel(zPass, outThenIn, halfGrid, zthreads);
    context.executeKernel(xPass, inThenOut, halfGrid, xthreads);
    context.executeKernel(yPass, outThenIn, halfGrid, ythreads);
    context.executeKernel(unpacker, inThenOut, halfGrid, 128);
}
コード例 #2
0
ファイル: CudaSort.cpp プロジェクト: jchodera/openmm
/**
 * Sort the contents of a device array by launching the sorting kernels.
 *
 * Short lists (isShortList) are handled by a single kernel launch.  Otherwise
 * a five-stage bucket pipeline is run: compute the value range, assign
 * elements to buckets, compute bucket positions, copy elements into the
 * buckets, and sort each bucket.
 *
 * @param data  the array to sort.  Its size must equal dataLength and its
 *              element size must equal trait->getDataSize().
 * @throws OpenMMException if the size or element size of data does not match
 */
void CudaSort::sort(CudaArray& data) {
    // The array must have the length and element size this object expects.
    if (data.getSize() != dataLength || data.getElementSize() != trait->getDataSize())
        throw OpenMMException("CudaSort called with different data size");

    // Nothing to launch for an empty array.
    if (data.getSize() == 0)
        return;

    if (isShortList) {
        // A short list is sorted by a single, simpler kernel.  Which of the two
        // variants is used depends on how dataLength compares to the product of
        // the thread block size and the number of thread blocks.
        const bool useSecondVariant = (dataLength <= CudaContext::ThreadBlockSize*context.getNumThreadBlocks());
        if (useSecondVariant) {
            // This variant is also given the buckets array, whose contents are
            // copied into data after the launch.
            void* shortArgs[] = {&data.getDevicePointer(), &buckets.getDevicePointer(), &dataLength};
            context.executeKernel(shortList2Kernel, shortArgs, dataLength);
            buckets.copyTo(data);
        }
        else {
            // This variant receives only data, and requests enough shared
            // memory to hold dataLength elements.
            void* shortArgs[] = {&data.getDevicePointer(), &dataLength};
            context.executeKernel(shortListKernel, shortArgs, sortKernelSize, sortKernelSize, dataLength*trait->getDataSize());
        }
        return;
    }

    // General case: bucket-based sort.  The number of buckets is taken from the
    // length of the bucketOffset array.
    unsigned int numBuckets = bucketOffset.getSize();

    // Step 1: find the range spanned by the data values.
    void* rangeKernelArgs[] = {&data.getDevicePointer(), &dataLength, &dataRange.getDevicePointer(), &numBuckets, &bucketOffset.getDevicePointer()};
    context.executeKernel(computeRangeKernel, rangeKernelArgs, rangeKernelSize, rangeKernelSize, 2*rangeKernelSize*trait->getKeySize());

    // Step 2: decide which bucket every element belongs to.
    void* assignKernelArgs[] = {&data.getDevicePointer(), &dataLength, &numBuckets, &dataRange.getDevicePointer(),
            &bucketOffset.getDevicePointer(), &bucketOfElement.getDevicePointer(), &offsetInBucket.getDevicePointer()};
    context.executeKernel(assignElementsKernel, assignKernelArgs, data.getSize(), 128);

    // Step 3: work out where each bucket starts.
    void* positionKernelArgs[] = {&numBuckets, &bucketOffset.getDevicePointer()};
    context.executeKernel(computeBucketPositionsKernel, positionKernelArgs, positionsKernelSize, positionsKernelSize, positionsKernelSize*sizeof(int));

    // Step 4: move the elements into their buckets.
    void* copyKernelArgs[] = {&data.getDevicePointer(), &buckets.getDevicePointer(), &dataLength, &bucketOffset.getDevicePointer(),
            &bucketOfElement.getDevicePointer(), &offsetInBucket.getDevicePointer()};
    context.executeKernel(copyToBucketsKernel, copyKernelArgs, data.getSize());

    // Step 5: sort within each bucket.  The thread count is the array size
    // rounded up to a whole multiple of sortKernelSize.
    void* bucketSortArgs[] = {&data.getDevicePointer(), &buckets.getDevicePointer(), &numBuckets, &bucketOffset.getDevicePointer()};
    context.executeKernel(sortBucketsKernel, bucketSortArgs, ((data.getSize()+sortKernelSize-1)/sortKernelSize)*sortKernelSize, sortKernelSize, sortKernelSize*trait->getDataSize());
}