Example #1
0
void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
    cu.setAsCurrent();
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    
    // Loop over copies and compute the force on each one.
    
    if (!forcesAreValid)
        computeForces(context);
    
    // Apply the PILE-L thermostat.
    
    bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
    double dt = integrator.getStepSize();
    float dtFloat = (float) dt;
    void* dtPtr = (useDoublePrecision ? (void*) &dt : (void*) &dtFloat);
    double kT = integrator.getTemperature()*BOLTZ;
    float kTFloat = (float) kT;
    void* kTPtr = (useDoublePrecision ? (void*) &kT : (void*) &kTFloat);
    double friction = integrator.getFriction();
    float frictionFloat = (float) friction;
    void* frictionPtr = (useDoublePrecision ? (void*) &friction : (void*) &frictionFloat);
    int randomIndex = integration.prepareRandomNumbers(numParticles*numCopies);
    void* pileArgs[] = {&velocities->getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
    cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize);

    // Update positions and velocities.
    
    void* stepArgs[] = {&positions->getDevicePointer(), &velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr, kTPtr};
    cu.executeKernel(stepKernel, stepArgs, numParticles*numCopies, workgroupSize);

    // Calculate forces based on the updated positions.
    
    computeForces(context);
    
    // Update velocities.

    void* velocitiesArgs[] = {&velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr};
    cu.executeKernel(velocitiesKernel, velocitiesArgs, numParticles*numCopies, workgroupSize);

    // Apply the PILE-L thermostat again.

    randomIndex = integration.prepareRandomNumbers(numParticles*numCopies);
    cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize);

    // Update the time and step count.

    cu.setTime(cu.getTime()+dt);
    cu.setStepCount(cu.getStepCount()+1);
    cu.reorderAtoms();
    if (cu.getAtomsWereReordered() && cu.getNonbondedUtilities().getUsePeriodic()) {
        // Atoms may have been translated into a different periodic box, so apply
        // the same translation to all the beads.

        int i = numCopies-1;
        void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(translateKernel, args, cu.getNumAtoms());
    }
}
void ReferenceIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
    int numCopies = integrator.getNumCopies();
    int numParticles = system.getNumParticles();
    positions.resize(numCopies);
    velocities.resize(numCopies);
    forces.resize(numCopies);
    for (int i = 0; i < numCopies; i++) {
        positions[i].resize(numParticles);
        velocities[i].resize(numParticles);
        forces[i].resize(numParticles);
    }
    fftpack_init_1d(&fft, numCopies);
    SimTKOpenMMUtilities::setRandomNumberSeed((unsigned int) integrator.getRandomNumberSeed());
    
    // Build a list of contractions.
    
    groupsNotContracted = -1;
    const map<int, int>& contractions = integrator.getContractions();
    int maxContractedCopies = 0;
    for (map<int, int>::const_iterator iter = contractions.begin(); iter != contractions.end(); ++iter) {
        int group = iter->first;
        int copies = iter->second;
        if (group < 0 || group > 31)
            throw OpenMMException("RPMDIntegrator: Force group must be between 0 and 31");
        if (copies < 0 || copies > numCopies)
            throw OpenMMException("RPMDIntegrator: Number of copies for contraction cannot be greater than the total number of copies being simulated");
        if (copies != numCopies) {
            if (groupsByCopies.find(copies) == groupsByCopies.end()) {
                groupsByCopies[copies] = 1<<group;
                contractionFFT[copies] = NULL;
                fftpack_init_1d(&contractionFFT[copies], copies);
                if (copies > maxContractedCopies)
                    maxContractedCopies = copies;
            }
            else
                groupsByCopies[copies] |= 1<<group;
            groupsNotContracted -= 1<<group;
        }
    }
    
    // Create workspace for doing contractions.
    
    contractedPositions.resize(maxContractedCopies);
    contractedForces.resize(maxContractedCopies);
    for (int i = 0; i < maxContractedCopies; i++) {
        contractedPositions[i].resize(numParticles);
        contractedForces[i].resize(numParticles);
    }
}
void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
    cu.getPlatformData().initializeContexts(system);
    numCopies = integrator.getNumCopies();
    numParticles = system.getNumParticles();
    workgroupSize = numCopies;
    if (numCopies != findFFTDimension(numCopies))
        throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
    int paddedParticles = cu.getPaddedNumAtoms();
    bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
    int elementSize = (useDoublePrecision ? sizeof(double4) : sizeof(float4));
    forces = CudaArray::create<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
    positions = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
    velocities = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
    cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
    
    // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
    
    if (useDoublePrecision) {
        vector<double4> temp(positions->getSize());
        for (int i = 0; i < positions->getSize(); i++)
            temp[i] = make_double4(0, 0, 0, 0);
        positions->upload(temp);
        for (int i = 0; i < velocities->getSize(); i++)
            temp[i] = make_double4(0, 0, 0, 1);
        velocities->upload(temp);
    }
    else {
        vector<float4> temp(positions->getSize());
        for (int i = 0; i < positions->getSize(); i++)
            temp[i] = make_float4(0, 0, 0, 0);
        positions->upload(temp);
        for (int i = 0; i < velocities->getSize(); i++)
            temp[i] = make_float4(0, 0, 0, 1);
        velocities->upload(temp);
    }
    
    // Build a list of contractions.
    
    groupsNotContracted = -1;
    const map<int, int>& contractions = integrator.getContractions();
    int maxContractedCopies = 0;
    for (map<int, int>::const_iterator iter = contractions.begin(); iter != contractions.end(); ++iter) {
        int group = iter->first;
        int copies = iter->second;
        if (group < 0 || group > 31)
            throw OpenMMException("RPMDIntegrator: Force group must be between 0 and 31");
        if (copies < 0 || copies > numCopies)
            throw OpenMMException("RPMDIntegrator: Number of copies for contraction cannot be greater than the total number of copies being simulated");
        if (copies != findFFTDimension(copies))
            throw OpenMMException("RPMDIntegrator: Number of copies for contraction must be a multiple of powers of 2, 3, and 5.");
        if (copies != numCopies) {
            if (groupsByCopies.find(copies) == groupsByCopies.end()) {
                groupsByCopies[copies] = 1<<group;
                if (copies > maxContractedCopies)
                    maxContractedCopies = copies;
            }
            else
                groupsByCopies[copies] |= 1<<group;
            groupsNotContracted -= 1<<group;
        }
    }
    if (maxContractedCopies > 0) {
        contractedForces = CudaArray::create<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
        contractedPositions = new CudaArray(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
    }

    // Create kernels.
    
    map<string, string> defines;
    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
    defines["NUM_COPIES"] = cu.intToString(numCopies);
    defines["THREAD_BLOCK_SIZE"] = cu.intToString(workgroupSize);
    defines["HBAR"] = cu.doubleToString(1.054571628e-34*AVOGADRO/(1000*1e-12));
    defines["SCALE"] = cu.doubleToString(1.0/sqrt((double) numCopies));
    defines["M_PI"] = cu.doubleToString(M_PI);
    map<string, string> replacements;
    replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true);
    replacements["FFT_Q_BACKWARD"] = createFFT(numCopies, "q", false);
    replacements["FFT_V_FORWARD"] = createFFT(numCopies, "v", true);
    replacements["FFT_V_BACKWARD"] = createFFT(numCopies, "v", false);
    CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaRpmdKernelSources::rpmd, replacements), defines, "");
    pileKernel = cu.getKernel(module, "applyPileThermostat");
    stepKernel = cu.getKernel(module, "integrateStep");
    velocitiesKernel = cu.getKernel(module, "advanceVelocities");
    copyToContextKernel = cu.getKernel(module, "copyDataToContext");
    copyFromContextKernel = cu.getKernel(module, "copyDataFromContext");
    translateKernel = cu.getKernel(module, "applyCellTranslations");
    
    // Create kernels for doing contractions.
    
    for (map<int, int>::const_iterator iter = groupsByCopies.begin(); iter != groupsByCopies.end(); ++iter) {
        int copies = iter->first;
        replacements.clear();
        replacements["NUM_CONTRACTED_COPIES"] = cu.intToString(copies);
        replacements["POS_SCALE"] = cu.doubleToString(1.0/numCopies);
        replacements["FORCE_SCALE"] = cu.doubleToString(0x100000000/(double) copies);
        replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true);
        replacements["FFT_Q_BACKWARD"] = createFFT(copies, "q", false);
        replacements["FFT_F_FORWARD"] = createFFT(copies, "f", true);
        replacements["FFT_F_BACKWARD"] = createFFT(numCopies, "f", false);
        module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaRpmdKernelSources::rpmdContraction, replacements), defines, "");
        positionContractionKernels[copies] = cu.getKernel(module, "contractPositions");
        forceContractionKernels[copies] = cu.getKernel(module, "contractForces");
    }
}
void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
    const int numCopies = positions.size();
    const int numParticles = positions[0].size();
    const RealOpenMM dt = integrator.getStepSize();
    const RealOpenMM halfdt = 0.5*dt;
    const System& system = context.getSystem();
    vector<RealVec>& pos = extractPositions(context);
    vector<RealVec>& vel = extractVelocities(context);
    vector<RealVec>& f = extractForces(context);
    
    // Loop over copies and compute the force on each one.
    
    if (!forcesAreValid)
        computeForces(context, integrator);

    // Apply the PILE-L thermostat.
    
    vector<t_complex> v(numCopies);
    vector<t_complex> q(numCopies);
    const RealOpenMM hbar = 1.054571628e-34*AVOGADRO/(1000*1e-12);
    const RealOpenMM scale = 1.0/sqrt((RealOpenMM) numCopies);
    const RealOpenMM nkT = numCopies*BOLTZ*integrator.getTemperature();
    const RealOpenMM twown = 2.0*nkT/hbar;
    const RealOpenMM c1_0 = exp(-halfdt*integrator.getFriction());
    const RealOpenMM c2_0 = sqrt(1.0-c1_0*c1_0);
    if (integrator.getApplyThermostat()) {
        for (int particle = 0; particle < numParticles; particle++) {
            if (system.getParticleMass(particle) == 0.0)
                continue;
            const RealOpenMM c3_0 = c2_0*sqrt(nkT/system.getParticleMass(particle));
            for (int component = 0; component < 3; component++) {
                for (int k = 0; k < numCopies; k++)
                    v[k] = t_complex(scale*velocities[k][particle][component], 0.0);
                fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]);

                // Apply a local Langevin thermostat to the centroid mode.

                v[0].re = v[0].re*c1_0 + c3_0*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();

                // Use critical damping white noise for the remaining modes.

                for (int k = 1; k <= numCopies/2; k++) {
                    const bool isCenter = (numCopies%2 == 0 && k == numCopies/2);
                    const RealOpenMM wk = twown*sin(k*M_PI/numCopies);
                    const RealOpenMM c1 = exp(-2.0*wk*halfdt);
                    const RealOpenMM c2 = sqrt((1.0-c1*c1)/2) * (isCenter ? sqrt(2.0) : 1.0);
                    const RealOpenMM c3 = c2*sqrt(nkT/system.getParticleMass(particle));
                    RealOpenMM rand1 = c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();
                    RealOpenMM rand2 = (isCenter ? 0.0 : c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
                    v[k] = v[k]*c1 + t_complex(rand1, rand2);
                    if (k < numCopies-k)
                        v[numCopies-k] = v[numCopies-k]*c1 + t_complex(rand1, -rand2);
                }
                fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]);
                for (int k = 0; k < numCopies; k++)
                    velocities[k][particle][component] = scale*v[k].re;
            }
        }
    }

    // Update velocities.
    
    for (int i = 0; i < numCopies; i++)
        for (int j = 0; j < numParticles; j++)
            if (system.getParticleMass(j) != 0.0)
                velocities[i][j] += forces[i][j]*(halfdt/system.getParticleMass(j));
    
    // Evolve the free ring polymer by transforming to the frequency domain.

    for (int particle = 0; particle < numParticles; particle++) {
        if (system.getParticleMass(particle) == 0.0)
            continue;
        for (int component = 0; component < 3; component++) {
            for (int k = 0; k < numCopies; k++) {
                q[k] = t_complex(scale*positions[k][particle][component], 0.0);
                v[k] = t_complex(scale*velocities[k][particle][component], 0.0);
            }
            fftpack_exec_1d(fft, FFTPACK_FORWARD, &q[0], &q[0]);
            fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]);
            q[0] += v[0]*dt;
            for (int k = 1; k < numCopies; k++) {
                const RealOpenMM wk = twown*sin(k*M_PI/numCopies);
                const RealOpenMM wt = wk*dt;
                const RealOpenMM coswt = cos(wt);
                const RealOpenMM sinwt = sin(wt);
                const RealOpenMM wm = wk*system.getParticleMass(particle);
                const t_complex vprime = v[k]*coswt - q[k]*(wk*sinwt); // Advance velocity from t to t+dt
                q[k] = v[k]*(sinwt/wk) + q[k]*coswt; // Advance position from t to t+dt
                v[k] = vprime;
            }
            fftpack_exec_1d(fft, FFTPACK_BACKWARD, &q[0], &q[0]);
            fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]);
            for (int k = 0; k < numCopies; k++) {
                positions[k][particle][component] = scale*q[k].re;
                velocities[k][particle][component] = scale*v[k].re;
            }
        }
    }
    
    // Calculate forces based on the updated positions.
    
    computeForces(context, integrator);

    // Update velocities.
    
    for (int i = 0; i < numCopies; i++)
        for (int j = 0; j < numParticles; j++)
            if (system.getParticleMass(j) != 0.0)
                velocities[i][j] += forces[i][j]*(halfdt/system.getParticleMass(j));

    // Apply the PILE-L thermostat again.
    
    if (integrator.getApplyThermostat()) {
        for (int particle = 0; particle < numParticles; particle++) {
            if (system.getParticleMass(particle) == 0.0)
                continue;
            const RealOpenMM c3_0 = c2_0*sqrt(nkT/system.getParticleMass(particle));
            for (int component = 0; component < 3; component++) {
                for (int k = 0; k < numCopies; k++)
                    v[k] = t_complex(scale*velocities[k][particle][component], 0.0);
                fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]);

                // Apply a local Langevin thermostat to the centroid mode.

                v[0].re = v[0].re*c1_0 + c3_0*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();

                // Use critical damping white noise for the remaining modes.

                for (int k = 1; k <= numCopies/2; k++) {
                    const bool isCenter = (numCopies%2 == 0 && k == numCopies/2);
                    const RealOpenMM wk = twown*sin(k*M_PI/numCopies);
                    const RealOpenMM c1 = exp(-2.0*wk*halfdt);
                    const RealOpenMM c2 = sqrt((1.0-c1*c1)/2) * (isCenter ? sqrt(2.0) : 1.0);
                    const RealOpenMM c3 = c2*sqrt(nkT/system.getParticleMass(particle));
                    RealOpenMM rand1 = c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();
                    RealOpenMM rand2 = (isCenter ? 0.0 : c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
                    v[k] = v[k]*c1 + t_complex(rand1, rand2);
                    if (k < numCopies-k)
                        v[numCopies-k] = v[numCopies-k]*c1 + t_complex(rand1, -rand2);
                }
                fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]);
                for (int k = 0; k < numCopies; k++)
                    velocities[k][particle][component] = scale*v[k].re;
            }
        }
    }
    
    // Update the time.
    
    context.setTime(context.getTime()+dt);
}