void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) { cu.setAsCurrent(); CudaIntegrationUtilities& integration = cu.getIntegrationUtilities(); // Loop over copies and compute the force on each one. if (!forcesAreValid) computeForces(context); // Apply the PILE-L thermostat. bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()); double dt = integrator.getStepSize(); float dtFloat = (float) dt; void* dtPtr = (useDoublePrecision ? (void*) &dt : (void*) &dtFloat); double kT = integrator.getTemperature()*BOLTZ; float kTFloat = (float) kT; void* kTPtr = (useDoublePrecision ? (void*) &kT : (void*) &kTFloat); double friction = integrator.getFriction(); float frictionFloat = (float) friction; void* frictionPtr = (useDoublePrecision ? (void*) &friction : (void*) &frictionFloat); int randomIndex = integration.prepareRandomNumbers(numParticles*numCopies); void* pileArgs[] = {&velocities->getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr}; cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize); // Update positions and velocities. void* stepArgs[] = {&positions->getDevicePointer(), &velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr, kTPtr}; cu.executeKernel(stepKernel, stepArgs, numParticles*numCopies, workgroupSize); // Calculate forces based on the updated positions. computeForces(context); // Update velocities. void* velocitiesArgs[] = {&velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr}; cu.executeKernel(velocitiesKernel, velocitiesArgs, numParticles*numCopies, workgroupSize); // Apply the PILE-L thermostat again. randomIndex = integration.prepareRandomNumbers(numParticles*numCopies); cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize); // Update the time and step count. cu.setTime(cu.getTime()+dt); cu.setStepCount(cu.getStepCount()+1); cu.reorderAtoms(); if (cu.getAtomsWereReordered() && cu.getNonbondedUtilities().getUsePeriodic()) { // Atoms may have been translated into a different periodic box, so apply // the same translation to all the beads. int i = numCopies-1; void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i}; cu.executeKernel(translateKernel, args, cu.getNumAtoms()); } }
void ReferenceIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) { int numCopies = integrator.getNumCopies(); int numParticles = system.getNumParticles(); positions.resize(numCopies); velocities.resize(numCopies); forces.resize(numCopies); for (int i = 0; i < numCopies; i++) { positions[i].resize(numParticles); velocities[i].resize(numParticles); forces[i].resize(numParticles); } fftpack_init_1d(&fft, numCopies); SimTKOpenMMUtilities::setRandomNumberSeed((unsigned int) integrator.getRandomNumberSeed()); // Build a list of contractions. groupsNotContracted = -1; const map<int, int>& contractions = integrator.getContractions(); int maxContractedCopies = 0; for (map<int, int>::const_iterator iter = contractions.begin(); iter != contractions.end(); ++iter) { int group = iter->first; int copies = iter->second; if (group < 0 || group > 31) throw OpenMMException("RPMDIntegrator: Force group must be between 0 and 31"); if (copies < 0 || copies > numCopies) throw OpenMMException("RPMDIntegrator: Number of copies for contraction cannot be greater than the total number of copies being simulated"); if (copies != numCopies) { if (groupsByCopies.find(copies) == groupsByCopies.end()) { groupsByCopies[copies] = 1<<group; contractionFFT[copies] = NULL; fftpack_init_1d(&contractionFFT[copies], copies); if (copies > maxContractedCopies) maxContractedCopies = copies; } else groupsByCopies[copies] |= 1<<group; groupsNotContracted -= 1<<group; } } // Create workspace for doing contractions. contractedPositions.resize(maxContractedCopies); contractedForces.resize(maxContractedCopies); for (int i = 0; i < maxContractedCopies; i++) { contractedPositions[i].resize(numParticles); contractedForces[i].resize(numParticles); } }
void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) { cu.getPlatformData().initializeContexts(system); numCopies = integrator.getNumCopies(); numParticles = system.getNumParticles(); workgroupSize = numCopies; if (numCopies != findFFTDimension(numCopies)) throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."); int paddedParticles = cu.getPaddedNumAtoms(); bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()); int elementSize = (useDoublePrecision ? sizeof(double4) : sizeof(float4)); forces = CudaArray::create<long long>(cu, numCopies*paddedParticles*3, "rpmdForces"); positions = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdPositions"); velocities = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities"); cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); // Fill in the posq and velm arrays with safe values to avoid a risk of nans. if (useDoublePrecision) { vector<double4> temp(positions->getSize()); for (int i = 0; i < positions->getSize(); i++) temp[i] = make_double4(0, 0, 0, 0); positions->upload(temp); for (int i = 0; i < velocities->getSize(); i++) temp[i] = make_double4(0, 0, 0, 1); velocities->upload(temp); } else { vector<float4> temp(positions->getSize()); for (int i = 0; i < positions->getSize(); i++) temp[i] = make_float4(0, 0, 0, 0); positions->upload(temp); for (int i = 0; i < velocities->getSize(); i++) temp[i] = make_float4(0, 0, 0, 1); velocities->upload(temp); } // Build a list of contractions. groupsNotContracted = -1; const map<int, int>& contractions = integrator.getContractions(); int maxContractedCopies = 0; for (map<int, int>::const_iterator iter = contractions.begin(); iter != contractions.end(); ++iter) { int group = iter->first; int copies = iter->second; if (group < 0 || group > 31) throw OpenMMException("RPMDIntegrator: Force group must be between 0 and 31"); if (copies < 0 || copies > numCopies) throw OpenMMException("RPMDIntegrator: Number of copies for contraction cannot be greater than the total number of copies being simulated"); if (copies != findFFTDimension(copies)) throw OpenMMException("RPMDIntegrator: Number of copies for contraction must be a multiple of powers of 2, 3, and 5."); if (copies != numCopies) { if (groupsByCopies.find(copies) == groupsByCopies.end()) { groupsByCopies[copies] = 1<<group; if (copies > maxContractedCopies) maxContractedCopies = copies; } else groupsByCopies[copies] |= 1<<group; groupsNotContracted -= 1<<group; } } if (maxContractedCopies > 0) { contractedForces = CudaArray::create<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces"); contractedPositions = new CudaArray(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions"); } // Create kernels. map<string, string> defines; defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["NUM_COPIES"] = cu.intToString(numCopies); defines["THREAD_BLOCK_SIZE"] = cu.intToString(workgroupSize); defines["HBAR"] = cu.doubleToString(1.054571628e-34*AVOGADRO/(1000*1e-12)); defines["SCALE"] = cu.doubleToString(1.0/sqrt((double) numCopies)); defines["M_PI"] = cu.doubleToString(M_PI); map<string, string> replacements; replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true); replacements["FFT_Q_BACKWARD"] = createFFT(numCopies, "q", false); replacements["FFT_V_FORWARD"] = createFFT(numCopies, "v", true); replacements["FFT_V_BACKWARD"] = createFFT(numCopies, "v", false); CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaRpmdKernelSources::rpmd, replacements), defines, ""); pileKernel = cu.getKernel(module, "applyPileThermostat"); stepKernel = cu.getKernel(module, "integrateStep"); velocitiesKernel = cu.getKernel(module, "advanceVelocities"); copyToContextKernel = cu.getKernel(module, "copyDataToContext"); copyFromContextKernel = cu.getKernel(module, "copyDataFromContext"); translateKernel = cu.getKernel(module, "applyCellTranslations"); // Create kernels for doing contractions. for (map<int, int>::const_iterator iter = groupsByCopies.begin(); iter != groupsByCopies.end(); ++iter) { int copies = iter->first; replacements.clear(); replacements["NUM_CONTRACTED_COPIES"] = cu.intToString(copies); replacements["POS_SCALE"] = cu.doubleToString(1.0/numCopies); replacements["FORCE_SCALE"] = cu.doubleToString(0x100000000/(double) copies); replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true); replacements["FFT_Q_BACKWARD"] = createFFT(copies, "q", false); replacements["FFT_F_FORWARD"] = createFFT(copies, "f", true); replacements["FFT_F_BACKWARD"] = createFFT(numCopies, "f", false); module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaRpmdKernelSources::rpmdContraction, replacements), defines, ""); positionContractionKernels[copies] = cu.getKernel(module, "contractPositions"); forceContractionKernels[copies] = cu.getKernel(module, "contractForces"); } }
void ReferenceIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) { const int numCopies = positions.size(); const int numParticles = positions[0].size(); const RealOpenMM dt = integrator.getStepSize(); const RealOpenMM halfdt = 0.5*dt; const System& system = context.getSystem(); vector<RealVec>& pos = extractPositions(context); vector<RealVec>& vel = extractVelocities(context); vector<RealVec>& f = extractForces(context); // Loop over copies and compute the force on each one. if (!forcesAreValid) computeForces(context, integrator); // Apply the PILE-L thermostat. vector<t_complex> v(numCopies); vector<t_complex> q(numCopies); const RealOpenMM hbar = 1.054571628e-34*AVOGADRO/(1000*1e-12); const RealOpenMM scale = 1.0/sqrt((RealOpenMM) numCopies); const RealOpenMM nkT = numCopies*BOLTZ*integrator.getTemperature(); const RealOpenMM twown = 2.0*nkT/hbar; const RealOpenMM c1_0 = exp(-halfdt*integrator.getFriction()); const RealOpenMM c2_0 = sqrt(1.0-c1_0*c1_0); if (integrator.getApplyThermostat()) { for (int particle = 0; particle < numParticles; particle++) { if (system.getParticleMass(particle) == 0.0) continue; const RealOpenMM c3_0 = c2_0*sqrt(nkT/system.getParticleMass(particle)); for (int component = 0; component < 3; component++) { for (int k = 0; k < numCopies; k++) v[k] = t_complex(scale*velocities[k][particle][component], 0.0); fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]); // Apply a local Langevin thermostat to the centroid mode. v[0].re = v[0].re*c1_0 + c3_0*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); // Use critical damping white noise for the remaining modes. for (int k = 1; k <= numCopies/2; k++) { const bool isCenter = (numCopies%2 == 0 && k == numCopies/2); const RealOpenMM wk = twown*sin(k*M_PI/numCopies); const RealOpenMM c1 = exp(-2.0*wk*halfdt); const RealOpenMM c2 = sqrt((1.0-c1*c1)/2) * (isCenter ? sqrt(2.0) : 1.0); const RealOpenMM c3 = c2*sqrt(nkT/system.getParticleMass(particle)); RealOpenMM rand1 = c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); RealOpenMM rand2 = (isCenter ? 0.0 : c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber()); v[k] = v[k]*c1 + t_complex(rand1, rand2); if (k < numCopies-k) v[numCopies-k] = v[numCopies-k]*c1 + t_complex(rand1, -rand2); } fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]); for (int k = 0; k < numCopies; k++) velocities[k][particle][component] = scale*v[k].re; } } } // Update velocities. for (int i = 0; i < numCopies; i++) for (int j = 0; j < numParticles; j++) if (system.getParticleMass(j) != 0.0) velocities[i][j] += forces[i][j]*(halfdt/system.getParticleMass(j)); // Evolve the free ring polymer by transforming to the frequency domain. for (int particle = 0; particle < numParticles; particle++) { if (system.getParticleMass(particle) == 0.0) continue; for (int component = 0; component < 3; component++) { for (int k = 0; k < numCopies; k++) { q[k] = t_complex(scale*positions[k][particle][component], 0.0); v[k] = t_complex(scale*velocities[k][particle][component], 0.0); } fftpack_exec_1d(fft, FFTPACK_FORWARD, &q[0], &q[0]); fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]); q[0] += v[0]*dt; for (int k = 1; k < numCopies; k++) { const RealOpenMM wk = twown*sin(k*M_PI/numCopies); const RealOpenMM wt = wk*dt; const RealOpenMM coswt = cos(wt); const RealOpenMM sinwt = sin(wt); const RealOpenMM wm = wk*system.getParticleMass(particle); const t_complex vprime = v[k]*coswt - q[k]*(wk*sinwt); // Advance velocity from t to t+dt q[k] = v[k]*(sinwt/wk) + q[k]*coswt; // Advance position from t to t+dt v[k] = vprime; } fftpack_exec_1d(fft, FFTPACK_BACKWARD, &q[0], &q[0]); fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]); for (int k = 0; k < numCopies; k++) { positions[k][particle][component] = scale*q[k].re; velocities[k][particle][component] = scale*v[k].re; } } } // Calculate forces based on the updated positions. computeForces(context, integrator); // Update velocities. for (int i = 0; i < numCopies; i++) for (int j = 0; j < numParticles; j++) if (system.getParticleMass(j) != 0.0) velocities[i][j] += forces[i][j]*(halfdt/system.getParticleMass(j)); // Apply the PILE-L thermostat again. if (integrator.getApplyThermostat()) { for (int particle = 0; particle < numParticles; particle++) { if (system.getParticleMass(particle) == 0.0) continue; const RealOpenMM c3_0 = c2_0*sqrt(nkT/system.getParticleMass(particle)); for (int component = 0; component < 3; component++) { for (int k = 0; k < numCopies; k++) v[k] = t_complex(scale*velocities[k][particle][component], 0.0); fftpack_exec_1d(fft, FFTPACK_FORWARD, &v[0], &v[0]); // Apply a local Langevin thermostat to the centroid mode. v[0].re = v[0].re*c1_0 + c3_0*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); // Use critical damping white noise for the remaining modes. for (int k = 1; k <= numCopies/2; k++) { const bool isCenter = (numCopies%2 == 0 && k == numCopies/2); const RealOpenMM wk = twown*sin(k*M_PI/numCopies); const RealOpenMM c1 = exp(-2.0*wk*halfdt); const RealOpenMM c2 = sqrt((1.0-c1*c1)/2) * (isCenter ? sqrt(2.0) : 1.0); const RealOpenMM c3 = c2*sqrt(nkT/system.getParticleMass(particle)); RealOpenMM rand1 = c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber(); RealOpenMM rand2 = (isCenter ? 0.0 : c3*SimTKOpenMMUtilities::getNormallyDistributedRandomNumber()); v[k] = v[k]*c1 + t_complex(rand1, rand2); if (k < numCopies-k) v[numCopies-k] = v[numCopies-k]*c1 + t_complex(rand1, -rand2); } fftpack_exec_1d(fft, FFTPACK_BACKWARD, &v[0], &v[0]); for (int k = 0; k < numCopies; k++) velocities[k][particle][component] = scale*v[k].re; } } } // Update the time. context.setTime(context.getTime()+dt); }