double PmeKSpace::compute_energy_orthogonal_helper(float *q_arr, const Lattice &lattice, double ewald, double *virial) { double energy = 0.0; double v0 = 0.; double v1 = 0.; double v2 = 0.; double v3 = 0.; double v4 = 0.; double v5 = 0.; int n; int k1, k2, k3, ind; int K1, K2, K3; K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; i_pi_volume = 1.0/(M_PI * lattice.volume()); piob = M_PI/ewald; piob *= piob; double recipx = lattice.a_r().x; double recipy = lattice.b_r().y; double recipz = lattice.c_r().z; init_exp(exp1, K1, 0, K1, recipx); init_exp(exp2, K2, k2_start, k2_end, recipy); init_exp(exp3, K3, k3_start, k3_end, recipz); double recips[] = {recipx, recipy, recipz}; const int NPARTS=CmiMyNodeSize(); //this controls the granularity of loop parallelism ALLOCA(double, partialEnergy, NPARTS); ALLOCA(double, partialVirial, 6*NPARTS); int unitDist[] = {K1/NPARTS, K1%NPARTS}; //parallelize the following loop using CkLoop void *params[] = {this, q_arr, recips, partialEnergy, partialVirial, unitDist}; #if USE_CKLOOP CProxy_FuncCkLoop ckLoop = CkpvAccess(BOCclass_group).ckLoop; CkLoop_Parallelize(ckLoop, compute_energy_orthogonal_ckloop, 6, (void *)params, NPARTS, 0, NPARTS-1); #endif /* //The transformed loop used to compute energy int unit = K1/NPARTS; int remains = K1%NPARTS; for(int i=0; i<NPARTS; i++){ int k1from, k1to; if(i<remains){ k1from = i*(unit+1); k1to = k1from+unit; }else{ k1from = remains*(unit+1)+(i-remains)*unit; k1to = k1from+unit-1; } double *pEnergy = partialEnergy+i; double *pVirial = partialVirial+i*6; compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to); } */ for(int i=0; i<NPARTS; i++){ v0 += partialVirial[i*6+0]; v1 += partialVirial[i*6+1]; v2 += partialVirial[i*6+2]; v3 += partialVirial[i*6+3]; v4 += partialVirial[i*6+4]; v5 += partialVirial[i*6+5]; energy += partialEnergy[i]; } virial[0] = v0; virial[1] = v1; virial[2] = v2; virial[3] = v3; virial[4] = v4; virial[5] = v5; return energy; }
double PmeKSpace::compute_energy(float *q_arr, const Lattice &lattice, double ewald, double *virial) { double energy = 0.0; double v0 = 0.; double v1 = 0.; double v2 = 0.; double v3 = 0.; double v4 = 0.; double v5 = 0.; int n; int k1, k2, k3, ind; int K1, K2, K3; K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; i_pi_volume = 1.0/(M_PI * lattice.volume()); piob = M_PI/ewald; piob *= piob; if ( lattice.orthogonal() ) { // if ( 0 ) { // JCP FOR TESTING //This branch is the usual call path. #if USE_CKLOOP int useCkLoop = Node::Object()->simParameters->useCkLoop; if(useCkLoop>=CKLOOP_CTRL_PME_KSPACE){ return compute_energy_orthogonal_helper(q_arr, lattice, ewald, virial); } #endif double recipx = lattice.a_r().x; double recipy = lattice.b_r().y; double recipz = lattice.c_r().z; init_exp(exp1, K1, 0, K1, recipx); init_exp(exp2, K2, k2_start, k2_end, recipy); init_exp(exp3, K3, k3_start, k3_end, recipz); ind = 0; for ( k1=0; k1<K1; ++k1 ) { double m1, m11, b1, xp1; b1 = bm1[k1]; int k1_s = k1<=K1/2 ? k1 : k1-K1; m1 = k1_s*recipx; m11 = m1*m1; xp1 = i_pi_volume*exp1[abs(k1_s)]; for ( k2=k2_start; k2<k2_end; ++k2 ) { double m2, m22, b1b2, xp2; b1b2 = b1*bm2[k2]; int k2_s = k2<=K2/2 ? k2 : k2-K2; m2 = k2_s*recipy; m22 = m2*m2; xp2 = exp2[abs(k2_s)]*xp1; k3 = k3_start; if ( k1==0 && k2==0 && k3==0 ) { q_arr[ind++] = 0.0; q_arr[ind++] = 0.0; ++k3; } for ( ; k3<k3_end; ++k3 ) { double m3, m33, xp3, msq, imsq, vir, fac; double theta3, theta, q2, qr, qc, C; theta3 = bm3[k3] *b1b2; m3 = k3*recipz; m33 = m3*m3; xp3 = exp3[k3]; qr = q_arr[ind]; qc=q_arr[ind+1]; q2 = 2*(qr*qr + qc*qc)*theta3; if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5; msq = m11 + m22 + m33; imsq = 1.0/msq; C = xp2*xp3*imsq; theta = theta3*C; q_arr[ind] *= theta; q_arr[ind+1] *= theta; vir = -2*(piob+imsq); fac = q2*C; energy += fac; v0 += fac*(1.0+vir*m11); v1 += fac*vir*m1*m2; v2 += fac*vir*m1*m3; v3 += fac*(1.0+vir*m22); v4 += fac*vir*m2*m3; v5 += fac*(1.0+vir*m33); ind += 2; } } } } else if ( cross(lattice.a(),lattice.b()).unit() == lattice.c().unit() ) { // } else if ( 0 ) { // JCP FOR TESTING Vector recip1 = lattice.a_r(); Vector recip2 = lattice.b_r(); Vector recip3 = lattice.c_r(); double recip3_x = recip3.x; double recip3_y = recip3.y; double recip3_z = recip3.z; init_exp(exp3, K3, k3_start, k3_end, recip3.length()); ind = 0; for ( k1=0; k1<K1; ++k1 ) { double b1; Vector m1; b1 = bm1[k1]; int k1_s = k1<=K1/2 ? k1 : k1-K1; m1 = k1_s*recip1; // xp1 = i_pi_volume*exp1[abs(k1_s)]; for ( k2=k2_start; k2<k2_end; ++k2 ) { double xp2, b1b2, m2_x, m2_y, m2_z; b1b2 = b1*bm2[k2]; int k2_s = k2<=K2/2 ? k2 : k2-K2; m2_x = m1.x + k2_s*recip2.x; m2_y = m1.y + k2_s*recip2.y; m2_z = m1.z + k2_s*recip2.z; // xp2 = exp2[abs(k2_s)]*xp1; xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z)); k3 = k3_start; if ( k1==0 && k2==0 && k3==0 ) { q_arr[ind++] = 0.0; q_arr[ind++] = 0.0; ++k3; } for ( ; k3<k3_end; ++k3 ) { double xp3, msq, imsq, vir, fac; double theta3, theta, q2, qr, qc, C; double m_x, m_y, m_z; theta3 = bm3[k3] *b1b2; m_x = m2_x + k3*recip3_x; m_y = m2_y + k3*recip3_y; m_z = m2_z + k3*recip3_z; msq = m_x*m_x + m_y*m_y + m_z*m_z; xp3 = exp3[k3]; qr = q_arr[ind]; qc=q_arr[ind+1]; q2 = 2*(qr*qr + qc*qc)*theta3; if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5; imsq = 1.0/msq; C = xp2*xp3*imsq; theta = theta3*C; q_arr[ind] *= theta; q_arr[ind+1] *= theta; vir = -2*(piob+imsq); fac = q2*C; energy += fac; v0 += fac*(1.0+vir*m_x*m_x); v1 += fac*vir*m_x*m_y; v2 += fac*vir*m_x*m_z; v3 += fac*(1.0+vir*m_y*m_y); v4 += fac*vir*m_y*m_z; v5 += fac*(1.0+vir*m_z*m_z); ind += 2; } } } } else { Vector recip1 = lattice.a_r(); Vector recip2 = lattice.b_r(); Vector recip3 = lattice.c_r(); double recip3_x = recip3.x; double recip3_y = recip3.y; double recip3_z = recip3.z; ind = 0; for ( k1=0; k1<K1; ++k1 ) { double b1; Vector m1; b1 = bm1[k1]; int k1_s = k1<=K1/2 ? k1 : k1-K1; m1 = k1_s*recip1; // xp1 = i_pi_volume*exp1[abs(k1_s)]; for ( k2=k2_start; k2<k2_end; ++k2 ) { double b1b2, m2_x, m2_y, m2_z; b1b2 = b1*bm2[k2]; int k2_s = k2<=K2/2 ? k2 : k2-K2; m2_x = m1.x + k2_s*recip2.x; m2_y = m1.y + k2_s*recip2.y; m2_z = m1.z + k2_s*recip2.z; // xp2 = exp2[abs(k2_s)]*xp1; k3 = k3_start; if ( k1==0 && k2==0 && k3==0 ) { q_arr[ind++] = 0.0; q_arr[ind++] = 0.0; ++k3; } for ( ; k3<k3_end; ++k3 ) { double xp3, msq, imsq, vir, fac; double theta3, theta, q2, qr, qc, C; double m_x, m_y, m_z; theta3 = bm3[k3] *b1b2; m_x = m2_x + k3*recip3_x; m_y = m2_y + k3*recip3_y; m_z = m2_z + k3*recip3_z; msq = m_x*m_x + m_y*m_y + m_z*m_z; // xp3 = exp3[k3]; xp3 = i_pi_volume*exp(-piob*msq); qr = q_arr[ind]; qc=q_arr[ind+1]; q2 = 2*(qr*qr + qc*qc)*theta3; if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5; imsq = 1.0/msq; C = xp3*imsq; theta = theta3*C; q_arr[ind] *= theta; q_arr[ind+1] *= theta; vir = -2*(piob+imsq); fac = q2*C; energy += fac; v0 += fac*(1.0+vir*m_x*m_x); v1 += fac*vir*m_x*m_y; v2 += fac*vir*m_x*m_z; v3 += fac*(1.0+vir*m_y*m_y); v4 += fac*vir*m_y*m_z; v5 += fac*(1.0+vir*m_z*m_z); ind += 2; } } } } virial[0] = 0.5 * v0; virial[1] = 0.5 * v1; virial[2] = 0.5 * v2; virial[3] = 0.5 * v3; virial[4] = 0.5 * v4; virial[5] = 0.5 * v5; return 0.5*energy; }
void ComputeMsmSerialMgr::recvCoord(MsmSerialCoordMsg *msg) { if ( ! numSources ) { numSources = (PatchMap::Object())->numNodesWithPatches(); coordMsgs = new MsmSerialCoordMsg*[numSources]; for ( int i=0; i<numSources; ++i ) { coordMsgs[i] = 0; } numArrived = 0; numAtoms = Node::Object()->molecule->numAtoms; coord = new ComputeMsmSerialAtom[numAtoms]; force = new MsmSerialForce[numAtoms]; } int i; for ( i=0; i < msg->numAtoms; ++i ) { coord[msg->coord[i].id] = msg->coord[i]; } coordMsgs[numArrived] = msg; ++numArrived; if ( numArrived < numSources ) return; numArrived = 0; // ALL DATA ARRIVED --- CALCULATE FORCES Lattice lattice = msg->lattice; SimParameters *simParams = Node::Object()->simParameters; double energy = 0; double virial[3][3]; int rc = 0; // return code if ( ! msmsolver ) { // // setup MSM solver // msmsolver = NL_msm_create(); if ( ! msmsolver ) NAMD_die("unable to create MSM solver"); double dielectric = simParams->dielectric; double cutoff = simParams->cutoff; double gridspacing = simParams->MSMGridSpacing; double padding = simParams->MSMPadding; int approx = simParams->MSMApprox; int split = simParams->MSMSplit; int nlevels = simParams->MSMLevels; int msmflags = 0; msmflags |= (lattice.a_p() ? NL_MSM_PERIODIC_VEC1 : 0); msmflags |= (lattice.b_p() ? NL_MSM_PERIODIC_VEC2 : 0); msmflags |= (lattice.c_p() ? NL_MSM_PERIODIC_VEC3 : 0); msmflags |= NL_MSM_COMPUTE_LONG_RANGE; // compute only long-range part //msmflags |= NL_MSM_COMPUTE_ALL; //printf("msmflags = %x\n", msmflags); rc = NL_msm_configure(msmsolver, gridspacing, approx, split, nlevels); if (rc) NAMD_die("unable to configure MSM solver"); Vector u=lattice.a(), v=lattice.b(), w=lattice.c(), c=lattice.origin(); Vector ru=lattice.a_r(), rv=lattice.b_r(), rw=lattice.c_r(); if ((msmflags & NL_MSM_PERIODIC_ALL) != NL_MSM_PERIODIC_ALL) { // called only if there is some non-periodic boundary int isperiodic = (msmflags & NL_MSM_PERIODIC_ALL); //printf("calling rescale\n"); rescale_nonperiodic_cell(u, v, w, c, ru, rv, rw, isperiodic, numAtoms, coord, padding, gridspacing); } double vec1[3], vec2[3], vec3[3], center[3]; vec1[0] = u.x; vec1[1] = u.y; vec1[2] = u.z; vec2[0] = v.x; vec2[1] = v.y; vec2[2] = v.z; vec3[0] = w.x; vec3[1] = w.y; vec3[2] = w.z; center[0] = c.x; center[1] = c.y; center[2] = c.z; #if 0 printf("dielectric = %g\n", dielectric); printf("vec1 = %g %g %g\n", vec1[0], vec1[1], vec1[2]); printf("vec2 = %g %g %g\n", vec2[0], vec2[1], vec2[2]); printf("vec3 = %g %g %g\n", vec3[0], vec3[1], vec3[2]); printf("center = %g %g %g\n", center[0], center[1], center[2]); printf("cutoff = %g\n", cutoff); printf("numatoms = %d\n", numAtoms); #endif rc = NL_msm_setup(msmsolver, cutoff, vec1, vec2, vec3, center, msmflags); if (rc) NAMD_die("unable to set up MSM solver"); msmcoord = new double[4*numAtoms]; msmforce = new double[3*numAtoms]; if (msmcoord==0 || msmforce==0) NAMD_die("can't allocate MSM atom buffers"); // scale charges - these won't change double celec = sqrt(COULOMB / simParams->dielectric); for (i = 0; i < numAtoms; i++) { msmcoord[4*i+3] = celec * coord[i].charge; } } // evaluate long-range MSM forces for (i = 0; i < numAtoms; i++) { msmcoord[4*i ] = coord[i].position.x; msmcoord[4*i+1] = coord[i].position.y; msmcoord[4*i+2] = coord[i].position.z; } for (i = 0; i < numAtoms; i++) { msmforce[3*i ] = 0; msmforce[3*i+1] = 0; msmforce[3*i+2] = 0; } rc = NL_msm_compute_force(msmsolver, msmforce, &energy, msmcoord, numAtoms); if (rc) NAMD_die("error evaluating MSM forces"); for (i = 0; i < numAtoms; i++) { force[i].x = msmforce[3*i ]; force[i].y = msmforce[3*i+1]; force[i].z = msmforce[3*i+2]; } // MSM does not yet calculate virial for (int k=0; k < 3; k++) { for (int l=0; l < 3; l++) { virial[k][l] = 0; } } // distribute forces for (int j=0; j < numSources; j++) { MsmSerialCoordMsg *cmsg = coordMsgs[j]; coordMsgs[j] = 0; MsmSerialForceMsg *fmsg = new (cmsg->numAtoms, 0) MsmSerialForceMsg; for (int i=0; i < cmsg->numAtoms; i++) { fmsg->force[i] = force[cmsg->coord[i].id]; } if ( ! j ) { // set virial and energy only for first message fmsg->energy = energy; for (int k=0; k < 3; k++) { for (int l=0; l < 3; l++) { fmsg->virial[k][l] = virial[k][l]; } } } else { // set other messages to zero, add into reduction only once fmsg->energy = 0; for (int k=0; k < 3; k++) { for (int l=0; l < 3; l++) { fmsg->virial[k][l] = 0; } } } msmProxy[cmsg->sourceNode].recvForce(fmsg); delete cmsg; } }