/* Root-mean-square of the forces f[s[i].shell] for i in [0, ns), with the
 * extra squared contribution *sf_dir included in the sum.
 *
 * When PAR(cr) holds, the squared sum, the count ns, *sf_dir and *Epot are
 * summed with gmx_sumd; *sf_dir and *Epot are overwritten with the summed
 * values. ndir is added to the count after that reduction.
 *
 * Returns 0 when the total count is zero.
 */
static real rms_force(t_commrec *cr, rvec f[], int ns, t_shell s[],
                      int ndir, real *sf_dir, real *Epot)
{
    double buf[4];
    int    ntot = ns;
    int    n;

    /* buf[0] accumulates the sum of squared norms, seeded with *sf_dir */
    buf[0] = *sf_dir;
    for (n = 0; n < ns; n++)
    {
        buf[0] += norm2(f[s[n].shell]);
    }

    if (PAR(cr))
    {
        /* Pack everything into one buffer so a single reduction suffices */
        buf[1] = ntot;
        buf[2] = *sf_dir;
        buf[3] = *Epot;
        gmx_sumd(4, buf, cr);
        /* The count travelled as a double: round back to the nearest int */
        ntot    = (int)(buf[1] + 0.5);
        *sf_dir = buf[2];
        *Epot   = buf[3];
    }
    ntot += ndir;

    if (ntot == 0)
    {
        return 0;
    }
    return sqrt(buf[0]/ntot);
}
/* Zero the entries of b->rbuf from index b->nreal up to (not including)
 * b->maxreal, then pass all b->maxreal entries to gmx_sumd.
 */
void sum_bin(t_bin *b, t_commrec *cr)
{
    int idx = b->nreal;

    while (idx < b->maxreal)
    {
        b->rbuf[idx] = 0;
        idx++;
    }

    gmx_sumd(b->maxreal, b->rbuf, cr);
}
/* Get the center from local positions that already have the correct * PBC representation */ extern void get_center_comm( const t_commrec *cr, rvec x_loc[], /* Local positions */ real weight_loc[], /* Local masses or other weights */ int nr_loc, /* Local number of atoms */ int nr_group, /* Total number of atoms of the group */ rvec center) /* Weighted center */ { double weight_sum, denom; dvec dsumvec; double buf[4]; weight_sum = get_sum_of_positions(x_loc, weight_loc, nr_loc, dsumvec); /* Add the local contributions from all nodes. Put the sum vector and the * weight in a buffer array so that we get along with a single communication * call. */ if (PAR(cr)) { buf[0] = dsumvec[XX]; buf[1] = dsumvec[YY]; buf[2] = dsumvec[ZZ]; buf[3] = weight_sum; /* Communicate buffer */ gmx_sumd(4, buf, cr); dsumvec[XX] = buf[0]; dsumvec[YY] = buf[1]; dsumvec[ZZ] = buf[2]; weight_sum = buf[3]; } if (weight_loc != nullptr) { denom = 1.0/weight_sum; /* Divide by the sum of weight to get center of mass e.g. */ } else { denom = 1.0/nr_group; /* Divide by the number of atoms to get the geometrical center */ } center[XX] = dsumvec[XX]*denom; center[YY] = dsumvec[YY]*denom; center[ZZ] = dsumvec[ZZ]*denom; }
/* Sum the n doubles in data in place over ranks.
 *
 * Nothing happens when cr is NULL or PAR(cr) is false. With
 * comm->bParticipateAll set, gmx_sumd is used; otherwise MPI_Allreduce is
 * called on comm->mpi_comm_com. Without GMX_MPI the latter case is
 * reported through gmx_incons.
 */
static void pull_reduce_double(t_commrec *cr, pull_comm_t *comm, int n, double *data)
{
    if (cr == NULL || !PAR(cr))
    {
        return;
    }

    if (comm->bParticipateAll)
    {
        /* Sum the contributions over all DD ranks */
        gmx_sumd(n, data, cr);
        return;
    }

#if GMX_MPI
#if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, data, n, MPI_DOUBLE, MPI_SUM, comm->mpi_comm_com);
#else
    {
        /* No in-place reduction available: reduce into a temporary buffer
         * and copy the result back to the input/output array.
         */
        double *buf;

        snew(buf, n);
        MPI_Allreduce(data, buf, n, MPI_DOUBLE, MPI_SUM, comm->mpi_comm_com);
        for (int i = 0; i < n; i++)
        {
            data[i] = buf[i];
        }
        sfree(buf);
    }
#endif
#else
    gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
#endif
}
/* Calculates the center of mass of every pull group from the coordinates x
 * (and, when xp is non-NULL, also from xp).
 *
 * Outline:
 *  1. Per group, accumulate local sums over the pgrp->nat_loc local atoms.
 *  2. Store the sums in pull->dbuf (three dvec entries per group) and, when
 *     cr is non-NULL and PAR(cr) holds, sum them with one gmx_sumd call.
 *  3. Convert the summed values into pgrp->x / pgrp->xp, pgrp->wscale and
 *     pgrp->invtm.
 *
 * Group 0 is skipped in steps 1 and 3 when PULL_CYL(pull) is set; in that
 * case make_cyl_refgrps() is called at the end.
 *
 * pull->rbuf and pull->dbuf are allocated on first use.
 */
void pull_calc_coms(t_commrec *cr, t_pull *pull, t_mdatoms *md, t_pbc *pbc, double t, rvec x[], rvec *xp)
{
    int           g, i, ii, m;
    real          mass, w, wm, twopi_box = 0;
    double        wmass, wwmass, invwmass;
    dvec          com, comp;
    double        cm, sm, cmp, smp, ccm, csm, ssm, csw, snw;
    /* NOTE(review): xx is not referenced anywhere in this function */
    rvec         *xx[2], x_pbc = {0, 0, 0}, dx;
    t_pull_group *pgrp;

    if (pull->rbuf == NULL)
    {
        snew(pull->rbuf, pull->ngroup);
    }
    if (pull->dbuf == NULL)
    {
        snew(pull->dbuf, 3*pull->ngroup);
    }

    if (pull->bRefAt)
    {
        pull_set_pbcatoms(cr, pull, md, x, pull->rbuf);
    }

    if (pull->cosdim >= 0)
    {
        /* Cosine weighting is refused when the box has off-diagonal
         * components along cosdim for the dimensions cosdim+1..npbcdim-1.
         */
        for (m = pull->cosdim+1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
            }
        }
        twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim];
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pgrp = &pull->group[g];
        clear_dvec(com);
        clear_dvec(comp);
        wmass  = 0;
        wwmass = 0;
        cm     = 0;
        sm     = 0;
        cmp    = 0;
        smp    = 0;
        ccm    = 0;
        csm    = 0;
        ssm    = 0;
        if (!(g == 0 && PULL_CYL(pull)))
        {
            if (pgrp->epgrppbc == epgrppbcREFAT)
            {
                /* Set the pbc atom */
                copy_rvec(pull->rbuf[g], x_pbc);
            }
            /* Default weight; replaced per atom when weight_loc is set */
            w = 1;
            for (i = 0; i < pgrp->nat_loc; i++)
            {
                ii   = pgrp->ind_loc[i];
                mass = md->massT[ii];
                if (pgrp->epgrppbc != epgrppbcCOS)
                {
                    if (pgrp->weight_loc)
                    {
                        w = pgrp->weight_loc[i];
                    }
                    wm      = w*mass;
                    wmass  += wm;
                    wwmass += wm*w;
                    if (pgrp->epgrppbc == epgrppbcNONE)
                    {
                        /* Plain COM: sum the coordinates */
                        for (m = 0; m < DIM; m++)
                        {
                            com[m] += wm*x[ii][m];
                        }
                        if (xp)
                        {
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*xp[ii][m];
                            }
                        }
                    }
                    else
                    {
                        /* Sum the difference with the reference atom */
                        pbc_dx(pbc, x[ii], x_pbc, dx);
                        for (m = 0; m < DIM; m++)
                        {
                            com[m] += wm*dx[m];
                        }
                        if (xp)
                        {
                            /* For xp add the difference between xp and x to dx,
                             * such that we use the same periodic image,
                             * also when xp has a large displacement.
                             */
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*(dx[m] + xp[ii][m] - x[ii][m]);
                            }
                        }
                    }
                }
                else
                {
                    /* Determine cos and sin sums */
                    csw  = cos(x[ii][pull->cosdim]*twopi_box);
                    snw  = sin(x[ii][pull->cosdim]*twopi_box);
                    cm  += csw*mass;
                    sm  += snw*mass;
                    ccm += csw*csw*mass;
                    csm += csw*snw*mass;
                    ssm += snw*snw*mass;

                    if (xp)
                    {
                        csw  = cos(xp[ii][pull->cosdim]*twopi_box);
                        snw  = sin(xp[ii][pull->cosdim]*twopi_box);
                        cmp += csw*mass;
                        smp += snw*mass;
                    }
                }
            }
        }

        /* Copy local sums to a buffer for global summing.
         * Layout per group g (three dvec entries):
         *   NONE/REFAT: [g*3] = com, [g*3+1] = comp, [g*3+2] = {wmass, wwmass, 0}
         *   COS:        [g*3] = {cm, sm, 0}, [g*3+1] = {ccm, csm, ssm},
         *               [g*3+2] = {cmp, smp, 0}
         */
        switch (pgrp->epgrppbc)
        {
            case epgrppbcNONE:
            case epgrppbcREFAT:
                copy_dvec(com, pull->dbuf[g*3]);
                copy_dvec(comp, pull->dbuf[g*3+1]);
                pull->dbuf[g*3+2][0] = wmass;
                pull->dbuf[g*3+2][1] = wwmass;
                pull->dbuf[g*3+2][2] = 0;
                break;
            case epgrppbcCOS:
                pull->dbuf[g*3 ][0] = cm;
                pull->dbuf[g*3 ][1] = sm;
                pull->dbuf[g*3 ][2] = 0;
                pull->dbuf[g*3+1][0] = ccm;
                pull->dbuf[g*3+1][1] = csm;
                pull->dbuf[g*3+1][2] = ssm;
                pull->dbuf[g*3+2][0] = cmp;
                pull->dbuf[g*3+2][1] = smp;
                pull->dbuf[g*3+2][2] = 0;
                break;
        }
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ngroup*3*DIM, pull->dbuf[0], cr);
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pgrp = &pull->group[g];
        if (pgrp->nat > 0 && !(g == 0 && PULL_CYL(pull)))
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                /* Determine the inverse mass */
                wmass    = pull->dbuf[g*3+2][0];
                wwmass   = pull->dbuf[g*3+2][1];
                invwmass = 1/wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm > 0)
                {
                    pgrp->wscale = wmass/wwmass;
                    pgrp->invtm  = 1.0/(pgrp->wscale*wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m] = pull->dbuf[g*3 ][m]*invwmass;
                    if (xp)
                    {
                        pgrp->xp[m] = pull->dbuf[g*3+1][m]*invwmass;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT)
                    {
                        /* The sums were taken relative to the reference
                         * position stored in rbuf: add it back.
                         */
                        pgrp->x[m] += pull->rbuf[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += pull->rbuf[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Determine the optimal location of the cosine weight */
                csw                   = pull->dbuf[g*3][0];
                snw                   = pull->dbuf[g*3][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                /* Compute wmass and wwmass from the summed cos/sin terms
                 * and derive wscale and invtm from them.
                 */
                wmass  = sqrt(csw*csw + snw*snw);
                wwmass = (pull->dbuf[g*3+1][0]*csw*csw + pull->dbuf[g*3+1][1]*csw*snw + pull->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);
                pgrp->wscale = wmass/wwmass;
                pgrp->invtm  = 1.0/(pgrp->wscale*wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    ii                  = pgrp->ind_loc[i];
                    pgrp->weight_loc[i] = csw*cos(twopi_box*x[ii][pull->cosdim]) + snw*sin(twopi_box*x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = pull->dbuf[g*3+2][0];
                    snw                    = pull->dbuf[g*3+2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %d wmass %f wwmass %f invtm %f\n", g, wmass, wwmass, pgrp->invtm);
            }
        }
    }

    if (PULL_CYL(pull))
    {
        /* Calculate the COMs for the cylinder reference groups */
        make_cyl_refgrps(cr, pull, md, pbc, t, x, xp);
    }
}
static void make_cyl_refgrps(t_commrec *cr, t_pull *pull, t_mdatoms *md, t_pbc *pbc, double t, rvec *x, rvec *xp) { int c, i, ii, m, start, end; rvec g_x, dx, dir; double r0_2, sum_a, sum_ap, dr2, mass, weight, wmass, wwmass, inp; t_pull_coord *pcrd; t_pull_group *pref, *pgrp, *pdyna; gmx_ga2la_t ga2la = NULL; if (pull->dbuf_cyl == NULL) { snew(pull->dbuf_cyl, pull->ncoord*4); } if (cr && DOMAINDECOMP(cr)) { ga2la = cr->dd->ga2la; } start = 0; end = md->homenr; r0_2 = dsqr(pull->cyl_r0); /* loop over all groups to make a reference group for each*/ for (c = 0; c < pull->ncoord; c++) { pcrd = &pull->coord[c]; /* pref will be the same group for all pull coordinates */ pref = &pull->group[pcrd->group[0]]; pgrp = &pull->group[pcrd->group[1]]; pdyna = &pull->dyna[c]; copy_rvec(pcrd->vec, dir); sum_a = 0; sum_ap = 0; wmass = 0; wwmass = 0; pdyna->nat_loc = 0; for (m = 0; m < DIM; m++) { g_x[m] = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t); } /* loop over all atoms in the main ref group */ for (i = 0; i < pref->nat; i++) { ii = pref->ind[i]; if (ga2la) { if (!ga2la_get_home(ga2la, pref->ind[i], &ii)) { ii = -1; } } if (ii >= start && ii < end) { pbc_dx_aiuc(pbc, x[ii], g_x, dx); inp = iprod(dir, dx); dr2 = 0; for (m = 0; m < DIM; m++) { dr2 += dsqr(dx[m] - inp*dir[m]); } if (dr2 < r0_2) { /* add to index, to sum of COM, to weight array */ if (pdyna->nat_loc >= pdyna->nalloc_loc) { pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1); srenew(pdyna->ind_loc, pdyna->nalloc_loc); srenew(pdyna->weight_loc, pdyna->nalloc_loc); } pdyna->ind_loc[pdyna->nat_loc] = ii; mass = md->massT[ii]; weight = get_weight(sqrt(dr2), pull->cyl_r1, pull->cyl_r0); pdyna->weight_loc[pdyna->nat_loc] = weight; sum_a += mass*weight*inp; if (xp) { pbc_dx_aiuc(pbc, xp[ii], g_x, dx); inp = iprod(dir, dx); sum_ap += mass*weight*inp; } wmass += mass*weight; wwmass += mass*sqr(weight); pdyna->nat_loc++; } } } pull->dbuf_cyl[c*4+0] = wmass; pull->dbuf_cyl[c*4+1] = wwmass; pull->dbuf_cyl[c*4+2] = 
sum_a; pull->dbuf_cyl[c*4+3] = sum_ap; } if (cr && PAR(cr)) { /* Sum the contributions over the nodes */ gmx_sumd(pull->ncoord*4, pull->dbuf_cyl, cr); } for (c = 0; c < pull->ncoord; c++) { pcrd = &pull->coord[c]; pdyna = &pull->dyna[c]; pgrp = &pull->group[pcrd->group[1]]; wmass = pull->dbuf_cyl[c*4+0]; wwmass = pull->dbuf_cyl[c*4+1]; pdyna->wscale = wmass/wwmass; pdyna->invtm = 1.0/(pdyna->wscale*wmass); for (m = 0; m < DIM; m++) { g_x[m] = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t); pdyna->x[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+2]/wmass; if (xp) { pdyna->xp[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+3]/wmass; } } if (debug) { fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n", c, pdyna->x[0], pdyna->x[1], pdyna->x[2], 1.0/pdyna->invtm); } } }
/* Process one cycle-count measurement for the current PME load-balancing
 * setup and, when appropriate, switch to another setup.
 *
 * Returns FALSE immediately when pme_lb->stage equals pme_lb->nstage on
 * entry; returns TRUE on every other path.
 *
 * When PAR(cr) holds, cycles is summed over ranks with gmx_sumd and divided
 * by cr->nnodes. Every odd-numbered measurement of a setup (set->count odd)
 * is discarded. After the stage/setup selection the chosen setup's cut-offs
 * and Ewald coefficients are copied into ic and ir, the interaction tables
 * are re-initialized, and the PME grid is switched either locally (when
 * cr->duty has DUTY_PME) or via gmx_pme_send_switchgrid.
 */
gmx_bool pme_load_balance(pme_load_balancing_t pme_lb, t_commrec *cr, FILE *fp_err, FILE *fp_log, t_inputrec *ir, t_state *state, double cycles, interaction_const_t *ic, struct nonbonded_verlet_t *nbv, struct gmx_pme_t ** pmedata, gmx_int64_t step)
{
    gmx_bool     OK;
    pme_setup_t *set;
    double       cycles_fast;
    char         buf[STRLEN], sbuf[22];
    real         rtab;
    gmx_bool     bUsesSimpleTables = TRUE;

    if (pme_lb->stage == pme_lb->nstage)
    {
        return FALSE;
    }

    if (PAR(cr))
    {
        /* Use the average cycle count over all ranks */
        gmx_sumd(1, &cycles, cr);
        cycles /= cr->nnodes;
    }

    set = &pme_lb->setup[pme_lb->cur];
    set->count++;

    /* Table length, computed from the input record before any setup
     * values are written back below.
     */
    rtab = ir->rlistlong + ir->tabext;

    if (set->count % 2 == 1)
    {
        /* Skip the first cycle, because the first step after a switch
         * is much slower due to allocation and/or caching effects.
         */
        return TRUE;
    }

    sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf));
    print_grid(fp_err, fp_log, buf, "timed with", set, cycles);

    if (set->count <= 2)
    {
        /* First accepted measurement for this setup */
        set->cycles = cycles;
    }
    else
    {
        if (cycles*PME_LB_ACCEL_TOL < set->cycles && pme_lb->stage == pme_lb->nstage - 1)
        {
            /* The performance went up a lot (due to e.g. DD load balancing).
             * Add a stage, keep the minima, but rescan all setups.
             */
            pme_lb->nstage++;
            if (debug)
            {
                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n" "Increased the number stages to %d" " and ignoring the previous performance\n", set->grid[XX], set->grid[YY], set->grid[ZZ], cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL, pme_lb->nstage);
            }
        }
        set->cycles = min(set->cycles, cycles);
    }

    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
    {
        pme_lb->fastest = pme_lb->cur;

        if (DOMAINDECOMP(cr))
        {
            /* We found a new fastest setting, ensure that with subsequent
             * shorter cut-off's the dynamic load balancing does not make
             * the use of the current cut-off impossible. This solution is
             * a trade-off, as the PME load balancing and DD domain size
             * load balancing can interact in complex ways.
             * With the Verlet kernels, DD load imbalance will usually be
             * mainly due to bonded interaction imbalance, which will often
             * quickly push the domain boundaries beyond the limit for the
             * optimal, PME load balanced, cut-off. But it could be that
             * better overall performance can be obtained with a slightly
             * shorter cut-off and better DD load balancing.
             */
            change_dd_dlb_cutoff_limit(cr);
        }
    }
    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;

    /* Check in stage 0 if we should stop scanning grids.
     * Stop when the time is more than SLOW_FAC longer than the fastest.
     */
    if (pme_lb->stage == 0 && pme_lb->cur > 0 && cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
    {
        pme_lb->n = pme_lb->cur + 1;
        /* Done with scanning, go to stage 1 */
        switch_to_stage1(pme_lb);
    }

    if (pme_lb->stage == 0)
    {
        int gridsize_start;

        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];

        /* Advance to the next setup; the loop condition below decides
         * whether the newly selected setup is accepted or skipped.
         */
        do
        {
            if (pme_lb->cur+1 < pme_lb->n)
            {
                /* We had already generated the next setup */
                OK = TRUE;
            }
            else
            {
                /* Find the next setup */
                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd);

                if (!OK)
                {
                    pme_lb->elimited = epmelblimPMEGRID;
                }
            }

            if (OK && ir->ePBC != epbcNONE)
            {
                /* The new list cut-off must fit in the box */
                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong) <= max_cutoff2(ir->ePBC, state->box));
                if (!OK)
                {
                    pme_lb->elimited = epmelblimBOX;
                }
            }

            if (OK)
            {
                pme_lb->cur++;

                if (DOMAINDECOMP(cr))
                {
                    OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
                    if (!OK)
                    {
                        /* Failed: do not use this setup */
                        pme_lb->cur--;
                        pme_lb->elimited = epmelblimDD;
                    }
                }
            }
            if (!OK)
            {
                /* We hit the upper limit for the cut-off,
                 * the setup should not go further than cur.
                 */
                pme_lb->n = pme_lb->cur + 1;
                print_loadbal_limited(fp_err, fp_log, step, pme_lb);
                /* Switch to the next stage */
                switch_to_stage1(pme_lb);
            }
        }
        while (OK && !(pme_lb->setup[pme_lb->cur].grid[XX]* pme_lb->setup[pme_lb->cur].grid[YY]* pme_lb->setup[pme_lb->cur].grid[ZZ] < gridsize_start*PME_LB_GRID_SCALE_FAC && pme_lb->setup[pme_lb->cur].grid_efficiency < pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
    }

    if (pme_lb->stage > 0 && pme_lb->end == 1)
    {
        /* Only setup 0 is in range: select it and finish balancing */
        pme_lb->cur   = 0;
        pme_lb->stage = pme_lb->nstage;
    }
    else if (pme_lb->stage > 0 && pme_lb->end > 1)
    {
        /* If stage = nstage-1:
         *   scan over all setups, rerunning only those setups
         *   which are not much slower than the fastest
         * else:
         *   use the next setup
         */
        do
        {
            pme_lb->cur++;
            if (pme_lb->cur == pme_lb->end)
            {
                pme_lb->stage++;
                pme_lb->cur = pme_lb->start;
            }
        }
        while (pme_lb->stage == pme_lb->nstage - 1 && pme_lb->setup[pme_lb->cur].count > 0 && pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);

        if (pme_lb->stage == pme_lb->nstage)
        {
            /* We are done optimizing, use the fastest setup we found */
            pme_lb->cur = pme_lb->fastest;
        }
    }

    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
    {
        OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
        if (!OK)
        {
            /* Failsafe solution */
            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
            {
                pme_lb->stage--;
            }
            pme_lb->fastest  = 0;
            pme_lb->start    = 0;
            pme_lb->end      = pme_lb->cur;
            pme_lb->cur      = pme_lb->start;
            pme_lb->elimited = epmelblimDD;
            print_loadbal_limited(fp_err, fp_log, step, pme_lb);
        }
    }

    /* Change the Coulomb cut-off and the PME grid */

    set = &pme_lb->setup[pme_lb->cur];

    ic->rcoulomb     = set->rcut_coulomb;
    ic->rlist        = set->rlist;
    ic->rlistlong    = set->rlistlong;
    ir->nstcalclr    = set->nstcalclr;
    ic->ewaldcoeff_q = set->ewaldcoeff_q;
    /* TODO: centralize the code that sets the potentials shifts */
    if (ic->coulomb_modifier == eintmodPOTSHIFT)
    {
        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
    }
    if (EVDW_PME(ic->vdwtype))
    {
        /* We have PME for both Coulomb and VdW, set rvdw equal to rcoulomb */
        ic->rvdw          = set->rcut_coulomb;
        ic->ewaldcoeff_lj = set->ewaldcoeff_lj;
        if (ic->vdw_modifier == eintmodPOTSHIFT)
        {
            real crc2;

            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
            ic->sh_invrc6             = -ic->dispersion_shift.cpot;
            crc2                      = sqr(ic->ewaldcoeff_lj*ic->rvdw);
            ic->sh_lj_ewald           = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
        }
    }

    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
    nbnxn_gpu_pme_loadbal_update_param(nbv, ic);

    /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
     * also sharing texture references. To keep the code simple, we don't
     * treat texture references as shared resources, but this means that
     * the coulomb_tab texture ref will get updated by multiple threads.
     * Hence, to ensure that the non-bonded kernels don't start before all
     * texture binding operations are finished, we need to wait for all ranks
     * to arrive here before continuing.
     *
     * Note that we could omit this barrier if GPUs are not shared (or
     * texture objects are used), but as this is initialization code, there
     * is no point in complicating things.
     */
#ifdef GMX_THREAD_MPI
    if (PAR(cr) && use_GPU(nbv))
    {
        gmx_barrier(cr);
    }
#endif  /* GMX_THREAD_MPI */

    /* Usually we won't need the simple tables with GPUs.
     * But we do with hybrid acceleration and with free energy.
     * To avoid bugs, we always re-initialize the simple tables here.
     */
    init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab);

    if (cr->duty & DUTY_PME)
    {
        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
        {
            /* Generate a new PME data structure,
             * copying part of the old pointers.
             */
            gmx_pme_reinit(&set->pmedata, cr, pme_lb->setup[0].pmedata, ir, set->grid);
        }
        *pmedata = set->pmedata;
    }
    else
    {
        /* Tell our PME-only node to switch grid */
        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff_q, set->ewaldcoeff_lj);
    }

    if (debug)
    {
        print_grid(NULL, debug, "", "switched to", set, -1);
    }

    if (pme_lb->stage == pme_lb->nstage)
    {
        print_grid(fp_err, fp_log, "", "optimal", set, -1);
    }

    return TRUE;
}
/* Compute, per dimension, the average (av) and standard deviation (stddev)
 * of the geometric centers of the first n groups described by cgs->index.
 *
 * Group cg spans the entries x[index[cg]] .. x[index[cg+1]-1]; its center
 * is the unweighted mean of those entries. When cr_sum is not NULL the
 * sums, the sums of squares and the group count are summed with gmx_sumd
 * before the statistics are formed.
 */
static void calc_cgcm_av_stddev(t_block *cgs, int n, rvec *x, rvec av, rvec stddev, t_commrec *cr_sum)
{
    int  *bounds = cgs->index;
    dvec  s1, s2;
    rvec  center;
    int   grp, dim;

    clear_dvec(s1);
    clear_dvec(s2);

    for (grp = 0; grp < n; grp++)
    {
        int first = bounds[grp];
        int past  = bounds[grp+1];
        int count = past - first;

        if (count != 1)
        {
            real inv_count = 1.0/count;
            int  atom;

            clear_rvec(center);
            for (atom = first; atom < past; atom++)
            {
                rvec_inc(center, x[atom]);
            }
            for (dim = 0; dim < DIM; dim++)
            {
                center[dim] *= inv_count;
            }
        }
        else
        {
            /* Single entry: the center is the entry itself */
            copy_rvec(x[first], center);
        }

        for (dim = 0; dim < DIM; dim++)
        {
            s1[dim] += center[dim];
            s2[dim] += center[dim]*center[dim];
        }
    }

    if (cr_sum != NULL)
    {
        /* Layout: [0..DIM) sums, [DIM..2*DIM) sums of squares, [6] count */
        double buf[7];

        for (dim = 0; dim < DIM; dim++)
        {
            buf[dim]     = s1[dim];
            buf[DIM+dim] = s2[dim];
        }
        buf[6] = n;

        gmx_sumd(7, buf, cr_sum);

        for (dim = 0; dim < DIM; dim++)
        {
            s1[dim] = buf[dim];
            s2[dim] = buf[DIM+dim];
        }
        /* The count travelled as a double: round back to the nearest int */
        n = (int)(buf[6] + 0.5);
    }

    dsvmul(1.0/n, s1, s1);
    dsvmul(1.0/n, s2, s2);

    for (dim = 0; dim < DIM; dim++)
    {
        av[dim]     = s1[dim];
        stddev[dim] = sqrt(s2[dim] - s1[dim]*s1[dim]);
    }
}
double do_tpi(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact, int gmx_unused nstglobalcomm, gmx_vsite_t gmx_unused *vsite, gmx_constr_t gmx_unused constr, int gmx_unused stepout, t_inputrec *inputrec, gmx_mtop_t *top_global, t_fcdata *fcd, t_state *state, t_mdatoms *mdatoms, t_nrnb *nrnb, gmx_wallcycle_t wcycle, gmx_edsam_t gmx_unused ed, t_forcerec *fr, int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, gmx_membed_t gmx_unused membed, real gmx_unused cpt_period, real gmx_unused max_hours, const char gmx_unused *deviceOptions, int gmx_unused imdport, unsigned long gmx_unused Flags, gmx_walltime_accounting_t walltime_accounting) { const char *TPI = "Test Particle Insertion"; gmx_localtop_t *top; gmx_groups_t *groups; gmx_enerdata_t *enerd; rvec *f; real lambda, t, temp, beta, drmax, epot; double embU, sum_embU, *sum_UgembU, V, V_all, VembU_all; t_trxstatus *status; t_trxframe rerun_fr; gmx_bool bDispCorr, bCharge, bRFExcl, bNotLastFrame, bStateChanged, bNS; tensor force_vir, shake_vir, vir, pres; int cg_tp, a_tp0, a_tp1, ngid, gid_tp, nener, e; rvec *x_mol; rvec mu_tot, x_init, dx, x_tp; int nnodes, frame; gmx_int64_t frame_step_prev, frame_step; gmx_int64_t nsteps, stepblocksize = 0, step; gmx_int64_t rnd_count_stride, rnd_count; gmx_int64_t seed; double rnd[4]; int i, start, end; FILE *fp_tpi = NULL; char *ptr, *dump_pdb, **leg, str[STRLEN], str2[STRLEN]; double dbl, dump_ener; gmx_bool bCavity; int nat_cavity = 0, d; real *mass_cavity = NULL, mass_tot; int nbin; double invbinw, *bin, refvolshift, logV, bUlogV; real dvdl, prescorr, enercorr, dvdlcorr; gmx_bool bEnergyOutOfBounds; const char *tpid_leg[2] = {"direct", "reweighted"}; /* Since there is no upper limit to the insertion energies, * we need to set an upper limit for the distribution output. 
*/ real bU_bin_limit = 50; real bU_logV_bin_limit = bU_bin_limit + 10; nnodes = cr->nnodes; top = gmx_mtop_generate_local_top(top_global, inputrec); groups = &top_global->groups; bCavity = (inputrec->eI == eiTPIC); if (bCavity) { ptr = getenv("GMX_TPIC_MASSES"); if (ptr == NULL) { nat_cavity = 1; } else { /* Read (multiple) masses from env var GMX_TPIC_MASSES, * The center of mass of the last atoms is then used for TPIC. */ nat_cavity = 0; while (sscanf(ptr, "%lf%n", &dbl, &i) > 0) { srenew(mass_cavity, nat_cavity+1); mass_cavity[nat_cavity] = dbl; fprintf(fplog, "mass[%d] = %f\n", nat_cavity+1, mass_cavity[nat_cavity]); nat_cavity++; ptr += i; } if (nat_cavity == 0) { gmx_fatal(FARGS, "Found %d masses in GMX_TPIC_MASSES", nat_cavity); } } } /* init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot, state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/ /* We never need full pbc for TPI */ fr->ePBC = epbcXYZ; /* Determine the temperature for the Boltzmann weighting */ temp = inputrec->opts.ref_t[0]; if (fplog) { for (i = 1; (i < inputrec->opts.ngtc); i++) { if (inputrec->opts.ref_t[i] != temp) { fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); } } fprintf(fplog, "\n The temperature for test particle insertion is %.3f K\n\n", temp); } beta = 1.0/(BOLTZ*temp); /* Number of insertions per frame */ nsteps = inputrec->nsteps; /* Use the same neighborlist with more insertions points * in a sphere of radius drmax around the initial point */ /* This should be a proper mdp parameter */ drmax = inputrec->rtpi; /* An environment variable can be set to dump all configurations * to pdb with an insertion energy <= this value. 
*/ dump_pdb = getenv("GMX_TPI_DUMP"); dump_ener = 0; if (dump_pdb) { sscanf(dump_pdb, "%lf", &dump_ener); } atoms2md(top_global, inputrec, 0, NULL, top_global->natoms, mdatoms); update_mdatoms(mdatoms, inputrec->fepvals->init_lambda); snew(enerd, 1); init_enerdata(groups->grps[egcENER].nr, inputrec->fepvals->n_lambda, enerd); snew(f, top_global->natoms); /* Print to log file */ walltime_accounting_start(walltime_accounting); wallcycle_start(wcycle, ewcRUN); print_start(fplog, cr, walltime_accounting, "Test Particle Insertion"); /* The last charge group is the group to be inserted */ cg_tp = top->cgs.nr - 1; a_tp0 = top->cgs.index[cg_tp]; a_tp1 = top->cgs.index[cg_tp+1]; if (debug) { fprintf(debug, "TPI cg %d, atoms %d-%d\n", cg_tp, a_tp0, a_tp1); } if (a_tp1 - a_tp0 > 1 && (inputrec->rlist < inputrec->rcoulomb || inputrec->rlist < inputrec->rvdw)) { gmx_fatal(FARGS, "Can not do TPI for multi-atom molecule with a twin-range cut-off"); } snew(x_mol, a_tp1-a_tp0); bDispCorr = (inputrec->eDispCorr != edispcNO); bCharge = FALSE; for (i = a_tp0; i < a_tp1; i++) { /* Copy the coordinates of the molecule to be insterted */ copy_rvec(state->x[i], x_mol[i-a_tp0]); /* Check if we need to print electrostatic energies */ bCharge |= (mdatoms->chargeA[i] != 0 || (mdatoms->chargeB && mdatoms->chargeB[i] != 0)); } bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype != eelRF_NEC); calc_cgcm(fplog, cg_tp, cg_tp+1, &(top->cgs), state->x, fr->cg_cm); if (bCavity) { if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) { fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); } } else { /* Center the molecule to be inserted at zero */ for (i = 0; i < a_tp1-a_tp0; i++) { rvec_dec(x_mol[i], fr->cg_cm[cg_tp]); } } if (fplog) { fprintf(fplog, "\nWill insert %d atoms %s partial charges\n", a_tp1-a_tp0, bCharge ? 
"with" : "without"); fprintf(fplog, "\nWill insert %d times in each frame of %s\n", (int)nsteps, opt2fn("-rerun", nfile, fnm)); } if (!bCavity) { if (inputrec->nstlist > 1) { if (drmax == 0 && a_tp1-a_tp0 == 1) { gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax); } if (fplog) { fprintf(fplog, "Will use the same neighborlist for %d insertions in a sphere of radius %f\n", inputrec->nstlist, drmax); } } } else { if (fplog) { fprintf(fplog, "Will insert randomly in a sphere of radius %f around the center of the cavity\n", drmax); } } ngid = groups->grps[egcENER].nr; gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]); nener = 1 + ngid; if (bDispCorr) { nener += 1; } if (bCharge) { nener += ngid; if (bRFExcl) { nener += 1; } if (EEL_FULL(fr->eeltype)) { nener += 1; } } snew(sum_UgembU, nener); /* Copy the random seed set by the user */ seed = inputrec->ld_seed; /* We use the frame step number as one random counter. * The second counter use the insertion (step) count. But we * need multiple random numbers per insertion. This number is * not fixed, since we generate random locations in a sphere * by putting locations in a cube and some of these fail. * A count of 20 is already extremely unlikely, so 10000 is * a safe margin for random numbers per insertion. */ rnd_count_stride = 10000; if (MASTER(cr)) { fp_tpi = xvgropen(opt2fn("-tpi", nfile, fnm), "TPI energies", "Time (ps)", "(kJ mol\\S-1\\N) / (nm\\S3\\N)", oenv); xvgr_subtitle(fp_tpi, "f. are averages over one frame", oenv); snew(leg, 4+nener); e = 0; sprintf(str, "-kT log(<Ve\\S-\\betaU\\N>/<V>)"); leg[e++] = strdup(str); sprintf(str, "f. -kT log<e\\S-\\betaU\\N>"); leg[e++] = strdup(str); sprintf(str, "f. <e\\S-\\betaU\\N>"); leg[e++] = strdup(str); sprintf(str, "f. V"); leg[e++] = strdup(str); sprintf(str, "f. <Ue\\S-\\betaU\\N>"); leg[e++] = strdup(str); for (i = 0; i < ngid; i++) { sprintf(str, "f. 
<U\\sVdW %s\\Ne\\S-\\betaU\\N>", *(groups->grpname[groups->grps[egcENER].nm_ind[i]])); leg[e++] = strdup(str); } if (bDispCorr) { sprintf(str, "f. <U\\sdisp c\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } if (bCharge) { for (i = 0; i < ngid; i++) { sprintf(str, "f. <U\\sCoul %s\\Ne\\S-\\betaU\\N>", *(groups->grpname[groups->grps[egcENER].nm_ind[i]])); leg[e++] = strdup(str); } if (bRFExcl) { sprintf(str, "f. <U\\sRF excl\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } if (EEL_FULL(fr->eeltype)) { sprintf(str, "f. <U\\sCoul recip\\Ne\\S-\\betaU\\N>"); leg[e++] = strdup(str); } } xvgr_legend(fp_tpi, 4+nener, (const char**)leg, oenv); for (i = 0; i < 4+nener; i++) { sfree(leg[i]); } sfree(leg); } clear_rvec(x_init); V_all = 0; VembU_all = 0; invbinw = 10; nbin = 10; snew(bin, nbin); /* Avoid frame step numbers <= -1 */ frame_step_prev = -1; bNotLastFrame = read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm), &rerun_fr, TRX_NEED_X); frame = 0; if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) != mdatoms->nr - (a_tp1 - a_tp0)) { gmx_fatal(FARGS, "Number of atoms in trajectory (%d)%s " "is not equal the number in the run input file (%d) " "minus the number of atoms to insert (%d)\n", rerun_fr.natoms, bCavity ? " minus one" : "", mdatoms->nr, a_tp1-a_tp0); } refvolshift = log(det(rerun_fr.box)); switch (inputrec->eI) { case eiTPI: stepblocksize = inputrec->nstlist; break; case eiTPIC: stepblocksize = 1; break; default: gmx_fatal(FARGS, "Unknown integrator %s", ei_names[inputrec->eI]); } #ifdef GMX_SIMD /* Make sure we don't detect SIMD overflow generated before this point */ gmx_simd_check_and_reset_overflow(); #endif while (bNotLastFrame) { frame_step = rerun_fr.step; if (frame_step <= frame_step_prev) { /* We don't have step number in the trajectory file, * or we have constant or decreasing step numbers. * Ensure we have increasing step numbers, since we use * the step numbers as a counter for random numbers. 
*/ frame_step = frame_step_prev + 1; } frame_step_prev = frame_step; lambda = rerun_fr.lambda; t = rerun_fr.time; sum_embU = 0; for (e = 0; e < nener; e++) { sum_UgembU[e] = 0; } /* Copy the coordinates from the input trajectory */ for (i = 0; i < rerun_fr.natoms; i++) { copy_rvec(rerun_fr.x[i], state->x[i]); } copy_mat(rerun_fr.box, state->box); V = det(state->box); logV = log(V); bStateChanged = TRUE; bNS = TRUE; step = cr->nodeid*stepblocksize; while (step < nsteps) { /* Initialize the second counter for random numbers using * the insertion step index. This ensures that we get * the same random numbers independently of how many * MPI ranks we use. Also for the same seed, we get * the same initial random sequence for different nsteps. */ rnd_count = step*rnd_count_stride; if (!bCavity) { /* Random insertion in the whole volume */ bNS = (step % inputrec->nstlist == 0); if (bNS) { /* Generate a random position in the box */ gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { x_init[d] = rnd[d]*state->box[d][d]; } } if (inputrec->nstlist == 1) { copy_rvec(x_init, x_tp); } else { /* Generate coordinates within |dx|=drmax of x_init */ do { gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { dx[d] = (2*rnd[d] - 1)*drmax; } } while (norm2(dx) > drmax*drmax); rvec_add(x_init, dx, x_tp); } } else { /* Random insertion around a cavity location * given by the last coordinate of the trajectory. 
*/ if (step == 0) { if (nat_cavity == 1) { /* Copy the location of the cavity */ copy_rvec(rerun_fr.x[rerun_fr.natoms-1], x_init); } else { /* Determine the center of mass of the last molecule */ clear_rvec(x_init); mass_tot = 0; for (i = 0; i < nat_cavity; i++) { for (d = 0; d < DIM; d++) { x_init[d] += mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d]; } mass_tot += mass_cavity[i]; } for (d = 0; d < DIM; d++) { x_init[d] /= mass_tot; } } } /* Generate coordinates within |dx|=drmax of x_init */ do { gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); for (d = 0; d < DIM; d++) { dx[d] = (2*rnd[d] - 1)*drmax; } } while (norm2(dx) > drmax*drmax); rvec_add(x_init, dx, x_tp); } if (a_tp1 - a_tp0 == 1) { /* Insert a single atom, just copy the insertion location */ copy_rvec(x_tp, state->x[a_tp0]); } else { /* Copy the coordinates from the top file */ for (i = a_tp0; i < a_tp1; i++) { copy_rvec(x_mol[i-a_tp0], state->x[i]); } /* Rotate the molecule randomly */ gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd); gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2); rotate_conf(a_tp1-a_tp0, state->x+a_tp0, NULL, 2*M_PI*rnd[0], 2*M_PI*rnd[1], 2*M_PI*rnd[2]); /* Shift to the insertion location */ for (i = a_tp0; i < a_tp1; i++) { rvec_inc(state->x[i], x_tp); } } /* Clear some matrix variables */ clear_mat(force_vir); clear_mat(shake_vir); clear_mat(vir); clear_mat(pres); /* Set the charge group center of mass of the test particle */ copy_rvec(x_init, fr->cg_cm[top->cgs.nr-1]); /* Calc energy (no forces) on new positions. * Since we only need the intermolecular energy * and the RF exclusion terms of the inserted molecule occur * within a single charge group we can pass NULL for the graph. * This also avoids shifts that would move charge groups * out of the box. 
* * Some checks above ensure than we can not have * twin-range interactions together with nstlist > 1, * therefore we do not need to remember the LR energies. */ /* Make do_force do a single node force calculation */ cr->nnodes = 1; do_force(fplog, cr, inputrec, step, nrnb, wcycle, top, &top_global->groups, state->box, state->x, &state->hist, f, force_vir, mdatoms, enerd, fcd, state->lambda, NULL, fr, NULL, mu_tot, t, NULL, NULL, FALSE, GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY | (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) | (bStateChanged ? GMX_FORCE_STATECHANGED : 0)); cr->nnodes = nnodes; bStateChanged = FALSE; bNS = FALSE; /* Calculate long range corrections to pressure and energy */ calc_dispcorr(fplog, inputrec, fr, step, top_global->natoms, state->box, lambda, pres, vir, &prescorr, &enercorr, &dvdlcorr); /* figure out how to rearrange the next 4 lines MRS 8/4/2009 */ enerd->term[F_DISPCORR] = enercorr; enerd->term[F_EPOT] += enercorr; enerd->term[F_PRES] += prescorr; enerd->term[F_DVDL_VDW] += dvdlcorr; epot = enerd->term[F_EPOT]; bEnergyOutOfBounds = FALSE; #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER /* With SSE the energy can overflow, check for this */ if (gmx_mm_check_and_reset_overflow()) { if (debug) { fprintf(debug, "Found an SSE overflow, assuming the energy is out of bounds\n"); } bEnergyOutOfBounds = TRUE; } #endif /* If the compiler doesn't optimize this check away * we catch the NAN energies. * The epot>GMX_REAL_MAX check catches inf values, * which should nicely result in embU=0 through the exp below, * but it does not hurt to check anyhow. */ /* Non-bonded Interaction usually diverge at r=0. * With tabulated interaction functions the first few entries * should be capped in a consistent fashion between * repulsion, dispersion and Coulomb to avoid accidental * negative values in the total energy. * The table generation code in tables.c does this. * With user tbales the user should take care of this. 
*/ if (epot != epot || epot > GMX_REAL_MAX) { bEnergyOutOfBounds = TRUE; } if (bEnergyOutOfBounds) { if (debug) { fprintf(debug, "\n time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n", t, (int)step, epot); } embU = 0; } else { embU = exp(-beta*epot); sum_embU += embU; /* Determine the weighted energy contributions of each energy group */ e = 0; sum_UgembU[e++] += epot*embU; if (fr->bBHAM) { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egBHAMSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egBHAMLR][GID(i, gid_tp, ngid)])*embU; } } else { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egLJSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egLJLR][GID(i, gid_tp, ngid)])*embU; } } if (bDispCorr) { sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU; } if (bCharge) { for (i = 0; i < ngid; i++) { sum_UgembU[e++] += (enerd->grpp.ener[egCOULSR][GID(i, gid_tp, ngid)] + enerd->grpp.ener[egCOULLR][GID(i, gid_tp, ngid)])*embU; } if (bRFExcl) { sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU; } if (EEL_FULL(fr->eeltype)) { sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU; } } } if (embU == 0 || beta*epot > bU_bin_limit) { bin[0]++; } else { i = (int)((bU_logV_bin_limit - (beta*epot - logV + refvolshift))*invbinw + 0.5); if (i < 0) { i = 0; } if (i >= nbin) { realloc_bins(&bin, &nbin, i+10); } bin[i]++; } if (debug) { fprintf(debug, "TPI %7d %12.5e %12.5f %12.5f %12.5f\n", (int)step, epot, x_tp[XX], x_tp[YY], x_tp[ZZ]); } if (dump_pdb && epot <= dump_ener) { sprintf(str, "t%g_step%d.pdb", t, (int)step); sprintf(str2, "t: %f step %d ener: %f", t, (int)step, epot); write_sto_conf_mtop(str, str2, top_global, state->x, state->v, inputrec->ePBC, state->box); } step++; if ((step/stepblocksize) % cr->nnodes != cr->nodeid) { /* Skip all steps assigned to the other MPI ranks */ step += (cr->nnodes - 1)*stepblocksize; } } if (PAR(cr)) { /* When running in parallel sum the energies over the processes */ gmx_sumd(1, &sum_embU, cr); gmx_sumd(nener, 
sum_UgembU, cr); } frame++; V_all += V; VembU_all += V*sum_embU/nsteps; if (fp_tpi) { if (bVerbose || frame%10 == 0 || frame < 10) { fprintf(stderr, "mu %10.3e <mu> %10.3e\n", -log(sum_embU/nsteps)/beta, -log(VembU_all/V_all)/beta); } fprintf(fp_tpi, "%10.3f %12.5e %12.5e %12.5e %12.5e", t, VembU_all == 0 ? 20/beta : -log(VembU_all/V_all)/beta, sum_embU == 0 ? 20/beta : -log(sum_embU/nsteps)/beta, sum_embU/nsteps, V); for (e = 0; e < nener; e++) { fprintf(fp_tpi, " %12.5e", sum_UgembU[e]/nsteps); } fprintf(fp_tpi, "\n"); fflush(fp_tpi); } bNotLastFrame = read_next_frame(oenv, status, &rerun_fr); } /* End of the loop */ walltime_accounting_end(walltime_accounting); close_trj(status); if (fp_tpi != NULL) { gmx_fio_fclose(fp_tpi); } if (fplog != NULL) { fprintf(fplog, "\n"); fprintf(fplog, " <V> = %12.5e nm^3\n", V_all/frame); fprintf(fplog, " <mu> = %12.5e kJ/mol\n", -log(VembU_all/V_all)/beta); } /* Write the Boltzmann factor histogram */ if (PAR(cr)) { /* When running in parallel sum the bins over the processes */ i = nbin; global_max(cr, &i); realloc_bins(&bin, &nbin, i); gmx_sumd(nbin, bin, cr); } if (MASTER(cr)) { fp_tpi = xvgropen(opt2fn("-tpid", nfile, fnm), "TPI energy distribution", "\\betaU - log(V/<V>)", "count", oenv); sprintf(str, "number \\betaU > %g: %9.3e", bU_bin_limit, bin[0]); xvgr_subtitle(fp_tpi, str, oenv); xvgr_legend(fp_tpi, 2, (const char **)tpid_leg, oenv); for (i = nbin-1; i > 0; i--) { bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame); fprintf(fp_tpi, "%6.2f %10d %12.5e\n", bUlogV, (int)(bin[i]+0.5), bin[i]*exp(-bUlogV)*V_all/VembU_all); } gmx_fio_fclose(fp_tpi); } sfree(bin); sfree(sum_UgembU); walltime_accounting_set_nsteps_done(walltime_accounting, frame*inputrec->nsteps); return 0; }
gmx_bool pme_load_balance(pme_load_balancing_t pme_lb, t_commrec *cr, FILE *fp_err, FILE *fp_log, t_inputrec *ir, t_state *state, double cycles, interaction_const_t *ic, nonbonded_verlet_t *nbv, gmx_pme_t *pmedata, gmx_large_int_t step) { gmx_bool OK; pme_setup_t *set; double cycles_fast; char buf[STRLEN], sbuf[22]; real rtab; gmx_bool bUsesSimpleTables = TRUE; if (pme_lb->stage == pme_lb->nstage) { return FALSE; } if (PAR(cr)) { gmx_sumd(1, &cycles, cr); cycles /= cr->nnodes; } set = &pme_lb->setup[pme_lb->cur]; set->count++; rtab = ir->rlistlong + ir->tabext; if (set->count % 2 == 1) { /* Skip the first cycle, because the first step after a switch * is much slower due to allocation and/or caching effects. */ return TRUE; } sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf)); print_grid(fp_err, fp_log, buf, "timed with", set, cycles); if (set->count <= 2) { set->cycles = cycles; } else { if (cycles*PME_LB_ACCEL_TOL < set->cycles && pme_lb->stage == pme_lb->nstage - 1) { /* The performance went up a lot (due to e.g. DD load balancing). * Add a stage, keep the minima, but rescan all setups. */ pme_lb->nstage++; if (debug) { fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n" "Increased the number stages to %d" " and ignoring the previous performance\n", set->grid[XX], set->grid[YY], set->grid[ZZ], cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL, pme_lb->nstage); } } set->cycles = min(set->cycles, cycles); } if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles) { pme_lb->fastest = pme_lb->cur; if (DOMAINDECOMP(cr)) { /* We found a new fastest setting, ensure that with subsequent * shorter cut-off's the dynamic load balancing does not make * the use of the current cut-off impossible. This solution is * a trade-off, as the PME load balancing and DD domain size * load balancing can interact in complex ways. 
* With the Verlet kernels, DD load imbalance will usually be * mainly due to bonded interaction imbalance, which will often * quickly push the domain boundaries beyond the limit for the * optimal, PME load balanced, cut-off. But it could be that * better overal performance can be obtained with a slightly * shorter cut-off and better DD load balancing. */ change_dd_dlb_cutoff_limit(cr); } } cycles_fast = pme_lb->setup[pme_lb->fastest].cycles; /* Check in stage 0 if we should stop scanning grids. * Stop when the time is more than SLOW_FAC longer than the fastest. */ if (pme_lb->stage == 0 && pme_lb->cur > 0 && cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC) { pme_lb->n = pme_lb->cur + 1; /* Done with scanning, go to stage 1 */ switch_to_stage1(pme_lb); } if (pme_lb->stage == 0) { int gridsize_start; gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ]; do { if (pme_lb->cur+1 < pme_lb->n) { /* We had already generated the next setup */ OK = TRUE; } else { /* Find the next setup */ OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order); } if (OK && ir->ePBC != epbcNONE) { OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong) <= max_cutoff2(ir->ePBC, state->box)); if (!OK) { pme_lb->elimited = epmelblimBOX; } } if (OK) { pme_lb->cur++; if (DOMAINDECOMP(cr)) { OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong); if (!OK) { /* Failed: do not use this setup */ pme_lb->cur--; pme_lb->elimited = epmelblimDD; } } } if (!OK) { /* We hit the upper limit for the cut-off, * the setup should not go further than cur. 
*/ pme_lb->n = pme_lb->cur + 1; print_loadbal_limited(fp_err, fp_log, step, pme_lb); /* Switch to the next stage */ switch_to_stage1(pme_lb); } } while (OK && !(pme_lb->setup[pme_lb->cur].grid[XX]* pme_lb->setup[pme_lb->cur].grid[YY]* pme_lb->setup[pme_lb->cur].grid[ZZ] < gridsize_start*PME_LB_GRID_SCALE_FAC && pme_lb->setup[pme_lb->cur].grid_efficiency < pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC)); } if (pme_lb->stage > 0 && pme_lb->end == 1) { pme_lb->cur = 0; pme_lb->stage = pme_lb->nstage; } else if (pme_lb->stage > 0 && pme_lb->end > 1) { /* If stage = nstage-1: * scan over all setups, rerunning only those setups * which are not much slower than the fastest * else: * use the next setup */ do { pme_lb->cur++; if (pme_lb->cur == pme_lb->end) { pme_lb->stage++; pme_lb->cur = pme_lb->start; } } while (pme_lb->stage == pme_lb->nstage - 1 && pme_lb->setup[pme_lb->cur].count > 0 && pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC); if (pme_lb->stage == pme_lb->nstage) { /* We are done optimizing, use the fastest setup we found */ pme_lb->cur = pme_lb->fastest; } } if (DOMAINDECOMP(cr) && pme_lb->stage > 0) { OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong); if (!OK) { /* Failsafe solution */ if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage) { pme_lb->stage--; } pme_lb->fastest = 0; pme_lb->start = 0; pme_lb->end = pme_lb->cur; pme_lb->cur = pme_lb->start; pme_lb->elimited = epmelblimDD; print_loadbal_limited(fp_err, fp_log, step, pme_lb); } } /* Change the Coulomb cut-off and the PME grid */ set = &pme_lb->setup[pme_lb->cur]; ic->rcoulomb = set->rcut_coulomb; ic->rlist = set->rlist; ic->rlistlong = set->rlistlong; ir->nstcalclr = set->nstcalclr; ic->ewaldcoeff = set->ewaldcoeff; bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0); if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA) { nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv, ic); 
} else { init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab); } if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1) { init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab); } if (cr->duty & DUTY_PME) { if (pme_lb->setup[pme_lb->cur].pmedata == NULL) { /* Generate a new PME data structure, * copying part of the old pointers. */ gmx_pme_reinit(&set->pmedata, cr, pme_lb->setup[0].pmedata, ir, set->grid); } *pmedata = set->pmedata; } else { /* Tell our PME-only node to switch grid */ gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff); } if (debug) { print_grid(NULL, debug, "", "switched to", set, -1); } if (pme_lb->stage == pme_lb->nstage) { print_grid(fp_err, fp_log, "", "optimal", set, -1); } return TRUE; }
void do_shakefirst(FILE *fplog,gmx_constr_t constr, t_inputrec *inputrec,t_mdatoms *md, t_state *state,rvec buf[],rvec f[], t_graph *graph,t_commrec *cr,t_nrnb *nrnb, t_forcerec *fr,t_idef *idef) { int i,m,start,end,step; double mass,tmass,vcm[4]; real dt=inputrec->delta_t; real dvdlambda; start = md->start; end = md->homenr + start; if (debug) fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n", start,md->homenr,end); /* Do a first SHAKE to reset particles... */ step = inputrec->init_step; if (fplog) fprintf(fplog,"\nConstraining the starting coordinates (step %d)\n",step); dvdlambda = 0; constrain(NULL,TRUE,FALSE,constr,idef, inputrec,cr,step,0,md, state->x,state->x,NULL, state->box,state->lambda,&dvdlambda, NULL,NULL,nrnb,econqCoord); if (EI_STATE_VELOCITY(inputrec->eI)) { for(i=start; (i<end); i++) { for(m=0; (m<DIM); m++) { /* Reverse the velocity */ state->v[i][m] = -state->v[i][m]; /* Store the position at t-dt in buf */ buf[i][m] = state->x[i][m] + dt*state->v[i][m]; } } /* Shake the positions at t=-dt with the positions at t=0 * as reference coordinates. 
*/ if (fplog) fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %d)\n", step); dvdlambda = 0; constrain(NULL,TRUE,FALSE,constr,idef, inputrec,cr,step,-1,md, state->x,buf,NULL, state->box,state->lambda,&dvdlambda, state->v,NULL,nrnb,econqCoord); for(m=0; (m<4); m++) vcm[m] = 0; for(i=start; i<end; i++) { mass = md->massT[i]; for(m=0; m<DIM; m++) { /* Re-reverse the velocities */ state->v[i][m] = -state->v[i][m]; vcm[m] += state->v[i][m]*mass; } vcm[3] += mass; } if (inputrec->nstcomm != 0 || debug) { /* Compute the global sum of vcm */ if (debug) fprintf(debug,"vcm: %8.3f %8.3f %8.3f," " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],vcm[3]); if (PAR(cr)) gmx_sumd(4,vcm,cr); tmass = vcm[3]; for(m=0; (m<DIM); m++) vcm[m] /= tmass; if (debug) fprintf(debug,"vcm: %8.3f %8.3f %8.3f," " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],tmass); if (inputrec->nstcomm != 0) { /* Now we have the velocity of center of mass, let's remove it */ for(i=start; (i<end); i++) { for(m=0; (m<DIM); m++) state->v[i][m] -= vcm[m]; } } } } }
void do_force(FILE *fplog,t_commrec *cr, t_inputrec *inputrec, int step,t_nrnb *nrnb,gmx_wallcycle_t wcycle, gmx_localtop_t *top, gmx_groups_t *groups, matrix box,rvec x[],history_t *hist, rvec f[],rvec buf[], tensor vir_force, t_mdatoms *mdatoms, gmx_enerdata_t *enerd,t_fcdata *fcd, real lambda,t_graph *graph, t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot, real t,FILE *field,gmx_edsam_t ed, int flags) { static rvec box_size; int cg0,cg1,i,j; int start,homenr; static double mu[2*DIM]; rvec mu_tot_AB[2]; bool bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS,bDoForces; matrix boxs; real e,v,dvdl; t_pbc pbc; float cycles_ppdpme,cycles_pme,cycles_force; start = mdatoms->start; homenr = mdatoms->homenr; bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog)); clear_mat(vir_force); if (PARTDECOMP(cr)) { pd_cg_range(cr,&cg0,&cg1); } else { cg0 = 0; if (DOMAINDECOMP(cr)) cg1 = cr->dd->ncg_tot; else cg1 = top->cgs.nr; if (fr->n_tpi > 0) cg1--; } bStateChanged = (flags & GMX_FORCE_STATECHANGED); bNS = (flags & GMX_FORCE_NS); bFillGrid = (bNS && bStateChanged); bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)); bDoForces = (flags & GMX_FORCE_FORCES); if (bStateChanged) { update_forcerec(fplog,fr,box); /* Calculate total (local) dipole moment in a temporary common array. * This makes it possible to sum them over nodes faster. */ calc_mu(start,homenr, x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed, mu,mu+DIM); } if (fr->ePBC != epbcNONE) { /* Compute shift vectors every step, * because of pressure coupling or box deformation! 
*/ if (DYNAMIC_BOX(*inputrec) && bStateChanged) calc_shifts(box,fr->shift_vec); if (bCalcCGCM) { put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box, &(top->cgs),x,fr->cg_cm); inc_nrnb(nrnb,eNR_CGCM,homenr); inc_nrnb(nrnb,eNR_RESETX,cg1-cg0); } else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) { unshift_self(graph,box,x); } } else if (bCalcCGCM) { calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm); inc_nrnb(nrnb,eNR_CGCM,homenr); } if (bCalcCGCM) { if (PAR(cr)) { move_cgcm(fplog,cr,fr->cg_cm); } if (gmx_debug_at) pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr); } #ifdef GMX_MPI if (!(cr->duty & DUTY_PME)) { /* Send particle coordinates to the pme nodes. * Since this is only implemented for domain decomposition * and domain decomposition does not use the graph, * we do not need to worry about shifting. */ wallcycle_start(wcycle,ewcPP_PMESENDX); GMX_MPE_LOG(ev_send_coordinates_start); bBS = (inputrec->nwall == 2); if (bBS) { copy_mat(box,boxs); svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]); } gmx_pme_send_x(cr,bBS ? 
boxs : box,x,mdatoms->nChargePerturbed,lambda); GMX_MPE_LOG(ev_send_coordinates_finish); wallcycle_stop(wcycle,ewcPP_PMESENDX); } #endif /* GMX_MPI */ /* Communicate coordinates and sum dipole if necessary */ if (PAR(cr)) { wallcycle_start(wcycle,ewcMOVEX); if (DOMAINDECOMP(cr)) { dd_move_x(cr->dd,box,x,buf); } else { move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb); } /* When we don't need the total dipole we sum it in global_stat */ if (NEED_MUTOT(*inputrec)) gmx_sumd(2*DIM,mu,cr); wallcycle_stop(wcycle,ewcMOVEX); } for(i=0; i<2; i++) for(j=0;j<DIM;j++) mu_tot_AB[i][j] = mu[i*DIM + j]; if (fr->efep == efepNO) copy_rvec(mu_tot_AB[0],mu_tot); else for(j=0; j<DIM; j++) mu_tot[j] = (1.0 - lambda)*mu_tot_AB[0][j] + lambda*mu_tot_AB[1][j]; /* Reset energies */ reset_energies(&(inputrec->opts),fr,bNS,enerd,MASTER(cr)); if (bNS) { wallcycle_start(wcycle,ewcNS); if (graph && bStateChanged) /* Calculate intramolecular shift vectors to make molecules whole */ mk_mshift(fplog,graph,fr->ePBC,box,x); /* Reset long range forces if necessary */ if (fr->bTwinRange) { clear_rvecs(fr->f_twin_n,fr->f_twin); clear_rvecs(SHIFTS,fr->fshift_twin); } /* Do the actual neighbour searching and if twin range electrostatics * also do the calculation of long range forces and energies. */ dvdl = 0; ns(fplog,fr,x,f,box,groups,&(inputrec->opts),top,mdatoms, cr,nrnb,step,lambda,&dvdl,&enerd->grpp,bFillGrid,bDoForces); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"LR non-bonded",0,dvdl); enerd->dvdl_lr = dvdl; enerd->term[F_DVDL] += dvdl; wallcycle_stop(wcycle,ewcNS); } if (DOMAINDECOMP(cr)) { if (!(cr->duty & DUTY_PME)) { wallcycle_start(wcycle,ewcPPDURINGPME); dd_force_flop_start(cr->dd,nrnb); } } /* Start the force cycle counter. * This counter is stopped in do_forcelow_level. * No parallel communication should occur while this counter is running, * since that will interfere with the dynamic load balancing. 
*/ wallcycle_start(wcycle,ewcFORCE); if (bDoForces) { /* Reset PME/Ewald forces if necessary */ if (fr->bF_NoVirSum) { GMX_BARRIER(cr->mpi_comm_mygroup); if (fr->bDomDec) clear_rvecs(fr->f_novirsum_n,fr->f_novirsum); else clear_rvecs(homenr,fr->f_novirsum+start); GMX_BARRIER(cr->mpi_comm_mygroup); } /* Copy long range forces into normal buffers */ if (fr->bTwinRange) { for(i=0; i<fr->f_twin_n; i++) copy_rvec(fr->f_twin[i],f[i]); for(i=0; i<SHIFTS; i++) copy_rvec(fr->fshift_twin[i],fr->fshift[i]); } else { if (DOMAINDECOMP(cr)) clear_rvecs(cr->dd->nat_tot,f); else clear_rvecs(mdatoms->nr,f); clear_rvecs(SHIFTS,fr->fshift); } clear_rvec(fr->vir_diag_posres); GMX_BARRIER(cr->mpi_comm_mygroup); } if (inputrec->ePull == epullCONSTRAINT) clear_pull_forces(inputrec->pull); /* update QMMMrec, if necessary */ if(fr->bQMMM) update_QMMMrec(cr,fr,x,mdatoms,box,top); if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) { /* Position restraints always require full pbc */ set_pbc(&pbc,inputrec->ePBC,box); v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms, top->idef.iparams_posres, (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres, inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl, fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB); if (bSepDVDL) { fprintf(fplog,sepdvdlformat, interaction_function[F_POSRES].longname,v,dvdl); } enerd->term[F_POSRES] += v; enerd->term[F_DVDL] += dvdl; inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2); } /* Compute the bonded and non-bonded forces */ do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef), cr,nrnb,wcycle,mdatoms,&(inputrec->opts), x,hist,f,enerd,fcd,box,lambda,graph,&(top->excls),mu_tot_AB, flags,&cycles_force); GMX_BARRIER(cr->mpi_comm_mygroup); if (ed) { do_flood(fplog,cr,x,f,ed,box,step); } if (DOMAINDECOMP(cr)) { dd_force_flop_stop(cr->dd,nrnb); if (wcycle) dd_cycles_add(cr->dd,cycles_force,ddCyclF); } if (bDoForces) { /* Compute forces due to electric field */ calc_f_el(MASTER(cr) ? 
field : NULL, start,homenr,mdatoms->chargeA,x,f,inputrec->ex,inputrec->et,t); /* When using PME/Ewald we compute the long range virial there. * otherwise we do it based on long range forces from twin range * cut-off based calculation (or not at all). */ /* Communicate the forces */ if (PAR(cr)) { wallcycle_start(wcycle,ewcMOVEF); if (DOMAINDECOMP(cr)) { dd_move_f(cr->dd,f,buf,fr->fshift); /* Position restraint do not introduce inter-cg forces */ if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl) dd_move_f(cr->dd,fr->f_novirsum,buf,NULL); } else { move_f(fplog,cr,GMX_LEFT,GMX_RIGHT,f,buf,nrnb); } wallcycle_stop(wcycle,ewcMOVEF); } } if (bDoForces) { if (vsite) { wallcycle_start(wcycle,ewcVSITESPREAD); spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb, &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr); wallcycle_stop(wcycle,ewcVSITESPREAD); } /* Calculation of the virial must be done after vsites! */ calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f, vir_force,graph,box,nrnb,fr,inputrec->ePBC); } if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) { /* Calculate the center of mass forces, this requires communication, * which is why pull_potential is called close to other communication. * The virial contribution is calculated directly, * which is why we call pull_potential after calc_virial. */ set_pbc(&pbc,inputrec->ePBC,box); dvdl = 0; enerd->term[F_COM_PULL] = pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc, cr,t,lambda,x,f,vir_force,&dvdl); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl); enerd->term[F_DVDL] += dvdl; } if (!(cr->duty & DUTY_PME)) { cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME); dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME); } #ifdef GMX_MPI if (PAR(cr) && !(cr->duty & DUTY_PME)) { /* In case of node-splitting, the PP nodes receive the long-range * forces, virial and energy from the PME nodes here. 
*/ wallcycle_start(wcycle,ewcPP_PMEWAITRECVF); dvdl = 0; gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl, &cycles_pme); if (bSepDVDL) fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl); enerd->term[F_COUL_RECIP] += e; enerd->term[F_DVDL] += dvdl; if (wcycle) dd_cycles_add(cr->dd,cycles_pme,ddCyclPME); wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF); } #endif if (bDoForces && fr->bF_NoVirSum) { if (vsite) { /* Spread the mesh force on virtual sites to the other particles... * This is parallellized. MPI communication is performed * if the constructing atoms aren't local. */ wallcycle_start(wcycle,ewcVSITESPREAD); spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb, &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr); wallcycle_stop(wcycle,ewcVSITESPREAD); } /* Now add the forces, this is local */ if (fr->bDomDec) { sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum); } else { sum_forces(start,start+homenr,f,fr->f_novirsum); } if (EEL_FULL(fr->eeltype)) { /* Add the mesh contribution to the virial */ m_add(vir_force,fr->vir_el_recip,vir_force); } if (debug) pr_rvecs(debug,0,"vir_force",vir_force,DIM); } /* Sum the potential energy terms from group contributions */ sum_epot(&(inputrec->opts),enerd); if (fr->print_force >= 0 && bDoForces) print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f); }
void do_shakefirst(FILE *log,bool bTYZ,real lambda,real ener[], t_parm *parm,t_nsborder *nsb,t_mdatoms *md, rvec x[],rvec vold[],rvec buf[],rvec f[], rvec v[],t_graph *graph,t_commrec *cr,t_nrnb *nrnb, t_groups *grps,t_forcerec *fr,t_topology *top, t_edsamyn *edyn,t_pull *pulldata) { int i,m,start,homenr,end,step; tensor shake_vir; double mass,tmass,vcm[4]; real dt=parm->ir.delta_t; real dt_1; if (count_constraints(top,cr)) { start = START(nsb); homenr = HOMENR(nsb); end = start+homenr; if (debug) fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",start,homenr,end); /* Do a first SHAKE to reset particles... */ step = -2; if(log) fprintf(log,"\nConstraining the starting coordinates (step %d)\n",step); clear_mat(shake_vir); update(nsb->natoms,start,homenr,step,lambda,&ener[F_DVDL], parm,1.0,md,x,graph, NULL,NULL,vold,NULL,x,top,grps,shake_vir,cr,nrnb,bTYZ, FALSE,edyn,pulldata,FALSE); /* Compute coordinates at t=-dt, store them in buf */ /* for(i=0; (i<nsb->natoms); i++) {*/ for(i=start; (i<end); i++) { for(m=0; (m<DIM); m++) { f[i][m]=x[i][m]; buf[i][m]=x[i][m]-dt*v[i][m]; } } /* Shake the positions at t=-dt with the positions at t=0 * as reference coordinates. 
*/ step = -1; if(log) fprintf(log,"\nConstraining the coordinates at t0-dt (step %d)\n",step); clear_mat(shake_vir); update(nsb->natoms,start,homenr, step,lambda,&ener[F_DVDL],parm,1.0,md,f,graph, NULL,NULL,vold,NULL,buf,top,grps,shake_vir,cr,nrnb,bTYZ,FALSE, edyn,pulldata,FALSE); /* Compute the velocities at t=-dt/2 using the coordinates at * t=-dt and t=0 * Compute velocity of center of mass and total mass */ for(m=0; (m<4); m++) vcm[m] = 0; dt_1=1.0/dt; for(i=start; (i<end); i++) { /*for(i=0; (i<nsb->natoms); i++) {*/ mass = md->massA[i]; for(m=0; (m<DIM); m++) { v[i][m]=(x[i][m]-f[i][m])*dt_1; vcm[m] += v[i][m]*mass; } vcm[3] += mass; } /* Compute the global sum of vcm */ if (debug) fprintf(debug,"vcm: %8.3f %8.3f %8.3f," " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],vcm[3]); if (PAR(cr)) gmx_sumd(4,vcm,cr); tmass = vcm[3]; for(m=0; (m<DIM); m++) vcm[m] /= tmass; if (debug) fprintf(debug,"vcm: %8.3f %8.3f %8.3f," " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],tmass); /* Now we have the velocity of center of mass, let's remove it */ for(i=start; (i<end); i++) { for(m=0; (m<DIM); m++) v[i][m] -= vcm[m]; } } }
/* calculates center of mass of selection index from all coordinates x */ void pull_calc_coms(t_commrec *cr, t_pull *pull, t_mdatoms *md, t_pbc *pbc, double t, rvec x[], rvec *xp) { int g; real twopi_box = 0; if (pull->rbuf == NULL) { snew(pull->rbuf, pull->ngroup); } if (pull->dbuf == NULL) { snew(pull->dbuf, 3*pull->ngroup); } if (pull->bRefAt && pull->bSetPBCatoms) { pull_set_pbcatoms(cr, pull, x, pull->rbuf); if (cr != NULL && DOMAINDECOMP(cr)) { /* We can keep these PBC reference coordinates fixed for nstlist * steps, since atoms won't jump over PBC. * This avoids a global reduction at the next nstlist-1 steps. * Note that the exact values of the pbc reference coordinates * are irrelevant, as long all atoms in the group are within * half a box distance of the reference coordinate. */ pull->bSetPBCatoms = FALSE; } } if (pull->cosdim >= 0) { int m; assert(pull->npbcdim <= DIM); for (m = pull->cosdim+1; m < pull->npbcdim; m++) { if (pbc->box[m][pull->cosdim] != 0) { gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions"); } } twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim]; } for (g = 0; g < pull->ngroup; g++) { t_pull_group *pgrp; pgrp = &pull->group[g]; if (pgrp->bCalcCOM) { if (pgrp->epgrppbc != epgrppbcCOS) { dvec com, comp; double wmass, wwmass; rvec x_pbc = { 0, 0, 0 }; int i; clear_dvec(com); clear_dvec(comp); wmass = 0; wwmass = 0; if (pgrp->epgrppbc == epgrppbcREFAT) { /* Set the pbc atom */ copy_rvec(pull->rbuf[g], x_pbc); } for (i = 0; i < pgrp->nat_loc; i++) { int ii, m; real mass, wm; ii = pgrp->ind_loc[i]; mass = md->massT[ii]; if (pgrp->weight_loc == NULL) { wm = mass; wmass += wm; } else { real w; w = pgrp->weight_loc[i]; wm = w*mass; wmass += wm; wwmass += wm*w; } if (pgrp->epgrppbc == epgrppbcNONE) { /* Plain COM: sum the coordinates */ for (m = 0; m < DIM; m++) { com[m] += wm*x[ii][m]; } if (xp) { for (m = 0; m < DIM; m++) { comp[m] += wm*xp[ii][m]; } } } else { rvec dx; /* Sum the difference with the reference atom */ 
pbc_dx(pbc, x[ii], x_pbc, dx); for (m = 0; m < DIM; m++) { com[m] += wm*dx[m]; } if (xp) { /* For xp add the difference between xp and x to dx, * such that we use the same periodic image, * also when xp has a large displacement. */ for (m = 0; m < DIM; m++) { comp[m] += wm*(dx[m] + xp[ii][m] - x[ii][m]); } } } } /* We do this check after the loop above to avoid more nesting. * If we have a single-atom group the mass is irrelevant, so * we can remove the mass factor to avoid division by zero. * Note that with constraint pulling the mass does matter, but * in that case a check group mass != 0 has been done before. */ if (pgrp->nat == 1 && pgrp->nat_loc == 1 && wmass == 0) { int m; /* Copy the single atom coordinate */ for (m = 0; m < DIM; m++) { com[m] = x[pgrp->ind_loc[0]][m]; } /* Set all mass factors to 1 to get the correct COM */ wmass = 1; wwmass = 1; } if (pgrp->weight_loc == NULL) { wwmass = wmass; } /* Copy local sums to a buffer for global summing */ copy_dvec(com, pull->dbuf[g*3]); copy_dvec(comp, pull->dbuf[g*3+1]); pull->dbuf[g*3+2][0] = wmass; pull->dbuf[g*3+2][1] = wwmass; pull->dbuf[g*3+2][2] = 0; } else { /* Cosine weighting geometry */ double cm, sm, cmp, smp, ccm, csm, ssm, csw, snw; int i; cm = 0; sm = 0; cmp = 0; smp = 0; ccm = 0; csm = 0; ssm = 0; for (i = 0; i < pgrp->nat_loc; i++) { int ii; real mass; ii = pgrp->ind_loc[i]; mass = md->massT[ii]; /* Determine cos and sin sums */ csw = cos(x[ii][pull->cosdim]*twopi_box); snw = sin(x[ii][pull->cosdim]*twopi_box); cm += csw*mass; sm += snw*mass; ccm += csw*csw*mass; csm += csw*snw*mass; ssm += snw*snw*mass; if (xp) { csw = cos(xp[ii][pull->cosdim]*twopi_box); snw = sin(xp[ii][pull->cosdim]*twopi_box); cmp += csw*mass; smp += snw*mass; } } /* Copy local sums to a buffer for global summing */ pull->dbuf[g*3 ][0] = cm; pull->dbuf[g*3 ][1] = sm; pull->dbuf[g*3 ][2] = 0; pull->dbuf[g*3+1][0] = ccm; pull->dbuf[g*3+1][1] = csm; pull->dbuf[g*3+1][2] = ssm; pull->dbuf[g*3+2][0] = cmp; pull->dbuf[g*3+2][1] = 
smp; pull->dbuf[g*3+2][2] = 0; } } } if (cr && PAR(cr)) { /* Sum the contributions over the nodes */ gmx_sumd(pull->ngroup*3*DIM, pull->dbuf[0], cr); } for (g = 0; g < pull->ngroup; g++) { t_pull_group *pgrp; pgrp = &pull->group[g]; if (pgrp->nat > 0 && pgrp->bCalcCOM) { if (pgrp->epgrppbc != epgrppbcCOS) { double wmass, wwmass; int m; /* Determine the inverse mass */ wmass = pull->dbuf[g*3+2][0]; wwmass = pull->dbuf[g*3+2][1]; pgrp->mwscale = 1.0/wmass; /* invtm==0 signals a frozen group, so then we should keep it zero */ if (pgrp->invtm != 0) { pgrp->wscale = wmass/wwmass; pgrp->invtm = wwmass/(wmass*wmass); } /* Divide by the total mass */ for (m = 0; m < DIM; m++) { pgrp->x[m] = pull->dbuf[g*3 ][m]*pgrp->mwscale; if (xp) { pgrp->xp[m] = pull->dbuf[g*3+1][m]*pgrp->mwscale; } if (pgrp->epgrppbc == epgrppbcREFAT) { pgrp->x[m] += pull->rbuf[g][m]; if (xp) { pgrp->xp[m] += pull->rbuf[g][m]; } } } } else { /* Cosine weighting geometry */ double csw, snw, wmass, wwmass; int i, ii; /* Determine the optimal location of the cosine weight */ csw = pull->dbuf[g*3][0]; snw = pull->dbuf[g*3][1]; pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box; /* Set the weights for the local atoms */ wmass = sqrt(csw*csw + snw*snw); wwmass = (pull->dbuf[g*3+1][0]*csw*csw + pull->dbuf[g*3+1][1]*csw*snw + pull->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass); pgrp->mwscale = 1.0/wmass; pgrp->wscale = wmass/wwmass; pgrp->invtm = wwmass/(wmass*wmass); /* Set the weights for the local atoms */ csw *= pgrp->invtm; snw *= pgrp->invtm; for (i = 0; i < pgrp->nat_loc; i++) { ii = pgrp->ind_loc[i]; pgrp->weight_loc[i] = csw*cos(twopi_box*x[ii][pull->cosdim]) + snw*sin(twopi_box*x[ii][pull->cosdim]); } if (xp) { csw = pull->dbuf[g*3+2][0]; snw = pull->dbuf[g*3+2][1]; pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box; } } if (debug) { fprintf(debug, "Pull group %d wmass %f invtm %f\n", g, 1.0/pgrp->mwscale, pgrp->invtm); } } } if (pull->bCylinder) { /* Calculate the COMs for the cyclinder reference 
groups */ make_cyl_refgrps(cr, pull, md, pbc, t, x); } }
/* Build the dynamic reference group of every pull coordinate that uses
 * cylinder geometry (eGeom == epullgCYL).
 *
 * For each such coordinate the home atoms of the reference group
 * (pcrd->group[0]) whose radial distance from the cylinder axis is smaller
 * than pull->cylinder_r are collected in pull->dyna[c], together with their
 * radial weights. The weighted sums are written to pull->dbuf_cyl, summed
 * over the ranks when running in parallel, and then used to set x, mwscale,
 * wscale and invtm of pull->dyna[c] and cyl_dev and ffrad of the coordinate.
 *
 * cr    communication record, may be NULL
 * pull  pull data; pull->dbuf_cyl is allocated here on first use
 * md    supplies homenr and the atom masses (massT)
 * pbc   handed to pbc_dx_aiuc() for the distance calculation
 * t     time, enters the reference value as init + rate*t
 * x     atom positions, indexed by local atom index
 */
static void make_cyl_refgrps(t_commrec *cr, t_pull *pull, t_mdatoms *md,
                             t_pbc *pbc, double t, rvec *x)
{
    /* Number of doubles each pull coordinate occupies in the reduction buffer */
    const int   stride = 9;
    int         c, d;
    int         home_begin, home_end;
    double      inv_cyl_r2;
    gmx_ga2la_t ga2la = NULL;

    if (pull->dbuf_cyl == NULL)
    {
        snew(pull->dbuf_cyl, pull->ncoord*stride);
    }

    if (cr && DOMAINDECOMP(cr))
    {
        ga2la = cr->dd->ga2la;
    }

    /* Only local indices in [home_begin, home_end) are considered below */
    home_begin = 0;
    home_end   = md->homenr;

    inv_cyl_r2 = 1/dsqr(pull->cylinder_r);

    /* First pass: accumulate the local contributions of every coordinate */
    for (c = 0; c < pull->ncoord; c++)
    {
        t_pull_coord *pcrd   = &pull->coord[c];
        double       *buf    = pull->dbuf_cyl + c*stride;
        double        sum_a  = 0;
        double        wmass  = 0;
        double        wwmass = 0;
        dvec          radf_fac0, radf_fac1;

        clear_dvec(radf_fac0);
        clear_dvec(radf_fac1);

        if (pcrd->eGeom == epullgCYL)
        {
            /* pref will be the same group for all pull coordinates */
            t_pull_group *pref  = &pull->group[pcrd->group[0]];
            t_pull_group *pgrp  = &pull->group[pcrd->group[1]];
            t_pull_group *pdyna = &pull->dyna[c];
            rvec          axis, ref_x;
            int           a;

            copy_rvec(pcrd->vec, axis);
            pdyna->nat_loc = 0;

            /* Distances are measured relative to the reference location of
             * this cylinder group (ref_x). It is available at this point
             * because the COM of the other group has already been reduced
             * over the ranks. This resolves any PBC issues, so no PBC-atom
             * is needed here.
             */
            for (d = 0; d < DIM; d++)
            {
                ref_x[d] = pgrp->x[d] - pcrd->vec[d]*(pcrd->init + pcrd->rate*t);
            }

            /* Visit every atom of the main reference group */
            for (a = 0; a < pref->nat; a++)
            {
                int    atom = pref->ind[a];
                double inp, dr2, dr2_rel;
                dvec   dr;
                rvec   dx;

                /* With a global-to-local lookup, atoms that are not home
                 * on this rank get an index that fails the range test.
                 */
                if (ga2la && !ga2la_get_home(ga2la, pref->ind[a], &atom))
                {
                    atom = -1;
                }
                if (atom < home_begin || atom >= home_end)
                {
                    continue;
                }

                pbc_dx_aiuc(pbc, x[atom], ref_x, dx);
                inp = iprod(axis, dx);
                dr2 = 0;
                for (d = 0; d < DIM; d++)
                {
                    /* Radial component: remove the part along the axis */
                    dr[d] = dx[d] - inp*axis[d];
                    dr2  += dr[d]*dr[d];
                }
                dr2_rel = dr2*inv_cyl_r2;

                if (dr2_rel < 1)
                {
                    /* The radial weight function is 1-2x^2+x^4,
                     * where x=r/cylinder_r. Since this function depends
                     * on the radial component, we also get radial forces
                     * on both groups.
                     */
                    const double mass      = md->massT[atom];
                    const double weight    = 1 + (-2 + dr2_rel)*dr2_rel;
                    const double dweight_r = (-4 + 4*dr2_rel)*inv_cyl_r2;
                    const int    n         = pdyna->nat_loc;
                    dvec         mdw;

                    /* Grow the per-atom arrays when they are full */
                    if (n >= pdyna->nalloc_loc)
                    {
                        pdyna->nalloc_loc = over_alloc_large(n+1);
                        srenew(pdyna->ind_loc, pdyna->nalloc_loc);
                        srenew(pdyna->weight_loc, pdyna->nalloc_loc);
                        srenew(pdyna->mdw, pdyna->nalloc_loc);
                        srenew(pdyna->dv, pdyna->nalloc_loc);
                    }

                    /* Register the atom with its weight */
                    pdyna->ind_loc[n]    = atom;
                    pdyna->weight_loc[n] = weight;

                    /* Running sums for the COM and the masses */
                    sum_a  += mass*weight*inp;
                    wmass  += mass*weight;
                    wwmass += mass*weight*weight;

                    dsvmul(mass*dweight_r, dr, mdw);
                    copy_dvec(mdw, pdyna->mdw[n]);

                    /* At this point only the axial component of the
                     * distance (inp) is known, up to an unknown offset.
                     * The offset is added after the reduction, which is
                     * needed to determine the COM of the cylinder group.
                     */
                    pdyna->dv[n] = inp;

                    for (d = 0; d < DIM; d++)
                    {
                        radf_fac0[d] += mdw[d];
                        radf_fac1[d] += mdw[d]*inp;
                    }

                    pdyna->nat_loc++;
                }
            }
        }

        /* Coordinates without cylinder geometry contribute zeros */
        buf[0] = wmass;
        buf[1] = wwmass;
        buf[2] = sum_a;
        buf[3] = radf_fac0[XX];
        buf[4] = radf_fac0[YY];
        buf[5] = radf_fac0[ZZ];
        buf[6] = radf_fac1[XX];
        buf[7] = radf_fac1[YY];
        buf[8] = radf_fac1[ZZ];
    }

    if (cr != NULL && PAR(cr))
    {
        /* Sum the contributions over the ranks */
        gmx_sumd(pull->ncoord*stride, pull->dbuf_cyl, cr);
    }

    /* Second pass: turn the summed values into group properties */
    for (c = 0; c < pull->ncoord; c++)
    {
        t_pull_coord *pcrd = &pull->coord[c];
        const double *buf  = pull->dbuf_cyl + c*stride;
        t_pull_group *pgrp, *pdyna;
        double        wmass, wwmass, dist;
        rvec          ref_x;

        if (pcrd->eGeom != epullgCYL)
        {
            continue;
        }

        pdyna = &pull->dyna[c];
        pgrp  = &pull->group[pcrd->group[1]];

        wmass  = buf[0];
        wwmass = buf[1];

        pdyna->mwscale = 1.0/wmass;
        /* Cylinder pulling can't be used with constraints, but we set
         * wscale and invtm anyhow, in case someone would like to use them.
         */
        pdyna->wscale = wmass/wwmass;
        pdyna->invtm  = wwmass/(wmass*wmass);

        /* Keep the deviation of the COM from the reference location used
         * in the first pass; it is needed when the radial forces are
         * applied to the atoms in the cylinder group.
         */
        pcrd->cyl_dev = 0;
        for (d = 0; d < DIM; d++)
        {
            ref_x[d]       = pgrp->x[d] - pcrd->vec[d]*(pcrd->init + pcrd->rate*t);
            dist           = -pcrd->vec[d]*buf[2]*pdyna->mwscale;
            pdyna->x[d]    = ref_x[d] - dist;
            pcrd->cyl_dev += dist;
        }

        /* With the exact COM of the cylinder reference group known, set
         * the radial force factor (ffrad) that, multiplied with the axial
         * pull force, gives the radial force on the pulled (non-cylinder)
         * group.
         */
        for (d = 0; d < DIM; d++)
        {
            pcrd->ffrad[d] = (buf[6+d] + buf[3+d]*pcrd->cyl_dev)/wmass;
        }

        if (debug)
        {
            fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
                    c, pdyna->x[0], pdyna->x[1], pdyna->x[2], 1.0/pdyna->invtm);
            fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n",
                    pcrd->ffrad[XX], pcrd->ffrad[YY], pcrd->ffrad[ZZ]);
        }
    }
}