//! Construct a reduction mask for which parts (blocks) of the force array are touched on which thread task
static void
calc_bonded_reduction_mask(int natoms,
                           f_thread_t *f_thread,
                           const t_idef *idef,
                           int thread, int nthread)
{
    static_assert(BITMASK_SIZE == GMX_OPENMP_MAX_THREADS, "For the error message below we assume these two are equal.");

    if (nthread > BITMASK_SIZE)
    {
#pragma omp master
        gmx_fatal(FARGS, "You are using %d OpenMP threads, which is larger than GMX_OPENMP_MAX_THREADS (%d). Decrease the number of OpenMP threads or rebuild GROMACS with a larger value for GMX_OPENMP_MAX_THREADS.",
                  nthread, GMX_OPENMP_MAX_THREADS);
#pragma omp barrier
    }
    GMX_ASSERT(nthread <= BITMASK_SIZE, "We need at least nthread bits in the mask");

    int nblock = (natoms + reduction_block_size - 1) >> reduction_block_bits;

    if (nblock > f_thread->block_nalloc)
    {
        f_thread->block_nalloc = over_alloc_large(nblock);
        srenew(f_thread->mask,        f_thread->block_nalloc);
        srenew(f_thread->block_index, f_thread->block_nalloc);
        sfree_aligned(f_thread->f);
        snew_aligned(f_thread->f, f_thread->block_nalloc*reduction_block_size, 128);
    }

    gmx_bitmask_t *mask = f_thread->mask;

    for (int b = 0; b < nblock; b++)
    {
        bitmask_clear(&mask[b]);
    }

    for (int ftype = 0; ftype < F_NRE; ftype++)
    {
        if (ftype_is_bonded_potential(ftype))
        {
            int nb = idef->il[ftype].nr;
            if (nb > 0)
            {
                int nat1 = interaction_function[ftype].nratoms + 1;

                int nb0 = idef->il_thread_division[ftype*(nthread + 1) + thread];
                int nb1 = idef->il_thread_division[ftype*(nthread + 1) + thread + 1];

                for (int i = nb0; i < nb1; i += nat1)
                {
                    for (int a = 1; a < nat1; a++)
                    {
                        bitmask_set_bit(&mask[idef->il[ftype].iatoms[i+a] >> reduction_block_bits], thread);
                    }
                }
            }
        }
    }
}
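/* A minimal, self-contained sketch of the block-mask idea above, assuming a
 * hypothetical 32-thread limit (one bit per thread in a plain uint32_t) and
 * 5 block bits (32 atoms per reduction block); the real code uses
 * gmx_bitmask_t and reduction_block_bits instead. During the force
 * reduction, block b only needs contributions from the threads whose bit is
 * set in mask[b].
 */
#include <cstdint>
#include <vector>

static const int kBlockBits = 5; /* assumed stand-in for reduction_block_bits */

static void markTouchedBlocks(const std::vector<int> &atomIndices, int thread,
                              std::vector<uint32_t>  &mask)
{
    for (int a : atomIndices)
    {
        /* Atom a lives in block a >> kBlockBits; flag it as touched by thread */
        mask[a >> kBlockBits] |= (1u << thread);
    }
}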
static void make_cyl_refgrps(t_commrec *cr, t_pull *pull, t_mdatoms *md,
                             t_pbc *pbc, double t, rvec *x, rvec *xp)
{
    int           c, i, ii, m, start, end;
    rvec          g_x, dx, dir;
    double        r0_2, sum_a, sum_ap, dr2, mass, weight, wmass, wwmass, inp;
    t_pull_coord *pcrd;
    t_pull_group *pref, *pgrp, *pdyna;
    gmx_ga2la_t   ga2la = NULL;

    if (pull->dbuf_cyl == NULL)
    {
        snew(pull->dbuf_cyl, pull->ncoord*4);
    }

    if (cr && DOMAINDECOMP(cr))
    {
        ga2la = cr->dd->ga2la;
    }

    start = 0;
    end   = md->homenr;

    r0_2 = dsqr(pull->cyl_r0);

    /* loop over all groups to make a reference group for each */
    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];

        /* pref will be the same group for all pull coordinates */
        pref  = &pull->group[pcrd->group[0]];
        pgrp  = &pull->group[pcrd->group[1]];
        pdyna = &pull->dyna[c];
        copy_rvec(pcrd->vec, dir);
        sum_a          = 0;
        sum_ap         = 0;
        wmass          = 0;
        wwmass         = 0;
        pdyna->nat_loc = 0;

        for (m = 0; m < DIM; m++)
        {
            g_x[m] = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
        }

        /* loop over all atoms in the main ref group */
        for (i = 0; i < pref->nat; i++)
        {
            ii = pref->ind[i];
            if (ga2la)
            {
                if (!ga2la_get_home(ga2la, pref->ind[i], &ii))
                {
                    ii = -1;
                }
            }
            if (ii >= start && ii < end)
            {
                pbc_dx_aiuc(pbc, x[ii], g_x, dx);
                inp = iprod(dir, dx);
                dr2 = 0;
                for (m = 0; m < DIM; m++)
                {
                    dr2 += dsqr(dx[m] - inp*dir[m]);
                }

                if (dr2 < r0_2)
                {
                    /* add to index, to sum of COM, to weight array */
                    if (pdyna->nat_loc >= pdyna->nalloc_loc)
                    {
                        pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1);
                        srenew(pdyna->ind_loc,    pdyna->nalloc_loc);
                        srenew(pdyna->weight_loc, pdyna->nalloc_loc);
                    }
                    pdyna->ind_loc[pdyna->nat_loc] = ii;
                    mass   = md->massT[ii];
                    weight = get_weight(sqrt(dr2), pull->cyl_r1, pull->cyl_r0);
                    pdyna->weight_loc[pdyna->nat_loc] = weight;
                    sum_a += mass*weight*inp;
                    if (xp)
                    {
                        pbc_dx_aiuc(pbc, xp[ii], g_x, dx);
                        inp     = iprod(dir, dx);
                        sum_ap += mass*weight*inp;
                    }
                    wmass  += mass*weight;
                    wwmass += mass*sqr(weight);
                    pdyna->nat_loc++;
                }
            }
        }

        pull->dbuf_cyl[c*4+0] = wmass;
        pull->dbuf_cyl[c*4+1] = wwmass;
        pull->dbuf_cyl[c*4+2] = sum_a;
        pull->dbuf_cyl[c*4+3] = sum_ap;
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ncoord*4, pull->dbuf_cyl, cr);
    }

    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];
        pdyna = &pull->dyna[c];
        pgrp  = &pull->group[pcrd->group[1]];

        wmass         = pull->dbuf_cyl[c*4+0];
        wwmass        = pull->dbuf_cyl[c*4+1];
        pdyna->wscale = wmass/wwmass;
        pdyna->invtm  = 1.0/(pdyna->wscale*wmass);

        for (m = 0; m < DIM; m++)
        {
            g_x[m]      = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
            pdyna->x[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+2]/wmass;
            if (xp)
            {
                pdyna->xp[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+3]/wmass;
            }
        }

        if (debug)
        {
            fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
                    c, pdyna->x[0], pdyna->x[1], pdyna->x[2],
                    1.0/pdyna->invtm);
        }
    }
}
int setup_specat_communication(gmx_domdec_t             *dd,
                               ind_req_t                *ireq,
                               gmx_domdec_specat_comm_t *spac,
                               gmx_hash_t               *ga2la_specat,
                               int                       at_start,
                               int                       vbuf_fac,
                               const char               *specat_type,
                               const char               *add_err)
{
    int               nsend[2], nlast, nsend_zero[2] = {0, 0}, *nsend_ptr;
    int               d, dim, ndir, dir, nr, ns, i, nrecv_local, n0, start, indr, ind, buf[2];
    int               nat_tot_specat, nat_tot_prev, nalloc_old;
    gmx_bool          bPBC;
    gmx_specatsend_t *spas;

    if (debug)
    {
        fprintf(debug, "Begin setup_specat_communication for %s\n", specat_type);
    }

    /* nsend[0]: the number of atoms requested by this node only,
     *           we communicate this for more efficient checks
     * nsend[1]: the total number of requested atoms
     */
    nsend[0] = ireq->n;
    nsend[1] = nsend[0];
    nlast    = nsend[1];
    for (d = dd->ndim-1; d >= 0; d--)
    {
        /* Pulse the grid forward and backward */
        dim  = dd->dim[d];
        bPBC = (dim < dd->npbcdim);
        if (dd->nc[dim] == 2)
        {
            /* Only 2 cells, so we only need to communicate once */
            ndir = 1;
        }
        else
        {
            ndir = 2;
        }
        for (dir = 0; dir < ndir; dir++)
        {
            if (!bPBC &&
                dd->nc[dim] > 2 &&
                ((dir == 0 && dd->ci[dim] == dd->nc[dim] - 1) ||
                 (dir == 1 && dd->ci[dim] == 0)))
            {
                /* No pbc: the first/last cell should not request atoms */
                nsend_ptr = nsend_zero;
            }
            else
            {
                nsend_ptr = nsend;
            }
            /* Communicate the number of indices */
            dd_sendrecv_int(dd, d, dir == 0 ? dddirForward : dddirBackward,
                            nsend_ptr, 2, spac->nreq[d][dir], 2);
            nr = spac->nreq[d][dir][1];
            if (nlast+nr > ireq->nalloc)
            {
                ireq->nalloc = over_alloc_dd(nlast+nr);
                srenew(ireq->ind, ireq->nalloc);
            }
            /* Communicate the indices */
            dd_sendrecv_int(dd, d, dir == 0 ? dddirForward : dddirBackward,
                            ireq->ind, nsend_ptr[1], ireq->ind+nlast, nr);
            nlast += nr;
        }
        nsend[1] = nlast;
    }
    if (debug)
    {
        fprintf(debug, "Communicated the counts\n");
    }

    /* Search for the requested atoms and communicate the indices we have */
    nat_tot_specat = at_start;
    nrecv_local    = 0;
    for (d = 0; d < dd->ndim; d++)
    {
        /* Pulse the grid forward and backward */
        if (dd->dim[d] >= dd->npbcdim || dd->nc[dd->dim[d]] > 2)
        {
            ndir = 2;
        }
        else
        {
            ndir = 1;
        }
        nat_tot_prev = nat_tot_specat;
        for (dir = ndir-1; dir >= 0; dir--)
        {
            if (nat_tot_specat > spac->bSendAtom_nalloc)
            {
                nalloc_old             = spac->bSendAtom_nalloc;
                spac->bSendAtom_nalloc = over_alloc_dd(nat_tot_specat);
                srenew(spac->bSendAtom, spac->bSendAtom_nalloc);
                for (i = nalloc_old; i < spac->bSendAtom_nalloc; i++)
                {
                    spac->bSendAtom[i] = FALSE;
                }
            }
            spas = &spac->spas[d][dir];
            n0   = spac->nreq[d][dir][0];
            nr   = spac->nreq[d][dir][1];
            if (debug)
            {
                fprintf(debug, "dim=%d, dir=%d, searching for %d atoms\n",
                        d, dir, nr);
            }
            start       = nlast - nr;
            spas->nsend = 0;
            nsend[0]    = 0;
            for (i = 0; i < nr; i++)
            {
                indr = ireq->ind[start+i];
                ind  = -1;
                /* Check if this is a home atom and if so ind will be set */
                if (!ga2la_get_home(dd->ga2la, indr, &ind))
                {
                    /* Search in the communicated atoms */
                    ind = gmx_hash_get_minone(ga2la_specat, indr);
                }
                if (ind >= 0)
                {
                    if (i < n0 || !spac->bSendAtom[ind])
                    {
                        if (spas->nsend+1 > spas->a_nalloc)
                        {
                            spas->a_nalloc = over_alloc_large(spas->nsend+1);
                            srenew(spas->a, spas->a_nalloc);
                        }
                        /* Store the local index so we know which coordinates
                         * to send out later.
                         */
                        spas->a[spas->nsend] = ind;
                        spac->bSendAtom[ind] = TRUE;
                        if (spas->nsend+1 > spac->ibuf_nalloc)
                        {
                            spac->ibuf_nalloc = over_alloc_large(spas->nsend+1);
                            srenew(spac->ibuf, spac->ibuf_nalloc);
                        }
                        /* Store the global index so we can send it now */
                        spac->ibuf[spas->nsend] = indr;
                        if (i < n0)
                        {
                            nsend[0]++;
                        }
                        spas->nsend++;
                    }
                }
            }
            nlast = start;
            /* Clear the local flags */
            for (i = 0; i < spas->nsend; i++)
            {
                spac->bSendAtom[spas->a[i]] = FALSE;
            }
            /* Send and receive the number of indices to communicate */
            nsend[1] = spas->nsend;
            dd_sendrecv_int(dd, d, dir == 0 ? dddirBackward : dddirForward,
                            nsend, 2, buf, 2);
            if (debug)
            {
                fprintf(debug, "Send to rank %d, %d (%d) indices, "
                        "receive from rank %d, %d (%d) indices\n",
                        dd->neighbor[d][1-dir], nsend[1], nsend[0],
                        dd->neighbor[d][dir], buf[1], buf[0]);
                if (gmx_debug_at)
                {
                    for (i = 0; i < spas->nsend; i++)
                    {
                        fprintf(debug, " %d", spac->ibuf[i]+1);
                    }
                    fprintf(debug, "\n");
                }
            }
            nrecv_local += buf[0];
            spas->nrecv  = buf[1];
            if (nat_tot_specat + spas->nrecv > dd->gatindex_nalloc)
            {
                dd->gatindex_nalloc = over_alloc_dd(nat_tot_specat + spas->nrecv);
                srenew(dd->gatindex, dd->gatindex_nalloc);
            }
            /* Send and receive the indices */
            dd_sendrecv_int(dd, d, dir == 0 ? dddirBackward : dddirForward,
                            spac->ibuf, spas->nsend,
                            dd->gatindex+nat_tot_specat, spas->nrecv);
            nat_tot_specat += spas->nrecv;
        }

        /* Allocate the x/f communication buffers */
        ns = spac->spas[d][0].nsend;
        nr = spac->spas[d][0].nrecv;
        if (ndir == 2)
        {
            ns += spac->spas[d][1].nsend;
            nr += spac->spas[d][1].nrecv;
        }
        if (vbuf_fac*ns > spac->vbuf_nalloc)
        {
            spac->vbuf_nalloc = over_alloc_dd(vbuf_fac*ns);
            srenew(spac->vbuf, spac->vbuf_nalloc);
        }
        if (vbuf_fac == 2 && vbuf_fac*nr > spac->vbuf2_nalloc)
        {
            spac->vbuf2_nalloc = over_alloc_dd(vbuf_fac*nr);
            srenew(spac->vbuf2, spac->vbuf2_nalloc);
        }

        /* Make a global to local index for the communication atoms */
        for (i = nat_tot_prev; i < nat_tot_specat; i++)
        {
            gmx_hash_change_or_set(ga2la_specat, dd->gatindex[i], i);
        }
    }

    /* Check that in the end we got the number of atoms we asked for */
    if (nrecv_local != ireq->n)
    {
        if (debug)
        {
            fprintf(debug, "Requested %d, received %d (tot recv %d)\n",
                    ireq->n, nrecv_local, nat_tot_specat-at_start);
            if (gmx_debug_at)
            {
                for (i = 0; i < ireq->n; i++)
                {
                    ind = gmx_hash_get_minone(ga2la_specat, ireq->ind[i]);
                    fprintf(debug, " %s%d",
                            (ind >= 0) ? "" : "!",
                            ireq->ind[i]+1);
                }
                fprintf(debug, "\n");
            }
        }
        fprintf(stderr, "\nDD cell %d %d %d: Neighboring cells do not have atoms:",
                dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
        for (i = 0; i < ireq->n; i++)
        {
            if (gmx_hash_get_minone(ga2la_specat, ireq->ind[i]) < 0)
            {
                fprintf(stderr, " %d", ireq->ind[i]+1);
            }
        }
        fprintf(stderr, "\n");
        gmx_fatal(FARGS, "DD cell %d %d %d could only obtain %d of the %d atoms that are connected via %ss from the neighboring cells. This probably means your %s lengths are too long compared to the domain decomposition cell size. Decrease the number of domain decomposition grid cells%s%s.",
                  dd->ci[XX], dd->ci[YY], dd->ci[ZZ],
                  nrecv_local, ireq->n, specat_type, specat_type, add_err,
                  dd_dlb_is_on(dd) ? " or use the -rcon option of mdrun" : "");
    }

    spac->at_start = at_start;
    spac->at_end   = nat_tot_specat;

    if (debug)
    {
        fprintf(debug, "Done setup_specat_communication\n");
    }

    return nat_tot_specat;
}
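/* A hedged sketch of one communication "pulse" from setup_specat_communication
 * above, written with plain MPI instead of the dd_sendrecv_int wrapper: the
 * count is exchanged first so the receiver can size its buffer, then the
 * indices themselves follow. Rank arguments and tags are illustrative only.
 */
#include <mpi.h>
#include <vector>

static void pulseIndices(MPI_Comm comm, int sendRank, int recvRank,
                         const std::vector<int> &sendIdx,
                         std::vector<int>       &recvIdx)
{
    int nsend = static_cast<int>(sendIdx.size());
    int nrecv = 0;

    /* Exchange the number of indices */
    MPI_Sendrecv(&nsend, 1, MPI_INT, sendRank, 0,
                 &nrecv, 1, MPI_INT, recvRank, 0, comm, MPI_STATUS_IGNORE);
    recvIdx.resize(nrecv);

    /* Exchange the indices themselves */
    MPI_Sendrecv(sendIdx.data(), nsend, MPI_INT, sendRank, 0,
                 recvIdx.data(), nrecv, MPI_INT, recvRank, 0,
                 comm, MPI_STATUS_IGNORE);
}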
int dd_make_local_vsites(gmx_domdec_t *dd, int at_start, t_ilist *lil)
{
    gmx_domdec_specat_comm_t *spac;
    ind_req_t                *ireq;
    gmx_hash_t                ga2la_specat;
    int                       ftype, nral, i, j, a;
    t_ilist                  *lilf;
    t_iatom                  *iatoms;
    int                       at_end;

    spac         = dd->vsite_comm;
    ireq         = &spac->ireq[0];
    ga2la_specat = dd->ga2la_vsite;

    ireq->n = 0;
    /* Loop over all the home vsites */
    for (ftype = 0; ftype < F_NRE; ftype++)
    {
        if (interaction_function[ftype].flags & IF_VSITE)
        {
            nral = NRAL(ftype);
            lilf = &lil[ftype];
            for (i = 0; i < lilf->nr; i += 1+nral)
            {
                iatoms = lilf->iatoms + i;
                /* Check if we have the other atoms */
                for (j = 1; j < 1+nral; j++)
                {
                    if (iatoms[j] < 0)
                    {
                        /* This is not a home atom,
                         * we need to ask our neighbors.
                         */
                        a = -iatoms[j] - 1;
                        /* Check to not ask for the same atom more than once */
                        if (gmx_hash_get_minone(dd->ga2la_vsite, a) == -1)
                        {
                            /* Add this non-home atom to the list */
                            if (ireq->n+1 > ireq->nalloc)
                            {
                                ireq->nalloc = over_alloc_large(ireq->n+1);
                                srenew(ireq->ind, ireq->nalloc);
                            }
                            ireq->ind[ireq->n++] = a;
                            /* Temporarily mark with -2,
                             * we get the index later.
                             */
                            gmx_hash_set(ga2la_specat, a, -2);
                        }
                    }
                }
            }
        }
    }

    at_end = setup_specat_communication(dd, ireq, dd->vsite_comm, ga2la_specat,
                                        at_start, 1, "vsite", "");

    /* Fill in the missing indices */
    for (ftype = 0; ftype < F_NRE; ftype++)
    {
        if (interaction_function[ftype].flags & IF_VSITE)
        {
            nral = NRAL(ftype);
            lilf = &lil[ftype];
            for (i = 0; i < lilf->nr; i += 1+nral)
            {
                iatoms = lilf->iatoms + i;
                for (j = 1; j < 1+nral; j++)
                {
                    if (iatoms[j] < 0)
                    {
                        iatoms[j] = gmx_hash_get_minone(ga2la_specat,
                                                        -iatoms[j]-1);
                    }
                }
            }
        }
    }

    return at_end;
}
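/* The "mark with -2" pattern used by dd_make_local_vsites above, shown as a
 * hedged stand-alone sketch with std::unordered_map in place of gmx_hash_t:
 * each missing global atom is requested exactly once and holds a placeholder
 * local index until communication fills in the real one.
 */
#include <unordered_map>
#include <vector>

static void requestOnce(int globalAtom,
                        std::unordered_map<int, int> &ga2la,
                        std::vector<int>             &requests)
{
    if (ga2la.find(globalAtom) == ga2la.end())
    {
        requests.push_back(globalAtom);
        ga2la[globalAtom] = -2; /* placeholder; replaced after communication */
    }
}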
void gmx_sort_ilist_fe(t_idef *idef, const real *qA, const real *qB)
{
    int        ftype, nral, i, ic, ib, a;
    t_iparams *iparams;
    t_ilist   *ilist;
    t_iatom   *iatoms;
    gmx_bool   bPert;
    t_iatom   *iabuf;
    int        iabuf_nalloc;

    if (qB == NULL)
    {
        qB = qA;
    }

    iabuf_nalloc = 0;
    iabuf        = NULL;

    iparams = idef->iparams;

    for (ftype = 0; ftype < F_NRE; ftype++)
    {
        if (interaction_function[ftype].flags & IF_BOND)
        {
            ilist  = &idef->il[ftype];
            iatoms = ilist->iatoms;

            nral = NRAL(ftype);
            ic   = 0;
            ib   = 0;
            i    = 0;
            while (i < ilist->nr)
            {
                /* Check if this interaction is perturbed */
                if (ip_q_pert(ftype, iatoms+i, iparams, qA, qB))
                {
                    /* Copy to the perturbed buffer */
                    if (ib + 1 + nral > iabuf_nalloc)
                    {
                        iabuf_nalloc = over_alloc_large(ib+1+nral);
                        srenew(iabuf, iabuf_nalloc);
                    }
                    for (a = 0; a < 1+nral; a++)
                    {
                        iabuf[ib++] = iatoms[i++];
                    }
                }
                else
                {
                    /* Copy in place */
                    for (a = 0; a < 1+nral; a++)
                    {
                        iatoms[ic++] = iatoms[i++];
                    }
                }
            }
            /* Now we know the number of non-perturbed interactions */
            ilist->nr_nonperturbed = ic;

            /* Copy the buffer with perturbed interactions to the ilist */
            for (a = 0; a < ib; a++)
            {
                iatoms[ic++] = iabuf[a];
            }

            if (debug)
            {
                fprintf(debug, "%s non-pert %d pert %d\n",
                        interaction_function[ftype].longname,
                        ilist->nr_nonperturbed,
                        ilist->nr-ilist->nr_nonperturbed);
            }
        }
    }

    sfree(iabuf);

    idef->ilsort = ilsortFE_SORTED;
}
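/* gmx_sort_ilist_fe above is in essence a stable two-way partition over
 * fixed-size records of 1+nral integers. A hedged sketch of the same split
 * with standard containers (the real code instead reuses a scratch buffer
 * to avoid allocation in the hot path):
 */
#include <cstddef>
#include <vector>

static std::vector<int> splitPerturbed(const std::vector<int> &iatoms, int nral,
                                       bool (*isPerturbed)(const int *record))
{
    std::vector<int> nonPert, pert;

    for (size_t i = 0; i < iatoms.size(); i += 1 + nral)
    {
        const int        *rec = iatoms.data() + i;
        std::vector<int> &dst = isPerturbed(rec) ? pert : nonPert;
        dst.insert(dst.end(), rec, rec + 1 + nral);
    }
    /* Non-perturbed records first, perturbed records appended at the end */
    nonPert.insert(nonPert.end(), pert.begin(), pert.end());

    return nonPert;
}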
int dd_make_local_constraints(gmx_domdec_t *dd, int at_start,
                              const struct gmx_mtop_t *mtop,
                              const int *cginfo,
                              gmx_constr_t constr, int nrec,
                              t_ilist *il_local)
{
    gmx_domdec_constraints_t *dc;
    t_ilist                  *ilc_local, *ils_local;
    ind_req_t                *ireq;
    const t_blocka           *at2con_mt;
    const int               **at2settle_mt;
    gmx_hash_t               *ga2la_specat;
    int                       at_end, i, j;
    t_iatom                  *iap;

    // This code should not be called unless this condition is true,
    // because that's the only time init_domdec_constraints is
    // called...
    GMX_RELEASE_ASSERT(dd->bInterCGcons || dd->bInterCGsettles, "dd_make_local_constraints called when there are no local constraints");
    // ... and init_domdec_constraints always sets
    // dd->constraint_comm...
    GMX_RELEASE_ASSERT(dd->constraint_comm, "Invalid use of dd_make_local_constraints before construction of constraint_comm");
    // ... which static analysis needs to be reassured about, because
    // otherwise, when dd->bInterCGsettles is true, dd->constraint_comm
    // is unilaterally dereferenced before the call to atoms_to_settles.

    dc = dd->constraints;

    ilc_local = &il_local[F_CONSTR];
    ils_local = &il_local[F_SETTLE];

    dc->ncon      = 0;
    ilc_local->nr = 0;
    if (dd->constraint_comm)
    {
        at2con_mt = atom2constraints_moltype(constr);
        ireq      = &dd->constraint_comm->ireq[0];
        ireq->n   = 0;
    }
    else
    {
        // Currently unreachable
        at2con_mt = NULL;
        ireq      = NULL;
    }

    if (dd->bInterCGsettles)
    {
        at2settle_mt  = atom2settle_moltype(constr);
        ils_local->nr = 0;
    }
    else
    {
        /* Settle works inside charge groups, we assigned them already */
        at2settle_mt = NULL;
    }

    if (at2settle_mt == NULL)
    {
        atoms_to_constraints(dd, mtop, cginfo, at2con_mt, nrec,
                             ilc_local, ireq);
    }
    else
    {
        int t0_set;
        int thread;

        /* Do the constraints, if present, on the first thread.
         * Do the settles on all other threads.
         */
        t0_set = ((at2con_mt != NULL && dc->nthread > 1) ? 1 : 0);

#pragma omp parallel for num_threads(dc->nthread) schedule(static)
        for (thread = 0; thread < dc->nthread; thread++)
        {
            try
            {
                if (at2con_mt && thread == 0)
                {
                    atoms_to_constraints(dd, mtop, cginfo, at2con_mt, nrec,
                                         ilc_local, ireq);
                }

                if (thread >= t0_set)
                {
                    int        cg0, cg1;
                    t_ilist   *ilst;
                    ind_req_t *ireqt;

                    /* Distribute the settle check+assignments over
                     * dc->nthread or dc->nthread-1 threads.
                     */
                    cg0 = (dd->ncg_home*(thread-t0_set  ))/(dc->nthread-t0_set);
                    cg1 = (dd->ncg_home*(thread-t0_set+1))/(dc->nthread-t0_set);

                    if (thread == t0_set)
                    {
                        ilst = ils_local;
                    }
                    else
                    {
                        ilst = &dc->ils[thread];
                    }
                    ilst->nr = 0;

                    ireqt = &dd->constraint_comm->ireq[thread];
                    if (thread > 0)
                    {
                        ireqt->n = 0;
                    }

                    atoms_to_settles(dd, mtop, cginfo, at2settle_mt,
                                     cg0, cg1,
                                     ilst, ireqt);
                }
            }
            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
        }

        /* Combine the generated settles and requested indices */
        for (thread = 1; thread < dc->nthread; thread++)
        {
            t_ilist   *ilst;
            ind_req_t *ireqt;
            int        ia;

            if (thread > t0_set)
            {
                ilst = &dc->ils[thread];
                if (ils_local->nr + ilst->nr > ils_local->nalloc)
                {
                    ils_local->nalloc = over_alloc_large(ils_local->nr + ilst->nr);
                    srenew(ils_local->iatoms, ils_local->nalloc);
                }
                for (ia = 0; ia < ilst->nr; ia++)
                {
                    ils_local->iatoms[ils_local->nr+ia] = ilst->iatoms[ia];
                }
                ils_local->nr += ilst->nr;
            }
            ireqt = &dd->constraint_comm->ireq[thread];
            if (ireq->n+ireqt->n > ireq->nalloc)
            {
                ireq->nalloc = over_alloc_large(ireq->n+ireqt->n);
                srenew(ireq->ind, ireq->nalloc);
            }
            for (ia = 0; ia < ireqt->n; ia++)
            {
                ireq->ind[ireq->n+ia] = ireqt->ind[ia];
            }
            ireq->n += ireqt->n;
        }

        if (debug)
        {
            fprintf(debug, "Settles: total %3d\n", ils_local->nr/4);
        }
    }

    if (dd->constraint_comm)
    {
        int nral1;

        at_end = setup_specat_communication(dd, ireq, dd->constraint_comm,
                                            dd->constraints->ga2la,
                                            at_start, 2,
                                            "constraint", " or lincs-order");

        /* Fill in the missing indices */
        ga2la_specat = dd->constraints->ga2la;

        nral1 = 1 + NRAL(F_CONSTR);
        for (i = 0; i < ilc_local->nr; i += nral1)
        {
            iap = ilc_local->iatoms + i;
            for (j = 1; j < nral1; j++)
            {
                if (iap[j] < 0)
                {
                    iap[j] = gmx_hash_get_minone(ga2la_specat, -iap[j]-1);
                }
            }
        }

        nral1 = 1 + NRAL(F_SETTLE);
        for (i = 0; i < ils_local->nr; i += nral1)
        {
            iap = ils_local->iatoms + i;
            for (j = 1; j < nral1; j++)
            {
                if (iap[j] < 0)
                {
                    iap[j] = gmx_hash_get_minone(ga2la_specat, -iap[j]-1);
                }
            }
        }
    }
    else
    {
        // Currently unreachable
        at_end = at_start;
    }

    return at_end;
}
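/* The cg0/cg1 arithmetic above is the usual integer split of a range over
 * workers: worker w of nth gets [n*w/nth, n*(w+1)/nth), so the pieces are
 * contiguous, disjoint and differ in size by at most one. A minimal sketch:
 */
struct Range
{
    int begin;
    int end;
};

static Range evenSplit(int n, int nth, int w)
{
    Range r;

    r.begin = (n* w   )/nth;
    r.end   = (n*(w+1))/nth;

    return r;
}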
/*! \brief Looks up constraint for the local atoms */
static void atoms_to_constraints(gmx_domdec_t *dd,
                                 const gmx_mtop_t *mtop,
                                 const int *cginfo,
                                 const t_blocka *at2con_mt, int nrec,
                                 t_ilist *ilc_local,
                                 ind_req_t *ireq)
{
    const t_blocka           *at2con;
    gmx_ga2la_t              *ga2la;
    gmx_mtop_atomlookup_t     alook;
    int                       ncon1;
    gmx_molblock_t           *molb;
    t_iatom                  *ia1, *ia2, *iap;
    int                       nhome, cg, a, a_gl, a_mol, a_loc, b_lo, offset, mb, molnr, b_mol, i, con, con_offset;
    gmx_domdec_constraints_t *dc;
    gmx_domdec_specat_comm_t *dcc;

    dc  = dd->constraints;
    dcc = dd->constraint_comm;

    ga2la = dd->ga2la;

    alook = gmx_mtop_atomlookup_init(mtop);

    nhome = 0;
    for (cg = 0; cg < dd->ncg_home; cg++)
    {
        if (GET_CGINFO_CONSTR(cginfo[cg]))
        {
            for (a = dd->cgindex[cg]; a < dd->cgindex[cg+1]; a++)
            {
                a_gl = dd->gatindex[a];

                gmx_mtop_atomnr_to_molblock_ind(alook, a_gl, &mb, &molnr, &a_mol);

                molb = &mtop->molblock[mb];

                ncon1 = mtop->moltype[molb->type].ilist[F_CONSTR].nr/NRAL(F_SETTLE);

                ia1 = mtop->moltype[molb->type].ilist[F_CONSTR].iatoms;
                ia2 = mtop->moltype[molb->type].ilist[F_CONSTRNC].iatoms;

                /* Calculate the global constraint number offset for the molecule.
                 * This is only required for the global index to make sure
                 * that we use each constraint only once.
                 */
                con_offset =
                    dc->molb_con_offset[mb] + molnr*dc->molb_ncon_mol[mb];

                /* The global atom number offset for this molecule */
                offset = a_gl - a_mol;
                at2con = &at2con_mt[molb->type];
                for (i = at2con->index[a_mol]; i < at2con->index[a_mol+1]; i++)
                {
                    con = at2con->a[i];
                    iap = constr_iatomptr(ncon1, ia1, ia2, con);
                    if (a_mol == iap[1])
                    {
                        b_mol = iap[2];
                    }
                    else
                    {
                        b_mol = iap[1];
                    }
                    if (ga2la_get_home(ga2la, offset+b_mol, &a_loc))
                    {
                        /* Add this fully home constraint at the first atom */
                        if (a_mol < b_mol)
                        {
                            if (dc->ncon+1 > dc->con_nalloc)
                            {
                                dc->con_nalloc = over_alloc_large(dc->ncon+1);
                                srenew(dc->con_gl, dc->con_nalloc);
                                srenew(dc->con_nlocat, dc->con_nalloc);
                            }
                            dc->con_gl[dc->ncon]     = con_offset + con;
                            dc->con_nlocat[dc->ncon] = 2;
                            if (ilc_local->nr + 3 > ilc_local->nalloc)
                            {
                                ilc_local->nalloc = over_alloc_dd(ilc_local->nr + 3);
                                srenew(ilc_local->iatoms, ilc_local->nalloc);
                            }
                            b_lo = a_loc;
                            ilc_local->iatoms[ilc_local->nr++] = iap[0];
                            ilc_local->iatoms[ilc_local->nr++] = (a_gl == iap[1] ? a    : b_lo);
                            ilc_local->iatoms[ilc_local->nr++] = (a_gl == iap[1] ? b_lo : a   );
                            dc->ncon++;
                            nhome++;
                        }
                    }
                    else
                    {
                        /* We need the nrec constraints coupled to this constraint,
                         * so we need to walk out of the home cell by nrec+1 atoms,
                         * since atom b_mol is already not locally present.
                         * Therefore we call walk_out with nrec recursions to go
                         * after this first call.
                         */
                        walk_out(con, con_offset, b_mol, offset, nrec,
                                 ncon1, ia1, ia2, at2con,
                                 dd->ga2la, TRUE, dc, dcc, ilc_local, ireq);
                    }
                }
            }
        }
    }

    gmx_mtop_atomlookup_destroy(alook);

    if (debug)
    {
        fprintf(debug,
                "Constraints: home %3d border %3d atoms: %3d\n",
                nhome, dc->ncon-nhome,
                dd->constraint_comm ? ireq->n : 0);
    }
}
/*! \brief Looks up SETTLE constraints for a range of charge-groups */
static void atoms_to_settles(gmx_domdec_t *dd,
                             const gmx_mtop_t *mtop,
                             const int *cginfo,
                             const int **at2settle_mt,
                             int cg_start, int cg_end,
                             t_ilist *ils_local,
                             ind_req_t *ireq)
{
    gmx_ga2la_t           *ga2la;
    gmx_mtop_atomlookup_t  alook;
    int                    settle;
    int                    nral, sa;
    int                    cg, a, a_gl, a_glsa, a_gls[3], a_locs[3];
    int                    mb, molnr, a_mol, offset;
    const gmx_molblock_t  *molb;
    const t_iatom         *ia1;
    gmx_bool               a_home[3];
    int                    nlocal;
    gmx_bool               bAssign;

    ga2la = dd->ga2la;

    alook = gmx_mtop_atomlookup_settle_init(mtop);

    nral = NRAL(F_SETTLE);

    for (cg = cg_start; cg < cg_end; cg++)
    {
        if (GET_CGINFO_SETTLE(cginfo[cg]))
        {
            for (a = dd->cgindex[cg]; a < dd->cgindex[cg+1]; a++)
            {
                a_gl = dd->gatindex[a];

                gmx_mtop_atomnr_to_molblock_ind(alook, a_gl, &mb, &molnr, &a_mol);
                molb = &mtop->molblock[mb];

                settle = at2settle_mt[molb->type][a_mol];

                if (settle >= 0)
                {
                    offset = a_gl - a_mol;

                    ia1 = mtop->moltype[molb->type].ilist[F_SETTLE].iatoms;

                    bAssign = FALSE;
                    nlocal  = 0;
                    for (sa = 0; sa < nral; sa++)
                    {
                        a_glsa     = offset + ia1[settle*(1+nral)+1+sa];
                        a_gls[sa]  = a_glsa;
                        a_home[sa] = ga2la_get_home(ga2la, a_glsa, &a_locs[sa]);
                        if (a_home[sa])
                        {
                            if (nlocal == 0 && a_gl == a_glsa)
                            {
                                bAssign = TRUE;
                            }
                            nlocal++;
                        }
                    }

                    if (bAssign)
                    {
                        if (ils_local->nr+1+nral > ils_local->nalloc)
                        {
                            ils_local->nalloc = over_alloc_dd(ils_local->nr+1+nral);
                            srenew(ils_local->iatoms, ils_local->nalloc);
                        }

                        ils_local->iatoms[ils_local->nr++] = ia1[settle*4];

                        for (sa = 0; sa < nral; sa++)
                        {
                            if (ga2la_get_home(ga2la, a_gls[sa], &a_locs[sa]))
                            {
                                ils_local->iatoms[ils_local->nr++] = a_locs[sa];
                            }
                            else
                            {
                                ils_local->iatoms[ils_local->nr++] = -a_gls[sa] - 1;
                                /* Add this non-home atom to the list */
                                if (ireq->n+1 > ireq->nalloc)
                                {
                                    ireq->nalloc = over_alloc_large(ireq->n+1);
                                    srenew(ireq->ind, ireq->nalloc);
                                }
                                ireq->ind[ireq->n++] = a_gls[sa];
                                /* A check on double atom requests is
                                 * not required for settle.
                                 */
                            }
                        }
                    }
                }
            }
        }
    }

    gmx_mtop_atomlookup_destroy(alook);
}
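/* Sketch of the assignment rule used above: a SETTLE is assigned on the rank
 * where its first home atom is the atom currently being scanned, so each
 * settle is picked up exactly once even when its three atoms are spread over
 * several home cells. isHome is a hypothetical predicate over global indices.
 */
#include <vector>

static bool assignSettleHere(int scanAtom, const std::vector<int> &settleAtoms,
                             bool (*isHome)(int globalAtom))
{
    for (int a : settleAtoms)
    {
        if (isHome(a))
        {
            /* The first home atom decides: assign iff it is the scanned atom */
            return a == scanAtom;
        }
    }
    return false; /* no home atom here: some other rank assigns it */
}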
/*! \brief Walks over the constraints out from the local atoms into the non-local atoms and adds them to a list */
static void walk_out(int con, int con_offset, int a, int offset, int nrec,
                     int ncon1, const t_iatom *ia1, const t_iatom *ia2,
                     const t_blocka *at2con,
                     const gmx_ga2la_t *ga2la, gmx_bool bHomeConnect,
                     gmx_domdec_constraints_t *dc,
                     gmx_domdec_specat_comm_t *dcc,
                     t_ilist *il_local,
                     ind_req_t *ireq)
{
    int            a1_gl, a2_gl, a_loc, i, coni, b;
    const t_iatom *iap;

    if (dc->gc_req[con_offset+con] == 0)
    {
        /* Add this non-home constraint to the list */
        if (dc->ncon+1 > dc->con_nalloc)
        {
            dc->con_nalloc = over_alloc_large(dc->ncon+1);
            srenew(dc->con_gl, dc->con_nalloc);
            srenew(dc->con_nlocat, dc->con_nalloc);
        }
        dc->con_gl[dc->ncon]       = con_offset + con;
        dc->con_nlocat[dc->ncon]   = (bHomeConnect ? 1 : 0);
        dc->gc_req[con_offset+con] = 1;
        if (il_local->nr + 3 > il_local->nalloc)
        {
            il_local->nalloc = over_alloc_dd(il_local->nr+3);
            srenew(il_local->iatoms, il_local->nalloc);
        }
        iap = constr_iatomptr(ncon1, ia1, ia2, con);
        il_local->iatoms[il_local->nr++] = iap[0];
        a1_gl = offset + iap[1];
        a2_gl = offset + iap[2];
        /* The following indexing code can probably be optimized */
        if (ga2la_get_home(ga2la, a1_gl, &a_loc))
        {
            il_local->iatoms[il_local->nr++] = a_loc;
        }
        else
        {
            /* We set this index later */
            il_local->iatoms[il_local->nr++] = -a1_gl - 1;
        }
        if (ga2la_get_home(ga2la, a2_gl, &a_loc))
        {
            il_local->iatoms[il_local->nr++] = a_loc;
        }
        else
        {
            /* We set this index later */
            il_local->iatoms[il_local->nr++] = -a2_gl - 1;
        }
        dc->ncon++;
    }
    /* Check to not ask for the same atom more than once */
    if (gmx_hash_get_minone(dc->ga2la, offset+a) == -1)
    {
        assert(dcc);
        /* Add this non-home atom to the list */
        if (ireq->n+1 > ireq->nalloc)
        {
            ireq->nalloc = over_alloc_large(ireq->n+1);
            srenew(ireq->ind, ireq->nalloc);
        }
        ireq->ind[ireq->n++] = offset + a;
        /* Temporarily mark with -2, we get the index later */
        gmx_hash_set(dc->ga2la, offset+a, -2);
    }

    if (nrec > 0)
    {
        for (i = at2con->index[a]; i < at2con->index[a+1]; i++)
        {
            coni = at2con->a[i];
            if (coni != con)
            {
                /* Walk further */
                iap = constr_iatomptr(ncon1, ia1, ia2, coni);
                if (a == iap[1])
                {
                    b = iap[2];
                }
                else
                {
                    b = iap[1];
                }
                if (!ga2la_get_home(ga2la, offset+b, &a_loc))
                {
                    walk_out(coni, con_offset, b, offset, nrec-1,
                             ncon1, ia1, ia2, at2con,
                             ga2la, FALSE, dc, dcc, il_local, ireq);
                }
            }
        }
    }
}
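/* A hedged sketch of the bounded graph walk that walk_out() performs, using
 * hypothetical adjacency structures (conOfAtom lists the constraints of each
 * atom, atomsOfCon the two atoms of each constraint) and a visited vector in
 * place of the gc_req[] flags. Each recursion level corresponds to walking
 * one constraint further away from the home atoms.
 */
#include <utility>
#include <vector>

static void walkOutSketch(int atom, int fromCon, int depth,
                          const std::vector<std::vector<int>>    &conOfAtom,
                          const std::vector<std::pair<int, int>> &atomsOfCon,
                          std::vector<bool>                      &visited)
{
    for (int con : conOfAtom[atom])
    {
        if (con == fromCon || visited[con])
        {
            continue;
        }
        visited[con] = true; /* record the constraint for the local list */
        if (depth > 0)
        {
            /* Walk further from the other atom of this constraint */
            int other = (atomsOfCon[con].first == atom)
                ? atomsOfCon[con].second : atomsOfCon[con].first;
            walkOutSketch(other, con, depth - 1,
                          conOfAtom, atomsOfCon, visited);
        }
    }
}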
static void make_cyl_refgrps(t_commrec *cr, struct pull_t *pull, t_mdatoms *md,
                             t_pbc *pbc, double t, rvec *x)
{
    /* The size and stride per coord for the reduction buffer */
    const int    stride = 9;
    int          c, i, ii, m, start, end;
    rvec         g_x, dx, dir;
    double       inv_cyl_r2;
    pull_comm_t *comm;
    gmx_ga2la_t *ga2la = NULL;

    comm = &pull->comm;

    if (comm->dbuf_cyl == NULL)
    {
        snew(comm->dbuf_cyl, pull->ncoord*stride);
    }

    if (cr && DOMAINDECOMP(cr))
    {
        ga2la = cr->dd->ga2la;
    }

    start = 0;
    end   = md->homenr;

    inv_cyl_r2 = 1.0/gmx::square(pull->params.cylinder_r);

    /* loop over all groups to make a reference group for each */
    for (c = 0; c < pull->ncoord; c++)
    {
        pull_coord_work_t *pcrd;
        double             sum_a, wmass, wwmass;
        dvec               radf_fac0, radf_fac1;

        pcrd   = &pull->coord[c];

        sum_a  = 0;
        wmass  = 0;
        wwmass = 0;
        clear_dvec(radf_fac0);
        clear_dvec(radf_fac1);

        if (pcrd->params.eGeom == epullgCYL)
        {
            pull_group_work_t *pref, *pgrp, *pdyna;

            /* pref will be the same group for all pull coordinates */
            pref  = &pull->group[pcrd->params.group[0]];
            pgrp  = &pull->group[pcrd->params.group[1]];
            pdyna = &pull->dyna[c];
            copy_rvec(pcrd->vec, dir);
            pdyna->nat_loc = 0;

            /* We calculate distances with respect to the reference location
             * of this cylinder group (g_x), which we already have now since
             * we reduced the other group COM over the ranks. This resolves
             * any PBC issues and we don't need to use a PBC-atom here.
             */
            if (pcrd->params.rate != 0)
            {
                /* With rate=0, value_ref is set initially */
                pcrd->value_ref = pcrd->params.init + pcrd->params.rate*t;
            }
            for (m = 0; m < DIM; m++)
            {
                g_x[m] = pgrp->x[m] - pcrd->vec[m]*pcrd->value_ref;
            }

            /* loop over all atoms in the main ref group */
            for (i = 0; i < pref->params.nat; i++)
            {
                ii = pref->params.ind[i];
                if (ga2la)
                {
                    if (!ga2la_get_home(ga2la, pref->params.ind[i], &ii))
                    {
                        ii = -1;
                    }
                }
                if (ii >= start && ii < end)
                {
                    double dr2, dr2_rel, inp;
                    dvec   dr;

                    pbc_dx_aiuc(pbc, x[ii], g_x, dx);
                    inp = iprod(dir, dx);
                    dr2 = 0;
                    for (m = 0; m < DIM; m++)
                    {
                        /* Determine the radial components */
                        dr[m] = dx[m] - inp*dir[m];
                        dr2  += dr[m]*dr[m];
                    }
                    dr2_rel = dr2*inv_cyl_r2;

                    if (dr2_rel < 1)
                    {
                        double mass, weight, dweight_r;
                        dvec   mdw;

                        /* add to index, to sum of COM, to weight array */
                        if (pdyna->nat_loc >= pdyna->nalloc_loc)
                        {
                            pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1);
                            srenew(pdyna->ind_loc,    pdyna->nalloc_loc);
                            srenew(pdyna->weight_loc, pdyna->nalloc_loc);
                            srenew(pdyna->mdw,        pdyna->nalloc_loc);
                            srenew(pdyna->dv,         pdyna->nalloc_loc);
                        }
                        pdyna->ind_loc[pdyna->nat_loc] = ii;

                        mass      = md->massT[ii];
                        /* The radial weight function is 1-2x^2+x^4,
                         * where x=r/cylinder_r. Since this function depends
                         * on the radial component, we also get radial forces
                         * on both groups.
                         */
                        weight    = 1 + (-2 + dr2_rel)*dr2_rel;
                        dweight_r = (-4 + 4*dr2_rel)*inv_cyl_r2;
                        pdyna->weight_loc[pdyna->nat_loc] = weight;
                        sum_a    += mass*weight*inp;
                        wmass    += mass*weight;
                        wwmass   += mass*weight*weight;
                        dsvmul(mass*dweight_r, dr, mdw);
                        copy_dvec(mdw, pdyna->mdw[pdyna->nat_loc]);
                        /* Currently we only have the axial component of the
                         * distance (inp) up to an unknown offset. We add this
                         * offset after the reduction, when we determine the
                         * COM of the cylinder group.
                         */
                        pdyna->dv[pdyna->nat_loc] = inp;
                        for (m = 0; m < DIM; m++)
                        {
                            radf_fac0[m] += mdw[m];
                            radf_fac1[m] += mdw[m]*inp;
                        }
                        pdyna->nat_loc++;
                    }
                }
            }
        }
        comm->dbuf_cyl[c*stride+0] = wmass;
        comm->dbuf_cyl[c*stride+1] = wwmass;
        comm->dbuf_cyl[c*stride+2] = sum_a;
        comm->dbuf_cyl[c*stride+3] = radf_fac0[XX];
        comm->dbuf_cyl[c*stride+4] = radf_fac0[YY];
        comm->dbuf_cyl[c*stride+5] = radf_fac0[ZZ];
        comm->dbuf_cyl[c*stride+6] = radf_fac1[XX];
        comm->dbuf_cyl[c*stride+7] = radf_fac1[YY];
        comm->dbuf_cyl[c*stride+8] = radf_fac1[ZZ];
    }

    if (cr != NULL && PAR(cr))
    {
        /* Sum the contributions over the ranks */
        pull_reduce_double(cr, comm, pull->ncoord*stride, comm->dbuf_cyl);
    }

    for (c = 0; c < pull->ncoord; c++)
    {
        pull_coord_work_t *pcrd;

        pcrd = &pull->coord[c];

        if (pcrd->params.eGeom == epullgCYL)
        {
            pull_group_work_t *pdyna, *pgrp;
            double             wmass, wwmass, dist;

            pdyna = &pull->dyna[c];
            pgrp  = &pull->group[pcrd->params.group[1]];

            wmass          = comm->dbuf_cyl[c*stride+0];
            wwmass         = comm->dbuf_cyl[c*stride+1];
            pdyna->mwscale = 1.0/wmass;
            /* Cylinder pulling can't be used with constraints, but we set
             * wscale and invtm anyhow, in case someone would like to use them.
             */
            pdyna->wscale  = wmass/wwmass;
            pdyna->invtm   = wwmass/(wmass*wmass);

            /* We store the deviation of the COM from the reference location
             * used above, since we need it when we apply the radial forces
             * to the atoms in the cylinder group.
             */
            pcrd->cyl_dev = 0;
            for (m = 0; m < DIM; m++)
            {
                g_x[m]         = pgrp->x[m] - pcrd->vec[m]*pcrd->value_ref;
                dist           = -pcrd->vec[m]*comm->dbuf_cyl[c*stride+2]*pdyna->mwscale;
                pdyna->x[m]    = g_x[m] - dist;
                pcrd->cyl_dev += dist;
            }

            /* Now we know the exact COM of the cylinder reference group,
             * we can determine the radial force factor (ffrad) that when
             * multiplied with the axial pull force will give the radial
             * force on the pulled (non-cylinder) group.
             */
            for (m = 0; m < DIM; m++)
            {
                pcrd->ffrad[m] = (comm->dbuf_cyl[c*stride+6+m] +
                                  comm->dbuf_cyl[c*stride+3+m]*pcrd->cyl_dev)/wmass;
            }

            if (debug)
            {
                fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
                        c, pdyna->x[0], pdyna->x[1],
                        pdyna->x[2], 1.0/pdyna->invtm);
                fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n",
                        pcrd->ffrad[XX], pcrd->ffrad[YY], pcrd->ffrad[ZZ]);
            }
        }
    }
}
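/* For reference, the weight used above and its radial derivative written out:
 *
 *   w(x)      = 1 - 2 x^2 + x^4 = (1 - x^2)^2,   x = r/cylinder_r,
 *   dw/d(r^2) = (-2 + 2 x^2)/cylinder_r^2,
 *
 * so the gradient with respect to the radial component vector dr is
 *
 *   grad w = 2 dr dw/d(r^2) = dr (-4 + 4 x^2)/cylinder_r^2,
 *
 * which is exactly dr*dweight_r in the code. Both w and its gradient vanish
 * at r = cylinder_r, so atoms enter and leave the cylinder without force jumps.
 */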
int dd_make_local_constraints(gmx_domdec_t *dd,int at_start,
                              const gmx_mtop_t *mtop,
                              const int *cginfo,
                              gmx_constr_t constr,int nrec,
                              t_ilist *il_local)
{
    gmx_domdec_constraints_t *dc;
    t_ilist *ilc_local,*ils_local;
    ind_req_t *ireq;
    const t_blocka *at2con_mt;
    const int **at2settle_mt;
    gmx_hash_t ga2la_specat;
    int at_end,i,j;
    t_iatom *iap;

    dc = dd->constraints;

    ilc_local = &il_local[F_CONSTR];
    ils_local = &il_local[F_SETTLE];

    dc->ncon      = 0;
    ilc_local->nr = 0;
    if (dd->constraint_comm)
    {
        at2con_mt = atom2constraints_moltype(constr);
        ireq = &dd->constraint_comm->ireq[0];
        ireq->n = 0;
    }
    else
    {
        at2con_mt = NULL;
        ireq = NULL;
    }

    if (dd->bInterCGsettles)
    {
        at2settle_mt = atom2settle_moltype(constr);
        ils_local->nr = 0;
    }
    else
    {
        /* Settle works inside charge groups, we assigned them already */
        at2settle_mt = NULL;
    }

    if (at2settle_mt == NULL)
    {
        atoms_to_constraints(dd,mtop,cginfo,at2con_mt,nrec,
                             ilc_local,ireq);
    }
    else
    {
        int t0_set;
        int thread;

        /* Do the constraints, if present, on the first thread.
         * Do the settles on all other threads.
         */
        t0_set = ((at2con_mt != NULL && dc->nthread > 1) ? 1 : 0);

#pragma omp parallel for num_threads(dc->nthread) schedule(static)
        for(thread=0; thread<dc->nthread; thread++)
        {
            if (at2con_mt && thread == 0)
            {
                atoms_to_constraints(dd,mtop,cginfo,at2con_mt,nrec,
                                     ilc_local,ireq);
            }

            if (thread >= t0_set)
            {
                int cg0,cg1;
                t_ilist *ilst;
                ind_req_t *ireqt;

                /* Distribute the settle check+assignments over
                 * dc->nthread or dc->nthread-1 threads.
                 */
                cg0 = (dd->ncg_home*(thread-t0_set  ))/(dc->nthread-t0_set);
                cg1 = (dd->ncg_home*(thread-t0_set+1))/(dc->nthread-t0_set);

                if (thread == t0_set)
                {
                    ilst = ils_local;
                }
                else
                {
                    ilst = &dc->ils[thread];
                }
                ilst->nr = 0;

                ireqt = &dd->constraint_comm->ireq[thread];
                if (thread > 0)
                {
                    ireqt->n = 0;
                }

                atoms_to_settles(dd,mtop,cginfo,at2settle_mt,
                                 cg0,cg1,
                                 ilst,ireqt);
            }
        }

        /* Combine the generated settles and requested indices */
        for(thread=1; thread<dc->nthread; thread++)
        {
            t_ilist *ilst;
            ind_req_t *ireqt;
            int ia;

            if (thread > t0_set)
            {
                ilst = &dc->ils[thread];
                if (ils_local->nr + ilst->nr > ils_local->nalloc)
                {
                    ils_local->nalloc = over_alloc_large(ils_local->nr + ilst->nr);
                    srenew(ils_local->iatoms,ils_local->nalloc);
                }
                for(ia=0; ia<ilst->nr; ia++)
                {
                    ils_local->iatoms[ils_local->nr+ia] = ilst->iatoms[ia];
                }
                ils_local->nr += ilst->nr;
            }
            ireqt = &dd->constraint_comm->ireq[thread];
            if (ireq->n+ireqt->n > ireq->nalloc)
            {
                ireq->nalloc = over_alloc_large(ireq->n+ireqt->n);
                srenew(ireq->ind,ireq->nalloc);
            }
            for(ia=0; ia<ireqt->n; ia++)
            {
                ireq->ind[ireq->n+ia] = ireqt->ind[ia];
            }
            ireq->n += ireqt->n;
        }

        if (debug)
        {
            fprintf(debug,"Settles: total %3d\n",ils_local->nr/4);
        }
    }

    if (dd->constraint_comm)
    {
        int nral1;

        at_end = setup_specat_communication(dd,ireq,dd->constraint_comm,
                                            dd->constraints->ga2la,
                                            at_start,2,
                                            "constraint"," or lincs-order");

        /* Fill in the missing indices */
        ga2la_specat = dd->constraints->ga2la;

        nral1 = 1 + NRAL(F_CONSTR);
        for(i=0; i<ilc_local->nr; i+=nral1)
        {
            iap = ilc_local->iatoms + i;
            for(j=1; j<nral1; j++)
            {
                if (iap[j] < 0)
                {
                    iap[j] = gmx_hash_get_minone(ga2la_specat,-iap[j]-1);
                }
            }
        }

        nral1 = 1 + NRAL(F_SETTLE);
        for(i=0; i<ils_local->nr; i+=nral1)
        {
            iap = ils_local->iatoms + i;
            for(j=1; j<nral1; j++)
            {
                if (iap[j] < 0)
                {
                    iap[j] = gmx_hash_get_minone(ga2la_specat,-iap[j]-1);
                }
            }
        }
    }
    else
    {
        at_end = at_start;
    }

    return at_end;
}
/* Sets up the work division over the threads */
static void lincs_thread_setup(struct gmx_lincsdata *li,int natoms)
{
    lincs_thread_t *li_m;
    int th;
    unsigned *atf;
    int a;

    if (natoms > li->atf_nalloc)
    {
        li->atf_nalloc = over_alloc_large(natoms);
        srenew(li->atf,li->atf_nalloc);
    }

    atf = li->atf;
    /* Clear the atom flags */
    for(a=0; a<natoms; a++)
    {
        atf[a] = 0;
    }

    for(th=0; th<li->nth; th++)
    {
        lincs_thread_t *li_th;
        int b;

        li_th = &li->th[th];

        /* The constraints are divided equally over the threads */
        li_th->b0 = (li->nc* th   )/li->nth;
        li_th->b1 = (li->nc*(th+1))/li->nth;

        if (th < sizeof(*atf)*8)
        {
            /* For each atom set a flag for constraints from each thread */
            for(b=li_th->b0; b<li_th->b1; b++)
            {
                atf[li->bla[b*2]  ] |= (1U<<th);
                atf[li->bla[b*2+1]] |= (1U<<th);
            }
        }
    }

#pragma omp parallel for num_threads(li->nth) schedule(static)
    for(th=0; th<li->nth; th++)
    {
        lincs_thread_t *li_th;
        unsigned mask;
        int b;

        li_th = &li->th[th];

        if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
        {
            li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
            srenew(li_th->ind,li_th->ind_nalloc);
            srenew(li_th->ind_r,li_th->ind_nalloc);
        }

        if (th < sizeof(*atf)*8)
        {
            mask = (1U<<th) - 1U;

            li_th->nind   = 0;
            li_th->nind_r = 0;
            for(b=li_th->b0; b<li_th->b1; b++)
            {
                /* We let the constraint with the lowest thread index
                 * operate on atoms with constraints from multiple threads.
                 */
                if (((atf[li->bla[b*2]]   & mask) == 0) &&
                    ((atf[li->bla[b*2+1]] & mask) == 0))
                {
                    /* Add the constraint to the local atom update index */
                    li_th->ind[li_th->nind++] = b;
                }
                else
                {
                    /* Add the constraint to the rest block */
                    li_th->ind_r[li_th->nind_r++] = b;
                }
            }
        }
        else
        {
            /* We are out of bits, assign all constraints to rest */
            for(b=li_th->b0; b<li_th->b1; b++)
            {
                li_th->ind_r[li_th->nind_r++] = b;
            }
        }
    }

    /* We need to copy all constraints which have not been assigned
     * to a thread to a separate list which will be handled by one thread.
     */
    li_m = &li->th[li->nth];

    li_m->nind = 0;
    for(th=0; th<li->nth; th++)
    {
        lincs_thread_t *li_th;
        int b;

        li_th = &li->th[th];

        if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
        {
            li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
            srenew(li_m->ind,li_m->ind_nalloc);
        }

        for(b=0; b<li_th->nind_r; b++)
        {
            li_m->ind[li_m->nind++] = li_th->ind_r[b];
        }

        if (debug)
        {
            fprintf(debug,"LINCS thread %d: %d constraints\n",
                    th,li_th->nind);
        }
    }

    if (debug)
    {
        fprintf(debug,"LINCS thread r: %d constraints\n",
                li_m->nind);
    }
}
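/* A minimal sketch of the ownership rule applied in the triage loop above,
 * assuming at most 32 threads (one bit per thread, as in the unsigned atom
 * flags): thread th may update a constraint's two atoms without
 * synchronization only if no lower-indexed thread also touches either atom;
 * otherwise the constraint goes to the serial "rest" list.
 */
#include <cstdint>

static bool ownedIndependently(uint32_t atomFlags1, uint32_t atomFlags2, int th)
{
    const uint32_t lowerThreads = (1u << th) - 1u; /* bits of threads 0..th-1 */

    return (atomFlags1 & lowerThreads) == 0 &&
           (atomFlags2 & lowerThreads) == 0;
}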