Exemple #1
0
static real rms_force(t_commrec *cr, rvec f[], int ns, t_shell s[],
                      int ndir, real *sf_dir, real *Epot)
{
    int    i, shell, ntot;
    double buf[4];

    buf[0] = *sf_dir;
    for (i = 0; i < ns; i++)
    {
        shell    = s[i].shell;
        buf[0]  += norm2(f[shell]);
    }
    ntot = ns;

    if (PAR(cr))
    {
        buf[1] = ntot;
        buf[2] = *sf_dir;
        buf[3] = *Epot;
        gmx_sumd(4, buf, cr);
        ntot    = (int)(buf[1] + 0.5);
        *sf_dir = buf[2];
        *Epot   = buf[3];
    }
    ntot += ndir;

    return (ntot ? sqrt(buf[0]/ntot) : 0);
}
Exemple #2
0
void sum_bin(t_bin *b, t_commrec *cr)
{
    int i;

    for (i = b->nreal; (i < b->maxreal); i++)
    {
        b->rbuf[i] = 0;
    }
    gmx_sumd(b->maxreal, b->rbuf, cr);
}
Exemple #3
0
/* Get the center from local positions that already have the correct
 * PBC representation */
extern void get_center_comm(
        const t_commrec *cr,
        rvec             x_loc[],      /* Local positions */
        real             weight_loc[], /* Local masses or other weights */
        int              nr_loc,       /* Local number of atoms */
        int              nr_group,     /* Total number of atoms of the group */
        rvec             center)       /* Weighted center */
{
    double weight_sum, denom;
    dvec   dsumvec;
    double buf[4];


    weight_sum = get_sum_of_positions(x_loc, weight_loc, nr_loc, dsumvec);

    /* Add the local contributions from all nodes. Put the sum vector and the
     * weight in a buffer array so that we get along with a single communication
     * call. */
    if (PAR(cr))
    {
        buf[0] = dsumvec[XX];
        buf[1] = dsumvec[YY];
        buf[2] = dsumvec[ZZ];
        buf[3] = weight_sum;

        /* Communicate buffer */
        gmx_sumd(4, buf, cr);

        dsumvec[XX] = buf[0];
        dsumvec[YY] = buf[1];
        dsumvec[ZZ] = buf[2];
        weight_sum  = buf[3];
    }

    if (weight_loc != nullptr)
    {
        denom = 1.0/weight_sum; /* Divide by the sum of weight to get center of mass e.g. */
    }
    else
    {
        denom = 1.0/nr_group;   /* Divide by the number of atoms to get the geometrical center */

    }
    center[XX] = dsumvec[XX]*denom;
    center[YY] = dsumvec[YY]*denom;
    center[ZZ] = dsumvec[ZZ]*denom;
}
Exemple #4
0
static void pull_reduce_double(t_commrec   *cr,
                               pull_comm_t *comm,
                               int          n,
                               double      *data)
{
    if (cr != NULL && PAR(cr))
    {
        if (comm->bParticipateAll)
        {
            /* Sum the contributions over all DD ranks */
            gmx_sumd(n, data, cr);
        }
        else
        {
#if GMX_MPI
#if MPI_IN_PLACE_EXISTS
            MPI_Allreduce(MPI_IN_PLACE, data, n, MPI_DOUBLE, MPI_SUM,
                          comm->mpi_comm_com);
#else
            double *buf;

            snew(buf, n);

            MPI_Allreduce(data, buf, n, MPI_DOUBLE, MPI_SUM,
                          comm->mpi_comm_com);

            /* Copy the result from the buffer to the input/output data */
            for (int i = 0; i < n; i++)
            {
                data[i] = buf[i];
            }
            sfree(buf);
#endif
#else
            gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
#endif
        }
    }
}
Exemple #5
0
/* calculates center of mass of selection index from all coordinates x */
void pull_calc_coms(t_commrec *cr,
                    t_pull *pull, t_mdatoms *md, t_pbc *pbc, double t,
                    rvec x[], rvec *xp)
{
    int           g, i, ii, m;
    real          mass, w, wm, twopi_box = 0;
    double        wmass, wwmass, invwmass;
    dvec          com, comp;
    double        cm, sm, cmp, smp, ccm, csm, ssm, csw, snw;
    rvec         *xx[2], x_pbc = {0, 0, 0}, dx;
    t_pull_group *pgrp;

    if (pull->rbuf == NULL)
    {
        snew(pull->rbuf, pull->ngroup);
    }
    if (pull->dbuf == NULL)
    {
        snew(pull->dbuf, 3*pull->ngroup);
    }

    if (pull->bRefAt)
    {
        pull_set_pbcatoms(cr, pull, md, x, pull->rbuf);
    }

    if (pull->cosdim >= 0)
    {
        for (m = pull->cosdim+1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
            }
        }
        twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim];
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pgrp = &pull->group[g];
        clear_dvec(com);
        clear_dvec(comp);
        wmass  = 0;
        wwmass = 0;
        cm     = 0;
        sm     = 0;
        cmp    = 0;
        smp    = 0;
        ccm    = 0;
        csm    = 0;
        ssm    = 0;
        if (!(g == 0 && PULL_CYL(pull)))
        {
            if (pgrp->epgrppbc == epgrppbcREFAT)
            {
                /* Set the pbc atom */
                copy_rvec(pull->rbuf[g], x_pbc);
            }
            w = 1;
            for (i = 0; i < pgrp->nat_loc; i++)
            {
                ii   = pgrp->ind_loc[i];
                mass = md->massT[ii];
                if (pgrp->epgrppbc != epgrppbcCOS)
                {
                    if (pgrp->weight_loc)
                    {
                        w = pgrp->weight_loc[i];
                    }
                    wm      = w*mass;
                    wmass  += wm;
                    wwmass += wm*w;
                    if (pgrp->epgrppbc == epgrppbcNONE)
                    {
                        /* Plain COM: sum the coordinates */
                        for (m = 0; m < DIM; m++)
                        {
                            com[m]    += wm*x[ii][m];
                        }
                        if (xp)
                        {
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*xp[ii][m];
                            }
                        }
                    }
                    else
                    {
                        /* Sum the difference with the reference atom */
                        pbc_dx(pbc, x[ii], x_pbc, dx);
                        for (m = 0; m < DIM; m++)
                        {
                            com[m]    += wm*dx[m];
                        }
                        if (xp)
                        {
                            /* For xp add the difference between xp and x to dx,
                             * such that we use the same periodic image,
                             * also when xp has a large displacement.
                             */
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*(dx[m] + xp[ii][m] - x[ii][m]);
                            }
                        }
                    }
                }
                else
                {
                    /* Determine cos and sin sums */
                    csw  = cos(x[ii][pull->cosdim]*twopi_box);
                    snw  = sin(x[ii][pull->cosdim]*twopi_box);
                    cm  += csw*mass;
                    sm  += snw*mass;
                    ccm += csw*csw*mass;
                    csm += csw*snw*mass;
                    ssm += snw*snw*mass;

                    if (xp)
                    {
                        csw  = cos(xp[ii][pull->cosdim]*twopi_box);
                        snw  = sin(xp[ii][pull->cosdim]*twopi_box);
                        cmp += csw*mass;
                        smp += snw*mass;
                    }
                }
            }
        }

        /* Copy local sums to a buffer for global summing */
        switch (pgrp->epgrppbc)
        {
            case epgrppbcNONE:
            case epgrppbcREFAT:
                copy_dvec(com, pull->dbuf[g*3]);
                copy_dvec(comp, pull->dbuf[g*3+1]);
                pull->dbuf[g*3+2][0] = wmass;
                pull->dbuf[g*3+2][1] = wwmass;
                pull->dbuf[g*3+2][2] = 0;
                break;
            case epgrppbcCOS:
                pull->dbuf[g*3  ][0] = cm;
                pull->dbuf[g*3  ][1] = sm;
                pull->dbuf[g*3  ][2] = 0;
                pull->dbuf[g*3+1][0] = ccm;
                pull->dbuf[g*3+1][1] = csm;
                pull->dbuf[g*3+1][2] = ssm;
                pull->dbuf[g*3+2][0] = cmp;
                pull->dbuf[g*3+2][1] = smp;
                pull->dbuf[g*3+2][2] = 0;
                break;
        }
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ngroup*3*DIM, pull->dbuf[0], cr);
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pgrp = &pull->group[g];
        if (pgrp->nat > 0 && !(g == 0 && PULL_CYL(pull)))
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                /* Determine the inverse mass */
                wmass    = pull->dbuf[g*3+2][0];
                wwmass   = pull->dbuf[g*3+2][1];
                invwmass = 1/wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm > 0)
                {
                    pgrp->wscale = wmass/wwmass;
                    pgrp->invtm  = 1.0/(pgrp->wscale*wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m]    = pull->dbuf[g*3  ][m]*invwmass;
                    if (xp)
                    {
                        pgrp->xp[m] = pull->dbuf[g*3+1][m]*invwmass;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT)
                    {
                        pgrp->x[m]    += pull->rbuf[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += pull->rbuf[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Determine the optimal location of the cosine weight */
                csw                   = pull->dbuf[g*3][0];
                snw                   = pull->dbuf[g*3][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                /* Set the weights for the local atoms */
                wmass  = sqrt(csw*csw + snw*snw);
                wwmass = (pull->dbuf[g*3+1][0]*csw*csw +
                          pull->dbuf[g*3+1][1]*csw*snw +
                          pull->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);
                pgrp->wscale = wmass/wwmass;
                pgrp->invtm  = 1.0/(pgrp->wscale*wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    ii                  = pgrp->ind_loc[i];
                    pgrp->weight_loc[i] = csw*cos(twopi_box*x[ii][pull->cosdim]) +
                        snw*sin(twopi_box*x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = pull->dbuf[g*3+2][0];
                    snw                    = pull->dbuf[g*3+2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %d wmass %f wwmass %f invtm %f\n",
                        g, wmass, wwmass, pgrp->invtm);
            }
        }
    }

    if (PULL_CYL(pull))
    {
        /* Calculate the COMs for the cyclinder reference groups */
        make_cyl_refgrps(cr, pull, md, pbc, t, x, xp);
    }
}
Exemple #6
0
static void make_cyl_refgrps(t_commrec *cr, t_pull *pull, t_mdatoms *md,
                             t_pbc *pbc, double t, rvec *x, rvec *xp)
{
    int           c, i, ii, m, start, end;
    rvec          g_x, dx, dir;
    double        r0_2, sum_a, sum_ap, dr2, mass, weight, wmass, wwmass, inp;
    t_pull_coord *pcrd;
    t_pull_group *pref, *pgrp, *pdyna;
    gmx_ga2la_t   ga2la = NULL;

    if (pull->dbuf_cyl == NULL)
    {
        snew(pull->dbuf_cyl, pull->ncoord*4);
    }

    if (cr && DOMAINDECOMP(cr))
    {
        ga2la = cr->dd->ga2la;
    }

    start = 0;
    end   = md->homenr;

    r0_2 = dsqr(pull->cyl_r0);

    /* loop over all groups to make a reference group for each*/
    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];

        /* pref will be the same group for all pull coordinates */
        pref  = &pull->group[pcrd->group[0]];
        pgrp  = &pull->group[pcrd->group[1]];
        pdyna = &pull->dyna[c];
        copy_rvec(pcrd->vec, dir);
        sum_a          = 0;
        sum_ap         = 0;
        wmass          = 0;
        wwmass         = 0;
        pdyna->nat_loc = 0;

        for (m = 0; m < DIM; m++)
        {
            g_x[m] = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
        }

        /* loop over all atoms in the main ref group */
        for (i = 0; i < pref->nat; i++)
        {
            ii = pref->ind[i];
            if (ga2la)
            {
                if (!ga2la_get_home(ga2la, pref->ind[i], &ii))
                {
                    ii = -1;
                }
            }
            if (ii >= start && ii < end)
            {
                pbc_dx_aiuc(pbc, x[ii], g_x, dx);
                inp = iprod(dir, dx);
                dr2 = 0;
                for (m = 0; m < DIM; m++)
                {
                    dr2 += dsqr(dx[m] - inp*dir[m]);
                }

                if (dr2 < r0_2)
                {
                    /* add to index, to sum of COM, to weight array */
                    if (pdyna->nat_loc >= pdyna->nalloc_loc)
                    {
                        pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1);
                        srenew(pdyna->ind_loc, pdyna->nalloc_loc);
                        srenew(pdyna->weight_loc, pdyna->nalloc_loc);
                    }
                    pdyna->ind_loc[pdyna->nat_loc] = ii;
                    mass   = md->massT[ii];
                    weight = get_weight(sqrt(dr2), pull->cyl_r1, pull->cyl_r0);
                    pdyna->weight_loc[pdyna->nat_loc] = weight;
                    sum_a += mass*weight*inp;
                    if (xp)
                    {
                        pbc_dx_aiuc(pbc, xp[ii], g_x, dx);
                        inp     = iprod(dir, dx);
                        sum_ap += mass*weight*inp;
                    }
                    wmass  += mass*weight;
                    wwmass += mass*sqr(weight);
                    pdyna->nat_loc++;
                }
            }
        }
        pull->dbuf_cyl[c*4+0] = wmass;
        pull->dbuf_cyl[c*4+1] = wwmass;
        pull->dbuf_cyl[c*4+2] = sum_a;
        pull->dbuf_cyl[c*4+3] = sum_ap;
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ncoord*4, pull->dbuf_cyl, cr);
    }

    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];

        pdyna = &pull->dyna[c];
        pgrp  = &pull->group[pcrd->group[1]];

        wmass         = pull->dbuf_cyl[c*4+0];
        wwmass        = pull->dbuf_cyl[c*4+1];
        pdyna->wscale = wmass/wwmass;
        pdyna->invtm  = 1.0/(pdyna->wscale*wmass);

        for (m = 0; m < DIM; m++)
        {
            g_x[m]      = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
            pdyna->x[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+2]/wmass;
            if (xp)
            {
                pdyna->xp[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+3]/wmass;
            }
        }

        if (debug)
        {
            fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
                    c, pdyna->x[0], pdyna->x[1],
                    pdyna->x[2], 1.0/pdyna->invtm);
        }
    }
}
gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
                          t_commrec                 *cr,
                          FILE                      *fp_err,
                          FILE                      *fp_log,
                          t_inputrec                *ir,
                          t_state                   *state,
                          double                     cycles,
                          interaction_const_t       *ic,
                          struct nonbonded_verlet_t *nbv,
                          struct gmx_pme_t **        pmedata,
                          gmx_int64_t                step)
{
    gmx_bool     OK;
    pme_setup_t *set;
    double       cycles_fast;
    char         buf[STRLEN], sbuf[22];
    real         rtab;
    gmx_bool     bUsesSimpleTables = TRUE;

    if (pme_lb->stage == pme_lb->nstage)
    {
        return FALSE;
    }

    if (PAR(cr))
    {
        gmx_sumd(1, &cycles, cr);
        cycles /= cr->nnodes;
    }

    set = &pme_lb->setup[pme_lb->cur];
    set->count++;

    rtab = ir->rlistlong + ir->tabext;

    if (set->count % 2 == 1)
    {
        /* Skip the first cycle, because the first step after a switch
         * is much slower due to allocation and/or caching effects.
         */
        return TRUE;
    }

    sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf));
    print_grid(fp_err, fp_log, buf, "timed with", set, cycles);

    if (set->count <= 2)
    {
        set->cycles = cycles;
    }
    else
    {
        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
            pme_lb->stage == pme_lb->nstage - 1)
        {
            /* The performance went up a lot (due to e.g. DD load balancing).
             * Add a stage, keep the minima, but rescan all setups.
             */
            pme_lb->nstage++;

            if (debug)
            {
                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
                        "Increased the number stages to %d"
                        " and ignoring the previous performance\n",
                        set->grid[XX], set->grid[YY], set->grid[ZZ],
                        cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
                        pme_lb->nstage);
            }
        }
        set->cycles = min(set->cycles, cycles);
    }

    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
    {
        pme_lb->fastest = pme_lb->cur;

        if (DOMAINDECOMP(cr))
        {
            /* We found a new fastest setting, ensure that with subsequent
             * shorter cut-off's the dynamic load balancing does not make
             * the use of the current cut-off impossible. This solution is
             * a trade-off, as the PME load balancing and DD domain size
             * load balancing can interact in complex ways.
             * With the Verlet kernels, DD load imbalance will usually be
             * mainly due to bonded interaction imbalance, which will often
             * quickly push the domain boundaries beyond the limit for the
             * optimal, PME load balanced, cut-off. But it could be that
             * better overal performance can be obtained with a slightly
             * shorter cut-off and better DD load balancing.
             */
            change_dd_dlb_cutoff_limit(cr);
        }
    }
    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;

    /* Check in stage 0 if we should stop scanning grids.
     * Stop when the time is more than SLOW_FAC longer than the fastest.
     */
    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
    {
        pme_lb->n = pme_lb->cur + 1;
        /* Done with scanning, go to stage 1 */
        switch_to_stage1(pme_lb);
    }

    if (pme_lb->stage == 0)
    {
        int gridsize_start;

        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];

        do
        {
            if (pme_lb->cur+1 < pme_lb->n)
            {
                /* We had already generated the next setup */
                OK = TRUE;
            }
            else
            {
                /* Find the next setup */
                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd);

                if (!OK)
                {
                    pme_lb->elimited = epmelblimPMEGRID;
                }
            }

            if (OK && ir->ePBC != epbcNONE)
            {
                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
                      <= max_cutoff2(ir->ePBC, state->box));
                if (!OK)
                {
                    pme_lb->elimited = epmelblimBOX;
                }
            }

            if (OK)
            {
                pme_lb->cur++;

                if (DOMAINDECOMP(cr))
                {
                    OK = change_dd_cutoff(cr, state, ir,
                                          pme_lb->setup[pme_lb->cur].rlistlong);
                    if (!OK)
                    {
                        /* Failed: do not use this setup */
                        pme_lb->cur--;
                        pme_lb->elimited = epmelblimDD;
                    }
                }
            }
            if (!OK)
            {
                /* We hit the upper limit for the cut-off,
                 * the setup should not go further than cur.
                 */
                pme_lb->n = pme_lb->cur + 1;
                print_loadbal_limited(fp_err, fp_log, step, pme_lb);
                /* Switch to the next stage */
                switch_to_stage1(pme_lb);
            }
        }
        while (OK &&
               !(pme_lb->setup[pme_lb->cur].grid[XX]*
                 pme_lb->setup[pme_lb->cur].grid[YY]*
                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
                 gridsize_start*PME_LB_GRID_SCALE_FAC
                 &&
                 pme_lb->setup[pme_lb->cur].grid_efficiency <
                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
    }

    if (pme_lb->stage > 0 && pme_lb->end == 1)
    {
        pme_lb->cur   = 0;
        pme_lb->stage = pme_lb->nstage;
    }
    else if (pme_lb->stage > 0 && pme_lb->end > 1)
    {
        /* If stage = nstage-1:
         *   scan over all setups, rerunning only those setups
         *   which are not much slower than the fastest
         * else:
         *   use the next setup
         */
        do
        {
            pme_lb->cur++;
            if (pme_lb->cur == pme_lb->end)
            {
                pme_lb->stage++;
                pme_lb->cur = pme_lb->start;
            }
        }
        while (pme_lb->stage == pme_lb->nstage - 1 &&
               pme_lb->setup[pme_lb->cur].count > 0 &&
               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);

        if (pme_lb->stage == pme_lb->nstage)
        {
            /* We are done optimizing, use the fastest setup we found */
            pme_lb->cur = pme_lb->fastest;
        }
    }

    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
    {
        OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
        if (!OK)
        {
            /* Failsafe solution */
            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
            {
                pme_lb->stage--;
            }
            pme_lb->fastest  = 0;
            pme_lb->start    = 0;
            pme_lb->end      = pme_lb->cur;
            pme_lb->cur      = pme_lb->start;
            pme_lb->elimited = epmelblimDD;
            print_loadbal_limited(fp_err, fp_log, step, pme_lb);
        }
    }

    /* Change the Coulomb cut-off and the PME grid */

    set = &pme_lb->setup[pme_lb->cur];

    ic->rcoulomb     = set->rcut_coulomb;
    ic->rlist        = set->rlist;
    ic->rlistlong    = set->rlistlong;
    ir->nstcalclr    = set->nstcalclr;
    ic->ewaldcoeff_q = set->ewaldcoeff_q;
    /* TODO: centralize the code that sets the potentials shifts */
    if (ic->coulomb_modifier == eintmodPOTSHIFT)
    {
        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
    }
    if (EVDW_PME(ic->vdwtype))
    {
        /* We have PME for both Coulomb and VdW, set rvdw equal to rcoulomb */
        ic->rvdw            = set->rcut_coulomb;
        ic->ewaldcoeff_lj   = set->ewaldcoeff_lj;
        if (ic->vdw_modifier == eintmodPOTSHIFT)
        {
            real crc2;

            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
            ic->sh_invrc6             = -ic->dispersion_shift.cpot;
            crc2                      = sqr(ic->ewaldcoeff_lj*ic->rvdw);
            ic->sh_lj_ewald           = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
        }
    }

    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
    nbnxn_gpu_pme_loadbal_update_param(nbv, ic);

    /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
     * also sharing texture references. To keep the code simple, we don't
     * treat texture references as shared resources, but this means that
     * the coulomb_tab texture ref will get updated by multiple threads.
     * Hence, to ensure that the non-bonded kernels don't start before all
     * texture binding operations are finished, we need to wait for all ranks
     * to arrive here before continuing.
     *
     * Note that we could omit this barrier if GPUs are not shared (or
     * texture objects are used), but as this is initialization code, there
     * is not point in complicating things.
     */
#ifdef GMX_THREAD_MPI
    if (PAR(cr) && use_GPU(nbv))
    {
        gmx_barrier(cr);
    }
#endif  /* GMX_THREAD_MPI */

    /* Usually we won't need the simple tables with GPUs.
     * But we do with hybrid acceleration and with free energy.
     * To avoid bugs, we always re-initialize the simple tables here.
     */
    init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab);

    if (cr->duty & DUTY_PME)
    {
        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
        {
            /* Generate a new PME data structure,
             * copying part of the old pointers.
             */
            gmx_pme_reinit(&set->pmedata,
                           cr, pme_lb->setup[0].pmedata, ir,
                           set->grid);
        }
        *pmedata = set->pmedata;
    }
    else
    {
        /* Tell our PME-only node to switch grid */
        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff_q, set->ewaldcoeff_lj);
    }

    if (debug)
    {
        print_grid(NULL, debug, "", "switched to", set, -1);
    }

    if (pme_lb->stage == pme_lb->nstage)
    {
        print_grid(fp_err, fp_log, "", "optimal", set, -1);
    }

    return TRUE;
}
Exemple #8
0
static void calc_cgcm_av_stddev(t_block *cgs, int n, rvec *x, rvec av, rvec stddev,
                                t_commrec *cr_sum)
{
    int   *cgindex;
    dvec   s1, s2;
    double buf[7];
    int    cg, d, k0, k1, k, nrcg;
    real   inv_ncg;
    rvec   cg_cm;

    clear_dvec(s1);
    clear_dvec(s2);

    cgindex = cgs->index;
    for (cg = 0; cg < n; cg++)
    {
        k0      = cgindex[cg];
        k1      = cgindex[cg+1];
        nrcg    = k1 - k0;
        if (nrcg == 1)
        {
            copy_rvec(x[k0], cg_cm);
        }
        else
        {
            inv_ncg = 1.0/nrcg;

            clear_rvec(cg_cm);
            for (k = k0; (k < k1); k++)
            {
                rvec_inc(cg_cm, x[k]);
            }
            for (d = 0; (d < DIM); d++)
            {
                cg_cm[d] *= inv_ncg;
            }
        }
        for (d = 0; d < DIM; d++)
        {
            s1[d] += cg_cm[d];
            s2[d] += cg_cm[d]*cg_cm[d];
        }
    }

    if (cr_sum != NULL)
    {
        for (d = 0; d < DIM; d++)
        {
            buf[d]     = s1[d];
            buf[DIM+d] = s2[d];
        }
        buf[6] = n;
        gmx_sumd(7, buf, cr_sum);
        for (d = 0; d < DIM; d++)
        {
            s1[d] = buf[d];
            s2[d] = buf[DIM+d];
        }
        n = (int)(buf[6] + 0.5);
    }

    dsvmul(1.0/n, s1, s1);
    dsvmul(1.0/n, s2, s2);

    for (d = 0; d < DIM; d++)
    {
        av[d]     = s1[d];
        stddev[d] = sqrt(s2[d] - s1[d]*s1[d]);
    }
}
Exemple #9
0
double do_tpi(FILE *fplog, t_commrec *cr,
              int nfile, const t_filenm fnm[],
              const output_env_t oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
              int gmx_unused nstglobalcomm,
              gmx_vsite_t gmx_unused *vsite, gmx_constr_t gmx_unused constr,
              int gmx_unused stepout,
              t_inputrec *inputrec,
              gmx_mtop_t *top_global, t_fcdata *fcd,
              t_state *state,
              t_mdatoms *mdatoms,
              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
              gmx_edsam_t gmx_unused ed,
              t_forcerec *fr,
              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
              gmx_membed_t gmx_unused membed,
              real gmx_unused cpt_period, real gmx_unused max_hours,
              const char gmx_unused *deviceOptions,
              int gmx_unused imdport,
              unsigned long gmx_unused Flags,
              gmx_walltime_accounting_t walltime_accounting)
{
    const char     *TPI = "Test Particle Insertion";
    gmx_localtop_t *top;
    gmx_groups_t   *groups;
    gmx_enerdata_t *enerd;
    rvec           *f;
    real            lambda, t, temp, beta, drmax, epot;
    double          embU, sum_embU, *sum_UgembU, V, V_all, VembU_all;
    t_trxstatus    *status;
    t_trxframe      rerun_fr;
    gmx_bool        bDispCorr, bCharge, bRFExcl, bNotLastFrame, bStateChanged, bNS;
    tensor          force_vir, shake_vir, vir, pres;
    int             cg_tp, a_tp0, a_tp1, ngid, gid_tp, nener, e;
    rvec           *x_mol;
    rvec            mu_tot, x_init, dx, x_tp;
    int             nnodes, frame;
    gmx_int64_t     frame_step_prev, frame_step;
    gmx_int64_t     nsteps, stepblocksize = 0, step;
    gmx_int64_t     rnd_count_stride, rnd_count;
    gmx_int64_t     seed;
    double          rnd[4];
    int             i, start, end;
    FILE           *fp_tpi = NULL;
    char           *ptr, *dump_pdb, **leg, str[STRLEN], str2[STRLEN];
    double          dbl, dump_ener;
    gmx_bool        bCavity;
    int             nat_cavity  = 0, d;
    real           *mass_cavity = NULL, mass_tot;
    int             nbin;
    double          invbinw, *bin, refvolshift, logV, bUlogV;
    real            dvdl, prescorr, enercorr, dvdlcorr;
    gmx_bool        bEnergyOutOfBounds;
    const char     *tpid_leg[2] = {"direct", "reweighted"};

    /* Since there is no upper limit to the insertion energies,
     * we need to set an upper limit for the distribution output.
     */
    real bU_bin_limit      = 50;
    real bU_logV_bin_limit = bU_bin_limit + 10;

    nnodes = cr->nnodes;

    top = gmx_mtop_generate_local_top(top_global, inputrec);

    groups = &top_global->groups;

    bCavity = (inputrec->eI == eiTPIC);
    if (bCavity)
    {
        ptr = getenv("GMX_TPIC_MASSES");
        if (ptr == NULL)
        {
            nat_cavity = 1;
        }
        else
        {
            /* Read (multiple) masses from env var GMX_TPIC_MASSES,
             * The center of mass of the last atoms is then used for TPIC.
             */
            nat_cavity = 0;
            while (sscanf(ptr, "%lf%n", &dbl, &i) > 0)
            {
                srenew(mass_cavity, nat_cavity+1);
                mass_cavity[nat_cavity] = dbl;
                fprintf(fplog, "mass[%d] = %f\n",
                        nat_cavity+1, mass_cavity[nat_cavity]);
                nat_cavity++;
                ptr += i;
            }
            if (nat_cavity == 0)
            {
                gmx_fatal(FARGS, "Found %d masses in GMX_TPIC_MASSES", nat_cavity);
            }
        }
    }

    /*
       init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,
       state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/
    /* We never need full pbc for TPI */
    fr->ePBC = epbcXYZ;
    /* Determine the temperature for the Boltzmann weighting */
    temp = inputrec->opts.ref_t[0];
    if (fplog)
    {
        for (i = 1; (i < inputrec->opts.ngtc); i++)
        {
            if (inputrec->opts.ref_t[i] != temp)
            {
                fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
                fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
            }
        }
        fprintf(fplog,
                "\n  The temperature for test particle insertion is %.3f K\n\n",
                temp);
    }
    beta = 1.0/(BOLTZ*temp);

    /* Number of insertions per frame */
    nsteps = inputrec->nsteps;

    /* Use the same neighborlist with more insertions points
     * in a sphere of radius drmax around the initial point
     */
    /* This should be a proper mdp parameter */
    drmax = inputrec->rtpi;

    /* An environment variable can be set to dump all configurations
     * to pdb with an insertion energy <= this value.
     */
    dump_pdb  = getenv("GMX_TPI_DUMP");
    dump_ener = 0;
    if (dump_pdb)
    {
        sscanf(dump_pdb, "%lf", &dump_ener);
    }

    atoms2md(top_global, inputrec, 0, NULL, top_global->natoms, mdatoms);
    update_mdatoms(mdatoms, inputrec->fepvals->init_lambda);

    snew(enerd, 1);
    init_enerdata(groups->grps[egcENER].nr, inputrec->fepvals->n_lambda, enerd);
    snew(f, top_global->natoms);

    /* Print to log file  */
    walltime_accounting_start(walltime_accounting);
    wallcycle_start(wcycle, ewcRUN);
    print_start(fplog, cr, walltime_accounting, "Test Particle Insertion");

    /* The last charge group is the group to be inserted */
    cg_tp = top->cgs.nr - 1;
    a_tp0 = top->cgs.index[cg_tp];
    a_tp1 = top->cgs.index[cg_tp+1];
    if (debug)
    {
        fprintf(debug, "TPI cg %d, atoms %d-%d\n", cg_tp, a_tp0, a_tp1);
    }
    if (a_tp1 - a_tp0 > 1 &&
        (inputrec->rlist < inputrec->rcoulomb ||
         inputrec->rlist < inputrec->rvdw))
    {
        gmx_fatal(FARGS, "Can not do TPI for multi-atom molecule with a twin-range cut-off");
    }
    snew(x_mol, a_tp1-a_tp0);

    bDispCorr = (inputrec->eDispCorr != edispcNO);
    bCharge   = FALSE;
    for (i = a_tp0; i < a_tp1; i++)
    {
        /* Copy the coordinates of the molecule to be insterted */
        copy_rvec(state->x[i], x_mol[i-a_tp0]);
        /* Check if we need to print electrostatic energies */
        bCharge |= (mdatoms->chargeA[i] != 0 ||
                    (mdatoms->chargeB && mdatoms->chargeB[i] != 0));
    }
    bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype != eelRF_NEC);

    calc_cgcm(fplog, cg_tp, cg_tp+1, &(top->cgs), state->x, fr->cg_cm);
    if (bCavity)
    {
        if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog)
        {
            fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
            fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
        }
    }
    else
    {
        /* Center the molecule to be inserted at zero */
        for (i = 0; i < a_tp1-a_tp0; i++)
        {
            rvec_dec(x_mol[i], fr->cg_cm[cg_tp]);
        }
    }

    if (fplog)
    {
        fprintf(fplog, "\nWill insert %d atoms %s partial charges\n",
                a_tp1-a_tp0, bCharge ? "with" : "without");

        fprintf(fplog, "\nWill insert %d times in each frame of %s\n",
                (int)nsteps, opt2fn("-rerun", nfile, fnm));
    }

    if (!bCavity)
    {
        if (inputrec->nstlist > 1)
        {
            if (drmax == 0 && a_tp1-a_tp0 == 1)
            {
                gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax);
            }
            if (fplog)
            {
                fprintf(fplog, "Will use the same neighborlist for %d insertions in a sphere of radius %f\n", inputrec->nstlist, drmax);
            }
        }
    }
    else
    {
        if (fplog)
        {
            fprintf(fplog, "Will insert randomly in a sphere of radius %f around the center of the cavity\n", drmax);
        }
    }

    ngid   = groups->grps[egcENER].nr;
    gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
    nener  = 1 + ngid;
    if (bDispCorr)
    {
        nener += 1;
    }
    if (bCharge)
    {
        nener += ngid;
        if (bRFExcl)
        {
            nener += 1;
        }
        if (EEL_FULL(fr->eeltype))
        {
            nener += 1;
        }
    }
    snew(sum_UgembU, nener);

    /* Copy the random seed set by the user */
    seed = inputrec->ld_seed;
    /* We use the frame step number as one random counter.
     * The second counter use the insertion (step) count. But we
     * need multiple random numbers per insertion. This number is
     * not fixed, since we generate random locations in a sphere
     * by putting locations in a cube and some of these fail.
     * A count of 20 is already extremely unlikely, so 10000 is
     * a safe margin for random numbers per insertion.
     */
    rnd_count_stride = 10000;

    if (MASTER(cr))
    {
        fp_tpi = xvgropen(opt2fn("-tpi", nfile, fnm),
                          "TPI energies", "Time (ps)",
                          "(kJ mol\\S-1\\N) / (nm\\S3\\N)", oenv);
        xvgr_subtitle(fp_tpi, "f. are averages over one frame", oenv);
        snew(leg, 4+nener);
        e = 0;
        sprintf(str, "-kT log(<Ve\\S-\\betaU\\N>/<V>)");
        leg[e++] = strdup(str);
        sprintf(str, "f. -kT log<e\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        sprintf(str, "f. <e\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        sprintf(str, "f. V");
        leg[e++] = strdup(str);
        sprintf(str, "f. <Ue\\S-\\betaU\\N>");
        leg[e++] = strdup(str);
        for (i = 0; i < ngid; i++)
        {
            sprintf(str, "f. <U\\sVdW %s\\Ne\\S-\\betaU\\N>",
                    *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
            leg[e++] = strdup(str);
        }
        if (bDispCorr)
        {
            sprintf(str, "f. <U\\sdisp c\\Ne\\S-\\betaU\\N>");
            leg[e++] = strdup(str);
        }
        if (bCharge)
        {
            for (i = 0; i < ngid; i++)
            {
                sprintf(str, "f. <U\\sCoul %s\\Ne\\S-\\betaU\\N>",
                        *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
                leg[e++] = strdup(str);
            }
            if (bRFExcl)
            {
                sprintf(str, "f. <U\\sRF excl\\Ne\\S-\\betaU\\N>");
                leg[e++] = strdup(str);
            }
            if (EEL_FULL(fr->eeltype))
            {
                sprintf(str, "f. <U\\sCoul recip\\Ne\\S-\\betaU\\N>");
                leg[e++] = strdup(str);
            }
        }
        xvgr_legend(fp_tpi, 4+nener, (const char**)leg, oenv);
        for (i = 0; i < 4+nener; i++)
        {
            sfree(leg[i]);
        }
        sfree(leg);
    }
    clear_rvec(x_init);
    V_all     = 0;
    VembU_all = 0;

    invbinw = 10;
    nbin    = 10;
    snew(bin, nbin);

    /* Avoid frame step numbers <= -1 */
    frame_step_prev = -1;

    bNotLastFrame = read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm),
                                     &rerun_fr, TRX_NEED_X);
    frame = 0;

    if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) !=
        mdatoms->nr - (a_tp1 - a_tp0))
    {
        gmx_fatal(FARGS, "Number of atoms in trajectory (%d)%s "
                  "is not equal the number in the run input file (%d) "
                  "minus the number of atoms to insert (%d)\n",
                  rerun_fr.natoms, bCavity ? " minus one" : "",
                  mdatoms->nr, a_tp1-a_tp0);
    }

    refvolshift = log(det(rerun_fr.box));

    switch (inputrec->eI)
    {
        case eiTPI:
            stepblocksize = inputrec->nstlist;
            break;
        case eiTPIC:
            stepblocksize = 1;
            break;
        default:
            gmx_fatal(FARGS, "Unknown integrator %s", ei_names[inputrec->eI]);
    }

#ifdef GMX_SIMD
    /* Make sure we don't detect SIMD overflow generated before this point */
    gmx_simd_check_and_reset_overflow();
#endif

    while (bNotLastFrame)
    {
        frame_step      = rerun_fr.step;
        if (frame_step <= frame_step_prev)
        {
            /* We don't have step number in the trajectory file,
             * or we have constant or decreasing step numbers.
             * Ensure we have increasing step numbers, since we use
             * the step numbers as a counter for random numbers.
             */
            frame_step  = frame_step_prev + 1;
        }
        frame_step_prev = frame_step;

        lambda = rerun_fr.lambda;
        t      = rerun_fr.time;

        sum_embU = 0;
        for (e = 0; e < nener; e++)
        {
            sum_UgembU[e] = 0;
        }

        /* Copy the coordinates from the input trajectory */
        for (i = 0; i < rerun_fr.natoms; i++)
        {
            copy_rvec(rerun_fr.x[i], state->x[i]);
        }
        copy_mat(rerun_fr.box, state->box);

        V    = det(state->box);
        logV = log(V);

        bStateChanged = TRUE;
        bNS           = TRUE;

        step = cr->nodeid*stepblocksize;
        while (step < nsteps)
        {
            /* Initialize the second counter for random numbers using
             * the insertion step index. This ensures that we get
             * the same random numbers independently of how many
             * MPI ranks we use. Also for the same seed, we get
             * the same initial random sequence for different nsteps.
             */
            rnd_count = step*rnd_count_stride;

            if (!bCavity)
            {
                /* Random insertion in the whole volume */
                bNS = (step % inputrec->nstlist == 0);
                if (bNS)
                {
                    /* Generate a random position in the box */
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                    for (d = 0; d < DIM; d++)
                    {
                        x_init[d] = rnd[d]*state->box[d][d];
                    }
                }
                if (inputrec->nstlist == 1)
                {
                    copy_rvec(x_init, x_tp);
                }
                else
                {
                    /* Generate coordinates within |dx|=drmax of x_init */
                    do
                    {
                        gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                        gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                        for (d = 0; d < DIM; d++)
                        {
                            dx[d] = (2*rnd[d] - 1)*drmax;
                        }
                    }
                    while (norm2(dx) > drmax*drmax);
                    rvec_add(x_init, dx, x_tp);
                }
            }
            else
            {
                /* Random insertion around a cavity location
                 * given by the last coordinate of the trajectory.
                 */
                if (step == 0)
                {
                    if (nat_cavity == 1)
                    {
                        /* Copy the location of the cavity */
                        copy_rvec(rerun_fr.x[rerun_fr.natoms-1], x_init);
                    }
                    else
                    {
                        /* Determine the center of mass of the last molecule */
                        clear_rvec(x_init);
                        mass_tot = 0;
                        for (i = 0; i < nat_cavity; i++)
                        {
                            for (d = 0; d < DIM; d++)
                            {
                                x_init[d] +=
                                    mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d];
                            }
                            mass_tot += mass_cavity[i];
                        }
                        for (d = 0; d < DIM; d++)
                        {
                            x_init[d] /= mass_tot;
                        }
                    }
                }
                /* Generate coordinates within |dx|=drmax of x_init */
                do
                {
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                    gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                    for (d = 0; d < DIM; d++)
                    {
                        dx[d] = (2*rnd[d] - 1)*drmax;
                    }
                }
                while (norm2(dx) > drmax*drmax);
                rvec_add(x_init, dx, x_tp);
            }

            if (a_tp1 - a_tp0 == 1)
            {
                /* Insert a single atom, just copy the insertion location */
                copy_rvec(x_tp, state->x[a_tp0]);
            }
            else
            {
                /* Copy the coordinates from the top file */
                for (i = a_tp0; i < a_tp1; i++)
                {
                    copy_rvec(x_mol[i-a_tp0], state->x[i]);
                }
                /* Rotate the molecule randomly */
                gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd);
                gmx_rng_cycle_2uniform(frame_step, rnd_count++, seed, RND_SEED_TPI, rnd+2);
                rotate_conf(a_tp1-a_tp0, state->x+a_tp0, NULL,
                            2*M_PI*rnd[0],
                            2*M_PI*rnd[1],
                            2*M_PI*rnd[2]);
                /* Shift to the insertion location */
                for (i = a_tp0; i < a_tp1; i++)
                {
                    rvec_inc(state->x[i], x_tp);
                }
            }

            /* Clear some matrix variables  */
            clear_mat(force_vir);
            clear_mat(shake_vir);
            clear_mat(vir);
            clear_mat(pres);

            /* Set the charge group center of mass of the test particle */
            copy_rvec(x_init, fr->cg_cm[top->cgs.nr-1]);

            /* Calc energy (no forces) on new positions.
             * Since we only need the intermolecular energy
             * and the RF exclusion terms of the inserted molecule occur
             * within a single charge group we can pass NULL for the graph.
             * This also avoids shifts that would move charge groups
             * out of the box.
             *
             * Some checks above ensure than we can not have
             * twin-range interactions together with nstlist > 1,
             * therefore we do not need to remember the LR energies.
             */
            /* Make do_force do a single node force calculation */
            cr->nnodes = 1;
            do_force(fplog, cr, inputrec,
                     step, nrnb, wcycle, top, &top_global->groups,
                     state->box, state->x, &state->hist,
                     f, force_vir, mdatoms, enerd, fcd,
                     state->lambda,
                     NULL, fr, NULL, mu_tot, t, NULL, NULL, FALSE,
                     GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
                     (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) |
                     (bStateChanged ? GMX_FORCE_STATECHANGED : 0));
            cr->nnodes    = nnodes;
            bStateChanged = FALSE;
            bNS           = FALSE;

            /* Calculate long range corrections to pressure and energy */
            calc_dispcorr(fplog, inputrec, fr, step, top_global->natoms, state->box,
                          lambda, pres, vir, &prescorr, &enercorr, &dvdlcorr);
            /* figure out how to rearrange the next 4 lines MRS 8/4/2009 */
            enerd->term[F_DISPCORR]  = enercorr;
            enerd->term[F_EPOT]     += enercorr;
            enerd->term[F_PRES]     += prescorr;
            enerd->term[F_DVDL_VDW] += dvdlcorr;

            epot               = enerd->term[F_EPOT];
            bEnergyOutOfBounds = FALSE;
#ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
            /* With SSE the energy can overflow, check for this */
            if (gmx_mm_check_and_reset_overflow())
            {
                if (debug)
                {
                    fprintf(debug, "Found an SSE overflow, assuming the energy is out of bounds\n");
                }
                bEnergyOutOfBounds = TRUE;
            }
#endif
            /* If the compiler doesn't optimize this check away
             * we catch the NAN energies.
             * The epot>GMX_REAL_MAX check catches inf values,
             * which should nicely result in embU=0 through the exp below,
             * but it does not hurt to check anyhow.
             */
            /* Non-bonded Interaction usually diverge at r=0.
             * With tabulated interaction functions the first few entries
             * should be capped in a consistent fashion between
             * repulsion, dispersion and Coulomb to avoid accidental
             * negative values in the total energy.
             * The table generation code in tables.c does this.
             * With user tbales the user should take care of this.
             */
            if (epot != epot || epot > GMX_REAL_MAX)
            {
                bEnergyOutOfBounds = TRUE;
            }
            if (bEnergyOutOfBounds)
            {
                if (debug)
                {
                    fprintf(debug, "\n  time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n", t, (int)step, epot);
                }
                embU = 0;
            }
            else
            {
                embU      = exp(-beta*epot);
                sum_embU += embU;
                /* Determine the weighted energy contributions of each energy group */
                e                = 0;
                sum_UgembU[e++] += epot*embU;
                if (fr->bBHAM)
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egBHAMSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egBHAMLR][GID(i, gid_tp, ngid)])*embU;
                    }
                }
                else
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egLJSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egLJLR][GID(i, gid_tp, ngid)])*embU;
                    }
                }
                if (bDispCorr)
                {
                    sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU;
                }
                if (bCharge)
                {
                    for (i = 0; i < ngid; i++)
                    {
                        sum_UgembU[e++] +=
                            (enerd->grpp.ener[egCOULSR][GID(i, gid_tp, ngid)] +
                             enerd->grpp.ener[egCOULLR][GID(i, gid_tp, ngid)])*embU;
                    }
                    if (bRFExcl)
                    {
                        sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
                    }
                    if (EEL_FULL(fr->eeltype))
                    {
                        sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU;
                    }
                }
            }

            if (embU == 0 || beta*epot > bU_bin_limit)
            {
                bin[0]++;
            }
            else
            {
                i = (int)((bU_logV_bin_limit
                           - (beta*epot - logV + refvolshift))*invbinw
                          + 0.5);
                if (i < 0)
                {
                    i = 0;
                }
                if (i >= nbin)
                {
                    realloc_bins(&bin, &nbin, i+10);
                }
                bin[i]++;
            }

            if (debug)
            {
                fprintf(debug, "TPI %7d %12.5e %12.5f %12.5f %12.5f\n",
                        (int)step, epot, x_tp[XX], x_tp[YY], x_tp[ZZ]);
            }

            if (dump_pdb && epot <= dump_ener)
            {
                sprintf(str, "t%g_step%d.pdb", t, (int)step);
                sprintf(str2, "t: %f step %d ener: %f", t, (int)step, epot);
                write_sto_conf_mtop(str, str2, top_global, state->x, state->v,
                                    inputrec->ePBC, state->box);
            }

            step++;
            if ((step/stepblocksize) % cr->nnodes != cr->nodeid)
            {
                /* Skip all steps assigned to the other MPI ranks */
                step += (cr->nnodes - 1)*stepblocksize;
            }
        }

        if (PAR(cr))
        {
            /* When running in parallel sum the energies over the processes */
            gmx_sumd(1,    &sum_embU, cr);
            gmx_sumd(nener, sum_UgembU, cr);
        }

        frame++;
        V_all     += V;
        VembU_all += V*sum_embU/nsteps;

        if (fp_tpi)
        {
            if (bVerbose || frame%10 == 0 || frame < 10)
            {
                fprintf(stderr, "mu %10.3e <mu> %10.3e\n",
                        -log(sum_embU/nsteps)/beta, -log(VembU_all/V_all)/beta);
            }

            fprintf(fp_tpi, "%10.3f %12.5e %12.5e %12.5e %12.5e",
                    t,
                    VembU_all == 0 ? 20/beta : -log(VembU_all/V_all)/beta,
                    sum_embU == 0  ? 20/beta : -log(sum_embU/nsteps)/beta,
                    sum_embU/nsteps, V);
            for (e = 0; e < nener; e++)
            {
                fprintf(fp_tpi, " %12.5e", sum_UgembU[e]/nsteps);
            }
            fprintf(fp_tpi, "\n");
            fflush(fp_tpi);
        }

        bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
    } /* End of the loop  */
    walltime_accounting_end(walltime_accounting);

    close_trj(status);

    if (fp_tpi != NULL)
    {
        gmx_fio_fclose(fp_tpi);
    }

    if (fplog != NULL)
    {
        fprintf(fplog, "\n");
        fprintf(fplog, "  <V>  = %12.5e nm^3\n", V_all/frame);
        fprintf(fplog, "  <mu> = %12.5e kJ/mol\n", -log(VembU_all/V_all)/beta);
    }

    /* Write the Boltzmann factor histogram */
    if (PAR(cr))
    {
        /* When running in parallel sum the bins over the processes */
        i = nbin;
        global_max(cr, &i);
        realloc_bins(&bin, &nbin, i);
        gmx_sumd(nbin, bin, cr);
    }
    if (MASTER(cr))
    {
        fp_tpi = xvgropen(opt2fn("-tpid", nfile, fnm),
                          "TPI energy distribution",
                          "\\betaU - log(V/<V>)", "count", oenv);
        sprintf(str, "number \\betaU > %g: %9.3e", bU_bin_limit, bin[0]);
        xvgr_subtitle(fp_tpi, str, oenv);
        xvgr_legend(fp_tpi, 2, (const char **)tpid_leg, oenv);
        for (i = nbin-1; i > 0; i--)
        {
            bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame);
            fprintf(fp_tpi, "%6.2f %10d %12.5e\n",
                    bUlogV,
                    (int)(bin[i]+0.5),
                    bin[i]*exp(-bUlogV)*V_all/VembU_all);
        }
        gmx_fio_fclose(fp_tpi);
    }
    sfree(bin);

    sfree(sum_UgembU);

    walltime_accounting_set_nsteps_done(walltime_accounting, frame*inputrec->nsteps);

    return 0;
}
Exemple #10
0
gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
                          t_commrec           *cr,
                          FILE                *fp_err,
                          FILE                *fp_log,
                          t_inputrec          *ir,
                          t_state             *state,
                          double               cycles,
                          interaction_const_t *ic,
                          nonbonded_verlet_t  *nbv,
                          gmx_pme_t           *pmedata,
                          gmx_large_int_t      step)
{
    gmx_bool     OK;
    pme_setup_t *set;
    double       cycles_fast;
    char         buf[STRLEN], sbuf[22];
    real         rtab;
    gmx_bool     bUsesSimpleTables = TRUE;

    if (pme_lb->stage == pme_lb->nstage)
    {
        return FALSE;
    }

    if (PAR(cr))
    {
        gmx_sumd(1, &cycles, cr);
        cycles /= cr->nnodes;
    }

    set = &pme_lb->setup[pme_lb->cur];
    set->count++;

    rtab = ir->rlistlong + ir->tabext;

    if (set->count % 2 == 1)
    {
        /* Skip the first cycle, because the first step after a switch
         * is much slower due to allocation and/or caching effects.
         */
        return TRUE;
    }

    sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf));
    print_grid(fp_err, fp_log, buf, "timed with", set, cycles);

    if (set->count <= 2)
    {
        set->cycles = cycles;
    }
    else
    {
        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
            pme_lb->stage == pme_lb->nstage - 1)
        {
            /* The performance went up a lot (due to e.g. DD load balancing).
             * Add a stage, keep the minima, but rescan all setups.
             */
            pme_lb->nstage++;

            if (debug)
            {
                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
                        "Increased the number stages to %d"
                        " and ignoring the previous performance\n",
                        set->grid[XX], set->grid[YY], set->grid[ZZ],
                        cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
                        pme_lb->nstage);
            }
        }
        set->cycles = min(set->cycles, cycles);
    }

    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
    {
        pme_lb->fastest = pme_lb->cur;

        if (DOMAINDECOMP(cr))
        {
            /* We found a new fastest setting, ensure that with subsequent
             * shorter cut-off's the dynamic load balancing does not make
             * the use of the current cut-off impossible. This solution is
             * a trade-off, as the PME load balancing and DD domain size
             * load balancing can interact in complex ways.
             * With the Verlet kernels, DD load imbalance will usually be
             * mainly due to bonded interaction imbalance, which will often
             * quickly push the domain boundaries beyond the limit for the
             * optimal, PME load balanced, cut-off. But it could be that
             * better overal performance can be obtained with a slightly
             * shorter cut-off and better DD load balancing.
             */
            change_dd_dlb_cutoff_limit(cr);
        }
    }
    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;

    /* Check in stage 0 if we should stop scanning grids.
     * Stop when the time is more than SLOW_FAC longer than the fastest.
     */
    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
    {
        pme_lb->n = pme_lb->cur + 1;
        /* Done with scanning, go to stage 1 */
        switch_to_stage1(pme_lb);
    }

    if (pme_lb->stage == 0)
    {
        int gridsize_start;

        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];

        do
        {
            if (pme_lb->cur+1 < pme_lb->n)
            {
                /* We had already generated the next setup */
                OK = TRUE;
            }
            else
            {
                /* Find the next setup */
                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order);
            }

            if (OK && ir->ePBC != epbcNONE)
            {
                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
                      <= max_cutoff2(ir->ePBC, state->box));
                if (!OK)
                {
                    pme_lb->elimited = epmelblimBOX;
                }
            }

            if (OK)
            {
                pme_lb->cur++;

                if (DOMAINDECOMP(cr))
                {
                    OK = change_dd_cutoff(cr, state, ir,
                                          pme_lb->setup[pme_lb->cur].rlistlong);
                    if (!OK)
                    {
                        /* Failed: do not use this setup */
                        pme_lb->cur--;
                        pme_lb->elimited = epmelblimDD;
                    }
                }
            }
            if (!OK)
            {
                /* We hit the upper limit for the cut-off,
                 * the setup should not go further than cur.
                 */
                pme_lb->n = pme_lb->cur + 1;
                print_loadbal_limited(fp_err, fp_log, step, pme_lb);
                /* Switch to the next stage */
                switch_to_stage1(pme_lb);
            }
        }
        while (OK &&
               !(pme_lb->setup[pme_lb->cur].grid[XX]*
                 pme_lb->setup[pme_lb->cur].grid[YY]*
                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
                 gridsize_start*PME_LB_GRID_SCALE_FAC
                 &&
                 pme_lb->setup[pme_lb->cur].grid_efficiency <
                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
    }

    if (pme_lb->stage > 0 && pme_lb->end == 1)
    {
        pme_lb->cur   = 0;
        pme_lb->stage = pme_lb->nstage;
    }
    else if (pme_lb->stage > 0 && pme_lb->end > 1)
    {
        /* If stage = nstage-1:
         *   scan over all setups, rerunning only those setups
         *   which are not much slower than the fastest
         * else:
         *   use the next setup
         */
        do
        {
            pme_lb->cur++;
            if (pme_lb->cur == pme_lb->end)
            {
                pme_lb->stage++;
                pme_lb->cur = pme_lb->start;
            }
        }
        while (pme_lb->stage == pme_lb->nstage - 1 &&
               pme_lb->setup[pme_lb->cur].count > 0 &&
               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);

        if (pme_lb->stage == pme_lb->nstage)
        {
            /* We are done optimizing, use the fastest setup we found */
            pme_lb->cur = pme_lb->fastest;
        }
    }

    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
    {
        OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
        if (!OK)
        {
            /* Failsafe solution */
            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
            {
                pme_lb->stage--;
            }
            pme_lb->fastest  = 0;
            pme_lb->start    = 0;
            pme_lb->end      = pme_lb->cur;
            pme_lb->cur      = pme_lb->start;
            pme_lb->elimited = epmelblimDD;
            print_loadbal_limited(fp_err, fp_log, step, pme_lb);
        }
    }

    /* Change the Coulomb cut-off and the PME grid */

    set = &pme_lb->setup[pme_lb->cur];

    ic->rcoulomb   = set->rcut_coulomb;
    ic->rlist      = set->rlist;
    ic->rlistlong  = set->rlistlong;
    ir->nstcalclr  = set->nstcalclr;
    ic->ewaldcoeff = set->ewaldcoeff;

    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
    if (pme_lb->cutoff_scheme == ecutsVERLET &&
        nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
    {
        nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv, ic);
    }
    else
    {
        init_interaction_const_tables(NULL, ic, bUsesSimpleTables,
                                      rtab);
    }

    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1)
    {
        init_interaction_const_tables(NULL, ic, bUsesSimpleTables,
                                      rtab);
    }

    if (cr->duty & DUTY_PME)
    {
        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
        {
            /* Generate a new PME data structure,
             * copying part of the old pointers.
             */
            gmx_pme_reinit(&set->pmedata,
                           cr, pme_lb->setup[0].pmedata, ir,
                           set->grid);
        }
        *pmedata = set->pmedata;
    }
    else
    {
        /* Tell our PME-only node to switch grid */
        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff);
    }

    if (debug)
    {
        print_grid(NULL, debug, "", "switched to", set, -1);
    }

    if (pme_lb->stage == pme_lb->nstage)
    {
        print_grid(fp_err, fp_log, "", "optimal", set, -1);
    }

    return TRUE;
}
void do_shakefirst(FILE *fplog,gmx_constr_t constr,
		   t_inputrec *inputrec,t_mdatoms *md,
		   t_state *state,rvec buf[],rvec f[],
		   t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
		   t_forcerec *fr,t_idef *idef)
{
  int    i,m,start,end,step;
  double mass,tmass,vcm[4];
  real   dt=inputrec->delta_t;
  real   dvdlambda;

  start = md->start;
  end   = md->homenr + start;
  if (debug)
    fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",
	    start,md->homenr,end);
  /* Do a first SHAKE to reset particles... */
  step = inputrec->init_step;
  if (fplog)
    fprintf(fplog,"\nConstraining the starting coordinates (step %d)\n",step);
  dvdlambda = 0;
  constrain(NULL,TRUE,FALSE,constr,idef,
	    inputrec,cr,step,0,md,
	    state->x,state->x,NULL,
	    state->box,state->lambda,&dvdlambda,
	    NULL,NULL,nrnb,econqCoord);

  if (EI_STATE_VELOCITY(inputrec->eI)) {
    for(i=start; (i<end); i++) {
      for(m=0; (m<DIM); m++) {
	/* Reverse the velocity */
	state->v[i][m] = -state->v[i][m];
	/* Store the position at t-dt in buf */
	buf[i][m] = state->x[i][m] + dt*state->v[i][m];
      }
    }
    
    /* Shake the positions at t=-dt with the positions at t=0
     * as reference coordinates.
     */
    if (fplog)
      fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %d)\n",
	      step);
    dvdlambda = 0;
    constrain(NULL,TRUE,FALSE,constr,idef,
	      inputrec,cr,step,-1,md,
	      state->x,buf,NULL,
	      state->box,state->lambda,&dvdlambda,
	      state->v,NULL,nrnb,econqCoord);
    
    for(m=0; (m<4); m++)
      vcm[m] = 0;
    for(i=start; i<end; i++) {
      mass = md->massT[i];
      for(m=0; m<DIM; m++) {
	/* Re-reverse the velocities */
	state->v[i][m] = -state->v[i][m];
	vcm[m] += state->v[i][m]*mass;
      }
      vcm[3] += mass;
    }
    
    if (inputrec->nstcomm != 0 || debug) {
      /* Compute the global sum of vcm */
      if (debug)
	fprintf(debug,"vcm: %8.3f  %8.3f  %8.3f,"
		" total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],vcm[3]);
      if (PAR(cr))
	gmx_sumd(4,vcm,cr);
      tmass = vcm[3];
      for(m=0; (m<DIM); m++)
	vcm[m] /= tmass;
      if (debug) 
	fprintf(debug,"vcm: %8.3f  %8.3f  %8.3f,"
		" total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],tmass);
      if (inputrec->nstcomm != 0) {
	/* Now we have the velocity of center of mass, let's remove it */
	for(i=start; (i<end); i++) {
	  for(m=0; (m<DIM); m++)
	    state->v[i][m] -= vcm[m];
	}
      }
    }
  }
}
void do_force(FILE *fplog,t_commrec *cr,
	      t_inputrec *inputrec,
	      int step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
	      gmx_localtop_t *top,
	      gmx_groups_t *groups,
	      matrix box,rvec x[],history_t *hist,
	      rvec f[],rvec buf[],
	      tensor vir_force,
	      t_mdatoms *mdatoms,
	      gmx_enerdata_t *enerd,t_fcdata *fcd,
	      real lambda,t_graph *graph,
	      t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
	      real t,FILE *field,gmx_edsam_t ed,
	      int flags)
{
  static rvec box_size;
  int    cg0,cg1,i,j;
  int    start,homenr;
  static double mu[2*DIM]; 
  rvec   mu_tot_AB[2];
  bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS,bDoForces;
  matrix boxs;
  real   e,v,dvdl;
  t_pbc  pbc;
  float  cycles_ppdpme,cycles_pme,cycles_force;
  
  start  = mdatoms->start;
  homenr = mdatoms->homenr;

  bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));

  clear_mat(vir_force);
  
  if (PARTDECOMP(cr)) {
    pd_cg_range(cr,&cg0,&cg1);
  } else {
    cg0 = 0;
    if (DOMAINDECOMP(cr))
      cg1 = cr->dd->ncg_tot;
    else
      cg1 = top->cgs.nr;
    if (fr->n_tpi > 0)
      cg1--;
  }

  bStateChanged = (flags & GMX_FORCE_STATECHANGED);
  bNS           = (flags & GMX_FORCE_NS);
  bFillGrid     = (bNS && bStateChanged);
  bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
  bDoForces     = (flags & GMX_FORCE_FORCES);

  if (bStateChanged) {
    update_forcerec(fplog,fr,box);
    
    /* Calculate total (local) dipole moment in a temporary common array. 
     * This makes it possible to sum them over nodes faster.
     */
    calc_mu(start,homenr,
	    x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
	    mu,mu+DIM);
  }
  
  if (fr->ePBC != epbcNONE) { 
    /* Compute shift vectors every step,
     * because of pressure coupling or box deformation!
     */
    if (DYNAMIC_BOX(*inputrec) && bStateChanged)
      calc_shifts(box,fr->shift_vec);
    
    if (bCalcCGCM) { 
      put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
			       &(top->cgs),x,fr->cg_cm);
      inc_nrnb(nrnb,eNR_CGCM,homenr);
      inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
    } 
    else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
      unshift_self(graph,box,x);
    }
  } 
  else if (bCalcCGCM) {
    calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
    inc_nrnb(nrnb,eNR_CGCM,homenr);
  }
  
  if (bCalcCGCM) {
    if (PAR(cr)) {
      move_cgcm(fplog,cr,fr->cg_cm);
    }
    if (gmx_debug_at)
      pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
  }

#ifdef GMX_MPI
  if (!(cr->duty & DUTY_PME)) {
    /* Send particle coordinates to the pme nodes.
     * Since this is only implemented for domain decomposition
     * and domain decomposition does not use the graph,
     * we do not need to worry about shifting.
     */    

    wallcycle_start(wcycle,ewcPP_PMESENDX);
    GMX_MPE_LOG(ev_send_coordinates_start);

    bBS = (inputrec->nwall == 2);
    if (bBS) {
      copy_mat(box,boxs);
      svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
    }

    gmx_pme_send_x(cr,bBS ? boxs : box,x,mdatoms->nChargePerturbed,lambda);

    GMX_MPE_LOG(ev_send_coordinates_finish);
    wallcycle_stop(wcycle,ewcPP_PMESENDX);
  }
#endif /* GMX_MPI */

  /* Communicate coordinates and sum dipole if necessary */
  if (PAR(cr)) {
    wallcycle_start(wcycle,ewcMOVEX);
    if (DOMAINDECOMP(cr)) {
      dd_move_x(cr->dd,box,x,buf);
    } else {
      move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
    }
    /* When we don't need the total dipole we sum it in global_stat */
    if (NEED_MUTOT(*inputrec))
      gmx_sumd(2*DIM,mu,cr);
    wallcycle_stop(wcycle,ewcMOVEX);
  }
  for(i=0; i<2; i++)
    for(j=0;j<DIM;j++)
      mu_tot_AB[i][j] = mu[i*DIM + j];
  if (fr->efep == efepNO)
    copy_rvec(mu_tot_AB[0],mu_tot);
  else
    for(j=0; j<DIM; j++)
      mu_tot[j] = (1.0 - lambda)*mu_tot_AB[0][j] + lambda*mu_tot_AB[1][j];

  /* Reset energies */
  reset_energies(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));    
  if (bNS) {
    wallcycle_start(wcycle,ewcNS);
    
    if (graph && bStateChanged)
      /* Calculate intramolecular shift vectors to make molecules whole */
      mk_mshift(fplog,graph,fr->ePBC,box,x);

    /* Reset long range forces if necessary */
    if (fr->bTwinRange) {
      clear_rvecs(fr->f_twin_n,fr->f_twin);
      clear_rvecs(SHIFTS,fr->fshift_twin);
    }
    /* Do the actual neighbour searching and if twin range electrostatics
     * also do the calculation of long range forces and energies.
     */
    dvdl = 0; 
    ns(fplog,fr,x,f,box,groups,&(inputrec->opts),top,mdatoms,
       cr,nrnb,step,lambda,&dvdl,&enerd->grpp,bFillGrid,bDoForces);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"LR non-bonded",0,dvdl);
    enerd->dvdl_lr       = dvdl;
    enerd->term[F_DVDL] += dvdl;

    wallcycle_stop(wcycle,ewcNS);
  }
  
  if (DOMAINDECOMP(cr)) {
    if (!(cr->duty & DUTY_PME)) {
      wallcycle_start(wcycle,ewcPPDURINGPME);
      dd_force_flop_start(cr->dd,nrnb);
    }
  }
  /* Start the force cycle counter.
   * This counter is stopped in do_forcelow_level.
   * No parallel communication should occur while this counter is running,
   * since that will interfere with the dynamic load balancing.
   */
  wallcycle_start(wcycle,ewcFORCE);

  if (bDoForces) {
      /* Reset PME/Ewald forces if necessary */
    if (fr->bF_NoVirSum) 
    {
      GMX_BARRIER(cr->mpi_comm_mygroup);
      if (fr->bDomDec)
	clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
      else
	clear_rvecs(homenr,fr->f_novirsum+start);
      GMX_BARRIER(cr->mpi_comm_mygroup);
    }
    /* Copy long range forces into normal buffers */
    if (fr->bTwinRange) {
      for(i=0; i<fr->f_twin_n; i++)
	copy_rvec(fr->f_twin[i],f[i]);
      for(i=0; i<SHIFTS; i++)
	copy_rvec(fr->fshift_twin[i],fr->fshift[i]);
    } 
    else {
      if (DOMAINDECOMP(cr))
	clear_rvecs(cr->dd->nat_tot,f);
      else
	clear_rvecs(mdatoms->nr,f);
      clear_rvecs(SHIFTS,fr->fshift);
    }
    clear_rvec(fr->vir_diag_posres);
    GMX_BARRIER(cr->mpi_comm_mygroup);
  }
  if (inputrec->ePull == epullCONSTRAINT)
    clear_pull_forces(inputrec->pull);

  /* update QMMMrec, if necessary */
  if(fr->bQMMM)
    update_QMMMrec(cr,fr,x,mdatoms,box,top);

  if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) {
    /* Position restraints always require full pbc */
    set_pbc(&pbc,inputrec->ePBC,box);
    v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
	       top->idef.iparams_posres,
	       (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
	       inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl,
	       fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
    if (bSepDVDL) {
      fprintf(fplog,sepdvdlformat,
	      interaction_function[F_POSRES].longname,v,dvdl);
    }
    enerd->term[F_POSRES] += v;
    enerd->term[F_DVDL]   += dvdl;
    inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
  }
  /* Compute the bonded and non-bonded forces */    
  do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
		    cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
		    x,hist,f,enerd,fcd,box,lambda,graph,&(top->excls),mu_tot_AB,
		    flags,&cycles_force);
  GMX_BARRIER(cr->mpi_comm_mygroup);

  if (ed) {
    do_flood(fplog,cr,x,f,ed,box,step);
  }
	
  if (DOMAINDECOMP(cr)) {
    dd_force_flop_stop(cr->dd,nrnb);
    if (wcycle)
      dd_cycles_add(cr->dd,cycles_force,ddCyclF);
  }
  
  if (bDoForces) {
    /* Compute forces due to electric field */
    calc_f_el(MASTER(cr) ? field : NULL,
	      start,homenr,mdatoms->chargeA,x,f,inputrec->ex,inputrec->et,t);
    
    /* When using PME/Ewald we compute the long range virial there.
     * otherwise we do it based on long range forces from twin range
     * cut-off based calculation (or not at all).
     */
    
    /* Communicate the forces */
    if (PAR(cr)) {
      wallcycle_start(wcycle,ewcMOVEF);
      if (DOMAINDECOMP(cr)) {
	dd_move_f(cr->dd,f,buf,fr->fshift);
	/* Position restraint do not introduce inter-cg forces */
	if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl)
	  dd_move_f(cr->dd,fr->f_novirsum,buf,NULL);
      } else {
	move_f(fplog,cr,GMX_LEFT,GMX_RIGHT,f,buf,nrnb);
      }
      wallcycle_stop(wcycle,ewcMOVEF);
    }
  }

  if (bDoForces) {
    if (vsite) {
      wallcycle_start(wcycle,ewcVSITESPREAD);
      spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb,
		     &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
      wallcycle_stop(wcycle,ewcVSITESPREAD);
    }
    
    /* Calculation of the virial must be done after vsites! */
    calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
		vir_force,graph,box,nrnb,fr,inputrec->ePBC);
  }

  if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) {
    /* Calculate the center of mass forces, this requires communication,
     * which is why pull_potential is called close to other communication.
     * The virial contribution is calculated directly,
     * which is why we call pull_potential after calc_virial.
     */
    set_pbc(&pbc,inputrec->ePBC,box);
    dvdl = 0; 
    enerd->term[F_COM_PULL] =
      pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
		     cr,t,lambda,x,f,vir_force,&dvdl);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
    enerd->term[F_DVDL] += dvdl;
  }

  if (!(cr->duty & DUTY_PME)) {
    cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
    dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
  }

#ifdef GMX_MPI
  if (PAR(cr) && !(cr->duty & DUTY_PME)) {
    /* In case of node-splitting, the PP nodes receive the long-range 
     * forces, virial and energy from the PME nodes here.
     */    
    wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
    dvdl = 0;
    gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
		      &cycles_pme);
    if (bSepDVDL)
      fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
    enerd->term[F_COUL_RECIP] += e;
    enerd->term[F_DVDL] += dvdl;
    if (wcycle)
      dd_cycles_add(cr->dd,cycles_pme,ddCyclPME);
    wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
  }
#endif

  if (bDoForces && fr->bF_NoVirSum) {
    if (vsite) {
      /* Spread the mesh force on virtual sites to the other particles... 
       * This is parallellized. MPI communication is performed
       * if the constructing atoms aren't local.
       */
      wallcycle_start(wcycle,ewcVSITESPREAD);
      spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb,
		     &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
      wallcycle_stop(wcycle,ewcVSITESPREAD);
    }
    /* Now add the forces, this is local */
    if (fr->bDomDec) {
      sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
    } else {
      sum_forces(start,start+homenr,f,fr->f_novirsum);
    }
    if (EEL_FULL(fr->eeltype)) {
      /* Add the mesh contribution to the virial */
      m_add(vir_force,fr->vir_el_recip,vir_force);
    }
    if (debug)
      pr_rvecs(debug,0,"vir_force",vir_force,DIM);
  }

  /* Sum the potential energy terms from group contributions */
  sum_epot(&(inputrec->opts),enerd);

  if (fr->print_force >= 0 && bDoForces)
    print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
}
Exemple #13
0
void do_shakefirst(FILE *log,bool bTYZ,real lambda,real ener[],
		   t_parm *parm,t_nsborder *nsb,t_mdatoms *md,
		   rvec x[],rvec vold[],rvec buf[],rvec f[],
		   rvec v[],t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
		   t_groups *grps,t_forcerec *fr,t_topology *top,
		   t_edsamyn *edyn,t_pull *pulldata)
{
  int    i,m,start,homenr,end,step;
  tensor shake_vir;
  double mass,tmass,vcm[4];
  real   dt=parm->ir.delta_t;
  real   dt_1;

  if (count_constraints(top,cr)) {
    start  = START(nsb);
    homenr = HOMENR(nsb);
    end    = start+homenr;
    if (debug)
      fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",start,homenr,end);
    /* Do a first SHAKE to reset particles... */
    step = -2;
    if(log)
        fprintf(log,"\nConstraining the starting coordinates (step %d)\n",step);
    clear_mat(shake_vir);
    update(nsb->natoms,start,homenr,step,lambda,&ener[F_DVDL],
	   parm,1.0,md,x,graph,
	   NULL,NULL,vold,NULL,x,top,grps,shake_vir,cr,nrnb,bTYZ,
	   FALSE,edyn,pulldata,FALSE);
    /* Compute coordinates at t=-dt, store them in buf */
    /* for(i=0; (i<nsb->natoms); i++) {*/
    for(i=start; (i<end); i++) {
      for(m=0; (m<DIM); m++) { 
	f[i][m]=x[i][m];
	buf[i][m]=x[i][m]-dt*v[i][m];
      }
    }
    
    /* Shake the positions at t=-dt with the positions at t=0
     * as reference coordinates.
     */
    step = -1;
    if(log)
        fprintf(log,"\nConstraining the coordinates at t0-dt (step %d)\n",step);
    clear_mat(shake_vir);
    update(nsb->natoms,start,homenr,
	   step,lambda,&ener[F_DVDL],parm,1.0,md,f,graph,
	   NULL,NULL,vold,NULL,buf,top,grps,shake_vir,cr,nrnb,bTYZ,FALSE,
	   edyn,pulldata,FALSE);
    
    /* Compute the velocities at t=-dt/2 using the coordinates at
     * t=-dt and t=0
     * Compute velocity of center of mass and total mass
     */
    for(m=0; (m<4); m++)
      vcm[m] = 0;
    dt_1=1.0/dt;
    for(i=start; (i<end); i++) {
      /*for(i=0; (i<nsb->natoms); i++) {*/
      mass = md->massA[i];
      for(m=0; (m<DIM); m++) {
	v[i][m]=(x[i][m]-f[i][m])*dt_1;
	vcm[m] += v[i][m]*mass;
      }
      vcm[3] += mass;
    }
    /* Compute the global sum of vcm */
    if (debug)
      fprintf(debug,"vcm: %8.3f  %8.3f  %8.3f,"
	      " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],vcm[3]);
    if (PAR(cr))
      gmx_sumd(4,vcm,cr);
    tmass = vcm[3];
    for(m=0; (m<DIM); m++)
      vcm[m] /= tmass;
    if (debug) 
      fprintf(debug,"vcm: %8.3f  %8.3f  %8.3f,"
	      " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],tmass);
    /* Now we have the velocity of center of mass, let's remove it */
    for(i=start; (i<end); i++) {
      for(m=0; (m<DIM); m++)
	v[i][m] -= vcm[m];
    }
  }
}
Exemple #14
0
/* calculates center of mass of selection index from all coordinates x */
void pull_calc_coms(t_commrec *cr,
                    t_pull *pull, t_mdatoms *md, t_pbc *pbc, double t,
                    rvec x[], rvec *xp)
{
    int  g;
    real twopi_box = 0;

    if (pull->rbuf == NULL)
    {
        snew(pull->rbuf, pull->ngroup);
    }
    if (pull->dbuf == NULL)
    {
        snew(pull->dbuf, 3*pull->ngroup);
    }

    if (pull->bRefAt && pull->bSetPBCatoms)
    {
        pull_set_pbcatoms(cr, pull, x, pull->rbuf);

        if (cr != NULL && DOMAINDECOMP(cr))
        {
            /* We can keep these PBC reference coordinates fixed for nstlist
             * steps, since atoms won't jump over PBC.
             * This avoids a global reduction at the next nstlist-1 steps.
             * Note that the exact values of the pbc reference coordinates
             * are irrelevant, as long all atoms in the group are within
             * half a box distance of the reference coordinate.
             */
            pull->bSetPBCatoms = FALSE;
        }
    }

    if (pull->cosdim >= 0)
    {
        int m;

        assert(pull->npbcdim <= DIM);

        for (m = pull->cosdim+1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
            }
        }
        twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim];
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        t_pull_group *pgrp;

        pgrp = &pull->group[g];

        if (pgrp->bCalcCOM)
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                dvec   com, comp;
                double wmass, wwmass;
                rvec   x_pbc = { 0, 0, 0 };
                int    i;

                clear_dvec(com);
                clear_dvec(comp);
                wmass  = 0;
                wwmass = 0;

                if (pgrp->epgrppbc == epgrppbcREFAT)
                {
                    /* Set the pbc atom */
                    copy_rvec(pull->rbuf[g], x_pbc);
                }

                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    int  ii, m;
                    real mass, wm;

                    ii   = pgrp->ind_loc[i];
                    mass = md->massT[ii];
                    if (pgrp->weight_loc == NULL)
                    {
                        wm     = mass;
                        wmass += wm;
                    }
                    else
                    {
                        real w;

                        w       = pgrp->weight_loc[i];
                        wm      = w*mass;
                        wmass  += wm;
                        wwmass += wm*w;
                    }
                    if (pgrp->epgrppbc == epgrppbcNONE)
                    {
                        /* Plain COM: sum the coordinates */
                        for (m = 0; m < DIM; m++)
                        {
                            com[m]    += wm*x[ii][m];
                        }
                        if (xp)
                        {
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*xp[ii][m];
                            }
                        }
                    }
                    else
                    {
                        rvec dx;

                        /* Sum the difference with the reference atom */
                        pbc_dx(pbc, x[ii], x_pbc, dx);
                        for (m = 0; m < DIM; m++)
                        {
                            com[m]    += wm*dx[m];
                        }
                        if (xp)
                        {
                            /* For xp add the difference between xp and x to dx,
                             * such that we use the same periodic image,
                             * also when xp has a large displacement.
                             */
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*(dx[m] + xp[ii][m] - x[ii][m]);
                            }
                        }
                    }
                }

                /* We do this check after the loop above to avoid more nesting.
                 * If we have a single-atom group the mass is irrelevant, so
                 * we can remove the mass factor to avoid division by zero.
                 * Note that with constraint pulling the mass does matter, but
                 * in that case a check group mass != 0 has been done before.
                 */
                if (pgrp->nat == 1 && pgrp->nat_loc == 1 && wmass == 0)
                {
                    int m;

                    /* Copy the single atom coordinate */
                    for (m = 0; m < DIM; m++)
                    {
                        com[m] = x[pgrp->ind_loc[0]][m];
                    }
                    /* Set all mass factors to 1 to get the correct COM */
                    wmass  = 1;
                    wwmass = 1;
                }

                if (pgrp->weight_loc == NULL)
                {
                    wwmass = wmass;
                }

                /* Copy local sums to a buffer for global summing */
                copy_dvec(com, pull->dbuf[g*3]);
                copy_dvec(comp, pull->dbuf[g*3+1]);
                pull->dbuf[g*3+2][0] = wmass;
                pull->dbuf[g*3+2][1] = wwmass;
                pull->dbuf[g*3+2][2] = 0;
            }
            else
            {
                /* Cosine weighting geometry */
                double cm, sm, cmp, smp, ccm, csm, ssm, csw, snw;
                int    i;

                cm  = 0;
                sm  = 0;
                cmp = 0;
                smp = 0;
                ccm = 0;
                csm = 0;
                ssm = 0;

                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    int  ii;
                    real mass;

                    ii   = pgrp->ind_loc[i];
                    mass = md->massT[ii];
                    /* Determine cos and sin sums */
                    csw  = cos(x[ii][pull->cosdim]*twopi_box);
                    snw  = sin(x[ii][pull->cosdim]*twopi_box);
                    cm  += csw*mass;
                    sm  += snw*mass;
                    ccm += csw*csw*mass;
                    csm += csw*snw*mass;
                    ssm += snw*snw*mass;

                    if (xp)
                    {
                        csw  = cos(xp[ii][pull->cosdim]*twopi_box);
                        snw  = sin(xp[ii][pull->cosdim]*twopi_box);
                        cmp += csw*mass;
                        smp += snw*mass;
                    }
                }

                /* Copy local sums to a buffer for global summing */
                pull->dbuf[g*3  ][0] = cm;
                pull->dbuf[g*3  ][1] = sm;
                pull->dbuf[g*3  ][2] = 0;
                pull->dbuf[g*3+1][0] = ccm;
                pull->dbuf[g*3+1][1] = csm;
                pull->dbuf[g*3+1][2] = ssm;
                pull->dbuf[g*3+2][0] = cmp;
                pull->dbuf[g*3+2][1] = smp;
                pull->dbuf[g*3+2][2] = 0;
            }
        }
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ngroup*3*DIM, pull->dbuf[0], cr);
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        t_pull_group *pgrp;

        pgrp = &pull->group[g];
        if (pgrp->nat > 0 && pgrp->bCalcCOM)
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                double wmass, wwmass;
                int    m;

                /* Determine the inverse mass */
                wmass             = pull->dbuf[g*3+2][0];
                wwmass            = pull->dbuf[g*3+2][1];
                pgrp->mwscale     = 1.0/wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm != 0)
                {
                    pgrp->wscale  = wmass/wwmass;
                    pgrp->invtm   = wwmass/(wmass*wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m]    = pull->dbuf[g*3  ][m]*pgrp->mwscale;
                    if (xp)
                    {
                        pgrp->xp[m] = pull->dbuf[g*3+1][m]*pgrp->mwscale;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT)
                    {
                        pgrp->x[m]    += pull->rbuf[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += pull->rbuf[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Cosine weighting geometry */
                double csw, snw, wmass, wwmass;
                int    i, ii;

                /* Determine the optimal location of the cosine weight */
                csw                   = pull->dbuf[g*3][0];
                snw                   = pull->dbuf[g*3][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                /* Set the weights for the local atoms */
                wmass  = sqrt(csw*csw + snw*snw);
                wwmass = (pull->dbuf[g*3+1][0]*csw*csw +
                          pull->dbuf[g*3+1][1]*csw*snw +
                          pull->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);

                pgrp->mwscale = 1.0/wmass;
                pgrp->wscale  = wmass/wwmass;
                pgrp->invtm   = wwmass/(wmass*wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    ii                  = pgrp->ind_loc[i];
                    pgrp->weight_loc[i] = csw*cos(twopi_box*x[ii][pull->cosdim]) +
                        snw*sin(twopi_box*x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = pull->dbuf[g*3+2][0];
                    snw                    = pull->dbuf[g*3+2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %d wmass %f invtm %f\n",
                        g, 1.0/pgrp->mwscale, pgrp->invtm);
            }
        }
    }

    if (pull->bCylinder)
    {
        /* Calculate the COMs for the cyclinder reference groups */
        make_cyl_refgrps(cr, pull, md, pbc, t, x);
    }
}
Exemple #15
0
static void make_cyl_refgrps(t_commrec *cr, t_pull *pull, t_mdatoms *md,
                             t_pbc *pbc, double t, rvec *x)
{
    /* The size and stride per coord for the reduction buffer */
    const int     stride = 9;
    int           c, i, ii, m, start, end;
    rvec          g_x, dx, dir;
    double        inv_cyl_r2;
    t_pull_coord *pcrd;
    t_pull_group *pref, *pgrp, *pdyna;
    gmx_ga2la_t   ga2la = NULL;

    if (pull->dbuf_cyl == NULL)
    {
        snew(pull->dbuf_cyl, pull->ncoord*stride);
    }

    if (cr && DOMAINDECOMP(cr))
    {
        ga2la = cr->dd->ga2la;
    }

    start = 0;
    end   = md->homenr;

    inv_cyl_r2 = 1/dsqr(pull->cylinder_r);

    /* loop over all groups to make a reference group for each*/
    for (c = 0; c < pull->ncoord; c++)
    {
        double sum_a, wmass, wwmass;
        dvec   radf_fac0, radf_fac1;

        pcrd   = &pull->coord[c];

        sum_a  = 0;
        wmass  = 0;
        wwmass = 0;
        clear_dvec(radf_fac0);
        clear_dvec(radf_fac1);

        if (pcrd->eGeom == epullgCYL)
        {
            /* pref will be the same group for all pull coordinates */
            pref  = &pull->group[pcrd->group[0]];
            pgrp  = &pull->group[pcrd->group[1]];
            pdyna = &pull->dyna[c];
            copy_rvec(pcrd->vec, dir);
            pdyna->nat_loc = 0;

            /* We calculate distances with respect to the reference location
             * of this cylinder group (g_x), which we already have now since
             * we reduced the other group COM over the ranks. This resolves
             * any PBC issues and we don't need to use a PBC-atom here.
             */
            for (m = 0; m < DIM; m++)
            {
                g_x[m] = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
            }

            /* loop over all atoms in the main ref group */
            for (i = 0; i < pref->nat; i++)
            {
                ii = pref->ind[i];
                if (ga2la)
                {
                    if (!ga2la_get_home(ga2la, pref->ind[i], &ii))
                    {
                        ii = -1;
                    }
                }
                if (ii >= start && ii < end)
                {
                    double dr2, dr2_rel, inp;
                    dvec   dr;

                    pbc_dx_aiuc(pbc, x[ii], g_x, dx);
                    inp = iprod(dir, dx);
                    dr2 = 0;
                    for (m = 0; m < DIM; m++)
                    {
                        /* Determine the radial components */
                        dr[m] = dx[m] - inp*dir[m];
                        dr2  += dr[m]*dr[m];
                    }
                    dr2_rel = dr2*inv_cyl_r2;

                    if (dr2_rel < 1)
                    {
                        double mass, weight, dweight_r;
                        dvec   mdw;

                        /* add to index, to sum of COM, to weight array */
                        if (pdyna->nat_loc >= pdyna->nalloc_loc)
                        {
                            pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1);
                            srenew(pdyna->ind_loc,    pdyna->nalloc_loc);
                            srenew(pdyna->weight_loc, pdyna->nalloc_loc);
                            srenew(pdyna->mdw,        pdyna->nalloc_loc);
                            srenew(pdyna->dv,         pdyna->nalloc_loc);
                        }
                        pdyna->ind_loc[pdyna->nat_loc] = ii;

                        mass      = md->massT[ii];
                        /* The radial weight function is 1-2x^2+x^4,
                         * where x=r/cylinder_r. Since this function depends
                         * on the radial component, we also get radial forces
                         * on both groups.
                         */
                        weight    = 1 + (-2 + dr2_rel)*dr2_rel;
                        dweight_r = (-4 + 4*dr2_rel)*inv_cyl_r2;
                        pdyna->weight_loc[pdyna->nat_loc] = weight;
                        sum_a    += mass*weight*inp;
                        wmass    += mass*weight;
                        wwmass   += mass*weight*weight;
                        dsvmul(mass*dweight_r, dr, mdw);
                        copy_dvec(mdw, pdyna->mdw[pdyna->nat_loc]);
                        /* Currently we only have the axial component of the
                         * distance (inp) up to an unkown offset. We add this
                         * offset after the reduction needs to determine the
                         * COM of the cylinder group.
                         */
                        pdyna->dv[pdyna->nat_loc] = inp;
                        for (m = 0; m < DIM; m++)
                        {
                            radf_fac0[m] += mdw[m];
                            radf_fac1[m] += mdw[m]*inp;
                        }
                        pdyna->nat_loc++;
                    }
                }
            }
        }
        pull->dbuf_cyl[c*stride+0] = wmass;
        pull->dbuf_cyl[c*stride+1] = wwmass;
        pull->dbuf_cyl[c*stride+2] = sum_a;
        pull->dbuf_cyl[c*stride+3] = radf_fac0[XX];
        pull->dbuf_cyl[c*stride+4] = radf_fac0[YY];
        pull->dbuf_cyl[c*stride+5] = radf_fac0[ZZ];
        pull->dbuf_cyl[c*stride+6] = radf_fac1[XX];
        pull->dbuf_cyl[c*stride+7] = radf_fac1[YY];
        pull->dbuf_cyl[c*stride+8] = radf_fac1[ZZ];
    }

    if (cr != NULL && PAR(cr))
    {
        /* Sum the contributions over the ranks */
        gmx_sumd(pull->ncoord*stride, pull->dbuf_cyl, cr);
    }

    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];

        if (pcrd->eGeom == epullgCYL)
        {
            double wmass, wwmass, inp, dist;

            pdyna = &pull->dyna[c];
            pgrp  = &pull->group[pcrd->group[1]];

            wmass          = pull->dbuf_cyl[c*stride+0];
            wwmass         = pull->dbuf_cyl[c*stride+1];
            pdyna->mwscale = 1.0/wmass;
            /* Cylinder pulling can't be used with constraints, but we set
             * wscale and invtm anyhow, in case someone would like to use them.
             */
            pdyna->wscale  = wmass/wwmass;
            pdyna->invtm   = wwmass/(wmass*wmass);

            /* We store the deviation of the COM from the reference location
             * used above, since we need it when we apply the radial forces
             * to the atoms in the cylinder group.
             */
            pcrd->cyl_dev  = 0;
            for (m = 0; m < DIM; m++)
            {
                g_x[m]         = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
                dist           = -pcrd->vec[m]*pull->dbuf_cyl[c*stride+2]*pdyna->mwscale;
                pdyna->x[m]    = g_x[m] - dist;
                pcrd->cyl_dev += dist;
            }
            /* Now we know the exact COM of the cylinder reference group,
             * we can determine the radial force factor (ffrad) that when
             * multiplied with the axial pull force will give the radial
             * force on the pulled (non-cylinder) group.
             */
            for (m = 0; m < DIM; m++)
            {
                pcrd->ffrad[m] = (pull->dbuf_cyl[c*stride+6+m] +
                                  pull->dbuf_cyl[c*stride+3+m]*pcrd->cyl_dev)/wmass;
            }

            if (debug)
            {
                fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
                        c, pdyna->x[0], pdyna->x[1],
                        pdyna->x[2], 1.0/pdyna->invtm);
                fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n",
                        pcrd->ffrad[XX], pcrd->ffrad[YY], pcrd->ffrad[ZZ]);
            }
        }
    }
}