Example #1
static void pull_set_pbcatom(t_commrec *cr, t_pull_group *pgrp,
                             t_mdatoms *md, rvec *x,
                             rvec x_pbc)
{
    int a;

    if (cr && PAR(cr))
    {
        if (DOMAINDECOMP(cr))
        {
            if (!ga2la_get_home(cr->dd->ga2la, pgrp->pbcatom, &a))
            {
                a = -1;
            }
        }
        else
        {
            a = pgrp->pbcatom;
        }

        if (a >= 0 && a < md->homenr)
        {
            copy_rvec(x[a], x_pbc);
        }
        else
        {
            clear_rvec(x_pbc);
        }
    }
    else
    {
        copy_rvec(x[pgrp->pbcatom], x_pbc);
    }
}
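A note on the pattern above: under domain decomposition only the rank that is home to the PBC atom copies its coordinates, while every other rank clears the output vector, so a later global sum reconstructs the value on all ranks. The sketch below shows that clear-or-copy idiom in self-contained C; the demo_* names and the flat lookup table are illustrative stand-ins for ga2la, not GROMACS API.

#include <string.h>

typedef double vec3[3];

/* Hypothetical stand-in for ga2la_get_home(): maps a global atom index to
 * a local one, returning 0 when the atom is not home on this rank. */
static int demo_global_to_local(const int *local_of_global, int nglobal,
                                int gidx, int *lidx)
{
    if (gidx < 0 || gidx >= nglobal || local_of_global[gidx] < 0)
    {
        return 0;
    }
    *lidx = local_of_global[gidx];
    return 1;
}

/* Clear-or-copy: exactly one rank contributes x[lidx], the others
 * contribute zeros, so an MPI_Allreduce(SUM) yields the value everywhere. */
static void demo_set_ref_coord(const int *local_of_global, int nglobal,
                               int gidx, const vec3 *x, vec3 x_ref)
{
    int lidx;

    if (demo_global_to_local(local_of_global, nglobal, gidx, &lidx))
    {
        memcpy(x_ref, x[lidx], sizeof(vec3));
    }
    else
    {
        memset(x_ref, 0, sizeof(vec3));
    }
}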
Example #2
static void write_constr_pdb(const char *fn, const char *title,
                             gmx_mtop_t *mtop,
                             int start, int homenr, t_commrec *cr,
                             rvec x[], matrix box)
{
    char          fname[STRLEN], format[STRLEN];
    FILE         *out;
    int           dd_ac0 = 0, dd_ac1 = 0, i, ii, resnr;
    gmx_domdec_t *dd;
    char         *anm, *resnm;

    dd = NULL;
    if (DOMAINDECOMP(cr))
    {
        dd = cr->dd;
        dd_get_constraint_range(dd, &dd_ac0, &dd_ac1);
        start  = 0;
        homenr = dd_ac1;
    }

    if (PAR(cr))
    {
        sprintf(fname, "%s_n%d.pdb", fn, cr->sim_nodeid);
    }
    else
    {
        sprintf(fname, "%s.pdb", fn);
    }
    sprintf(format, "%s\n", get_pdbformat());

    out = gmx_fio_fopen(fname, "w");

    fprintf(out, "TITLE     %s\n", title);
    gmx_write_pdb_box(out, -1, box);
    for (i = start; i < start+homenr; i++)
    {
        if (dd != NULL)
        {
            if (i >= dd->nat_home && i < dd_ac0)
            {
                continue;
            }
            ii = dd->gatindex[i];
        }
        else
        {
            ii = i;
        }
        gmx_mtop_atominfo_global(mtop, ii, &anm, &resnr, &resnm);
        fprintf(out, format, "ATOM", (ii+1)%100000,
                anm, resnm, ' ', resnr%10000, ' ',
                10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ]);
    }
    fprintf(out, "TER\n");

    gmx_fio_fclose(out);
}
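The per-rank naming above ("%s_n%d.pdb" in parallel, "%s.pdb" otherwise) keeps simultaneous debug dumps from different ranks from overwriting each other. A minimal sketch of the same convention, independent of GROMACS types (demo_dump_name is an illustrative name; rank < 0 stands for a serial run):

#include <stdio.h>

/* Build a debug-dump filename: each MPI rank gets its own "_n<rank>"
 * suffix, a serial run writes plain "<base>.pdb". */
static void demo_dump_name(char *buf, size_t bufsize, const char *base, int rank)
{
    if (rank >= 0)
    {
        snprintf(buf, bufsize, "%s_n%d.pdb", base, rank);
    }
    else
    {
        snprintf(buf, bufsize, "%s.pdb", base);
    }
}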
Example #3
void make_local_shells(t_commrec *cr,t_mdatoms *md,
		       struct gmx_shellfc *shfc)
{
  t_shell *shell;
  int a0,a1,*ind,nshell,i;
  gmx_domdec_t *dd=NULL;

  if (PAR(cr)) {
    if (DOMAINDECOMP(cr)) {
      dd = cr->dd;
      a0 = 0;
      a1 = dd->nat_home;
    } else {
      pd_at_range(cr,&a0,&a1);
    }
  } else {
    /* Single node: we need all shells, just copy the pointer */
    shfc->nshell = shfc->nshell_gl;
    shfc->shell  = shfc->shell_gl;
    
    return;
  }

  ind = shfc->shell_index_gl;

  nshell = 0;
  shell  = shfc->shell; 
  for(i=a0; i<a1; i++) {
    if (md->ptype[i] == eptShell) {
      if (nshell+1 > shfc->shell_nalloc) {
	shfc->shell_nalloc = over_alloc_dd(nshell+1);
	srenew(shell,shfc->shell_nalloc);
      }
      if (dd) {
	shell[nshell] = shfc->shell_gl[ind[dd->gatindex[i]]];
      } else {
	shell[nshell] = shfc->shell_gl[ind[i]];
      }
      /* With inter-cg shells we cannot do shell prediction,
       * so we do not need the nuclei numbers.
       */
      if (!shfc->bInterCG) {
	shell[nshell].nucl1   = i + shell[nshell].nucl1 - shell[nshell].shell;
	if (shell[nshell].nnucl > 1)
	  shell[nshell].nucl2 = i + shell[nshell].nucl2 - shell[nshell].shell;
	if (shell[nshell].nnucl > 2)
	  shell[nshell].nucl3 = i + shell[nshell].nucl3 - shell[nshell].shell;
      }
      shell[nshell].shell = i;
      nshell++;
    }
  }

  shfc->nshell = nshell;
  shfc->shell  = shell;
}
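The nshell+1 > shell_nalloc check followed by over_alloc_dd()/srenew() is GROMACS' amortized-growth idiom for lists that are rebuilt at every repartitioning. Below is a self-contained sketch of the same idea with realloc; the roughly 20% head room is an assumption about what over_alloc_dd() adds, and demo_grow is an illustrative name (error handling omitted):

#include <stdlib.h>

/* Grow *nalloc to at least n, with head room so that repeated small
 * increases across repartitioning steps do not realloc every time. */
static void *demo_grow(void *ptr, size_t elem_size, int n, int *nalloc)
{
    if (n > *nalloc)
    {
        *nalloc = (int)(1.2*n) + 10;  /* over-allocate, assumed ~20% */
        ptr     = realloc(ptr, (size_t)(*nalloc)*elem_size);
    }
    return ptr;
}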
Example #4
void
do_redist_pos_coeffs(struct gmx_pme_t *pme, t_commrec *cr, int start, int homenr,
                     gmx_bool bFirst, rvec x[], real *data)
{
    int             d;
    pme_atomcomm_t *atc;
    atc = &pme->atc[0];

    for (d = pme->ndecompdim - 1; d >= 0; d--)
    {
        int             n_d;
        rvec           *x_d;
        real           *param_d;

        if (d == pme->ndecompdim - 1)
        {
            n_d     = homenr;
            x_d     = x + start;
            param_d = data;
        }
        else
        {
            n_d     = pme->atc[d + 1].n;
            x_d     = atc->x;
            param_d = atc->coefficient;
        }
        atc      = &pme->atc[d];
        atc->npd = n_d;
        if (atc->npd > atc->pd_nalloc)
        {
            atc->pd_nalloc = over_alloc_dd(atc->npd);
            srenew(atc->pd, atc->pd_nalloc);
        }
        pme_calc_pidx_wrapper(n_d, pme->recipbox, x_d, atc);
        where();
        /* Redistribute x (only once) and qA/c6A or qB/c6B */
        if (DOMAINDECOMP(cr))
        {
            dd_pmeredist_pos_coeffs(pme, n_d, bFirst, x_d, param_d, atc);
        }
    }
}
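Note the loop direction: d runs from the last decomposition dimension down to the first. For d == ndecompdim-1 the input is the caller's arrays (x + start, data); for every earlier dimension the input is the output of the previous stage, already gathered into pme->atc[d+1]. The pd index array is grown with the same over_alloc_dd() idiom sketched after Example #3.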
Example #5
real pull_potential(int ePull,t_pull *pull, t_mdatoms *md, t_pbc *pbc,
		    t_commrec *cr, double t, real lambda,
		    rvec *x, rvec *f, tensor vir, real *dvdlambda)
{
  real V,dVdl;

  pull_calc_coms(cr,pull,md,pbc,t,x,NULL);

  do_pull_pot(ePull,pull,pbc,t,lambda,
	      &V,pull->bVirial && MASTER(cr) ? vir : NULL,&dVdl);

  /* Distribute forces over pulled groups */
  apply_forces(pull, md, DOMAINDECOMP(cr) ? cr->dd->ga2la : NULL, f);

  if (MASTER(cr)) {
    *dvdlambda += dVdl;
  }

  return (MASTER(cr) ? V : 0.0);
}
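Two parallel-safety details in this wrapper: the virial is accumulated only when pull->bVirial && MASTER(cr), and both the returned potential and the dvdlambda increment are applied on the master rank only, so that a later sum of energy terms over ranks counts the pull contribution exactly once.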
Example #6
static void pull_set_pbcatom(t_commrec *cr, pull_group_work_t *pgrp,
                             rvec *x,
                             rvec x_pbc)
{
    int a;

    if (cr != NULL && DOMAINDECOMP(cr))
    {
        if (ga2la_get_home(cr->dd->ga2la, pgrp->params.pbcatom, &a))
        {
            copy_rvec(x[a], x_pbc);
        }
        else
        {
            clear_rvec(x_pbc);
        }
    }
    else
    {
        copy_rvec(x[pgrp->params.pbcatom], x_pbc);
    }
}
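Example #6 is the later rewrite of the routine from Example #1: with particle decomposition gone, the PAR(cr) branch and the explicit a < md->homenr range check are no longer needed, and ga2la_get_home() alone decides whether this rank contributes the coordinate or zeros.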
Example #7
void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                       t_forcerec *fr,      t_inputrec *ir,
                       t_idef     *idef,    t_commrec  *cr,
                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
                       t_mdatoms  *md,
                       t_grpopts  *opts,
                       rvec       x[],      history_t  *hist,
                       rvec       f[],
                       rvec       f_longrange[],
                       gmx_enerdata_t *enerd,
                       t_fcdata   *fcd,
                       gmx_mtop_t     *mtop,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       t_atomtypes *atype,
                       gmx_bool       bBornRadii,
                       matrix     box,
                       t_lambda   *fepvals,
                       real       *lambda,
                       t_graph    *graph,
                       t_blocka   *excl,
                       rvec       mu_tot[],
                       int        flags,
                       float      *cycles_pme)
{
    int         i, j, status;
    int         donb_flags;
    gmx_bool    bSepDVDL, bSB;
    int         pme_flags;
    matrix      boxs;
    rvec        box_size;
    real        Vsr, Vlr, Vcorr = 0;
    t_pbc       pbc;
    char        buf[22];
    real        dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
    real        dvdlsum;

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); }

    GMX_MPE_LOG(ev_force_start);
    set_pbc(&pbc, fr->ePBC, box);

    /* reset free energy components */
    for (i = 0; i < efptNR; i++)
    {
        dvdl_nb[i]  = 0;
        dvdl_dum[i] = 0;
    }

    /* Reset box */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog));
    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md);
    }

    if (bSepDVDL)
    {
        fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n",
                gmx_step_str(step, buf), cr->nodeid);
    }

    /* Call the short range functions all in one go. */
    GMX_MPE_LOG(ev_do_fnbf_start);

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
                        enerd->grpp.ener[egLJSR], nrnb);
        PRINT_SEPDVDL("Walls", 0.0, dvdl);
        enerd->dvdl_lin[efptVDW] += dvdl;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsNONBONDED);

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb);
        }

        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
    }

    where();
    /* We only do non-bonded calculation with group scheme here, the verlet
     * calls are done from do_force_cutsVERLET(). */
    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
    {
        donb_flags = 0;
        /* Add short-range interactions */
        donb_flags |= GMX_NONBONDED_DO_SR;

        if (flags & GMX_FORCE_FORCES)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (flags & GMX_FORCE_ENERGY)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        if (flags & GMX_FORCE_DO_LR)
        {
            donb_flags |= GMX_NONBONDED_DO_LR;
        }

        wallcycle_sub_start(wcycle, ewcsNONBONDED);
        do_nonbonded(cr, fr, x, f, f_longrange, md, excl,
                     &enerd->grpp, box_size, nrnb,
                     lambda, dvdl_nb, -1, -1, donb_flags);

        /* If we do foreign lambda and we have soft-core interactions
         * we have to recalculate the (non-linear) energies contributions.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
        {
            for (i = 0; i < enerd->n_lambda; i++)
            {
                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                reset_foreign_enerdata(enerd);
                do_nonbonded(cr, fr, x, f, f_longrange, md, excl,
                             &(enerd->foreign_grpp), box_size, nrnb,
                             lam_i, dvdl_dum, -1, -1,
                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
                sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
        where();
    }

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    /* MRS: Eventually, we may need to include the free energy contribution here! */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsBONDED);
        calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd);
        wallcycle_sub_stop(wcycle, ewcsBONDED);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (fepvals->sc_alpha != 0)
    {
        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
    }
    else
    {
        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
    }

    if (fepvals->sc_alpha != 0)

    /* even though the Coulomb part is linear, we already added it, because
       we need to go through the vdw calculation anyway */
    {
        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
    }
    else
    {
        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
    }

    Vsr = 0;
    if (bSepDVDL)
    {
        for (i = 0; i < enerd->grpp.nener; i++)
        {
            Vsr +=
                (fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR][i] :
                 enerd->grpp.ener[egLJSR][i])
                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
        }
        dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
        PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum);
    }
    debug_gmx();

    GMX_MPE_LOG(ev_do_fnbf_finish);

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before bonded forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no bonded forces have to be evaluated.
     */

    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do bondeds or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_BONDED)
         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
    {
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    if (flags & GMX_FORCE_BONDED)
    {
        GMX_MPE_LOG(ev_calc_bonds_start);

        wallcycle_sub_start(wcycle, ewcsBONDED);
        calc_bonds(fplog, cr->ms,
                   idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
                   flags,
                   fr->bSepDVDL && do_per_step(step, ir->nstlog), step);

        /* Check if we have to determine energy differences
         * at foreign lambda's.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
            idef->ilsort != ilsortNO_FE)
        {
            if (idef->ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            for (i = 0; i < enerd->n_lambda; i++)
            {
                reset_foreign_enerdata(enerd);
                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
                                  fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
                sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        debug_gmx();
        GMX_MPE_LOG(ev_calc_bonds_finish);
        wallcycle_sub_stop(wcycle, ewcsBONDED);
    }

    where();

    *cycles_pme = 0;
    if (EEL_FULL(fr->eeltype))
    {
        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        clear_mat(fr->vir_el_recip);

        if (fr->bEwald)
        {
            Vcorr = 0;
            dvdl  = 0;

            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            /* The TPI molecule does not have exclusions with the rest
             * of the system and no intra-molecular PME grid contributions
             * will be calculated in gmx_pme_calc_energy.
             */
            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
                ir->ewald_geometry != eewg3D ||
                ir->epsilon_surface != 0)
            {
                int nthreads, t;

                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                nthreads = gmx_omp_nthreads_get(emntBonded);
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (t = 0; t < nthreads; t++)
                {
                    int     s, e, i;
                    rvec   *fnv;
                    tensor *vir;
                    real   *Vcorrt, *dvdlt;
                    if (t == 0)
                    {
                        fnv    = fr->f_novirsum;
                        vir    = &fr->vir_el_recip;
                        Vcorrt = &Vcorr;
                        dvdlt  = &dvdl;
                    }
                    else
                    {
                        fnv    = fr->f_t[t].f;
                        vir    = &fr->f_t[t].vir;
                        Vcorrt = &fr->f_t[t].Vcorr;
                        dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
                        for (i = 0; i < fr->natoms_force; i++)
                        {
                            clear_rvec(fnv[i]);
                        }
                        clear_mat(*vir);
                    }
                    *dvdlt  = 0;
                    *Vcorrt =
                        ewald_LRcorrection(fplog,
                                           fr->excl_load[t], fr->excl_load[t+1],
                                           cr, t, fr,
                                           md->chargeA,
                                           md->nChargePerturbed ? md->chargeB : NULL,
                                           ir->cutoff_scheme != ecutsVERLET,
                                           excl, x, bSB ? boxs : box, mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           fnv, *vir,
                                           lambda[efptCOUL], dvdlt);
                }
                if (nthreads > 1)
                {
                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
                                         fr->vir_el_recip,
                                         &Vcorr, efptCOUL, &dvdl,
                                         nthreads, fr->f_t);
                }

                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }

            if (fr->n_tpi == 0)
            {
                Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
                                                 &dvdl, fr->vir_el_recip);
            }

            PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl);
            enerd->dvdl_lin[efptCOUL] += dvdl;
        }

        status = 0;
        Vlr    = 0;
        dvdl   = 0;
        switch (fr->eeltype)
        {
            case eelPME:
            case eelPMESWITCH:
            case eelPMEUSER:
            case eelPMEUSERSWITCH:
            case eelP3M_AD:
                if (cr->duty & DUTY_PME)
                {
                    assert(fr->n_tpi >= 0);
                    if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                    {
                        pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
                        if (flags & GMX_FORCE_FORCES)
                        {
                            pme_flags |= GMX_PME_CALC_F;
                        }
                        if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
                        {
                            pme_flags |= GMX_PME_CALC_ENER_VIR;
                        }
                        if (fr->n_tpi > 0)
                        {
                            /* We don't calculate f, but we do want the potential */
                            pme_flags |= GMX_PME_CALC_POT;
                        }
                        wallcycle_start(wcycle, ewcPMEMESH);
                        status = gmx_pme_do(fr->pmedata,
                                            md->start, md->homenr - fr->n_tpi,
                                            x, fr->f_novirsum,
                                            md->chargeA, md->chargeB,
                                            bSB ? boxs : box, cr,
                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                            nrnb, wcycle,
                                            fr->vir_el_recip, fr->ewaldcoeff,
                                            &Vlr, lambda[efptCOUL], &dvdl,
                                            pme_flags);
                        *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);

                        /* We should try to do as little computation after
                         * this as possible, because parallel PME synchronizes
                         * the nodes, so we want all load imbalance of the rest
                         * of the force calculation to be before the PME call.
                         * DD load balancing is done on the whole time of
                         * the force call (without PME).
                         */
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* Determine the PME grid energy of the test molecule
                         * with the PME grid potential of the other charges.
                         */
                        gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
                                            x + md->homenr - fr->n_tpi,
                                            md->chargeA + md->homenr - fr->n_tpi,
                                            &Vlr);
                    }
                    PRINT_SEPDVDL("PME mesh", Vlr, dvdl);
                }
                break;
            case eelEWALD:
                Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum,
                               md->chargeA, md->chargeB,
                               box_size, cr, md->homenr,
                               fr->vir_el_recip, fr->ewaldcoeff,
                               lambda[efptCOUL], &dvdl, fr->ewald_table);
                PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl);
                break;
            default:
                gmx_fatal(FARGS, "No such electrostatics method implemented %s",
                          eel_names[fr->eeltype]);
        }
        if (status != 0)
        {
            gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s",
                      status, EELTYPE(fr->eeltype));
        }
        /* Note that with separate PME nodes we get the real energies later */
        enerd->dvdl_lin[efptCOUL] += dvdl;
        enerd->term[F_COUL_RECIP]  = Vlr + Vcorr;
        if (debug)
        {
            fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
                    Vlr, Vcorr, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
        }
    }
    else
    {
        if (EEL_RF(fr->eeltype))
        {
            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
            {
                dvdl                   = 0;
                enerd->term[F_RF_EXCL] =
                    RF_excl_correction(fplog, fr, graph, md, excl, x, f,
                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl);
            }

            enerd->dvdl_lin[efptCOUL] += dvdl;
            PRINT_SEPDVDL("RF exclusion correction",
                          enerd->term[F_RF_EXCL], dvdl);
        }
    }
    where();
    debug_gmx();

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }
    debug_gmx();

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t2 = MPI_Wtime();
        MPI_Barrier(cr->mpi_comm_mygroup);
        t3          = MPI_Wtime();
        fr->t_wait += t3-t2;
        if (fr->timesteps == 11)
        {
            fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
        }
        fr->timesteps++;
    }
#endif

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
    }

    GMX_MPE_LOG(ev_force_finish);

}
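The OpenMP section above (the Ewald correction loop) uses a common per-thread accumulation pattern: thread 0 writes directly into the global outputs (fr->f_novirsum, fr->vir_el_recip, Vcorr, dvdl), while the remaining threads write into private slots in fr->f_t[] that reduce_thread_forces() folds back in afterwards. A self-contained toy version of the same pattern for a scalar sum (all demo_* names are illustrative):

#include <stdlib.h>

/* Sum v[0..n-1] with per-thread partial accumulators: thread 0 uses the
 * global total directly, the others use private slots reduced at the end. */
static double demo_threaded_sum(const double *v, int n, int nthreads)
{
    double  total   = 0.0;
    double *partial = calloc(nthreads, sizeof(double));
    int     t;

#pragma omp parallel for num_threads(nthreads) schedule(static)
    for (t = 0; t < nthreads; t++)
    {
        double *acc = (t == 0 ? &total : &partial[t]);
        int     i0  = (n*t)/nthreads;
        int     i1  = (n*(t + 1))/nthreads;
        int     i;

        for (i = i0; i < i1; i++)
        {
            *acc += v[i];
        }
    }
    for (t = 1; t < nthreads; t++)
    {
        total += partial[t];
    }
    free(partial);
    return total;
}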
Example #8
void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                       t_idef     *idef,    t_commrec  *cr,
                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
                       t_mdatoms  *md,
                       rvec       x[],      history_t  *hist,
                       rvec       f[],
                       rvec       f_longrange[],
                       gmx_enerdata_t *enerd,
                       t_fcdata   *fcd,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       gmx_bool       bBornRadii,
                       matrix     box,
                       t_lambda   *fepvals,
                       real       *lambda,
                       t_graph    *graph,
                       t_blocka   *excl,
                       rvec       mu_tot[],
                       int        flags,
                       float      *cycles_pme)
{
    int         i, j;
    int         donb_flags;
    gmx_bool    bSB;
    int         pme_flags;
    matrix      boxs;
    rvec        box_size;
    t_pbc       pbc;
    real        dvdl_dum[efptNR], dvdl_nb[efptNR];

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

    set_pbc(&pbc, fr->ePBC, box);

    /* reset free energy components */
    for (i = 0; i < efptNR; i++)
    {
        dvdl_nb[i]  = 0;
        dvdl_dum[i] = 0;
    }

    /* Reset box */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
    }

    /* Call the short range functions all in one go. */

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
                                   enerd->grpp.ener[egLJSR], nrnb);
        enerd->dvdl_lin[efptVDW] += dvdl_walls;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsNONBONDED);

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
        }

        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
    }

    where();
    /* We only do non-bonded calculation with group scheme here, the verlet
     * calls are done from do_force_cutsVERLET(). */
    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
    {
        donb_flags = 0;
        /* Add short-range interactions */
        donb_flags |= GMX_NONBONDED_DO_SR;

        /* Currently all group scheme kernels always calculate (shift-)forces */
        if (flags & GMX_FORCE_FORCES)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (flags & GMX_FORCE_VIRIAL)
        {
            donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
        }
        if (flags & GMX_FORCE_ENERGY)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        if (flags & GMX_FORCE_DO_LR)
        {
            donb_flags |= GMX_NONBONDED_DO_LR;
        }

        wallcycle_sub_start(wcycle, ewcsNONBONDED);
        do_nonbonded(fr, x, f, f_longrange, md, excl,
                     &enerd->grpp, nrnb,
                     lambda, dvdl_nb, -1, -1, donb_flags);

        /* If we do foreign lambda and we have soft-core interactions
         * we have to recalculate the (non-linear) energies contributions.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
        {
            for (i = 0; i < enerd->n_lambda; i++)
            {
                real lam_i[efptNR];

                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                reset_foreign_enerdata(enerd);
                do_nonbonded(fr, x, f, f_longrange, md, excl,
                             &(enerd->foreign_grpp), nrnb,
                             lam_i, dvdl_dum, -1, -1,
                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
        where();
    }

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    /* MRS: Eventually, we may need to include the free energy contribution here! */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsLISTED);
        calc_gb_forces(cr, md, born, top, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
        wallcycle_sub_stop(wcycle, ewcsLISTED);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (fepvals->sc_alpha != 0)
    {
        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
    }
    else
    {
        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
    }

    if (fepvals->sc_alpha != 0)

    /* even though the Coulomb part is linear, we already added it, because
       we need to go through the vdw calculation anyway */
    {
        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
    }
    else
    {
        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
    }

    debug_gmx();


    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before listed forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no listed forces have to be evaluated.
     *
     * The shifting and PBC code is deliberately not timed, since with
     * the Verlet scheme it only takes non-zero time with triclinic
     * boxes, and even then the time is around a factor of 100 less
     * than the next smallest counter.
     */


    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do listed interactions or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_LISTED)
         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
    {
        /* TODO There are no electrostatics methods that require this
           transformation, when using the Verlet scheme, so update the
           above conditional. */
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    do_force_listed(wcycle, box, ir->fepvals, cr->ms,
                    idef, (const rvec *) x, hist, f, fr,
                    &pbc, graph, enerd, nrnb, lambda, md, fcd,
                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
                    flags);

    where();

    *cycles_pme = 0;
    clear_mat(fr->vir_el_recip);
    clear_mat(fr->vir_lj_recip);

    /* Do long-range electrostatics and/or LJ-PME, including related short-range
     * corrections.
     */
    if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
    {
        int  status            = 0;
        real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
        real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;

        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
        {
            real dvdl_long_range_correction_q   = 0;
            real dvdl_long_range_correction_lj  = 0;
            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            /* The TPI molecule does not have exclusions with the rest
             * of the system and no intra-molecular PME grid
             * contributions will be calculated in
             * gmx_pme_calc_energy.
             */
            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
                ir->ewald_geometry != eewg3D ||
                ir->epsilon_surface != 0)
            {
                int nthreads, t;

                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                nthreads = fr->nthread_ewc;
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (t = 0; t < nthreads; t++)
                {
                    try
                    {
                        tensor *vir_q, *vir_lj;
                        real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
                        if (t == 0)
                        {
                            vir_q     = &fr->vir_el_recip;
                            vir_lj    = &fr->vir_lj_recip;
                            Vcorrt_q  = &Vcorr_q;
                            Vcorrt_lj = &Vcorr_lj;
                            dvdlt_q   = &dvdl_long_range_correction_q;
                            dvdlt_lj  = &dvdl_long_range_correction_lj;
                        }
                        else
                        {
                            vir_q     = &fr->ewc_t[t].vir_q;
                            vir_lj    = &fr->ewc_t[t].vir_lj;
                            Vcorrt_q  = &fr->ewc_t[t].Vcorr_q;
                            Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
                            dvdlt_q   = &fr->ewc_t[t].dvdl[efptCOUL];
                            dvdlt_lj  = &fr->ewc_t[t].dvdl[efptVDW];
                            clear_mat(*vir_q);
                            clear_mat(*vir_lj);
                        }
                        *dvdlt_q  = 0;
                        *dvdlt_lj = 0;

                        /* Threading is only supported with the Verlet cut-off
                         * scheme and then only single particle forces (no
                         * exclusion forces) are calculated, so we can store
                         * the forces in the normal, single fr->f_novirsum array.
                         */
                        ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                           cr, t, fr,
                                           md->chargeA, md->chargeB,
                                           md->sqrt_c6A, md->sqrt_c6B,
                                           md->sigmaA, md->sigmaB,
                                           md->sigma3A, md->sigma3B,
                                           md->nChargePerturbed || md->nTypePerturbed,
                                           ir->cutoff_scheme != ecutsVERLET,
                                           excl, x, bSB ? boxs : box, mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           fr->f_novirsum, *vir_q, *vir_lj,
                                           Vcorrt_q, Vcorrt_lj,
                                           lambda[efptCOUL], lambda[efptVDW],
                                           dvdlt_q, dvdlt_lj);
                    }
                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
                }
                if (nthreads > 1)
                {
                    reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
                                           &Vcorr_q, &Vcorr_lj,
                                           &dvdl_long_range_correction_q,
                                           &dvdl_long_range_correction_lj,
                                           nthreads, fr->ewc_t);
                }
                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }
Example #9
void mdoutf_write_to_trajectory_files(FILE *fplog, t_commrec *cr,
                                      gmx_mdoutf_t of,
                                      int mdof_flags,
                                      gmx_mtop_t *top_global,
                                      gmx_int64_t step, double t,
                                      t_state *state_local, t_state *state_global,
                                      rvec *f_local, rvec *f_global)
{
    rvec *local_v;
    rvec *global_v;

    /* MRS -- these variables are a way to manage the difference
     * between half-step and full-step velocities, but there must be a better way... */

    local_v  = state_local->v;
    global_v = state_global->v;

    if (DOMAINDECOMP(cr))
    {
        if (mdof_flags & MDOF_CPT)
        {
            dd_collect_state(cr->dd, state_local, state_global);
        }
        else
        {
            if (mdof_flags & (MDOF_X | MDOF_X_COMPRESSED))
            {
                dd_collect_vec(cr->dd, state_local, state_local->x,
                               state_global->x);
            }
            if (mdof_flags & MDOF_V)
            {
                dd_collect_vec(cr->dd, state_local, local_v,
                               global_v);
            }
        }
        if (mdof_flags & MDOF_F)
        {
            dd_collect_vec(cr->dd, state_local, f_local, f_global);
        }
    }
    else
    {
        if (mdof_flags & MDOF_CPT)
        {
            /* All pointers in state_local are equal to state_global,
             * but we need to copy the non-pointer entries.
             */
            state_global->lambda = state_local->lambda;
            state_global->veta   = state_local->veta;
            state_global->vol0   = state_local->vol0;
            copy_mat(state_local->box, state_global->box);
            copy_mat(state_local->boxv, state_global->boxv);
            copy_mat(state_local->svir_prev, state_global->svir_prev);
            copy_mat(state_local->fvir_prev, state_global->fvir_prev);
            copy_mat(state_local->pres_prev, state_global->pres_prev);
        }
    }

    if (MASTER(cr))
    {
        if (mdof_flags & MDOF_CPT)
        {
            fflush_tng(of->tng);
            fflush_tng(of->tng_low_prec);
            write_checkpoint(of->fn_cpt, of->bKeepAndNumCPT,
                             fplog, cr, of->eIntegrator, of->simulation_part,
                             of->bExpanded, of->elamstats, step, t, state_global);
        }

        if (mdof_flags & (MDOF_X | MDOF_V | MDOF_F))
        {
            if (of->fp_trn)
            {
                gmx_trr_write_frame(of->fp_trn, step, t, state_local->lambda[efptFEP],
                                    state_local->box, top_global->natoms,
                                    (mdof_flags & MDOF_X) ? state_global->x : NULL,
                                    (mdof_flags & MDOF_V) ? global_v : NULL,
                                    (mdof_flags & MDOF_F) ? f_global : NULL);
                if (gmx_fio_flush(of->fp_trn) != 0)
                {
                    gmx_file("Cannot write trajectory; maybe you are out of disk space?");
                }
            }

            gmx_fwrite_tng(of->tng, FALSE, step, t, state_local->lambda[efptFEP],
                           state_local->box,
                           top_global->natoms,
                           (mdof_flags & MDOF_X) ? state_global->x : NULL,
                           (mdof_flags & MDOF_V) ? global_v : NULL,
                           (mdof_flags & MDOF_F) ? f_global : NULL);
        }
        if (mdof_flags & MDOF_X_COMPRESSED)
        {
            rvec *xxtc = NULL;

            if (of->natoms_x_compressed == of->natoms_global)
            {
                /* We are writing the positions of all of the atoms to
                   the compressed output */
                xxtc = state_global->x;
            }
            else
            {
                /* We are writing the positions of only a subset of
                   the atoms to the compressed output, so we have to
                   make a copy of the subset of coordinates. */
                int i, j;

                snew(xxtc, of->natoms_x_compressed);
                for (i = 0, j = 0; (i < of->natoms_global); i++)
                {
                    if (ggrpnr(of->groups, egcCompressedX, i) == 0)
                    {
                        copy_rvec(state_global->x[i], xxtc[j++]);
                    }
                }
            }
            if (write_xtc(of->fp_xtc, of->natoms_x_compressed, step, t,
                          state_local->box, xxtc, of->x_compression_precision) == 0)
            {
                gmx_fatal(FARGS, "XTC error - maybe you are out of disk space?");
            }
            gmx_fwrite_tng(of->tng_low_prec,
                           TRUE,
                           step,
                           t,
                           state_local->lambda[efptFEP],
                           state_local->box,
                           of->natoms_x_compressed,
                           xxtc,
                           NULL,
                           NULL);
            if (of->natoms_x_compressed != of->natoms_global)
            {
                sfree(xxtc);
            }
        }
    }
}
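The structure above is collect-then-write: under domain decomposition the local vectors are first gathered into the global arrays with dd_collect_state()/dd_collect_vec(), and all file I/O then happens only under MASTER(cr). For compressed output of an atom subset, the coordinates are copied into a temporary xxtc buffer that is freed again after writing.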
Example #10
void pme_loadbal_do(pme_load_balancing_t *pme_lb,
                    t_commrec            *cr,
                    FILE                 *fp_err,
                    FILE                 *fp_log,
                    t_inputrec           *ir,
                    t_forcerec           *fr,
                    t_state              *state,
                    gmx_wallcycle_t       wcycle,
                    gmx_int64_t           step,
                    gmx_int64_t           step_rel,
                    gmx_bool             *bPrinting)
{
    int    n_prev;
    double cycles_prev;

    assert(pme_lb != NULL);

    if (!pme_lb->bActive)
    {
        return;
    }

    n_prev      = pme_lb->cycles_n;
    cycles_prev = pme_lb->cycles_c;
    wallcycle_get(wcycle, ewcSTEP, &pme_lb->cycles_n, &pme_lb->cycles_c);
    if (pme_lb->cycles_n == 0)
    {
        /* Before the first step we haven't done any steps yet */
        return;
    }
    /* Sanity check, we expect nstlist cycle counts */
    if (pme_lb->cycles_n - n_prev != ir->nstlist)
    {
        /* We could return here, but it's safer to issue an error and quit */
        gmx_incons("pme_loadbal_do called at an interval != nstlist");
    }

    /* PME grid + cut-off optimization with GPUs or PME ranks */
    if (!pme_lb->bBalance && pme_lb->bSepPMERanks)
    {
        if (pme_lb->bTriggerOnDLB)
        {
            pme_lb->bBalance = dd_dlb_is_on(cr->dd);
        }
        /* We should ignore the first timing to avoid timing allocation
         * overhead. And since the PME load balancing is called just
         * before DD repartitioning, the ratio returned by dd_pme_f_ratio
         * is not over the last nstlist steps, but the nstlist steps before
         * that. So the first useful ratio is available at step_rel=3*nstlist.
         */
        else if (step_rel >= 3*ir->nstlist)
        {
            if (DDMASTER(cr->dd))
            {
                /* If PME rank load is too high, start tuning */
                pme_lb->bBalance =
                    (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor);
            }
            dd_bcast(cr->dd, sizeof(gmx_bool), &pme_lb->bBalance);
        }

        pme_lb->bActive = (pme_lb->bBalance ||
                           step_rel <= pme_lb->step_rel_stop);
    }

    /* The location in the code of this balancing termination is strange.
     * You would expect to have it after the call to pme_load_balance()
     * below, since there pme_lb->stage is updated.
     * But when terminating directly after deciding on and selecting the
     * optimal setup, DLB will turn on right away if it was locked before.
     * This might be due to PME reinitialization. So we check stage here
     * to allow for another nstlist steps with DLB locked to stabilize
     * the performance.
     */
    if (pme_lb->bBalance && pme_lb->stage == pme_lb->nstage)
    {
        pme_lb->bBalance = FALSE;

        if (DOMAINDECOMP(cr) && dd_dlb_is_locked(cr->dd))
        {
            /* Unlock the DLB=auto, DLB is allowed to activate */
            dd_dlb_unlock(cr->dd);
            md_print_warn(cr, fp_log, "NOTE: DLB can now turn on, when beneficial\n");

            /* We don't deactivate the tuning yet, since we will balance again
             * after DLB gets turned on, if it does within PMETune_period.
             */
            continue_pme_loadbal(pme_lb, TRUE);
            pme_lb->bTriggerOnDLB = TRUE;
            pme_lb->step_rel_stop = step_rel + PMETunePeriod*ir->nstlist;
        }
        else
        {
            /* We're completely done with PME tuning */
            pme_lb->bActive = FALSE;
        }

        if (DOMAINDECOMP(cr))
        {
            /* Set the cut-off limit to the final selected cut-off,
             * so we don't have artificial DLB limits.
             * This also ensures that we won't disable the currently
             * optimal setting during a second round of PME balancing.
             */
            set_dd_dlb_max_cutoff(cr, fr->ic->rlistlong);
        }
    }

    if (pme_lb->bBalance)
    {
        /* We might not have collected nstlist steps in cycles yet,
         * since init_step might not be a multiple of nstlist,
         * but the first data collected is skipped anyhow.
         */
        pme_load_balance(pme_lb, cr,
                         fp_err, fp_log,
                         ir, state, pme_lb->cycles_c - cycles_prev,
                         fr->ic, fr->nbv, &fr->pmedata,
                         step);

        /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
        fr->ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
        fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
        fr->rlist         = fr->ic->rlist;
        fr->rlistlong     = fr->ic->rlistlong;
        fr->rcoulomb      = fr->ic->rcoulomb;
        fr->rvdw          = fr->ic->rvdw;

        if (ir->eDispCorr != edispcNO)
        {
            calc_enervirdiff(NULL, ir->eDispCorr, fr);
        }
    }

    if (!pme_lb->bBalance &&
        (!pme_lb->bSepPMERanks || step_rel > pme_lb->step_rel_stop))
    {
        /* We have just deactivated the balancing and we're not measuring PP/PME
         * imbalance during the first steps of the run: deactivate the tuning.
         */
        pme_lb->bActive = FALSE;
    }

    if (!(pme_lb->bActive) && DOMAINDECOMP(cr) && dd_dlb_is_locked(cr->dd))
    {
        /* Make sure DLB is allowed when we deactivate PME tuning */
        dd_dlb_unlock(cr->dd);
        md_print_warn(cr, fp_log, "NOTE: DLB can now turn on, when beneficial\n");
    }

    *bPrinting = pme_lb->bBalance;
}
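With separate PME ranks, balancing is triggered either by DLB switching on (bTriggerOnDLB) or, from step_rel >= 3*ir->nstlist onward, by the PP/PME force-time ratio measured on the DD master exceeding loadBalanceTriggerFactor; the decision is broadcast with dd_bcast() so that all ranks enter pme_load_balance() together.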
Example #11
/* Calculates the center of mass of the selected atoms from all coordinates x */
void pull_calc_coms(t_commrec *cr,
                    struct pull_t *pull, t_mdatoms *md, t_pbc *pbc, double t,
                    rvec x[], rvec *xp)
{
    int          g;
    real         twopi_box = 0;
    pull_comm_t *comm;

    comm = &pull->comm;

    if (comm->rbuf == NULL)
    {
        snew(comm->rbuf, pull->ngroup);
    }
    if (comm->dbuf == NULL)
    {
        snew(comm->dbuf, 3*pull->ngroup);
    }

    if (pull->bRefAt && pull->bSetPBCatoms)
    {
        pull_set_pbcatoms(cr, pull, x, comm->rbuf);

        if (cr != NULL && DOMAINDECOMP(cr))
        {
            /* We can keep these PBC reference coordinates fixed for nstlist
             * steps, since atoms won't jump over PBC.
             * This avoids a global reduction at the next nstlist-1 steps.
             * Note that the exact values of the pbc reference coordinates
             * are irrelevant, as long as all atoms in the group are within
             * half a box distance of the reference coordinate.
             */
            pull->bSetPBCatoms = FALSE;
        }
    }

    if (pull->cosdim >= 0)
    {
        int m;

        assert(pull->npbcdim <= DIM);

        for (m = pull->cosdim+1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
            }
        }
        twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim];
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pull_group_work_t *pgrp;

        pgrp = &pull->group[g];

        if (pgrp->bCalcCOM)
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                dvec   com, comp;
                double wmass, wwmass;
                rvec   x_pbc = { 0, 0, 0 };
                int    i;

                clear_dvec(com);
                clear_dvec(comp);
                wmass  = 0;
                wwmass = 0;

                if (pgrp->epgrppbc == epgrppbcREFAT)
                {
                    /* Set the pbc atom */
                    copy_rvec(comm->rbuf[g], x_pbc);
                }

                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    int  ii, m;
                    real mass, wm;

                    ii   = pgrp->ind_loc[i];
                    mass = md->massT[ii];
                    if (pgrp->weight_loc == NULL)
                    {
                        wm     = mass;
                        wmass += wm;
                    }
                    else
                    {
                        real w;

                        w       = pgrp->weight_loc[i];
                        wm      = w*mass;
                        wmass  += wm;
                        wwmass += wm*w;
                    }
                    if (pgrp->epgrppbc == epgrppbcNONE)
                    {
                        /* Plain COM: sum the coordinates */
                        for (m = 0; m < DIM; m++)
                        {
                            com[m]    += wm*x[ii][m];
                        }
                        if (xp)
                        {
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*xp[ii][m];
                            }
                        }
                    }
                    else
                    {
                        rvec dx;

                        /* Sum the difference with the reference atom */
                        pbc_dx(pbc, x[ii], x_pbc, dx);
                        for (m = 0; m < DIM; m++)
                        {
                            com[m]    += wm*dx[m];
                        }
                        if (xp)
                        {
                            /* For xp add the difference between xp and x to dx,
                             * such that we use the same periodic image,
                             * also when xp has a large displacement.
                             */
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*(dx[m] + xp[ii][m] - x[ii][m]);
                            }
                        }
                    }
                }

                /* We do this check after the loop above to avoid more nesting.
                 * If we have a single-atom group the mass is irrelevant, so
                 * we can remove the mass factor to avoid division by zero.
                 * Note that with constraint pulling the mass does matter, but
                 * in that case a check group mass != 0 has been done before.
                 */
                if (pgrp->params.nat == 1 && pgrp->nat_loc == 1 && wmass == 0)
                {
                    int m;

                    /* Copy the single atom coordinate */
                    for (m = 0; m < DIM; m++)
                    {
                        com[m] = x[pgrp->ind_loc[0]][m];
                    }
                    /* Set all mass factors to 1 to get the correct COM */
                    wmass  = 1;
                    wwmass = 1;
                }

                if (pgrp->weight_loc == NULL)
                {
                    wwmass = wmass;
                }

                /* Copy local sums to a buffer for global summing */
                copy_dvec(com,  comm->dbuf[g*3]);
                copy_dvec(comp, comm->dbuf[g*3 + 1]);
                comm->dbuf[g*3 + 2][0] = wmass;
                comm->dbuf[g*3 + 2][1] = wwmass;
                comm->dbuf[g*3 + 2][2] = 0;
            }
            else
            {
                /* Cosine weighting geometry */
                double cm, sm, cmp, smp, ccm, csm, ssm, csw, snw;
                int    i;

                cm  = 0;
                sm  = 0;
                cmp = 0;
                smp = 0;
                ccm = 0;
                csm = 0;
                ssm = 0;

                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    int  ii;
                    real mass;

                    ii   = pgrp->ind_loc[i];
                    mass = md->massT[ii];
                    /* Determine cos and sin sums */
                    csw  = cos(x[ii][pull->cosdim]*twopi_box);
                    snw  = sin(x[ii][pull->cosdim]*twopi_box);
                    cm  += csw*mass;
                    sm  += snw*mass;
                    ccm += csw*csw*mass;
                    csm += csw*snw*mass;
                    ssm += snw*snw*mass;

                    if (xp)
                    {
                        csw  = cos(xp[ii][pull->cosdim]*twopi_box);
                        snw  = sin(xp[ii][pull->cosdim]*twopi_box);
                        cmp += csw*mass;
                        smp += snw*mass;
                    }
                }

                /* Copy local sums to a buffer for global summing */
                comm->dbuf[g*3  ][0] = cm;
                comm->dbuf[g*3  ][1] = sm;
                comm->dbuf[g*3  ][2] = 0;
                comm->dbuf[g*3+1][0] = ccm;
                comm->dbuf[g*3+1][1] = csm;
                comm->dbuf[g*3+1][2] = ssm;
                comm->dbuf[g*3+2][0] = cmp;
                comm->dbuf[g*3+2][1] = smp;
                comm->dbuf[g*3+2][2] = 0;
            }
        }
    }

    pull_reduce_double(cr, comm, pull->ngroup*3*DIM, comm->dbuf[0]);

    for (g = 0; g < pull->ngroup; g++)
    {
        pull_group_work_t *pgrp;

        pgrp = &pull->group[g];
        if (pgrp->params.nat > 0 && pgrp->bCalcCOM)
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                double wmass, wwmass;
                int    m;

                /* Determine the inverse mass */
                wmass             = comm->dbuf[g*3+2][0];
                wwmass            = comm->dbuf[g*3+2][1];
                pgrp->mwscale     = 1.0/wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm != 0)
                {
                    pgrp->wscale  = wmass/wwmass;
                    pgrp->invtm   = wwmass/(wmass*wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m]      = comm->dbuf[g*3  ][m]*pgrp->mwscale;
                    if (xp)
                    {
                        pgrp->xp[m] = comm->dbuf[g*3+1][m]*pgrp->mwscale;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT)
                    {
                        pgrp->x[m]      += comm->rbuf[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += comm->rbuf[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Cosine weighting geometry */
                double csw, snw, wmass, wwmass;
                int    i, ii;

                /* Determine the optimal location of the cosine weight */
                csw                   = comm->dbuf[g*3][0];
                snw                   = comm->dbuf[g*3][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                /* Set the weights for the local atoms */
                wmass  = sqrt(csw*csw + snw*snw);
                wwmass = (comm->dbuf[g*3+1][0]*csw*csw +
                          comm->dbuf[g*3+1][1]*csw*snw +
                          comm->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);

                pgrp->mwscale = 1.0/wmass;
                pgrp->wscale  = wmass/wwmass;
                pgrp->invtm   = wwmass/(wmass*wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    ii                  = pgrp->ind_loc[i];
                    pgrp->weight_loc[i] = csw*cos(twopi_box*x[ii][pull->cosdim]) +
                        snw*sin(twopi_box*x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = comm->dbuf[g*3+2][0];
                    snw                    = comm->dbuf[g*3+2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %d wmass %f invtm %f\n",
                        g, 1.0/pgrp->mwscale, pgrp->invtm);
            }
        }
    }

    if (pull->bCylinder)
    {
        /* Calculate the COMs for the cylinder reference groups */
        make_cyl_refgrps(cr, pull, md, pbc, t, x);
    }
}
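
Note: the non-cosine branch above reduces wmass and wwmass into the group's effective mass factors, wscale = wmass/wwmass and invtm = wwmass/wmass^2 (with unit weights, wwmass == wmass and invtm is simply the inverse total mass). A minimal self-contained sketch of that bookkeeping in plain C; the function and variable names here are illustrative, not GROMACS API:

#include <stdio.h>

/* Effective mass factors of a weighted group, as in the reduction above:
 * wmass  = sum w_i*m_i, wwmass = sum w_i^2*m_i,
 * invtm  = wwmass/wmass^2 (effective inverse mass),
 * wscale = wmass/wwmass   (weight rescaling factor).
 */
static void group_mass_factors(int n, const double *m, const double *w,
                               double *invtm, double *wscale)
{
    double wmass = 0, wwmass = 0;
    int    i;

    for (i = 0; i < n; i++)
    {
        wmass  += w[i]*m[i];
        wwmass += w[i]*w[i]*m[i];
    }
    *invtm  = wwmass/(wmass*wmass);
    *wscale = wmass/wwmass;
}

int main(void)
{
    double m[3] = { 16.0, 1.0, 1.0 };   /* water-like masses */
    double w[3] = { 1.0, 0.5, 0.5 };    /* arbitrary pull weights */
    double invtm, wscale;

    group_mass_factors(3, m, w, &invtm, &wscale);
    printf("invtm = %g, wscale = %g\n", invtm, wscale);
    return 0;
}
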
Example #12
gmx_constr_t init_constraints(FILE *fplog,
                              gmx_mtop_t *mtop, t_inputrec *ir,
                              gmx_edsam_t ed, t_state *state,
                              t_commrec *cr)
{
    int                  ncon, nset, nmol, settle_type, i, natoms, mt, nflexcon;
    struct gmx_constr   *constr;
    char                *env;
    t_ilist             *ilist;
    gmx_mtop_ilistloop_t iloop;

    ncon =
        gmx_mtop_ftype_count(mtop, F_CONSTR) +
        gmx_mtop_ftype_count(mtop, F_CONSTRNC);
    nset = gmx_mtop_ftype_count(mtop, F_SETTLE);

    if (ncon+nset == 0 && ir->ePull != epullCONSTRAINT && ed == NULL)
    {
        return NULL;
    }

    snew(constr, 1);

    constr->ncon_tot = ncon;
    constr->nflexcon = 0;
    if (ncon > 0)
    {
        constr->n_at2con_mt = mtop->nmoltype;
        snew(constr->at2con_mt, constr->n_at2con_mt);
        for (mt = 0; mt < mtop->nmoltype; mt++)
        {
            constr->at2con_mt[mt] = make_at2con(0, mtop->moltype[mt].atoms.nr,
                                                mtop->moltype[mt].ilist,
                                                mtop->ffparams.iparams,
                                                EI_DYNAMICS(ir->eI), &nflexcon);
            for (i = 0; i < mtop->nmolblock; i++)
            {
                if (mtop->molblock[i].type == mt)
                {
                    constr->nflexcon += mtop->molblock[i].nmol*nflexcon;
                }
            }
        }

        if (constr->nflexcon > 0)
        {
            if (fplog)
            {
                fprintf(fplog, "There are %d flexible constraints\n",
                        constr->nflexcon);
                if (ir->fc_stepsize == 0)
                {
                    fprintf(fplog, "\n"
                            "WARNING: step size for flexible constraining = 0\n"
                            "         All flexible constraints will be rigid.\n"
                            "         Will try to keep all flexible constraints at their original length,\n"
                            "         but the lengths may exhibit some drift.\n\n");
                    constr->nflexcon = 0;
                }
            }
            if (constr->nflexcon > 0)
            {
                please_cite(fplog, "Hess2002");
            }
        }

        if (ir->eConstrAlg == econtLINCS)
        {
            constr->lincsd = init_lincs(fplog, mtop,
                                        constr->nflexcon, constr->at2con_mt,
                                        DOMAINDECOMP(cr) && cr->dd->bInterCGcons,
                                        ir->nLincsIter, ir->nProjOrder);
        }

        if (ir->eConstrAlg == econtSHAKE)
        {
            if (DOMAINDECOMP(cr) && cr->dd->bInterCGcons)
            {
                gmx_fatal(FARGS, "SHAKE is not supported with domain decomposition and constraint that cross charge group boundaries, use LINCS");
            }
            if (constr->nflexcon)
            {
                gmx_fatal(FARGS, "For this system also velocities and/or forces need to be constrained, this can not be done with SHAKE, you should select LINCS");
            }
            please_cite(fplog, "Ryckaert77a");
            if (ir->bShakeSOR)
            {
                please_cite(fplog, "Barth95a");
            }

            constr->shaked = shake_init();
        }
    }

    if (nset > 0)
    {
        please_cite(fplog, "Miyamoto92a");

        constr->bInterCGsettles = inter_charge_group_settles(mtop);

        /* Check that we have only one settle type */
        settle_type = -1;
        iloop       = gmx_mtop_ilistloop_init(mtop);
        while (gmx_mtop_ilistloop_next(iloop, &ilist, &nmol))
        {
            for (i = 0; i < ilist[F_SETTLE].nr; i += 4)
            {
                if (settle_type == -1)
                {
                    settle_type = ilist[F_SETTLE].iatoms[i];
                }
                else if (ilist[F_SETTLE].iatoms[i] != settle_type)
                {
                    gmx_fatal(FARGS,
                              "The [molecules] section of your topology specifies more than one block of\n"
                              "a [moleculetype] with a [settles] block. Only one such is allowed. If you\n"
                              "are trying to partition your solvent into different *groups* (e.g. for\n"
                              "freezing, T-coupling, etc.) then you are using the wrong approach. Index\n"
                              "files specify groups. Otherwise, you may wish to change the least-used\n"
                              "block of molecules with SETTLE constraints into 3 normal constraints.");
                }
            }
        }

        constr->n_at2settle_mt = mtop->nmoltype;
        snew(constr->at2settle_mt, constr->n_at2settle_mt);
        for (mt = 0; mt < mtop->nmoltype; mt++)
        {
            constr->at2settle_mt[mt] =
                make_at2settle(mtop->moltype[mt].atoms.nr,
                               &mtop->moltype[mt].ilist[F_SETTLE]);
        }
    }

    constr->maxwarn = 999;
    env             = getenv("GMX_MAXCONSTRWARN");
    if (env)
    {
        constr->maxwarn = 0;
        sscanf(env, "%d", &constr->maxwarn);
        if (fplog)
        {
            fprintf(fplog,
                    "Setting the maximum number of constraint warnings to %d\n",
                    constr->maxwarn);
        }
        if (MASTER(cr))
        {
            fprintf(stderr,
                    "Setting the maximum number of constraint warnings to %d\n",
                    constr->maxwarn);
        }
    }
    if (constr->maxwarn < 0 && fplog)
    {
        fprintf(fplog, "maxwarn < 0, will not stop on constraint errors\n");
    }
    constr->warncount_lincs  = 0;
    constr->warncount_settle = 0;

    /* Initialize the essential dynamics sampling.
     * Put the pointer to the ED struct in constr */
    constr->ed = ed;
    if (ed != NULL || state->edsamstate.nED > 0)
    {
        init_edsam(mtop, ir, cr, ed, state->x, state->box, &state->edsamstate);
    }

    constr->warn_mtop = mtop;

    return constr;
}
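
Note: init_constraints reads GMX_MAXCONSTRWARN with getenv/sscanf to override the warning limit; note that in the code above a value that fails to parse leaves maxwarn at 0, not at the default. A self-contained sketch of that override pattern (env_override_int is an illustrative helper, not a GROMACS function):

#include <stdio.h>
#include <stdlib.h>

/* Read an integer override from an environment variable, keeping the
 * default when the variable is unset, mirroring the GMX_MAXCONSTRWARN
 * handling above. A negative value disables stopping on errors.
 */
static int env_override_int(const char *name, int def)
{
    const char *env = getenv(name);
    int         val = def;

    if (env != NULL)
    {
        /* As above, a value that fails to parse leaves val at 0 */
        val = 0;
        sscanf(env, "%d", &val);
    }
    return val;
}

int main(void)
{
    int maxwarn = env_override_int("GMX_MAXCONSTRWARN", 999);

    printf("maximum number of constraint warnings: %d\n", maxwarn);
    return 0;
}
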
Example #13
static void make_cyl_refgrps(t_commrec *cr, t_pull *pull, t_mdatoms *md,
                             t_pbc *pbc, double t, rvec *x, rvec *xp)
{
    int           c, i, ii, m, start, end;
    rvec          g_x, dx, dir;
    double        r0_2, sum_a, sum_ap, dr2, mass, weight, wmass, wwmass, inp;
    t_pull_coord *pcrd;
    t_pull_group *pref, *pgrp, *pdyna;
    gmx_ga2la_t   ga2la = NULL;

    if (pull->dbuf_cyl == NULL)
    {
        snew(pull->dbuf_cyl, pull->ncoord*4);
    }

    if (cr && DOMAINDECOMP(cr))
    {
        ga2la = cr->dd->ga2la;
    }

    start = 0;
    end   = md->homenr;

    r0_2 = dsqr(pull->cyl_r0);

    /* loop over all groups to make a reference group for each */
    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];

        /* pref will be the same group for all pull coordinates */
        pref  = &pull->group[pcrd->group[0]];
        pgrp  = &pull->group[pcrd->group[1]];
        pdyna = &pull->dyna[c];
        copy_rvec(pcrd->vec, dir);
        sum_a          = 0;
        sum_ap         = 0;
        wmass          = 0;
        wwmass         = 0;
        pdyna->nat_loc = 0;

        for (m = 0; m < DIM; m++)
        {
            g_x[m] = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
        }

        /* loop over all atoms in the main ref group */
        for (i = 0; i < pref->nat; i++)
        {
            ii = pref->ind[i];
            if (ga2la)
            {
                if (!ga2la_get_home(ga2la, pref->ind[i], &ii))
                {
                    ii = -1;
                }
            }
            if (ii >= start && ii < end)
            {
                pbc_dx_aiuc(pbc, x[ii], g_x, dx);
                inp = iprod(dir, dx);
                dr2 = 0;
                for (m = 0; m < DIM; m++)
                {
                    dr2 += dsqr(dx[m] - inp*dir[m]);
                }

                if (dr2 < r0_2)
                {
                    /* add to index, to sum of COM, to weight array */
                    if (pdyna->nat_loc >= pdyna->nalloc_loc)
                    {
                        pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1);
                        srenew(pdyna->ind_loc, pdyna->nalloc_loc);
                        srenew(pdyna->weight_loc, pdyna->nalloc_loc);
                    }
                    pdyna->ind_loc[pdyna->nat_loc] = ii;
                    mass   = md->massT[ii];
                    weight = get_weight(sqrt(dr2), pull->cyl_r1, pull->cyl_r0);
                    pdyna->weight_loc[pdyna->nat_loc] = weight;
                    sum_a += mass*weight*inp;
                    if (xp)
                    {
                        pbc_dx_aiuc(pbc, xp[ii], g_x, dx);
                        inp     = iprod(dir, dx);
                        sum_ap += mass*weight*inp;
                    }
                    wmass  += mass*weight;
                    wwmass += mass*sqr(weight);
                    pdyna->nat_loc++;
                }
            }
        }
        pull->dbuf_cyl[c*4+0] = wmass;
        pull->dbuf_cyl[c*4+1] = wwmass;
        pull->dbuf_cyl[c*4+2] = sum_a;
        pull->dbuf_cyl[c*4+3] = sum_ap;
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ncoord*4, pull->dbuf_cyl, cr);
    }

    for (c = 0; c < pull->ncoord; c++)
    {
        pcrd  = &pull->coord[c];

        pdyna = &pull->dyna[c];
        pgrp  = &pull->group[pcrd->group[1]];

        wmass         = pull->dbuf_cyl[c*4+0];
        wwmass        = pull->dbuf_cyl[c*4+1];
        pdyna->wscale = wmass/wwmass;
        pdyna->invtm  = 1.0/(pdyna->wscale*wmass);

        for (m = 0; m < DIM; m++)
        {
            g_x[m]      = pgrp->x[m] - pcrd->vec[m]*(pcrd->init + pcrd->rate*t);
            pdyna->x[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+2]/wmass;
            if (xp)
            {
                pdyna->xp[m] = g_x[m] + pcrd->vec[m]*pull->dbuf_cyl[c*4+3]/wmass;
            }
        }

        if (debug)
        {
            fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
                    c, pdyna->x[0], pdyna->x[1],
                    pdyna->x[2], 1.0/pdyna->invtm);
        }
    }
}
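
Note: an atom enters a dynamic cylinder group when its squared distance from the pull axis, dr2 = |dx - (dx.dir)dir|^2, is below cyl_r0^2, and it is weighted radially by get_weight(). The body of get_weight() is not shown in this excerpt, so the sketch below assumes a simple linear switch between r1 and r0; only the axis-distance computation mirrors the code above:

#include <math.h>
#include <stdio.h>

/* Squared distance of point dx (relative to a point on the axis) from a
 * line with unit direction dir: remove the axial component, as above. */
static double dist2_from_axis(const double dx[3], const double dir[3])
{
    double inp = dx[0]*dir[0] + dx[1]*dir[1] + dx[2]*dir[2];
    double d2  = 0;
    int    m;

    for (m = 0; m < 3; m++)
    {
        d2 += (dx[m] - inp*dir[m])*(dx[m] - inp*dir[m]);
    }
    return d2;
}

/* Hypothetical radial switch: 1 inside r1, 0 outside r0, linear in
 * between. The real get_weight() may differ; this is only a stand-in. */
static double switch_weight(double r, double r1, double r0)
{
    if (r <= r1) return 1.0;
    if (r >= r0) return 0.0;
    return (r0 - r)/(r0 - r1);
}

int main(void)
{
    double dx[3]  = { 0.4, 0.3, 1.2 };
    double dir[3] = { 0.0, 0.0, 1.0 };
    double r      = sqrt(dist2_from_axis(dx, dir));

    printf("r = %g, weight = %g\n", r, switch_weight(r, 0.4, 0.6));
    return 0;
}
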
Example #14
static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
                     struct gmx_lincsdata *lincsd,real *invmass,
                     t_commrec *cr,
                     real wangle,int *warn,
                     real invdt,rvec *v,
                     gmx_bool bCalcVir,tensor rmdr)
{
    int     b,i,j,k,n,iter;
    real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac,lam;  
    rvec    dx;
    int     ncons,*bla,*blnr,*blbnb;
    rvec    *r;
    real    *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*lambda;
    int     *nlocat;
    
    ncons  = lincsd->nc;
    bla    = lincsd->bla;
    r      = lincsd->tmpv;
    blnr   = lincsd->blnr;
    blbnb  = lincsd->blbnb;
    blc    = lincsd->blc;
    blmf   = lincsd->blmf;
    bllen  = lincsd->bllen;
    blcc   = lincsd->tmpncc;
    rhs1   = lincsd->tmp1;
    rhs2   = lincsd->tmp2;
    sol    = lincsd->tmp3;
    lambda = lincsd->lambda;
    
    if (DOMAINDECOMP(cr) && cr->dd->constraints)
    {
        nlocat = dd_constraints_nlocalatoms(cr->dd);
    }
    else if (PARTDECOMP(cr))
    {
        nlocat = pd_constraints_nlocalatoms(cr->pd);
    }
    else
    {
        nlocat = NULL;
    }
    
    *warn = 0;

    if (pbc)
    {
        /* Compute normalized i-j vectors */
        for(b=0; b<ncons; b++)
        {
            pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
            unitv(dx,r[b]);
        }  
        for(b=0; b<ncons; b++)
        {
            for(n=blnr[b]; n<blnr[b+1]; n++)
            {
                blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
            }
            pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
            mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
            rhs1[b] = mvb;
            sol[b]  = mvb;
        }
    }
    else
    {
        /* Compute normalized i-j vectors */
        for(b=0; b<ncons; b++)
        {
            i = bla[2*b];
            j = bla[2*b+1];
            tmp0 = x[i][0] - x[j][0];
            tmp1 = x[i][1] - x[j][1];
            tmp2 = x[i][2] - x[j][2];
            rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
            r[b][0] = rlen*tmp0;
            r[b][1] = rlen*tmp1;
            r[b][2] = rlen*tmp2;
        } /* 16 ncons flops */
        
        for(b=0; b<ncons; b++)
        {
            tmp0 = r[b][0];
            tmp1 = r[b][1];
            tmp2 = r[b][2];
            len = bllen[b];
            i = bla[2*b];
            j = bla[2*b+1];
            for(n=blnr[b]; n<blnr[b+1]; n++)
            {
                k = blbnb[n];
                blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]); 
            } /* 6 nr flops */
            mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
                          tmp1*(xp[i][1] - xp[j][1]) +    
                          tmp2*(xp[i][2] - xp[j][2]) - len);
            rhs1[b] = mvb;
            sol[b]  = mvb;
            /* 10 flops */
        }
        /* Together: 26*ncons + 6*nrtot flops */
    }
    
    lincs_matrix_expand(lincsd,blcc,rhs1,rhs2,sol);
    /* nrec*(ncons+2*nrtot) flops */
    
    for(b=0; b<ncons; b++)
    {
        i = bla[2*b];
        j = bla[2*b+1];
        mvb = blc[b]*sol[b];
        lambda[b] = -mvb;
        im1 = invmass[i];
        im2 = invmass[j];
        tmp0 = r[b][0]*mvb;
        tmp1 = r[b][1]*mvb;
        tmp2 = r[b][2]*mvb;
        xp[i][0] -= tmp0*im1;
        xp[i][1] -= tmp1*im1;
        xp[i][2] -= tmp2*im1;
        xp[j][0] += tmp0*im2;
        xp[j][1] += tmp1*im2;
        xp[j][2] += tmp2*im2;
    } /* 16 ncons flops */


    /*     
     ********  Correction for centripetal effects  ********  
     */
  
    wfac = cos(DEG2RAD*wangle);
    wfac = wfac*wfac;

    for(iter=0; iter<lincsd->nIter; iter++)
    {
        if (DOMAINDECOMP(cr) && cr->dd->constraints)
        {
            /* Communicate the corrected non-local coordinates */
            dd_move_x_constraints(cr->dd,box,xp,NULL);
        }
        else if (PARTDECOMP(cr))
        {
            pd_move_x_constraints(cr,xp,NULL);
        }
        
        for(b=0; b<ncons; b++)
        {
            len = bllen[b];
            if (pbc)
            {
                pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
            }
            else
            {
                rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
            }
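            /* 2*len^2 - |dx|^2 is a first-order estimate of the squared
             * constraint length after correction; sqrt(dlen2) is used
             * below to pull the constraint back towards len.
             */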
            len2 = len*len;
            dlen2 = 2*len2 - norm2(dx);
            if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b]))
            {
                *warn = b;
            }
            if (dlen2 > 0)
            {
                mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
            }
            else
            {
                mvb = blc[b]*len;
            }
            rhs1[b] = mvb;
            sol[b]  = mvb;
        } /* 20*ncons flops */
        
        lincs_matrix_expand(lincsd,blcc,rhs1,rhs2,sol);
        /* nrec*(ncons+2*nrtot) flops */
        
        for(b=0; b<ncons; b++)
        {
            i = bla[2*b];
            j = bla[2*b+1];
            lam = lambda[b];
            mvb = blc[b]*sol[b];
            lambda[b] = lam - mvb;
            im1 = invmass[i];
            im2 = invmass[j];
            tmp0 = r[b][0]*mvb;
            tmp1 = r[b][1]*mvb;
            tmp2 = r[b][2]*mvb;
            xp[i][0] -= tmp0*im1;
            xp[i][1] -= tmp1*im1;
            xp[i][2] -= tmp2*im1;
            xp[j][0] += tmp0*im2;
            xp[j][1] += tmp1*im2;
            xp[j][2] += tmp2*im2;
        } /* 17 ncons flops */
    } /* nit*ncons*(37+9*nrec) flops */
    
    if (v)
    {
        /* Correct the velocities */
        for(b=0; b<ncons; b++)
        {
            i = bla[2*b];
            j = bla[2*b+1];
            im1 = invmass[i]*lambda[b]*invdt;
            im2 = invmass[j]*lambda[b]*invdt;
            v[i][0] += im1*r[b][0];
            v[i][1] += im1*r[b][1];
            v[i][2] += im1*r[b][2];
            v[j][0] -= im2*r[b][0];
            v[j][1] -= im2*r[b][1];
            v[j][2] -= im2*r[b][2];
        } /* 16 ncons flops */
    }
    
    if (nlocat)
    {
        /* Only account for local atoms */
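        /* nlocat[b] counts the home atoms of constraint b, so the factor
         * 0.5*nlocat[b] lets constraints shared between domains contribute
         * only once to globally summed quantities.
         */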
        for(b=0; b<ncons; b++)
        {
            lambda[b] *= 0.5*nlocat[b];
        }
    }
    
    if (bCalcVir)
    {
        /* Constraint virial */
        for(b=0; b<ncons; b++)
        {
            tmp0 = bllen[b]*lambda[b];
            for(i=0; i<DIM; i++)
            {
                tmp1 = tmp0*r[b][i];
                for(j=0; j<DIM; j++)
                {
                    rmdr[i][j] -= tmp1*r[b][j];
                }
            }
        } /* 22 ncons flops */
    }
    
    /* Total:
     * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
     * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
     *
     * (26+nrec)*ncons + (6+2*nrec)*nrtot
     * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
     * if nit=1
     * (63+nrec)*ncons + (6+4*nrec)*nrtot
     */
}
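
Note: the correction loops above displace both constrained atoms along the stored bond direction r, weighted by their inverse masses. Below is a self-contained sketch for a single, uncoupled constraint; with one constraint the matrix expansion collapses to a direct solve, and blc^2 = 1/(1/m_i + 1/m_j) as in LINCS. Names are illustrative:

#include <math.h>
#include <stdio.h>

/* One projection step of the scheme above for a single constraint
 * between atoms i and j: displace xp along the old bond direction r
 * (a unit vector) so that r.(xp_i - xp_j) equals the reference length.
 */
static void project_single_constraint(double xpi[3], double xpj[3],
                                      const double r[3], double len,
                                      double im1, double im2)
{
    double blc2 = 1.0/(im1 + im2);
    double dxr  = 0;
    int    m;

    for (m = 0; m < 3; m++)
    {
        dxr += r[m]*(xpi[m] - xpj[m]);
    }
    /* mvb = blc^2*(r.dx - len), distributed by inverse mass; afterwards
     * r.(xp_i - xp_j) equals len exactly. */
    {
        double mvb = blc2*(dxr - len);

        for (m = 0; m < 3; m++)
        {
            xpi[m] -= mvb*im1*r[m];
            xpj[m] += mvb*im2*r[m];
        }
    }
}

int main(void)
{
    double xpi[3] = { 0.12, 0.0, 0.0 }, xpj[3] = { 0.0, 0.0, 0.0 };
    double r[3]   = { 1.0, 0.0, 0.0 };  /* unit vector from old positions */

    project_single_constraint(xpi, xpj, r, 0.1, 1.0, 1.0);
    printf("corrected separation: %g\n", xpi[0] - xpj[0]);
    return 0;
}
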
Example #15
gmx_bool replica_exchange(FILE *fplog,const t_commrec *cr,struct gmx_repl_ex *re,
                          t_state *state,real *ener,
                          t_state *state_local,
                          int step,real time)
{
    gmx_multisim_t *ms;
    int  exchange=-1,shift;
    gmx_bool bExchanged=FALSE;
    
    ms = cr->ms;
  
    if (MASTER(cr))
    {
        exchange = get_replica_exchange(fplog,ms,re,ener,det(state->box),
                                        step,time);
        bExchanged = (exchange >= 0);
    }
    
    if (PAR(cr))
    {
#ifdef GMX_MPI
        MPI_Bcast(&bExchanged,sizeof(gmx_bool),MPI_BYTE,MASTERRANK(cr),
                  cr->mpi_comm_mygroup);
#endif
    }
    
    if (bExchanged)
    {
        /* Exchange the states */

        if (PAR(cr))
        {
            /* Collect the global state on the master node */
            if (DOMAINDECOMP(cr))
            {
                dd_collect_state(cr->dd,state_local,state);
            }
            else
            {
                pd_collect_state(cr,state);
            }
        }
        
        if (MASTER(cr))
        {
            /* Exchange the global states between the master nodes */
            if (debug)
            {
                fprintf(debug,"Exchanging %d with %d\n",ms->sim,exchange);
            }
            exchange_state(ms,exchange,state);
            
            if (re->type == ereTEMP)
            {
                scale_velocities(state,sqrt(re->q[ms->sim]/re->q[exchange]));
            }
        }

        /* With domain decomposition the global state is distributed later */
        if (!DOMAINDECOMP(cr))
        {
            /* Copy the global state to the local state data structure */
            copy_state_nonatomdata(state,state_local);
            
            if (PAR(cr))
            {
                bcast_state(cr,state,FALSE);
            }
        }
    }
        
    return bExchanged;
}
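
Note: for temperature replica exchange, the incoming state's velocities are rescaled by sqrt(T_new/T_old) so that the kinetic energy matches the local ensemble; that is what scale_velocities() does above, with re->q holding the temperatures when re->type == ereTEMP. A standalone sketch (names illustrative):

#include <math.h>
#include <stdio.h>

/* Rescale all velocities by sqrt(t_new/t_old) after a temperature swap,
 * where t_old is the temperature the velocities came from and t_new is
 * the temperature of the receiving replica. */
static void scale_velocities_T(int n, double (*v)[3],
                               double t_old, double t_new)
{
    double fac = sqrt(t_new/t_old);
    int    i, m;

    for (i = 0; i < n; i++)
    {
        for (m = 0; m < 3; m++)
        {
            v[i][m] *= fac;
        }
    }
}

int main(void)
{
    double v[2][3] = { { 1.0, 0.0, 0.0 }, { 0.0, -1.0, 0.0 } };

    scale_velocities_T(2, v, 300.0, 310.0);
    printf("v[0][0] = %g\n", v[0][0]);
    return 0;
}
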
Example #16
/* calculates center of mass of selection index from all coordinates x */
void pull_calc_coms(t_commrec *cr,
                    t_pull *pull, t_mdatoms *md, t_pbc *pbc, double t,
                    rvec x[], rvec *xp)
{
    int           g, i, ii, m;
    real          mass, w, wm, twopi_box = 0;
    double        wmass, wwmass, invwmass;
    dvec          com, comp;
    double        cm, sm, cmp, smp, ccm, csm, ssm, csw, snw;
    rvec         *xx[2], x_pbc = {0, 0, 0}, dx;
    t_pull_group *pgrp;

    if (pull->rbuf == NULL)
    {
        snew(pull->rbuf, pull->ngroup);
    }
    if (pull->dbuf == NULL)
    {
        snew(pull->dbuf, 3*pull->ngroup);
    }

    if (pull->bRefAt && pull->bSetPBCatoms)
    {
        pull_set_pbcatoms(cr, pull, x, pull->rbuf);

        if (cr != NULL && DOMAINDECOMP(cr))
        {
            /* We can keep these PBC reference coordinates fixed for nstlist
             * steps, since atoms won't jump over PBC.
             * This avoids a global reduction at the next nstlist-1 steps.
             * Note that the exact values of the pbc reference coordinates
             * are irrelevant, as long as all atoms in the group are within
             * half a box distance of the reference coordinate.
             */
            pull->bSetPBCatoms = FALSE;
        }
    }

    if (pull->cosdim >= 0)
    {
        for (m = pull->cosdim+1; m < pull->npbcdim; m++)
        {
            if (pbc->box[m][pull->cosdim] != 0)
            {
                gmx_fatal(FARGS, "Can not do cosine weighting for trilinic dimensions");
            }
        }
        twopi_box = 2.0*M_PI/pbc->box[pull->cosdim][pull->cosdim];
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pgrp = &pull->group[g];
        clear_dvec(com);
        clear_dvec(comp);
        wmass  = 0;
        wwmass = 0;
        cm     = 0;
        sm     = 0;
        cmp    = 0;
        smp    = 0;
        ccm    = 0;
        csm    = 0;
        ssm    = 0;
        if (!(g == 0 && PULL_CYL(pull)))
        {
            if (pgrp->epgrppbc == epgrppbcREFAT)
            {
                /* Set the pbc atom */
                copy_rvec(pull->rbuf[g], x_pbc);
            }
            w = 1;
            for (i = 0; i < pgrp->nat_loc; i++)
            {
                ii   = pgrp->ind_loc[i];
                mass = md->massT[ii];
                if (pgrp->epgrppbc != epgrppbcCOS)
                {
                    if (pgrp->weight_loc)
                    {
                        w = pgrp->weight_loc[i];
                    }
                    wm      = w*mass;
                    wmass  += wm;
                    wwmass += wm*w;
                    if (pgrp->epgrppbc == epgrppbcNONE)
                    {
                        /* Plain COM: sum the coordinates */
                        for (m = 0; m < DIM; m++)
                        {
                            com[m]    += wm*x[ii][m];
                        }
                        if (xp)
                        {
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*xp[ii][m];
                            }
                        }
                    }
                    else
                    {
                        /* Sum the difference with the reference atom */
                        pbc_dx(pbc, x[ii], x_pbc, dx);
                        for (m = 0; m < DIM; m++)
                        {
                            com[m]    += wm*dx[m];
                        }
                        if (xp)
                        {
                            /* For xp add the difference between xp and x to dx,
                             * such that we use the same periodic image,
                             * also when xp has a large displacement.
                             */
                            for (m = 0; m < DIM; m++)
                            {
                                comp[m] += wm*(dx[m] + xp[ii][m] - x[ii][m]);
                            }
                        }
                    }
                }
                else
                {
                    /* Determine cos and sin sums */
                    csw  = cos(x[ii][pull->cosdim]*twopi_box);
                    snw  = sin(x[ii][pull->cosdim]*twopi_box);
                    cm  += csw*mass;
                    sm  += snw*mass;
                    ccm += csw*csw*mass;
                    csm += csw*snw*mass;
                    ssm += snw*snw*mass;

                    if (xp)
                    {
                        csw  = cos(xp[ii][pull->cosdim]*twopi_box);
                        snw  = sin(xp[ii][pull->cosdim]*twopi_box);
                        cmp += csw*mass;
                        smp += snw*mass;
                    }
                }
            }
        }

        /* Copy local sums to a buffer for global summing */
        switch (pgrp->epgrppbc)
        {
            case epgrppbcNONE:
            case epgrppbcREFAT:
                copy_dvec(com, pull->dbuf[g*3]);
                copy_dvec(comp, pull->dbuf[g*3+1]);
                pull->dbuf[g*3+2][0] = wmass;
                pull->dbuf[g*3+2][1] = wwmass;
                pull->dbuf[g*3+2][2] = 0;
                break;
            case epgrppbcCOS:
                pull->dbuf[g*3  ][0] = cm;
                pull->dbuf[g*3  ][1] = sm;
                pull->dbuf[g*3  ][2] = 0;
                pull->dbuf[g*3+1][0] = ccm;
                pull->dbuf[g*3+1][1] = csm;
                pull->dbuf[g*3+1][2] = ssm;
                pull->dbuf[g*3+2][0] = cmp;
                pull->dbuf[g*3+2][1] = smp;
                pull->dbuf[g*3+2][2] = 0;
                break;
        }
    }

    if (cr && PAR(cr))
    {
        /* Sum the contributions over the nodes */
        gmx_sumd(pull->ngroup*3*DIM, pull->dbuf[0], cr);
    }

    for (g = 0; g < pull->ngroup; g++)
    {
        pgrp = &pull->group[g];
        if (pgrp->nat > 0 && !(g == 0 && PULL_CYL(pull)))
        {
            if (pgrp->epgrppbc != epgrppbcCOS)
            {
                /* Determine the inverse mass */
                wmass    = pull->dbuf[g*3+2][0];
                wwmass   = pull->dbuf[g*3+2][1];
                invwmass = 1/wmass;
                /* invtm==0 signals a frozen group, so then we should keep it zero */
                if (pgrp->invtm > 0)
                {
                    pgrp->wscale = wmass/wwmass;
                    pgrp->invtm  = 1.0/(pgrp->wscale*wmass);
                }
                /* Divide by the total mass */
                for (m = 0; m < DIM; m++)
                {
                    pgrp->x[m]    = pull->dbuf[g*3  ][m]*invwmass;
                    if (xp)
                    {
                        pgrp->xp[m] = pull->dbuf[g*3+1][m]*invwmass;
                    }
                    if (pgrp->epgrppbc == epgrppbcREFAT)
                    {
                        pgrp->x[m]    += pull->rbuf[g][m];
                        if (xp)
                        {
                            pgrp->xp[m] += pull->rbuf[g][m];
                        }
                    }
                }
            }
            else
            {
                /* Determine the optimal location of the cosine weight */
                csw                   = pull->dbuf[g*3][0];
                snw                   = pull->dbuf[g*3][1];
                pgrp->x[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                /* Set the weights for the local atoms */
                wmass  = sqrt(csw*csw + snw*snw);
                wwmass = (pull->dbuf[g*3+1][0]*csw*csw +
                          pull->dbuf[g*3+1][1]*csw*snw +
                          pull->dbuf[g*3+1][2]*snw*snw)/(wmass*wmass);
                pgrp->wscale = wmass/wwmass;
                pgrp->invtm  = 1.0/(pgrp->wscale*wmass);
                /* Set the weights for the local atoms */
                csw *= pgrp->invtm;
                snw *= pgrp->invtm;
                for (i = 0; i < pgrp->nat_loc; i++)
                {
                    ii                  = pgrp->ind_loc[i];
                    pgrp->weight_loc[i] = csw*cos(twopi_box*x[ii][pull->cosdim]) +
                        snw*sin(twopi_box*x[ii][pull->cosdim]);
                }
                if (xp)
                {
                    csw                    = pull->dbuf[g*3+2][0];
                    snw                    = pull->dbuf[g*3+2][1];
                    pgrp->xp[pull->cosdim] = atan2_0_2pi(snw, csw)/twopi_box;
                }
            }
            if (debug)
            {
                fprintf(debug, "Pull group %d wmass %f wwmass %f invtm %f\n",
                        g, wmass, wwmass, pgrp->invtm);
            }
        }
    }

    if (PULL_CYL(pull))
    {
        /* Calculate the COMs for the cylinder reference groups */
        make_cyl_refgrps(cr, pull, md, pbc, t, x, xp);
    }
}
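
Note: with cosine weighting the group position along cosdim is a circular mean: the angle of the mass-weighted (cos, sin) sums, mapped to [0, 2*pi) as atan2_0_2pi does above, then scaled back to box coordinates. A standalone sketch (circular_mean is an illustrative helper, not GROMACS API):

#include <math.h>
#include <stdio.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

/* Circular mean of coordinates in a periodic box of length L: the angle
 * of the mass-weighted (cos, sin) sums, mapped back to [0, L). This is
 * the quantity stored in pgrp->x[cosdim] above. */
static double circular_mean(int n, const double *x, const double *mass,
                            double L)
{
    double twopi_box = 2.0*M_PI/L;
    double cm = 0, sm = 0, a;
    int    i;

    for (i = 0; i < n; i++)
    {
        cm += mass[i]*cos(x[i]*twopi_box);
        sm += mass[i]*sin(x[i]*twopi_box);
    }
    a = atan2(sm, cm);
    if (a < 0)
    {
        a += 2.0*M_PI;    /* like atan2_0_2pi: map to [0, 2*pi) */
    }
    return a/twopi_box;
}

int main(void)
{
    /* Two equal masses straddling the periodic boundary of a 3 nm box:
     * the circular mean is 0 (== 3 under PBC), not the naive 1.5. */
    double x[2] = { 2.9, 0.1 }, m[2] = { 1.0, 1.0 };

    printf("circular mean = %g\n", circular_mean(2, x, m, 3.0));
    return 0;
}
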
Example #17
void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                       t_forcerec *fr,      t_inputrec *ir,
                       t_idef     *idef,    t_commrec  *cr,
                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
                       t_mdatoms  *md,
                       t_grpopts  *opts,
                       rvec       x[],      history_t  *hist,
                       rvec       f[],
                       gmx_enerdata_t *enerd,
                       t_fcdata   *fcd,
                       gmx_mtop_t     *mtop,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       t_atomtypes *atype,
                       gmx_bool       bBornRadii,
                       matrix     box,
                       real       lambda,
                       t_graph    *graph,
                       t_blocka   *excl,
                       rvec       mu_tot[],
                       int        flags,
                       float      *cycles_pme)
{
    int     i,status;
    int     donb_flags;
    gmx_bool    bDoEpot,bSepDVDL,bSB;
    int     pme_flags;
    matrix  boxs;
    rvec    box_size;
    real    dvdlambda,Vsr,Vlr,Vcorr=0,vdip,vcharge;
    t_pbc   pbc;
    real    dvdgb;
    char    buf[22];
    gmx_enerdata_t ed_lam;
    double  lam_i;
    real    dvdl_dum;

#ifdef GMX_MPI
    double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
#endif

#define PRINT_SEPDVDL(s,v,dvdl) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdl);

    GMX_MPE_LOG(ev_force_start);
    set_pbc(&pbc,fr->ePBC,box);

    /* Reset box */
    for(i=0; (i<DIM); i++)
    {
        box_size[i]=box[i][i];
    }

    bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
    debug_gmx();

    /* do QMMM first if requested */
    if(fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
    }

    if (bSepDVDL)
    {
        fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
                gmx_step_str(step,buf),cr->nodeid);
    }

    /* Call the short range functions all in one go. */
    GMX_MPE_LOG(ev_do_fnbf_start);

    dvdlambda = 0;

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0=MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        dvdlambda = do_walls(ir,fr,box,md,x,f,lambda,
                             enerd->grpp.ener[egLJSR],nrnb);
        PRINT_SEPDVDL("Walls",0.0,dvdlambda);
        enerd->dvdl_lin += dvdlambda;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        /* wallcycle_start(wcycle,ewcGB); */

        for(i=0; i<born->nr; i++)
        {
            fr->dvda[i]=0;
        }

        if(bBornRadii)
        {
            calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
        }

        /* wallcycle_stop(wcycle, ewcGB); */
    }

    where();
    donb_flags = 0;
    if (flags & GMX_FORCE_FORCES)
    {
        donb_flags |= GMX_DONB_FORCES;
    }
    do_nonbonded(cr,fr,x,f,md,excl,
                 fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR] :
                 enerd->grpp.ener[egLJSR],
                 enerd->grpp.ener[egCOULSR],
                 enerd->grpp.ener[egGB],box_size,nrnb,
                 lambda,&dvdlambda,-1,-1,donb_flags);
    /* If we do foreign lambda and we have soft-core interactions
     * we have to recalculate the (non-linear) energies contributions.
     */
    if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) && ir->sc_alpha != 0)
    {
        init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam);

        for(i=0; i<enerd->n_lambda; i++)
        {
            lam_i = (i==0 ? lambda : ir->flambda[i-1]);
            dvdl_dum = 0;
            reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
            do_nonbonded(cr,fr,x,f,md,excl,
                         fr->bBHAM ?
                         ed_lam.grpp.ener[egBHAMSR] :
                         ed_lam.grpp.ener[egLJSR],
                         ed_lam.grpp.ener[egCOULSR],
                         enerd->grpp.ener[egGB], box_size,nrnb,
                         lam_i,&dvdl_dum,-1,-1,
                         GMX_DONB_FOREIGNLAMBDA);
            sum_epot(&ir->opts,&ed_lam);
            enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
        }
        destroy_enerdata(&ed_lam);
    }
    where();

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    if (ir->implicit_solvent)
    {
        calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
                       ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1=MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (ir->sc_alpha != 0)
    {
        enerd->dvdl_nonlin += dvdlambda;
    }
    else
    {
        enerd->dvdl_lin    += dvdlambda;
    }
    Vsr = 0;
    if (bSepDVDL)
    {
        for(i=0; i<enerd->grpp.nener; i++)
        {
            Vsr +=
                (fr->bBHAM ?
                 enerd->grpp.ener[egBHAMSR][i] :
                 enerd->grpp.ener[egLJSR][i])
                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
        }
    }
    PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlambda);
    debug_gmx();

    GMX_MPE_LOG(ev_do_fnbf_finish);

    if (debug)
    {
        pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
    }

    /* Shift the coordinates. Must be done before bonded forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * be skipped when no bonded forces have to be evaluated.
     */

    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph,box,x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
        }
    }
    /* Check whether we need to do bondeds or correct for exclusions */
    if (fr->bMolPBC &&
            ((flags & GMX_FORCE_BONDED)
             || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
    {
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
    }
    debug_gmx();

    if (flags & GMX_FORCE_BONDED)
    {
        GMX_MPE_LOG(ev_calc_bonds_start);
        calc_bonds(fplog,cr->ms,
                   idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
                   fr->bSepDVDL && do_per_step(step,ir->nstlog),step);

        /* Check if we have to determine energy differences
         * at foreign lambda's.
         */
        if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) &&
                idef->ilsort != ilsortNO_FE)
        {
            if (idef->ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam);

            for(i=0; i<enerd->n_lambda; i++)
            {
                lam_i = (i==0 ? lambda : ir->flambda[i-1]);
                dvdl_dum = 0;
                reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
                calc_bonds_lambda(fplog,
                                  idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
                                  fcd,
                                  DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
                sum_epot(&ir->opts,&ed_lam);
                enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
            }
            destroy_enerdata(&ed_lam);
        }
        debug_gmx();
        GMX_MPE_LOG(ev_calc_bonds_finish);
    }

    where();

    *cycles_pme = 0;
    if (EEL_FULL(fr->eeltype))
    {
        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box,boxs);
            svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        clear_mat(fr->vir_el_recip);

        if (fr->bEwald)
        {
            if (fr->n_tpi == 0)
            {
                dvdlambda = 0;
                Vcorr = ewald_LRcorrection(fplog,md->start,md->start+md->homenr,
                                           cr,fr,
                                           md->chargeA,
                                           md->nChargePerturbed ? md->chargeB : NULL,
                                           excl,x,bSB ? boxs : box,mu_tot,
                                           ir->ewald_geometry,
                                           ir->epsilon_surface,
                                           lambda,&dvdlambda,&vdip,&vcharge);
                PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdlambda);
                enerd->dvdl_lin += dvdlambda;
            }
            else
            {
                if (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0)
                {
                    gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }
                /* The TPI molecule does not have exclusions with the rest
                 * of the system and no intra-molecular PME grid contributions
                 * will be calculated in gmx_pme_calc_energy.
                 */
                Vcorr = 0;
            }
        }
        else
        {
            Vcorr = shift_LRcorrection(fplog,md->start,md->homenr,cr,fr,
                                       md->chargeA,excl,x,TRUE,box,
                                       fr->vir_el_recip);
        }

        dvdlambda = 0;
        status = 0;
        switch (fr->eeltype)
        {
        case eelPPPM:
            status = gmx_pppm_do(fplog,fr->pmedata,FALSE,x,fr->f_novirsum,
                                 md->chargeA,
                                 box_size,fr->phi,cr,md->start,md->homenr,
                                 nrnb,ir->pme_order,&Vlr);
            break;
        case eelPME:
        case eelPMESWITCH:
        case eelPMEUSER:
        case eelPMEUSERSWITCH:
            if (cr->duty & DUTY_PME)
            {
                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                {
                    pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
                    if (flags & GMX_FORCE_FORCES)
                    {
                        pme_flags |= GMX_PME_CALC_F;
                    }
                    if (flags & GMX_FORCE_VIRIAL)
                    {
                        pme_flags |= GMX_PME_CALC_ENER_VIR;
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* We don't calculate f, but we do want the potential */
                        pme_flags |= GMX_PME_CALC_POT;
                    }
                    wallcycle_start(wcycle,ewcPMEMESH);
                    status = gmx_pme_do(fr->pmedata,
                                        md->start,md->homenr - fr->n_tpi,
                                        x,fr->f_novirsum,
                                        md->chargeA,md->chargeB,
                                        bSB ? boxs : box,cr,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                        nrnb,wcycle,
                                        fr->vir_el_recip,fr->ewaldcoeff,
                                        &Vlr,lambda,&dvdlambda,
                                        pme_flags);
                    *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);

                    /* We should try to do as little computation after
                     * this as possible, because parallel PME synchronizes
                     * the nodes, so we want all load imbalance of the rest
                     * of the force calculation to be before the PME call.
                     * DD load balancing is done on the whole time of
                     * the force call (without PME).
                     */
                }
                if (fr->n_tpi > 0)
                {
                    /* Determine the PME grid energy of the test molecule
                     * with the PME grid potential of the other charges.
                     */
                    gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
                                        x + md->homenr - fr->n_tpi,
                                        md->chargeA + md->homenr - fr->n_tpi,
                                        &Vlr);
                }
                PRINT_SEPDVDL("PME mesh",Vlr,dvdlambda);
            }
            else
            {
                /* Energies and virial are obtained later from the PME nodes */
                /* but values have to be zeroed out here */
                Vlr=0.0;
            }
            break;
        case eelEWALD:
            Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
                           md->chargeA,md->chargeB,
                           box_size,cr,md->homenr,
                           fr->vir_el_recip,fr->ewaldcoeff,
                           lambda,&dvdlambda,fr->ewald_table);
            PRINT_SEPDVDL("Ewald long-range",Vlr,dvdlambda);
            break;
        default:
            Vlr = 0;
            gmx_fatal(FARGS,"No such electrostatics method implemented %s",
                      eel_names[fr->eeltype]);
        }
        if (status != 0)
        {
            gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
                      status,EELTYPE(fr->eeltype));
        }
        enerd->dvdl_lin += dvdlambda;
        enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
        if (debug)
        {
            fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
                    Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
            pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
        }
    }
    else
    {
        if (EEL_RF(fr->eeltype))
        {
            dvdlambda = 0;

            if (fr->eeltype != eelRF_NEC)
            {
                enerd->term[F_RF_EXCL] =
                    RF_excl_correction(fplog,fr,graph,md,excl,x,f,
                                       fr->fshift,&pbc,lambda,&dvdlambda);
            }

            enerd->dvdl_lin += dvdlambda;
            PRINT_SEPDVDL("RF exclusion correction",
                          enerd->term[F_RF_EXCL],dvdlambda);
        }
    }
    where();
    debug_gmx();

    if (debug)
    {
        print_nrnb(debug,nrnb);
    }
    debug_gmx();

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t2=MPI_Wtime();
        MPI_Barrier(cr->mpi_comm_mygroup);
        t3=MPI_Wtime();
        fr->t_wait += t3-t2;
        if (fr->timesteps == 11)
        {
            fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
                    cr->nodeid, gmx_step_str(fr->timesteps,buf),
                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
        }
        fr->timesteps++;
    }
#endif

    if (debug)
    {
        pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
    }

    GMX_MPE_LOG(ev_force_finish);

}
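
Note: the two foreign-lambda blocks above follow the same scheme: re-evaluate the nonlinear energy terms at each requested lambda and accumulate them into enerd->enerpart_lambda for later free-energy estimation. A toy standalone sketch of that accumulation loop; toy_energy is an invented stand-in for the soft-core nonbonded and sorted bonded evaluations:

#include <stdio.h>

#define N_FLAMBDA 5

/* Hypothetical nonlinear potential V(lambda); stands in for the
 * soft-core and free-energy-sorted bonded evaluations above. */
static double toy_energy(double lambda)
{
    return (1.0 - lambda)*100.0 + lambda*40.0 + 25.0*lambda*(1.0 - lambda);
}

int main(void)
{
    double flambda[N_FLAMBDA]         = { 0.0, 0.25, 0.5, 0.75, 1.0 };
    double enerpart_lambda[N_FLAMBDA] = { 0 };
    int    i;

    /* As in do_force_lowlevel: recompute the potential at each foreign
     * lambda and accumulate it; dV/dlambda terms are handled separately. */
    for (i = 0; i < N_FLAMBDA; i++)
    {
        enerpart_lambda[i] += toy_energy(flambda[i]);
    }
    for (i = 0; i < N_FLAMBDA; i++)
    {
        printf("lambda = %4.2f  V = %g\n", flambda[i], enerpart_lambda[i]);
    }
    return 0;
}
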
Example #18
void global_stat(FILE *fplog, gmx_global_stat_t gs,
                 t_commrec *cr, gmx_enerdata_t *enerd,
                 tensor fvir, tensor svir, rvec mu_tot,
                 t_inputrec *inputrec,
                 gmx_ekindata_t *ekind, gmx_constr_t constr,
                 t_vcm *vcm,
                 int nsig, real *sig,
                 gmx_mtop_t *top_global, t_state *state_local,
                 gmx_bool bSumEkinhOld, int flags)
/* the flags argument replaces separate gmx_booleans for summing the virial, kinetic energy, and other terms */
{
    t_bin     *rb;
    int       *itc0, *itc1;
    int        ie    = 0, ifv = 0, isv = 0, irmsd = 0, imu = 0;
    int        idedl = 0, idvdll = 0, idvdlnl = 0, iepl = 0, icm = 0, imass = 0, ica = 0, inb = 0;
    int        isig  = -1;
    int        icj   = -1, ici = -1, icx = -1;
    int        inn[egNR];
    real       copyenerd[F_NRE];
    int        nener, j;
    real      *rmsd_data = NULL;
    double     nb;
    gmx_bool   bVV, bTemp, bEner, bPres, bConstrVir, bEkinAveVel, bReadEkin;

    bVV           = EI_VV(inputrec->eI);
    bTemp         = flags & CGLO_TEMPERATURE;
    bEner         = flags & CGLO_ENERGY;
    bPres         = (flags & CGLO_PRESSURE);
    bConstrVir    = (flags & CGLO_CONSTRAINT);
    bEkinAveVel   = (inputrec->eI == eiVV || (inputrec->eI == eiVVAK && bPres));
    bReadEkin     = (flags & CGLO_READEKIN);

    rb   = gs->rb;
    itc0 = gs->itc0;
    itc1 = gs->itc1;


    reset_bin(rb);
    /* This routine copies all the data to be summed to one big buffer
     * using the t_bin struct.
     */

    /* First, we need to identify which enerd->term should be
       communicated.  Temperature and pressure terms should only be
       communicated and summed when they need to be, to avoid repeating
       the sums and overcounting. */

    nener = filter_enerdterm(enerd->term, TRUE, copyenerd, bTemp, bPres, bEner);

    /* First, the data that needs to be communicated with velocity verlet every time
       This is just the constraint virial.*/
    if (bConstrVir)
    {
        isv = add_binr(rb, DIM*DIM, svir[0]);
        where();
    }

/* We need the force virial and the kinetic energy for the first time through with velocity verlet */
    if (bTemp || !bVV)
    {
        if (ekind)
        {
            for (j = 0; (j < inputrec->opts.ngtc); j++)
            {
                if (bSumEkinhOld)
                {
                    itc0[j] = add_binr(rb, DIM*DIM, ekind->tcstat[j].ekinh_old[0]);
                }
                if (bEkinAveVel && !bReadEkin)
                {
                    itc1[j] = add_binr(rb, DIM*DIM, ekind->tcstat[j].ekinf[0]);
                }
                else if (!bReadEkin)
                {
                    itc1[j] = add_binr(rb, DIM*DIM, ekind->tcstat[j].ekinh[0]);
                }
            }
            /* these probably need to be put into one of these categories */
            where();
            idedl = add_binr(rb, 1, &(ekind->dekindl));
            where();
            ica   = add_binr(rb, 1, &(ekind->cosacc.mvcos));
            where();
        }
    }
    where();

    if (bPres || !bVV)
    {
        ifv = add_binr(rb, DIM*DIM, fvir[0]);
    }


    if (bEner)
    {
        where();
        ie  = add_binr(rb, nener, copyenerd);
        where();
        if (constr)
        {
            rmsd_data = constr_rmsd_data(constr);
            if (rmsd_data)
            {
                irmsd = add_binr(rb, inputrec->eI == eiSD2 ? 3 : 2, rmsd_data);
            }
        }
        if (!NEED_MUTOT(*inputrec))
        {
            imu = add_binr(rb, DIM, mu_tot);
            where();
        }

        for (j = 0; (j < egNR); j++)
        {
            inn[j] = add_binr(rb, enerd->grpp.nener, enerd->grpp.ener[j]);
        }
        where();
        if (inputrec->efep != efepNO)
        {
            idvdll  = add_bind(rb, efptNR, enerd->dvdl_lin);
            idvdlnl = add_bind(rb, efptNR, enerd->dvdl_nonlin);
            if (enerd->n_lambda > 0)
            {
                iepl = add_bind(rb, enerd->n_lambda, enerd->enerpart_lambda);
            }
        }
    }

    if (vcm)
    {
        icm   = add_binr(rb, DIM*vcm->nr, vcm->group_p[0]);
        where();
        imass = add_binr(rb, vcm->nr, vcm->group_mass);
        where();
        if (vcm->mode == ecmANGULAR)
        {
            icj   = add_binr(rb, DIM*vcm->nr, vcm->group_j[0]);
            where();
            icx   = add_binr(rb, DIM*vcm->nr, vcm->group_x[0]);
            where();
            ici   = add_binr(rb, DIM*DIM*vcm->nr, vcm->group_i[0][0]);
            where();
        }
    }

    if (DOMAINDECOMP(cr))
    {
        nb  = cr->dd->nbonded_local;
        inb = add_bind(rb, 1, &nb);
    }
    where();
    if (nsig > 0)
    {
        isig = add_binr(rb, nsig, sig);
    }

    /* Global sum it all */
    if (debug)
    {
        fprintf(debug, "Summing %d energies\n", rb->maxreal);
    }
    sum_bin(rb, cr);
    where();

    /* Extract all the data locally */

    if (bConstrVir)
    {
        extract_binr(rb, isv, DIM*DIM, svir[0]);
    }

    /* We need the force virial and the kinetic energy for the first time through with velocity verlet */
    if (bTemp || !bVV)
    {
        if (ekind)
        {
            for (j = 0; (j < inputrec->opts.ngtc); j++)
            {
                if (bSumEkinhOld)
                {
                    extract_binr(rb, itc0[j], DIM*DIM, ekind->tcstat[j].ekinh_old[0]);
                }
                if (bEkinAveVel && !bReadEkin)
                {
                    extract_binr(rb, itc1[j], DIM*DIM, ekind->tcstat[j].ekinf[0]);
                }
                else if (!bReadEkin)
                {
                    extract_binr(rb, itc1[j], DIM*DIM, ekind->tcstat[j].ekinh[0]);
                }
            }
            extract_binr(rb, idedl, 1, &(ekind->dekindl));
            extract_binr(rb, ica, 1, &(ekind->cosacc.mvcos));
            where();
        }
    }
    if (bPres || !bVV)
    {
        extract_binr(rb, ifv, DIM*DIM, fvir[0]);
    }

    if (bEner)
    {
        extract_binr(rb, ie, nener, copyenerd);
        if (rmsd_data)
        {
            extract_binr(rb, irmsd, inputrec->eI == eiSD2 ? 3 : 2, rmsd_data);
        }
        if (!NEED_MUTOT(*inputrec))
        {
            extract_binr(rb, imu, DIM, mu_tot);
        }

        for (j = 0; (j < egNR); j++)
        {
            extract_binr(rb, inn[j], enerd->grpp.nener, enerd->grpp.ener[j]);
        }
        if (inputrec->efep != efepNO)
        {
            extract_bind(rb, idvdll, efptNR, enerd->dvdl_lin);
            extract_bind(rb, idvdlnl, efptNR, enerd->dvdl_nonlin);
            if (enerd->n_lambda > 0)
            {
                extract_bind(rb, iepl, enerd->n_lambda, enerd->enerpart_lambda);
            }
        }
        if (DOMAINDECOMP(cr))
        {
            extract_bind(rb, inb, 1, &nb);
            if ((int)(nb + 0.5) != cr->dd->nbonded_global)
            {
                dd_print_missing_interactions(fplog, cr, (int)(nb + 0.5), top_global, state_local);
            }
        }
        where();

        filter_enerdterm(copyenerd, FALSE, enerd->term, bTemp, bPres, bEner);
    }

    if (vcm)
    {
        extract_binr(rb, icm, DIM*vcm->nr, vcm->group_p[0]);
        where();
        extract_binr(rb, imass, vcm->nr, vcm->group_mass);
        where();
        if (vcm->mode == ecmANGULAR)
        {
            extract_binr(rb, icj, DIM*vcm->nr, vcm->group_j[0]);
            where();
            extract_binr(rb, icx, DIM*vcm->nr, vcm->group_x[0]);
            where();
            extract_binr(rb, ici, DIM*DIM*vcm->nr, vcm->group_i[0][0]);
            where();
        }
    }

    if (nsig > 0)
    {
        extract_binr(rb, isig, nsig, sig);
    }
    where();
}
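
Note: global_stat packs every quantity into one flat buffer, performs a single reduction (sum_bin), and unpacks each block with the handle returned by add_binr/add_bind, so one communication step covers all observables. A minimal serial sketch of that pack/sum/extract pattern; t_minibin and its helpers are illustrative, and the reduction itself is stubbed out:

#include <stdio.h>
#include <string.h>

#define BINCAP 64

/* Toy flat reduction buffer mirroring the t_bin pattern above;
 * in the real code sum_bin() performs the MPI reduction. */
typedef struct { double r[BINCAP]; int n; } t_minibin;

static int add_vals(t_minibin *b, int n, const double *v)
{
    int index = b->n;

    memcpy(b->r + b->n, v, n*sizeof(double));
    b->n += n;
    return index;               /* handle used later for extraction */
}

static void extract_vals(const t_minibin *b, int index, int n, double *v)
{
    memcpy(v, b->r + index, n*sizeof(double));
}

static void sum_all(t_minibin *b)
{
    /* stand-in for MPI_Allreduce(MPI_IN_PLACE, b->r, b->n, ...) */
    (void)b;
}

int main(void)
{
    t_minibin bin = { { 0 }, 0 };
    double    vir[9]  = { 1, 0, 0, 0, 1, 0, 0, 0, 1 };
    double    ener[2] = { -5.0, 2.5 };
    int       ivir, ie;

    ivir = add_vals(&bin, 9, vir);    /* pack everything first ...      */
    ie   = add_vals(&bin, 2, ener);
    sum_all(&bin);                    /* ... one reduction for all data */
    extract_vals(&bin, ivir, 9, vir); /* ... then unpack by handle      */
    extract_vals(&bin, ie, 2, ener);
    printf("ener[0] = %g\n", ener[0]);
    return 0;
}
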
Example #19
gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
                          t_commrec                 *cr,
                          FILE                      *fp_err,
                          FILE                      *fp_log,
                          t_inputrec                *ir,
                          t_state                   *state,
                          double                     cycles,
                          interaction_const_t       *ic,
                          struct nonbonded_verlet_t *nbv,
                          struct gmx_pme_t **        pmedata,
                          gmx_int64_t                step)
{
    gmx_bool     OK;
    pme_setup_t *set;
    double       cycles_fast;
    char         buf[STRLEN], sbuf[22];
    real         rtab;
    gmx_bool     bUsesSimpleTables = TRUE;

    if (pme_lb->stage == pme_lb->nstage)
    {
        return FALSE;
    }

    if (PAR(cr))
    {
        gmx_sumd(1, &cycles, cr);
        cycles /= cr->nnodes;
    }

    set = &pme_lb->setup[pme_lb->cur];
    set->count++;

    rtab = ir->rlistlong + ir->tabext;

    if (set->count % 2 == 1)
    {
        /* Skip the first cycle, because the first step after a switch
         * is much slower due to allocation and/or caching effects.
         */
        return TRUE;
    }

    sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf));
    print_grid(fp_err, fp_log, buf, "timed with", set, cycles);

    if (set->count <= 2)
    {
        set->cycles = cycles;
    }
    else
    {
        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
            pme_lb->stage == pme_lb->nstage - 1)
        {
            /* The performance went up a lot (due to e.g. DD load balancing).
             * Add a stage, keep the minima, but rescan all setups.
             */
            pme_lb->nstage++;

            if (debug)
            {
                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
                        "Increased the number stages to %d"
                        " and ignoring the previous performance\n",
                        set->grid[XX], set->grid[YY], set->grid[ZZ],
                        cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
                        pme_lb->nstage);
            }
        }
        set->cycles = min(set->cycles, cycles);
    }

    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
    {
        pme_lb->fastest = pme_lb->cur;

        if (DOMAINDECOMP(cr))
        {
            /* We found a new fastest setting, ensure that with subsequent
             * shorter cut-off's the dynamic load balancing does not make
             * the use of the current cut-off impossible. This solution is
             * a trade-off, as the PME load balancing and DD domain size
             * load balancing can interact in complex ways.
             * With the Verlet kernels, DD load imbalance will usually be
             * mainly due to bonded interaction imbalance, which will often
             * quickly push the domain boundaries beyond the limit for the
             * optimal, PME load balanced, cut-off. But it could be that
             * better overall performance can be obtained with a slightly
             * shorter cut-off and better DD load balancing.
             */
            change_dd_dlb_cutoff_limit(cr);
        }
    }
    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;

    /* Check in stage 0 if we should stop scanning grids.
     * Stop when the time is more than SLOW_FAC longer than the fastest.
     */
    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
    {
        pme_lb->n = pme_lb->cur + 1;
        /* Done with scanning, go to stage 1 */
        switch_to_stage1(pme_lb);
    }

    if (pme_lb->stage == 0)
    {
        int gridsize_start;

        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];

        do
        {
            if (pme_lb->cur+1 < pme_lb->n)
            {
                /* We had already generated the next setup */
                OK = TRUE;
            }
            else
            {
                /* Find the next setup */
                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd);

                if (!OK)
                {
                    pme_lb->elimited = epmelblimPMEGRID;
                }
            }

            if (OK && ir->ePBC != epbcNONE)
            {
                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
                      <= max_cutoff2(ir->ePBC, state->box));
                if (!OK)
                {
                    pme_lb->elimited = epmelblimBOX;
                }
            }

            if (OK)
            {
                pme_lb->cur++;

                if (DOMAINDECOMP(cr))
                {
                    OK = change_dd_cutoff(cr, state, ir,
                                          pme_lb->setup[pme_lb->cur].rlistlong);
                    if (!OK)
                    {
                        /* Failed: do not use this setup */
                        pme_lb->cur--;
                        pme_lb->elimited = epmelblimDD;
                    }
                }
            }
            if (!OK)
            {
                /* We hit the upper limit for the cut-off,
                 * the setup should not go further than cur.
                 */
                pme_lb->n = pme_lb->cur + 1;
                print_loadbal_limited(fp_err, fp_log, step, pme_lb);
                /* Switch to the next stage */
                switch_to_stage1(pme_lb);
            }
        }
        while (OK &&
               !(pme_lb->setup[pme_lb->cur].grid[XX]*
                 pme_lb->setup[pme_lb->cur].grid[YY]*
                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
                 gridsize_start*PME_LB_GRID_SCALE_FAC
                 &&
                 pme_lb->setup[pme_lb->cur].grid_efficiency <
                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
    }

    if (pme_lb->stage > 0 && pme_lb->end == 1)
    {
        pme_lb->cur   = 0;
        pme_lb->stage = pme_lb->nstage;
    }
    else if (pme_lb->stage > 0 && pme_lb->end > 1)
    {
        /* If stage = nstage-1:
         *   scan over all setups, rerunning only those setups
         *   which are not much slower than the fastest
         * else:
         *   use the next setup
         */
        do
        {
            pme_lb->cur++;
            if (pme_lb->cur == pme_lb->end)
            {
                pme_lb->stage++;
                pme_lb->cur = pme_lb->start;
            }
        }
        while (pme_lb->stage == pme_lb->nstage - 1 &&
               pme_lb->setup[pme_lb->cur].count > 0 &&
               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);

        if (pme_lb->stage == pme_lb->nstage)
        {
            /* We are done optimizing, use the fastest setup we found */
            pme_lb->cur = pme_lb->fastest;
        }
    }

    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
    {
        OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
        if (!OK)
        {
            /* Failsafe solution */
            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
            {
                pme_lb->stage--;
            }
            pme_lb->fastest  = 0;
            pme_lb->start    = 0;
            pme_lb->end      = pme_lb->cur;
            pme_lb->cur      = pme_lb->start;
            pme_lb->elimited = epmelblimDD;
            print_loadbal_limited(fp_err, fp_log, step, pme_lb);
        }
    }

    /* Change the Coulomb cut-off and the PME grid */

    set = &pme_lb->setup[pme_lb->cur];

    ic->rcoulomb     = set->rcut_coulomb;
    ic->rlist        = set->rlist;
    ic->rlistlong    = set->rlistlong;
    ir->nstcalclr    = set->nstcalclr;
    ic->ewaldcoeff_q = set->ewaldcoeff_q;
    /* TODO: centralize the code that sets the potential shifts */
    if (ic->coulomb_modifier == eintmodPOTSHIFT)
    {
        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
    }
    if (EVDW_PME(ic->vdwtype))
    {
        /* We have PME for both Coulomb and VdW, set rvdw equal to rcoulomb */
        ic->rvdw            = set->rcut_coulomb;
        ic->ewaldcoeff_lj   = set->ewaldcoeff_lj;
        if (ic->vdw_modifier == eintmodPOTSHIFT)
        {
            real crc2;

            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
            ic->sh_invrc6             = -ic->dispersion_shift.cpot;
            crc2                      = sqr(ic->ewaldcoeff_lj*ic->rvdw);
            ic->sh_lj_ewald           = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
        }
    }

    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
    nbnxn_gpu_pme_loadbal_update_param(nbv, ic);

    /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
     * also sharing texture references. To keep the code simple, we don't
     * treat texture references as shared resources, but this means that
     * the coulomb_tab texture ref will get updated by multiple threads.
     * Hence, to ensure that the non-bonded kernels don't start before all
     * texture binding operations are finished, we need to wait for all ranks
     * to arrive here before continuing.
     *
     * Note that we could omit this barrier if GPUs are not shared (or
     * texture objects are used), but as this is initialization code, there
     * is no point in complicating things.
     */
#ifdef GMX_THREAD_MPI
    if (PAR(cr) && use_GPU(nbv))
    {
        gmx_barrier(cr);
    }
#endif  /* GMX_THREAD_MPI */

    /* Usually we won't need the simple tables with GPUs.
     * But we do with hybrid acceleration and with free energy.
     * To avoid bugs, we always re-initialize the simple tables here.
     */
    init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab);

    if (cr->duty & DUTY_PME)
    {
        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
        {
            /* Generate a new PME data structure,
             * copying part of the old pointers.
             */
            gmx_pme_reinit(&set->pmedata,
                           cr, pme_lb->setup[0].pmedata, ir,
                           set->grid);
        }
        *pmedata = set->pmedata;
    }
    else
    {
        /* Tell our PME-only node to switch grid */
        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff_q, set->ewaldcoeff_lj);
    }

    if (debug)
    {
        print_grid(NULL, debug, "", "switched to", set, -1);
    }

    if (pme_lb->stage == pme_lb->nstage)
    {
        print_grid(fp_err, fp_log, "", "optimal", set, -1);
    }

    return TRUE;
}
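
The staged search above is driven once per MD step from the integrator
loop. A minimal usage sketch (not from the project sources; the wrapper
name and the assumption that the caller tracks the previous step's cycle
count are hypothetical):

static void do_pme_tuning_step(pme_load_balancing_t       pme_lb,
                               t_commrec                 *cr,
                               FILE                      *fplog,
                               t_inputrec                *ir,
                               t_state                   *state,
                               double                     prev_step_cycles,
                               interaction_const_t       *ic,
                               struct nonbonded_verlet_t *nbv,
                               struct gmx_pme_t         **pmedata,
                               gmx_int64_t                step)
{
    /* pme_load_balance() returns FALSE once all stages are finished;
     * from then on the fastest setup stays active and nothing changes.
     */
    if (!pme_load_balance(pme_lb, cr, stderr, fplog, ir, state,
                          prev_step_cycles, ic, nbv, pmedata, step))
    {
        /* Tuning done: ic->rcoulomb, ic->rlist and *pmedata now hold
         * the optimal setup found during the scan.
         */
    }
}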
Example #20
0
File: qmmm.c Project: t-/adaptive
void update_QMMMrec(t_commrec *cr,
		    t_forcerec *fr,
		    rvec x[],
		    t_mdatoms *md,
		    matrix box,
		    gmx_localtop_t *top)
{
  /* updates the coordinates of both QM atoms and MM atoms and stores
   * them in the QMMMrec.
   *
   * NOTE: does NOT yet work if there are no PBC. Also the simple ns
   * in ns.c needs to be fixed!
   */
  int
    mm_max=0,mm_nr=0,mm_nr_new,i,j,is,k,shift;
  t_j_particle
    *mm_j_particles=NULL,*qm_i_particles=NULL;
  t_QMMMrec
    *qr;
  t_nblist
    QMMMlist;
  rvec
    dx,crd;
  int
    *MMatoms;
  t_QMrec
    *qm;
  t_MMrec
    *mm;
  t_pbc
    pbc;
  int
    *parallelMMarray=NULL;
  real
    c12au,c6au;

  c6au  = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,6));
  c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,12));

  /* Every cpu has this array. On every processor we fill this array
   * with 1's and 0's: a 1 indicates that the atom is a QM atom on the
   * current cpu. In a later stage these arrays are all summed; indexes
   * > 0 then indicate the atom is a QM atom. Every node therefore knows
   * which atoms are part of the QM subsystem.
   */
  /* copy some pointers */
  qr          = fr->qr;
  mm          = qr->mm;
  QMMMlist    = fr->QMMMlist;



  /*  init_pbc(box);  needs to be called first, see pbc.h */
  set_pbc_dd(&pbc,fr->ePBC,DOMAINDECOMP(cr) ? cr->dd : NULL,FALSE,box);
  /* Only in standard (normal) QMMM do we need the neighbouring MM
   * particles to provide an electric field of point charges for the QM
   * atoms.
   */
  if(qr->QMMMscheme==eQMMMschemenormal){ /* also implies 1 QM-layer */
    /* we NOW create/update a number of QMMMrec entries:
     *
     * 1) the shiftQM, containing the shifts of the QM atoms
     *
     * 2) the indexMM array, containing the index of the MM atoms
     *
     * 3) the shiftMM, containing the shifts of the MM atoms
     *
     * 4) the shifted coordinates of the MM atoms
     *
     * the shifts are used for computing virial of the QM/MM particles.
     */
    qm = qr->qm[0]; /* in case of normal QMMM, there is only one group */
    snew(qm_i_particles,QMMMlist.nri);
    if(QMMMlist.nri){
      qm_i_particles[0].shift = XYZ2IS(0,0,0);
      for(i=0;i<QMMMlist.nri;i++){
	qm_i_particles[i].j     = QMMMlist.iinr[i];

	if(i){
	  qm_i_particles[i].shift = pbc_dx_aiuc(&pbc,x[QMMMlist.iinr[0]],
						x[QMMMlist.iinr[i]],dx);

	}
	/* However, since nri >= nrQMatoms, we do a quicksort, and throw
	 * out double, triple, etc. entries later, as we do for the MM
	 * list too.
	 */

	/* compute the shift for the MM j-particles with respect to
	 * the QM i-particle and store them.
	 */

	crd[0] = IS2X(QMMMlist.shift[i]) + IS2X(qm_i_particles[i].shift);
	crd[1] = IS2Y(QMMMlist.shift[i]) + IS2Y(qm_i_particles[i].shift);
	crd[2] = IS2Z(QMMMlist.shift[i]) + IS2Z(qm_i_particles[i].shift);
	is = XYZ2IS(crd[0],crd[1],crd[2]);
	for(j=QMMMlist.jindex[i];
	    j<QMMMlist.jindex[i+1];
	    j++){
	  if(mm_nr >= mm_max){
	    mm_max += 1000;
	    srenew(mm_j_particles,mm_max);
	  }

	  mm_j_particles[mm_nr].j = QMMMlist.jjnr[j];
	  mm_j_particles[mm_nr].shift = is;
	  mm_nr++;
	}
      }

      /* quicksort QM and MM shift arrays and throw away multiple entries */



      qsort(qm_i_particles,QMMMlist.nri,
	    (size_t)sizeof(qm_i_particles[0]),
	    struct_comp);
      qsort(mm_j_particles,mm_nr,
	    (size_t)sizeof(mm_j_particles[0]),
	    struct_comp);
      /* remove multiples in the QM shift array, since in init_QMMM() we
       * went through the atom numbers from 0 to md.nr, the order sorted
       * here matches the one of QMindex already.
       */
      j=0;
      for(i=0;i<QMMMlist.nri;i++){
	if (i==0 || qm_i_particles[i].j!=qm_i_particles[i-1].j){
	  qm_i_particles[j++] = qm_i_particles[i];
	}
      }
      mm_nr_new = 0;
      if(qm->bTS||qm->bOPT){
	/* only remove double entries for the MM array */
	for(i=0;i<mm_nr;i++){
	  if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j)
	     && !md->bQM[mm_j_particles[i].j]){
	    mm_j_particles[mm_nr_new++] = mm_j_particles[i];
	  }
	}
      }
      /* We also remove mm atoms that have no charges!
       * Actually this is already done in ns.c.
       */
      else{
	for(i=0;i<mm_nr;i++){
	  if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j)
	     && !md->bQM[mm_j_particles[i].j]
	     && (md->chargeA[mm_j_particles[i].j]
		 || (md->chargeB && md->chargeB[mm_j_particles[i].j]))) {
	    mm_j_particles[mm_nr_new++] = mm_j_particles[i];
	  }
	}
      }
      mm_nr = mm_nr_new;
      /* store the data retrieved above into the QMMMrec
       */
      k=0;
      /* Keep the compiler happy,
       * shift will always be set in the loop for i=0
       */
      shift = 0;
      for(i=0;i<qm->nrQMatoms;i++){
	/* not all qm particles might have appeared as i
	 * particles. They might have been part of the same charge
	 * group for instance.
	 */
	if (qm->indexQM[i] == qm_i_particles[k].j) {
	  shift = qm_i_particles[k++].shift;
	}
	/* use previous shift, assuming they belong to the same charge
	 * group anyway,
	 */

	qm->shiftQM[i] = shift;
      }
    }
    /* parallel execution */
    if(PAR(cr)){
      snew(parallelMMarray,2*(md->nr));
      /* only MM particles have a 1 at their atomnumber. The second part
       * of the array contains the shifts. Thus:
       * p[i]=1/0 depending on whether atomnumber i is a MM particle in the QM
       * step or not. p[i+md->nr] is the shift of atomnumber i.
       */
      for(i=0;i<2*(md->nr);i++){
	parallelMMarray[i]=0;
      }

      for(i=0;i<mm_nr;i++){
	parallelMMarray[mm_j_particles[i].j]=1;
	parallelMMarray[mm_j_particles[i].j+(md->nr)]=mm_j_particles[i].shift;
      }
      /* both halves (flags and shifts) need to be reduced */
      gmx_sumi(2*md->nr,parallelMMarray,cr);
      mm_nr=0;

      mm_max = 0;
      for(i=0;i<md->nr;i++){
	if(parallelMMarray[i]){
	  if(mm_nr >= mm_max){
	    mm_max += 1000;
	    srenew(mm->indexMM,mm_max);
	    srenew(mm->shiftMM,mm_max);
	  }
	  mm->indexMM[mm_nr]  = i;
	  mm->shiftMM[mm_nr++]= parallelMMarray[i+md->nr]/parallelMMarray[i];
	}
      }
      mm->nrMMatoms=mm_nr;
      free(parallelMMarray);
    }
    /* serial execution */
    else{
      mm->nrMMatoms = mm_nr;
      srenew(mm->shiftMM,mm_nr);
      srenew(mm->indexMM,mm_nr);
      for(i=0;i<mm_nr;i++){
	mm->indexMM[i]=mm_j_particles[i].j;
	mm->shiftMM[i]=mm_j_particles[i].shift;
      }

    }
    /* (re) allocate memory for the MM coordinate array. The QM
     * coordinate array was already allocated in init_QMMM, and is
     * only (re)filled in the update_QMMM_coordinates routine
     */
    srenew(mm->xMM,mm->nrMMatoms);
    /* now we (re) fill the array that contains the MM charges with
     * the forcefield charges. If requested, these charges will be
     * scaled by a factor
     */
    srenew(mm->MMcharges,mm->nrMMatoms);
    for(i=0;i<mm->nrMMatoms;i++){/* no free energy yet */
      mm->MMcharges[i]=md->chargeA[mm->indexMM[i]]*mm->scalefactor;
    }
    if(qm->bTS||qm->bOPT){
      /* store (copy) the c6 and c12 parameters into the MMrec struct
       */
      srenew(mm->c6,mm->nrMMatoms);
      srenew(mm->c12,mm->nrMMatoms);
      for (i=0;i<mm->nrMMatoms;i++){
	mm->c6[i]  = C6(fr->nbfp,top->idef.atnr,
			md->typeA[mm->indexMM[i]],
			md->typeA[mm->indexMM[i]])/c6au;
	mm->c12[i] =C12(fr->nbfp,top->idef.atnr,
			md->typeA[mm->indexMM[i]],
			md->typeA[mm->indexMM[i]])/c12au;
      }
      punch_QMMM_excl(qr->qm[0],mm,&(top->excls));
    }
    /* the next routine fills the coordinate fields in the QMMM rec of
     * both the quantum atoms and the MM atoms, using the shifts
     * calculated above.
     */

    update_QMMM_coord(x,fr,qr->qm[0],qr->mm);
    free(qm_i_particles);
    free(mm_j_particles);
  }
  else { /* ONIOM */ /* ????? */
    mm->nrMMatoms=0;
    /* do for each layer */
    for (j=0;j<qr->nrQMlayers;j++){
      qm = qr->qm[j];
      qm->shiftQM[0]=XYZ2IS(0,0,0);
      for(i=1;i<qm->nrQMatoms;i++){
	qm->shiftQM[i] = pbc_dx_aiuc(&pbc,x[qm->indexQM[0]],x[qm->indexQM[i]],
				     dx);
      }
      update_QMMM_coord(x,fr,qm,mm);
    }
  }
} /* update_QMMM_rec */
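
The parallel branch above packs two facts per atom into one integer array
so that a single reduction suffices. A minimal sketch of that encoding,
with a hypothetical helper (the real code works in place on
parallelMMarray):

static int gather_mm_atoms(int natoms, const int *is_mm, const int *shift_loc,
                           int *p, t_commrec *cr, int *indexMM, int *shiftMM)
{
    int i, nmm = 0;

    /* First half: 1 if atom i is an MM atom on this rank, else 0.
     * Second half: the local shift of atom i (0 if not present here).
     */
    for (i = 0; i < natoms; i++)
    {
        p[i]          = is_mm[i];
        p[i + natoms] = is_mm[i] ? shift_loc[i] : 0;
    }
    gmx_sumi(2*natoms, p, cr);   /* element-wise sum over all ranks */

    /* An atom flagged on k ranks now has p[i] == k and a k-fold summed
     * shift; dividing by the count restores the per-rank shift.
     */
    for (i = 0; i < natoms; i++)
    {
        if (p[i])
        {
            indexMM[nmm]   = i;
            shiftMM[nmm++] = p[i + natoms]/p[i];
        }
    }
    return nmm;
}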
Example #21
0
void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
                                        const gmx_hw_opt_t  *hw_opt,
                                        gmx_bool             bNtOmpOptionSet,
                                        t_commrec           *cr,
                                        FILE                *fplog)
{
#if defined GMX_OPENMP && defined GMX_MPI
    int         nth_omp_min, nth_omp_max, ngpu;
    char        buf[1000];
#ifdef GMX_THREAD_MPI
    const char *mpi_option = " (option -ntmpi)";
#else
    const char *mpi_option = "";
#endif

    /* This function should be called after thread-MPI (when configured) and
     * OpenMP have been initialized. Check that here.
     */
#ifdef GMX_THREAD_MPI
    GMX_RELEASE_ASSERT(nthreads_omp_faster_default >= nthreads_omp_mpi_ok_max, "Inconsistent OpenMP thread count default values");
    GMX_RELEASE_ASSERT(hw_opt->nthreads_tmpi >= 1, "Must have at least one thread-MPI rank");
#endif
    GMX_RELEASE_ASSERT(gmx_omp_nthreads_get(emntDefault) >= 1, "Must have at least one OpenMP thread");

    nth_omp_min = gmx_omp_nthreads_get(emntDefault);
    nth_omp_max = gmx_omp_nthreads_get(emntDefault);
    ngpu        = hw_opt->gpu_opt.n_dev_use;

    /* Thread-MPI seems to have a bug with reduce on 1 node, so use a conditional. */
    if (cr->nnodes + cr->npmenodes > 1)
    {
        int count[3], count_max[3];

        count[0] = -nth_omp_min;
        count[1] =  nth_omp_max;
        count[2] =  ngpu;

        MPI_Allreduce(count, count_max, 3, MPI_INT, MPI_MAX, cr->mpi_comm_mysim);

        /* In case of an inhomogeneous run setup we use the maximum counts */
        nth_omp_min = -count_max[0];
        nth_omp_max =  count_max[1];
        ngpu        =  count_max[2];
    }

    int nthreads_omp_mpi_ok_min;

    if (ngpu == 0)
    {
        nthreads_omp_mpi_ok_min = nthreads_omp_mpi_ok_min_cpu;
    }
    else
    {
        /* With GPUs we set the minimum number of OpenMP threads to 2 to catch
         * cases where the user specifies #ranks == #cores.
         */
        nthreads_omp_mpi_ok_min = nthreads_omp_mpi_ok_min_gpu;
    }

    if (DOMAINDECOMP(cr) && cr->nnodes > 1)
    {
        if (nth_omp_max < nthreads_omp_mpi_ok_min ||
            (!(ngpu > 0 && !gmx_gpu_sharing_supported()) &&
             nth_omp_max > nthreads_omp_mpi_ok_max))
        {
            /* Note that we print target_max here, not ok_max */
            sprintf(buf, "Your choice of number of MPI ranks and amount of resources results in using %d OpenMP threads per rank, which is most likely inefficient. The optimum is usually between %d and %d threads per rank.",
                    nth_omp_max,
                    nthreads_omp_mpi_ok_min,
                    nthreads_omp_mpi_target_max);

            if (bNtOmpOptionSet)
            {
                md_print_warn(cr, fplog, "NOTE: %s\n", buf);
            }
            else
            {
                /* This fatal error, and the one below, is nasty, but it's
                 * probably the only way to ensure that all users don't waste
                 * a lot of resources, since many users don't read logs/stderr.
                 */
                gmx_fatal(FARGS, "%s If you want to run with this setup, specify the -ntomp option. But we suggest to change the number of MPI ranks%s.", buf, mpi_option);
            }
        }
    }
    else
    {
        /* No domain decomposition (or only one domain) */
        if (!(ngpu > 0 && !gmx_gpu_sharing_supported()) &&
            nth_omp_max > nthreads_omp_faster(hwinfo->cpuid_info, ngpu > 0))
        {
            /* To arrive here, the user/system set #ranks and/or #OMPthreads */
            gmx_bool bEnvSet;
            char     buf2[256];

            bEnvSet = (getenv("OMP_NUM_THREADS") != NULL);

            if (bNtOmpOptionSet || bEnvSet)
            {
                sprintf(buf2, "You requested %d OpenMP threads", nth_omp_max);
            }
            else
            {
                sprintf(buf2, "Your choice of %d MPI rank%s and the use of %d total threads %sleads to the use of %d OpenMP threads",
                        cr->nnodes + cr->npmenodes,
                        cr->nnodes + cr->npmenodes == 1 ? "" : "s",
                        hw_opt->nthreads_tot > 0 ? hw_opt->nthreads_tot : hwinfo->nthreads_hw_avail,
                        hwinfo->nphysicalnode > 1 ? "on a node " : "",
                        nth_omp_max);
            }
            sprintf(buf, "%s, whereas we expect the optimum to be with more MPI ranks with %d to %d OpenMP threads.",
                    buf2, nthreads_omp_mpi_ok_min, nthreads_omp_mpi_target_max);

            /* We cannot quit with a fatal error when OMP_NUM_THREADS is set
             * with different values per rank or node, since in that case
             * the user cannot set -ntomp to override the error.
             */
            if (bNtOmpOptionSet || (bEnvSet && nth_omp_min != nth_omp_max))
            {
                md_print_warn(cr, fplog, "NOTE: %s\n", buf);
            }
            else
            {
                gmx_fatal(FARGS, "%s If you want to run with this many OpenMP threads, specify the -ntomp option. But we suggest to increase the number of MPI ranks%s.", buf, mpi_option);
            }
        }
    }
#else /* GMX_OPENMP && GMX_MPI */
      /* No OpenMP and/or MPI: it doesn't make much sense to check */
    GMX_UNUSED_VALUE(hw_opt);
    GMX_UNUSED_VALUE(bNtOmpOptionSet);
    /* Check if we have more than 1 physical core, if detected,
     * or more than 1 hardware thread if physical cores were not detected.
     */
#if !(defined GMX_OPENMP) && !(defined GMX_MPI)
    if ((hwinfo->ncore > 1) ||
        (hwinfo->ncore == 0 && hwinfo->nthreads_hw_avail > 1))
    {
        md_print_warn(cr, fplog, "NOTE: GROMACS was compiled without OpenMP and (thread-)MPI support, can only use a single CPU core\n");
    }
#else
    GMX_UNUSED_VALUE(hwinfo);
    GMX_UNUSED_VALUE(cr);
    GMX_UNUSED_VALUE(fplog);
#endif

#endif /* GMX_OPENMP && GMX_MPI */
}
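
The reduction above uses a standard MPI idiom: one MPI_MAX reduction
yields both a maximum and a minimum by negating the value whose minimum
is wanted. A self-contained sketch of just that trick (the helper name is
illustrative):

#include <mpi.h>

static void allreduce_min_max(MPI_Comm comm, int local, int *gmin, int *gmax)
{
    int count[2], count_max[2];

    count[0] = -local;   /* max over -x equals -(min over x) */
    count[1] =  local;

    MPI_Allreduce(count, count_max, 2, MPI_INT, MPI_MAX, comm);

    *gmin = -count_max[0];
    *gmax =  count_max[1];
}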
Example #22
0
static void init_adir(FILE *log, gmx_shellfc_t shfc,
                      gmx_constr_t constr, t_idef *idef, t_inputrec *ir,
                      t_commrec *cr, int dd_ac1,
                      gmx_int64_t step, t_mdatoms *md, int start, int end,
                      rvec *x_old, rvec *x_init, rvec *x,
                      rvec *f, rvec *acc_dir,
                      gmx_bool bMolPBC, matrix box,
                      real *lambda, real *dvdlambda, t_nrnb *nrnb)
{
    rvec           *xnold, *xnew;
    double          w_dt;
    int             gf, ga, gt;
    real            dt, scale;
    int             n, d;
    unsigned short *ptype;
    rvec            p, dx;

    if (DOMAINDECOMP(cr))
    {
        n = dd_ac1;
    }
    else
    {
        n = end - start;
    }
    if (n > shfc->adir_nalloc)
    {
        shfc->adir_nalloc = over_alloc_dd(n);
        srenew(shfc->adir_xnold, shfc->adir_nalloc);
        srenew(shfc->adir_xnew, shfc->adir_nalloc);
    }
    xnold = shfc->adir_xnold;
    xnew  = shfc->adir_xnew;

    ptype = md->ptype;

    dt = ir->delta_t;

    /* Does NOT work with freeze or acceleration groups (yet) */
    for (n = start; n < end; n++)
    {
        w_dt = md->invmass[n]*dt;

        for (d = 0; d < DIM; d++)
        {
            if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
            {
                xnold[n-start][d] = x[n][d] - (x_init[n][d] - x_old[n][d]);
                xnew[n-start][d]  = 2*x[n][d] - x_old[n][d] + f[n][d]*w_dt*dt;
            }
            else
            {
                xnold[n-start][d] = x[n][d];
                xnew[n-start][d]  = x[n][d];
            }
        }
    }
    constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md,
              x, xnold-start, NULL, bMolPBC, box,
              lambda[efptBONDED], &(dvdlambda[efptBONDED]),
              NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
    constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md,
              x, xnew-start, NULL, bMolPBC, box,
              lambda[efptBONDED], &(dvdlambda[efptBONDED]),
              NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);

    for (n = start; n < end; n++)
    {
        for (d = 0; d < DIM; d++)
        {
            xnew[n-start][d] =
                -(2*x[n][d]-xnold[n-start][d]-xnew[n-start][d])/sqr(dt)
                - f[n][d]*md->invmass[n];
        }
        clear_rvec(acc_dir[n]);
    }

    /* Project the acceleration on the old bond directions */
    constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md,
              x_old, xnew-start, acc_dir, bMolPBC, box,
              lambda[efptBONDED], &(dvdlambda[efptBONDED]),
              NULL, NULL, nrnb, econqDeriv_FlexCon, FALSE, 0, 0);
}
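
The core of init_adir() is a per-component Verlet-style prediction that
is then passed through the constraint solver; the difference between the
constrained old and predicted positions yields the acceleration along the
bond directions. A minimal sketch of the two positions built for one
coordinate component (hypothetical helper; w_dt is invmass*dt as in the
loop above):

static void adir_positions(double x, double x_old, double x_init,
                           double f, double w_dt, double dt,
                           double *xnold, double *xnew)
{
    /* Previous position with the constraint correction (x_init - x_old)
     * removed, i.e. where the atom came from before constraining.
     */
    *xnold = x - (x_init - x_old);
    /* Verlet-style prediction from the current force: x' = 2x - x_old + a*dt^2 */
    *xnew  = 2*x - x_old + f*w_dt*dt;
}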
Example #23
0
void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
                       t_idef     *idef,    t_commrec  *cr,
                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
                       t_mdatoms  *md,
                       rvec       x[],      history_t  *hist,
                       rvec       f[],
                       rvec       f_longrange[],
                       gmx_enerdata_t *enerd,
                       t_fcdata   *fcd,
                       gmx_localtop_t *top,
                       gmx_genborn_t *born,
                       gmx_bool       bBornRadii,
                       matrix     box,
                       t_lambda   *fepvals,
                       real       *lambda,
                       t_graph    *graph,
                       t_blocka   *excl,
                       rvec       mu_tot[],
                       int        flags,
                       float      *cycles_pme)
{
    int         i, j;
    int         donb_flags;
    gmx_bool    bSB;
    int         pme_flags;
    matrix      boxs;
    rvec        box_size;
    t_pbc       pbc;
    real        dvdl_dum[efptNR], dvdl_nb[efptNR];

#ifdef GMX_MPI
    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif

    set_pbc(&pbc, fr->ePBC, box);

    /* reset free energy components */
    for (i = 0; i < efptNR; i++)
    {
        dvdl_nb[i]  = 0;
        dvdl_dum[i] = 0;
    }

    /* Reset box */
    for (i = 0; (i < DIM); i++)
    {
        box_size[i] = box[i][i];
    }

    debug_gmx();

    /* do QMMM first if requested */
    if (fr->bQMMM)
    {
        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
    }

    /* Call the short range functions all in one go. */

#ifdef GMX_MPI
    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
#define TAKETIME FALSE
    if (TAKETIME)
    {
        MPI_Barrier(cr->mpi_comm_mygroup);
        t0 = MPI_Wtime();
    }
#endif

    if (ir->nwall)
    {
        /* foreign lambda component for walls */
        real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
                                   enerd->grpp.ener[egLJSR], nrnb);
        enerd->dvdl_lin[efptVDW] += dvdl_walls;
    }

    /* If doing GB, reset dvda and calculate the Born radii */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsNONBONDED);

        for (i = 0; i < born->nr; i++)
        {
            fr->dvda[i] = 0;
        }

        if (bBornRadii)
        {
            calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
        }

        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
    }

    where();
    /* We only do non-bonded calculation with group scheme here, the verlet
     * calls are done from do_force_cutsVERLET(). */
    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
    {
        donb_flags = 0;
        /* Add short-range interactions */
        donb_flags |= GMX_NONBONDED_DO_SR;

        /* Currently all group scheme kernels always calculate (shift-)forces */
        if (flags & GMX_FORCE_FORCES)
        {
            donb_flags |= GMX_NONBONDED_DO_FORCE;
        }
        if (flags & GMX_FORCE_VIRIAL)
        {
            donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
        }
        if (flags & GMX_FORCE_ENERGY)
        {
            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
        }
        if (flags & GMX_FORCE_DO_LR)
        {
            donb_flags |= GMX_NONBONDED_DO_LR;
        }

        wallcycle_sub_start(wcycle, ewcsNONBONDED);
        do_nonbonded(fr, x, f, f_longrange, md, excl,
                     &enerd->grpp, nrnb,
                     lambda, dvdl_nb, -1, -1, donb_flags);

        /* If we do foreign lambda and we have soft-core interactions
         * we have to recalculate the (non-linear) energies contributions.
         */
        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
        {
            for (i = 0; i < enerd->n_lambda; i++)
            {
                real lam_i[efptNR];

                for (j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                }
                reset_foreign_enerdata(enerd);
                do_nonbonded(fr, x, f, f_longrange, md, excl,
                             &(enerd->foreign_grpp), nrnb,
                             lam_i, dvdl_dum, -1, -1,
                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
        where();
    }

    /* If we are doing GB, calculate bonded forces and apply corrections
     * to the solvation forces */
    /* MRS: Eventually, many need to include free energy contribution here! */
    if (ir->implicit_solvent)
    {
        wallcycle_sub_start(wcycle, ewcsLISTED);
        calc_gb_forces(cr, md, born, top, x, f, fr, idef,
                       ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
        wallcycle_sub_stop(wcycle, ewcsLISTED);
    }

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t1          = MPI_Wtime();
        fr->t_fnbf += t1-t0;
    }
#endif

    if (fepvals->sc_alpha != 0)
    {
        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
    }
    else
    {
        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
    }

    if (fepvals->sc_alpha != 0)
    {
        /* Even though the coulomb part is linear, we already added it,
         * because we need to go through the vdw calculation anyway.
         */
        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
    }
    else
    {
        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
    }

    debug_gmx();


    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
    }

    /* Shift the coordinates. Must be done before listed forces and PPPM,
     * but is also necessary for SHAKE and update, therefore it can NOT
     * go when no listed forces have to be evaluated.
     *
     * The shifting and PBC code is deliberately not timed, since with
     * the Verlet scheme it only takes non-zero time with triclinic
     * boxes, and even then the time is around a factor of 100 less
     * than the next smallest counter.
     */


    /* Here sometimes we would not need to shift with NBFonly,
     * but we do so anyhow for consistency of the returned coordinates.
     */
    if (graph)
    {
        shift_self(graph, box, x);
        if (TRICLINIC(box))
        {
            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
        }
        else
        {
            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
        }
    }
    /* Check whether we need to do listed interactions or correct for exclusions */
    if (fr->bMolPBC &&
        ((flags & GMX_FORCE_LISTED)
         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
    {
        /* TODO There are no electrostatics methods that require this
           transformation, when using the Verlet scheme, so update the
           above conditional. */
        /* Since all atoms are in the rectangular or triclinic unit-cell,
         * only single box vector shifts (2 in x) are required.
         */
        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
    }
    debug_gmx();

    do_force_listed(wcycle, box, ir->fepvals, cr->ms,
                    idef, (const rvec *) x, hist, f, fr,
                    &pbc, graph, enerd, nrnb, lambda, md, fcd,
                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
                    flags);

    where();

    *cycles_pme = 0;
    clear_mat(fr->vir_el_recip);
    clear_mat(fr->vir_lj_recip);

    /* Do long-range electrostatics and/or LJ-PME, including related short-range
     * corrections.
     */
    if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
    {
        int  status            = 0;
        real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
        real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;

        bSB = (ir->nwall == 2);
        if (bSB)
        {
            copy_mat(box, boxs);
            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
            box_size[ZZ] *= ir->wall_ewald_zfac;
        }

        if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
        {
            real dvdl_long_range_correction_q   = 0;
            real dvdl_long_range_correction_lj  = 0;
            /* With the Verlet scheme exclusion forces are calculated
             * in the non-bonded kernel.
             */
            /* The TPI molecule does not have exclusions with the rest
             * of the system and no intra-molecular PME grid
             * contributions will be calculated in
             * gmx_pme_calc_energy.
             */
            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
                ir->ewald_geometry != eewg3D ||
                ir->epsilon_surface != 0)
            {
                int nthreads, t;

                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                if (fr->n_tpi > 0)
                {
                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                }

                nthreads = gmx_omp_nthreads_get(emntBonded);
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (t = 0; t < nthreads; t++)
                {
                    int     i;
                    rvec   *fnv;
                    tensor *vir_q, *vir_lj;
                    real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
                    if (t == 0)
                    {
                        fnv       = fr->f_novirsum;
                        vir_q     = &fr->vir_el_recip;
                        vir_lj    = &fr->vir_lj_recip;
                        Vcorrt_q  = &Vcorr_q;
                        Vcorrt_lj = &Vcorr_lj;
                        dvdlt_q   = &dvdl_long_range_correction_q;
                        dvdlt_lj  = &dvdl_long_range_correction_lj;
                    }
                    else
                    {
                        fnv       = fr->f_t[t].f;
                        vir_q     = &fr->f_t[t].vir_q;
                        vir_lj    = &fr->f_t[t].vir_lj;
                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
                        for (i = 0; i < fr->natoms_force; i++)
                        {
                            clear_rvec(fnv[i]);
                        }
                        clear_mat(*vir_q);
                        clear_mat(*vir_lj);
                    }
                    *dvdlt_q  = 0;
                    *dvdlt_lj = 0;

                    ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                       cr, t, fr,
                                       md->chargeA, md->chargeB,
                                       md->sqrt_c6A, md->sqrt_c6B,
                                       md->sigmaA, md->sigmaB,
                                       md->sigma3A, md->sigma3B,
                                       md->nChargePerturbed || md->nTypePerturbed,
                                       ir->cutoff_scheme != ecutsVERLET,
                                       excl, x, bSB ? boxs : box, mu_tot,
                                       ir->ewald_geometry,
                                       ir->epsilon_surface,
                                       fnv, *vir_q, *vir_lj,
                                       Vcorrt_q, Vcorrt_lj,
                                       lambda[efptCOUL], lambda[efptVDW],
                                       dvdlt_q, dvdlt_lj);
                }
                if (nthreads > 1)
                {
                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
                                         fr->vir_el_recip, fr->vir_lj_recip,
                                         &Vcorr_q, &Vcorr_lj,
                                         &dvdl_long_range_correction_q,
                                         &dvdl_long_range_correction_lj,
                                         nthreads, fr->f_t);
                }
                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }

            if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
            {
                /* This is not in a subcounter because it takes a
                   negligible and constant-sized amount of time */
                Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
                                                   &dvdl_long_range_correction_q,
                                                   fr->vir_el_recip);
            }

            enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
            enerd->dvdl_lin[efptVDW]  += dvdl_long_range_correction_lj;

            if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
            {
                /* Do reciprocal PME for Coulomb and/or LJ. */
                assert(fr->n_tpi >= 0);
                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
                {
                    pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
                    if (EEL_PME(fr->eeltype))
                    {
                        pme_flags     |= GMX_PME_DO_COULOMB;
                    }
                    if (EVDW_PME(fr->vdwtype))
                    {
                        pme_flags |= GMX_PME_DO_LJ;
                    }
                    if (flags & GMX_FORCE_FORCES)
                    {
                        pme_flags |= GMX_PME_CALC_F;
                    }
                    if (flags & GMX_FORCE_VIRIAL)
                    {
                        pme_flags |= GMX_PME_CALC_ENER_VIR;
                    }
                    if (fr->n_tpi > 0)
                    {
                        /* We don't calculate f, but we do want the potential */
                        pme_flags |= GMX_PME_CALC_POT;
                    }
                    wallcycle_start(wcycle, ewcPMEMESH);
                    status = gmx_pme_do(fr->pmedata,
                                        0, md->homenr - fr->n_tpi,
                                        x, fr->f_novirsum,
                                        md->chargeA, md->chargeB,
                                        md->sqrt_c6A, md->sqrt_c6B,
                                        md->sigmaA, md->sigmaB,
                                        bSB ? boxs : box, cr,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
                                        nrnb, wcycle,
                                        fr->vir_el_recip, fr->ewaldcoeff_q,
                                        fr->vir_lj_recip, fr->ewaldcoeff_lj,
                                        &Vlr_q, &Vlr_lj,
                                        lambda[efptCOUL], lambda[efptVDW],
                                        &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags);
                    *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
                    if (status != 0)
                    {
                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
                    }
                    /* We should try to do as little computation after
                     * this as possible, because parallel PME synchronizes
                     * the nodes, so we want all load imbalance of the
                     * rest of the force calculation to be before the PME
                     * call.  DD load balancing is done on the whole time
                     * of the force call (without PME).
                     */
                }
                if (fr->n_tpi > 0)
                {
                    if (EVDW_PME(ir->vdwtype))
                    {
                        gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
                    }
                    /* Determine the PME grid energy of the test molecule
                     * with the PME grid potential of the other charges.
                     */
                    gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
                                        x + md->homenr - fr->n_tpi,
                                        md->chargeA + md->homenr - fr->n_tpi,
                                        &Vlr_q);
                }
            }
        }

        if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype))
        {
            Vlr_q = do_ewald(ir, x, fr->f_novirsum,
                             md->chargeA, md->chargeB,
                             box_size, cr, md->homenr,
                             fr->vir_el_recip, fr->ewaldcoeff_q,
                             lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
        }

        /* Note that with separate PME nodes we get the real energies later */
        enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q;
        enerd->dvdl_lin[efptVDW]  += dvdl_long_range_lj;
        enerd->term[F_COUL_RECIP]  = Vlr_q + Vcorr_q;
        enerd->term[F_LJ_RECIP]    = Vlr_lj + Vcorr_lj;
        if (debug)
        {
            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
                    Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
                    Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]);
            pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM);
        }
    }
    else
    {
        /* Is there a reaction-field exclusion correction needed? */
        if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
        {
            /* With the Verlet scheme, exclusion forces are calculated
             * in the non-bonded kernel.
             */
            if (ir->cutoff_scheme != ecutsVERLET)
            {
                real dvdl_rf_excl      = 0;
                enerd->term[F_RF_EXCL] =
                    RF_excl_correction(fr, graph, md, excl, x, f,
                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);

                enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
            }
        }
    }
    where();
    debug_gmx();

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }
    debug_gmx();

#ifdef GMX_MPI
    if (TAKETIME)
    {
        t2 = MPI_Wtime();
        MPI_Barrier(cr->mpi_comm_mygroup);
        t3          = MPI_Wtime();
        fr->t_wait += t3-t2;
        if (fr->timesteps == 11)
        {
            char buf[22];
            fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
        }
        fr->timesteps++;
    }
#endif

    if (debug)
    {
        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
    }

}
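
The group-scheme non-bonded call above translates the caller's
GMX_FORCE_* flags into the GMX_NONBONDED_DO_* flags that do_nonbonded()
expects. The same mapping, extracted into a sketch for clarity (the
helper itself is not in the sources):

static int build_donb_flags(int force_flags)
{
    int donb_flags = GMX_NONBONDED_DO_SR;  /* short-range is always computed */

    if (force_flags & GMX_FORCE_FORCES)
    {
        donb_flags |= GMX_NONBONDED_DO_FORCE;
    }
    if (force_flags & GMX_FORCE_VIRIAL)
    {
        donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
    }
    if (force_flags & GMX_FORCE_ENERGY)
    {
        donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
    }
    if (force_flags & GMX_FORCE_DO_LR)
    {
        donb_flags |= GMX_NONBONDED_DO_LR;
    }
    return donb_flags;
}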
Example #24
0
gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
                          t_commrec           *cr,
                          FILE                *fp_err,
                          FILE                *fp_log,
                          t_inputrec          *ir,
                          t_state             *state,
                          double               cycles,
                          interaction_const_t *ic,
                          nonbonded_verlet_t  *nbv,
                          gmx_pme_t           *pmedata,
                          gmx_large_int_t      step)
{
    gmx_bool     OK;
    pme_setup_t *set;
    double       cycles_fast;
    char         buf[STRLEN], sbuf[22];
    real         rtab;
    gmx_bool     bUsesSimpleTables = TRUE;

    if (pme_lb->stage == pme_lb->nstage)
    {
        return FALSE;
    }

    if (PAR(cr))
    {
        gmx_sumd(1, &cycles, cr);
        cycles /= cr->nnodes;
    }

    set = &pme_lb->setup[pme_lb->cur];
    set->count++;

    rtab = ir->rlistlong + ir->tabext;

    if (set->count % 2 == 1)
    {
        /* Skip the first cycle, because the first step after a switch
         * is much slower due to allocation and/or caching effects.
         */
        return TRUE;
    }

    sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf));
    print_grid(fp_err, fp_log, buf, "timed with", set, cycles);

    if (set->count <= 2)
    {
        set->cycles = cycles;
    }
    else
    {
        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
            pme_lb->stage == pme_lb->nstage - 1)
        {
            /* The performance went up a lot (due to e.g. DD load balancing).
             * Add a stage, keep the minima, but rescan all setups.
             */
            pme_lb->nstage++;

            if (debug)
            {
                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
                        "Increased the number stages to %d"
                        " and ignoring the previous performance\n",
                        set->grid[XX], set->grid[YY], set->grid[ZZ],
                        cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
                        pme_lb->nstage);
            }
        }
        set->cycles = min(set->cycles, cycles);
    }

    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
    {
        pme_lb->fastest = pme_lb->cur;

        if (DOMAINDECOMP(cr))
        {
            /* We found a new fastest setting, ensure that with subsequent
             * shorter cut-off's the dynamic load balancing does not make
             * the use of the current cut-off impossible. This solution is
             * a trade-off, as the PME load balancing and DD domain size
             * load balancing can interact in complex ways.
             * With the Verlet kernels, DD load imbalance will usually be
             * mainly due to bonded interaction imbalance, which will often
             * quickly push the domain boundaries beyond the limit for the
             * optimal, PME load balanced, cut-off. But it could be that
             * better overall performance can be obtained with a slightly
             * shorter cut-off and better DD load balancing.
             */
            change_dd_dlb_cutoff_limit(cr);
        }
    }
    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;

    /* Check in stage 0 if we should stop scanning grids.
     * Stop when the time is more than SLOW_FAC longer than the fastest.
     */
    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
    {
        pme_lb->n = pme_lb->cur + 1;
        /* Done with scanning, go to stage 1 */
        switch_to_stage1(pme_lb);
    }

    if (pme_lb->stage == 0)
    {
        int gridsize_start;

        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];

        do
        {
            if (pme_lb->cur+1 < pme_lb->n)
            {
                /* We had already generated the next setup */
                OK = TRUE;
            }
            else
            {
                /* Find the next setup */
                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order);
            }

            if (OK && ir->ePBC != epbcNONE)
            {
                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
                      <= max_cutoff2(ir->ePBC, state->box));
                if (!OK)
                {
                    pme_lb->elimited = epmelblimBOX;
                }
            }

            if (OK)
            {
                pme_lb->cur++;

                if (DOMAINDECOMP(cr))
                {
                    OK = change_dd_cutoff(cr, state, ir,
                                          pme_lb->setup[pme_lb->cur].rlistlong);
                    if (!OK)
                    {
                        /* Failed: do not use this setup */
                        pme_lb->cur--;
                        pme_lb->elimited = epmelblimDD;
                    }
                }
            }
            if (!OK)
            {
                /* We hit the upper limit for the cut-off,
                 * the setup should not go further than cur.
                 */
                pme_lb->n = pme_lb->cur + 1;
                print_loadbal_limited(fp_err, fp_log, step, pme_lb);
                /* Switch to the next stage */
                switch_to_stage1(pme_lb);
            }
        }
        while (OK &&
               !(pme_lb->setup[pme_lb->cur].grid[XX]*
                 pme_lb->setup[pme_lb->cur].grid[YY]*
                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
                 gridsize_start*PME_LB_GRID_SCALE_FAC
                 &&
                 pme_lb->setup[pme_lb->cur].grid_efficiency <
                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
    }

    if (pme_lb->stage > 0 && pme_lb->end == 1)
    {
        pme_lb->cur   = 0;
        pme_lb->stage = pme_lb->nstage;
    }
    else if (pme_lb->stage > 0 && pme_lb->end > 1)
    {
        /* If stage = nstage-1:
         *   scan over all setups, rerunning only those setups
         *   which are not much slower than the fastest
         * else:
         *   use the next setup
         */
        do
        {
            pme_lb->cur++;
            if (pme_lb->cur == pme_lb->end)
            {
                pme_lb->stage++;
                pme_lb->cur = pme_lb->start;
            }
        }
        while (pme_lb->stage == pme_lb->nstage - 1 &&
               pme_lb->setup[pme_lb->cur].count > 0 &&
               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);

        if (pme_lb->stage == pme_lb->nstage)
        {
            /* We are done optimizing, use the fastest setup we found */
            pme_lb->cur = pme_lb->fastest;
        }
    }

    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
    {
        OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
        if (!OK)
        {
            /* Failsafe solution */
            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
            {
                pme_lb->stage--;
            }
            pme_lb->fastest  = 0;
            pme_lb->start    = 0;
            pme_lb->end      = pme_lb->cur;
            pme_lb->cur      = pme_lb->start;
            pme_lb->elimited = epmelblimDD;
            print_loadbal_limited(fp_err, fp_log, step, pme_lb);
        }
    }

    /* Change the Coulomb cut-off and the PME grid */

    set = &pme_lb->setup[pme_lb->cur];

    ic->rcoulomb   = set->rcut_coulomb;
    ic->rlist      = set->rlist;
    ic->rlistlong  = set->rlistlong;
    ir->nstcalclr  = set->nstcalclr;
    ic->ewaldcoeff = set->ewaldcoeff;

    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
    if (pme_lb->cutoff_scheme == ecutsVERLET &&
        nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
    {
        nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv, ic);
    }
    else
    {
        init_interaction_const_tables(NULL, ic, bUsesSimpleTables,
                                      rtab);
    }

    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1)
    {
        init_interaction_const_tables(NULL, ic, bUsesSimpleTables,
                                      rtab);
    }

    if (cr->duty & DUTY_PME)
    {
        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
        {
            /* Generate a new PME data structure,
             * copying part of the old pointers.
             */
            gmx_pme_reinit(&set->pmedata,
                           cr, pme_lb->setup[0].pmedata, ir,
                           set->grid);
        }
        *pmedata = set->pmedata;
    }
    else
    {
        /* Tell our PME-only node to switch grid */
        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff);
    }

    if (debug)
    {
        print_grid(NULL, debug, "", "switched to", set, -1);
    }

    if (pme_lb->stage == pme_lb->nstage)
    {
        print_grid(fp_err, fp_log, "", "optimal", set, -1);
    }

    return TRUE;
}
Example #25
0
void set_lincs(t_idef *idef,t_mdatoms *md,
               gmx_bool bDynamics,t_commrec *cr,
               struct gmx_lincsdata *li)
{
    int      start,natoms,nflexcon;
    t_blocka at2con;
    t_iatom  *iatom;
    int      i,k,ncc_alloc,ni,con,nconnect,concon;
    int      type,a1,a2;
    real     lenA=0,lenB;
    gmx_bool     bLocal;

    li->nc = 0;
    li->ncc = 0;

    /* This is the local topology, so there are only F_CONSTR constraints */
    if (idef->il[F_CONSTR].nr == 0)
    {
        /* There are no constraints,
         * we do not need to fill any data structures.
         */
        return;
    }
    
    if (debug)
    {
        fprintf(debug,"Building the LINCS connectivity\n");
    }
    
    if (DOMAINDECOMP(cr))
    {
        if (cr->dd->constraints)
        {
            dd_get_constraint_range(cr->dd,&start,&natoms);
        }
        else
        {
            natoms = cr->dd->nat_home;
        }
        start = 0;
    }
    else if (PARTDECOMP(cr))
    {
        pd_get_constraint_range(cr->pd,&start,&natoms);
    }
    else
    {
        start  = md->start;
        natoms = md->homenr;
    }
    at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
                         &nflexcon);

    if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
    {
        li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
        srenew(li->bllen0,li->nc_alloc);
        srenew(li->ddist,li->nc_alloc);
        srenew(li->bla,2*li->nc_alloc);
        srenew(li->blc,li->nc_alloc);
        srenew(li->blc1,li->nc_alloc);
        srenew(li->blnr,li->nc_alloc+1);
        srenew(li->bllen,li->nc_alloc);
        srenew(li->tmpv,li->nc_alloc);
        srenew(li->tmp1,li->nc_alloc);
        srenew(li->tmp2,li->nc_alloc);
        srenew(li->tmp3,li->nc_alloc);
        srenew(li->lambda,li->nc_alloc);
        if (li->ncg_triangle > 0)
        {
            /* This is allocating too much, but it is difficult to improve */
            srenew(li->triangle,li->nc_alloc);
            srenew(li->tri_bits,li->nc_alloc);
        }
    }
    
    iatom = idef->il[F_CONSTR].iatoms;
    
    ncc_alloc = li->ncc_alloc;
    li->blnr[0] = 0;
    
    ni = idef->il[F_CONSTR].nr/3;

    con = 0;
    nconnect = 0;
    li->blnr[con] = nconnect;
    for(i=0; i<ni; i++)
    {
        bLocal = TRUE;
        type = iatom[3*i];
        a1   = iatom[3*i+1];
        a2   = iatom[3*i+2];
        lenA = idef->iparams[type].constr.dA;
        lenB = idef->iparams[type].constr.dB;
        /* Skip the flexible constraints when not doing dynamics */
        if (bDynamics || lenA!=0 || lenB!=0)
        {
            li->bllen0[con]  = lenA;
            li->ddist[con]   = lenB - lenA;
            /* Set the length to the topology A length */
            li->bllen[con]   = li->bllen0[con];
            li->bla[2*con]   = a1;
            li->bla[2*con+1] = a2;
            /* Construct the constraint connection matrix blbnb */
            for(k=at2con.index[a1-start]; k<at2con.index[a1-start+1]; k++)
            {
                concon = at2con.a[k];
                if (concon != i)
                {
                    if (nconnect >= ncc_alloc)
                    {
                        ncc_alloc = over_alloc_small(nconnect+1);
                        srenew(li->blbnb,ncc_alloc);
                    }
                    li->blbnb[nconnect++] = concon;
                }
            }
            for(k=at2con.index[a2-start]; k<at2con.index[a2-start+1]; k++)
            {
                concon = at2con.a[k];
                if (concon != i)
                {
                    if (nconnect+1 > ncc_alloc)
                    {
                        ncc_alloc = over_alloc_small(nconnect+1);
                        srenew(li->blbnb,ncc_alloc);
                    }
                    li->blbnb[nconnect++] = concon;
                }
            }
            li->blnr[con+1] = nconnect;
            
            if (cr->dd == NULL)
            {
                /* Order the blbnb matrix to optimize memory access */
                qsort(&(li->blbnb[li->blnr[con]]),li->blnr[con+1]-li->blnr[con],
                      sizeof(li->blbnb[0]),int_comp);
            }
            /* Increase the constraint count */
            con++;
        }
    }
    
    done_blocka(&at2con);

    /* This is the real number of constraints; without dynamics
     * the flexible constraints are not present.
     */
    li->nc = con;
    
    li->ncc = li->blnr[con];
    if (cr->dd == NULL)
    {
        /* Since the matrix is static, we can free some memory */
        ncc_alloc = li->ncc;
        srenew(li->blbnb,ncc_alloc);
    }
    
    if (ncc_alloc > li->ncc_alloc)
    {
        li->ncc_alloc = ncc_alloc;
        srenew(li->blmf,li->ncc_alloc);
        srenew(li->blmf1,li->ncc_alloc);
        srenew(li->tmpncc,li->ncc_alloc);
    }
    
    if (debug)
    {
        fprintf(debug,"Number of constraints is %d, couplings %d\n",
                li->nc,li->ncc);
    }

    set_lincs_matrix(li,md->invmass,md->lambda);
}
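The loop above fills blnr/blbnb as a compressed-row (CSR-style) adjacency list: the constraints coupled to constraint i occupy blbnb[blnr[i]] up to blbnb[blnr[i+1]-1]. Below is a minimal sketch of that layout; the three-constraint chain is made up for illustration, and only the array names come from the code above.

#include <stdio.h>

/* Hypothetical chain of three constraints over atoms 0-3:
 * c0 = (0,1), c1 = (1,2), c2 = (2,3).
 * Two constraints are coupled when they share an atom, so
 * c0 couples to c1, c1 to c0 and c2, and c2 to c1.
 */
int main(void)
{
    int blnr[]  = {0, 1, 3, 4}; /* row offsets into blbnb, one per constraint plus one */
    int blbnb[] = {1, 0, 2, 1}; /* flattened lists of coupled constraint indices */
    int con, k;

    for (con = 0; con < 3; con++)
    {
        printf("constraint %d couples to:", con);
        for (k = blnr[con]; k < blnr[con+1]; k++)
        {
            printf(" %d", blbnb[k]);
        }
        printf("\n");
    }
    return 0;
}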
Example #26
static void make_cyl_refgrps(t_commrec *cr, struct pull_t *pull, t_mdatoms *md,
                             t_pbc *pbc, double t, rvec *x)
{
    /* The size and stride per coord for the reduction buffer */
    const int       stride = 9;
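    /* Slot layout per coordinate, matching the stores further down:
     * 0: wmass, 1: wwmass, 2: sum_a (weighted axial sum),
     * 3-5: radf_fac0[XX..ZZ], 6-8: radf_fac1[XX..ZZ].
     */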
    int             c, i, ii, m, start, end;
    rvec            g_x, dx, dir;
    double          inv_cyl_r2;
    pull_comm_t    *comm;
    gmx_ga2la_t    *ga2la = NULL;

    comm = &pull->comm;

    if (comm->dbuf_cyl == NULL)
    {
        snew(comm->dbuf_cyl, pull->ncoord*stride);
    }

    if (cr && DOMAINDECOMP(cr))
    {
        ga2la = cr->dd->ga2la;
    }

    start = 0;
    end   = md->homenr;

    inv_cyl_r2 = 1.0/gmx::square(pull->params.cylinder_r);

    /* loop over all groups to make a reference group for each */
    for (c = 0; c < pull->ncoord; c++)
    {
        pull_coord_work_t *pcrd;
        double             sum_a, wmass, wwmass;
        dvec               radf_fac0, radf_fac1;

        pcrd   = &pull->coord[c];

        sum_a  = 0;
        wmass  = 0;
        wwmass = 0;
        clear_dvec(radf_fac0);
        clear_dvec(radf_fac1);

        if (pcrd->params.eGeom == epullgCYL)
        {
            pull_group_work_t *pref, *pgrp, *pdyna;

            /* pref will be the same group for all pull coordinates */
            pref  = &pull->group[pcrd->params.group[0]];
            pgrp  = &pull->group[pcrd->params.group[1]];
            pdyna = &pull->dyna[c];
            copy_rvec(pcrd->vec, dir);
            pdyna->nat_loc = 0;

            /* We calculate distances with respect to the reference location
             * of this cylinder group (g_x), which we already have now since
             * we reduced the other group COM over the ranks. This resolves
             * any PBC issues and we don't need to use a PBC-atom here.
             */
            if (pcrd->params.rate != 0)
            {
                /* With rate != 0 the reference value is time-dependent;
                 * with rate = 0 it was set at initialization */
                pcrd->value_ref = pcrd->params.init + pcrd->params.rate*t;
            }
            for (m = 0; m < DIM; m++)
            {
                g_x[m] = pgrp->x[m] - pcrd->vec[m]*pcrd->value_ref;
            }

            /* loop over all atoms in the main ref group */
            for (i = 0; i < pref->params.nat; i++)
            {
                ii = pref->params.ind[i];
                if (ga2la)
                {
                    if (!ga2la_get_home(ga2la, pref->params.ind[i], &ii))
                    {
                        ii = -1;
                    }
                }
                if (ii >= start && ii < end)
                {
                    double dr2, dr2_rel, inp;
                    dvec   dr;

                    pbc_dx_aiuc(pbc, x[ii], g_x, dx);
                    inp = iprod(dir, dx);
                    dr2 = 0;
                    for (m = 0; m < DIM; m++)
                    {
                        /* Determine the radial components */
                        dr[m] = dx[m] - inp*dir[m];
                        dr2  += dr[m]*dr[m];
                    }
                    dr2_rel = dr2*inv_cyl_r2;

                    if (dr2_rel < 1)
                    {
                        double mass, weight, dweight_r;
                        dvec   mdw;

                        /* add to index, to sum of COM, to weight array */
                        if (pdyna->nat_loc >= pdyna->nalloc_loc)
                        {
                            pdyna->nalloc_loc = over_alloc_large(pdyna->nat_loc+1);
                            srenew(pdyna->ind_loc,    pdyna->nalloc_loc);
                            srenew(pdyna->weight_loc, pdyna->nalloc_loc);
                            srenew(pdyna->mdw,        pdyna->nalloc_loc);
                            srenew(pdyna->dv,         pdyna->nalloc_loc);
                        }
                        pdyna->ind_loc[pdyna->nat_loc] = ii;

                        mass      = md->massT[ii];
                        /* The radial weight function is 1-2x^2+x^4,
                         * where x=r/cylinder_r. Since this function depends
                         * on the radial component, we also get radial forces
                         * on both groups.
                         */
                        weight    = 1 + (-2 + dr2_rel)*dr2_rel;
                        dweight_r = (-4 + 4*dr2_rel)*inv_cyl_r2;
                        pdyna->weight_loc[pdyna->nat_loc] = weight;
                        sum_a    += mass*weight*inp;
                        wmass    += mass*weight;
                        wwmass   += mass*weight*weight;
                        dsvmul(mass*dweight_r, dr, mdw);
                        copy_dvec(mdw, pdyna->mdw[pdyna->nat_loc]);
                        /* Currently we only have the axial component of the
                         * distance (inp) up to an unknown offset. We add this
                         * offset after the reduction that determines the
                         * COM of the cylinder group.
                         */
                        pdyna->dv[pdyna->nat_loc] = inp;
                        for (m = 0; m < DIM; m++)
                        {
                            radf_fac0[m] += mdw[m];
                            radf_fac1[m] += mdw[m]*inp;
                        }
                        pdyna->nat_loc++;
                    }
                }
            }
        }
        comm->dbuf_cyl[c*stride+0] = wmass;
        comm->dbuf_cyl[c*stride+1] = wwmass;
        comm->dbuf_cyl[c*stride+2] = sum_a;
        comm->dbuf_cyl[c*stride+3] = radf_fac0[XX];
        comm->dbuf_cyl[c*stride+4] = radf_fac0[YY];
        comm->dbuf_cyl[c*stride+5] = radf_fac0[ZZ];
        comm->dbuf_cyl[c*stride+6] = radf_fac1[XX];
        comm->dbuf_cyl[c*stride+7] = radf_fac1[YY];
        comm->dbuf_cyl[c*stride+8] = radf_fac1[ZZ];
    }

    if (cr != NULL && PAR(cr))
    {
        /* Sum the contributions over the ranks */
        pull_reduce_double(cr, comm, pull->ncoord*stride, comm->dbuf_cyl);
    }

    for (c = 0; c < pull->ncoord; c++)
    {
        pull_coord_work_t *pcrd;

        pcrd  = &pull->coord[c];

        if (pcrd->params.eGeom == epullgCYL)
        {
            pull_group_work_t *pdyna, *pgrp;
            double             wmass, wwmass, dist;

            pdyna = &pull->dyna[c];
            pgrp  = &pull->group[pcrd->params.group[1]];

            wmass          = comm->dbuf_cyl[c*stride+0];
            wwmass         = comm->dbuf_cyl[c*stride+1];
            pdyna->mwscale = 1.0/wmass;
            /* Cylinder pulling can't be used with constraints, but we set
             * wscale and invtm anyhow, in case someone would like to use them.
             */
            pdyna->wscale  = wmass/wwmass;
            pdyna->invtm   = wwmass/(wmass*wmass);
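            /* Note: wwmass/(wmass*wmass) == 1/(wscale*wmass), i.e. invtm is
             * the inverse of the effective (weight-scaled) mass of the group.
             */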

            /* We store the deviation of the COM from the reference location
             * used above, since we need it when we apply the radial forces
             * to the atoms in the cylinder group.
             */
            pcrd->cyl_dev  = 0;
            for (m = 0; m < DIM; m++)
            {
                g_x[m]         = pgrp->x[m] - pcrd->vec[m]*pcrd->value_ref;
                dist           = -pcrd->vec[m]*comm->dbuf_cyl[c*stride+2]*pdyna->mwscale;
                pdyna->x[m]    = g_x[m] - dist;
                pcrd->cyl_dev += dist;
            }
            /* Now we know the exact COM of the cylinder reference group,
             * we can determine the radial force factor (ffrad) that when
             * multiplied with the axial pull force will give the radial
             * force on the pulled (non-cylinder) group.
             */
            for (m = 0; m < DIM; m++)
            {
                pcrd->ffrad[m] = (comm->dbuf_cyl[c*stride+6+m] +
                                  comm->dbuf_cyl[c*stride+3+m]*pcrd->cyl_dev)/wmass;
            }

            if (debug)
            {
                fprintf(debug, "Pull cylinder group %d:%8.3f%8.3f%8.3f m:%8.3f\n",
                        c, pdyna->x[0], pdyna->x[1],
                        pdyna->x[2], 1.0/pdyna->invtm);
                fprintf(debug, "ffrad %8.3f %8.3f %8.3f\n",
                        pcrd->ffrad[XX], pcrd->ffrad[YY], pcrd->ffrad[ZZ]);
            }
        }
    }
}
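The radial weight above is computed as w = 1 + (-2 + u)*u with u = dr2_rel = (r/cylinder_r)^2, which is just (1 - u)^2, and dweight_r*dr[m] is its exact gradient. A small self-contained check of that algebra follows; the test values are arbitrary and nothing here is taken from a real run.

#include <math.h>
#include <stdio.h>

/* w(u) = (1 - u)^2 with u = (r/R)^2, evaluated as in the code above */
static double weight(double r, double R)
{
    double u = (r*r)/(R*R);
    return 1 + (-2 + u)*u;
}

int main(void)
{
    double R = 1.5, r = 0.8, h = 1e-6;   /* arbitrary test values */
    double u = (r*r)/(R*R);
    /* dweight_r = (-4 + 4u)/R^2, so the radial derivative dw/dr is
     * dweight_r*r; compare against a central finite difference. */
    double analytic = (-4 + 4*u)/(R*R)*r;
    double numeric  = (weight(r + h, R) - weight(r - h, R))/(2*h);

    printf("dw/dr analytic %.8f, numeric %.8f\n", analytic, numeric);
    return 0;
}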
Example #27
gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re,
                          t_state *state, gmx_enerdata_t *enerd,
                          t_state *state_local, gmx_int64_t step, real time)
{
    int i, j;
    int replica_id = 0;
    int exchange_partner;
    int maxswap = 0;
    /* maxswap counts the rounds of exchanges needed to deal with any
     * multiple exchanges; re->destinations records where each replica ends
     * up after the exchange attempt(s), and re->order gives the order in
     * which the multiple exchanges will occur.
     */
    gmx_bool bThisReplicaExchanged = FALSE;

    if (MASTER(cr))
    {
        replica_id  = re->repl;
        test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time);
        prepare_to_do_exchange(fplog, re->destinations, replica_id, re->nrepl, &maxswap,
                               re->order, re->cyclic, re->incycle, &bThisReplicaExchanged);
    }
    /* Do intra-simulation broadcast so all processors belonging to
     * each simulation know whether they need to participate in
     * collecting the state. Otherwise, they might as well get on with
     * the next thing to do. */
    if (DOMAINDECOMP(cr))
    {
#ifdef GMX_MPI
        MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr),
                  cr->mpi_comm_mygroup);
#endif
    }

    if (bThisReplicaExchanged)
    {
        /* Exchange the states */
        /* Collect the global state on the master node */
        if (DOMAINDECOMP(cr))
        {
            dd_collect_state(cr->dd, state_local, state);
        }
        else
        {
            copy_state_nonatomdata(state_local, state);
        }

        if (MASTER(cr))
        {
            /* There will be only one swap cycle with standard replica
             * exchange, but there may be multiple swap cycles if we
             * allow multiple swaps. */

            for (j = 0; j < maxswap; j++)
            {
                exchange_partner = re->order[replica_id][j];

                if (exchange_partner != replica_id)
                {
                    /* Exchange the global states between the master nodes */
                    if (debug)
                    {
                        fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner);
                    }
                    exchange_state(cr->ms, exchange_partner, state);
                }
            }
            /* For temperature-type replica exchange, we need to scale
             * the velocities. */
            if (re->type == ereTEMP || re->type == ereTL)
            {
                scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]]));
            }

        }

        /* With domain decomposition the global state is distributed later */
        if (!DOMAINDECOMP(cr))
        {
            /* Copy the global state to the local state data structure */
            copy_state_nonatomdata(state, state_local);
        }
    }

    return bThisReplicaExchanged;
}
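For temperature-type exchanges, the sqrt() factor above rescales velocities sampled at the partner's temperature so that the kinetic energy matches this replica's temperature (kinetic energy is proportional to T, velocities to sqrt(T)). A toy illustration with invented temperatures; only the form of the factor corresponds to the scale_velocities call above.

#include <math.h>
#include <stdio.h>

int main(void)
{
    double T_old = 300.0, T_new = 310.0;  /* hypothetical replica temperatures */
    double fac   = sqrt(T_new/T_old);     /* velocity scale factor */

    /* Kinetic energy scales with fac^2 = T_new/T_old, as required */
    printf("velocity scale factor %.6f, kinetic-energy ratio %.6f\n",
           fac, fac*fac);
    return 0;
}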
Example #28
void mdAlgorithmsSetupAtomData(t_commrec         *cr,
                               const t_inputrec  *ir,
                               const gmx_mtop_t  *top_global,
                               gmx_localtop_t    *top,
                               t_forcerec        *fr,
                               t_graph          **graph,
                               t_mdatoms         *mdatoms,
                               gmx_vsite_t       *vsite,
                               gmx_shellfc_t     *shellfc)
{
    bool  usingDomDec = DOMAINDECOMP(cr);

    int   numAtomIndex, numHomeAtoms;
    int  *atomIndex;

    if (usingDomDec)
    {
        numAtomIndex = dd_natoms_mdatoms(cr->dd);
        atomIndex    = cr->dd->gatindex;
        numHomeAtoms = cr->dd->nat_home;
    }
    else
    {
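        /* No DD: there is no global-to-local index, so (presumably) the
         * negative count and NULL index tell atoms2md to use all
         * top_global->natoms atoms directly.
         */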
        numAtomIndex = -1;
        atomIndex    = NULL;
        numHomeAtoms = top_global->natoms;
    }
    atoms2md(top_global, ir, numAtomIndex, atomIndex, numHomeAtoms, mdatoms);

    if (usingDomDec)
    {
        dd_sort_local_top(cr->dd, mdatoms, top);
    }
    else
    {
        /* Currently gmx_mtop_generate_local_top allocates and returns a
         * pointer. We should implement a more elegant solution.
         */
        gmx_localtop_t *tmpTop;

        tmpTop = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
        *top   = *tmpTop;
        sfree(tmpTop);
    }

    if (vsite)
    {
        if (usingDomDec)
        {
            /* The vsites were already assigned by the domdec topology code.
             * We only need to do the thread division here.
             */
            split_vsites_over_threads(top->idef.il, top->idef.iparams,
                                      mdatoms, FALSE, vsite);
        }
        else
        {
            set_vsite_top(vsite, top, mdatoms, cr);
        }
    }

    if (!usingDomDec && ir->ePBC != epbcNONE && !fr->bMolPBC)
    {
        GMX_ASSERT(graph != NULL, "We use a graph with PBC (no periodic mols) and without DD");

        *graph = mk_graph(NULL, &(top->idef), 0, top_global->natoms, FALSE, FALSE);
    }
    else if (graph != NULL)
    {
        *graph = NULL;
    }

    /* Note that with DD only flexible constraints, not shells, are supported
     * and these don't require setup in make_local_shells().
     */
    if (!usingDomDec && shellfc)
    {
        make_local_shells(cr, mdatoms, shellfc);
    }

    setup_bonded_threading(fr, &top->idef);
}
Example #29
int relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose,
                        gmx_int64_t mdstep, t_inputrec *inputrec,
                        gmx_bool bDoNS, int force_flags,
                        gmx_localtop_t *top,
                        gmx_constr_t constr,
                        gmx_enerdata_t *enerd, t_fcdata *fcd,
                        t_state *state, rvec f[],
                        tensor force_vir,
                        t_mdatoms *md,
                        t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                        t_graph *graph,
                        gmx_groups_t *groups,
                        struct gmx_shellfc *shfc,
                        t_forcerec *fr,
                        gmx_bool bBornRadii,
                        double t, rvec mu_tot,
                        gmx_bool *bConverged,
                        gmx_vsite_t *vsite,
                        FILE *fp_field)
{
    int        nshell;
    t_shell   *shell;
    t_idef    *idef;
    rvec      *pos[2], *force[2], *acc_dir = NULL, *x_old = NULL;
    real       Epot[2], df[2];
    rvec       dx;
    real       sf_dir, invdt;
    real       ftol, xiH, xiS, dum = 0;
    char       sbuf[22];
    gmx_bool   bCont, bInit;
    int        nat, dd_ac0, dd_ac1 = 0, i;
    int        start = 0, homenr = md->homenr, end = start+homenr, cg0, cg1;
    int        nflexcon, g, number_steps, d, Min = 0, count = 0;
#define  Try (1-Min)             /* At start Try = 1 */
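/* Min/Try implement a two-buffer flip: Min indexes the best positions and
 * forces found so far, Try the candidate set. Since Min is 0 or 1,
 * Try = (1-Min) always points at the other buffer, and accepting a step is
 * just Min = Try, with no copying.
 */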

    bCont        = (mdstep == inputrec->init_step) && inputrec->bContinuation;
    bInit        = (mdstep == inputrec->init_step) || shfc->bRequireInit;
    ftol         = inputrec->em_tol;
    number_steps = inputrec->niter;
    nshell       = shfc->nshell;
    shell        = shfc->shell;
    nflexcon     = shfc->nflexcon;

    idef = &top->idef;

    if (DOMAINDECOMP(cr))
    {
        nat = dd_natoms_vsite(cr->dd);
        if (nflexcon > 0)
        {
            dd_get_constraint_range(cr->dd, &dd_ac0, &dd_ac1);
            nat = max(nat, dd_ac1);
        }
    }
    else
    {
        nat = state->natoms;
    }

    if (nat > shfc->x_nalloc)
    {
        /* Allocate local arrays */
        shfc->x_nalloc = over_alloc_dd(nat);
        for (i = 0; (i < 2); i++)
        {
            srenew(shfc->x[i], shfc->x_nalloc);
            srenew(shfc->f[i], shfc->x_nalloc);
        }
    }
    for (i = 0; (i < 2); i++)
    {
        pos[i]   = shfc->x[i];
        force[i] = shfc->f[i];
    }

    /* When we had particle decomposition, this code only worked when all
     * particles involved with each shell were in the same charge group.
     * It is unclear whether this restriction is still relevant. */
    if (bDoNS && inputrec->ePBC != epbcNONE && !DOMAINDECOMP(cr))
    {
        /* This is the only time where the coordinates are used
         * before do_force is called, which normally puts all
         * charge groups in the box.
         */
        cg0 = 0;
        cg1 = top->cgs.nr;
        put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, state->box,
                                 &(top->cgs), state->x, fr->cg_cm);
        if (graph)
        {
            mk_mshift(fplog, graph, fr->ePBC, state->box, state->x);
        }
    }

    /* After this all coordinate arrays will contain whole molecules */
    if (graph)
    {
        shift_self(graph, state->box, state->x);
    }

    if (nflexcon)
    {
        if (nat > shfc->flex_nalloc)
        {
            shfc->flex_nalloc = over_alloc_dd(nat);
            srenew(shfc->acc_dir, shfc->flex_nalloc);
            srenew(shfc->x_old, shfc->flex_nalloc);
        }
        acc_dir = shfc->acc_dir;
        x_old   = shfc->x_old;
        for (i = 0; i < homenr; i++)
        {
            for (d = 0; d < DIM; d++)
            {
                shfc->x_old[i][d] =
                    state->x[start+i][d] - state->v[start+i][d]*inputrec->delta_t;
            }
        }
    }

    /* Do a prediction of the shell positions */
    if (shfc->bPredict && !bCont)
    {
        predict_shells(fplog, state->x, state->v, inputrec->delta_t, nshell, shell,
                       md->massT, NULL, bInit);
    }

    /* do_force expected the charge groups to be in the box */
    if (graph)
    {
        unshift_self(graph, state->box, state->x);
    }

    /* Calculate the forces first time around */
    if (gmx_debug_at)
    {
        pr_rvecs(debug, 0, "x b4 do_force", state->x + start, homenr);
    }
    do_force(fplog, cr, inputrec, mdstep, nrnb, wcycle, top, groups,
             state->box, state->x, &state->hist,
             force[Min], force_vir, md, enerd, fcd,
             state->lambda, graph,
             fr, vsite, mu_tot, t, fp_field, NULL, bBornRadii,
             (bDoNS ? GMX_FORCE_NS : 0) | force_flags);

    sf_dir = 0;
    if (nflexcon)
    {
        init_adir(fplog, shfc,
                  constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end,
                  shfc->x_old-start, state->x, state->x, force[Min],
                  shfc->acc_dir-start,
                  fr->bMolPBC, state->box, state->lambda, &dum, nrnb);

        for (i = start; i < end; i++)
        {
            sf_dir += md->massT[i]*norm2(shfc->acc_dir[i-start]);
        }
    }

    Epot[Min] = enerd->term[F_EPOT];

    df[Min] = rms_force(cr, shfc->f[Min], nshell, shell, nflexcon, &sf_dir, &Epot[Min]);
    df[Try] = 0;
    if (debug)
    {
        fprintf(debug, "df = %g  %g\n", df[Min], df[Try]);
    }

    if (gmx_debug_at)
    {
        pr_rvecs(debug, 0, "force0", force[Min], md->nr);
    }

    if (nshell+nflexcon > 0)
    {
        /* Copy x to pos[Min] & pos[Try]: during minimization only the
         * shell positions are updated, therefore the other particles must
         * be set here.
         */
        memcpy(pos[Min], state->x, nat*sizeof(state->x[0]));
        memcpy(pos[Try], state->x, nat*sizeof(state->x[0]));
    }

    if (bVerbose && MASTER(cr))
    {
        print_epot(stdout, mdstep, 0, Epot[Min], df[Min], nflexcon, sf_dir);
    }

    if (debug)
    {
        fprintf(debug, "%17s: %14.10e\n",
                interaction_function[F_EKIN].longname, enerd->term[F_EKIN]);
        fprintf(debug, "%17s: %14.10e\n",
                interaction_function[F_EPOT].longname, enerd->term[F_EPOT]);
        fprintf(debug, "%17s: %14.10e\n",
                interaction_function[F_ETOT].longname, enerd->term[F_ETOT]);
        fprintf(debug, "SHELLSTEP %s\n", gmx_step_str(mdstep, sbuf));
    }

    /* First check whether we should do shells, or whether the force is
     * low enough even without minimization.
     */
    *bConverged = (df[Min] < ftol);

    for (count = 1; (!(*bConverged) && (count < number_steps)); count++)
    {
        if (vsite)
        {
            construct_vsites(vsite, pos[Min], inputrec->delta_t, state->v,
                             idef->iparams, idef->il,
                             fr->ePBC, fr->bMolPBC, cr, state->box);
        }

        if (nflexcon)
        {
            init_adir(fplog, shfc,
                      constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end,
                      x_old-start, state->x, pos[Min], force[Min], acc_dir-start,
                      fr->bMolPBC, state->box, state->lambda, &dum, nrnb);

            directional_sd(pos[Min], pos[Try], acc_dir-start, start, end,
                           fr->fc_stepsize);
        }

        /* New positions, Steepest descent */
        shell_pos_sd(pos[Min], pos[Try], force[Min], nshell, shell, count);

        /* do_force expected the charge groups to be in the box */
        if (graph)
        {
            unshift_self(graph, state->box, pos[Try]);
        }

        if (gmx_debug_at)
        {
            pr_rvecs(debug, 0, "RELAX: pos[Min]  ", pos[Min] + start, homenr);
            pr_rvecs(debug, 0, "RELAX: pos[Try]  ", pos[Try] + start, homenr);
        }
        /* Try the new positions */
        do_force(fplog, cr, inputrec, 1, nrnb, wcycle,
                 top, groups, state->box, pos[Try], &state->hist,
                 force[Try], force_vir,
                 md, enerd, fcd, state->lambda, graph,
                 fr, vsite, mu_tot, t, fp_field, NULL, bBornRadii,
                 force_flags);

        if (gmx_debug_at)
        {
            pr_rvecs(debug, 0, "RELAX: force[Min]", force[Min] + start, homenr);
            pr_rvecs(debug, 0, "RELAX: force[Try]", force[Try] + start, homenr);
        }
        sf_dir = 0;
        if (nflexcon)
        {
            init_adir(fplog, shfc,
                      constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end,
                      x_old-start, state->x, pos[Try], force[Try], acc_dir-start,
                      fr->bMolPBC, state->box, state->lambda, &dum, nrnb);

            for (i = start; i < end; i++)
            {
                sf_dir += md->massT[i]*norm2(acc_dir[i-start]);
            }
        }

        Epot[Try] = enerd->term[F_EPOT];

        df[Try] = rms_force(cr, force[Try], nshell, shell, nflexcon, &sf_dir, &Epot[Try]);

        if (debug)
        {
            fprintf(debug, "df = %g  %g\n", df[Min], df[Try]);
        }

        if (gmx_debug_at)
        {
            pr_rvecs(debug, 0, "F na do_force", force[Try] + start, homenr);
            fprintf(debug, "SHELL ITER %d\n", count);
            dump_shells(debug, pos[Try], force[Try], ftol, nshell, shell);
        }

        if (bVerbose && MASTER(cr))
        {
            print_epot(stdout, mdstep, count, Epot[Try], df[Try], nflexcon, sf_dir);
        }

        *bConverged = (df[Try] < ftol);

        if ((df[Try] < df[Min]))
        {
            if (debug)
            {
                fprintf(debug, "Swapping Min and Try\n");
            }
            if (nflexcon)
            {
                /* Correct the velocities for the flexible constraints */
                invdt = 1/inputrec->delta_t;
                for (i = start; i < end; i++)
                {
                    for (d = 0; d < DIM; d++)
                    {
                        state->v[i][d] += (pos[Try][i][d] - pos[Min][i][d])*invdt;
                    }
                }
            }
            Min  = Try;
        }
        else
        {
            decrease_step_size(nshell, shell);
        }
    }
    if (MASTER(cr) && !(*bConverged))
    {
        /* Note that the energies and virial are incorrect when not converged */
        if (fplog)
        {
            fprintf(fplog,
                    "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
                    gmx_step_str(mdstep, sbuf), number_steps, df[Min]);
        }
        fprintf(stderr,
                "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
                gmx_step_str(mdstep, sbuf), number_steps, df[Min]);
    }

    /* Copy back the coordinates and the forces */
    memcpy(state->x, pos[Min], nat*sizeof(state->x[0]));
    memcpy(f, force[Min], nat*sizeof(f[0]));

    return count;
}
Example #30
void pme_loadbal_init(pme_load_balancing_t     **pme_lb_p,
                      t_commrec                 *cr,
                      FILE                      *fp_log,
                      const t_inputrec          *ir,
                      matrix                     box,
                      const interaction_const_t *ic,
                      struct gmx_pme_t          *pmedata,
                      gmx_bool                   bUseGPU,
                      gmx_bool                  *bPrinting)
{
    pme_load_balancing_t *pme_lb;
    real                  spm, sp;
    int                   d;

    snew(pme_lb, 1);

    pme_lb->bSepPMERanks  = !(cr->duty & DUTY_PME);

    /* Initially we turn balancing on directly, based on PP/PME imbalance */
    pme_lb->bTriggerOnDLB = FALSE;

    /* Any number of stages >= 2 is supported */
    pme_lb->nstage        = 2;

    pme_lb->cutoff_scheme = ir->cutoff_scheme;

    if (pme_lb->cutoff_scheme == ecutsVERLET)
    {
        pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
        pme_lb->rbuf_vdw     = pme_lb->rbuf_coulomb;
    }
    else
    {
        if (ic->rcoulomb > ic->rlist)
        {
            pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
        }
        else
        {
            pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
        }
        if (ic->rvdw > ic->rlist)
        {
            pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
        }
        else
        {
            pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
        }
    }

    copy_mat(box, pme_lb->box_start);
    if (ir->ePBC == epbcXY && ir->nwall == 2)
    {
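        /* With two walls and pbc=xy the Ewald sum uses a box elongated in z
         * by wall_ewald_zfac, so the z box vector used for the grid-spacing
         * estimate below is scaled to match.
         */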
        svmul(ir->wall_ewald_zfac, pme_lb->box_start[ZZ], pme_lb->box_start[ZZ]);
    }

    pme_lb->n = 1;
    snew(pme_lb->setup, pme_lb->n);

    pme_lb->rcut_vdw                 = ic->rvdw;
    pme_lb->rcut_coulomb_start       = ir->rcoulomb;
    pme_lb->nstcalclr_start          = ir->nstcalclr;

    pme_lb->cur                      = 0;
    pme_lb->setup[0].rcut_coulomb    = ic->rcoulomb;
    pme_lb->setup[0].rlist           = ic->rlist;
    pme_lb->setup[0].rlistlong       = ic->rlistlong;
    pme_lb->setup[0].nstcalclr       = ir->nstcalclr;
    pme_lb->setup[0].grid[XX]        = ir->nkx;
    pme_lb->setup[0].grid[YY]        = ir->nky;
    pme_lb->setup[0].grid[ZZ]        = ir->nkz;
    pme_lb->setup[0].ewaldcoeff_q    = ic->ewaldcoeff_q;
    pme_lb->setup[0].ewaldcoeff_lj   = ic->ewaldcoeff_lj;

    pme_lb->setup[0].pmedata         = pmedata;

    spm = 0;
    for (d = 0; d < DIM; d++)
    {
        sp = norm(pme_lb->box_start[d])/pme_lb->setup[0].grid[d];
        if (sp > spm)
        {
            spm = sp;
        }
    }
    pme_lb->setup[0].spacing = spm;

    if (ir->fourier_spacing > 0)
    {
        pme_lb->cut_spacing = ir->rcoulomb/ir->fourier_spacing;
    }
    else
    {
        pme_lb->cut_spacing = ir->rcoulomb/pme_lb->setup[0].spacing;
    }

    pme_lb->stage = 0;

    pme_lb->fastest     = 0;
    pme_lb->lower_limit = 0;
    pme_lb->start       = 0;
    pme_lb->end         = 0;
    pme_lb->elimited    = epmelblimNO;

    pme_lb->cycles_n = 0;
    pme_lb->cycles_c = 0;

    /* Tune with GPUs and/or separate PME ranks.
     * When running only on a CPU without PME ranks, PME tuning will only help
     * with small numbers of atoms in the cut-off sphere.
     */
    pme_lb->bActive  = (wallcycle_have_counter() && (bUseGPU ||
                                                     pme_lb->bSepPMERanks));

    /* With GPUs and no separate PME ranks we can't measure the PP/PME
     * imbalance, so we start balancing right away.
     * Otherwise we only start balancing after we observe imbalance.
     */
    pme_lb->bBalance = (pme_lb->bActive && (bUseGPU && !pme_lb->bSepPMERanks));

    pme_lb->step_rel_stop = PMETunePeriod*ir->nstlist;

    /* Delay DD load balancing when GPUs are used */
    if (pme_lb->bActive && DOMAINDECOMP(cr) && cr->dd->nnodes > 1 && bUseGPU)
    {
        /* Lock DLB=auto to off (does nothing when DLB=yes/no).
         * With GPUs and separate PME nodes, we want to first
         * do PME tuning without DLB, since DLB might limit
         * the cut-off, which never improves performance.
         * We allow for DLB + PME tuning after a first round of tuning.
         */
        dd_dlb_lock(cr->dd);
        if (dd_dlb_is_locked(cr->dd))
        {
            md_print_warn(cr, fp_log, "NOTE: DLB will not turn on during the first phase of PME tuning\n");
        }
    }

    *pme_lb_p = pme_lb;

    *bPrinting = pme_lb->bBalance;
}
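cut_spacing fixes the ratio of the Coulomb cut-off to the PME grid spacing, so that during tuning a coarser grid can be paired with a proportionally longer cut-off and the overall Ewald accuracy stays roughly constant. A toy sketch of that proportionality follows; all numbers are invented, only the ratio itself comes from the code above.

#include <stdio.h>

int main(void)
{
    double rcoulomb        = 1.0;   /* nm, hypothetical */
    double fourier_spacing = 0.12;  /* nm, hypothetical */
    double cut_spacing     = rcoulomb/fourier_spacing;

    /* A 10% coarser grid would go with a 10% longer cut-off */
    double coarser = 0.132;         /* nm */
    printf("new rcoulomb = %.3f nm\n", cut_spacing*coarser);  /* 1.100 nm */
    return 0;
}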