gmx_bool pme_load_balance(pme_load_balancing_t       pme_lb,
                          t_commrec                 *cr,
                          FILE                      *fp_err,
                          FILE                      *fp_log,
                          t_inputrec                *ir,
                          t_state                   *state,
                          double                     cycles,
                          interaction_const_t       *ic,
                          struct nonbonded_verlet_t *nbv,
                          struct gmx_pme_t **        pmedata,
                          gmx_int64_t                step)
{
    gmx_bool     OK;
    pme_setup_t *set;
    double       cycles_fast;
    char         buf[STRLEN], sbuf[22];
    real         rtab;
    gmx_bool     bUsesSimpleTables = TRUE;

    if (pme_lb->stage == pme_lb->nstage)
    {
        return FALSE;
    }

    if (PAR(cr))
    {
        gmx_sumd(1, &cycles, cr);
        cycles /= cr->nnodes;
    }

    set = &pme_lb->setup[pme_lb->cur];
    set->count++;

    rtab = ir->rlistlong + ir->tabext;

    if (set->count % 2 == 1)
    {
        /* Skip the first cycle, because the first step after a switch
         * is much slower due to allocation and/or caching effects.
         */
        return TRUE;
    }

    sprintf(buf, "step %4s: ", gmx_step_str(step, sbuf));
    print_grid(fp_err, fp_log, buf, "timed with", set, cycles);

    if (set->count <= 2)
    {
        set->cycles = cycles;
    }
    else
    {
        if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
            pme_lb->stage == pme_lb->nstage - 1)
        {
            /* The performance went up a lot (due to e.g. DD load balancing).
             * Add a stage, keep the minima, but rescan all setups.
             */
            pme_lb->nstage++;

            if (debug)
            {
                fprintf(debug, "The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
                        "Increased the number stages to %d"
                        " and ignoring the previous performance\n",
                        set->grid[XX], set->grid[YY], set->grid[ZZ],
                        cycles*1e-6, set->cycles*1e-6, PME_LB_ACCEL_TOL,
                        pme_lb->nstage);
            }
        }
        set->cycles = min(set->cycles, cycles);
    }

    if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
    {
        pme_lb->fastest = pme_lb->cur;

        if (DOMAINDECOMP(cr))
        {
            /* We found a new fastest setting, ensure that with subsequent
             * shorter cut-offs the dynamic load balancing does not make
             * the use of the current cut-off impossible. This solution is
             * a trade-off, as the PME load balancing and DD domain size
             * load balancing can interact in complex ways.
             * With the Verlet kernels, DD load imbalance will usually be
             * mainly due to bonded interaction imbalance, which will often
             * quickly push the domain boundaries beyond the limit for the
             * optimal, PME load balanced, cut-off. But it could be that
             * better overall performance can be obtained with a slightly
             * shorter cut-off and better DD load balancing.
             */
            change_dd_dlb_cutoff_limit(cr);
        }
    }
    cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;

    /* Check in stage 0 if we should stop scanning grids.
     * Stop when the time is more than PME_LB_SLOW_FAC times that of the fastest.
     */
    if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
        cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
    {
        pme_lb->n = pme_lb->cur + 1;
        /* Done with scanning, go to stage 1 */
        switch_to_stage1(pme_lb);
    }

    if (pme_lb->stage == 0)
    {
        int gridsize_start;

        gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];

        do
        {
            if (pme_lb->cur+1 < pme_lb->n)
            {
                /* We had already generated the next setup */
                OK = TRUE;
            }
            else
            {
                /* Find the next setup */
                OK = pme_loadbal_increase_cutoff(pme_lb, ir->pme_order, cr->dd);

                if (!OK)
                {
                    pme_lb->elimited = epmelblimPMEGRID;
                }
            }

            if (OK && ir->ePBC != epbcNONE)
            {
                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
                      <= max_cutoff2(ir->ePBC, state->box));
                if (!OK)
                {
                    pme_lb->elimited = epmelblimBOX;
                }
            }

            if (OK)
            {
                pme_lb->cur++;

                if (DOMAINDECOMP(cr))
                {
                    OK = change_dd_cutoff(cr, state, ir,
                                          pme_lb->setup[pme_lb->cur].rlistlong);
                    if (!OK)
                    {
                        /* Failed: do not use this setup */
                        pme_lb->cur--;
                        pme_lb->elimited = epmelblimDD;
                    }
                }
            }
            if (!OK)
            {
                /* We hit the upper limit for the cut-off,
                 * the setup should not go further than cur.
                 */
                pme_lb->n = pme_lb->cur + 1;
                print_loadbal_limited(fp_err, fp_log, step, pme_lb);
                /* Switch to the next stage */
                switch_to_stage1(pme_lb);
            }
        }
        while (OK &&
               !(pme_lb->setup[pme_lb->cur].grid[XX]*
                 pme_lb->setup[pme_lb->cur].grid[YY]*
                 pme_lb->setup[pme_lb->cur].grid[ZZ] <
                 gridsize_start*PME_LB_GRID_SCALE_FAC
                 &&
                 pme_lb->setup[pme_lb->cur].grid_efficiency <
                 pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
    }

    if (pme_lb->stage > 0 && pme_lb->end == 1)
    {
        pme_lb->cur   = 0;
        pme_lb->stage = pme_lb->nstage;
    }
    else if (pme_lb->stage > 0 && pme_lb->end > 1)
    {
        /* If stage = nstage-1:
         *   scan over all setups, rerunning only those setups
         *   which are not much slower than the fastest
         * else:
         *   use the next setup
         */
        do
        {
            pme_lb->cur++;
            if (pme_lb->cur == pme_lb->end)
            {
                pme_lb->stage++;
                pme_lb->cur = pme_lb->start;
            }
        }
        while (pme_lb->stage == pme_lb->nstage - 1 &&
               pme_lb->setup[pme_lb->cur].count > 0 &&
               pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);

        if (pme_lb->stage == pme_lb->nstage)
        {
            /* We are done optimizing, use the fastest setup we found */
            pme_lb->cur = pme_lb->fastest;
        }
    }

    if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
    {
        OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
        if (!OK)
        {
            /* Failsafe solution */
            if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
            {
                pme_lb->stage--;
            }
            pme_lb->fastest  = 0;
            pme_lb->start    = 0;
            pme_lb->end      = pme_lb->cur;
            pme_lb->cur      = pme_lb->start;
            pme_lb->elimited = epmelblimDD;
            print_loadbal_limited(fp_err, fp_log, step, pme_lb);
        }
    }

    /* Change the Coulomb cut-off and the PME grid */

    set = &pme_lb->setup[pme_lb->cur];

    ic->rcoulomb     = set->rcut_coulomb;
    ic->rlist        = set->rlist;
    ic->rlistlong    = set->rlistlong;
    ir->nstcalclr    = set->nstcalclr;
    ic->ewaldcoeff_q = set->ewaldcoeff_q;
    /* TODO: centralize the code that sets the potentials shifts */
    if (ic->coulomb_modifier == eintmodPOTSHIFT)
    {
        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
    }
    if (EVDW_PME(ic->vdwtype))
    {
        /* We have PME for both Coulomb and VdW, set rvdw equal to rcoulomb */
        ic->rvdw            = set->rcut_coulomb;
        ic->ewaldcoeff_lj   = set->ewaldcoeff_lj;
        if (ic->vdw_modifier == eintmodPOTSHIFT)
        {
            real crc2;

            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
            ic->sh_invrc6             = -ic->dispersion_shift.cpot;
            crc2                      = sqr(ic->ewaldcoeff_lj*ic->rvdw);
            ic->sh_lj_ewald           = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
        }
    }

    bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
    nbnxn_gpu_pme_loadbal_update_param(nbv, ic);

    /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
     * also sharing texture references. To keep the code simple, we don't
     * treat texture references as shared resources, but this means that
     * the coulomb_tab texture ref will get updated by multiple threads.
     * Hence, to ensure that the non-bonded kernels don't start before all
     * texture binding operations are finished, we need to wait for all ranks
     * to arrive here before continuing.
     *
     * Note that we could omit this barrier if GPUs are not shared (or
     * texture objects are used), but as this is initialization code, there
     * is no point in complicating things.
     */
#ifdef GMX_THREAD_MPI
    if (PAR(cr) && use_GPU(nbv))
    {
        gmx_barrier(cr);
    }
#endif  /* GMX_THREAD_MPI */

    /* Usually we won't need the simple tables with GPUs.
     * But we do with hybrid acceleration and with free energy.
     * To avoid bugs, we always re-initialize the simple tables here.
     */
    init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab);

    if (cr->duty & DUTY_PME)
    {
        if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
        {
            /* Generate a new PME data structure,
             * copying part of the old pointers.
             */
            gmx_pme_reinit(&set->pmedata,
                           cr, pme_lb->setup[0].pmedata, ir,
                           set->grid);
        }
        *pmedata = set->pmedata;
    }
    else
    {
        /* Tell our PME-only node to switch grid */
        gmx_pme_send_switchgrid(cr, set->grid, set->ewaldcoeff_q, set->ewaldcoeff_lj);
    }

    if (debug)
    {
        print_grid(NULL, debug, "", "switched to", set, -1);
    }

    if (pme_lb->stage == pme_lb->nstage)
    {
        print_grid(fp_err, fp_log, "", "optimal", set, -1);
    }

    return TRUE;
}
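
The measurement logic above boils down to: time each trial setup, throw away the first sample after a grid/cut-off switch (allocation and caching noise), keep the per-setup minimum, and finally pick the fastest setup. A stand-alone sketch of just that pattern, with invented types and sample data rather than GROMACS API:

/* Stand-alone sketch of the measure-and-select pattern used above:
 * every trial setup is timed, the first measurement after a switch is
 * discarded, and the fastest setup wins. All names are illustrative. */
#include <stdio.h>

#define NSETUPS 4

typedef struct {
    double cycles;   /* best cycle count seen for this setup */
    int    count;    /* number of timings taken             */
} setup_t;

static int pick_fastest(const setup_t s[], int n)
{
    int best = 0;
    for (int i = 1; i < n; i++)
    {
        if (s[i].count > 0 && s[i].cycles < s[best].cycles)
        {
            best = i;
        }
    }
    return best;
}

int main(void)
{
    /* Fake timings: two samples per setup, the first one is "warm-up". */
    double samples[NSETUPS][2] = {
        { 9.0, 5.0 }, { 8.0, 4.2 }, { 7.5, 4.8 }, { 9.5, 6.1 }
    };
    setup_t setup[NSETUPS] = { { 0 } };

    for (int i = 0; i < NSETUPS; i++)
    {
        for (int k = 0; k < 2; k++)
        {
            setup[i].count++;
            if (setup[i].count % 2 == 1)
            {
                continue;               /* skip the first (slow) sample */
            }
            double c = samples[i][k];
            setup[i].cycles = (setup[i].count <= 2) ?
                c : (c < setup[i].cycles ? c : setup[i].cycles);
        }
    }
    printf("fastest setup: %d\n", pick_fastest(setup, NSETUPS));
    return 0;
}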
Example #2
int init_mmcg(	int nfile,		// number of files
		const t_filenm fnm[],	// file names
		t_inputrec* ir, 	// input record and box stuff
		gmx_mtop_t *top_global,	// global topology
		int *allcgnr, 		// charge groups number
		int *allcgid[],		// charge groups ids
		int *allsolnr, 		// solvent groups number
		int *allsolid[],	// solvent groups ids
		t_commrec *cr,		// communicators
		FILE *log		// log file
		)
{

      char *mmcgf, *topf, line[STRLEN+1], strtmp[STRLEN+1], *pos;
      FILE *mmcgfp, *topfp;
      int k0,k=0,i,resnr;
      int icg,cg0,cg1;
      int allatresnr = 0, *allatresid;	// line counter must start at zero

      t_block all_cgs;
      all_cgs = gmx_mtop_global_cgs(top_global); // index of cgs for whole system

      mmcgf = ftp2fn(efGMX,nfile,fnm);

      if ((mmcgfp = fopen(mmcgf,"r")) == NULL)
	{ // opening the mmcg-data file failed
        fprintf(stderr, "ERROR : could not open the mmcg-data file\n");
	return 1;
	}
      while (!feof(mmcgfp))
	{ // counting lines in mmcg-data file
          if (fgets (line,STRLEN+1,mmcgfp)) allatresnr++;
        }

      // reading mmcg data
      snew (allatresid,allatresnr);
      rewind (mmcgfp);
      for (i=0,k=0; i<allatresnr;i++) {
         if (!fgets(line,STRLEN+1,mmcgfp)) break;
	 if(line[0] == '#') { 
             if(strstr(line,"nstwtlist")) { // number of step for list regeneration
		pos = strchr(line,'='); pos++; 
                ir->mmcg.nstwtlist = atoi(pos);
	     }
	     else if (strstr(line,"shell1wt")) { // inner limit of water droplet
		pos = strchr(line,'='); pos++; 
                ir->mmcg.shell1wt = atof(pos);
	     }
	     else if (strstr(line,"shell2wt")) { // outer limit of water droplet
		pos = strchr(line,'='); pos++; 
                ir->mmcg.shell2wt = atof(pos);
	     }
	     continue;
	}
        // reading all-atoms resids
        allatresid[k] = atoi(line);
        if(allatresid[k] != 0) k++;
      } // end for, mmcg file scanning 

      allatresnr = k;
      if (cr->nnodes!=1) gmx_barrier(cr);
      fclose(mmcgfp);

      // printing all-atoms resids in the log file
      fprintf(log,"MM/CG : gmx_resids: %d\n", allatresnr);
      for(i=0; i<allatresnr;i++)
	fprintf (log, "  %d\n", allatresid[i]);
      fprintf(log,"\n");

      // internal enumeration
      for (i=0; i<allatresnr;i++) (allatresid[i])--;

      // allocating charge groups and solvent
      *allcgnr  = 0;	// output counters start at zero
      *allsolnr = 0;
      snew(*allcgid, ncg_mtop(top_global));
      int maxsol;

      // It seems that GROMACS did not store the correct number of solvent
      // molecules in top_global... At this point the value of nmol is more than 10 million!
      // => srenew() at the end of the routine when allsolnr will be computed
      // find the "SOL" molecule type and the molecule block that uses it
      int moltype_id = 0;
      while (strcmp(*top_global->moltype[moltype_id].name, "SOL")) {
	moltype_id++;
      }
      int molblock_id = 0;
      while (top_global->molblock[molblock_id].type != moltype_id) {
	molblock_id++;
      }
      maxsol = top_global->molblock[molblock_id].nmol;
      if (cr->nnodes != 1) MPI_Bcast(&maxsol, 1, MPI_INT, 0, cr->mpi_comm_mysim);
      
      snew(*allsolid, maxsol);
      // charge groups to monitor, first and last indices
      cg0 = 0;
      cg1 = ncg_mtop(top_global); 
      char *atomname_=NULL, *resname_=NULL; // we need these to get per-atom information
					    // cf. mtop_util.h

      for(icg=cg0; icg<cg1; icg++) {
	// get residue number and name of the cg's first atom
	k0 = all_cgs.index[icg];
	gmx_mtop_atominfo_global(top_global,k0,&atomname_,&resnr,&resname_);

	if (!strcmp(resname_,"SOL")) { // Water
	  (*allsolid)[*allsolnr] = icg;
	  (*allsolnr)++; // counting waters
	}
	for (i=0; i<allatresnr; i++) {
	  if (resnr == allatresid[i]) { // match!
	    (*allcgid)[*allcgnr] = icg;
	    (*allcgnr)++; // counting cg
	  }
        }
      }
      if(cr->nnodes!=1) gmx_barrier(cr);
      return 0;
}
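
The mmcg-data file parsed above mixes "#key = value" header lines with plain residue-id lines. A stand-alone sketch of that parsing, using a hypothetical file name mmcg.dat and plain stdio instead of the GROMACS file-handling helpers:

/* Illustrative parser for the mmcg-data format read by init_mmcg:
 * '#' lines carry key = value settings, other lines carry residue ids. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define STRLEN 4096

int main(void)
{
    FILE *fp = fopen("mmcg.dat", "r");   /* hypothetical input file */
    if (fp == NULL)
    {
        fprintf(stderr, "cannot open mmcg.dat\n");
        return 1;
    }

    int    nstwtlist = 0;
    double shell1wt = 0.0, shell2wt = 0.0;
    char   line[STRLEN + 1];

    while (fgets(line, sizeof line, fp))
    {
        if (line[0] == '#')
        {
            char *pos = strchr(line, '=');
            if (pos == NULL)
            {
                continue;
            }
            pos++;
            if (strstr(line, "nstwtlist"))
            {
                nstwtlist = atoi(pos);   /* list regeneration interval */
            }
            else if (strstr(line, "shell1wt"))
            {
                shell1wt = atof(pos);    /* inner limit of water droplet */
            }
            else if (strstr(line, "shell2wt"))
            {
                shell2wt = atof(pos);    /* outer limit of water droplet */
            }
            continue;
        }
        /* Non-comment lines hold residue ids, one per line. */
        int resid = atoi(line);
        if (resid != 0)
        {
            printf("all-atom residue: %d\n", resid);
        }
    }
    fclose(fp);
    printf("nstwtlist=%d shell1wt=%g shell2wt=%g\n",
           nstwtlist, shell1wt, shell2wt);
    return 0;
}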
Example #3
void init_gamess(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
{
    /* This works in a hopelessly complicated way :-)
     * First a file is written. Then the standard gamess input/output
     * routine is called (no system()!) to set up all fortran arrays.
     * This routine writes a punch file, like in a normal gamess run.
     * Via this punch file the other gamess routines, needed for gradient
     * and energy evaluations, are called. This setup works fine for
     * dynamics simulations. 7-6-2002 (London)
     */
    int
        i, j, rank;
    FILE
       *out;
    char
        periodic_system[37][3] = {
        "XX", "H ", "He", "Li", "Be", "B ", "C ", "N ",
        "O ", "F ", "Ne", "Na", "Mg", "Al", "Si", "P ",
        "S ", "Cl", "Ar", "K ", "Ca", "Sc", "Ti", "V ",
        "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga",
        "Ge", "As", "Se", "Br", "Kr"
    };

    if (PAR(cr))
    {

        if (MASTER(cr))
        {
            out = fopen("FOR009", "w");
            /* of these options I am not completely sure....  the overall
             * performance on more than 4 CPUs is rather poor at the moment.
             */
            fprintf(out, "memory 48000000\nPARALLEL IOMODE SCREENED\n");
            fprintf(out, "ELEC %d\nMULT %d\nSUPER ON\nNOSYM\nGEOMETRY ANGSTROM\n",
                    qm->nelectrons, qm->multiplicity);
            for (i = 0; i < qm->nrQMatoms; i++)
            {
#ifdef DOUBLE
                fprintf(out, "%10.7lf  %10.7lf  %10.7lf  %5.3lf  %2s\n",
                        i/2.,
                        i/3.,
                        i/4.,
                        qm->atomicnumberQM[i]*1.0,
                        periodic_system[qm->atomicnumberQM[i]]);
#else
                fprintf(out, "%10.7f  %10.7f  %10.7f  %5.3f  %2s\n",
                        i/2.,
                        i/3.,
                        i/4.,
                        qm->atomicnumberQM[i]*1.0,
                        periodic_system[qm->atomicnumberQM[i]]);
#endif
            }
            if (mm->nrMMatoms)
            {
                for (j = i; j < i+2; j++)
                {
#ifdef DOUBLE
                    fprintf(out, "%10.7lf  %10.7lf  %10.7lf  %5.3lf  BQ\n",
                            j/5.,
                            j/6.,
                            j/7.,
                            1.0);
#else
                    fprintf(out, "%10.7f  %10.7f  %10.7f  %5.3f  BQ\n",
                            j/5.,
                            j/6.,
                            j/7.,
                            2.0);
#endif
                }
            }
            if (!qm->bTS)
            {
                fprintf(out, "END\nBASIS %s\nRUNTYPE GRADIENT\nSCFTYPE %s\n",
                        eQMbasis_names[qm->QMbasis],
                        eQMmethod_names[qm->QMmethod]); /* see enum.h */
            }
            else
            {
                fprintf(out, "END\nBASIS %s\nRUNTYPE SADDLE\nSCFTYPE %s\n",
                        eQMbasis_names[qm->QMbasis],
                        eQMmethod_names[qm->QMmethod]); /* see enum.h */
            }
            fclose(out);
        }
        gmx_barrier(cr);
        F77_FUNC(inigms, INIGMS) ();
    }
    else /* normal serial run */
    {
        out = fopen("FOR009", "w");
        /* of these options I am not completely sure....  the overall
         * performance on more than 4 CPUs is rather poor at the moment.
         */
        fprintf(out, "ELEC %d\nMULT %d\nSUPER ON\nNOSYM\nGEOMETRY ANGSTROM\n",
                qm->nelectrons, qm->multiplicity);
        for (i = 0; i < qm->nrQMatoms; i++)
        {
#ifdef DOUBLE
            fprintf(out, "%10.7lf  %10.7lf  %10.7lf  %5.3lf  %2s\n",
                    i/2.,
                    i/3.,
                    i/4.,
                    qm->atomicnumberQM[i]*1.0,
                    periodic_system[qm->atomicnumberQM[i]]);
#else
            fprintf(out, "%10.7f  %10.7f  %10.7f  %5.3f  %2s\n",
                    i/2.,
                    i/3.,
                    i/4.,
                    qm->atomicnumberQM[i]*1.0,
                    periodic_system[qm->atomicnumberQM[i]]);
#endif
        }
        if (mm->nrMMatoms)
        {
            for (j = i; j < i+2; j++)
            {
#ifdef DOUBLE
                fprintf(out, "%10.7lf  %10.7lf  %10.7lf  %5.3lf  BQ\n",
                        j/5.,
                        j/6.,
                        j/7.,
                        1.0);
#else
                fprintf(out, "%10.7f  %10.7f  %10.7f  %5.3f  BQ\n",
                        j/5.,
                        j/6.,
                        j/7.,
                        2.0);
#endif
            }
        }
        if (!qm->bTS)
        {
            fprintf(out, "END\nBASIS %s\nRUNTYPE GRADIENT\nSCFTYPE %s\n",
                    eQMbasis_names[qm->QMbasis],
                    eQMmethod_names[qm->QMmethod]); /* see enum.h */
        }
        else
        {
            fprintf(out, "END\nBASIS %s\nRUNTYPE SADDLE\nSCFTYPE %s\n",
                    eQMbasis_names[qm->QMbasis],
                    eQMmethod_names[qm->QMmethod]); /* see enum.h */
        }
        F77_FUNC(inigms, INIGMS) ();
    }
}
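
The GEOMETRY lines written above all follow the same "x y z charge symbol" layout, with the element symbol looked up by atomic number. A stand-alone sketch of just that formatting, using a water molecule and the same dummy-coordinate trick as init_gamess itself (not GAMESS-UK or GROMACS API):

/* Illustrative formatting of GEOMETRY lines: atomic number indexes a
 * two-character symbol table, coordinates here are placeholders. */
#include <stdio.h>

static const char *periodic_system[] = {
    "XX", "H ", "He", "Li", "Be", "B ", "C ", "N ", "O ", "F ", "Ne"
};

int main(void)
{
    int atomicnumber[3] = { 8, 1, 1 };  /* a water molecule: O, H, H */

    for (int i = 0; i < 3; i++)
    {
        /* Same dummy-coordinate trick as init_gamess: only the layout
         * of the deck matters here, real coordinates come in later. */
        printf("%10.7f  %10.7f  %10.7f  %5.3f  %2s\n",
               i/2., i/3., i/4.,
               atomicnumber[i]*1.0,
               periodic_system[atomicnumber[i]]);
    }
    return 0;
}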
Example #4
void do_mmcg (int natoms,		// number of atoms in simulation
 	    t_inputrec   *inputrec, 	// input record and box stuff
	    t_mdatoms    *md,		// the atoms
	    t_state      *state,	// positions & velocities
	    gmx_mtop_t   *top,		// global topology
	    t_commrec    *cr,   	// communicators
	    rvec         *cg_cm,	// centre of mass of charge groups
	    int *allcgid, 		// charge groups ids
	    int allcgnr,		// charge groups number
	    int *allsolid,		// solvent groups ids
	    int allsolnr,		// solvent groups number
    	    FILE *log)			// logfile
{
 int i, j, p, q, qmin;
 real dvmod, shell2w2, dmin, d;
 rvec vecdist, dv, *v;
 shell2w2 = pow (inputrec->mmcg.shell2wt, 2.0);	// Threshold
 v = state->v;			               	// Velocities
 t_block cgs;					// charge groups
 cgs = gmx_mtop_global_cgs(top);
 rvec *all_cg_cm=NULL;
 snew(all_cg_cm,allcgnr);

 // For DD we need cg_cm of all monitored charge groups (fr->cg_cm only holds
 // the local ones): the nearest charge group to a monitored water may be
 // more than one DD cell away.
 if (DOMAINDECOMP(cr)) {
    if(cr->nnodes!=1) gmx_barrier(cr);
    for(i=0; i<allcgnr; i++) {
	int sender = 0,senderf,k;
	for(j=0; j<cr->dd->ncg_home; j++) { // is the cg a home cg ?
	   if(cr->dd->index_gl[j]==allcgid[i] ) {
		sender = cr->sim_nodeid;
		for(k=0;k<3;k++) all_cg_cm[i][k]=cg_cm[j][k];//FIXME improve ! compilation error when done with pointers
	   } 
	}
	MPI_Allreduce(&sender,&senderf,1,MPI_INT,MPI_SUM,cr->dd->mpi_comm_all);
	for(k=0;k<3;k++) MPI_Bcast(&all_cg_cm[i][k],sizeof(all_cg_cm[i][k]),MPI_BYTE,senderf,cr->dd->mpi_comm_all); //FIXME again !
    }
 }

 for(i=0; i<allsolnr; i++) {	// Loop over waters - START
    p = allsolid[i];
    if (PARTDECOMP(cr)) { // particle decomposition
        if((cgs.index[p]>=md->start) && (cgs.index[p]<(md->start+md->homenr))) { // water i is local to this node
      		for (j=0; j<allcgnr; j++) { // Looking for min dist (dmin)
		   q = allcgid[j];	    // and for the nearest charge group (qmin)
		   d = distance2(cg_cm[p],cg_cm[q]);
		   if(!j) 		{ dmin = d; qmin = q; } 
		   else if (d < dmin) 	{ dmin = d; qmin = q; }
        	}
	
      		if (dmin >= shell2w2) {	// Modifying velocity
		    rvec_sub(cg_cm[p], cg_cm[qmin], vecdist);
	 	    unitv (vecdist, vecdist);
		      for (j=cgs.index[p]; j<(3+cgs.index[p]); j++) { // the water's 3 atoms
	  		 dvmod = iprod (v[j], vecdist);
	  		 if (dvmod <= 0) continue;
	  		 svmul (2.0*dvmod, vecdist, dv);
	  		 rvec_dec (&v[j], dv); // Warning (=>?)				
		      }
      		}
	}
    }
    if (DOMAINDECOMP(cr)) { 
	int g_atnr; // global atom ID
	int l_atnr; // local atom ID in the DD cell
	int l_cgnr; // local charge group number

	for(g_atnr=cgs.index[p];g_atnr<=cgs.index[p]+2;g_atnr++) {
	    if(ga2la_get_home(cr->dd->ga2la,g_atnr,&l_atnr)) {// search in global to local lookup table 
							      // if the atom (not water) is in home atoms 
							      // and get local atom number
      		l_cgnr = cr->dd->la2lc[l_atnr]; // get local charge group number
	
		for (j=0; j<allcgnr; j++) { // Looking for min dist (dmin)
					    // and for the nearest charge group (qmin)
		   d = distance2(cg_cm[l_cgnr],all_cg_cm[j]);
		   if(!j) 		{ dmin = d; qmin = j; } 
		   else if (d < dmin) 	{ dmin = d; qmin = j; }
        	}

      		if (dmin >= shell2w2) {	// Modifying velocity
		    rvec_sub(cg_cm[l_cgnr], all_cg_cm[qmin], vecdist);
	 	    unitv (vecdist, vecdist);
	  	    dvmod = iprod (v[l_atnr], vecdist);
	  	    if (dvmod <= 0) continue;
	  	    svmul (2.0*dvmod, vecdist, dv);
	  	    rvec_dec (&v[l_atnr], dv);	// Warning (=>?)
      		}
	    }
	}
    }
  } // Loop over waters - END
  return;
}
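
The velocity treatment in do_mmcg is a reflection of the outward velocity component once a water has drifted past shell2wt: v -> v - 2(v.n)n along the unit vector n pointing from the nearest monitored charge group to the water, applied only while the water is still moving outward (v.n > 0). A stand-alone sketch of just that operation, with plain C arrays instead of GROMACS rvec/iprod/svmul:

/* Illustrative velocity reflection: remove twice the outward component
 * of v along the unit vector n, only if the particle moves outward. */
#include <stdio.h>

typedef double vec3[3];

static double dot(const vec3 a, const vec3 b)
{
    return a[0]*b[0] + a[1]*b[1] + a[2]*b[2];
}

static void reflect_if_outward(vec3 v, const vec3 n_unit)
{
    double vn = dot(v, n_unit);
    if (vn <= 0)
    {
        return;              /* already moving inward: leave v alone */
    }
    for (int k = 0; k < 3; k++)
    {
        v[k] -= 2.0*vn*n_unit[k];
    }
}

int main(void)
{
    vec3 v = { 1.0, 0.5, 0.0 };     /* outward-moving velocity          */
    vec3 n = { 1.0, 0.0, 0.0 };     /* unit vector charge group -> water */
    reflect_if_outward(v, n);
    printf("reflected v = (%g, %g, %g)\n", v[0], v[1], v[2]);
    return 0;
}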