C++ (Cpp) gmx_sum示例

编程语言: C++ (Cpp)

方法/功能: gmx_sum

hotexamples.com的示例: 15

C++ (Cpp) gmx_sum - 已找到15个示例。这些是从开源项目中提取的最受好评的gmx_sum现实C++ (Cpp)示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

文件： pullutil.c 项目： daniellandau/gromacs

static void pull_set_pbcatoms(t_commrec *cr, t_pull *pull,
                              t_mdatoms *md, rvec *x,
                              rvec *x_pbc)
{
    int g, n, m;

    n = 0;
    for (g = 0; g < pull->ngroup; g++)
    {
        if ((g == 0 && PULL_CYL(pull)) || pull->group[g].pbcatom == -1)
        {
            clear_rvec(x_pbc[g]);
        }
        else
        {
            pull_set_pbcatom(cr, &pull->group[g], md, x, x_pbc[g]);
            for (m = 0; m < DIM; m++)
            {
                if (pull->dim[m] == 0)
                {
                    x_pbc[g][m] = 0.0;
                }
            }
            n++;
        }
    }

    if (cr && PAR(cr) && n > 0)
    {
        /* Sum over the nodes to get x_pbc from the home node of pbcatom */
        gmx_sum(pull->ngroup*DIM, x_pbc[0], cr);
    }
}

示例#2

显示文件

文件： umbrella.c 项目： arrow50311/GMX504-ryan-v5.0

real Umbrella_Communicate(real Q_local,real *k_Q,real *Q_0)
{
  int i;
  int i_omp=gmx_omp_get_thread_num();
  // int n_omp=gmx_omp_get_num_procs(); // Wrong number
  int n_omp=udata.n_omp;
  // int i_mpi;
  static real Q_semilocal[UMB_MAX_OMP];
  real Q_global;

  if (n_omp>UMB_MAX_OMP) {
    fprintf(stderr,"Seg fault is probably about to happen because Q_semilocal is not big enough to accommodate %d omp threads. See %d in %s.\n",n_omp,__LINE__,__FILE__);
  }

  Q_semilocal[i_omp]=Q_local;
  #pragma omp barrier
  // #pragma omp master
  // {
  if (i_omp==0) {
    Q_local=0;
    // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
    for (i=0; i<n_omp; i++) {
      Q_local+=Q_semilocal[i];
    }
    // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
//    #ifdef GMX_DOUBLE
//    MPI_Allreduce(&Q_local,&Q_global,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
//    #else
//    MPI_Allreduce(&Q_local,&Q_global,1,MPI_FLOAT, MPI_SUM,MPI_COMM_WORLD);
//    #endif
//    for (i=0; i<n_omp; i++) {
//      Q_semilocal[i]=Q_global;
//    }
    // gmx_sum declared in src/gromacs/legacyheaders/network.h (in main.h)
    #ifdef GMX_MPI
    gmx_sum(1,&Q_local,udata.cr);
    #endif
    for (i=0; i<n_omp; i++) {
      Q_semilocal[i]=Q_local;
    }
    // fprintf(stderr,"%f %f %f %f %f\n",Q_semilocal[0],Q_semilocal[1],Q_semilocal[2],Q_semilocal[3],Q_semilocal[4]);
  }
  #pragma omp barrier
  Q_global=Q_semilocal[i_omp];
  *k_Q=udata.k_Q;
  *Q_0=udata.Q_0+(udata.Q_init-udata.Q_0)*exp(-udata.step/udata.Q_steps);

  // #pragma omp master
  // {
  if (i_omp==0) {
    // MPI_Comm_rank(MPI_COMM_WORLD,&i_mpi);
    // if (i_mpi==0) { // udata.fp=NULL on other mpi processes.
      if ((udata.step % udata.freq)==0 && udata.fp!=NULL) {
        fprintf(udata.fp,"%d %g %g\n",udata.step,Q_global,0.5*(*k_Q)*(Q_global-(*Q_0))*(Q_global-(*Q_0)));
      }
    // }
  }

  return Q_global;
}

示例#3

显示文件

文件： pullutil.c 项目： jcfschulz/gromacs

static void pull_set_pbcatoms(t_commrec *cr, t_pull *pull,
                              rvec *x,
                              rvec *x_pbc)
{
    int g, n, m;

    n = 0;
    for (g = 0; g < pull->ngroup; g++)
    {
        if (!pull->group[g].bCalcCOM || pull->group[g].pbcatom == -1)
        {
            clear_rvec(x_pbc[g]);
        }
        else
        {
            pull_set_pbcatom(cr, &pull->group[g], x, x_pbc[g]);
            n++;
        }
    }

    if (cr && PAR(cr) && n > 0)
    {
        /* Sum over the nodes to get x_pbc from the home node of pbcatom */
        gmx_sum(pull->ngroup*DIM, x_pbc[0], cr);
    }
}

示例#4

显示文件

文件： groupcoord.c 项目： martinhoefling/gromacs

/* Assemble the positions of the group such that every node has all of them. 
 * The atom indices are retrieved from anrs_loc[0..nr_loc] 
 * Note that coll_ind[i] = i is needed in the serial case */
extern void communicate_group_positions(
        t_commrec  *cr, 
        rvec       *xcoll,        /* OUT: Collective array of positions */
        ivec       *shifts,       /* IN+OUT: Collective array of shifts for xcoll */
        ivec       *extra_shifts, /* BUF: Extra shifts since last time step */
        const gmx_bool bNS,       /* IN:  NS step, the shifts have changed */
        rvec       *x_loc,        /* IN:  Local positions on this node */ 
        const int  nr,            /* IN:  Total number of atoms in the group */
        const int  nr_loc,        /* IN:  Local number of atoms in the group */
        int        *anrs_loc,     /* IN:  Local atom numbers */
        int        *coll_ind,     /* IN:  Collective index */
        rvec       *xcoll_old,    /* IN+OUT: Positions from the last time step, used to make group whole */
        matrix     box)
{
    int i;


    /* Zero out the groups' global position array */
    clear_rvecs(nr, xcoll);

    /* Put the local positions that this node has into the right place of 
     * the collective array. Note that in the serial case, coll_ind[i] = i */
    for (i=0; i<nr_loc; i++)
        copy_rvec(x_loc[anrs_loc[i]], xcoll[coll_ind[i]]);

    if (PAR(cr))
    {
        /* Add the arrays from all nodes together */
        gmx_sum(nr*3, xcoll[0], cr);
    }
    /* To make the group whole, start with a whole group and each
     * step move the assembled positions at closest distance to the positions
     * from the last step. First shift the positions with the saved shift
     * vectors (these are 0 when this routine is called for the first time!) */
    shift_positions_group(box, xcoll, shifts, nr);

    /* Now check if some shifts changed since the last step.
     * This only needs to be done when the shifts are expected to have changed,
     * i.e. after neighboursearching */
    if (bNS)
    {
        get_shifts_group(3, box, xcoll, nr, xcoll_old, extra_shifts);

        /* Shift with the additional shifts such that we get a whole group now */
        shift_positions_group(box, xcoll, extra_shifts, nr);

        /* Add the shift vectors together for the next time step */
        for (i=0; i<nr; i++)
        {
            shifts[i][XX] += extra_shifts[i][XX];
            shifts[i][YY] += extra_shifts[i][YY];
            shifts[i][ZZ] += extra_shifts[i][ZZ];
        }

        /* Store current correctly-shifted positions for comparison in the next NS time step */
        for (i=0; i<nr; i++)
            copy_rvec(xcoll[i],xcoll_old[i]);
    }
}

示例#5

显示文件

文件： tgroup.c 项目： alejandrox1/gromacs_flatbottom

static void accumulate_ekin(t_commrec *cr,t_grpopts *opts,
			    gmx_ekindata_t *ekind)
{
  int g;

  if(PAR(cr))
    for(g=0; (g<opts->ngtc); g++) 
      gmx_sum(DIM*DIM,ekind->tcstat[g].ekin[0],cr);
}

示例#6

显示文件

文件： pullutil.cpp 项目： MelroLeandro/gromacs

static void pull_reduce_real(t_commrec   *cr,
                             pull_comm_t *comm,
                             int          n,
                             real        *data)
{
    if (cr != NULL && PAR(cr))
    {
        if (comm->bParticipateAll)
        {
            /* Sum the contributions over all DD ranks */
            gmx_sum(n, data, cr);
        }
        else
        {
#if GMX_MPI
#if MPI_IN_PLACE_EXISTS
            MPI_Allreduce(MPI_IN_PLACE, data, n, GMX_MPI_REAL, MPI_SUM,
                          comm->mpi_comm_com);
#else
            real *buf;

            snew(buf, n);

            MPI_Allreduce(data, buf, n, GMX_MPI_REAL, MPI_SUM,
                          comm->mpi_comm_com);

            /* Copy the result from the buffer to the input/output data */
            for (int i = 0; i < n; i++)
            {
                data[i] = buf[i];
            }
            sfree(buf);
#endif
#else
            gmx_incons("comm->bParticipateAll=FALSE without GMX_MPI");
#endif
        }
    }
}

示例#7

显示文件

文件： gmx_pme_error.c 项目： BradleyDickson/ABPenabledGROMACS

/* Estimate the reciprocal space part error of the SPME Ewald sum. */
static real estimate_reciprocal(
        t_inputinfo *info, 
        rvec x[],           /* array of particles */
        real q[],           /* array of charges */
        int nr,             /* number of charges = size of the charge array */
        FILE *fp_out,
        gmx_bool bVerbose,
        unsigned int seed,  /* The seed for the random number generator */
        int *nsamples,      /* Return the number of samples used if Monte Carlo
                             * algorithm is used for self energy error estimate */
        t_commrec *cr)
{
    real e_rec=0;   /* reciprocal error estimate */
    real e_rec1=0;  /* Error estimate term 1*/
    real e_rec2=0;  /* Error estimate term 2*/
    real e_rec3=0;  /* Error estimate term 3 */
    real e_rec3x=0; /* part of Error estimate term 3 in x */
    real e_rec3y=0; /* part of Error estimate term 3 in y */
    real e_rec3z=0; /* part of Error estimate term 3 in z */
    int i,ci;
    int nx,ny,nz;   /* grid coordinates */
    real q2_all=0;  /* sum of squared charges */
    rvec gridpx;    /* reciprocal grid point in x direction*/
    rvec gridpxy;   /* reciprocal grid point in x and y direction*/
    rvec gridp;     /* complete reciprocal grid point in 3 directions*/
    rvec tmpvec;    /* template to create points from basis vectors */
    rvec tmpvec2;   /* template to create points from basis vectors */
    real coeff=0;   /* variable to compute coefficients of the error estimate */
    real coeff2=0;   /* variable to compute coefficients of the error estimate */
    real tmp=0;     /* variables to compute different factors from vectors */
    real tmp1=0;
    real tmp2=0;
    gmx_bool bFraction;
    
    /* Random number generator */
    gmx_rng_t rng=NULL;
    int *numbers=NULL;

    /* Index variables for parallel work distribution */
    int startglobal,stopglobal;
    int startlocal, stoplocal;
    int x_per_core;
    int xtot;

#ifdef TAKETIME
    double t0=0.0;
    double t1=0.0;
#endif

    rng=gmx_rng_init(seed);

    clear_rvec(gridpx);
    clear_rvec(gridpxy);
    clear_rvec(gridp);
    clear_rvec(tmpvec);
    clear_rvec(tmpvec2);

    for(i=0;i<nr;i++)
    {
        q2_all += q[i]*q[i];
    }
    
    /* Calculate indices for work distribution */
    startglobal=-info->nkx[0]/2;
    stopglobal = info->nkx[0]/2;
    xtot = stopglobal*2+1;
    if (PAR(cr))
    {
        x_per_core = ceil((real)xtot / (real)cr->nnodes);
        startlocal = startglobal + x_per_core*cr->nodeid;
        stoplocal = startlocal + x_per_core -1;
        if (stoplocal > stopglobal)
             stoplocal = stopglobal;
    }
    else
    {
        startlocal = startglobal;
        stoplocal  = stopglobal;
        x_per_core = xtot;
    }
/*     
#ifdef GMX_LIB_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
*/

#ifdef GMX_LIB_MPI
#ifdef TAKETIME
    if (MASTER(cr))
        t0 = MPI_Wtime();
#endif
#endif
    
    if (MASTER(cr)){
                         
        fprintf(stderr, "Calculating reciprocal error part 1 ...");
        
    }

    for(nx=startlocal; nx<=stoplocal; nx++)
    {   
        svmul(nx,info->recipbox[XX],gridpx);
        for(ny=-info->nky[0]/2; ny<info->nky[0]/2+1; ny++)
        {
            svmul(ny,info->recipbox[YY],tmpvec);
            rvec_add(gridpx,tmpvec,gridpxy);
            for(nz=-info->nkz[0]/2; nz<info->nkz[0]/2+1; nz++)
            {
                if (  0 == nx &&  0 == ny &&  0 == nz )
                    continue;
                svmul(nz,info->recipbox[ZZ],tmpvec);
                rvec_add(gridpxy,tmpvec,gridp);
                tmp=norm2(gridp);
                coeff=exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] ) ;
                coeff/= 2.0 * M_PI * info->volume * tmp;
                coeff2=tmp ;
                
                
                tmp=eps_poly2(nx,info->nkx[0],info->pme_order[0]);
                tmp+=eps_poly2(ny,info->nkx[0],info->pme_order[0]);
                tmp+=eps_poly2(nz,info->nkx[0],info->pme_order[0]);
                
                tmp1=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
                tmp2=eps_poly1(ny,info->nky[0],info->pme_order[0]);
                
                tmp+=2.0 * tmp1 * tmp2;
                
                tmp1=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
                tmp2=eps_poly1(ny,info->nky[0],info->pme_order[0]);
                
                tmp+=2.0 * tmp1 * tmp2;
                
                tmp1=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
                tmp2=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
                
                tmp+=2.0 * tmp1 * tmp2;
                
                tmp1=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
                tmp1+=eps_poly1(ny,info->nky[0],info->pme_order[0]);
                tmp1+=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
                
                tmp+= tmp1 * tmp1;
                
                e_rec1+= 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp  * q2_all * q2_all / nr ;

                tmp1=eps_poly3(nx,info->nkx[0],info->pme_order[0]);
                tmp1*=info->nkx[0];
                tmp2=iprod(gridp,info->recipbox[XX]);
                
                tmp=tmp1*tmp2;
                
                tmp1=eps_poly3(ny,info->nky[0],info->pme_order[0]);
                tmp1*=info->nky[0];
                tmp2=iprod(gridp,info->recipbox[YY]);
                
                tmp+=tmp1*tmp2;
                
                tmp1=eps_poly3(nz,info->nkz[0],info->pme_order[0]);
                tmp1*=info->nkz[0];
                tmp2=iprod(gridp,info->recipbox[ZZ]);
                
                tmp+=tmp1*tmp2;
                
                tmp*=4.0 * M_PI;
                
                tmp1=eps_poly4(nx,info->nkx[0],info->pme_order[0]);
                tmp1*=norm2(info->recipbox[XX]);
                tmp1*=info->nkx[0] * info->nkx[0];
                
                tmp+=tmp1;
                
                tmp1=eps_poly4(ny,info->nky[0],info->pme_order[0]);
                tmp1*=norm2(info->recipbox[YY]);
                tmp1*=info->nky[0] * info->nky[0];
                
                tmp+=tmp1;
                
                tmp1=eps_poly4(nz,info->nkz[0],info->pme_order[0]);
                tmp1*=norm2(info->recipbox[ZZ]);
                tmp1*=info->nkz[0] * info->nkz[0];
                
                tmp+=tmp1;
                
                e_rec2+= 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr ;
                
            }
        }
        if (MASTER(cr))
            fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%", 100.0*(nx-startlocal+1)/(x_per_core));
        
    }

    if (MASTER(cr))
        fprintf(stderr, "\n");
    
    /* Use just a fraction of all charges to estimate the self energy error term? */
    bFraction =  (info->fracself > 0.0) && (info->fracself < 1.0);

    if (bFraction)
    {
        /* Here xtot is the number of samples taken for the Monte Carlo calculation
         * of the average of term IV of equation 35 in Wang2010. Round up to a
         * number of samples that is divisible by the number of nodes */
        x_per_core  = ceil(info->fracself * nr / (real)cr->nnodes);
        xtot = x_per_core * cr->nnodes;
    }
    else
    {
        /* In this case we use all nr particle positions */
        xtot = nr;
        x_per_core = ceil( (real)xtot / (real)cr->nnodes );
    }

    startlocal = x_per_core *  cr->nodeid;
    stoplocal  = min(startlocal + x_per_core, xtot);  /* min needed if xtot == nr */

    if (bFraction)
    {
        /* Make shure we get identical results in serial and parallel. Therefore,
         * take the sample indices from a single, global random number array that
         * is constructed on the master node and that only depends on the seed */
        snew(numbers, xtot);
        if (MASTER(cr))
        {
            for (i=0; i<xtot; i++)
            {
                numbers[i] = floor(gmx_rng_uniform_real(rng) * nr );
            }
        }
        /* Broadcast the random number array to the other nodes */
        if (PAR(cr))
        {
            nblock_bc(cr,xtot,numbers);
        }

        if (bVerbose && MASTER(cr))
        {
            fprintf(stdout, "Using %d sample%s to approximate the self interaction error term",
                    xtot, xtot==1?"":"s");
            if (PAR(cr))
                fprintf(stdout, " (%d sample%s per node)", x_per_core, x_per_core==1?"":"s");
            fprintf(stdout, ".\n");
        }
    }

    /* Return the number of positions used for the Monte Carlo algorithm */
    *nsamples = xtot;

    for(i=startlocal;i<stoplocal;i++)
    {
        e_rec3x=0;
        e_rec3y=0;
        e_rec3z=0;

        if (bFraction)
        {
            /* Randomly pick a charge */
            ci = numbers[i];
        }
        else
        {
            /* Use all charges */
            ci = i;
        }

        /* for(nx=startlocal; nx<=stoplocal; nx++)*/
        for(nx=-info->nkx[0]/2; nx<info->nkx[0]/2+1; nx++) 
        {   
            svmul(nx,info->recipbox[XX],gridpx);
            for(ny=-info->nky[0]/2; ny<info->nky[0]/2+1; ny++)
            {
                svmul(ny,info->recipbox[YY],tmpvec);
                rvec_add(gridpx,tmpvec,gridpxy);
                for(nz=-info->nkz[0]/2; nz<info->nkz[0]/2+1; nz++)
                {
                    
                    if (  0 == nx && 0 == ny && 0 == nz)
                        continue;
                    
                    svmul(nz,info->recipbox[ZZ],tmpvec);
                    rvec_add(gridpxy,tmpvec,gridp);
                    tmp=norm2(gridp);
                    coeff=exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
                    coeff/= tmp ;
                    e_rec3x+=coeff*eps_self(nx,info->nkx[0],info->recipbox[XX],info->pme_order[0],x[ci]);
                    e_rec3y+=coeff*eps_self(ny,info->nky[0],info->recipbox[YY],info->pme_order[0],x[ci]);
                    e_rec3z+=coeff*eps_self(nz,info->nkz[0],info->recipbox[ZZ],info->pme_order[0],x[ci]);

                }
            }
        }

        clear_rvec(tmpvec2);

        svmul(e_rec3x,info->recipbox[XX],tmpvec);
        rvec_inc(tmpvec2,tmpvec);
        svmul(e_rec3y,info->recipbox[YY],tmpvec);
        rvec_inc(tmpvec2,tmpvec);
        svmul(e_rec3z,info->recipbox[ZZ],tmpvec);
        rvec_inc(tmpvec2,tmpvec);

        e_rec3 += q[ci]*q[ci]*q[ci]*q[ci]*norm2(tmpvec2) / ( xtot * M_PI * info->volume * M_PI * info->volume);
        if (MASTER(cr)){
            fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%",
                    100.0*(i+1)/stoplocal);

        }
    }

    if (MASTER(cr))
        fprintf(stderr, "\n");

#ifdef GMX_LIB_MPI
#ifdef TAKETIME
    if (MASTER(cr))
    {
        t1= MPI_Wtime() - t0;
        fprintf(fp_out, "Recip. err. est. took   : %lf s\n", t1);
    }
#endif
#endif
   
#ifdef DEBUG
    if (PAR(cr))
    {
        fprintf(stderr, "Node %3d: nx=[%3d...%3d]  e_rec3=%e\n", 
                cr->nodeid, startlocal, stoplocal, e_rec3);
    }
#endif

    if (PAR(cr))
    {
        gmx_sum(1,&e_rec1,cr);
        gmx_sum(1,&e_rec2,cr);
        gmx_sum(1,&e_rec3,cr);
    }
    
    /* e_rec1*=8.0 * q2_all / info->volume / info->volume / nr ;
       e_rec2*=  q2_all / M_PI / M_PI / info->volume / info->volume / nr ;
       e_rec3/= M_PI * M_PI * info->volume * info->volume * nr ; 
     */
    e_rec=sqrt(e_rec1+e_rec2+e_rec3);
    
    
    return ONE_4PI_EPS0 * e_rec;
}

示例#8

显示文件

文件： pmetest.c 项目： Ruyk/gromacs

static void do_my_pme(FILE *fp,real tm,gmx_bool bVerbose,t_inputrec *ir,
		      rvec x[],rvec xbuf[],rvec f[],
		      real charge[],real qbuf[],real qqbuf[],
		      matrix box,gmx_bool bSort,
		      t_commrec *cr,t_nsborder *nsb,t_nrnb *nrnb,
		      t_block *excl,real qtot,
		      t_forcerec *fr,int index[],FILE *fp_xvg,
		      int ngroups,unsigned short cENER[])
{
  real   ener,vcorr,q,xx,dvdl=0,vdip,vcharge;
  tensor vir,vir_corr,vir_tot;
  rvec   mu_tot[2];
  int    i,m,ii,ig,jg;
  real   **epme,*qptr;
  
  /* Initiate local variables */  
  fr->f_el_recip = f;
  clear_mat(vir);
  clear_mat(vir_corr);
  
  if (ngroups > 1) {
    fprintf(fp,"There are %d energy groups\n",ngroups);
    snew(epme,ngroups);
    for(i=0; (i<ngroups); i++)
      snew(epme[i],ngroups);
  }
    
  /* Put x is in the box, this part needs to be parallellized properly */
  /*put_atoms_in_box(box,nsb->natoms,x);*/
  /* Here sorting of X (and q) is done.
   * Alternatively, one could just put the atoms in one of the
   * cr->nnodes slabs. That is much cheaper than sorting.
   */
  for(i=0; (i<nsb->natoms); i++)
    index[i] = i;
  if (bSort) {
    xptr = x;
    qsort(index,nsb->natoms,sizeof(index[0]),comp_xptr);
    xptr = NULL; /* To trap unintentional use of the ptr */
  }
  /* After sorting we only need the part that is to be computed on 
   * this processor. We also compute the mu_tot here (system dipole)
   */
  clear_rvec(mu_tot[0]);
  for(i=START(nsb); (i<START(nsb)+HOMENR(nsb)); i++) {
    ii      = index[i];
    q       = charge[ii];
    qbuf[i] = q;
    for(m=0; (m<DIM); m++) {
      xx         = x[ii][m];
      xbuf[i][m] = xx;
      mu_tot[0][m] += q*xx;
    }
    clear_rvec(f[ii]);
  }
  copy_rvec(mu_tot[0],mu_tot[1]);
  if (debug) {
    pr_rvec(debug,0,"qbuf",qbuf,nsb->natoms,TRUE);
    pr_rvecs(debug,0,"xbuf",xbuf,nsb->natoms);
    pr_rvecs(debug,0,"box",box,DIM);
  }
  for(ig=0; (ig<ngroups); ig++) {
    for(jg=ig; (jg<ngroups); jg++) {
      if (ngroups > 1) {
	for(i=START(nsb); (i<START(nsb)+HOMENR(nsb)); i++) {
	  if ((cENER[i] == ig) || (cENER[i] == jg))
	    qqbuf[i] = qbuf[i];
	  else
	    qqbuf[i] = 0;
	}
	qptr = qqbuf;
      }
      else
	qptr = qbuf;
      ener  = do_pme(fp,bVerbose,ir,xbuf,f,qptr,qptr,box,cr,
		     nsb,nrnb,vir,fr->ewaldcoeff,FALSE,0,&dvdl,FALSE);
      vcorr = ewald_LRcorrection(fp,nsb,cr,fr,qptr,qptr,excl,xbuf,box,mu_tot,
				 ir->ewald_geometry,ir->epsilon_surface,
				 0,&dvdl,&vdip,&vcharge);
      gmx_sum(1,&ener,cr);
      gmx_sum(1,&vcorr,cr);
      if (ngroups > 1)
	epme[ig][jg] = ener+vcorr;
    }
  }
  if (ngroups > 1) {
    if (fp_xvg) 
      fprintf(fp_xvg,"%10.3f",tm);
    for(ig=0; (ig<ngroups); ig++) {
      for(jg=ig; (jg<ngroups); jg++) {
	if (ig != jg)
	  epme[ig][jg] -= epme[ig][ig]+epme[jg][jg];
	if (fp_xvg) 
	  fprintf(fp_xvg,"  %12.5e",epme[ig][jg]);
      }
    }
    if (fp_xvg) 
      fprintf(fp_xvg,"\n");
  }
  else {
    fprintf(fp,"Time: %10.3f Energy: %12.5e  Correction: %12.5e  Total: %12.5e\n",
	    tm,ener,vcorr,ener+vcorr);
    if (fp_xvg) 
      fprintf(fp_xvg,"%10.3f %12.5e %12.5e %12.5e\n",tm,ener+vcorr,vdip,vcharge);
    if (bVerbose) {
      m_add(vir,vir_corr,vir_tot);
      gmx_sum(9,vir_tot[0],cr);
      pr_rvecs(fp,0,"virial",vir_tot,DIM); 
    }
    fflush(fp);
  }
}

示例#9

显示文件

文件： sim_util.c 项目： Chadi-akel/cere

void do_force(FILE *log,t_commrec *cr,t_commrec *mcr,
	      t_parm *parm,t_nsborder *nsb,tensor vir_part,tensor pme_vir,
	      int step,t_nrnb *nrnb,t_topology *top,t_groups *grps,
	      rvec x[],rvec v[],rvec f[],rvec buf[],
	      t_mdatoms *mdatoms,real ener[],t_fcdata *fcd,bool bVerbose,
	      real lambda,t_graph *graph,
	      bool bNS,bool bNBFonly,t_forcerec *fr, rvec mu_tot,
	      bool bGatherOnly)
{
  static rvec box_size;
  static real dvdl_lr = 0;
  int    cg0,cg1,i,j;
  int    start,homenr;
  static real mu_and_q[DIM+1]; 
  real   qsum;
  
  start  = START(nsb);
  homenr = HOMENR(nsb);
  cg0    = CG0(nsb);
  cg1    = CG1(nsb);
  
  update_forcerec(log,fr,parm->box);

  /* Calculate total (local) dipole moment in a temporary common array. 
   * This makes it possible to sum them over nodes faster.
   */
  calc_mu_and_q(nsb,x,mdatoms->chargeT,mu_and_q,mu_and_q+DIM);

  if (fr->ePBC != epbcNONE) { 
    /* Compute shift vectors every step, because of pressure coupling! */
    if (parm->ir.epc != epcNO)
      calc_shifts(parm->box,box_size,fr->shift_vec);
    
    if (bNS) { 
      put_charge_groups_in_box(log,cg0,cg1,parm->box,box_size,
			       &(top->blocks[ebCGS]),x,fr->cg_cm);
      inc_nrnb(nrnb,eNR_RESETX,homenr);
    } else if (parm->ir.eI==eiSteep || parm->ir.eI==eiCG)
      unshift_self(graph,parm->box,x);

  }
  else if (bNS)
    calc_cgcm(log,cg0,cg1,&(top->blocks[ebCGS]),x,fr->cg_cm);
 
  if (bNS) {
    inc_nrnb(nrnb,eNR_CGCM,cg1-cg0);
    if (PAR(cr))
      move_cgcm(log,cr,fr->cg_cm,nsb->workload);
    if (debug)
      pr_rvecs(debug,0,"cgcm",fr->cg_cm,nsb->cgtotal);
  }
  
  /* Communicate coordinates and sum dipole and net charge if necessary */
  if (PAR(cr)) {
    move_x(log,cr->left,cr->right,x,nsb,nrnb);
    gmx_sum(DIM+1,mu_and_q,cr);
  }
  for(i=0;i<DIM;i++)
    mu_tot[i]=mu_and_q[i];
  qsum=mu_and_q[DIM];
  
  /* Reset energies */
  reset_energies(&(parm->ir.opts),grps,fr,bNS,ener);    
  if (bNS) {
    if (fr->ePBC != epbcNONE)
      /* Calculate intramolecular shift vectors to make molecules whole */
      mk_mshift(log,graph,parm->box,x);
	       
    /* Reset long range forces if necessary */
    if (fr->bTwinRange) {
      clear_rvecs(nsb->natoms,fr->f_twin);
      clear_rvecs(SHIFTS,fr->fshift_twin);
    }
    /* Do the actual neighbour searching and if twin range electrostatics
     * also do the calculation of long range forces and energies.
     */
    dvdl_lr = 0; 

    ns(log,fr,x,f,parm->box,grps,&(parm->ir.opts),top,mdatoms,
       cr,nrnb,nsb,step,lambda,&dvdl_lr);
  }
  /* Reset PME/Ewald forces if necessary */
  if (EEL_LR(fr->eeltype)) 
    clear_rvecs(homenr,fr->f_pme+start);
    
  /* Copy long range forces into normal buffers */
  if (fr->bTwinRange) {
    for(i=0; i<nsb->natoms; i++)
      copy_rvec(fr->f_twin[i],f[i]);
    for(i=0; i<SHIFTS; i++)
      copy_rvec(fr->fshift_twin[i],fr->fshift[i]);
  } 
  else {
    clear_rvecs(nsb->natoms,f);
    clear_rvecs(SHIFTS,fr->fshift);
  }
  
  /* Compute the forces */    
  force(log,step,fr,&(parm->ir),&(top->idef),nsb,cr,mcr,nrnb,grps,mdatoms,
	top->atoms.grps[egcENER].nr,&(parm->ir.opts),
	x,f,ener,fcd,bVerbose,parm->box,lambda,graph,&(top->atoms.excl),
	bNBFonly,pme_vir,mu_tot,qsum,bGatherOnly);
	
  /* Take long range contribution to free energy into account */
  ener[F_DVDL] += dvdl_lr;
  
#ifdef DEBUG
  if (bNS)
    print_nrnb(log,nrnb);
#endif

  /* The short-range virial from surrounding boxes */
  clear_mat(vir_part);
  calc_vir(log,SHIFTS,fr->shift_vec,fr->fshift,vir_part);
  inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);

  if (debug) 
    pr_rvecs(debug,0,"vir_shifts",vir_part,DIM);

  /* Compute forces due to electric field */
  calc_f_el(start,homenr,mdatoms->chargeT,f,parm->ir.ex);

  /* When using PME/Ewald we compute the long range virial (pme_vir) there.
   * otherwise we do it based on long range forces from twin range
   * cut-off based calculation (or not at all).
   */
  
  /* Communicate the forces */
  if (PAR(cr))
    move_f(log,cr->left,cr->right,f,buf,nsb,nrnb);
}

示例#10

显示文件

文件： gmx_pme_error.c 项目： andersx/gmx-debug

/* Estimate the reciprocal space part error of the SPME Ewald sum. */
static real estimate_reciprocal(
        t_inputinfo *info, 
        rvec x[],           /* array of particles */
        real q[],           /* array of charges */
        int nr,             /* number of charges = size of the charge array */
        FILE *fp_out,
        t_commrec *cr)
{
    real e_rec=0;   /* reciprocal error estimate */
    real e_rec1=0;  /* Error estimate term 1*/
    real e_rec2=0;  /* Error estimate term 2*/
    real e_rec3=0;  /* Error estimate term 3 */
    real e_rec3x=0; /* part of Error estimate term 3 in x */
    real e_rec3y=0; /* part of Error estimate term 3 in y */
    real e_rec3z=0; /* part of Error estimate term 3 in z */
    int i,ci;
    int nx,ny,nz;   /* grid coordinates */
    real q2_all=0;  /* sum of squared charges */
    rvec gridpx;    /* reciprocal grid point in x direction*/
    rvec gridpxy;   /* reciprocal grid point in x and y direction*/
    rvec gridp;     /* complete reciprocal grid point in 3 directions*/
    rvec tmpvec;    /* template to create points from basis vectors */
    rvec tmpvec2;   /* template to create points from basis vectors */
    real coeff=0;   /* variable to compute coefficients of the error estimate */
    real coeff2=0;   /* variable to compute coefficients of the error estimate */
    real tmp=0;     /* variables to compute different factors from vectors */
    real tmp1=0;
    real tmp2=0;
    real xtmp=0;
    real ytmp=0;
    real ztmp=0;
    double ewald_error;
    
    /* Random number generator */
    
    gmx_rng_t rng=NULL;
    /*rng=gmx_rng_init(gmx_rng_make_seed());  */

    /* Index variables for parallel work distribution */
    int startglobal,stopglobal;
    int startlocal, stoplocal;
    int x_per_core;
    int nrsamples;
    real xtot;    

/* #define TAKETIME */
#ifdef TAKETIME
    double t0=0.0;
    double t1=0.0;
    double t2=0.0;
#endif

    rng=gmx_rng_init(cr->nodeid);

    clear_rvec(gridpx);
    clear_rvec(gridpxy);
    clear_rvec(gridp);
    clear_rvec(tmpvec);
    clear_rvec(tmpvec2);

    for(i=0;i<nr;i++)
    {
        q2_all += q[i]*q[i];
    }
    
    /* Calculate indices for work distribution */
    startglobal=-info->nkx[0]/2;
    stopglobal = info->nkx[0]/2;
    xtot = stopglobal*2+1;
    if (PAR(cr))
    {
        x_per_core = ceil(xtot / cr->nnodes);
        startlocal = startglobal + x_per_core*cr->nodeid;
        stoplocal = startlocal + x_per_core -1;
        if (stoplocal > stopglobal)
             stoplocal = stopglobal;
    }
    else
    {
        startlocal = startglobal;
        stoplocal  = stopglobal;
        x_per_core = xtot;
    }
/*     
#ifdef GMX_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
*/

#ifdef TAKETIME
    if (MASTER(cr))
        t0 = MPI_Wtime();
#endif
    
    if (MASTER(cr)){
                         
        fprintf(stderr, "Calculating reciprocal error part 1 ...");
        
    }

    for(nx=startlocal; nx<=stoplocal; nx++)
    {   
        svmul(nx,info->recipbox[XX],gridpx);
        for(ny=-info->nky[0]/2; ny<info->nky[0]/2+1; ny++)
        {
            svmul(ny,info->recipbox[YY],tmpvec);
            rvec_add(gridpx,tmpvec,gridpxy);
            for(nz=-info->nkz[0]/2; nz<info->nkz[0]/2+1; nz++)
            {
                if (  0 == nx &&  0 == ny &&  0 == nz )
                    continue;
                svmul(nz,info->recipbox[ZZ],tmpvec);
                rvec_add(gridpxy,tmpvec,gridp);
                tmp=norm2(gridp);
                coeff=exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] ) ;
                coeff/= 2.0 * M_PI * info->volume * tmp;
                coeff2=tmp ;
                
                
                tmp=eps_poly2(nx,info->nkx[0],info->pme_order[0]);
                tmp+=eps_poly2(ny,info->nkx[0],info->pme_order[0]);
                tmp+=eps_poly2(nz,info->nkx[0],info->pme_order[0]);
                
                tmp1=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
                tmp2=eps_poly1(ny,info->nky[0],info->pme_order[0]);
                
                tmp+=2.0 * tmp1 * tmp2;
                
                tmp1=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
                tmp2=eps_poly1(ny,info->nky[0],info->pme_order[0]);
                
                tmp+=2.0 * tmp1 * tmp2;
                
                tmp1=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
                tmp2=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
                
                tmp+=2.0 * tmp1 * tmp2;
                
                tmp1=eps_poly1(nx,info->nkx[0],info->pme_order[0]);
                tmp1+=eps_poly1(ny,info->nky[0],info->pme_order[0]);
                tmp1+=eps_poly1(nz,info->nkz[0],info->pme_order[0]);
                
                tmp+= tmp1 * tmp1;
                
                e_rec1+= 32.0 * M_PI * M_PI * coeff * coeff * coeff2 * tmp  * q2_all * q2_all / nr ;

                tmp1=eps_poly3(nx,info->nkx[0],info->pme_order[0]);
                tmp1*=info->nkx[0];
                tmp2=iprod(gridp,info->recipbox[XX]);
                
                tmp=tmp1*tmp2;
                
                tmp1=eps_poly3(ny,info->nky[0],info->pme_order[0]);
                tmp1*=info->nky[0];
                tmp2=iprod(gridp,info->recipbox[YY]);
                
                tmp+=tmp1*tmp2;
                
                tmp1=eps_poly3(nz,info->nkz[0],info->pme_order[0]);
                tmp1*=info->nkz[0];
                tmp2=iprod(gridp,info->recipbox[ZZ]);
                
                tmp+=tmp1*tmp2;
                
                tmp*=4.0 * M_PI;
                
                tmp1=eps_poly4(nx,info->nkx[0],info->pme_order[0]);
                tmp1*=norm2(info->recipbox[XX]);
                tmp1*=info->nkx[0] * info->nkx[0];
                
                tmp+=tmp1;
                
                tmp1=eps_poly4(ny,info->nky[0],info->pme_order[0]);
                tmp1*=norm2(info->recipbox[YY]);
                tmp1*=info->nky[0] * info->nky[0];
                
                tmp+=tmp1;
                
                tmp1=eps_poly4(nz,info->nkz[0],info->pme_order[0]);
                tmp1*=norm2(info->recipbox[ZZ]);
                tmp1*=info->nkz[0] * info->nkz[0];
                
                tmp+=tmp1;
                
                e_rec2+= 4.0 * coeff * coeff * tmp * q2_all * q2_all / nr ;
                
            }
        }
        if (MASTER(cr))
            fprintf(stderr, "\rCalculating reciprocal error part 1 ... %3.0f%%", 100.0*(nx-startlocal+1)/(x_per_core));
        
    }

/* 
#ifdef GMX_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
*/

    if (MASTER(cr))
        fprintf(stderr, "\n");
    if (info->fracself>0)
    {
        nrsamples=ceil(info->fracself*nr);
    }
    else
    {
        nrsamples=nr;
    }
    
    
    xtot=nrsamples;


    startglobal=0;
    stopglobal=nr;

    if(PAR(cr))
    {
        x_per_core=ceil(xtot/cr->nnodes);
        startlocal=startglobal+x_per_core*cr->nodeid;
        stoplocal=startglobal+x_per_core*(cr->nodeid+1);
        if (stoplocal>stopglobal)
            stoplocal=stopglobal;
    }
    else
    {
        startlocal=startglobal;
        stoplocal=stopglobal;
        x_per_core=xtot;
    }



    for(i=startlocal;i<stoplocal;i++)
    {
        e_rec3x=0;
        e_rec3y=0;
        e_rec3z=0;

        if (info->fracself<0) {
            ci=i;
        }else {
            ci=floor(gmx_rng_uniform_real(rng) * nr );
            if (ci==nr)
            {
                ci=nr-1;
            }
        }

        /* for(nx=startlocal; nx<=stoplocal; nx++)*/
        for(nx=-info->nkx[0]/2; nx<info->nkx[0]/2+1; nx++) 
        {   
            svmul(nx,info->recipbox[XX],gridpx);
            for(ny=-info->nky[0]/2; ny<info->nky[0]/2+1; ny++)
            {
                svmul(ny,info->recipbox[YY],tmpvec);
                rvec_add(gridpx,tmpvec,gridpxy);
                for(nz=-info->nkz[0]/2; nz<info->nkz[0]/2+1; nz++)
                {
                    
                    if (  0 == nx && 0 == ny && 0 == nz)
                        continue;
                    
                    svmul(nz,info->recipbox[ZZ],tmpvec);
                    rvec_add(gridpxy,tmpvec,gridp);
                    tmp=norm2(gridp);
                    coeff=exp(-1.0 * M_PI * M_PI * tmp / info->ewald_beta[0] / info->ewald_beta[0] );
                    coeff/= tmp ;
                    e_rec3x+=coeff*eps_self(nx,info->nkx[0],info->recipbox[XX],info->pme_order[0],x[ci]);
                    e_rec3y+=coeff*eps_self(ny,info->nky[0],info->recipbox[YY],info->pme_order[0],x[ci]);
                    e_rec3z+=coeff*eps_self(nz,info->nkz[0],info->recipbox[ZZ],info->pme_order[0],x[ci]);

                }
            }
        }

        clear_rvec(tmpvec2);

        svmul(e_rec3x,info->recipbox[XX],tmpvec);
        rvec_inc(tmpvec2,tmpvec);
        svmul(e_rec3y,info->recipbox[YY],tmpvec);
        rvec_inc(tmpvec2,tmpvec);
        svmul(e_rec3z,info->recipbox[ZZ],tmpvec);
        rvec_inc(tmpvec2,tmpvec);

        e_rec3 += q[ci]*q[ci]*q[ci]*q[ci]*norm2(tmpvec2) / ( nrsamples * M_PI * info->volume * M_PI * info->volume); 
        if (MASTER(cr)){
            fprintf(stderr, "\rCalculating reciprocal error part 2 ... %3.0f%%",
                    100.0*(i+1)/stoplocal);

        }
       
    }

    if (MASTER(cr))
        fprintf(stderr, "\n");


#ifdef TAKETIME
    if (MASTER(cr))
    {
        t1= MPI_Wtime() - t0;
        fprintf(fp_out, "Recip. err. est. took   : %lf s\n", t1);
    }
#endif
   
#ifdef DEBUG
    if (PAR(cr))
    {
        fprintf(stderr, "Node %3d: nx=[%3d...%3d]  e_rec3=%e\n", 
                cr->nodeid, startlocal, stoplocal, e_rec3);
    }
#endif
   

/*
#ifdef GMX_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
  */ 

#ifdef TAKETIME
    if (MASTER(cr))
    {
        t2= MPI_Wtime() - t0;
        fprintf(fp_out, "barrier   : %lf s\n", t2-t1);
    }
#endif

    if (PAR(cr))
    {
        gmx_sum(1,&e_rec1,cr);
        gmx_sum(1,&e_rec2,cr);
        gmx_sum(1,&e_rec3,cr);
    }
    
#ifdef TAKETIME
    if (MASTER(cr))
        fprintf(fp_out, "final reduce : %lf s\n", MPI_Wtime() - t0-t2);
#endif
    /* e_rec1*=8.0 * q2_all / info->volume / info->volume / nr ;
       e_rec2*=  q2_all / M_PI / M_PI / info->volume / info->volume / nr ;
       e_rec3/= M_PI * M_PI * info->volume * info->volume * nr ; 
     */
    e_rec=sqrt(e_rec1+e_rec2+e_rec3);
    
    
    return ONE_4PI_EPS0 * e_rec;
}

示例#11

显示文件

文件： genborn_sse2_double.c 项目： alexholehouse/gromacs

int 
calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr,
                              int natoms, gmx_localtop_t *top,
                              const t_atomtypes *atype, double *x, t_nblist *nl,
                              gmx_genborn_t *born)
{
	int i,k,n,ii,is3,ii3,nj0,nj1,offset;
	int jnrA,jnrB,j3A,j3B;
    int *mdtype;
	double shX,shY,shZ;
    int *jjnr;
    double *shiftvec;
    
	double gpi_ai,gpi2;
	double factor;
	double *gb_radius;
    double *vsolv;
    double *work;
    double *dadx;
    
	__m128d ix,iy,iz;
	__m128d jx,jy,jz;
	__m128d dx,dy,dz;
	__m128d tx,ty,tz;
	__m128d rsq,rinv,rinv2,rinv4,rinv6;
	__m128d ratio,gpi,rai,raj,vai,vaj,rvdw;
	__m128d ccf,dccf,theta,cosq,term,sinq,res,prod,prod_ai,tmp;
	__m128d mask,icf4,icf6,mask_cmp;
	    
	const __m128d half   = _mm_set1_pd(0.5);
	const __m128d three  = _mm_set1_pd(3.0);
	const __m128d one    = _mm_set1_pd(1.0);
	const __m128d two    = _mm_set1_pd(2.0);
	const __m128d zero   = _mm_set1_pd(0.0);
	const __m128d four   = _mm_set1_pd(4.0);
	
	const __m128d still_p5inv  = _mm_set1_pd(STILL_P5INV);
	const __m128d still_pip5   = _mm_set1_pd(STILL_PIP5);
	const __m128d still_p4     = _mm_set1_pd(STILL_P4);
    
	factor  = 0.5 * ONE_4PI_EPS0;
    
    gb_radius = born->gb_radius;
    vsolv     = born->vsolv;
    work      = born->gpol_still_work;
	jjnr      = nl->jjnr;
    shiftvec  = fr->shift_vec[0];
    dadx      = fr->dadx;
    
	jnrA = jnrB = 0;
    jx = _mm_setzero_pd();
    jy = _mm_setzero_pd();
    jz = _mm_setzero_pd();
    
	n = 0;
    
	for(i=0;i<natoms;i++)
	{
		work[i]=0;
	}
    
	for(i=0;i<nl->nri;i++)
	{
        ii     = nl->iinr[i];
		ii3	   = ii*3;
        is3    = 3*nl->shift[i];     
        shX    = shiftvec[is3];  
        shY    = shiftvec[is3+1];
        shZ    = shiftvec[is3+2];
        nj0    = nl->jindex[i];      
        nj1    = nl->jindex[i+1];    
        
        ix     = _mm_set1_pd(shX+x[ii3+0]);
		iy     = _mm_set1_pd(shY+x[ii3+1]);
		iz     = _mm_set1_pd(shZ+x[ii3+2]);
		

		/* Polarization energy for atom ai */
		gpi    = _mm_setzero_pd();
		
        rai     = _mm_load1_pd(gb_radius+ii);
        prod_ai = _mm_set1_pd(STILL_P4*vsolv[ii]);

		for(k=nj0;k<nj1-1;k+=2)
		{
			jnrA        = jjnr[k];   
			jnrB        = jjnr[k+1];
            
            j3A         = 3*jnrA;  
			j3B         = 3*jnrB;
            
            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
            
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
			GMX_MM_LOAD_2VALUES_PD(vsolv+jnrA,vsolv+jnrB,vaj);
            
			dx          = _mm_sub_pd(ix,jx);
			dy          = _mm_sub_pd(iy,jy);
			dz          = _mm_sub_pd(iz,jz);
            
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv        = gmx_mm_invsqrt_pd(rsq);
            rinv2       = _mm_mul_pd(rinv,rinv);
            rinv4       = _mm_mul_pd(rinv2,rinv2);
            rinv6       = _mm_mul_pd(rinv4,rinv2);
            
            rvdw        = _mm_add_pd(rai,raj);
            ratio       = _mm_mul_pd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw)));
            
            mask_cmp    = _mm_cmple_pd(ratio,still_p5inv);

            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if( 0 == _mm_movemask_pd(mask_cmp) )
            {
                /* if ratio>still_p5inv for ALL elements */
                ccf         = one;
                dccf        = _mm_setzero_pd();
            }
            else 
            {
                ratio       = _mm_min_pd(ratio,still_p5inv);
                theta       = _mm_mul_pd(ratio,still_pip5);
                gmx_mm_sincos_pd(theta,&sinq,&cosq);
                term        = _mm_mul_pd(half,_mm_sub_pd(one,cosq));
                ccf         = _mm_mul_pd(term,term);
                dccf        = _mm_mul_pd(_mm_mul_pd(two,term),
                                         _mm_mul_pd(sinq,theta));
            }

            prod        = _mm_mul_pd(still_p4,vaj);
            icf4        = _mm_mul_pd(ccf,rinv4);
            icf6        = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four,ccf),dccf), rinv6);
                        
            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_mul_pd(prod_ai,icf4));
            
            gpi           = _mm_add_pd(gpi, _mm_mul_pd(prod,icf4) );
            
            _mm_store_pd(dadx,_mm_mul_pd(prod,icf6));
            dadx+=2;
            _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6));
            dadx+=2;
		} 
        
        if(k<nj1)
		{
			jnrA        = jjnr[k];   
            
            j3A         = 3*jnrA;  
            
            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
            
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
			GMX_MM_LOAD_1VALUE_PD(vsolv+jnrA,vaj);
            
			dx          = _mm_sub_sd(ix,jx);
			dy          = _mm_sub_sd(iy,jy);
			dz          = _mm_sub_sd(iz,jz);
            
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv        = gmx_mm_invsqrt_pd(rsq);
            rinv2       = _mm_mul_sd(rinv,rinv);
            rinv4       = _mm_mul_sd(rinv2,rinv2);
            rinv6       = _mm_mul_sd(rinv4,rinv2);
            
            rvdw        = _mm_add_sd(rai,raj);
            ratio       = _mm_mul_sd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw)));
            
            mask_cmp    = _mm_cmple_sd(ratio,still_p5inv);
            
            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if( 0 == _mm_movemask_pd(mask_cmp) )
            {
                /* if ratio>still_p5inv for ALL elements */
                ccf         = one;
                dccf        = _mm_setzero_pd();
            }
            else 
            {
                ratio       = _mm_min_sd(ratio,still_p5inv);
                theta       = _mm_mul_sd(ratio,still_pip5);
                gmx_mm_sincos_pd(theta,&sinq,&cosq);
                term        = _mm_mul_sd(half,_mm_sub_sd(one,cosq));
                ccf         = _mm_mul_sd(term,term);
                dccf        = _mm_mul_sd(_mm_mul_sd(two,term),
                                         _mm_mul_sd(sinq,theta));
            }
            
            prod        = _mm_mul_sd(still_p4,vaj);
            icf4        = _mm_mul_sd(ccf,rinv4);
            icf6        = _mm_mul_sd( _mm_sub_sd( _mm_mul_sd(four,ccf),dccf), rinv6);

            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_mul_sd(prod_ai,icf4));
            
            gpi           = _mm_add_sd(gpi, _mm_mul_sd(prod,icf4) );
            
            _mm_store_pd(dadx,_mm_mul_pd(prod,icf6));
            dadx+=2;
            _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6));
            dadx+=2;
		} 
        gmx_mm_update_1pot_pd(gpi,work+ii);
	}
    
	/* Sum up the polarization energy from other nodes */
	if(PARTDECOMP(cr))
	{
		gmx_sum(natoms, work, cr);
	}
	else if(DOMAINDECOMP(cr))
	{
		dd_atom_sum_real(cr->dd, work);
	}
	
	/* Compute the radii */
	for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
	{		
		if(born->use[i] != 0)
		{
			gpi_ai           = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/
			gpi2             = gpi_ai * gpi_ai;
			born->bRad[i]   = factor*gmx_invsqrt(gpi2);
			fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
		}
	}
    
	/* Extra (local) communication required for DD */
	if(DOMAINDECOMP(cr))
	{
		dd_atom_spread_real(cr->dd, born->bRad);
		dd_atom_spread_real(cr->dd, fr->invsqrta);
	}
    
	return 0;	
}

示例#12

显示文件

文件： genborn_sse2_double.c 项目： alexholehouse/gromacs

int 
calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
                                const t_atomtypes *atype, double *x, t_nblist *nl, gmx_genborn_t *born,t_mdatoms *md,int gb_algorithm)
{
	int i,ai,k,n,ii,ii3,is3,nj0,nj1,at0,at1,offset;
    int jnrA,jnrB;
    int j3A,j3B;
	double shX,shY,shZ;
	double rr,rr_inv,rr_inv2,sum_tmp,sum,sum2,sum3,gbr;
	double sum_ai2, sum_ai3,tsum,tchain,doffset;
	double *obc_param;
    double *gb_radius;
    double *work;
    int *  jjnr;
    double *dadx;
    double *shiftvec;
    double min_rad,rad;
    
	__m128d ix,iy,iz,jx,jy,jz;
	__m128d dx,dy,dz,t1,t2,t3,t4;
	__m128d rsq,rinv,r;
	__m128d rai,rai_inv,raj, raj_inv,rai_inv2,sk,sk2,lij,dlij,duij;
	__m128d uij,lij2,uij2,lij3,uij3,diff2;
	__m128d lij_inv,sk2_inv,prod,log_term,tmp,tmp_sum;
	__m128d sum_ai, tmp_ai,sk_ai,sk_aj,sk2_ai,sk2_aj,sk2_rinv;
	__m128d dadx1,dadx2;
    __m128d logterm;
	__m128d mask;
	__m128d obc_mask1,obc_mask2,obc_mask3;    
    
    __m128d oneeighth   = _mm_set1_pd(0.125);
    __m128d onefourth   = _mm_set1_pd(0.25);
    
	const __m128d half  = _mm_set1_pd(0.5);
	const __m128d three = _mm_set1_pd(3.0);
	const __m128d one   = _mm_set1_pd(1.0);
	const __m128d two   = _mm_set1_pd(2.0);
	const __m128d zero  = _mm_set1_pd(0.0);
	const __m128d neg   = _mm_set1_pd(-1.0);
	
	/* Set the dielectric offset */
	doffset   = born->gb_doffset;
	gb_radius = born->gb_radius;
    obc_param = born->param;
    work      = born->gpol_hct_work;
    jjnr      = nl->jjnr;
    dadx      = fr->dadx;
    shiftvec  = fr->shift_vec[0];
    
    jx        = _mm_setzero_pd();
    jy        = _mm_setzero_pd();
    jz        = _mm_setzero_pd();
    
    jnrA = jnrB = 0;
    
	for(i=0;i<born->nr;i++)
	{
		work[i] = 0;
	}
	
	for(i=0;i<nl->nri;i++)
	{
        ii     = nl->iinr[i];
		ii3	   = ii*3;
        is3    = 3*nl->shift[i];     
        shX    = shiftvec[is3];  
        shY    = shiftvec[is3+1];
        shZ    = shiftvec[is3+2];
        nj0    = nl->jindex[i];      
        nj1    = nl->jindex[i+1];    
        
        ix     = _mm_set1_pd(shX+x[ii3+0]);
		iy     = _mm_set1_pd(shY+x[ii3+1]);
		iz     = _mm_set1_pd(shZ+x[ii3+2]);
		        
		rai    = _mm_load1_pd(gb_radius+ii);
		rai_inv= gmx_mm_inv_pd(rai);
        
		sum_ai = _mm_setzero_pd();
		
		sk_ai  = _mm_load1_pd(born->param+ii);
		sk2_ai = _mm_mul_pd(sk_ai,sk_ai);
        
		for(k=nj0;k<nj1-1;k+=2)
		{
			jnrA        = jjnr[k];   
			jnrB        = jjnr[k+1];
			
            j3A         = 3*jnrA;  
			j3B         = 3*jnrB;
            
            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
            GMX_MM_LOAD_2VALUES_PD(obc_param+jnrA,obc_param+jnrB,sk_aj);
			
            dx    = _mm_sub_pd(ix, jx);
			dy    = _mm_sub_pd(iy, jy);
			dz    = _mm_sub_pd(iz, jz);
			
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            
            rinv        = gmx_mm_invsqrt_pd(rsq);
            r           = _mm_mul_pd(rsq,rinv);
            
			/* Compute raj_inv aj1-4 */
            raj_inv     = gmx_mm_inv_pd(raj);
            
            /* Evaluate influence of atom aj -> ai */
            t1            = _mm_add_pd(r,sk_aj);
            t2            = _mm_sub_pd(r,sk_aj);
            t3            = _mm_sub_pd(sk_aj,r);
            obc_mask1     = _mm_cmplt_pd(rai, t1);
            obc_mask2     = _mm_cmplt_pd(rai, t2);
            obc_mask3     = _mm_cmplt_pd(rai, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,rai_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_pd(uij, uij);
            uij3          = _mm_mul_pd(uij2,uij);
            lij2          = _mm_mul_pd(lij, lij);
            lij3          = _mm_mul_pd(lij2,lij);
                        
            diff2         = _mm_sub_pd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_aj        = _mm_mul_pd(sk_aj,sk_aj);
            sk2_rinv      = _mm_mul_pd(sk2_aj,rinv);
            prod          = _mm_mul_pd(onefourth,sk2_rinv);
                        
            logterm       = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));
            
            t1            = _mm_sub_pd(lij,uij);
            t2            = _mm_mul_pd(diff2,
                                       _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1            = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4            = _mm_mul_pd(two,_mm_sub_pd(rai_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_pd(half,_mm_add_pd(t1,t4));
                        
            sum_ai        = _mm_add_pd(sum_ai, _mm_and_pd(t1,obc_mask1) );
            
            t1            = _mm_add_pd(_mm_mul_pd(half,lij2),
                                       _mm_mul_pd(prod,lij3));
            t1            = _mm_sub_pd(t1,
                                       _mm_mul_pd(onefourth,
                                                  _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                             _mm_mul_pd(lij3,r))));
            t2            = _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(uij,rinv),
                                                  _mm_mul_pd(uij3,r)));
            t2            = _mm_sub_pd(t2,
                                       _mm_add_pd(_mm_mul_pd(half,uij2),
                                                  _mm_mul_pd(prod,uij3)));
            t3            = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                                       _mm_mul_pd(rinv,rinv));
            t3            = _mm_sub_pd(t3,
                                       _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                                  _mm_add_pd(one,
                                                             _mm_mul_pd(sk2_rinv,rinv))));
            t1            = _mm_mul_pd(rinv,
                                       _mm_add_pd(_mm_mul_pd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx1         = _mm_and_pd(t1,obc_mask1);
            
            /* Evaluate influence of atom ai -> aj */
            t1            = _mm_add_pd(r,sk_ai);
            t2            = _mm_sub_pd(r,sk_ai);
            t3            = _mm_sub_pd(sk_ai,r);
            obc_mask1     = _mm_cmplt_pd(raj, t1);
            obc_mask2     = _mm_cmplt_pd(raj, t2);
            obc_mask3     = _mm_cmplt_pd(raj, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,raj_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_pd(uij, uij);
            uij3          = _mm_mul_pd(uij2,uij);
            lij2          = _mm_mul_pd(lij, lij);
            lij3          = _mm_mul_pd(lij2,lij);
                        
            diff2         = _mm_sub_pd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv      = _mm_mul_pd(sk2_ai,rinv);
            prod          = _mm_mul_pd(onefourth,sk2_rinv);
                        
            logterm       = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));
            
            t1            = _mm_sub_pd(lij,uij);
            t2            = _mm_mul_pd(diff2,
                                       _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1            = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4            = _mm_mul_pd(two,_mm_sub_pd(raj_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_pd(half,_mm_add_pd(t1,t4));
                        
            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_and_pd(t1,obc_mask1));
            
            t1            = _mm_add_pd(_mm_mul_pd(half,lij2),
                                       _mm_mul_pd(prod,lij3));
            t1            = _mm_sub_pd(t1,
                                       _mm_mul_pd(onefourth,
                                                  _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                             _mm_mul_pd(lij3,r))));
            t2            = _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(uij,rinv),
                                                  _mm_mul_pd(uij3,r)));
            t2            = _mm_sub_pd(t2,
                                       _mm_add_pd(_mm_mul_pd(half,uij2),
                                                  _mm_mul_pd(prod,uij3)));
            t3            = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                                       _mm_mul_pd(rinv,rinv));
            t3            = _mm_sub_pd(t3,
                                       _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                                  _mm_add_pd(one,
                                                             _mm_mul_pd(sk2_rinv,rinv))));
            t1            = _mm_mul_pd(rinv,
                                       _mm_add_pd(_mm_mul_pd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx2         = _mm_and_pd(t1,obc_mask1);
            
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        } /* end normal inner loop */
        
		if(k<nj1)
		{
			jnrA        = jjnr[k];   
			
            j3A         = 3*jnrA;  
            
            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
            GMX_MM_LOAD_1VALUE_PD(obc_param+jnrA,sk_aj);
			
            dx    = _mm_sub_sd(ix, jx);
			dy    = _mm_sub_sd(iy, jy);
			dz    = _mm_sub_sd(iz, jz);
			
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            
            rinv        = gmx_mm_invsqrt_pd(rsq);
            r           = _mm_mul_sd(rsq,rinv);
            
			/* Compute raj_inv aj1-4 */
            raj_inv     = gmx_mm_inv_pd(raj);
            
            /* Evaluate influence of atom aj -> ai */
            t1            = _mm_add_sd(r,sk_aj);
            t2            = _mm_sub_sd(r,sk_aj);
            t3            = _mm_sub_sd(sk_aj,r);
            obc_mask1     = _mm_cmplt_sd(rai, t1);
            obc_mask2     = _mm_cmplt_sd(rai, t2);
            obc_mask3     = _mm_cmplt_sd(rai, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(_mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,rai_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_sd(uij, uij);
            uij3          = _mm_mul_sd(uij2,uij);
            lij2          = _mm_mul_sd(lij, lij);
            lij3          = _mm_mul_sd(lij2,lij);
            
            diff2         = _mm_sub_sd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_aj        = _mm_mul_sd(sk_aj,sk_aj);
            sk2_rinv      = _mm_mul_sd(sk2_aj,rinv);
            prod          = _mm_mul_sd(onefourth,sk2_rinv);
            
            logterm       = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));
            
            t1            = _mm_sub_sd(lij,uij);
            t2            = _mm_mul_sd(diff2,
                                       _mm_sub_sd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1            = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4            = _mm_mul_sd(two,_mm_sub_sd(rai_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_sd(half,_mm_add_sd(t1,t4));
            
            sum_ai        = _mm_add_sd(sum_ai, _mm_and_pd(t1,obc_mask1) );
            
            t1            = _mm_add_sd(_mm_mul_sd(half,lij2),
                                       _mm_mul_sd(prod,lij3));
            t1            = _mm_sub_sd(t1,
                                       _mm_mul_sd(onefourth,
                                                  _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                             _mm_mul_sd(lij3,r))));
            t2            = _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(uij,rinv),
                                                  _mm_mul_sd(uij3,r)));
            t2            = _mm_sub_sd(t2,
                                       _mm_add_sd(_mm_mul_sd(half,uij2),
                                                  _mm_mul_sd(prod,uij3)));
            t3            = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                                       _mm_mul_sd(rinv,rinv));
            t3            = _mm_sub_sd(t3,
                                       _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                                  _mm_add_sd(one,
                                                             _mm_mul_sd(sk2_rinv,rinv))));
            t1            = _mm_mul_sd(rinv,
                                       _mm_add_sd(_mm_mul_sd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx1         = _mm_and_pd(t1,obc_mask1);
            
            /* Evaluate influence of atom ai -> aj */
            t1            = _mm_add_sd(r,sk_ai);
            t2            = _mm_sub_sd(r,sk_ai);
            t3            = _mm_sub_sd(sk_ai,r);
            obc_mask1     = _mm_cmplt_sd(raj, t1);
            obc_mask2     = _mm_cmplt_sd(raj, t2);
            obc_mask3     = _mm_cmplt_sd(raj, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,raj_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_sd(uij, uij);
            uij3          = _mm_mul_sd(uij2,uij);
            lij2          = _mm_mul_sd(lij, lij);
            lij3          = _mm_mul_sd(lij2,lij);
            
            diff2         = _mm_sub_sd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv      = _mm_mul_sd(sk2_ai,rinv);
            prod          = _mm_mul_sd(onefourth,sk2_rinv);
            
            logterm       = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));
            
            t1            = _mm_sub_sd(lij,uij);
            t2            = _mm_mul_sd(diff2,
                                       _mm_sub_sd(_mm_mul_sd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1            = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4            = _mm_mul_sd(two,_mm_sub_sd(raj_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_sd(half,_mm_add_sd(t1,t4));
            
            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_and_pd(t1,obc_mask1));
            
            t1            = _mm_add_sd(_mm_mul_sd(half,lij2),
                                       _mm_mul_sd(prod,lij3));
            t1            = _mm_sub_sd(t1,
                                       _mm_mul_sd(onefourth,
                                                  _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                             _mm_mul_sd(lij3,r))));
            t2            = _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(uij,rinv),
                                                  _mm_mul_sd(uij3,r)));
            t2            = _mm_sub_sd(t2,
                                       _mm_add_sd(_mm_mul_sd(half,uij2),
                                                  _mm_mul_sd(prod,uij3)));
            t3            = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                                       _mm_mul_sd(rinv,rinv));
            t3            = _mm_sub_sd(t3,
                                       _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                                  _mm_add_sd(one,
                                                             _mm_mul_sd(sk2_rinv,rinv))));
            t1            = _mm_mul_sd(rinv,
                                       _mm_add_sd(_mm_mul_sd(dlij,t1),
                                                  _mm_add_sd(t2,t3)));
            
            dadx2         = _mm_and_pd(t1,obc_mask1);
            
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        } 
        gmx_mm_update_1pot_pd(sum_ai,work+ii);
        
	}
	
	/* Parallel summations */
	if(PARTDECOMP(cr))
	{
		gmx_sum(natoms, work, cr);
	}
	else if(DOMAINDECOMP(cr))
	{
		dd_atom_sum_real(cr->dd, work);
	}
	
    if(gb_algorithm==egbHCT)
    {
        /* HCT */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
			if(born->use[i] != 0)
            {
                rr      = top->atomtypes.gb_radius[md->typeA[i]]-doffset; 
                sum     = 1.0/rr - work[i];
                min_rad = rr + doffset;
                rad     = 1.0/sum; 
                
                born->bRad[i]   = rad > min_rad ? rad : min_rad;
                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
            }
        }
        
        /* Extra communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
        }
    }
    else
    {
        /* OBC */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
			if(born->use[i] != 0)
            {
                rr      = top->atomtypes.gb_radius[md->typeA[i]];
                rr_inv2 = 1.0/rr;
                rr      = rr-doffset; 
                rr_inv  = 1.0/rr;
                sum     = rr * work[i];
                sum2    = sum  * sum;
                sum3    = sum2 * sum;
                
                tsum    = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
                born->bRad[i] = rr_inv - tsum*rr_inv2;
                born->bRad[i] = 1.0 / born->bRad[i];
                
                fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);
                
                tchain  = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
                born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
            }
        }
        /* Extra (local) communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
            dd_atom_spread_real(cr->dd, born->drobc);
        }
    }
    
	
	
	return 0;
}

示例#13

显示文件

文件： groupcoord.cpp 项目： friforever/gromacs

/* Assemble the positions of the group such that every node has all of them.
 * The atom indices are retrieved from anrs_loc[0..nr_loc]
 * Note that coll_ind[i] = i is needed in the serial case */
extern void communicate_group_positions(
        const t_commrec *cr,           /* Pointer to MPI communication data */
        rvec            *xcoll,        /* Collective array of positions */
        ivec            *shifts,       /* Collective array of shifts for xcoll (can be NULL) */
        ivec            *extra_shifts, /* (optional) Extra shifts since last time step */
        const gmx_bool   bNS,          /* (optional) NS step, the shifts have changed */
        const rvec      *x_loc,        /* Local positions on this node */
        const int        nr,           /* Total number of atoms in the group */
        const int        nr_loc,       /* Local number of atoms in the group */
        const int       *anrs_loc,     /* Local atom numbers */
        const int       *coll_ind,     /* Collective index */
        rvec            *xcoll_old,    /* (optional) Positions from the last time step,
                                          used to make group whole */
        const matrix     box)          /* (optional) The box */
{
    int i;


    /* Zero out the groups' global position array */
    clear_rvecs(nr, xcoll);

    /* Put the local positions that this node has into the right place of
     * the collective array. Note that in the serial case, coll_ind[i] = i */
    for (i = 0; i < nr_loc; i++)
    {
        copy_rvec(x_loc[anrs_loc[i]], xcoll[coll_ind[i]]);
    }

    if (PAR(cr))
    {
        /* Add the arrays from all nodes together */
        gmx_sum(nr*3, xcoll[0], cr);
    }
    /* Now we have all the positions of the group in the xcoll array present on all
     * nodes.
     *
     * The rest of the code is for making the group whole again in case atoms changed
     * their PBC representation / crossed a box boundary. We only do that if the
     * shifts array is allocated. */
    if (nullptr != shifts)
    {
        /* To make the group whole, start with a whole group and each
         * step move the assembled positions at closest distance to the positions
         * from the last step. First shift the positions with the saved shift
         * vectors (these are 0 when this routine is called for the first time!) */
        shift_positions_group(box, xcoll, shifts, nr);

        /* Now check if some shifts changed since the last step.
         * This only needs to be done when the shifts are expected to have changed,
         * i.e. after neighbor searching */
        if (bNS)
        {
            get_shifts_group(3, box, xcoll, nr, xcoll_old, extra_shifts);

            /* Shift with the additional shifts such that we get a whole group now */
            shift_positions_group(box, xcoll, extra_shifts, nr);

            /* Add the shift vectors together for the next time step */
            for (i = 0; i < nr; i++)
            {
                shifts[i][XX] += extra_shifts[i][XX];
                shifts[i][YY] += extra_shifts[i][YY];
                shifts[i][ZZ] += extra_shifts[i][ZZ];
            }

            /* Store current correctly-shifted positions for comparison in the next NS time step */
            for (i = 0; i < nr; i++)
            {
                copy_rvec(xcoll[i], xcoll_old[i]);
            }
        }
    }
}

示例#14

显示文件

文件： orires.c 项目： Chadi-akel/cere

void init_orires(FILE *log,int nfa,t_iatom forceatoms[],t_iparams ip[],
		 rvec *xref,t_mdatoms *md,t_inputrec *ir,
		 t_commrec *mcr,t_fcdata *fcd)
{
  int  i,j,d,ex,nr,*nr_ex;
  real mtot;
  rvec com;
  t_oriresdata *od;
  
  od = &(fcd->orires);
  od->fc  = ir->orires_fc;
  od->nex = 0;
  od->S   = NULL;

  if (ir->orires_tau > 0)
    od->edt = exp(-ir->delta_t/ir->orires_tau);
  else
    od->edt = 0;
  od->edt1 = 1 - od->edt;
  od->exp_min_t_tau = 1.0;
  od->nr = nfa/3;
  
  if (od->nr == 0)
    return;

  nr_ex = NULL;

  for(i=0; i<nfa; i+=3) {
    ex = ip[forceatoms[i]].orires.ex;
    if (ex >= od->nex) {
      srenew(nr_ex,ex+1);
      for(j=od->nex; j<ex+1; j++)
	nr_ex[j] = 0;
      od->nex = ex+1;
    }
    nr_ex[ex]++;
  }
  snew(od->S,od->nex);
  /* When not doing time averaging, the instaneous and time averaged data
   * are indentical and the pointers can point to the same memory.
   */
  snew(od->Dinsl,od->nr);
  if (mcr)
    snew(od->Dins,od->nr);
  else
    od->Dins = od->Dinsl;
  if (fabs(ir->orires_tau) < GMX_REAL_MIN)
    od->Dtav = od->Dins;
  else
    snew(od->Dtav,od->nr);
  snew(od->oinsl,od->nr);
  if (mcr)
    snew(od->oins,od->nr);
  else
    od->oins = od->oinsl;
  if ( fabs(ir->orires_tau) < GMX_REAL_MIN)
    od->otav = od->oins;
  else
    snew(od->otav,od->nr);
  snew(od->tmp,od->nex);
  snew(od->TMP,od->nex);
  for(ex=0; ex<od->nex; ex++) {
    snew(od->TMP[ex],5);
    for(i=0; i<5; i++)
      snew(od->TMP[ex][i],5);
  }

  od->nref = 0;
  for(i=0; i<md->nr; i++)
    if (md->cORF[i] == 0)
      od->nref++;
  snew(od->mref,od->nref);
  snew(od->xref,od->nref);
  snew(od->xtmp,od->nref);

  /* Determine the reference structure on the master node.
   * Copy it to the other nodes after checking multi compatibility,
   * so we are sure the subsystems match before copying.
   */
  clear_rvec(com);
  mtot = 0.0;
  j = 0;
  for(i=0; i<md->nr; i++) {
    if (md->cORF[i] == 0) {
      od->mref[j] = md->massT[i];
      if (mcr==NULL || MASTER(mcr)) {
	copy_rvec(xref[i],od->xref[j]);
	for(d=0; d<DIM; d++)
	  com[d] += od->mref[j]*xref[i][d];
      }
      mtot += od->mref[j];
      j++;
    }
  }
  od->invmref = 1.0/mtot;
  svmul(od->invmref,com,com);
  if (mcr==NULL || MASTER(mcr))
    for(j=0; j<od->nref; j++)
      rvec_dec(od->xref[j],com);
  
  fprintf(log,"Found %d orientation experiments\n",od->nex);
  for(i=0; i<od->nex; i++)
    fprintf(log,"  experiment %d has %d restraints\n",i+1,nr_ex[i]);

  sfree(nr_ex);

  fprintf(log,"  the fit group consists of %d atoms and has total mass %g\n",
	  od->nref,mtot);
  
  if (mcr) {
    fprintf(log,"  the orientation restraints are ensemble averaged over %d systems\n",mcr->nnodes);

    check_multi_int(log,mcr,fcd->orires.nr,
		    "the number of orientation restraints");
    check_multi_int(log,mcr,fcd->orires.nref,
		    "the number of fit atoms for orientation restraining");
    /* Copy the reference coordinates from the master to the other nodes */
    gmx_sum(DIM*fcd->orires.nref,fcd->orires.xref[0],mcr);
  }
}

示例#15

显示文件

文件： orires.c 项目： Chadi-akel/cere

real calc_orires_dev(t_commrec *mcr,
		     int nfa,t_iatom forceatoms[],t_iparams ip[],
		     t_mdatoms *md,rvec x[],t_fcdata *fcd)
{
  int          fa,d,i,j,type,ex,nref;
  real         edt,edt1,invn,pfac,r2,invr,corrfac,weight,wsv2,sw,dev;
  tensor       *S,R,TMP;
  rvec5        *Dinsl,*Dins,*Dtav,*rhs;
  real         *mref,***T;
  rvec         *xref,*xtmp,com,r_unrot,r;
  t_oriresdata *od;
  bool         bTAV;
  static real  two_thr=2.0/3.0;

  od = &(fcd->orires);

  bTAV = (fabs(od->edt)>GMX_REAL_MIN);
  edt  = od->edt;
  edt1 = od->edt1;
  S    = od->S;
  Dinsl= od->Dinsl;
  Dins = od->Dins;
  Dtav = od->Dtav;
  T    = od->TMP;
  rhs  = od->tmp;
  nref = od->nref;
  mref = od->mref;
  xref = od->xref;
  xtmp = od->xtmp;
  
  od->exp_min_t_tau *= edt;

  if (mcr)
    invn = 1.0/mcr->nnodes;
  else
    invn = 1.0;

  j=0;
  for(i=0; i<md->nr; i++)
    if (md->cORF[i] == 0) {
      copy_rvec(x[i],xtmp[j]);
      for(d=0; d<DIM; d++)
	com[d] += mref[j]*xref[j][d];
      j++;
    }
  svmul(od->invmref,com,com);
  for(j=0; j<nref; j++)
    rvec_dec(xtmp[j],com);
  /* Calculate the rotation matrix to rotate x to the reference orientation */
  calc_fit_R(nref,mref,xref,xtmp,R);
  copy_mat(R,od->R);

  d = 0;
  for(fa=0; fa<nfa; fa+=3) {
    type = forceatoms[fa];
    rvec_sub(x[forceatoms[fa+1]],x[forceatoms[fa+2]],r_unrot);
    mvmul(R,r_unrot,r);
    r2   = norm2(r);
    invr = invsqrt(r2);
    /* Calculate the prefactor for the D tensor, this includes the factor 3! */
    pfac = ip[type].orires.c*invr*invr*3;
    for(i=0; i<ip[type].orires.pow; i++)
      pfac *= invr;
    Dinsl[d][0] = pfac*(2*r[0]*r[0] + r[1]*r[1] - r2);
    Dinsl[d][1] = pfac*(2*r[0]*r[1]);
    Dinsl[d][2] = pfac*(2*r[0]*r[2]);
    Dinsl[d][3] = pfac*(2*r[1]*r[1] + r[0]*r[0] - r2);
    Dinsl[d][4] = pfac*(2*r[1]*r[2]);

    if (mcr)
      for(i=0; i<5; i++)
	Dins[d][i] = Dinsl[d][i]*invn;
    
    d++;
  }
  
  if (mcr)
    gmx_sum(5*od->nr,Dins[0],mcr);
  
  /* Correction factor to correct for the lack of history for short times */
  corrfac = 1.0/(1.0-od->exp_min_t_tau);
  
  /* Calculate the order tensor S for each experiment via optimization */
  for(ex=0; ex<od->nex; ex++)
    for(i=0; i<5; i++) {
      rhs[ex][i] = 0;
      for(j=0; j<=i; j++)
	T[ex][i][j] = 0;
    }
  d = 0;
  for(fa=0; fa<nfa; fa+=3) {
    if (bTAV)
      for(i=0; i<5; i++)
	Dtav[d][i] = edt*Dtav[d][i] + edt1*Dins[d][i];

    type   = forceatoms[fa];
    ex     = ip[type].orires.ex;
    weight = ip[type].orires.kfac;
    /* Calculate the vector rhs and half the matrix T for the 5 equations */
    for(i=0; i<5; i++) {
      rhs[ex][i] += Dtav[d][i]*ip[type].orires.obs*weight;
      for(j=0; j<=i; j++)
	T[ex][i][j] += Dtav[d][i]*Dtav[d][j]*weight;
    }
    d++;
  }
  /* Now we have all the data we can calculate S */
  for(ex=0; ex<od->nex; ex++) {
    /* Correct corrfac and copy one half of T to the other half */
    for(i=0; i<5; i++) {
      rhs[ex][i]  *= corrfac;
      T[ex][i][i] *= sqr(corrfac);
      for(j=0; j<i; j++) {
	T[ex][i][j] *= sqr(corrfac);
	T[ex][j][i]  = T[ex][i][j];
      }
    }
    m_inv_gen(T[ex],5,T[ex]);
    /* Calculate the orientation tensor S for this experiment */
    S[ex][0][0] = 0;
    S[ex][0][1] = 0;
    S[ex][0][2] = 0;
    S[ex][1][1] = 0;
    S[ex][1][2] = 0;
    for(i=0; i<5; i++) {
      S[ex][0][0] += 1.5*T[ex][0][i]*rhs[ex][i];
      S[ex][0][1] += 1.5*T[ex][1][i]*rhs[ex][i];
      S[ex][0][2] += 1.5*T[ex][2][i]*rhs[ex][i];
      S[ex][1][1] += 1.5*T[ex][3][i]*rhs[ex][i];
      S[ex][1][2] += 1.5*T[ex][4][i]*rhs[ex][i];
    }
    S[ex][1][0] = S[ex][0][1];
    S[ex][2][0] = S[ex][0][2];
    S[ex][2][1] = S[ex][1][2];
    S[ex][2][2] = -S[ex][0][0] - S[ex][1][1];
  }
  
  wsv2 = 0;
  sw   = 0;
  
  d = 0;
  for(fa=0; fa<nfa; fa+=3) {
    type = forceatoms[fa];
    ex = ip[type].orires.ex;

    od->otav[d] = two_thr*
      corrfac*(S[ex][0][0]*Dtav[d][0] + S[ex][0][1]*Dtav[d][1] +
	       S[ex][0][2]*Dtav[d][2] + S[ex][1][1]*Dtav[d][3] +
	       S[ex][1][2]*Dtav[d][4]);
    if (bTAV)
      od->oins[d] = two_thr*(S[ex][0][0]*Dins[d][0] + S[ex][0][1]*Dins[d][1] +
			     S[ex][0][2]*Dins[d][2] + S[ex][1][1]*Dins[d][3] +
			     S[ex][1][2]*Dins[d][4]);
    if (mcr)
      /* When ensemble averaging is used recalculate the local orientation
       * for output to the energy file.
       */
      od->oinsl[d] = two_thr*
	(S[ex][0][0]*Dinsl[d][0] + S[ex][0][1]*Dinsl[d][1] +
	 S[ex][0][2]*Dinsl[d][2] + S[ex][1][1]*Dinsl[d][3] +
	 S[ex][1][2]*Dinsl[d][4]);
    
    dev = od->otav[d] - ip[type].orires.obs;
    
    wsv2 += ip[type].orires.kfac*sqr(dev);
    sw   += ip[type].orires.kfac;
    
    d++;
  }
  od->rmsdev = sqrt(wsv2/sw);
  
  /* Rotate the S matrices back, so we get the correct grad(tr(S D)) */
  for(ex=0; ex<od->nex; ex++) {
    tmmul(R,S[ex],TMP);
    mmul(TMP,R,S[ex]);
  }

  return od->rmsdev;
  
  /* Approx. 120*nfa/3 flops */
}