Exemplo n.º 1
0
/** Store the common Ewald options, validate them, and fill in defaults.
  * The member copies are made before any check runs, so they are already
  * overwritten when the function bails out with an error.
  * \param boxIn Box; elements 0, 1 and 2 are each compared against the cutoff.
  * \param debugIn Debug level, stored as-is.
  * \param cutoffIn Direct space cutoff; must be at least Constants::SMALL and
  *        no larger than half of each of the three box elements.
  * \param dsumTolIn Direct sum tolerance; values below Constants::SMALL become 1E-5.
  * \param ew_coeffIn Ewald coefficient; if its magnitude is below Constants::SMALL
  *        it is obtained from FindEwaldCoefficient(cutoff, tolerance).
  * \param lw_coeffIn Negative values become 0.0; a magnitude below
  *        Constants::SMALL means "use the Ewald coefficient".
  * \param switch_widthIn Negative values become 0.0; must not exceed the cutoff.
  * \param erfcTableDxIn Erfc table spacing; values <= 0.0 become 1.0/5000.
  * \param skinnbIn Only validated here (must not be negative); not stored.
  * \return 0 on success, 1 if any check fails (an error message is printed).
  */
int Ewald::CheckInput(Box const& boxIn, int debugIn, double cutoffIn, double dsumTolIn,
                      double ew_coeffIn, double lw_coeffIn, double switch_widthIn,
                      double erfcTableDxIn, double skinnbIn)
{
  // Take over the raw options first.
  debug_        = debugIn;
  cutoff_       = cutoffIn;
  dsumTol_      = dsumTolIn;
  ew_coeff_     = ew_coeffIn;
  lw_coeff_     = lw_coeffIn;
  switch_width_ = switch_widthIn;
  erfcTableDx_  = erfcTableDxIn;

  // ----- Validation ----------------------------------------------------------
  if (cutoff_ < Constants::SMALL) {
    mprinterr("Error: Direct space cutoff (%g) is too small.\n", cutoff_);
    return 1;
  }

  static const char axisLabel[3] = {'X', 'Y', 'Z'};
  for (int dim = 0; dim != 3; ++dim) {
    const double halfLength = boxIn[dim]/2.0;
    if (cutoff_ > halfLength) {
      mprinterr("Error: Cutoff must be less than half the box length (%g > %g, %c)\n",
                cutoff_, halfLength, axisLabel[dim]);
      return 1;
    }
  }

  if (skinnbIn < 0.0) {
    mprinterr("Error: skinnb is less than 0.0\n");
    return 1;
  }

  // A negative switch width is silently treated as "no switching".
  if (switch_width_ < 0.0) {
    switch_width_ = 0.0;
  }
  if (switch_width_ > cutoff_) {
    mprinterr("Error: Switch width must be less than the cutoff.\n");
    return 1;
  }

  // ----- Defaults ------------------------------------------------------------
  if (dsumTol_ < Constants::SMALL) {
    dsumTol_ = 1E-5;
  }
  if (DABS(ew_coeff_) < Constants::SMALL) {
    ew_coeff_ = FindEwaldCoefficient( cutoff_, dsumTol_ );
  }
  if (erfcTableDx_ <= 0.0) {
    erfcTableDx_ = 1.0 / 5000;
  }
  // TODO make this optional
  FillErfcTable( cutoff_, ew_coeff_ );
  // TODO do for C6 as well
  // TODO for C6 correction term
  if (lw_coeff_ < 0.0) {
    lw_coeff_ = 0.0;
  } else if (DABS(lw_coeff_) < Constants::SMALL) {
    lw_coeff_ = ew_coeff_;
  }

  // ----- Derived quantities --------------------------------------------------
  // Squared cutoff, and squared (cutoff - switch width).
  cut2_ = cutoff_ * cutoff_;
  const double innerCut = cutoff_ - switch_width_;
  cut2_0_ = innerCut * innerCut;

  return 0;
}
Exemplo n.º 2
0
/*
 * Raise *cournox and *cournoy to the maximum, over the first `slices` rows
 * and the first `Hnx` columns, of c[s][i] + |q[IU][s][i]| and
 * c[s][i] + |q[IV][s][i]| respectively. The incoming values of *cournox and
 * *cournoy take part in the maximum, so they are never lowered.
 *
 * tmpm1 and tmpm2 are not read or written here; they only appear in the
 * OpenMP shared() clause of the WOMP build.
 */
inline void
courantOnXY(real_t *cournox,
            real_t *cournoy,
            const int Hnx,
            const int Hnxyt,
            const int Hnvar, const int slices, const int Hstep, real_t c[Hstep][Hnxyt], real_t q[Hnvar][Hstep][Hnxyt],
	    real_t *tmpm1,
	    real_t *tmpm2
	    ) 
{
#ifdef WOMP
  /* Threaded build: reduce into two scalars, then publish them once. */
  int slice, col;
  real_t maxX = *cournox;
  real_t maxY = *cournoy;

#pragma omp parallel for shared(tmpm1, tmpm2) private(slice, col) reduction(max:maxX) reduction(max:maxY)
  for (slice = 0; slice < slices; slice++) {
    for (col = 0; col < Hnx; col++) {
      maxX = MAX(maxX, c[slice][col] + DABS(q[IU][slice][col]));
      maxY = MAX(maxY, c[slice][col] + DABS(q[IV][slice][col]));
    }
  }
  *cournox = maxX;
  *cournoy = maxY;
  {
    int nops = (slices) * Hnx;
    FLOPS(2 * nops, 0 * nops, 2 * nops, 0 * nops);
  }
#else
  /* Serial build: update the outputs in place, cell by cell. */
  int col, slice;
  for (slice = 0; slice < slices; slice++) {
    for (col = 0; col < Hnx; col++) {
      const real_t speedX = c[slice][col] + DABS(q[IU][slice][col]);
      const real_t speedY = c[slice][col] + DABS(q[IV][slice][col]);
      *cournox = MAX(*cournox, speedX);
      *cournoy = MAX(*cournoy, speedY);
    }
  }
  {
    int nops = (slices) * Hnx;
    FLOPS(2 * nops, 0 * nops, 5 * nops, 0 * nops);
  }
#endif
#undef IHVW
}
Exemplo n.º 3
0
// Raise *cournox and *cournoy to the maximum, over s in [0, slices) and
// i in [0, Hnx), of c[IDXE(s,i)] + |q[IDX(IU,s,i)]| and
// c[IDXE(s,i)] + |q[IDX(IV,s,i)]| respectively. The incoming values of
// *cournox / *cournoy take part in the maximum, so they are never lowered.
//
// c and q are flat arrays addressed through the IDXE / IDX macros (defined
// elsewhere); the kernels directive expects both to be present on the device
// already, with extents Hstep*Hnxyt and Hnvar*Hstep*Hnxyt.
static void
courantOnXY (hydro_real_t *cournox,
	     hydro_real_t *cournoy,
	     const int Hnx,
	     const int Hnxyt,
	     const int Hnvar, const int slices, const int Hstep,
	     hydro_real_t *c, hydro_real_t *q)
{
  int i, s;

  // Reduce into local scalars; the results are stored back after the region.
  hydro_real_t _cournox = *cournox;
  hydro_real_t _cournoy = *cournoy;
#pragma acc kernels present(q[0:Hnvar*Hstep*Hnxyt],c[0:Hstep*Hnxyt])
  {
#pragma acc loop independent reduction(max:_cournox) reduction(max:_cournoy) gang(128)
  for (s = 0; s < slices; s++)
    {
#pragma acc loop independent reduction(max:_cournox) reduction(max:_cournoy) worker(64)
      for (i = 0; i < Hnx; i++)
	{
	  // The temporaries live inside the loop body so that each iteration
	  // owns its own copy. Declared at function scope they would be
	  // scalars written by every iteration of loops asserted to be
	  // independent, leaving privatization up to the compiler.
	  const hydro_real_t tmp1 = c[IDXE(s,i)] + DABS (q[IDX(IU,s,i)]);
	  const hydro_real_t tmp2 = c[IDXE(s,i)] + DABS (q[IDX(IV,s,i)]);
	  _cournox = MAX (_cournox, tmp1);
	  _cournoy = MAX (_cournoy, tmp2);
	}
    }
  }
  *cournox = _cournox;
  *cournoy = _cournoy;

#undef IHVW
}
Exemplo n.º 4
0
/** Complementary error function, erfc(x) = 1 - erf(x), evaluated with
  * rational approximations chosen by the size of the argument.
  * Original code: SANDER: erfcfun.F90
  * \param xIn Argument.
  * \return 0.0 for xIn > 26, 2.0 for xIn < -5.5, otherwise the approximation
  *         for the range |xIn| <= 0.5, |xIn| < 4, or |xIn| >= 4.
  */
double Ewald::erfc_func(double xIn) {
  // Arguments beyond these bounds get the limiting values directly.
  if (xIn > 26.0) return 0.0;
  if (xIn < -5.5) return 2.0;

  const double absx = DABS( xIn );

  // Small arguments: approximate erf(x) as x * P(x^2)/Q(x^2) and subtract from 1.
  if (absx <= 0.5) {
    const double xsq = xIn * xIn;
    const double numer = ((-0.356098437018154E-1*xsq+0.699638348861914E1)*xsq + 0.219792616182942E2) *
                         xsq + 0.242667955230532E3;
    const double denom = ((xsq+0.150827976304078E2)*xsq+0.911649054045149E2)*xsq + 0.215058875869861E3;
    const double erfValue = xIn * numer/denom;
    return 1.0 - erfValue;
  }

  // Remaining ranges: build the factor that multiplies exp(-x^2) for x > 0.
  double posFactor;
  if (absx < 4.0) {
    // Intermediate range: ratio of two degree-7 polynomials in |x|.
    const double ax = absx;
    const double numer=((((((-0.136864857382717E-6*ax+0.564195517478974)*ax+
                            0.721175825088309E1)*ax+0.431622272220567E2)*ax+
                          0.152989285046940E3)*ax+0.339320816734344E3)*ax+
                        0.451918953711873E3)*ax+0.300459261020162E3;
    const double denom=((((((ax+0.127827273196294E2)*ax+0.770001529352295E2)*ax+
                            0.277585444743988E3)*ax+0.638980264465631E3)*ax+
                          0.931354094850610E3)*ax+0.790950925327898E3)*ax+
                        0.300459260956983E3;
    posFactor = numer/denom;
  } else {
    // Large range: polynomials in 1/x^2, then divided by |x|.
    const double invsq = 1.0/(xIn*xIn);
    const double numer = (((0.223192459734185E-1*invsq+0.278661308609648)*invsq+
                           0.226956593539687)*invsq+0.494730910623251E-1)*invsq+
                         0.299610707703542E-2;
    const double denom = (((invsq+0.198733201817135E1)*invsq+0.105167510706793E1)*invsq+
                          0.191308926107830)*invsq+0.106209230528468E-1;
    posFactor = (-invsq*numer/denom + 0.564189583547756)/absx;
  }

  // For non-positive arguments use erfc(x) = 2 - erfc(-x), written with the
  // exponential factored out.
  double scaled;
  if ( xIn > 0.0 )
    scaled = posFactor;
  else
    scaled = 2.0*exp(xIn*xIn) - posFactor;
  return exp(-absx*absx)*scaled;
}
Exemplo n.º 5
0
// Riemann solve for every (slice, cell) pair.
//
// For each s in [0, slices) and i in [0, narray) the routine reads the left
// and right states from qleft / qright (components ID, IU, IP, IV), runs up
// to Hniter_riemann Newton-Raphson steps to obtain pstar, and writes the
// selected state to qgdnv and +1 / -1 to sgnm. Components above IP are then
// copied from qleft or qright depending on sgnm.
//
// Parameters:
//   narray          number of cells processed per slice
//   Hsmallr         lower bound applied to densities
//   Hsmallc         lower bound applied to sound speeds; also defines smallp
//   Hgamma          gamma used in the sound-speed and pressure expressions
//   Hniter_riemann  maximum number of Newton-Raphson iterations
//   Hnvar, Hnxyt, Hstep  array extents; the flat arrays are sized
//                   Hnvar*Hstep*Hnxyt (states) and Hstep*Hnxyt (sgnm) in the
//                   acc data clauses
//   slices          number of slices processed
//   qleft, qright   input states, addressed through IDX(var, s, i)
//   qgdnv           output state, addressed through IDX(var, s, i)
//   sgnm            output sign, addressed through IDXE(s, i)
//
// NOTE(review): ID/IU/IV/IP presumably stand for density, normal velocity,
// transverse velocity and pressure -- confirm against the header that defines
// them. IDX, IDXE, CFLOPS, Square, MAX, MIN, DABS, PRECISION and the constants
// zero/half/one/two are defined elsewhere.
void
riemann(int narray,
        const double Hsmallr,
        const double Hsmallc,
        const double Hgamma,
        const int Hniter_riemann,
        const int Hnvar,
        const int Hnxyt,
        const int slices, const int Hstep,
	double *qleft,
	double *qright, double *qgdnv, int *sgnm) {
  //double qleft[Hnvar][Hstep][Hnxyt],
  //double qright[Hnvar][Hstep][Hnxyt], //
  //double qgdnv[Hnvar][Hstep][Hnxyt], //
  //int sgnm[Hstep][Hnxyt]) {
  // #define IHVW(i, v) ((i) + (v) * Hnxyt)
  int i, s;
  // Loop-invariant constants derived from the scalar inputs.
  double smallp_ = Square(Hsmallc) / Hgamma;
  double gamma6_ = (Hgamma + one) / (two * Hgamma);
  double smallpp_ = Hsmallr * smallp_;

  // Pressure, density and velocity
#pragma acc parallel pcopy(qleft[0:Hnvar*Hstep*Hnxyt], qright[0:Hnvar*Hstep*Hnxyt]) pcopyout(qgdnv[0:Hnvar*Hstep*Hnxyt], sgnm[0:Hstep*Hnxyt])
#pragma acc loop gang
  for (s = 0; s < slices; s++) {
#pragma acc loop vector
    for (i = 0; i < narray; i++) {
      // Per-iteration copies of the precomputed constants.
      double smallp = smallp_;
      double gamma6 = gamma6_;
      double smallpp = smallpp_;
      // Left (l) and right (r) states, with density floored at Hsmallr and
      // pressure floored at density * smallp.
      double rl_i = MAX(qleft[IDX(ID,s,i)], Hsmallr);
      double ul_i = qleft[IDX(IU,s,i)];
      double pl_i = MAX(qleft[IDX(IP,s,i)], (double) (rl_i * smallp));
      double rr_i = MAX(qright[IDX(ID,s,i)], Hsmallr);
      double ur_i = qright[IDX(IU,s,i)];
      double pr_i = MAX(qright[IDX(IP,s,i)], (double) (rr_i * smallp));
      CFLOPS(2);

      // Lagrangian sound speed
      double cl_i = Hgamma * pl_i * rl_i;
      double cr_i = Hgamma * pr_i * rr_i;
      CFLOPS(4);
      // First guess

      double wl_i = sqrt(cl_i);
      double wr_i = sqrt(cr_i);
      // Initial pstar, clamped so it is never negative.
      double pstar_i = MAX(((wr_i * pl_i + wl_i * pr_i) + wl_i * wr_i * (ul_i - ur_i)) / (wl_i + wr_i), 0.0);
      CFLOPS(9);

      // Newton-Raphson iterations to find pstar at the required accuracy
      {
        int iter;
        // goon stays 1 while the relative update is above PRECISION; once it
        // drops to 0 the remaining iterations do nothing.
        int goon = 1;
        for (iter = 0; iter < Hniter_riemann; iter++) {
          if (goon) {
            double wwl, wwr;
            wwl = sqrt(cl_i * (one + gamma6 * (pstar_i - pl_i) / pl_i));
            wwr = sqrt(cr_i * (one + gamma6 * (pstar_i - pr_i) / pr_i));
            double ql = two * wwl * Square(wwl) / (Square(wwl) + cl_i);
            double qr = two * wwr * Square(wwr) / (Square(wwr) + cr_i);
            double usl = ul_i - (pstar_i - pl_i) / wwl;
            double usr = ur_i + (pstar_i - pr_i) / wwr;
            // The update is bounded below by -pstar so pstar cannot go negative.
            double delp_i = MAX((qr * ql / (qr + ql) * (usl - usr)), (-pstar_i));
            CFLOPS(38);

            // PRINTARRAY(delp, narray, "delp", H);
            pstar_i = pstar_i + delp_i;
            CFLOPS(1);

            // Convergence indicator
            double uo_i = DABS(delp_i / (pstar_i + smallpp));
            CFLOPS(2);

            goon = uo_i > PRECISION;
          }
        }                       // iter_riemann
      }

      // NOTE(review): the guard on wr_i looks like a compiler workaround (see
      // the original remark below). When wr_i is zero, wl_i and wr_i keep
      // their first-guess values -- confirm this is intended.
      if (wr_i) {               // Bug CUDA !!
        wr_i = sqrt(cr_i * (one + gamma6 * (pstar_i - pr_i) / pr_i));
        wl_i = sqrt(cl_i * (one + gamma6 * (pstar_i - pl_i) / pl_i));
        CFLOPS(10);
      }

      double ustar_i = half * (ul_i + (pl_i - pstar_i) / wl_i + ur_i - (pr_i - pstar_i) / wr_i);
      CFLOPS(8);

      // ustar > 0 selects the left state (sgnm = 1); otherwise the right
      // state is used (sgnm = -1). The chosen state is the "o" state below.
      int left = ustar_i > 0;
      double ro_i, uo_i, po_i, wo_i;

      if (left) {
        sgnm[IDXE(s,i)] = 1;
        ro_i = rl_i;
        uo_i = ul_i;
        po_i = pl_i;
        wo_i = wl_i;
      } else {
        sgnm[IDXE(s,i)] = -1;
        ro_i = rr_i;
        uo_i = ur_i;
        po_i = pr_i;
        wo_i = wr_i;
      }

      // Sound speed of the chosen state, floored at Hsmallc.
      double co_i = sqrt(DABS(Hgamma * po_i / ro_i));
      co_i = MAX(Hsmallc, co_i);
      CFLOPS(2);

      // Star-state density, floored at Hsmallr.
      double rstar_i = ro_i / (one + ro_i * (po_i - pstar_i) / Square(wo_i));
      rstar_i = MAX(rstar_i, Hsmallr);
      CFLOPS(6);

      // Star-state sound speed, floored at Hsmallc.
      double cstar_i = sqrt(DABS(Hgamma * pstar_i / rstar_i));
      cstar_i = MAX(Hsmallc, cstar_i);
      CFLOPS(2);

      double spout_i = co_i - sgnm[IDXE(s,i)] * uo_i;
      double spin_i = cstar_i - sgnm[IDXE(s,i)] * ustar_i;
      double ushock_i = wo_i / ro_i - sgnm[IDXE(s,i)] * uo_i;
      CFLOPS(7);

      // When pstar is at or above the chosen state's pressure, both speeds
      // collapse to ushock.
      if (pstar_i >= po_i) {
        spin_i = ushock_i;
        spout_i = ushock_i;
      }

      double scr_i = MAX((double) (spout_i - spin_i), (double) (Hsmallc + DABS(spout_i + spin_i)));
      CFLOPS(3);

      // Blend weight between the star state and the chosen state, clamped to
      // [zero, one].
      double frac_i = (one + (spout_i + spin_i) / scr_i) * half;
      frac_i = MAX(zero, (double) (MIN(one, frac_i)));
      CFLOPS(4);

      int addSpout = spout_i < zero;
      int addSpin = spin_i > zero;
      // double originalQgdnv = !addSpout & !addSpin;
      double qgdnv_ID, qgdnv_IU, qgdnv_IP;

      // Output state: the chosen state if spout < zero, else the star state
      // if spin > zero, else the frac-weighted mix of the two.
      if (addSpout) {
        qgdnv_ID = ro_i;
        qgdnv_IU = uo_i;
        qgdnv_IP = po_i;
      } else if (addSpin) {
        qgdnv_ID = rstar_i;
        qgdnv_IU = ustar_i;
        qgdnv_IP = pstar_i;
      } else {
        qgdnv_ID = (frac_i * rstar_i + (one - frac_i) * ro_i);
        qgdnv_IU = (frac_i * ustar_i + (one - frac_i) * uo_i);
        qgdnv_IP = (frac_i * pstar_i + (one - frac_i) * po_i);
      }

      qgdnv[IDX(ID,s,i)] = qgdnv_ID;
      qgdnv[IDX(IU,s,i)] = qgdnv_IU;
      qgdnv[IDX(IP,s,i)] = qgdnv_IP;

      // transverse velocity
      if (left) {
        qgdnv[IDX(IV,s,i)] = qleft[IDX(IV,s,i)];
      } else {
        qgdnv[IDX(IV,s,i)] = qright[IDX(IV,s,i)];
      }
    }
  }

  // other passive variables
  // Components IP+1 .. Hnvar-1 are taken from qleft where sgnm == 1 and from
  // qright otherwise; the selection is done by multiplying with 0/1 flags.
  if (Hnvar > IP) {
    int invar;
#pragma acc parallel pcopy(qleft[0:Hnvar*Hstep*Hnxyt], qright[0:Hnvar*Hstep*Hnxyt], sgnm[0:Hstep*Hnxyt]) pcopyout(qgdnv[0:Hnvar*Hstep*Hnxyt])
#pragma acc loop gang collapse(2)
    for (invar = IP + 1; invar < Hnvar; invar++) {
      for (s = 0; s < slices; s++) {
#pragma acc loop vector
        for (i = 0; i < narray; i++) {
          int left = (sgnm[IDXE(s,i)] == 1);
	qgdnv[IDX(invar,s,i)] = qleft[IDX(invar,s,i)] * left + qright[IDX(invar,s,i)] * !left;
        }
      }
    }
  }
}                               // riemann
Exemplo n.º 6
0
//
// compute gamma's and diGamma's including optional error checking
//
// For every t in [0, T-2]:
//   diGamma[i][j] = alpha_t[i] * A[i][j] * B[j][obs_{t+1}] * beta_{t+1}[j] / denom
//   gamma[i]      = sum over j of diGamma[i][j]
// where denom is the sum of the numerators over all (i, j).
//
// step    per-time-step records; alpha, beta and obs are read, gamma and
//         diGamma are written
// pi      not used by this routine
// A       N x N matrix, indexed A[i][j]
// B       indexed B[j][observation]
// N       number of states
// T       number of time steps; step[T-1].gamma / diGamma are not written
//
// denom is not checked for zero before dividing.
//
// With CHECK_GAMMAS defined, each gamma[i] is compared against
// alpha[i]*beta[i] / sum_j(alpha[j]*beta[j]) and the gammas of every step are
// checked to sum to 1.0 (both within EPSILON); mismatches are printed.
//
void computeGammas(struct stepStruct *step,
                   double *pi,
                   double **A,
                   double **B,
                   int N,
                   int T)
{
    int i,
        j,
        t;

    double denom,
           numer;

#ifdef CHECK_GAMMAS
    double ftemp,
           ftemp2,
           abSum;
#endif // CHECK_GAMMAS

    // compute gamma's and diGamma's
    for(t = 0; t < T - 1; ++t)
    {
        // pass 1: evaluate each numerator once, park it in diGamma and
        // accumulate the normalizer (same summation order as before)
        denom = 0.0;
        for(i = 0; i < N; ++i)
        {
            for(j = 0; j < N; ++j)
            {
                numer = step[t].alpha[i] * A[i][j] * B[j][step[t + 1].obs] * step[t + 1].beta[j];
                step[t].diGamma[i][j] = numer;
                denom += numer;
            }
        }

#ifdef CHECK_GAMMAS
        ftemp2 = 0.0;

        // the reference normalizer does not depend on i, so compute it once per t
        abSum = 0.0;
        for(j = 0; j < N; ++j)
        {
            abSum += step[t].alpha[j] * step[t].beta[j];
        }
#endif // CHECK_GAMMAS

        // pass 2: normalize the stored numerators and sum them into gamma
        for(i = 0; i < N; ++i)
        {
            step[t].gamma[i] = 0.0;
            for(j = 0; j < N; ++j)
            {
                step[t].diGamma[i][j] = step[t].diGamma[i][j] / denom;
                step[t].gamma[i] += step[t].diGamma[i][j];
            }

#ifdef CHECK_GAMMAS
            // verify that gamma[i] == alpha[i]*beta[i] / sum(alpha[j]*beta[j])
            ftemp2 += step[t].gamma[i];
            ftemp = (step[t].alpha[i] * step[t].beta[i]) / abSum;
            if(DABS(ftemp - step[t].gamma[i]) > EPSILON)
            {
                printf("gamma[%d] = %f (%f) ", i, step[t].gamma[i], ftemp);
                printf("********** Error !!!\n");
            }
#endif // CHECK_GAMMAS

        }// next i

#ifdef CHECK_GAMMAS
        if(DABS(1.0 - ftemp2) > EPSILON)
        {
            printf("sum of gamma's = %f (should sum to 1.0)\n", ftemp2);
        }
#endif // CHECK_GAMMAS

    }// next t

}// end computeGammas
Exemplo n.º 7
0
// Limited slope of every variable along i.
//
// For each variable nbv in [0, Hnvar), each slice s in [0, slices) and each
// interior index i in [1, n-2], writes
//   dq = sign(central) * min(limit, |central|)
// where the one-sided differences are scaled by Hslope_type, "central" is
// their unscaled average, and "limit" is the smaller one-sided magnitude, or
// zero when the two one-sided differences do not have the same strict sign.
// q and dq are flat arrays addressed through IDX(nbv, s, i) and must already
// be present on the device with extent Hnvar*Hstep*Hnxyt.
void
slope (const int n,
       const int Hnvar,
       const int Hnxyt,
       const hydro_real_t Hslope_type,
       const int slices, const int Hstep, hydro_real_t *q, hydro_real_t *dq){

  WHERE ("slope");

  #pragma acc kernels present(q[0: Hnvar * Hstep * Hnxyt], dq[0:Hnvar * Hstep * Hnxyt])
  {
    // Index range along i; the first and last cells of the range are skipped
    // by the innermost loop because they lack a neighbour on one side.
    int  ijmin, ijmax;
    ijmin = 0;
    ijmax = n;
#ifdef GRIDIFY
#ifndef GRIDIFY_TUNE_PHI
#pragma hmppcg gridify(nbv*s,i)
#else
#pragma hmppcg gridify(nbv*s,i), blocksize 512x1
#endif
#endif /* GRIDIFY */
#ifndef GRIDIFY
#pragma acc loop independent
#endif /* !GRIDIFY */
    for (int nbv = 0; nbv < Hnvar; nbv++)
    {
#ifndef GRIDIFY
#pragma acc loop independent
#endif /* !GRIDIFY */
      for (int s = 0; s < slices; s++)
      {
#ifndef GRIDIFY
#pragma acc loop independent
#endif /* !GRIDIFY */
        for (int i = ijmin + 1; i < ijmax - 1; i++)
        {
            // One-sided differences, scaled by the slope type.
            const hydro_real_t diffLeft  = Hslope_type * (q[IDX (nbv, s, i)]      - q[IDX (nbv, s, i - 1)]);
            const hydro_real_t diffRight = Hslope_type * (q[IDX (nbv, s, i + 1)]  - q[IDX (nbv, s, i)]);

            // Central difference with the scaling removed again, and its sign.
            const hydro_real_t diffCentre = half * (diffLeft + diffRight) / Hslope_type;
            const hydro_real_t sign = (diffCentre > zero) ? one:-one;	// sign(one, dcen);

            // Limiter: smaller one-sided magnitude, or zero when the product
            // of the one-sided differences is not positive.
            const hydro_real_t smallest = MIN(DABS(diffLeft), DABS(diffRight));
            const hydro_real_t limit = ((diffLeft * diffRight) <= zero) ? zero : smallest;

            dq[IDX(nbv, s, i)] = sign * MIN(limit, DABS(diffCentre));

           #ifdef FLOPS
            flops += 8;
           #endif
        }
      }
    }
  }//kernels region
}				// slope