コード例 #1
0
//user function
/*
 * Fills the increment array `du` and the per-direction coefficient arrays
 * (ax/bx/cx, ay/by/cy, az/bz/cz) for the grid point whose indices are in `idx`.
 *
 * A point with any index on the first or last plane (0, nx-1, ny-1 or nz-1)
 * receives du = 0, a = 0, b = 1, c = 0.  Every other point receives
 *   du = lambda * (sum of the six face neighbours of u - 6 * u),
 *   a = c = -0.5 * lambda,   b = 1 + lambda.
 * The same a/b/c triple is written to the x, y and z coefficient arrays.
 *
 * nx, ny, nz, lambda and the OPS_ACCn index macros are defined outside this
 * function.
 */
inline void preproc_kernel(const double *u, double *du,
double *ax, double *bx, double *cx, double *ay, double *by, double *cy,
double *az, double *bz, double *cz, int *idx){

  /* Start from the values used on the outer planes of the grid. */
  double rhs    = 0.0f;
  double coef_a = 0.0f;
  double coef_b = 1.0f;
  double coef_c = 0.0f;

  /* True only when no index lies on the first or last plane. */
  const int interior = idx[0]!=0 && idx[0]!=nx-1
                    && idx[1]!=0 && idx[1]!=ny-1
                    && idx[2]!=0 && idx[2]!=nz-1;

  if(interior) {
    /* Seven-point difference of u scaled by lambda. */
    rhs = lambda*( u[OPS_ACC0(-1,0,0)] + u[OPS_ACC0(1,0,0)]
                 + u[OPS_ACC0(0,-1,0)] + u[OPS_ACC0(0,1,0)]
                 + u[OPS_ACC0(0,0,-1)] + u[OPS_ACC0(0,0,1)]
                 - 6.0f*u[OPS_ACC0(0,0,0)]);
    coef_a = -0.5f * lambda;
    coef_b =  1.0f + lambda;
    coef_c = -0.5f * lambda;
  }

  du[OPS_ACC1(0,0,0)] = rhs;

  /* x direction */
  ax[OPS_ACC2(0,0,0)] = coef_a;
  bx[OPS_ACC3(0,0,0)] = coef_b;
  cx[OPS_ACC4(0,0,0)] = coef_c;

  /* y direction */
  ay[OPS_ACC5(0,0,0)] = coef_a;
  by[OPS_ACC6(0,0,0)] = coef_b;
  cy[OPS_ACC7(0,0,0)] = coef_c;

  /* z direction */
  az[OPS_ACC8(0,0,0)] = coef_a;
  bz[OPS_ACC9(0,0,0)] = coef_b;
  cz[OPS_ACC10(0,0,0)] = coef_c;
}
コード例 #2
0
// user function
/*
 * Per-cell timestep limit, three-dimensional variant.
 *
 * Computes five candidate limits for the cell at offset (0,0,0) and stores the
 * smallest of them in dt_min:
 *   - a sound-speed limit built from the smallest of the three cell widths,
 *     the sound speed squared and 2 * viscosity / density0;
 *   - one limit per axis (x, y, z) built from the four-point velocity sums on
 *     the two opposing sides of the cell, each multiplied by that axis' area
 *     value at (0,0,0);
 *   - a limit built from the net of those six side terms.
 *
 * dtc_safe, dtu_safe, dtv_safe, dtw_safe, dtdiv_safe, g_small, MIN, MAX and
 * the OPS_ACCn index macros are defined outside this function.
 */
inline void calc_dt_kernel(const double *celldx, const double *celldy,
                           const double *soundspeed, const double *viscosity,
                           const double *density0, const double *xvel0,
                           const double *xarea, const double *volume,
                           const double *yvel0, const double *yarea,
                           double *dt_min, const double *celldz,
                           const double *zvel0, const double *zarea) {

  /* Values at the cell itself that are needed more than once. */
  const double cell_vol = volume[OPS_ACC7(0, 0, 0)];
  const double speed = soundspeed[OPS_ACC2(0, 0, 0)];
  const double area_x = xarea[OPS_ACC6(0, 0, 0)];
  const double area_y = yarea[OPS_ACC9(0, 0, 0)];
  const double area_z = zarea[OPS_ACC13(0, 0, 0)];

  /* Sound-speed limit. */
  const double width =
      MIN(MIN(celldx[OPS_ACC0(0, 0, 0)], celldy[OPS_ACC1(0, 0, 0)]),
          celldz[OPS_ACC11(0, 0, 0)]);
  const double inv_width_sq = 1.0 / (width * width);

  const double speed_sq = speed * speed;
  const double signal_sq =
      speed_sq +
      2.0 * viscosity[OPS_ACC3(0, 0, 0)] / density0[OPS_ACC4(0, 0, 0)];

  const double rate_sq = inv_width_sq * signal_sq;
  const double limit_sound = dtc_safe * 1.0 / MAX(sqrt(rate_sq), g_small);

  /* Four-point velocity sums on the low/high side of each axis, times area. */
  const double x_lo = (xvel0[OPS_ACC5(0, 0, 0)] + xvel0[OPS_ACC5(0, 1, 0)] +
                       xvel0[OPS_ACC5(0, 0, 1)] + xvel0[OPS_ACC5(0, 1, 1)]) *
                      area_x;
  const double x_hi = (xvel0[OPS_ACC5(1, 0, 0)] + xvel0[OPS_ACC5(1, 1, 0)] +
                       xvel0[OPS_ACC5(1, 0, 1)] + xvel0[OPS_ACC5(1, 1, 1)]) *
                      area_x;

  const double y_lo = (yvel0[OPS_ACC8(0, 0, 0)] + yvel0[OPS_ACC8(1, 0, 0)] +
                       yvel0[OPS_ACC8(0, 0, 1)] + yvel0[OPS_ACC8(1, 0, 1)]) *
                      area_y;
  const double y_hi = (yvel0[OPS_ACC8(0, 1, 0)] + yvel0[OPS_ACC8(1, 1, 0)] +
                       yvel0[OPS_ACC8(0, 1, 1)] + yvel0[OPS_ACC8(1, 1, 1)]) *
                      area_y;

  const double z_lo = (zvel0[OPS_ACC12(0, 0, 0)] + zvel0[OPS_ACC12(0, 1, 0)] +
                       zvel0[OPS_ACC12(1, 0, 0)] + zvel0[OPS_ACC12(1, 1, 0)]) *
                      area_z;
  const double z_hi = (zvel0[OPS_ACC12(0, 0, 1)] + zvel0[OPS_ACC12(0, 1, 1)] +
                       zvel0[OPS_ACC12(1, 0, 1)] + zvel0[OPS_ACC12(1, 1, 1)]) *
                      area_z;

  /* Per-axis limits; the denominator is floored at 1e-5 * volume. */
  const double limit_x = dtu_safe * 4.0 * cell_vol /
                         MAX(MAX(fabs(x_lo), fabs(x_hi)), 1.0e-5 * cell_vol);
  const double limit_y = dtv_safe * 4.0 * cell_vol /
                         MAX(MAX(fabs(y_lo), fabs(y_hi)), 1.0e-5 * cell_vol);
  const double limit_z = dtw_safe * 4.0 * cell_vol /
                         MAX(MAX(fabs(z_lo), fabs(z_hi)), 1.0e-5 * cell_vol);

  /* Limit from the net of the six side terms, with the same floor. */
  const double net = x_hi - x_lo + y_hi - y_lo + z_hi - z_lo;
  const double limit_net = dtdiv_safe * 4.0 * (cell_vol) /
                           MAX(cell_vol * 1.0e-05, fabs(net));

  dt_min[OPS_ACC10(0, 0, 0)] =
      MIN(MIN(MIN(limit_sound, limit_x), MIN(limit_y, limit_net)), limit_z);
}
コード例 #3
0
// user function
/*
 * Cell update for one z-direction sweep.
 *
 * For the cell at offset (0,0,0) this writes, in order:
 *   pre_mass  = density1 * pre_vol
 *   post_mass = pre_mass + mass_flux_z(0,0,0) - mass_flux_z(0,0,1)
 *   post_ener = (energy1 * pre_mass + ener_flux(0,0,0) - ener_flux(0,0,1))
 *               / post_mass
 *   advec_vol = pre_vol + vol_flux_z(0,0,0) - vol_flux_z(0,0,1)
 *   density1  = post_mass / advec_vol
 *   energy1   = post_ener
 *
 * post_vol is accepted but not used.  The OPS_ACCn index macros are defined
 * outside this function.
 *
 * NOTE(review): freshly computed values are kept in locals rather than read
 * back from the output arrays; this assumes the argument arrays do not
 * overlap one another — confirm against the caller.
 */
inline void
advec_cell_kernel4_zdir(double *density1, double *energy1,
                        const double *mass_flux_z, const double *vol_flux_z,
                        const double *pre_vol, const double *post_vol,
                        double *pre_mass, double *post_mass, double *advec_vol,
                        double *post_ener, const double *ener_flux) {

  /* Mass before the sweep. */
  const double mass_before =
      density1[OPS_ACC0(0, 0, 0)] * pre_vol[OPS_ACC4(0, 0, 0)];
  pre_mass[OPS_ACC6(0, 0, 0)] = mass_before;

  /* Mass after adding the flux at (0,0,0) and removing the one at (0,0,1). */
  const double mass_after = mass_before + mass_flux_z[OPS_ACC2(0, 0, 0)] -
                            mass_flux_z[OPS_ACC2(0, 0, 1)];
  post_mass[OPS_ACC7(0, 0, 0)] = mass_after;

  /* Energy after the sweep, per unit of the new mass. */
  const double energy_after =
      (energy1[OPS_ACC1(0, 0, 0)] * mass_before +
       ener_flux[OPS_ACC10(0, 0, 0)] - ener_flux[OPS_ACC10(0, 0, 1)]) /
      mass_after;
  post_ener[OPS_ACC9(0, 0, 0)] = energy_after;

  /* Volume after applying the two volume fluxes. */
  const double volume_after = pre_vol[OPS_ACC4(0, 0, 0)] +
                              vol_flux_z[OPS_ACC3(0, 0, 0)] -
                              vol_flux_z[OPS_ACC3(0, 0, 1)];
  advec_vol[OPS_ACC8(0, 0, 0)] = volume_after;

  /* Overwrite the cell state in place. */
  density1[OPS_ACC0(0, 0, 0)] = mass_after / volume_after;
  energy1[OPS_ACC1(0, 0, 0)] = energy_after;
}
コード例 #4
0
// user function
/*
 * Per-cell timestep limit, two-dimensional variant.
 *
 * Computes four candidate limits for the cell at offset (0,0) and stores the
 * smallest of them in dt_min:
 *   - a sound-speed limit: dtc_safe * min(celldx, celldy) divided by
 *     max(sqrt(soundspeed^2 + 2 * viscosity / density0), g_small);
 *   - an x limit and a y limit from the two-point velocity sums on opposing
 *     sides of the cell, each multiplied by the area value on that side;
 *   - a limit from the accumulated net of those side terms divided by
 *     2 * volume; it is g_big unless that quotient is below -g_small.
 *
 * dtc_safe, dtu_safe, dtv_safe, dtdiv_safe, g_small, g_big, MIN, MAX and the
 * OPS_ACCn index macros are defined outside this function.
 */
inline void calc_dt_kernel(const double *celldx, const double *celldy,
                           const double *soundspeed, const double *viscosity,
                           const double *density0, const double *xvel0,
                           const double *xarea, const double *volume,
                           const double *yvel0, const double *yarea,
                           double *dt_min) {

  /* Values at the cell itself that are needed more than once. */
  const double cell_vol = volume[OPS_ACC7(0, 0)];
  const double speed = soundspeed[OPS_ACC2(0, 0)];
  const double width_x = celldx[OPS_ACC0(0, 0)];
  const double width_y = celldy[OPS_ACC1(0, 0)];

  /* Sound-speed limit. */
  double signal = speed * speed;
  signal = signal + 2.0 * viscosity[OPS_ACC3(0, 0)] / density0[OPS_ACC4(0, 0)];
  signal = MAX(sqrt(signal), g_small);

  const double limit_sound = dtc_safe * MIN(width_x, width_y) / signal;

  /* Running net of the side terms, accumulated axis by axis. */
  double net = 0.0;

  /* x axis */
  const double x_lo =
      (xvel0[OPS_ACC5(0, 0)] + xvel0[OPS_ACC5(0, 1)]) * xarea[OPS_ACC6(0, 0)];
  const double x_hi =
      (xvel0[OPS_ACC5(1, 0)] + xvel0[OPS_ACC5(1, 1)]) * xarea[OPS_ACC6(1, 0)];

  net = net + x_hi - x_lo;

  const double limit_x = dtu_safe * 2.0 * cell_vol /
                         MAX(MAX(fabs(x_lo), fabs(x_hi)), g_small * cell_vol);

  /* y axis */
  const double y_lo =
      (yvel0[OPS_ACC8(0, 0)] + yvel0[OPS_ACC8(1, 0)]) * yarea[OPS_ACC9(0, 0)];
  const double y_hi =
      (yvel0[OPS_ACC8(0, 1)] + yvel0[OPS_ACC8(1, 1)]) * yarea[OPS_ACC9(0, 1)];

  net = net + y_hi - y_lo;

  const double limit_y = dtv_safe * 2.0 * cell_vol /
                         MAX(MAX(fabs(y_lo), fabs(y_hi)), g_small * cell_vol);

  /* Net-term limit: only active when the scaled net is below -g_small. */
  net = net / (2.0 * cell_vol);

  const double limit_net =
      (net < -g_small) ? dtdiv_safe * (-1.0 / net) : g_big;

  dt_min[OPS_ACC10(0, 0)] = MIN(MIN(limit_sound, limit_x), MIN(limit_y, limit_net));
}
コード例 #5
0
//user function
/*
 * Initialises energy0, density0 and the three velocity arrays for one cell
 * from the global `states` table.
 *
 * states[0] supplies the values written first.  Each further state
 * (1 .. number_of_states-1) overwrites them when its geometry test passes, so
 * a later matching state wins over an earlier one:
 *   g_cube  : vertexx(1,0,0) >= xmin and vertexx(0,0,0) < xmax, and the same
 *             test on vertexy / vertexz with the y / z offsets and bounds;
 *   g_sphe  : distance from (cellx, celly, cellz) to (xmin, ymin, zmin) is
 *             <= radius;
 *   g_point : vertexx, vertexy and vertexz at (0,0,0) equal xmin, ymin, zmin
 *             exactly.
 * On a match, energy0 and density0 are set at (0,0,0) and the velocities are
 * set at all eight offsets (ix,iy,iz) with each component in {0,1}.
 *
 * states, number_of_states, g_cube, g_sphe, g_point and the OPS_ACCn index
 * macros are defined outside this function.
 */
inline
void generate_chunk_kernel( const double *vertexx,
                     const double *vertexy, const double *vertexz,
                     double *energy0, double *density0,
                     double *xvel0,  double *yvel0, double *zvel0,
                     const double *cellx, const double *celly, const double *cellz) {

  /* Values from the first state. */
  energy0[OPS_ACC3(0,0,0)] = states[0].energy;
  density0[OPS_ACC4(0,0,0)] = states[0].density;
  xvel0[OPS_ACC5(0,0,0)] = states[0].xvel;
  yvel0[OPS_ACC6(0,0,0)] = states[0].yvel;
  zvel0[OPS_ACC7(0,0,0)] = states[0].zvel;

  for (int s = 1; s < number_of_states; s++) {

    /* Reference point shared by the sphere and point tests. */
    const double x_ref = states[s].xmin;
    const double y_ref = states[s].ymin;
    const double z_ref = states[s].zmin;

    /* Non-zero when this state's geometry test passes for the cell. */
    int hit = 0;

    if (states[s].geometry == g_cube) {
      hit = vertexx[OPS_ACC0(1,0,0)] >= states[s].xmin && vertexx[OPS_ACC0(0,0,0)] < states[s].xmax
         && vertexy[OPS_ACC1(0,1,0)] >= states[s].ymin && vertexy[OPS_ACC1(0,0,0)] < states[s].ymax
         && vertexz[OPS_ACC2(0,0,1)] >= states[s].zmin && vertexz[OPS_ACC2(0,0,0)] < states[s].zmax;
    }
    else if (states[s].geometry == g_sphe) {
      const double off_x = cellx[OPS_ACC8(0,0,0)] - x_ref;
      const double off_y = celly[OPS_ACC9(0,0,0)] - y_ref;
      const double off_z = cellz[OPS_ACC10(0,0,0)] - z_ref;
      const double dist = sqrt (off_x * off_x + off_y * off_y + off_z * off_z);
      hit = dist <= states[s].radius;
    }
    else if (states[s].geometry == g_point) {
      hit = vertexx[OPS_ACC0(0,0,0)] == x_ref
         && vertexy[OPS_ACC1(0,0,0)] == y_ref
         && vertexz[OPS_ACC2(0,0,0)] == z_ref;
    }

    if (hit) {
      energy0[OPS_ACC3(0,0,0)] = states[s].energy;
      density0[OPS_ACC4(0,0,0)] = states[s].density;

      /* Visit the eight offsets with ix slowest and iz fastest. */
      for (int corner = 0; corner < 8; corner++) {
        const int ix = corner / 4;
        const int iy = (corner / 2) % 2;
        const int iz = corner % 2;
        xvel0[OPS_ACC5(ix,iy,iz)] = states[s].xvel;
        yvel0[OPS_ACC6(ix,iy,iz)] = states[s].yvel;
        zvel0[OPS_ACC7(ix,iy,iz)] = states[s].zvel;
      }
    }
  }
}
コード例 #6
0
                       zvel0[OPS_ACC13(0, 1, 1)] + zvel0[OPS_ACC13(1, 1, 1)])) *
                     0.125 * dt * 0.5;

        total_flux = right_flux - left_flux + top_flux - bottom_flux +
                     front_flux - back_flux;

        volume_change[OPS_ACC4(0, 0, 0)] =
            (volume[OPS_ACC5(0, 0, 0)]) /
            (volume[OPS_ACC5(0, 0, 0)] + total_flux);
        recip_volume = 1.0 / volume[OPS_ACC5(0, 0, 0)];
        energy_change =
            (pressure[OPS_ACC6(0, 0, 0)] / density0[OPS_ACC7(0, 0, 0)] +
             viscosity[OPS_ACC9(0, 0, 0)] / density0[OPS_ACC7(0, 0, 0)]) *
            total_flux * recip_volume;
        energy1[OPS_ACC11(0, 0, 0)] =
            energy0[OPS_ACC10(0, 0, 0)] - energy_change;
        density1[OPS_ACC8(0, 0, 0)] =
            density0[OPS_ACC7(0, 0, 0)] * volume_change[OPS_ACC4(0, 0, 0)];
      }
    }
  }
}
#undef OPS_ACC0
#undef OPS_ACC1
#undef OPS_ACC2
#undef OPS_ACC3
#undef OPS_ACC4
#undef OPS_ACC5
#undef OPS_ACC6
#undef OPS_ACC7
#undef OPS_ACC8
コード例 #7
0
    const double *restrict pre_vol, const double *restrict post_vol,
    double *restrict pre_mass, double *restrict post_mass,
    double *restrict advec_vol, double *restrict post_ener,
    const double *restrict ener_flux, int x_size, int y_size) {
#pragma omp parallel for
  for (int n_y = 0; n_y < y_size; n_y++) {
    for (int n_x = 0; n_x < x_size; n_x++) {

      pre_mass[OPS_ACC6(0, 0)] =
          density1[OPS_ACC0(0, 0)] * pre_vol[OPS_ACC4(0, 0)];
      post_mass[OPS_ACC7(0, 0)] = pre_mass[OPS_ACC6(0, 0)] +
                                  mass_flux_y[OPS_ACC2(0, 0)] -
                                  mass_flux_y[OPS_ACC2(0, 1)];
      post_ener[OPS_ACC9(0, 0)] =
          (energy1[OPS_ACC1(0, 0)] * pre_mass[OPS_ACC6(0, 0)] +
           ener_flux[OPS_ACC10(0, 0)] - ener_flux[OPS_ACC10(0, 1)]) /
          post_mass[OPS_ACC7(0, 0)];
      advec_vol[OPS_ACC8(0, 0)] = pre_vol[OPS_ACC4(0, 0)] +
                                  vol_flux_y[OPS_ACC3(0, 0)] -
                                  vol_flux_y[OPS_ACC3(0, 1)];
      density1[OPS_ACC0(0, 0)] =
          post_mass[OPS_ACC7(0, 0)] / advec_vol[OPS_ACC8(0, 0)];
      energy1[OPS_ACC1(0, 0)] = post_ener[OPS_ACC9(0, 0)];
    }
  }
}
#undef OPS_ACC0
#undef OPS_ACC1
#undef OPS_ACC2
#undef OPS_ACC3
#undef OPS_ACC4
コード例 #8
0
//user function
/*
 * Computes the viscosity value for the cell at offset (0,0,0).
 *
 * Steps:
 *   1. Sample each velocity component at the eight offsets in {0,1}^3 and form,
 *      per axis, the difference between the four-point sum on the high side and
 *      the four-point sum on the low side.
 *   2. Combine the x/y/z differences of u/v/w with the area values into `div`
 *      and, scaled by 0.25 and divided by the cell widths, into the six terms
 *      xx, yy, zz, xy, xz, yz.
 *   3. Form centred pressure differences divided by the sum of two adjacent
 *      cell widths, and from them the `limiter`.
 *   4. Store 0 when limiter > 0 or div >= 0; otherwise store
 *      2 * density0 * grad^2 * limiter^2, where grad is the smallest of the
 *      three |cell width * |pgrad| / pgrad component| values.
 *
 * MIN, MAX, SIGN and the OPS_ACCn index macros are defined outside this
 * function.
 */
inline
void viscosity_kernel( const double *xvel0, const double *yvel0,
                       const double *celldx, const double *celldy,
                       const double *pressure, const double *density0,
                       double *viscosity, const double *zvel0, const double *celldz, const double *xarea, const double *yarea, const double *zarea) {

  /* Velocity samples; the three digits in each name are the (i,j,k) offset. */
  const double u000 = xvel0[OPS_ACC0(0,0,0)];
  const double u100 = xvel0[OPS_ACC0(1,0,0)];
  const double u010 = xvel0[OPS_ACC0(0,1,0)];
  const double u110 = xvel0[OPS_ACC0(1,1,0)];
  const double u001 = xvel0[OPS_ACC0(0,0,1)];
  const double u101 = xvel0[OPS_ACC0(1,0,1)];
  const double u011 = xvel0[OPS_ACC0(0,1,1)];
  const double u111 = xvel0[OPS_ACC0(1,1,1)];

  const double v000 = yvel0[OPS_ACC1(0,0,0)];
  const double v100 = yvel0[OPS_ACC1(1,0,0)];
  const double v010 = yvel0[OPS_ACC1(0,1,0)];
  const double v110 = yvel0[OPS_ACC1(1,1,0)];
  const double v001 = yvel0[OPS_ACC1(0,0,1)];
  const double v101 = yvel0[OPS_ACC1(1,0,1)];
  const double v011 = yvel0[OPS_ACC1(0,1,1)];
  const double v111 = yvel0[OPS_ACC1(1,1,1)];

  const double w000 = zvel0[OPS_ACC7(0,0,0)];
  const double w100 = zvel0[OPS_ACC7(1,0,0)];
  const double w010 = zvel0[OPS_ACC7(0,1,0)];
  const double w110 = zvel0[OPS_ACC7(1,1,0)];
  const double w001 = zvel0[OPS_ACC7(0,0,1)];
  const double w101 = zvel0[OPS_ACC7(1,0,1)];
  const double w011 = zvel0[OPS_ACC7(0,1,1)];
  const double w111 = zvel0[OPS_ACC7(1,1,1)];

  /* Cell widths at the cell itself. */
  const double width_x = celldx[OPS_ACC2(0,0,0)];
  const double width_y = celldy[OPS_ACC3(0,0,0)];
  const double width_z = celldz[OPS_ACC8(0,0,0)];

  /* High-side sum minus low-side sum, per component and axis. */
  const double u_diff_x = (u100+u110+u101+u111) - (u000+u010+u001+u011);
  const double u_diff_y = (u010+u110+u011+u111) - (u000+u100+u001+u101);
  const double u_diff_z = (u001+u101+u011+u111) - (u000+u100+u010+u110);

  const double v_diff_x = (v100+v110+v101+v111) - (v000+v010+v001+v011);
  const double v_diff_y = (v010+v110+v011+v111) - (v000+v100+v001+v101);
  const double v_diff_z = (v001+v101+v011+v111) - (v000+v100+v010+v110);

  const double w_diff_x = (w100+w110+w101+w111) - (w000+w010+w001+w011);
  const double w_diff_y = (w010+w110+w011+w111) - (w000+w100+w001+w101);
  const double w_diff_z = (w001+w101+w011+w111) - (w000+w100+w010+w110);

  const double div_term = xarea[OPS_ACC9(0,0,0)]*(u_diff_x) + yarea[OPS_ACC10(0,0,0)]*(v_diff_y) + zarea[OPS_ACC11(0,0,0)]*(w_diff_z);

  const double xx = 0.25*(u_diff_x)/(width_x);
  const double yy = 0.25*(v_diff_y)/(width_y);
  const double zz = 0.25*(w_diff_z)/(width_z);
  const double xy = 0.25*(u_diff_y)/(width_y)+0.25*(v_diff_x)/(width_x);
  const double xz = 0.25*(u_diff_z)/(width_z)+0.25*(w_diff_x)/(width_x);
  const double yz = 0.25*(v_diff_z)/(width_z)+0.25*(w_diff_y)/(width_y);

  /* Centred pressure differences over the sum of two adjacent cell widths. */
  const double px = (pressure[OPS_ACC4(1,0,0)] - pressure[OPS_ACC4(-1,0,0)])/(width_x+ celldx[OPS_ACC2(1,0,0)]);
  const double py = (pressure[OPS_ACC4(0,1,0)] - pressure[OPS_ACC4(0,-1,0)])/(width_y+ celldy[OPS_ACC3(0,1,0)]);
  const double pz = (pressure[OPS_ACC4(0,0,1)] - pressure[OPS_ACC4(0,0,-1)])/(width_z+ celldz[OPS_ACC8(0,0,1)]);

  const double px_sq = px * px;
  const double py_sq = py * py;
  const double pz_sq = pz * pz;

  /* The denominator is floored at 1e-16. */
  const double limiter = (xx*px_sq+yy*py_sq+zz*pz_sq
                       +  xy*px*py+xz*px*pz+yz*py*pz)
                             / MAX(px_sq+py_sq+pz_sq,1.0e-16);

  if( (limiter > 0.0) || (div_term >= 0.0)) {
    viscosity[OPS_ACC6(0,0,0)] = 0.0;
  }
  else {
    /* Keep each component at least 1e-16 in magnitude before dividing by it. */
    const double gx = SIGN( MAX(1.0e-16, fabs(px)), px);
    const double gy = SIGN( MAX(1.0e-16, fabs(py)), py);
    const double gz = SIGN( MAX(1.0e-16, fabs(pz)), pz);
    const double g_norm = sqrt(gx*gx + gy*gy + gz*gz);

    const double len_x = fabs(width_x * g_norm/gx);
    const double len_y = fabs(width_y * g_norm/gy);
    const double len_z = fabs(width_z * g_norm/gz);
    const double len = MIN(len_x,MIN(len_y,len_z));
    const double len_sq = len*len;

    viscosity[OPS_ACC6(0,0,0)] = 2.0 * (density0[OPS_ACC5(0,0,0)]) * len_sq * limiter * limiter;
  }
}
コード例 #9
0
            zvel0[OPS_ACC11(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (zarea[OPS_ACC13(0, 0, 0)] * (pressure[OPS_ACC6(0, 0, 0)] -
                                              pressure[OPS_ACC6(0, 0, -1)]) +
                 zarea[OPS_ACC13(0, -1, 0)] * (pressure[OPS_ACC6(0, -1, 0)] -
                                               pressure[OPS_ACC6(0, -1, -1)]) +
                 zarea[OPS_ACC13(-1, 0, 0)] * (pressure[OPS_ACC6(-1, 0, 0)] -
                                               pressure[OPS_ACC6(-1, 0, -1)]) +
                 zarea[OPS_ACC13(-1, -1, 0)] *
                     (pressure[OPS_ACC6(-1, -1, 0)] -
                      pressure[OPS_ACC6(-1, -1, -1)]));

        xvel1[OPS_ACC4(0, 0, 0)] =
            xvel1[OPS_ACC4(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (xarea[OPS_ACC5(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] -
                                             viscosity[OPS_ACC10(-1, 0, 0)]) +
                 xarea[OPS_ACC5(0, -1, 0)] * (viscosity[OPS_ACC10(0, -1, 0)] -
                                              viscosity[OPS_ACC10(-1, -1, 0)]) +
                 xarea[OPS_ACC5(0, 0, -1)] * (viscosity[OPS_ACC10(0, 0, -1)] -
                                              viscosity[OPS_ACC10(-1, 0, -1)]) +
                 xarea[OPS_ACC5(0, -1, -1)] *
                     (viscosity[OPS_ACC10(0, -1, -1)] -
                      viscosity[OPS_ACC10(-1, -1, -1)]));

        yvel1[OPS_ACC8(0, 0, 0)] =
            yvel1[OPS_ACC8(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (yarea[OPS_ACC9(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] -
                                             viscosity[OPS_ACC10(0, -1, 0)]) +
                 yarea[OPS_ACC9(-1, 0, 0)] * (viscosity[OPS_ACC10(-1, 0, 0)] -
コード例 #10
0
// host stub function
/*
 * Runs the accelerate kernel over the 3D index range stored in `desc`.
 *
 * The descriptor supplies the block, the dimension count, the iteration range
 * (start/end pairs per axis) and 14 arguments.  Each argument's data pointer
 * is offset by its dat's base_offset and viewed as an array of double.  For
 * every point in the range the loop body:
 *   - computes nodal_mass as 0.125 times the sum of density0 * volume over the
 *     eight offsets with each component in {-1, 0};
 *   - stores stepbymass = 0.25 * dt / nodal_mass;
 *   - sets xvel1/yvel1/zvel1 to xvel0/yvel0/zvel0 minus stepbymass times a sum
 *     of area-weighted pressure differences;
 *   - subtracts from xvel1/yvel1/zvel1 stepbymass times the same sums formed
 *     with viscosity in place of pressure.
 *
 * When OPS_diags > 1 the call count, timings and transfer figures are
 * accumulated in OPS_kernels[105].
 *
 * `dt`, OPS_diags, OPS_kernels and the OPS_ACCn index macros are defined
 * outside this function.
 */
void ops_par_loop_accelerate_kernel_execute(ops_kernel_descriptor *desc) {
  // NOTE(review): `block` is not referenced again in the visible body.
  ops_block block = desc->block;
  int dim = desc->dim;
  int *range = desc->range;
  ops_arg arg0 = desc->args[0];
  ops_arg arg1 = desc->args[1];
  ops_arg arg2 = desc->args[2];
  ops_arg arg3 = desc->args[3];
  ops_arg arg4 = desc->args[4];
  ops_arg arg5 = desc->args[5];
  ops_arg arg6 = desc->args[6];
  ops_arg arg7 = desc->args[7];
  ops_arg arg8 = desc->args[8];
  ops_arg arg9 = desc->args[9];
  ops_arg arg10 = desc->args[10];
  ops_arg arg11 = desc->args[11];
  ops_arg arg12 = desc->args[12];
  ops_arg arg13 = desc->args[13];

  // Timing: filled by ops_timers_core and only read when OPS_diags > 1
  double t1, t2, c1, c2;

  ops_arg args[14] = {arg0, arg1, arg2, arg3,  arg4,  arg5,  arg6,
                      arg7, arg8, arg9, arg10, arg11, arg12, arg13};

#ifdef CHECKPOINTING
  // Return without running the loop when ops_checkpointing_before reports 0
  if (!ops_checkpointing_before(args, 14, range, 105))
    return;
#endif

  if (OPS_diags > 1) {
    // Count this invocation and take the first timestamp
    OPS_kernels[105].count++;
    ops_timers_core(&c2, &t2);
  }

  // compute locally allocated range for the sub-block
  // (range holds start/end pairs: range[2n] and range[2n+1] for axis n)
  int start[3];
  int end[3];

  for (int n = 0; n < 3; n++) {
    start[n] = range[2 * n];
    end[n] = range[2 * n + 1];
  }

#ifdef OPS_DEBUG
  ops_register_args(args, "accelerate_kernel");
#endif

  // set up initial pointers: each data pointer is advanced by the dat's
  // base_offset and then viewed as double
  // (no halo exchange call appears in this function)
  int base0 = args[0].dat->base_offset;
  const double *__restrict__ density0 = (double *)(args[0].data + base0);

  int base1 = args[1].dat->base_offset;
  const double *__restrict__ volume = (double *)(args[1].data + base1);

  int base2 = args[2].dat->base_offset;
  double *__restrict__ stepbymass = (double *)(args[2].data + base2);

  int base3 = args[3].dat->base_offset;
  const double *__restrict__ xvel0 = (double *)(args[3].data + base3);

  int base4 = args[4].dat->base_offset;
  double *__restrict__ xvel1 = (double *)(args[4].data + base4);

  int base5 = args[5].dat->base_offset;
  const double *__restrict__ xarea = (double *)(args[5].data + base5);

  int base6 = args[6].dat->base_offset;
  const double *__restrict__ pressure = (double *)(args[6].data + base6);

  int base7 = args[7].dat->base_offset;
  const double *__restrict__ yvel0 = (double *)(args[7].data + base7);

  int base8 = args[8].dat->base_offset;
  double *__restrict__ yvel1 = (double *)(args[8].data + base8);

  int base9 = args[9].dat->base_offset;
  const double *__restrict__ yarea = (double *)(args[9].data + base9);

  int base10 = args[10].dat->base_offset;
  const double *__restrict__ viscosity = (double *)(args[10].data + base10);

  int base11 = args[11].dat->base_offset;
  const double *__restrict__ zvel0 = (double *)(args[11].data + base11);

  int base12 = args[12].dat->base_offset;
  double *__restrict__ zvel1 = (double *)(args[12].data + base12);

  int base13 = args[13].dat->base_offset;
  const double *__restrict__ zarea = (double *)(args[13].data + base13);

  // x and y sizes of each dat, held in locals.  They are not referenced by
  // name below; presumably the OPS_ACCn macros read them — confirm against the
  // macro definitions.
  int xdim0_accelerate_kernel = args[0].dat->size[0];
  int ydim0_accelerate_kernel = args[0].dat->size[1];
  int xdim1_accelerate_kernel = args[1].dat->size[0];
  int ydim1_accelerate_kernel = args[1].dat->size[1];
  int xdim2_accelerate_kernel = args[2].dat->size[0];
  int ydim2_accelerate_kernel = args[2].dat->size[1];
  int xdim3_accelerate_kernel = args[3].dat->size[0];
  int ydim3_accelerate_kernel = args[3].dat->size[1];
  int xdim4_accelerate_kernel = args[4].dat->size[0];
  int ydim4_accelerate_kernel = args[4].dat->size[1];
  int xdim5_accelerate_kernel = args[5].dat->size[0];
  int ydim5_accelerate_kernel = args[5].dat->size[1];
  int xdim6_accelerate_kernel = args[6].dat->size[0];
  int ydim6_accelerate_kernel = args[6].dat->size[1];
  int xdim7_accelerate_kernel = args[7].dat->size[0];
  int ydim7_accelerate_kernel = args[7].dat->size[1];
  int xdim8_accelerate_kernel = args[8].dat->size[0];
  int ydim8_accelerate_kernel = args[8].dat->size[1];
  int xdim9_accelerate_kernel = args[9].dat->size[0];
  int ydim9_accelerate_kernel = args[9].dat->size[1];
  int xdim10_accelerate_kernel = args[10].dat->size[0];
  int ydim10_accelerate_kernel = args[10].dat->size[1];
  int xdim11_accelerate_kernel = args[11].dat->size[0];
  int ydim11_accelerate_kernel = args[11].dat->size[1];
  int xdim12_accelerate_kernel = args[12].dat->size[0];
  int ydim12_accelerate_kernel = args[12].dat->size[1];
  int xdim13_accelerate_kernel = args[13].dat->size[0];
  int ydim13_accelerate_kernel = args[13].dat->size[1];

  if (OPS_diags > 1) {
    // Everything between the first timestamp and this one is booked as
    // mpi_time
    ops_timers_core(&c1, &t1);
    OPS_kernels[105].mpi_time += t1 - t2;
  }

  // The z and y loops are collapsed into one OpenMP parallel loop; the x loop
  // carries the vectorisation pragmas.
#pragma omp parallel for collapse(2)
  for (int n_z = start[2]; n_z < end[2]; n_z++) {
    for (int n_y = start[1]; n_y < end[1]; n_y++) {
#ifdef intel
#pragma loop_count(10000)
#pragma omp simd aligned(density0, volume, stepbymass, xvel0, xvel1, xarea,    \
                         pressure, yvel0, yvel1, yarea, viscosity, zvel0,      \
                         zvel1, zarea)
#else
#pragma simd
#endif
      for (int n_x = start[0]; n_x < end[0]; n_x++) {

        // 0.125 times the sum of density0 * volume over the eight offsets
        // with each component in {-1, 0}
        double nodal_mass = 0.0;
        nodal_mass =
            (density0[OPS_ACC0(-1, -1, 0)] * volume[OPS_ACC1(-1, -1, 0)] +
             density0[OPS_ACC0(0, -1, 0)] * volume[OPS_ACC1(0, -1, 0)] +
             density0[OPS_ACC0(0, 0, 0)] * volume[OPS_ACC1(0, 0, 0)] +
             density0[OPS_ACC0(-1, 0, 0)] * volume[OPS_ACC1(-1, 0, 0)] +
             density0[OPS_ACC0(-1, -1, -1)] * volume[OPS_ACC1(-1, -1, -1)] +
             density0[OPS_ACC0(0, -1, -1)] * volume[OPS_ACC1(0, -1, -1)] +
             density0[OPS_ACC0(0, 0, -1)] * volume[OPS_ACC1(0, 0, -1)] +
             density0[OPS_ACC0(-1, 0, -1)] * volume[OPS_ACC1(-1, 0, -1)]) *
            0.125;

        // Stored and then re-read by every update below
        stepbymass[OPS_ACC2(0, 0, 0)] = 0.25 * dt / nodal_mass;

        // Pressure pass: x-differences of pressure weighted by xarea
        xvel1[OPS_ACC4(0, 0, 0)] =
            xvel0[OPS_ACC3(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (xarea[OPS_ACC5(0, 0, 0)] * (pressure[OPS_ACC6(0, 0, 0)] -
                                             pressure[OPS_ACC6(-1, 0, 0)]) +
                 xarea[OPS_ACC5(0, -1, 0)] * (pressure[OPS_ACC6(0, -1, 0)] -
                                              pressure[OPS_ACC6(-1, -1, 0)]) +
                 xarea[OPS_ACC5(0, 0, -1)] * (pressure[OPS_ACC6(0, 0, -1)] -
                                              pressure[OPS_ACC6(-1, 0, -1)]) +
                 xarea[OPS_ACC5(0, -1, -1)] * (pressure[OPS_ACC6(0, -1, -1)] -
                                               pressure[OPS_ACC6(-1, -1, -1)]));

        // Pressure pass: y-differences of pressure weighted by yarea
        yvel1[OPS_ACC8(0, 0, 0)] =
            yvel0[OPS_ACC7(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (yarea[OPS_ACC9(0, 0, 0)] * (pressure[OPS_ACC6(0, 0, 0)] -
                                             pressure[OPS_ACC6(0, -1, 0)]) +
                 yarea[OPS_ACC9(-1, 0, 0)] * (pressure[OPS_ACC6(-1, 0, 0)] -
                                              pressure[OPS_ACC6(-1, -1, 0)]) +
                 yarea[OPS_ACC9(0, 0, -1)] * (pressure[OPS_ACC6(0, 0, -1)] -
                                              pressure[OPS_ACC6(0, -1, -1)]) +
                 yarea[OPS_ACC9(-1, 0, -1)] * (pressure[OPS_ACC6(-1, 0, -1)] -
                                               pressure[OPS_ACC6(-1, -1, -1)]));

        // Pressure pass: z-differences of pressure weighted by zarea
        zvel1[OPS_ACC12(0, 0, 0)] =
            zvel0[OPS_ACC11(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (zarea[OPS_ACC13(0, 0, 0)] * (pressure[OPS_ACC6(0, 0, 0)] -
                                              pressure[OPS_ACC6(0, 0, -1)]) +
                 zarea[OPS_ACC13(0, -1, 0)] * (pressure[OPS_ACC6(0, -1, 0)] -
                                               pressure[OPS_ACC6(0, -1, -1)]) +
                 zarea[OPS_ACC13(-1, 0, 0)] * (pressure[OPS_ACC6(-1, 0, 0)] -
                                               pressure[OPS_ACC6(-1, 0, -1)]) +
                 zarea[OPS_ACC13(-1, -1, 0)] *
                     (pressure[OPS_ACC6(-1, -1, 0)] -
                      pressure[OPS_ACC6(-1, -1, -1)]));

        // Viscosity pass: same stencils as above with viscosity in place of
        // pressure, applied on top of the values just written
        xvel1[OPS_ACC4(0, 0, 0)] =
            xvel1[OPS_ACC4(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (xarea[OPS_ACC5(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] -
                                             viscosity[OPS_ACC10(-1, 0, 0)]) +
                 xarea[OPS_ACC5(0, -1, 0)] * (viscosity[OPS_ACC10(0, -1, 0)] -
                                              viscosity[OPS_ACC10(-1, -1, 0)]) +
                 xarea[OPS_ACC5(0, 0, -1)] * (viscosity[OPS_ACC10(0, 0, -1)] -
                                              viscosity[OPS_ACC10(-1, 0, -1)]) +
                 xarea[OPS_ACC5(0, -1, -1)] *
                     (viscosity[OPS_ACC10(0, -1, -1)] -
                      viscosity[OPS_ACC10(-1, -1, -1)]));

        yvel1[OPS_ACC8(0, 0, 0)] =
            yvel1[OPS_ACC8(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (yarea[OPS_ACC9(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] -
                                             viscosity[OPS_ACC10(0, -1, 0)]) +
                 yarea[OPS_ACC9(-1, 0, 0)] * (viscosity[OPS_ACC10(-1, 0, 0)] -
                                              viscosity[OPS_ACC10(-1, -1, 0)]) +
                 yarea[OPS_ACC9(0, 0, -1)] * (viscosity[OPS_ACC10(0, 0, -1)] -
                                              viscosity[OPS_ACC10(0, -1, -1)]) +
                 yarea[OPS_ACC9(-1, 0, -1)] *
                     (viscosity[OPS_ACC10(-1, 0, -1)] -
                      viscosity[OPS_ACC10(-1, -1, -1)]));

        zvel1[OPS_ACC12(0, 0, 0)] =
            zvel1[OPS_ACC12(0, 0, 0)] -
            stepbymass[OPS_ACC2(0, 0, 0)] *
                (zarea[OPS_ACC13(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] -
                                              viscosity[OPS_ACC10(0, 0, -1)]) +
                 zarea[OPS_ACC13(0, -1, 0)] *
                     (viscosity[OPS_ACC10(0, -1, 0)] -
                      viscosity[OPS_ACC10(0, -1, -1)]) +
                 zarea[OPS_ACC13(-1, 0, 0)] *
                     (viscosity[OPS_ACC10(-1, 0, 0)] -
                      viscosity[OPS_ACC10(-1, 0, -1)]) +
                 zarea[OPS_ACC13(-1, -1, 0)] *
                     (viscosity[OPS_ACC10(-1, -1, 0)] -
                      viscosity[OPS_ACC10(-1, -1, -1)]));
      }
    }
  }
  if (OPS_diags > 1) {
    // Time spent in the loop nest above
    ops_timers_core(&c2, &t2);
    OPS_kernels[105].time += t2 - t1;
  }

  if (OPS_diags > 1) {
    // Update kernel record: t1 is taken right after t2, then the result of
    // ops_compute_transfer for each of the 14 arguments is added to transfer
    ops_timers_core(&c1, &t1);
    OPS_kernels[105].mpi_time += t1 - t2;
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg0);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg1);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg2);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg3);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg4);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg5);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg6);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg7);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg8);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg9);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg10);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg11);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg12);
    OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg13);
  }
}