//user function inline void preproc_kernel(const double *u, double *du, double *ax, double *bx, double *cx, double *ay, double *by, double *cy, double *az, double *bz, double *cz, int *idx){ double a, b, c, d; if(idx[0]==0 || idx[0]==nx-1 || idx[1]==0 || idx[1]==ny-1 || idx[2]==0 || idx[2]==nz-1) { d = 0.0f; a = 0.0f; b = 1.0f; c = 0.0f; } else { d = lambda*( u[OPS_ACC0(-1,0,0)] + u[OPS_ACC0(1,0,0)] + u[OPS_ACC0(0,-1,0)] + u[OPS_ACC0(0,1,0)] + u[OPS_ACC0(0,0,-1)] + u[OPS_ACC0(0,0,1)] - 6.0f*u[OPS_ACC0(0,0,0)]); a = -0.5f * lambda; b = 1.0f + lambda; c = -0.5f * lambda; } du[OPS_ACC1(0,0,0)] = d; ax[OPS_ACC2(0,0,0)] = a; bx[OPS_ACC3(0,0,0)] = b; cx[OPS_ACC4(0,0,0)] = c; ay[OPS_ACC5(0,0,0)] = a; by[OPS_ACC6(0,0,0)] = b; cy[OPS_ACC7(0,0,0)] = c; az[OPS_ACC8(0,0,0)] = a; bz[OPS_ACC9(0,0,0)] = b; cz[OPS_ACC10(0,0,0)] = c; }
// user function inline void calc_dt_kernel(const double *celldx, const double *celldy, const double *soundspeed, const double *viscosity, const double *density0, const double *xvel0, const double *xarea, const double *volume, const double *yvel0, const double *yarea, double *dt_min, const double *celldz, const double *zvel0, const double *zarea) { double div, ds, dtut, dtvt, dtct, dtwt, dtdivt, cc, dv1, dv2, du1, du2, dw1, dw2; ds = MIN(MIN(celldx[OPS_ACC0(0, 0, 0)], celldy[OPS_ACC1(0, 0, 0)]), celldz[OPS_ACC11(0, 0, 0)]); ds = 1.0 / (ds * ds); cc = soundspeed[OPS_ACC2(0, 0, 0)] * soundspeed[OPS_ACC2(0, 0, 0)]; cc = cc + 2.0 * viscosity[OPS_ACC3(0, 0, 0)] / density0[OPS_ACC4(0, 0, 0)]; dtct = ds * cc; dtct = dtc_safe * 1.0 / MAX(sqrt(dtct), g_small); du1 = (xvel0[OPS_ACC5(0, 0, 0)] + xvel0[OPS_ACC5(0, 1, 0)] + xvel0[OPS_ACC5(0, 0, 1)] + xvel0[OPS_ACC5(0, 1, 1)]) * xarea[OPS_ACC6(0, 0, 0)]; du2 = (xvel0[OPS_ACC5(1, 0, 0)] + xvel0[OPS_ACC5(1, 1, 0)] + xvel0[OPS_ACC5(1, 0, 1)] + xvel0[OPS_ACC5(1, 1, 1)]) * xarea[OPS_ACC6(0, 0, 0)]; dtut = dtu_safe * 4.0 * volume[OPS_ACC7(0, 0, 0)] / MAX(MAX(fabs(du1), fabs(du2)), 1.0e-5 * volume[OPS_ACC7(0, 0, 0)]); dv1 = (yvel0[OPS_ACC8(0, 0, 0)] + yvel0[OPS_ACC8(1, 0, 0)] + yvel0[OPS_ACC8(0, 0, 1)] + yvel0[OPS_ACC8(1, 0, 1)]) * yarea[OPS_ACC9(0, 0, 0)]; dv2 = (yvel0[OPS_ACC8(0, 1, 0)] + yvel0[OPS_ACC8(1, 1, 0)] + yvel0[OPS_ACC8(0, 1, 1)] + yvel0[OPS_ACC8(1, 1, 1)]) * yarea[OPS_ACC9(0, 0, 0)]; dtvt = dtv_safe * 4.0 * volume[OPS_ACC7(0, 0, 0)] / MAX(MAX(fabs(dv1), fabs(dv2)), 1.0e-5 * volume[OPS_ACC7(0, 0, 0)]); dw1 = (zvel0[OPS_ACC12(0, 0, 0)] + zvel0[OPS_ACC12(0, 1, 0)] + zvel0[OPS_ACC12(1, 0, 0)] + zvel0[OPS_ACC12(1, 1, 0)]) * zarea[OPS_ACC13(0, 0, 0)]; dw2 = (zvel0[OPS_ACC12(0, 0, 1)] + zvel0[OPS_ACC12(0, 1, 1)] + zvel0[OPS_ACC12(1, 0, 1)] + zvel0[OPS_ACC12(1, 1, 1)]) * zarea[OPS_ACC13(0, 0, 0)]; dtwt = dtw_safe * 4.0 * volume[OPS_ACC7(0, 0, 0)] / MAX(MAX(fabs(dw1), fabs(dw2)), 1.0e-5 * volume[OPS_ACC7(0, 0, 0)]); div = du2 - du1 
+ dv2 - dv1 + dw2 - dw1; dtdivt = dtdiv_safe * 4.0 * (volume[OPS_ACC7(0, 0, 0)]) / MAX(volume[OPS_ACC7(0, 0, 0)] * 1.0e-05, fabs(div)); dt_min[OPS_ACC10(0, 0, 0)] = MIN(MIN(MIN(dtct, dtut), MIN(dtvt, dtdivt)), dtwt); }
// user function
/*
 * advec_cell_kernel4_zdir: z-direction cell update for one grid point.
 *
 * In order, it writes:
 *   pre_mass   = density1 * pre_vol
 *   post_mass  = pre_mass + mass_flux_z(0,0,0) - mass_flux_z(0,0,1)
 *   post_ener  = (energy1 * pre_mass + ener_flux(0,0,0) - ener_flux(0,0,1))
 *                / post_mass
 *   advec_vol  = pre_vol + vol_flux_z(0,0,0) - vol_flux_z(0,0,1)
 * and then overwrites density1 with post_mass / advec_vol and energy1 with
 * post_ener.
 *
 * post_vol is part of the signature but is not referenced by the body.
 */
inline void advec_cell_kernel4_zdir(double *density1, double *energy1, const double *mass_flux_z, const double *vol_flux_z, const double *pre_vol, const double *post_vol, double *pre_mass, double *post_mass, double *advec_vol, double *post_ener, const double *ener_flux) {
  const double mass_before =
      density1[OPS_ACC0(0, 0, 0)] * pre_vol[OPS_ACC4(0, 0, 0)];
  pre_mass[OPS_ACC6(0, 0, 0)] = mass_before;

  const double mass_after = mass_before + mass_flux_z[OPS_ACC2(0, 0, 0)] -
                            mass_flux_z[OPS_ACC2(0, 0, 1)];
  post_mass[OPS_ACC7(0, 0, 0)] = mass_after;

  const double ener_after =
      (energy1[OPS_ACC1(0, 0, 0)] * mass_before +
       ener_flux[OPS_ACC10(0, 0, 0)] - ener_flux[OPS_ACC10(0, 0, 1)]) /
      mass_after;
  post_ener[OPS_ACC9(0, 0, 0)] = ener_after;

  const double vol_after = pre_vol[OPS_ACC4(0, 0, 0)] +
                           vol_flux_z[OPS_ACC3(0, 0, 0)] -
                           vol_flux_z[OPS_ACC3(0, 0, 1)];
  advec_vol[OPS_ACC8(0, 0, 0)] = vol_after;

  /* Final in-place update of the advected fields. */
  density1[OPS_ACC0(0, 0, 0)] = mass_after / vol_after;
  energy1[OPS_ACC1(0, 0, 0)] = ener_after;
}
// user function inline void calc_dt_kernel(const double *celldx, const double *celldy, const double *soundspeed, const double *viscosity, const double *density0, const double *xvel0, const double *xarea, const double *volume, const double *yvel0, const double *yarea, double *dt_min) { double div, dsx, dsy, dtut, dtvt, dtct, dtdivt, cc, dv1, dv2; dsx = celldx[OPS_ACC0(0, 0)]; dsy = celldy[OPS_ACC1(0, 0)]; cc = soundspeed[OPS_ACC2(0, 0)] * soundspeed[OPS_ACC2(0, 0)]; cc = cc + 2.0 * viscosity[OPS_ACC3(0, 0)] / density0[OPS_ACC4(0, 0)]; cc = MAX(sqrt(cc), g_small); dtct = dtc_safe * MIN(dsx, dsy) / cc; div = 0.0; dv1 = (xvel0[OPS_ACC5(0, 0)] + xvel0[OPS_ACC5(0, 1)]) * xarea[OPS_ACC6(0, 0)]; dv2 = (xvel0[OPS_ACC5(1, 0)] + xvel0[OPS_ACC5(1, 1)]) * xarea[OPS_ACC6(1, 0)]; div = div + dv2 - dv1; dtut = dtu_safe * 2.0 * volume[OPS_ACC7(0, 0)] / MAX(MAX(fabs(dv1), fabs(dv2)), g_small * volume[OPS_ACC7(0, 0)]); dv1 = (yvel0[OPS_ACC8(0, 0)] + yvel0[OPS_ACC8(1, 0)]) * yarea[OPS_ACC9(0, 0)]; dv2 = (yvel0[OPS_ACC8(0, 1)] + yvel0[OPS_ACC8(1, 1)]) * yarea[OPS_ACC9(0, 1)]; div = div + dv2 - dv1; dtvt = dtv_safe * 2.0 * volume[OPS_ACC7(0, 0)] / MAX(MAX(fabs(dv1), fabs(dv2)), g_small * volume[OPS_ACC7(0, 0)]); div = div / (2.0 * volume[OPS_ACC7(0, 0)]); if (div < -g_small) dtdivt = dtdiv_safe * (-1.0 / div); else dtdivt = g_big; dt_min[OPS_ACC10(0, 0)] = MIN(MIN(dtct, dtut), MIN(dtvt, dtdivt)); }
//user function inline void generate_chunk_kernel( const double *vertexx, const double *vertexy, const double *vertexz, double *energy0, double *density0, double *xvel0, double *yvel0, double *zvel0, const double *cellx, const double *celly, const double *cellz) { double radius, x_cent, y_cent, z_cent; energy0[OPS_ACC3(0,0,0)]= states[0].energy; density0[OPS_ACC4(0,0,0)]= states[0].density; xvel0[OPS_ACC5(0,0,0)]=states[0].xvel; yvel0[OPS_ACC6(0,0,0)]=states[0].yvel; zvel0[OPS_ACC7(0,0,0)]=states[0].zvel; for(int i = 1; i<number_of_states; i++) { x_cent=states[i].xmin; y_cent=states[i].ymin; z_cent=states[i].zmin; if (states[i].geometry == g_cube) { if(vertexx[OPS_ACC0(1,0,0)] >= states[i].xmin && vertexx[OPS_ACC0(0,0,0)] < states[i].xmax) { if(vertexy[OPS_ACC1(0,1,0)] >= states[i].ymin && vertexy[OPS_ACC1(0,0,0)] < states[i].ymax) { if(vertexz[OPS_ACC2(0,0,1)] >= states[i].zmin && vertexz[OPS_ACC2(0,0,0)] < states[i].zmax) { energy0[OPS_ACC3(0,0,0)] = states[i].energy; density0[OPS_ACC4(0,0,0)] = states[i].density; for (int ix=0;ix<2;ix++){ for (int iy=0;iy<2;iy++){ for (int iz=0;iz<2;iz++){ xvel0[OPS_ACC5(ix,iy,iz)] = states[i].xvel; yvel0[OPS_ACC6(ix,iy,iz)] = states[i].yvel; zvel0[OPS_ACC7(ix,iy,iz)] = states[i].zvel; } } } } } } } else if(states[i].geometry == g_sphe) { radius = sqrt ((cellx[OPS_ACC8(0,0,0)] - x_cent) * (cellx[OPS_ACC8(0,0,0)] - x_cent) + (celly[OPS_ACC9(0,0,0)] - y_cent) * (celly[OPS_ACC9(0,0,0)] - y_cent) + (cellz[OPS_ACC10(0,0,0)] - z_cent) * (cellz[OPS_ACC10(0,0,0)] - z_cent)); if(radius <= states[i].radius) { energy0[OPS_ACC3(0,0,0)] = states[i].energy; density0[OPS_ACC4(0,0,0)] = states[i].density; for (int ix=0;ix<2;ix++){ for (int iy=0;iy<2;iy++){ for (int iz=0;iz<2;iz++){ xvel0[OPS_ACC5(ix,iy,iz)] = states[i].xvel; yvel0[OPS_ACC6(ix,iy,iz)] = states[i].yvel; zvel0[OPS_ACC7(ix,iy,iz)] = states[i].zvel; } } } } } else if(states[i].geometry == g_point) { if(vertexx[OPS_ACC0(0,0,0)] == x_cent && vertexy[OPS_ACC1(0,0,0)] == y_cent && 
vertexz[OPS_ACC2(0,0,0)] == z_cent) { energy0[OPS_ACC3(0,0,0)] = states[i].energy; density0[OPS_ACC4(0,0,0)] = states[i].density; for (int ix=0;ix<2;ix++){ for (int iy=0;iy<2;iy++){ for (int iz=0;iz<2;iz++){ xvel0[OPS_ACC5(ix,iy,iz)] = states[i].xvel; yvel0[OPS_ACC6(ix,iy,iz)] = states[i].yvel; zvel0[OPS_ACC7(ix,iy,iz)] = states[i].zvel; } } } } } } }
zvel0[OPS_ACC13(0, 1, 1)] + zvel0[OPS_ACC13(1, 1, 1)])) * 0.125 * dt * 0.5; total_flux = right_flux - left_flux + top_flux - bottom_flux + front_flux - back_flux; volume_change[OPS_ACC4(0, 0, 0)] = (volume[OPS_ACC5(0, 0, 0)]) / (volume[OPS_ACC5(0, 0, 0)] + total_flux); recip_volume = 1.0 / volume[OPS_ACC5(0, 0, 0)]; energy_change = (pressure[OPS_ACC6(0, 0, 0)] / density0[OPS_ACC7(0, 0, 0)] + viscosity[OPS_ACC9(0, 0, 0)] / density0[OPS_ACC7(0, 0, 0)]) * total_flux * recip_volume; energy1[OPS_ACC11(0, 0, 0)] = energy0[OPS_ACC10(0, 0, 0)] - energy_change; density1[OPS_ACC8(0, 0, 0)] = density0[OPS_ACC7(0, 0, 0)] * volume_change[OPS_ACC4(0, 0, 0)]; } } } } #undef OPS_ACC0 #undef OPS_ACC1 #undef OPS_ACC2 #undef OPS_ACC3 #undef OPS_ACC4 #undef OPS_ACC5 #undef OPS_ACC6 #undef OPS_ACC7 #undef OPS_ACC8
const double *restrict pre_vol, const double *restrict post_vol, double *restrict pre_mass, double *restrict post_mass, double *restrict advec_vol, double *restrict post_ener, const double *restrict ener_flux, int x_size, int y_size) { #pragma omp parallel for for (int n_y = 0; n_y < y_size; n_y++) { for (int n_x = 0; n_x < x_size; n_x++) { pre_mass[OPS_ACC6(0, 0)] = density1[OPS_ACC0(0, 0)] * pre_vol[OPS_ACC4(0, 0)]; post_mass[OPS_ACC7(0, 0)] = pre_mass[OPS_ACC6(0, 0)] + mass_flux_y[OPS_ACC2(0, 0)] - mass_flux_y[OPS_ACC2(0, 1)]; post_ener[OPS_ACC9(0, 0)] = (energy1[OPS_ACC1(0, 0)] * pre_mass[OPS_ACC6(0, 0)] + ener_flux[OPS_ACC10(0, 0)] - ener_flux[OPS_ACC10(0, 1)]) / post_mass[OPS_ACC7(0, 0)]; advec_vol[OPS_ACC8(0, 0)] = pre_vol[OPS_ACC4(0, 0)] + vol_flux_y[OPS_ACC3(0, 0)] - vol_flux_y[OPS_ACC3(0, 1)]; density1[OPS_ACC0(0, 0)] = post_mass[OPS_ACC7(0, 0)] / advec_vol[OPS_ACC8(0, 0)]; energy1[OPS_ACC1(0, 0)] = post_ener[OPS_ACC9(0, 0)]; } } } #undef OPS_ACC0 #undef OPS_ACC1 #undef OPS_ACC2 #undef OPS_ACC3 #undef OPS_ACC4
//user function
/*
 * viscosity_kernel (3D): computes the `viscosity` value for one cell.
 *
 * Steps, all visible below:
 *   1. Load the eight corner values of xvel0, yvel0 and zvel0 and form, for
 *      each velocity component and each direction, the sums over the low and
 *      high face of the cell.
 *   2. div = area-weighted sum of the normal face-sum differences.
 *   3. xx..yz = 0.25 * face-sum differences divided by the cell lengths.
 *   4. pgradx/y/z = centred pressure differences over the sum of two
 *      neighbouring cell lengths.
 *   5. limiter = quadratic form of (xx..yz) in the pressure gradient divided
 *      by max(|pgrad|^2, 1e-16).
 *   6. viscosity = 0 when limiter > 0 or div >= 0; otherwise
 *      2 * density0 * grad^2 * limiter^2, where grad is the smallest of
 *      |cell length * pgrad / pgrad_component| over the three directions.
 */
inline void viscosity_kernel( const double *xvel0, const double *yvel0, const double *celldx, const double *celldy, const double *pressure, const double *density0, double *viscosity, const double *zvel0, const double *celldz, const double *xarea, const double *yarea, const double *zarea) {

  /* Corner values, named <component><x><y><z> by stencil offset. */
  const double u000 = xvel0[OPS_ACC0(0,0,0)], u100 = xvel0[OPS_ACC0(1,0,0)];
  const double u010 = xvel0[OPS_ACC0(0,1,0)], u110 = xvel0[OPS_ACC0(1,1,0)];
  const double u001 = xvel0[OPS_ACC0(0,0,1)], u101 = xvel0[OPS_ACC0(1,0,1)];
  const double u011 = xvel0[OPS_ACC0(0,1,1)], u111 = xvel0[OPS_ACC0(1,1,1)];

  const double v000 = yvel0[OPS_ACC1(0,0,0)], v100 = yvel0[OPS_ACC1(1,0,0)];
  const double v010 = yvel0[OPS_ACC1(0,1,0)], v110 = yvel0[OPS_ACC1(1,1,0)];
  const double v001 = yvel0[OPS_ACC1(0,0,1)], v101 = yvel0[OPS_ACC1(1,0,1)];
  const double v011 = yvel0[OPS_ACC1(0,1,1)], v111 = yvel0[OPS_ACC1(1,1,1)];

  const double w000 = zvel0[OPS_ACC7(0,0,0)], w100 = zvel0[OPS_ACC7(1,0,0)];
  const double w010 = zvel0[OPS_ACC7(0,1,0)], w110 = zvel0[OPS_ACC7(1,1,0)];
  const double w001 = zvel0[OPS_ACC7(0,0,1)], w101 = zvel0[OPS_ACC7(1,0,1)];
  const double w011 = zvel0[OPS_ACC7(0,1,1)], w111 = zvel0[OPS_ACC7(1,1,1)];

  /* Face sums: suffix 1 is the low face, suffix 2 the high face. */
  const double ugradx1 = u000 + u010 + u001 + u011;
  const double ugradx2 = u100 + u110 + u101 + u111;
  const double ugrady1 = u000 + u100 + u001 + u101;
  const double ugrady2 = u010 + u110 + u011 + u111;
  const double ugradz1 = u000 + u100 + u010 + u110;
  const double ugradz2 = u001 + u101 + u011 + u111;

  const double vgradx1 = v000 + v010 + v001 + v011;
  const double vgradx2 = v100 + v110 + v101 + v111;
  const double vgrady1 = v000 + v100 + v001 + v101;
  const double vgrady2 = v010 + v110 + v011 + v111;
  const double vgradz1 = v000 + v100 + v010 + v110;
  const double vgradz2 = v001 + v101 + v011 + v111;

  const double wgradx1 = w000 + w010 + w001 + w011;
  const double wgradx2 = w100 + w110 + w101 + w111;
  const double wgrady1 = w000 + w100 + w001 + w101;
  const double wgrady2 = w010 + w110 + w011 + w111;
  const double wgradz1 = w000 + w100 + w010 + w110;
  const double wgradz2 = w001 + w101 + w011 + w111;

  /* Cell lengths at the centre. */
  const double len_x = celldx[OPS_ACC2(0,0,0)];
  const double len_y = celldy[OPS_ACC3(0,0,0)];
  const double len_z = celldz[OPS_ACC8(0,0,0)];

  const double div = xarea[OPS_ACC9(0,0,0)]*(ugradx2-ugradx1)
                   + yarea[OPS_ACC10(0,0,0)]*(vgrady2-vgrady1)
                   + zarea[OPS_ACC11(0,0,0)]*(wgradz2-wgradz1);

  const double xx = 0.25*(ugradx2-ugradx1)/(len_x);
  const double yy = 0.25*(vgrady2-vgrady1)/(len_y);
  const double zz = 0.25*(wgradz2-wgradz1)/(len_z);
  const double xy = 0.25*(ugrady2-ugrady1)/(len_y)+0.25*(vgradx2-vgradx1)/(len_x);
  const double xz = 0.25*(ugradz2-ugradz1)/(len_z)+0.25*(wgradx2-wgradx1)/(len_x);
  const double yz = 0.25*(vgradz2-vgradz1)/(len_z)+0.25*(wgrady2-wgrady1)/(len_y);

  /* Centred pressure differences. */
  double pgradx = (pressure[OPS_ACC4(1,0,0)] - pressure[OPS_ACC4(-1,0,0)])
                / (len_x + celldx[OPS_ACC2(1,0,0)]);
  double pgrady = (pressure[OPS_ACC4(0,1,0)] - pressure[OPS_ACC4(0,-1,0)])
                / (len_y + celldy[OPS_ACC3(0,1,0)]);
  double pgradz = (pressure[OPS_ACC4(0,0,1)] - pressure[OPS_ACC4(0,0,-1)])
                / (len_z + celldz[OPS_ACC8(0,0,1)]);

  const double pgradx2 = pgradx * pgradx;
  const double pgrady2 = pgrady * pgrady;
  const double pgradz2 = pgradz * pgradz;

  const double limiter = (xx*pgradx2+yy*pgrady2+zz*pgradz2
                          + xy*pgradx*pgrady+xz*pgradx*pgradz+yz*pgrady*pgradz)
                       / MAX(pgradx2+pgrady2+pgradz2,1.0e-16);

  if ((limiter > 0.0) || (div >= 0.0)) {
    viscosity[OPS_ACC6(0,0,0)] = 0.0;
  }
  else {
    /* Keep each component at least 1e-16 in magnitude before dividing by it. */
    pgradx = SIGN( MAX(1.0e-16, fabs(pgradx)), pgradx);
    pgrady = SIGN( MAX(1.0e-16, fabs(pgrady)), pgrady);
    pgradz = SIGN( MAX(1.0e-16, fabs(pgradz)), pgradz);

    const double pgrad = sqrt(pgradx*pgradx + pgrady*pgrady + pgradz*pgradz);
    const double xgrad = fabs(len_x * pgrad/pgradx);
    const double ygrad = fabs(len_y * pgrad/pgrady);
    const double zgrad = fabs(len_z * pgrad/pgradz);

    const double grad  = MIN(xgrad,MIN(ygrad,zgrad));
    const double grad2 = grad*grad;

    viscosity[OPS_ACC6(0,0,0)] = 2.0 * (density0[OPS_ACC5(0,0,0)]) * grad2 * limiter * limiter;
  }
}
zvel0[OPS_ACC11(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (zarea[OPS_ACC13(0, 0, 0)] * (pressure[OPS_ACC6(0, 0, 0)] - pressure[OPS_ACC6(0, 0, -1)]) + zarea[OPS_ACC13(0, -1, 0)] * (pressure[OPS_ACC6(0, -1, 0)] - pressure[OPS_ACC6(0, -1, -1)]) + zarea[OPS_ACC13(-1, 0, 0)] * (pressure[OPS_ACC6(-1, 0, 0)] - pressure[OPS_ACC6(-1, 0, -1)]) + zarea[OPS_ACC13(-1, -1, 0)] * (pressure[OPS_ACC6(-1, -1, 0)] - pressure[OPS_ACC6(-1, -1, -1)])); xvel1[OPS_ACC4(0, 0, 0)] = xvel1[OPS_ACC4(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (xarea[OPS_ACC5(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] - viscosity[OPS_ACC10(-1, 0, 0)]) + xarea[OPS_ACC5(0, -1, 0)] * (viscosity[OPS_ACC10(0, -1, 0)] - viscosity[OPS_ACC10(-1, -1, 0)]) + xarea[OPS_ACC5(0, 0, -1)] * (viscosity[OPS_ACC10(0, 0, -1)] - viscosity[OPS_ACC10(-1, 0, -1)]) + xarea[OPS_ACC5(0, -1, -1)] * (viscosity[OPS_ACC10(0, -1, -1)] - viscosity[OPS_ACC10(-1, -1, -1)])); yvel1[OPS_ACC8(0, 0, 0)] = yvel1[OPS_ACC8(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (yarea[OPS_ACC9(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] - viscosity[OPS_ACC10(0, -1, 0)]) + yarea[OPS_ACC9(-1, 0, 0)] * (viscosity[OPS_ACC10(-1, 0, 0)] -
// host stub function void ops_par_loop_accelerate_kernel_execute(ops_kernel_descriptor *desc) { ops_block block = desc->block; int dim = desc->dim; int *range = desc->range; ops_arg arg0 = desc->args[0]; ops_arg arg1 = desc->args[1]; ops_arg arg2 = desc->args[2]; ops_arg arg3 = desc->args[3]; ops_arg arg4 = desc->args[4]; ops_arg arg5 = desc->args[5]; ops_arg arg6 = desc->args[6]; ops_arg arg7 = desc->args[7]; ops_arg arg8 = desc->args[8]; ops_arg arg9 = desc->args[9]; ops_arg arg10 = desc->args[10]; ops_arg arg11 = desc->args[11]; ops_arg arg12 = desc->args[12]; ops_arg arg13 = desc->args[13]; // Timing double t1, t2, c1, c2; ops_arg args[14] = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12, arg13}; #ifdef CHECKPOINTING if (!ops_checkpointing_before(args, 14, range, 105)) return; #endif if (OPS_diags > 1) { OPS_kernels[105].count++; ops_timers_core(&c2, &t2); } // compute locally allocated range for the sub-block int start[3]; int end[3]; for (int n = 0; n < 3; n++) { start[n] = range[2 * n]; end[n] = range[2 * n + 1]; } #ifdef OPS_DEBUG ops_register_args(args, "accelerate_kernel"); #endif // set up initial pointers and exchange halos if necessary int base0 = args[0].dat->base_offset; const double *__restrict__ density0 = (double *)(args[0].data + base0); int base1 = args[1].dat->base_offset; const double *__restrict__ volume = (double *)(args[1].data + base1); int base2 = args[2].dat->base_offset; double *__restrict__ stepbymass = (double *)(args[2].data + base2); int base3 = args[3].dat->base_offset; const double *__restrict__ xvel0 = (double *)(args[3].data + base3); int base4 = args[4].dat->base_offset; double *__restrict__ xvel1 = (double *)(args[4].data + base4); int base5 = args[5].dat->base_offset; const double *__restrict__ xarea = (double *)(args[5].data + base5); int base6 = args[6].dat->base_offset; const double *__restrict__ pressure = (double *)(args[6].data + base6); int base7 = args[7].dat->base_offset; const 
double *__restrict__ yvel0 = (double *)(args[7].data + base7); int base8 = args[8].dat->base_offset; double *__restrict__ yvel1 = (double *)(args[8].data + base8); int base9 = args[9].dat->base_offset; const double *__restrict__ yarea = (double *)(args[9].data + base9); int base10 = args[10].dat->base_offset; const double *__restrict__ viscosity = (double *)(args[10].data + base10); int base11 = args[11].dat->base_offset; const double *__restrict__ zvel0 = (double *)(args[11].data + base11); int base12 = args[12].dat->base_offset; double *__restrict__ zvel1 = (double *)(args[12].data + base12); int base13 = args[13].dat->base_offset; const double *__restrict__ zarea = (double *)(args[13].data + base13); // initialize global variable with the dimension of dats int xdim0_accelerate_kernel = args[0].dat->size[0]; int ydim0_accelerate_kernel = args[0].dat->size[1]; int xdim1_accelerate_kernel = args[1].dat->size[0]; int ydim1_accelerate_kernel = args[1].dat->size[1]; int xdim2_accelerate_kernel = args[2].dat->size[0]; int ydim2_accelerate_kernel = args[2].dat->size[1]; int xdim3_accelerate_kernel = args[3].dat->size[0]; int ydim3_accelerate_kernel = args[3].dat->size[1]; int xdim4_accelerate_kernel = args[4].dat->size[0]; int ydim4_accelerate_kernel = args[4].dat->size[1]; int xdim5_accelerate_kernel = args[5].dat->size[0]; int ydim5_accelerate_kernel = args[5].dat->size[1]; int xdim6_accelerate_kernel = args[6].dat->size[0]; int ydim6_accelerate_kernel = args[6].dat->size[1]; int xdim7_accelerate_kernel = args[7].dat->size[0]; int ydim7_accelerate_kernel = args[7].dat->size[1]; int xdim8_accelerate_kernel = args[8].dat->size[0]; int ydim8_accelerate_kernel = args[8].dat->size[1]; int xdim9_accelerate_kernel = args[9].dat->size[0]; int ydim9_accelerate_kernel = args[9].dat->size[1]; int xdim10_accelerate_kernel = args[10].dat->size[0]; int ydim10_accelerate_kernel = args[10].dat->size[1]; int xdim11_accelerate_kernel = args[11].dat->size[0]; int 
ydim11_accelerate_kernel = args[11].dat->size[1]; int xdim12_accelerate_kernel = args[12].dat->size[0]; int ydim12_accelerate_kernel = args[12].dat->size[1]; int xdim13_accelerate_kernel = args[13].dat->size[0]; int ydim13_accelerate_kernel = args[13].dat->size[1]; if (OPS_diags > 1) { ops_timers_core(&c1, &t1); OPS_kernels[105].mpi_time += t1 - t2; } #pragma omp parallel for collapse(2) for (int n_z = start[2]; n_z < end[2]; n_z++) { for (int n_y = start[1]; n_y < end[1]; n_y++) { #ifdef intel #pragma loop_count(10000) #pragma omp simd aligned(density0, volume, stepbymass, xvel0, xvel1, xarea, \ pressure, yvel0, yvel1, yarea, viscosity, zvel0, \ zvel1, zarea) #else #pragma simd #endif for (int n_x = start[0]; n_x < end[0]; n_x++) { double nodal_mass = 0.0; nodal_mass = (density0[OPS_ACC0(-1, -1, 0)] * volume[OPS_ACC1(-1, -1, 0)] + density0[OPS_ACC0(0, -1, 0)] * volume[OPS_ACC1(0, -1, 0)] + density0[OPS_ACC0(0, 0, 0)] * volume[OPS_ACC1(0, 0, 0)] + density0[OPS_ACC0(-1, 0, 0)] * volume[OPS_ACC1(-1, 0, 0)] + density0[OPS_ACC0(-1, -1, -1)] * volume[OPS_ACC1(-1, -1, -1)] + density0[OPS_ACC0(0, -1, -1)] * volume[OPS_ACC1(0, -1, -1)] + density0[OPS_ACC0(0, 0, -1)] * volume[OPS_ACC1(0, 0, -1)] + density0[OPS_ACC0(-1, 0, -1)] * volume[OPS_ACC1(-1, 0, -1)]) * 0.125; stepbymass[OPS_ACC2(0, 0, 0)] = 0.25 * dt / nodal_mass; xvel1[OPS_ACC4(0, 0, 0)] = xvel0[OPS_ACC3(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (xarea[OPS_ACC5(0, 0, 0)] * (pressure[OPS_ACC6(0, 0, 0)] - pressure[OPS_ACC6(-1, 0, 0)]) + xarea[OPS_ACC5(0, -1, 0)] * (pressure[OPS_ACC6(0, -1, 0)] - pressure[OPS_ACC6(-1, -1, 0)]) + xarea[OPS_ACC5(0, 0, -1)] * (pressure[OPS_ACC6(0, 0, -1)] - pressure[OPS_ACC6(-1, 0, -1)]) + xarea[OPS_ACC5(0, -1, -1)] * (pressure[OPS_ACC6(0, -1, -1)] - pressure[OPS_ACC6(-1, -1, -1)])); yvel1[OPS_ACC8(0, 0, 0)] = yvel0[OPS_ACC7(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (yarea[OPS_ACC9(0, 0, 0)] * (pressure[OPS_ACC6(0, 0, 0)] - pressure[OPS_ACC6(0, -1, 0)]) + yarea[OPS_ACC9(-1, 0, 0)] * 
(pressure[OPS_ACC6(-1, 0, 0)] - pressure[OPS_ACC6(-1, -1, 0)]) + yarea[OPS_ACC9(0, 0, -1)] * (pressure[OPS_ACC6(0, 0, -1)] - pressure[OPS_ACC6(0, -1, -1)]) + yarea[OPS_ACC9(-1, 0, -1)] * (pressure[OPS_ACC6(-1, 0, -1)] - pressure[OPS_ACC6(-1, -1, -1)])); zvel1[OPS_ACC12(0, 0, 0)] = zvel0[OPS_ACC11(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (zarea[OPS_ACC13(0, 0, 0)] * (pressure[OPS_ACC6(0, 0, 0)] - pressure[OPS_ACC6(0, 0, -1)]) + zarea[OPS_ACC13(0, -1, 0)] * (pressure[OPS_ACC6(0, -1, 0)] - pressure[OPS_ACC6(0, -1, -1)]) + zarea[OPS_ACC13(-1, 0, 0)] * (pressure[OPS_ACC6(-1, 0, 0)] - pressure[OPS_ACC6(-1, 0, -1)]) + zarea[OPS_ACC13(-1, -1, 0)] * (pressure[OPS_ACC6(-1, -1, 0)] - pressure[OPS_ACC6(-1, -1, -1)])); xvel1[OPS_ACC4(0, 0, 0)] = xvel1[OPS_ACC4(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (xarea[OPS_ACC5(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] - viscosity[OPS_ACC10(-1, 0, 0)]) + xarea[OPS_ACC5(0, -1, 0)] * (viscosity[OPS_ACC10(0, -1, 0)] - viscosity[OPS_ACC10(-1, -1, 0)]) + xarea[OPS_ACC5(0, 0, -1)] * (viscosity[OPS_ACC10(0, 0, -1)] - viscosity[OPS_ACC10(-1, 0, -1)]) + xarea[OPS_ACC5(0, -1, -1)] * (viscosity[OPS_ACC10(0, -1, -1)] - viscosity[OPS_ACC10(-1, -1, -1)])); yvel1[OPS_ACC8(0, 0, 0)] = yvel1[OPS_ACC8(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (yarea[OPS_ACC9(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] - viscosity[OPS_ACC10(0, -1, 0)]) + yarea[OPS_ACC9(-1, 0, 0)] * (viscosity[OPS_ACC10(-1, 0, 0)] - viscosity[OPS_ACC10(-1, -1, 0)]) + yarea[OPS_ACC9(0, 0, -1)] * (viscosity[OPS_ACC10(0, 0, -1)] - viscosity[OPS_ACC10(0, -1, -1)]) + yarea[OPS_ACC9(-1, 0, -1)] * (viscosity[OPS_ACC10(-1, 0, -1)] - viscosity[OPS_ACC10(-1, -1, -1)])); zvel1[OPS_ACC12(0, 0, 0)] = zvel1[OPS_ACC12(0, 0, 0)] - stepbymass[OPS_ACC2(0, 0, 0)] * (zarea[OPS_ACC13(0, 0, 0)] * (viscosity[OPS_ACC10(0, 0, 0)] - viscosity[OPS_ACC10(0, 0, -1)]) + zarea[OPS_ACC13(0, -1, 0)] * (viscosity[OPS_ACC10(0, -1, 0)] - viscosity[OPS_ACC10(0, -1, -1)]) + zarea[OPS_ACC13(-1, 0, 0)] * 
(viscosity[OPS_ACC10(-1, 0, 0)] - viscosity[OPS_ACC10(-1, 0, -1)]) + zarea[OPS_ACC13(-1, -1, 0)] * (viscosity[OPS_ACC10(-1, -1, 0)] - viscosity[OPS_ACC10(-1, -1, -1)])); } } } if (OPS_diags > 1) { ops_timers_core(&c2, &t2); OPS_kernels[105].time += t2 - t1; } if (OPS_diags > 1) { // Update kernel record ops_timers_core(&c1, &t1); OPS_kernels[105].mpi_time += t1 - t2; OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg0); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg1); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg2); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg3); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg4); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg5); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg6); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg7); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg8); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg9); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg10); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg11); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg12); OPS_kernels[105].transfer += ops_compute_transfer(dim, start, end, &arg13); } }