inline void update_halo_kernel1_fr2(double *density0, double *density1, double *energy0, double *energy1, double *pressure, double *viscosity, double *soundspeed , const int* fields) { if(fields[FIELD_DENSITY0] == 1) density0[OPS_ACC0(0,0,0)] = density0[OPS_ACC0(0,0,-3)]; if(fields[FIELD_DENSITY1] == 1) density1[OPS_ACC1(0,0,0)] = density1[OPS_ACC1(0,0,-3)]; if(fields[FIELD_ENERGY0] == 1) energy0[OPS_ACC2(0,0,0)] = energy0[OPS_ACC2(0,0,-3)]; if(fields[FIELD_ENERGY1] == 1) energy1[OPS_ACC3(0,0,0)] = energy1[OPS_ACC3(0,0,-3)]; if(fields[FIELD_PRESSURE] == 1) pressure[OPS_ACC4(0,0,0)] = pressure[OPS_ACC4(0,0,-3)]; if(fields[FIELD_VISCOSITY] == 1) viscosity[OPS_ACC5(0,0,0)] = viscosity[OPS_ACC5(0,0,-3)]; if(fields[FIELD_SOUNDSPEED] == 1) soundspeed[OPS_ACC6(0,0,0)] = soundspeed[OPS_ACC6(0,0,-3)]; }
/*
 * Computes post_vol and pre_vol for the current cell.
 *   post_vol = volume + (vol_flux_y at (0,1,0) - vol_flux_y at (0,0,0))
 *                     + (vol_flux_z at (0,0,1) - vol_flux_z at (0,0,0))
 *   pre_vol  = post_vol + vol_flux_x at (1,0,0) - vol_flux_x at (0,0,0)
 * The arithmetic is evaluated left to right exactly as written below.
 */
inline void advec_mom_kernel_x1(double *pre_vol, double *post_vol,
                                const double *volume,
                                const double *vol_flux_x,
                                const double *vol_flux_y,
                                const double *vol_flux_z) {
  double *post_cell = &post_vol[OPS_ACC1(0, 0, 0)];
  double *pre_cell = &pre_vol[OPS_ACC0(0, 0, 0)];

  *post_cell = volume[OPS_ACC2(0, 0, 0)] + vol_flux_y[OPS_ACC4(0, 1, 0)] -
               vol_flux_y[OPS_ACC4(0, 0, 0)] + vol_flux_z[OPS_ACC5(0, 0, 1)] -
               vol_flux_z[OPS_ACC5(0, 0, 0)];

  /* pre_vol is derived from the post_vol value that was just stored. */
  *pre_cell = *post_cell + vol_flux_x[OPS_ACC3(1, 0, 0)] -
              vol_flux_x[OPS_ACC3(0, 0, 0)];
}
// user function
/*
 * Two-index variant: writes pre_vol and post_vol for the current cell.
 *   pre_vol  = volume + (dy_flux + vol_flux_x at (1,0) - vol_flux_x at (0,0))
 *   post_vol = pre_vol - dy_flux
 * with dy_flux = vol_flux_y at (0,1) - vol_flux_y at (0,0).
 */
inline void advec_cell_kernel1_ydir(double *pre_vol, double *post_vol,
                                    const double *volume,
                                    const double *vol_flux_x,
                                    const double *vol_flux_y) {
  double *pre_cell = &pre_vol[OPS_ACC0(0, 0)];

  *pre_cell = volume[OPS_ACC2(0, 0)] +
              (vol_flux_y[OPS_ACC4(0, 1)] - vol_flux_y[OPS_ACC4(0, 0)] +
               vol_flux_x[OPS_ACC3(1, 0)] - vol_flux_x[OPS_ACC3(0, 0)]);

  /* Uses the pre_vol value stored on the line above. */
  post_vol[OPS_ACC1(0, 0)] =
      *pre_cell - (vol_flux_y[OPS_ACC4(0, 1)] - vol_flux_y[OPS_ACC4(0, 0)]);
}
// user function
/*
 * Adds this cell's contribution to five running totals.
 *   vol   += cell volume
 *   mass  += cell volume * density0
 *   ie    += cell mass * energy0
 *   ke    += cell mass * 0.5 * vsqrd
 *   press += cell volume * pressure
 * vsqrd is the sum over the four offsets (0,0), (1,0), (0,1), (1,1) of
 * 0.25 * (xvel0^2 + yvel0^2), accumulated in that order.
 */
inline void field_summary_kernel(const double *volume, const double *density0,
                                 const double *energy0, const double *pressure,
                                 const double *xvel0, const double *yvel0,
                                 double *vol, double *mass, double *ie,
                                 double *ke, double *press) {
  double vsqrd = 0.0;

  /* Inner index varies fastest so the visiting order matches the listing. */
  for (int off_y = 0; off_y <= 1; off_y++) {
    for (int off_x = 0; off_x <= 1; off_x++) {
      vsqrd = vsqrd +
              0.25 * (xvel0[OPS_ACC4(off_x, off_y)] * xvel0[OPS_ACC4(off_x, off_y)] +
                      yvel0[OPS_ACC5(off_x, off_y)] * yvel0[OPS_ACC5(off_x, off_y)]);
    }
  }

  const double cell_vol = volume[OPS_ACC0(0, 0)];
  const double cell_mass = cell_vol * density0[OPS_ACC1(0, 0)];

  *vol += cell_vol;
  *mass += cell_mass;
  *ie += cell_mass * energy0[OPS_ACC2(0, 0)];
  *ke += cell_mass * 0.5 * vsqrd;
  *press += cell_vol * pressure[OPS_ACC3(0, 0)];
}
/*
 * Updates density1 and energy1 in the current cell from flux differences
 * between the current cell and its (1,0,0) neighbour. Intermediate results
 * are stored in pre_mass, post_mass, post_ener and advec_vol and then read
 * back, in this order:
 *   pre_mass  = density1 * pre_vol
 *   post_mass = pre_mass + mass_flux_x(0) - mass_flux_x(+1)
 *   post_ener = (energy1 * pre_mass + ener_flux(0) - ener_flux(+1)) / post_mass
 *   advec_vol = pre_vol + vol_flux_x(0) - vol_flux_x(+1)
 *   density1  = post_mass / advec_vol
 *   energy1   = post_ener
 * The post_vol argument is not referenced by this kernel.
 */
inline void advec_cell_kernel4_xdir(double *density1, double *energy1,
                                    const double *mass_flux_x,
                                    const double *vol_flux_x,
                                    const double *pre_vol,
                                    const double *post_vol, double *pre_mass,
                                    double *post_mass, double *advec_vol,
                                    double *post_ener,
                                    const double *ener_flux) {
  double *rho_cell = &density1[OPS_ACC0(0, 0, 0)];
  double *ener_cell = &energy1[OPS_ACC1(0, 0, 0)];
  double *mass_before = &pre_mass[OPS_ACC6(0, 0, 0)];
  double *mass_after = &post_mass[OPS_ACC7(0, 0, 0)];
  double *vol_after = &advec_vol[OPS_ACC8(0, 0, 0)];
  double *ener_after = &post_ener[OPS_ACC9(0, 0, 0)];

  *mass_before = (*rho_cell) * pre_vol[OPS_ACC4(0, 0, 0)];

  *mass_after = (*mass_before) + mass_flux_x[OPS_ACC2(0, 0, 0)] -
                mass_flux_x[OPS_ACC2(1, 0, 0)];

  *ener_after = ((*ener_cell) * (*mass_before) +
                 ener_flux[OPS_ACC10(0, 0, 0)] -
                 ener_flux[OPS_ACC10(1, 0, 0)]) /
                (*mass_after);

  *vol_after = pre_vol[OPS_ACC4(0, 0, 0)] + vol_flux_x[OPS_ACC3(0, 0, 0)] -
               vol_flux_x[OPS_ACC3(1, 0, 0)];

  *rho_cell = (*mass_after) / (*vol_after);
  *ener_cell = *ener_after;
}
/*
 * node_mass_post = 0.25 * sum of density1 * post_vol over the four offsets
 *                  (0,-1), (0,0), (-1,-1), (-1,0), added in that order.
 * node_mass_pre  = node_mass_post - node_flux at (0,-1) + node_flux at (0,0).
 */
inline void advec_mom_kernel_post_pre_advec_y(double *node_mass_post,
                                              const double *post_vol,
                                              const double *density1,
                                              double *node_mass_pre,
                                              const double *node_flux) {
  double *mass_post = &node_mass_post[OPS_ACC0(0, 0)];

  *mass_post =
      0.25 * (density1[OPS_ACC2(0, -1)] * post_vol[OPS_ACC1(0, -1)] +
              density1[OPS_ACC2(0, 0)] * post_vol[OPS_ACC1(0, 0)] +
              density1[OPS_ACC2(-1, -1)] * post_vol[OPS_ACC1(-1, -1)] +
              density1[OPS_ACC2(-1, 0)] * post_vol[OPS_ACC1(-1, 0)]);

  /* Derived from the node_mass_post value that was just written. */
  node_mass_pre[OPS_ACC3(0, 0)] =
      *mass_post - node_flux[OPS_ACC4(0, -1)] + node_flux[OPS_ACC4(0, 0)];
}
inline void update_halo_kernel1_r2(double *density0, double *energy0, double *energy1, double *u, double *p, double *sd, const int *fields) { if (fields[FIELD_DENSITY] == 1) density0[OPS_ACC0(0, 0)] = density0[OPS_ACC0(-3, 0)]; if (fields[FIELD_ENERGY0] == 1) energy0[OPS_ACC1(0, 0)] = energy0[OPS_ACC1(-3, 0)]; if (fields[FIELD_ENERGY1] == 1) energy1[OPS_ACC2(0, 0)] = energy1[OPS_ACC2(-3, 0)]; if (fields[FIELD_U] == 1) u[OPS_ACC3(0, 0)] = u[OPS_ACC3(-3, 0)]; if (fields[FIELD_P] == 1) p[OPS_ACC4(0, 0)] = p[OPS_ACC4(-3, 0)]; if (fields[FIELD_SD] == 1) sd[OPS_ACC5(0, 0)] = sd[OPS_ACC5(-3, 0)]; }
// user function
/*
 * Copies a fixed selection of field values around the current cell into
 * output[0..27].
 *
 * output[0..23] holds eight (xvel0, yvel0, zvel0) triples. For each third
 * index k = 0, 1 the (x, y) offsets are visited in the order
 * (0,0), (1,0), (1,1), (0,1). xvel0 and yvel0 are read at (x, y, k);
 * zvel0 is read at (0, 0, k) for every triple.
 * output[24..27] holds density0, energy0, pressure and soundspeed at (0,0,0).
 *
 * NOTE(review): zvel0 ignoring the (x, y) offset looks unusual next to
 * xvel0/yvel0 -- confirm against the intended stencil before changing it.
 */
inline void calc_dt_kernel_print(const double *xvel0, const double *yvel0,
                                 const double *zvel0, const double *density0,
                                 const double *energy0, const double *pressure,
                                 const double *soundspeed, double *output) {
  /* (x, y) offsets in visiting order. */
  const int corner[4][2] = {{0, 0}, {1, 0}, {1, 1}, {0, 1}};
  int slot = 0;

  for (int off_z = 0; off_z <= 1; off_z++) {
    for (int c = 0; c < 4; c++) {
      const int off_x = corner[c][0];
      const int off_y = corner[c][1];
      output[slot++] = xvel0[OPS_ACC0(off_x, off_y, off_z)];
      output[slot++] = yvel0[OPS_ACC1(off_x, off_y, off_z)];
      output[slot++] = zvel0[OPS_ACC2(0, 0, off_z)];
    }
  }

  output[24] = density0[OPS_ACC3(0, 0, 0)];
  output[25] = energy0[OPS_ACC4(0, 0, 0)];
  output[26] = pressure[OPS_ACC5(0, 0, 0)];
  output[27] = soundspeed[OPS_ACC6(0, 0, 0)];
}
//user function inline void preproc_kernel(const double *u, double *du, double *ax, double *bx, double *cx, double *ay, double *by, double *cy, double *az, double *bz, double *cz, int *idx){ double a, b, c, d; if(idx[0]==0 || idx[0]==nx-1 || idx[1]==0 || idx[1]==ny-1 || idx[2]==0 || idx[2]==nz-1) { d = 0.0f; a = 0.0f; b = 1.0f; c = 0.0f; } else { d = lambda*( u[OPS_ACC0(-1,0,0)] + u[OPS_ACC0(1,0,0)] + u[OPS_ACC0(0,-1,0)] + u[OPS_ACC0(0,1,0)] + u[OPS_ACC0(0,0,-1)] + u[OPS_ACC0(0,0,1)] - 6.0f*u[OPS_ACC0(0,0,0)]); a = -0.5f * lambda; b = 1.0f + lambda; c = -0.5f * lambda; } du[OPS_ACC1(0,0,0)] = d; ax[OPS_ACC2(0,0,0)] = a; bx[OPS_ACC3(0,0,0)] = b; cx[OPS_ACC4(0,0,0)] = c; ay[OPS_ACC5(0,0,0)] = a; by[OPS_ACC6(0,0,0)] = b; cy[OPS_ACC7(0,0,0)] = c; az[OPS_ACC8(0,0,0)] = a; bz[OPS_ACC9(0,0,0)] = b; cz[OPS_ACC10(0,0,0)] = c; }
// user function
/*
 * Copies the current-point values of the three *_new arrays into the
 * matching *_old arrays (rho, rhou, rhoE).
 */
inline void save_kernel(double *rho_old, double *rhou_old, double *rhoE_old,
                        const double *rho_new, const double *rhou_new,
                        const double *rhoE_new) {
  const double rho_now = rho_new[OPS_ACC3(0)];
  const double rhou_now = rhou_new[OPS_ACC4(0)];
  const double rhoE_now = rhoE_new[OPS_ACC5(0)];

  rho_old[OPS_ACC0(0)] = rho_now;
  rhou_old[OPS_ACC1(0)] = rhou_now;
  rhoE_old[OPS_ACC2(0)] = rhoE_now;
}
// user function
/*
 * Stores the current-cell values of cellx, celly and cellz into *xl_pos,
 * *yl_pos and *zl_pos respectively.
 */
inline void calc_dt_kernel_get(const double *cellx, const double *celly,
                               double *xl_pos, double *yl_pos,
                               const double *cellz, double *zl_pos) {
  const double x_here = cellx[OPS_ACC0(0, 0, 0)];
  const double y_here = celly[OPS_ACC1(0, 0, 0)];
  const double z_here = cellz[OPS_ACC4(0, 0, 0)];

  *xl_pos = x_here;
  *yl_pos = y_here;
  *zl_pos = z_here;
}
// user function inline void calc_dt_kernel(const double *celldx, const double *celldy, const double *soundspeed, const double *viscosity, const double *density0, const double *xvel0, const double *xarea, const double *volume, const double *yvel0, const double *yarea, double *dt_min, const double *celldz, const double *zvel0, const double *zarea) { double div, ds, dtut, dtvt, dtct, dtwt, dtdivt, cc, dv1, dv2, du1, du2, dw1, dw2; ds = MIN(MIN(celldx[OPS_ACC0(0, 0, 0)], celldy[OPS_ACC1(0, 0, 0)]), celldz[OPS_ACC11(0, 0, 0)]); ds = 1.0 / (ds * ds); cc = soundspeed[OPS_ACC2(0, 0, 0)] * soundspeed[OPS_ACC2(0, 0, 0)]; cc = cc + 2.0 * viscosity[OPS_ACC3(0, 0, 0)] / density0[OPS_ACC4(0, 0, 0)]; dtct = ds * cc; dtct = dtc_safe * 1.0 / MAX(sqrt(dtct), g_small); du1 = (xvel0[OPS_ACC5(0, 0, 0)] + xvel0[OPS_ACC5(0, 1, 0)] + xvel0[OPS_ACC5(0, 0, 1)] + xvel0[OPS_ACC5(0, 1, 1)]) * xarea[OPS_ACC6(0, 0, 0)]; du2 = (xvel0[OPS_ACC5(1, 0, 0)] + xvel0[OPS_ACC5(1, 1, 0)] + xvel0[OPS_ACC5(1, 0, 1)] + xvel0[OPS_ACC5(1, 1, 1)]) * xarea[OPS_ACC6(0, 0, 0)]; dtut = dtu_safe * 4.0 * volume[OPS_ACC7(0, 0, 0)] / MAX(MAX(fabs(du1), fabs(du2)), 1.0e-5 * volume[OPS_ACC7(0, 0, 0)]); dv1 = (yvel0[OPS_ACC8(0, 0, 0)] + yvel0[OPS_ACC8(1, 0, 0)] + yvel0[OPS_ACC8(0, 0, 1)] + yvel0[OPS_ACC8(1, 0, 1)]) * yarea[OPS_ACC9(0, 0, 0)]; dv2 = (yvel0[OPS_ACC8(0, 1, 0)] + yvel0[OPS_ACC8(1, 1, 0)] + yvel0[OPS_ACC8(0, 1, 1)] + yvel0[OPS_ACC8(1, 1, 1)]) * yarea[OPS_ACC9(0, 0, 0)]; dtvt = dtv_safe * 4.0 * volume[OPS_ACC7(0, 0, 0)] / MAX(MAX(fabs(dv1), fabs(dv2)), 1.0e-5 * volume[OPS_ACC7(0, 0, 0)]); dw1 = (zvel0[OPS_ACC12(0, 0, 0)] + zvel0[OPS_ACC12(0, 1, 0)] + zvel0[OPS_ACC12(1, 0, 0)] + zvel0[OPS_ACC12(1, 1, 0)]) * zarea[OPS_ACC13(0, 0, 0)]; dw2 = (zvel0[OPS_ACC12(0, 0, 1)] + zvel0[OPS_ACC12(0, 1, 1)] + zvel0[OPS_ACC12(1, 0, 1)] + zvel0[OPS_ACC12(1, 1, 1)]) * zarea[OPS_ACC13(0, 0, 0)]; dtwt = dtw_safe * 4.0 * volume[OPS_ACC7(0, 0, 0)] / MAX(MAX(fabs(dw1), fabs(dw2)), 1.0e-5 * volume[OPS_ACC7(0, 0, 0)]); div = du2 - du1 
+ dv2 - dv1 + dw2 - dw1; dtdivt = dtdiv_safe * 4.0 * (volume[OPS_ACC7(0, 0, 0)]) / MAX(volume[OPS_ACC7(0, 0, 0)] * 1.0e-05, fabs(div)); dt_min[OPS_ACC10(0, 0, 0)] = MIN(MIN(MIN(dtct, dtut), MIN(dtvt, dtdivt)), dtwt); }
//user function inline void poisson_kernel_populate(const int *dispx, const int *dispy, const int *idx, double *u, double *f, double *ref) { double x = dx * (double)(idx[0]+dispx[0]); double y = dy * (double)(idx[1]+dispy[0]); u[OPS_ACC3(0,0)] = sin(M_PI*x)*cos(2.0*M_PI*y); f[OPS_ACC4(0,0)] = -5.0*M_PI*M_PI*sin(M_PI*x)*cos(2.0*M_PI*y); ref[OPS_ACC5(0,0)] = sin(M_PI*x)*cos(2.0*M_PI*y); }
/*
 * Computes mom_flux at the current point as
 *   (vel1 at donor + (1 - sigma) * limiter) * node_flux.
 *
 * The y offsets used depend on the sign of node_flux at the current point:
 *   node_flux <  0 : upwind = 2,  donor = 1, downwind = 0, dif = donor
 *   otherwise      : upwind = -1, donor = 0, downwind = 1, dif = upwind
 * sigma = |node_flux| / node_mass_pre at donor. The limiter is non-zero only
 * when the donor-upwind and downwind-donor differences of vel1 have the same
 * sign (their product is > 0).
 */
inline void advec_mom_kernel1_y_nonvector(const double *node_flux,
                                          const double *node_mass_pre,
                                          double *mom_flux,
                                          const double *celldy,
                                          const double *vel1) {
  const double flux = node_flux[OPS_ACC0(0, 0, 0)];
  const int negative_flux = (flux < 0.0);

  const int upwind = negative_flux ? 2 : -1;
  const int donor = negative_flux ? 1 : 0;
  const int downwind = negative_flux ? 0 : 1;
  const int dif = negative_flux ? donor : upwind;

  const double sigma = fabs(flux) / node_mass_pre[OPS_ACC1(0, donor, 0)];
  const double width = celldy[OPS_ACC3(0, 0, 0)];

  const double vdiffuw =
      vel1[OPS_ACC4(0, donor, 0)] - vel1[OPS_ACC4(0, upwind, 0)];
  const double vdiffdw =
      vel1[OPS_ACC4(0, downwind, 0)] - vel1[OPS_ACC4(0, donor, 0)];

  double limiter = 0.0;
  if (vdiffuw * vdiffdw > 0.0) {
    const double auw = fabs(vdiffuw);
    const double adw = fabs(vdiffdw);
    const double wind = (vdiffdw <= 0.0) ? -1.0 : 1.0;
    limiter = wind * MIN(width * ((2.0 - sigma) * adw / width +
                                  (1.0 + sigma) * auw /
                                      celldy[OPS_ACC3(0, dif, 0)]) /
                             6.0,
                         MIN(auw, adw));
  }

  const double advected_vel =
      vel1[OPS_ACC4(0, donor, 0)] + (1.0 - sigma) * limiter;
  mom_flux[OPS_ACC2(0, 0, 0)] = advected_vel * flux;
}
// user function inline void calvar_kernel(const double *rho_new, const double *rhou_new, const double *rhoE_new, double *workarray2, double *workarray3) { double p, rhoi, u; rhoi = 1 / rho_new[OPS_ACC0(0)]; u = rhou_new[OPS_ACC1(0)] * rhoi; p = gam1 * (rhoE_new[OPS_ACC2(0)] - 0.5 * rho_new[OPS_ACC0(0)] * u * u); workarray2[OPS_ACC3(0)] = p + rhou_new[OPS_ACC1(0)] * u; workarray3[OPS_ACC4(0)] = (p + rhoE_new[OPS_ACC2(0)]) * u; }
// user function inline void updateRK3_kernel(double *rho_new, double *rhou_new, double *rhoE_new, double *rho_old, double *rhou_old, double *rhoE_old, const double *rho_res, const double *rhou_res, const double *rhoE_res, const double *a1, const double *a2) { rho_new[OPS_ACC0(0)] = rho_old[OPS_ACC3(0)] + dt * a1[0] * (-rho_res[OPS_ACC6(0)]); rhou_new[OPS_ACC1(0)] = rhou_old[OPS_ACC4(0)] + dt * a1[0] * (-rhou_res[OPS_ACC7(0)]); rhoE_new[OPS_ACC2(0)] = rhoE_old[OPS_ACC5(0)] + dt * a1[0] * (-rhoE_res[OPS_ACC8(0)]); rho_old[OPS_ACC3(0)] = rho_old[OPS_ACC3(0)] + dt * a2[0] * (-rho_res[OPS_ACC6(0)]); rhou_old[OPS_ACC4(0)] = rhou_old[OPS_ACC4(0)] + dt * a2[0] * (-rhou_res[OPS_ACC7(0)]); rhoE_old[OPS_ACC5(0)] = rhoE_old[OPS_ACC5(0)] + dt * a2[0] * (-rhoE_res[OPS_ACC8(0)]); }
// user function inline void initialise_chunk_kernel_volume(double *volume, const double *celldy, double *xarea, const double *celldx, double *yarea) { double d_x, d_y; d_x = (grid.xmax - grid.xmin) / (double)grid.x_cells; d_y = (grid.ymax - grid.ymin) / (double)grid.y_cells; volume[OPS_ACC0(0, 0)] = d_x * d_y; xarea[OPS_ACC2(0, 0)] = celldy[OPS_ACC1(0, 0)]; yarea[OPS_ACC4(0, 0)] = celldx[OPS_ACC3(0, 0)]; }
// user function
/*
 * Two-index variant. Copies a fixed selection of field values around the
 * current cell into output[0..11].
 *
 * output[0..7] holds four (xvel0, yvel0) pairs read at the offsets
 * (1,0), (-1,0), (0,1), (0,-1), in that order.
 * output[8..11] holds density0, energy0, pressure and soundspeed at (0,0).
 */
inline void calc_dt_kernel_print(const double *xvel0, const double *yvel0,
                                 const double *density0, const double *energy0,
                                 const double *pressure,
                                 const double *soundspeed, double *output) {
  /* Neighbour offsets in visiting order. */
  const int neighbour[4][2] = {{1, 0}, {-1, 0}, {0, 1}, {0, -1}};
  int slot = 0;

  for (int c = 0; c < 4; c++) {
    const int off_x = neighbour[c][0];
    const int off_y = neighbour[c][1];
    output[slot++] = xvel0[OPS_ACC0(off_x, off_y)];
    output[slot++] = yvel0[OPS_ACC1(off_x, off_y)];
  }

  output[8] = density0[OPS_ACC2(0, 0)];
  output[9] = energy0[OPS_ACC3(0, 0)];
  output[10] = pressure[OPS_ACC4(0, 0)];
  output[11] = soundspeed[OPS_ACC5(0, 0)];
}
// user function inline void calc_dt_kernel(const double *celldx, const double *celldy, const double *soundspeed, const double *viscosity, const double *density0, const double *xvel0, const double *xarea, const double *volume, const double *yvel0, const double *yarea, double *dt_min) { double div, dsx, dsy, dtut, dtvt, dtct, dtdivt, cc, dv1, dv2; dsx = celldx[OPS_ACC0(0, 0)]; dsy = celldy[OPS_ACC1(0, 0)]; cc = soundspeed[OPS_ACC2(0, 0)] * soundspeed[OPS_ACC2(0, 0)]; cc = cc + 2.0 * viscosity[OPS_ACC3(0, 0)] / density0[OPS_ACC4(0, 0)]; cc = MAX(sqrt(cc), g_small); dtct = dtc_safe * MIN(dsx, dsy) / cc; div = 0.0; dv1 = (xvel0[OPS_ACC5(0, 0)] + xvel0[OPS_ACC5(0, 1)]) * xarea[OPS_ACC6(0, 0)]; dv2 = (xvel0[OPS_ACC5(1, 0)] + xvel0[OPS_ACC5(1, 1)]) * xarea[OPS_ACC6(1, 0)]; div = div + dv2 - dv1; dtut = dtu_safe * 2.0 * volume[OPS_ACC7(0, 0)] / MAX(MAX(fabs(dv1), fabs(dv2)), g_small * volume[OPS_ACC7(0, 0)]); dv1 = (yvel0[OPS_ACC8(0, 0)] + yvel0[OPS_ACC8(1, 0)]) * yarea[OPS_ACC9(0, 0)]; dv2 = (yvel0[OPS_ACC8(0, 1)] + yvel0[OPS_ACC8(1, 1)]) * yarea[OPS_ACC9(0, 1)]; div = div + dv2 - dv1; dtvt = dtv_safe * 2.0 * volume[OPS_ACC7(0, 0)] / MAX(MAX(fabs(dv1), fabs(dv2)), g_small * volume[OPS_ACC7(0, 0)]); div = div / (2.0 * volume[OPS_ACC7(0, 0)]); if (div < -g_small) dtdivt = dtdiv_safe * (-1.0 / div); else dtdivt = g_big; dt_min[OPS_ACC10(0, 0)] = MIN(MIN(dtct, dtut), MIN(dtvt, dtdivt)); }
// user function inline void initialize_kernel(double *x, double *rho_new, double *rhou_new, double *rhoE_new, double *rhoin, int *idx) { x[OPS_ACC0(0)] = xmin + (idx[0] - 2) * dx; if (x[OPS_ACC0(0)] >= -4.0) { rho_new[OPS_ACC1(0)] = 1.0 + eps * sin(lambda * x[OPS_ACC0(0)]); rhou_new[OPS_ACC2(0)] = ur * rho_new[OPS_ACC1(0)]; rhoE_new[OPS_ACC3(0)] = (pr / gam1) + 0.5 * pow(rhou_new[OPS_ACC2(0)], 2) / rho_new[OPS_ACC1(0)]; } else { rho_new[OPS_ACC1(0)] = rhol; rhou_new[OPS_ACC2(0)] = ul * rho_new[OPS_ACC1(0)]; rhoE_new[OPS_ACC3(0)] = (pl / gam1) + 0.5 * pow(rhou_new[OPS_ACC2(0)], 2) / rho_new[OPS_ACC1(0)]; } rhoin[OPS_ACC4(0)] = gam1 * (rhoE_new[OPS_ACC3(0)] - 0.5 * rhou_new[OPS_ACC2(0)] * rhou_new[OPS_ACC2(0)] / rho_new[OPS_ACC1(0)]); }
/*
 * Computes mass_flux_y and ener_flux at the current point from vol_flux_y
 * and limited differences of density1 and energy1 along the second index.
 *
 * Offsets along the second index are chosen as follows:
 *   vol_flux_y > 0                      : upwind = -2, donor = -1,
 *                                         downwind = 0, dif = donor
 *   else if yy at (0,1) < y_max + 2 - 2 : upwind = 1, donor = 0,
 *                                         downwind = -1, dif = upwind
 *   else                                : upwind = 0, donor = 0,
 *                                         downwind = -1, dif = upwind
 * Each limiter is non-zero only when the donor-upwind and downwind-donor
 * differences have the same sign (their product is > 0).
 * y_max is read from field.y_max, which comes from outside this function.
 */
inline void advec_cell_kernel3_ydir(const double *vol_flux_y,
                                    const double *pre_vol, const int *yy,
                                    const double *vertexdy,
                                    const double *density1,
                                    const double *energy1,
                                    double *mass_flux_y, double *ener_flux) {
  const double one_by_six = 1.0 / 6.0;
  const int y_max = field.y_max;
  const double flux = vol_flux_y[OPS_ACC0(0, 0)];

  int upwind, donor, downwind, dif;
  if (flux > 0.0) {
    upwind = -2;
    donor = -1;
    downwind = 0;
    dif = donor;
  } else {
    /* Both remaining cases share donor/downwind and only differ in upwind. */
    donor = 0;
    downwind = -1;
    upwind = (yy[OPS_ACC2(0, 1)] < y_max + 2 - 2) ? 1 : 0;
    dif = upwind;
  }

  const double sigmat = fabs(flux) / pre_vol[OPS_ACC1(0, donor)];
  const double sigma3 =
      (1.0 + sigmat) * (vertexdy[OPS_ACC3(0, 0)] / vertexdy[OPS_ACC3(0, dif)]);
  const double sigma4 = 2.0 - sigmat;

  /* Density pass -> mass flux. */
  double diffuw = density1[OPS_ACC4(0, donor)] - density1[OPS_ACC4(0, upwind)];
  double diffdw =
      density1[OPS_ACC4(0, downwind)] - density1[OPS_ACC4(0, donor)];
  double limiter = 0.0;
  if ((diffuw * diffdw) > 0.0) {
    limiter = (1.0 - sigmat) * SIGN(1.0, diffdw) *
              MIN(MIN(fabs(diffuw), fabs(diffdw)),
                  one_by_six * (sigma3 * fabs(diffuw) + sigma4 * fabs(diffdw)));
  }

  double *mass_flux = &mass_flux_y[OPS_ACC6(0, 0)];
  *mass_flux = (flux) * (density1[OPS_ACC4(0, donor)] + limiter);

  /* Energy pass -> energy flux, using the mass flux stored above. */
  const double sigmam =
      fabs(*mass_flux) /
      (density1[OPS_ACC4(0, donor)] * pre_vol[OPS_ACC1(0, donor)]);
  diffuw = energy1[OPS_ACC5(0, donor)] - energy1[OPS_ACC5(0, upwind)];
  diffdw = energy1[OPS_ACC5(0, downwind)] - energy1[OPS_ACC5(0, donor)];
  limiter = 0.0;
  if ((diffuw * diffdw) > 0.0) {
    limiter = (1.0 - sigmam) * SIGN(1.0, diffdw) *
              MIN(MIN(fabs(diffuw), fabs(diffdw)),
                  one_by_six * (sigma3 * fabs(diffuw) + sigma4 * fabs(diffdw)));
  }

  ener_flux[OPS_ACC7(0, 0)] =
      (*mass_flux) * (energy1[OPS_ACC5(0, donor)] + limiter);
}
xdim5_reset_field_kernel2 * (y) + \
xdim5_reset_field_kernel2 * ydim5_reset_field_kernel2 * (z))
// NOTE(review): the two lines above are the tail of a macro definition whose
// beginning lies outside this view (presumably OPS_ACC5 -- confirm).

// user function
/*
 * Loops over an x_size * y_size * z_size index space and, at each point,
 * copies xvel1 -> xvel0, yvel1 -> yvel0 and zvel1 -> zvel0.
 *
 * The array subscripts are written as OPS_ACCn(0, 0, 0) with constant
 * arguments, so the dependence on the loop counters n_x, n_y, n_z must come
 * from the OPS_ACCn macros themselves, which are defined outside this view.
 * Do not rename the loop counters without checking those macros.
 *
 * The outermost (n_z) loop carries an OpenMP "parallel for" directive.
 * OPS_ACC0..OPS_ACC5 are undefined again after the function.
 */
void reset_field_kernel2_c_wrapper(double *restrict xvel0,
                                   const double *restrict xvel1,
                                   double *restrict yvel0,
                                   const double *restrict yvel1,
                                   double *restrict zvel0,
                                   const double *restrict zvel1, int x_size,
                                   int y_size, int z_size) {
#pragma omp parallel for
  for (int n_z = 0; n_z < z_size; n_z++) {
    for (int n_y = 0; n_y < y_size; n_y++) {
      for (int n_x = 0; n_x < x_size; n_x++) {
        xvel0[OPS_ACC0(0, 0, 0)] = xvel1[OPS_ACC1(0, 0, 0)];
        yvel0[OPS_ACC2(0, 0, 0)] = yvel1[OPS_ACC3(0, 0, 0)];
        zvel0[OPS_ACC4(0, 0, 0)] = zvel1[OPS_ACC5(0, 0, 0)];
      }
    }
  }
}
#undef OPS_ACC0
#undef OPS_ACC1
#undef OPS_ACC2
#undef OPS_ACC3
#undef OPS_ACC4
#undef OPS_ACC5
const double *restrict density1, double *restrict node_mass_pre, const double *restrict node_flux, int x_size, int y_size, int z_size) { #pragma omp parallel for for (int n_z = 0; n_z < z_size; n_z++) { for (int n_y = 0; n_y < y_size; n_y++) { for (int n_x = 0; n_x < x_size; n_x++) { node_mass_post[OPS_ACC0(0, 0, 0)] = 0.125 * (density1[OPS_ACC2(0, -1, 0)] * post_vol[OPS_ACC1(0, -1, 0)] + density1[OPS_ACC2(0, 0, 0)] * post_vol[OPS_ACC1(0, 0, 0)] + density1[OPS_ACC2(-1, -1, 0)] * post_vol[OPS_ACC1(-1, -1, 0)] + density1[OPS_ACC2(-1, 0, 0)] * post_vol[OPS_ACC1(-1, 0, 0)] + density1[OPS_ACC2(0, -1, -1)] * post_vol[OPS_ACC1(0, -1, -1)] + density1[OPS_ACC2(0, 0, -1)] * post_vol[OPS_ACC1(0, 0, -1)] + density1[OPS_ACC2(-1, -1, -1)] * post_vol[OPS_ACC1(-1, -1, -1)] + density1[OPS_ACC2(-1, 0, -1)] * post_vol[OPS_ACC1(-1, 0, -1)]); node_mass_pre[OPS_ACC3(0, 0, 0)] = node_mass_post[OPS_ACC0(0, 0, 0)] - node_flux[OPS_ACC4(0, 0, -1)] + node_flux[OPS_ACC4(0, 0, 0)]; } } } } #undef OPS_ACC0 #undef OPS_ACC1 #undef OPS_ACC2 #undef OPS_ACC3 #undef OPS_ACC4
// host stub function
/*
 * Runs the update_halo_kernel1_b2 loop body over the 2D index range given in
 * the descriptor.
 *
 * desc->args[0..6] supply the seven field arrays (density0, density1,
 * energy0, energy1, pressure, viscosity, soundspeed); desc->args[7] supplies
 * the integer `fields` flag array. desc->range is read as
 * {x_start, x_end, y_start, y_end}.
 *
 * For every (n_x, n_y) in range and every field whose flag equals 1, the
 * current cell (0,0) is overwritten with the value at offset (0,3) of the
 * same array.
 *
 * When OPS_diags > 1, call count, timings and transfer estimates are
 * accumulated in OPS_kernels[9]. With CHECKPOINTING defined, the function
 * returns early if ops_checkpointing_before() returns zero.
 */
void ops_par_loop_update_halo_kernel1_b2_execute(ops_kernel_descriptor *desc) {
  // NOTE(review): `block` is not referenced by name anywhere below.
  ops_block block = desc->block;
  int dim = desc->dim;
  int *range = desc->range;
  ops_arg arg0 = desc->args[0];
  ops_arg arg1 = desc->args[1];
  ops_arg arg2 = desc->args[2];
  ops_arg arg3 = desc->args[3];
  ops_arg arg4 = desc->args[4];
  ops_arg arg5 = desc->args[5];
  ops_arg arg6 = desc->args[6];
  ops_arg arg7 = desc->args[7];

  // Timing
  double t1, t2, c1, c2;

  ops_arg args[8] = {arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7};

#ifdef CHECKPOINTING
  if (!ops_checkpointing_before(args, 8, range, 9))
    return;
#endif

  if (OPS_diags > 1) {
    OPS_kernels[9].count++;
    ops_timers_core(&c2, &t2);
  }

  // compute locally allocated range for the sub-block
  int start[2];
  int end[2];

  // range holds (start, end) pairs, one pair per dimension
  for (int n = 0; n < 2; n++) {
    start[n] = range[2 * n];
    end[n] = range[2 * n + 1];
  }

#ifdef OPS_DEBUG
  ops_register_args(args, "update_halo_kernel1_b2");
#endif

  // set up initial pointers and exchange halos if necessary
  // Each base pointer is the argument's data pointer advanced by the dat's
  // base_offset before the cast to double*.
  int base0 = args[0].dat->base_offset;
  double *__restrict__ density0 = (double *)(args[0].data + base0);

  int base1 = args[1].dat->base_offset;
  double *__restrict__ density1 = (double *)(args[1].data + base1);

  int base2 = args[2].dat->base_offset;
  double *__restrict__ energy0 = (double *)(args[2].data + base2);

  int base3 = args[3].dat->base_offset;
  double *__restrict__ energy1 = (double *)(args[3].data + base3);

  int base4 = args[4].dat->base_offset;
  double *__restrict__ pressure = (double *)(args[4].data + base4);

  int base5 = args[5].dat->base_offset;
  double *__restrict__ viscosity = (double *)(args[5].data + base5);

  int base6 = args[6].dat->base_offset;
  double *__restrict__ soundspeed = (double *)(args[6].data + base6);

  // args[7] has no dat offset applied; its data pointer is used directly.
  const int *__restrict__ fields = (int *)args[7].data;

  // initialize global variable with the dimension of dats
  // NOTE(review): these are locals here; they are not referenced by name in
  // the visible body, so presumably the OPS_ACCn macros expand to them --
  // confirm before renaming.
  int xdim0_update_halo_kernel1_b2 = args[0].dat->size[0];
  int xdim1_update_halo_kernel1_b2 = args[1].dat->size[0];
  int xdim2_update_halo_kernel1_b2 = args[2].dat->size[0];
  int xdim3_update_halo_kernel1_b2 = args[3].dat->size[0];
  int xdim4_update_halo_kernel1_b2 = args[4].dat->size[0];
  int xdim5_update_halo_kernel1_b2 = args[5].dat->size[0];
  int xdim6_update_halo_kernel1_b2 = args[6].dat->size[0];

  // Everything between the t2 sample above and this t1 sample is booked as
  // mpi_time.
  if (OPS_diags > 1) {
    ops_timers_core(&c1, &t1);
    OPS_kernels[9].mpi_time += t1 - t2;
  }

#pragma omp parallel for
  for (int n_y = start[1]; n_y < end[1]; n_y++) {
#ifdef intel
#pragma loop_count(10000)
#pragma omp simd aligned(density0, density1, energy0, energy1, pressure, \
                         viscosity, soundspeed)
#else
#pragma simd
#endif
    for (int n_x = start[0]; n_x < end[0]; n_x++) {
      // Copy from offset (0,3) into the current cell for each enabled field.
      if (fields[FIELD_DENSITY0] == 1)
        density0[OPS_ACC0(0, 0)] = density0[OPS_ACC0(0, 3)];
      if (fields[FIELD_DENSITY1] == 1)
        density1[OPS_ACC1(0, 0)] = density1[OPS_ACC1(0, 3)];
      if (fields[FIELD_ENERGY0] == 1)
        energy0[OPS_ACC2(0, 0)] = energy0[OPS_ACC2(0, 3)];
      if (fields[FIELD_ENERGY1] == 1)
        energy1[OPS_ACC3(0, 0)] = energy1[OPS_ACC3(0, 3)];
      if (fields[FIELD_PRESSURE] == 1)
        pressure[OPS_ACC4(0, 0)] = pressure[OPS_ACC4(0, 3)];
      if (fields[FIELD_VISCOSITY] == 1)
        viscosity[OPS_ACC5(0, 0)] = viscosity[OPS_ACC5(0, 3)];
      if (fields[FIELD_SOUNDSPEED] == 1)
        soundspeed[OPS_ACC6(0, 0)] = soundspeed[OPS_ACC6(0, 3)];
    }
  }

  // Loop duration: from the t1 sample before the loop to this t2 sample.
  if (OPS_diags > 1) {
    ops_timers_core(&c2, &t2);
    OPS_kernels[9].time += t2 - t1;
  }

  if (OPS_diags > 1) {
    // Update kernel record
    ops_timers_core(&c1, &t1);
    OPS_kernels[9].mpi_time += t1 - t2;
    // Transfer is accumulated for arg0..arg6 only; arg7 is not included.
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg0);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg1);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg2);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg3);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg4);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg5);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg6);
  }
}
// user function inline void generate_chunk_kernel(const double *vertexx, const double *vertexy, double *energy0, double *density0, double *xvel0, double *yvel0, const double *cellx, const double *celly) { double radius, x_cent, y_cent; int is_in = 0; int is_in2 = 0; energy0[OPS_ACC2(0, 0)] = states[0].energy; density0[OPS_ACC3(0, 0)] = states[0].density; xvel0[OPS_ACC4(0, 0)] = states[0].xvel; yvel0[OPS_ACC5(0, 0)] = states[0].yvel; for (int i = 1; i < number_of_states; i++) { x_cent = states[i].xmin; y_cent = states[i].ymin; is_in = 0; is_in2 = 0; if (states[i].geometry == g_rect) { for (int i1 = -1; i1 <= 0; i1++) { for (int j1 = -1; j1 <= 0; j1++) { if (vertexx[OPS_ACC0(1 + i1, 0)] >= states[i].xmin && vertexx[OPS_ACC0(0 + i1, 0)] < states[i].xmax) { if (vertexy[OPS_ACC1(0, 1 + j1)] >= states[i].ymin && vertexy[OPS_ACC1(0, 0 + j1)] < states[i].ymax) { is_in = 1; } } } } if (vertexx[OPS_ACC0(1, 0)] >= states[i].xmin && vertexx[OPS_ACC0(0, 0)] < states[i].xmax) { if (vertexy[OPS_ACC1(0, 1)] >= states[i].ymin && vertexy[OPS_ACC1(0, 0)] < states[i].ymax) { is_in2 = 1; } } if (is_in2) { energy0[OPS_ACC2(0, 0)] = states[i].energy; density0[OPS_ACC3(0, 0)] = states[i].density; } if (is_in) { xvel0[OPS_ACC4(0, 0)] = states[i].xvel; yvel0[OPS_ACC5(0, 0)] = states[i].yvel; } } else if (states[i].geometry == g_circ) { for (int i1 = -1; i1 <= 0; i1++) { for (int j1 = -1; j1 <= 0; j1++) { radius = sqrt((cellx[OPS_ACC6(i1, 0)] - x_cent) * (cellx[OPS_ACC6(i1, 0)] - x_cent) + (celly[OPS_ACC7(0, j1)] - y_cent) * (celly[OPS_ACC7(0, j1)] - y_cent)); if (radius <= states[i].radius) { is_in = 1; } } } if (radius <= states[i].radius) is_in2 = 1; if (is_in2) { energy0[OPS_ACC2(0, 0)] = states[i].energy; density0[OPS_ACC3(0, 0)] = states[i].density; } if (is_in) { xvel0[OPS_ACC4(0, 0)] = states[i].xvel; yvel0[OPS_ACC5(0, 0)] = states[i].yvel; } } else if (states[i].geometry == g_point) { for (int i1 = -1; i1 <= 0; i1++) { for (int j1 = -1; j1 <= 0; j1++) { if (vertexx[OPS_ACC0(i1, 0)] 
== x_cent && vertexy[OPS_ACC1(0, j1)] == y_cent) { is_in = 1; } } } if (vertexx[OPS_ACC0(0, 0)] == x_cent && vertexy[OPS_ACC1(0, 0)] == y_cent) is_in2 = 1; if (is_in2) { energy0[OPS_ACC2(0, 0)] = states[i].energy; density0[OPS_ACC3(0, 0)] = states[i].density; } if (is_in) { xvel0[OPS_ACC4(0, 0)] = states[i].xvel; yvel0[OPS_ACC5(0, 0)] = states[i].yvel; } } } }
// host stub function
/*
 * Runs the initialise_chunk_kernel_volume loop body over the 3D index range
 * given in the descriptor.
 *
 * desc->args[0..6] supply, in order: volume, celldy, xarea, celldx, yarea,
 * celldz, zarea. desc->range is read as three (start, end) pairs.
 *
 * For every cell in range:
 *   volume = d_x * d_y * d_z, each d_* being (max - min) / cells of `grid`
 *   xarea  = celldy * celldz
 *   yarea  = celldx * celldz
 *   zarea  = celldx * celldy
 *
 * When OPS_diags > 1, call count, timings and transfer estimates are
 * accumulated in OPS_kernels[9]. With CHECKPOINTING defined, the function
 * returns early if ops_checkpointing_before() returns zero.
 */
void ops_par_loop_initialise_chunk_kernel_volume_execute(
    ops_kernel_descriptor *desc) {
  // NOTE(review): `block` is not referenced by name anywhere below.
  ops_block block = desc->block;
  int dim = desc->dim;
  int *range = desc->range;
  ops_arg arg0 = desc->args[0];
  ops_arg arg1 = desc->args[1];
  ops_arg arg2 = desc->args[2];
  ops_arg arg3 = desc->args[3];
  ops_arg arg4 = desc->args[4];
  ops_arg arg5 = desc->args[5];
  ops_arg arg6 = desc->args[6];

  // Timing
  double t1, t2, c1, c2;

  ops_arg args[7] = {arg0, arg1, arg2, arg3, arg4, arg5, arg6};

#ifdef CHECKPOINTING
  if (!ops_checkpointing_before(args, 7, range, 9))
    return;
#endif

  if (OPS_diags > 1) {
    OPS_kernels[9].count++;
    ops_timers_core(&c2, &t2);
  }

  // compute locally allocated range for the sub-block
  int start[3];
  int end[3];

  // range holds (start, end) pairs, one pair per dimension
  for (int n = 0; n < 3; n++) {
    start[n] = range[2 * n];
    end[n] = range[2 * n + 1];
  }

#ifdef OPS_DEBUG
  ops_register_args(args, "initialise_chunk_kernel_volume");
#endif

  // set up initial pointers and exchange halos if necessary
  // Each base pointer is the argument's data pointer advanced by the dat's
  // base_offset before the cast to double*.
  int base0 = args[0].dat->base_offset;
  double *__restrict__ volume = (double *)(args[0].data + base0);

  int base1 = args[1].dat->base_offset;
  const double *__restrict__ celldy = (double *)(args[1].data + base1);

  int base2 = args[2].dat->base_offset;
  double *__restrict__ xarea = (double *)(args[2].data + base2);

  int base3 = args[3].dat->base_offset;
  const double *__restrict__ celldx = (double *)(args[3].data + base3);

  int base4 = args[4].dat->base_offset;
  double *__restrict__ yarea = (double *)(args[4].data + base4);

  int base5 = args[5].dat->base_offset;
  const double *__restrict__ celldz = (double *)(args[5].data + base5);

  int base6 = args[6].dat->base_offset;
  double *__restrict__ zarea = (double *)(args[6].data + base6);

  // initialize global variable with the dimension of dats
  // NOTE(review): these are locals here; they are not referenced by name in
  // the visible body, so presumably the OPS_ACCn macros expand to them --
  // confirm before renaming.
  int xdim0_initialise_chunk_kernel_volume = args[0].dat->size[0];
  int ydim0_initialise_chunk_kernel_volume = args[0].dat->size[1];
  int xdim1_initialise_chunk_kernel_volume = args[1].dat->size[0];
  int ydim1_initialise_chunk_kernel_volume = args[1].dat->size[1];
  int xdim2_initialise_chunk_kernel_volume = args[2].dat->size[0];
  int ydim2_initialise_chunk_kernel_volume = args[2].dat->size[1];
  int xdim3_initialise_chunk_kernel_volume = args[3].dat->size[0];
  int ydim3_initialise_chunk_kernel_volume = args[3].dat->size[1];
  int xdim4_initialise_chunk_kernel_volume = args[4].dat->size[0];
  int ydim4_initialise_chunk_kernel_volume = args[4].dat->size[1];
  int xdim5_initialise_chunk_kernel_volume = args[5].dat->size[0];
  int ydim5_initialise_chunk_kernel_volume = args[5].dat->size[1];
  int xdim6_initialise_chunk_kernel_volume = args[6].dat->size[0];
  int ydim6_initialise_chunk_kernel_volume = args[6].dat->size[1];

  // Everything between the t2 sample above and this t1 sample is booked as
  // mpi_time.
  if (OPS_diags > 1) {
    ops_timers_core(&c1, &t1);
    OPS_kernels[9].mpi_time += t1 - t2;
  }

#pragma omp parallel for collapse(2)
  for (int n_z = start[2]; n_z < end[2]; n_z++) {
    for (int n_y = start[1]; n_y < end[1]; n_y++) {
#ifdef intel
#pragma loop_count(10000)
#pragma omp simd aligned(volume, celldy, xarea, celldx, yarea, celldz, zarea)
#else
#pragma simd
#endif
      for (int n_x = start[0]; n_x < end[0]; n_x++) {
        double d_x, d_y, d_z;

        // The spacings depend only on `grid`, not on the loop indices.
        d_x = (grid.xmax - grid.xmin) / (double)grid.x_cells;
        d_y = (grid.ymax - grid.ymin) / (double)grid.y_cells;
        d_z = (grid.zmax - grid.zmin) / (double)grid.z_cells;

        volume[OPS_ACC0(0, 0, 0)] = d_x * d_y * d_z;
        xarea[OPS_ACC2(0, 0, 0)] =
            celldy[OPS_ACC1(0, 0, 0)] * celldz[OPS_ACC5(0, 0, 0)];
        yarea[OPS_ACC4(0, 0, 0)] =
            celldx[OPS_ACC3(0, 0, 0)] * celldz[OPS_ACC5(0, 0, 0)];
        zarea[OPS_ACC6(0, 0, 0)] =
            celldx[OPS_ACC3(0, 0, 0)] * celldy[OPS_ACC1(0, 0, 0)];
      }
    }
  }

  // Loop duration: from the t1 sample before the loop to this t2 sample.
  if (OPS_diags > 1) {
    ops_timers_core(&c2, &t2);
    OPS_kernels[9].time += t2 - t1;
  }

  if (OPS_diags > 1) {
    // Update kernel record
    ops_timers_core(&c1, &t1);
    OPS_kernels[9].mpi_time += t1 - t2;
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg0);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg1);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg2);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg3);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg4);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg5);
    OPS_kernels[9].transfer += ops_compute_transfer(dim, start, end, &arg6);
  }
}
// host stub function
/*
 * Runs the viscosity_kernel loop body over the 2D index range given in the
 * descriptor.
 *
 * desc->args[0..6] supply, in order: xvel0, yvel0, celldx, celldy, pressure,
 * density0 (all read-only here) and viscosity (written). desc->range is read
 * as {x_start, x_end, y_start, y_end}.
 *
 * For every cell in range, viscosity is set to 0.0 when limiter > 0.0 or
 * div >= 0.0, and otherwise to 2.0 * density0 * grad2 * limiter * limiter,
 * with the intermediate quantities computed as shown in the loop body.
 *
 * When OPS_diags > 1, call count, timings and transfer estimates are
 * accumulated in OPS_kernels[50]. With CHECKPOINTING defined, the function
 * returns early if ops_checkpointing_before() returns zero.
 */
void ops_par_loop_viscosity_kernel_execute(ops_kernel_descriptor *desc) {
  // NOTE(review): `block` is not referenced by name anywhere below.
  ops_block block = desc->block;
  int dim = desc->dim;
  int *range = desc->range;
  ops_arg arg0 = desc->args[0];
  ops_arg arg1 = desc->args[1];
  ops_arg arg2 = desc->args[2];
  ops_arg arg3 = desc->args[3];
  ops_arg arg4 = desc->args[4];
  ops_arg arg5 = desc->args[5];
  ops_arg arg6 = desc->args[6];

  // Timing
  double t1, t2, c1, c2;

  ops_arg args[7] = {arg0, arg1, arg2, arg3, arg4, arg5, arg6};

#ifdef CHECKPOINTING
  if (!ops_checkpointing_before(args, 7, range, 50))
    return;
#endif

  if (OPS_diags > 1) {
    OPS_kernels[50].count++;
    ops_timers_core(&c2, &t2);
  }

  // compute locally allocated range for the sub-block
  int start[2];
  int end[2];

  // range holds (start, end) pairs, one pair per dimension
  for (int n = 0; n < 2; n++) {
    start[n] = range[2 * n];
    end[n] = range[2 * n + 1];
  }

#ifdef OPS_DEBUG
  ops_register_args(args, "viscosity_kernel");
#endif

  // set up initial pointers and exchange halos if necessary
  // Each base pointer is the argument's data pointer advanced by the dat's
  // base_offset before the cast to double*.
  int base0 = args[0].dat->base_offset;
  const double *__restrict__ xvel0 = (double *)(args[0].data + base0);

  int base1 = args[1].dat->base_offset;
  const double *__restrict__ yvel0 = (double *)(args[1].data + base1);

  int base2 = args[2].dat->base_offset;
  const double *__restrict__ celldx = (double *)(args[2].data + base2);

  int base3 = args[3].dat->base_offset;
  const double *__restrict__ celldy = (double *)(args[3].data + base3);

  int base4 = args[4].dat->base_offset;
  const double *__restrict__ pressure = (double *)(args[4].data + base4);

  int base5 = args[5].dat->base_offset;
  const double *__restrict__ density0 = (double *)(args[5].data + base5);

  int base6 = args[6].dat->base_offset;
  double *__restrict__ viscosity = (double *)(args[6].data + base6);

  // initialize global variable with the dimension of dats
  // NOTE(review): these are locals here; they are not referenced by name in
  // the visible body, so presumably the OPS_ACCn macros expand to them --
  // confirm before renaming.
  int xdim0_viscosity_kernel = args[0].dat->size[0];
  int xdim1_viscosity_kernel = args[1].dat->size[0];
  int xdim2_viscosity_kernel = args[2].dat->size[0];
  int xdim3_viscosity_kernel = args[3].dat->size[0];
  int xdim4_viscosity_kernel = args[4].dat->size[0];
  int xdim5_viscosity_kernel = args[5].dat->size[0];
  int xdim6_viscosity_kernel = args[6].dat->size[0];

  // Everything between the t2 sample above and this t1 sample is booked as
  // mpi_time.
  if (OPS_diags > 1) {
    ops_timers_core(&c1, &t1);
    OPS_kernels[50].mpi_time += t1 - t2;
  }

#pragma omp parallel for
  for (int n_y = start[1]; n_y < end[1]; n_y++) {
#ifdef intel
#pragma loop_count(10000)
#pragma omp simd aligned(xvel0, yvel0, celldx, celldy, pressure, density0, \
                         viscosity)
#else
#pragma simd
#endif
    for (int n_x = start[0]; n_x < end[0]; n_x++) {
      double ugrad, vgrad, grad2, pgradx, pgrady, pgradx2, pgrady2, grad,
          ygrad, xgrad, div, strain2, limiter, pgrad;

      // Velocity differences across the cell: x-velocity high-x minus low-x
      // pair, and y-velocity high-y minus low-y pair.
      ugrad = (xvel0[OPS_ACC0(1, 0)] + xvel0[OPS_ACC0(1, 1)]) -
              (xvel0[OPS_ACC0(0, 0)] + xvel0[OPS_ACC0(0, 1)]);

      vgrad = (yvel0[OPS_ACC1(0, 1)] + yvel0[OPS_ACC1(1, 1)]) -
              (yvel0[OPS_ACC1(0, 0)] + yvel0[OPS_ACC1(1, 0)]);

      div = (celldx[OPS_ACC2(0, 0)]) * (ugrad) +
            (celldy[OPS_ACC3(0, 0)]) * (vgrad);

      // Cross terms: x-velocity difference along y over celldy, plus
      // y-velocity difference along x over celldx, each halved.
      strain2 = 0.5 *
                    (xvel0[OPS_ACC0(0, 1)] + xvel0[OPS_ACC0(1, 1)] -
                     xvel0[OPS_ACC0(0, 0)] - xvel0[OPS_ACC0(1, 0)]) /
                    (celldy[OPS_ACC3(0, 0)]) +
                0.5 *
                    (yvel0[OPS_ACC1(1, 0)] + yvel0[OPS_ACC1(1, 1)] -
                     yvel0[OPS_ACC1(0, 0)] - yvel0[OPS_ACC1(0, 1)]) /
                    (celldx[OPS_ACC2(0, 0)]);

      // Pressure differences between the +1 and -1 neighbours, divided by
      // the sum of the cell size at (0,0) and at the +1 neighbour.
      pgradx = (pressure[OPS_ACC4(1, 0)] - pressure[OPS_ACC4(-1, 0)]) /
               (celldx[OPS_ACC2(0, 0)] + celldx[OPS_ACC2(1, 0)]);
      pgrady = (pressure[OPS_ACC4(0, 1)] - pressure[OPS_ACC4(0, -1)]) /
               (celldy[OPS_ACC3(0, 0)] + celldy[OPS_ACC3(0, 1)]);

      pgradx2 = pgradx * pgradx;
      pgrady2 = pgrady * pgrady;

      // The 1.0e-16 floor keeps the denominator away from zero.
      limiter = ((0.5 * (ugrad) / celldx[OPS_ACC2(0, 0)]) * pgradx2 +
                 (0.5 * (vgrad) / celldy[OPS_ACC3(0, 0)]) * pgrady2 +
                 strain2 * pgradx * pgrady) /
                MAX(pgradx2 + pgrady2, 1.0e-16);

      if ((limiter > 0.0) || (div >= 0.0)) {
        viscosity[OPS_ACC6(0, 0)] = 0.0;
      } else {
        // Floor the magnitudes of pgradx/pgrady at 1.0e-16 before dividing
        // by them below. SIGN is defined outside this view; presumably it
        // returns the first argument's magnitude with the second argument's
        // sign -- confirm.
        pgradx = SIGN(MAX(1.0e-16, fabs(pgradx)), pgradx);
        pgrady = SIGN(MAX(1.0e-16, fabs(pgrady)), pgrady);
        pgrad = sqrt(pgradx * pgradx + pgrady * pgrady);
        xgrad = fabs(celldx[OPS_ACC2(0, 0)] * pgrad / pgradx);
        ygrad = fabs(celldy[OPS_ACC3(0, 0)] * pgrad / pgrady);
        grad = MIN(xgrad, ygrad);
        grad2 = grad * grad;

        viscosity[OPS_ACC6(0, 0)] =
            2.0 * (density0[OPS_ACC5(0, 0)]) * grad2 * limiter * limiter;
      }
    }
  }

  // Loop duration: from the t1 sample before the loop to this t2 sample.
  if (OPS_diags > 1) {
    ops_timers_core(&c2, &t2);
    OPS_kernels[50].time += t2 - t1;
  }

  if (OPS_diags > 1) {
    // Update kernel record
    ops_timers_core(&c1, &t1);
    OPS_kernels[50].mpi_time += t1 - t2;
    OPS_kernels[50].transfer += ops_compute_transfer(dim, start, end, &arg0);
    OPS_kernels[50].transfer += ops_compute_transfer(dim, start, end, &arg1);
    OPS_kernels[50].transfer += ops_compute_transfer(dim, start, end, &arg2);
    OPS_kernels[50].transfer += ops_compute_transfer(dim, start, end, &arg3);
    OPS_kernels[50].transfer += ops_compute_transfer(dim, start, end, &arg4);
    OPS_kernels[50].transfer += ops_compute_transfer(dim, start, end, &arg5);
    OPS_kernels[50].transfer += ops_compute_transfer(dim, start, end, &arg6);
  }
}
//user function inline void generate_chunk_kernel( const double *vertexx, const double *vertexy, const double *vertexz, double *energy0, double *density0, double *xvel0, double *yvel0, double *zvel0, const double *cellx, const double *celly, const double *cellz) { double radius, x_cent, y_cent, z_cent; energy0[OPS_ACC3(0,0,0)]= states[0].energy; density0[OPS_ACC4(0,0,0)]= states[0].density; xvel0[OPS_ACC5(0,0,0)]=states[0].xvel; yvel0[OPS_ACC6(0,0,0)]=states[0].yvel; zvel0[OPS_ACC7(0,0,0)]=states[0].zvel; for(int i = 1; i<number_of_states; i++) { x_cent=states[i].xmin; y_cent=states[i].ymin; z_cent=states[i].zmin; if (states[i].geometry == g_cube) { if(vertexx[OPS_ACC0(1,0,0)] >= states[i].xmin && vertexx[OPS_ACC0(0,0,0)] < states[i].xmax) { if(vertexy[OPS_ACC1(0,1,0)] >= states[i].ymin && vertexy[OPS_ACC1(0,0,0)] < states[i].ymax) { if(vertexz[OPS_ACC2(0,0,1)] >= states[i].zmin && vertexz[OPS_ACC2(0,0,0)] < states[i].zmax) { energy0[OPS_ACC3(0,0,0)] = states[i].energy; density0[OPS_ACC4(0,0,0)] = states[i].density; for (int ix=0;ix<2;ix++){ for (int iy=0;iy<2;iy++){ for (int iz=0;iz<2;iz++){ xvel0[OPS_ACC5(ix,iy,iz)] = states[i].xvel; yvel0[OPS_ACC6(ix,iy,iz)] = states[i].yvel; zvel0[OPS_ACC7(ix,iy,iz)] = states[i].zvel; } } } } } } } else if(states[i].geometry == g_sphe) { radius = sqrt ((cellx[OPS_ACC8(0,0,0)] - x_cent) * (cellx[OPS_ACC8(0,0,0)] - x_cent) + (celly[OPS_ACC9(0,0,0)] - y_cent) * (celly[OPS_ACC9(0,0,0)] - y_cent) + (cellz[OPS_ACC10(0,0,0)] - z_cent) * (cellz[OPS_ACC10(0,0,0)] - z_cent)); if(radius <= states[i].radius) { energy0[OPS_ACC3(0,0,0)] = states[i].energy; density0[OPS_ACC4(0,0,0)] = states[i].density; for (int ix=0;ix<2;ix++){ for (int iy=0;iy<2;iy++){ for (int iz=0;iz<2;iz++){ xvel0[OPS_ACC5(ix,iy,iz)] = states[i].xvel; yvel0[OPS_ACC6(ix,iy,iz)] = states[i].yvel; zvel0[OPS_ACC7(ix,iy,iz)] = states[i].zvel; } } } } } else if(states[i].geometry == g_point) { if(vertexx[OPS_ACC0(0,0,0)] == x_cent && vertexy[OPS_ACC1(0,0,0)] == y_cent && 
vertexz[OPS_ACC2(0,0,0)] == z_cent) { energy0[OPS_ACC3(0,0,0)] = states[i].energy; density0[OPS_ACC4(0,0,0)] = states[i].density; for (int ix=0;ix<2;ix++){ for (int iy=0;iy<2;iy++){ for (int iz=0;iz<2;iz++){ xvel0[OPS_ACC5(ix,iy,iz)] = states[i].xvel; yvel0[OPS_ACC6(ix,iy,iz)] = states[i].yvel; zvel0[OPS_ACC7(ix,iy,iz)] = states[i].zvel; } } } } } } }
(zvel0[OPS_ACC13(0, 0, 0)] + zvel0[OPS_ACC13(1, 0, 0)] + zvel0[OPS_ACC13(0, 1, 0)] + zvel0[OPS_ACC13(1, 1, 0)] + zvel0[OPS_ACC13(0, 0, 0)] + zvel0[OPS_ACC13(1, 0, 0)] + zvel0[OPS_ACC13(0, 1, 0)] + zvel0[OPS_ACC13(1, 1, 0)])) * 0.125 * dt * 0.5; front_flux = (zarea[OPS_ACC12(0, 0, 1)] * (zvel0[OPS_ACC13(0, 0, 1)] + zvel0[OPS_ACC13(1, 0, 1)] + zvel0[OPS_ACC13(0, 1, 1)] + zvel0[OPS_ACC13(1, 1, 1)] + zvel0[OPS_ACC13(0, 0, 1)] + zvel0[OPS_ACC13(1, 0, 1)] + zvel0[OPS_ACC13(0, 1, 1)] + zvel0[OPS_ACC13(1, 1, 1)])) * 0.125 * dt * 0.5; total_flux = right_flux - left_flux + top_flux - bottom_flux + front_flux - back_flux; volume_change[OPS_ACC4(0, 0, 0)] = (volume[OPS_ACC5(0, 0, 0)]) / (volume[OPS_ACC5(0, 0, 0)] + total_flux); recip_volume = 1.0 / volume[OPS_ACC5(0, 0, 0)]; energy_change = (pressure[OPS_ACC6(0, 0, 0)] / density0[OPS_ACC7(0, 0, 0)] + viscosity[OPS_ACC9(0, 0, 0)] / density0[OPS_ACC7(0, 0, 0)]) * total_flux * recip_volume; energy1[OPS_ACC11(0, 0, 0)] = energy0[OPS_ACC10(0, 0, 0)] - energy_change; density1[OPS_ACC8(0, 0, 0)] = density0[OPS_ACC7(0, 0, 0)] * volume_change[OPS_ACC4(0, 0, 0)]; } } } }
// user function void advec_cell_kernel4_ydir_c_wrapper( double *restrict density1, double *restrict energy1, const double *restrict mass_flux_y, const double *restrict vol_flux_y, const double *restrict pre_vol, const double *restrict post_vol, double *restrict pre_mass, double *restrict post_mass, double *restrict advec_vol, double *restrict post_ener, const double *restrict ener_flux, int x_size, int y_size) { #pragma omp parallel for for (int n_y = 0; n_y < y_size; n_y++) { for (int n_x = 0; n_x < x_size; n_x++) { pre_mass[OPS_ACC6(0, 0)] = density1[OPS_ACC0(0, 0)] * pre_vol[OPS_ACC4(0, 0)]; post_mass[OPS_ACC7(0, 0)] = pre_mass[OPS_ACC6(0, 0)] + mass_flux_y[OPS_ACC2(0, 0)] - mass_flux_y[OPS_ACC2(0, 1)]; post_ener[OPS_ACC9(0, 0)] = (energy1[OPS_ACC1(0, 0)] * pre_mass[OPS_ACC6(0, 0)] + ener_flux[OPS_ACC10(0, 0)] - ener_flux[OPS_ACC10(0, 1)]) / post_mass[OPS_ACC7(0, 0)]; advec_vol[OPS_ACC8(0, 0)] = pre_vol[OPS_ACC4(0, 0)] + vol_flux_y[OPS_ACC3(0, 0)] - vol_flux_y[OPS_ACC3(0, 1)]; density1[OPS_ACC0(0, 0)] = post_mass[OPS_ACC7(0, 0)] / advec_vol[OPS_ACC8(0, 0)]; energy1[OPS_ACC1(0, 0)] = post_ener[OPS_ACC9(0, 0)]; } }