Пример #1
0
/*
 * Launches the two reset_field kernels.
 *
 * reset_field_kernel1 runs over the inner range and is given density0/energy0
 * for writing and density1/energy1 for reading. reset_field_kernel2 runs over
 * the inner range extended by one point at the upper end of every dimension
 * and is given xvel0/yvel0/zvel0 for writing and xvel1/yvel1/zvel1 for reading.
 * The kernel bodies are not visible here; presumably each "1" field is copied
 * into its "0" counterpart -- confirm against the kernel definitions.
 */
void reset_field()
{
  // Cleared on entry; the original note says it is not used yet because of a
  // reduction issue ("OpenA" in the original comment, presumably OpenACC).
  error_condition = 0;

  // Loop bounds are taken from the global field descriptor.
  const int xlo = field.x_min;
  const int xhi = field.x_max;
  const int ylo = field.y_min;
  const int yhi = field.y_max;
  const int zlo = field.z_min;
  const int zhi = field.z_max;

  // Inner range without border.
  int inner_range[] = {xlo, xhi, ylo, yhi, zlo, zhi};
  // Inner range with the upper bound of each dimension raised by one.
  int inner_plus1_range[] = {xlo, xhi + 1, ylo, yhi + 1, zlo, zhi + 1};

  ops_par_loop(reset_field_kernel1, "reset_field_kernel1", clover_grid, 3, inner_range,
               ops_arg_dat(density0, S3D_000, "double", OPS_WRITE),
               ops_arg_dat(density1, S3D_000, "double", OPS_READ),
               ops_arg_dat(energy0, S3D_000, "double", OPS_WRITE),
               ops_arg_dat(energy1, S3D_000, "double", OPS_READ));

  ops_par_loop(reset_field_kernel2, "reset_field_kernel2", clover_grid, 3, inner_plus1_range,
               ops_arg_dat(xvel0, S3D_000, "double", OPS_WRITE),
               ops_arg_dat(xvel1, S3D_000, "double", OPS_READ),
               ops_arg_dat(yvel0, S3D_000, "double", OPS_WRITE),
               ops_arg_dat(yvel1, S3D_000, "double", OPS_READ),
               ops_arg_dat(zvel0, S3D_000, "double", OPS_WRITE),
               ops_arg_dat(zvel1, S3D_000, "double", OPS_READ));
}
Пример #2
0
/*
 * First step of the Chebyshev solver: launches the initial-residual kernel,
 * optionally applies a preconditioner, seeds the search direction p and adds
 * it to u.
 *
 * Parameters:
 *   u, u0               dats read by tea_leaf_cheby_init_kernel; u is also
 *                       incremented by the final xpy kernel.
 *   p                   written by tea_leaf_recip3_kernel, then read by the
 *                       xpy kernel.
 *   r, w                written by tea_leaf_cheby_init_kernel.
 *   z                   preconditioned residual; read when a preconditioner
 *                       is active.
 *   Mi                  passed to tea_diag_solve (TL_PREC_JAC_DIAG only).
 *   cp, bfp             passed to tea_block_solve (TL_PREC_JAC_BLOCK only).
 *   Kx, Ky              coefficient dats read by the init kernel and passed on
 *                       to the preconditioner solves.
 *   rx, ry              scalars passed by value to the init kernel and solves.
 *   theta               scalar passed to tea_leaf_recip3_kernel.
 *   preconditioner_type one of TL_PREC_NONE, TL_PREC_JAC_BLOCK,
 *                       TL_PREC_JAC_DIAG.
 */
void tea_leaf_cheby_init(
  ops_dat u,
  ops_dat u0,
  ops_dat p,
  ops_dat r,
  ops_dat Mi,
  ops_dat w,
  ops_dat z,
  ops_dat Kx,
  ops_dat Ky,
  ops_dat cp,
  ops_dat bfp,
  double rx, double ry,
  double theta, int preconditioner_type)
{
  //initialize sizes using global values
  int x_min = field.x_min;
  int x_max = field.x_max;
  int y_min = field.y_min;
  int y_max = field.y_max;

  int rangexy[] = {x_min,x_max,y_min,y_max};

  ops_par_loop(tea_leaf_cheby_init_kernel, "tea_leaf_cheby_init_kernel", tea_grid, 2, rangexy,
      ops_arg_dat(w, 1, S2D_00, "double", OPS_WRITE),
      ops_arg_dat(r, 1, S2D_00, "double", OPS_WRITE),
      ops_arg_dat(Kx, 1, S2D_00_P10, "double", OPS_READ),
      ops_arg_dat(Ky, 1, S2D_00_0P1, "double", OPS_READ),
      ops_arg_dat(u, 1, S2D_00_0M1_M10_P10_0P1, "double", OPS_READ),
      ops_arg_dat(u0, 1, S2D_00, "double", OPS_READ),
      ops_arg_gbl(&rx, 1, "double", OPS_READ),
      ops_arg_gbl(&ry, 1, "double", OPS_READ));

  if (preconditioner_type != TL_PREC_NONE) {
    // NOTE(review): for a preconditioner type other than the two handled
    // below, z is read by the recip3 kernel without being recomputed here.
    if (preconditioner_type == TL_PREC_JAC_BLOCK)
      tea_block_solve(r, z, cp, bfp, Kx, Ky, rx, ry);
    else if (preconditioner_type == TL_PREC_JAC_DIAG)
      tea_diag_solve(r, z, Mi, Kx, Ky, rx, ry);

    // Preconditioned case: p is derived from z.
    ops_par_loop(tea_leaf_recip3_kernel, "tea_leaf_recip3_kernel", tea_grid, 2, rangexy,
      ops_arg_dat(p, 1, S2D_00, "double", OPS_WRITE),
      ops_arg_dat(z, 1, S2D_00, "double", OPS_READ),
      ops_arg_gbl(&theta, 1, "double", OPS_READ));
  } else {
    // Unpreconditioned case: p is derived directly from the residual r.
    ops_par_loop(tea_leaf_recip3_kernel, "tea_leaf_recip3_kernel", tea_grid, 2, rangexy,
      ops_arg_dat(p, 1, S2D_00, "double", OPS_WRITE),
      ops_arg_dat(r, 1, S2D_00, "double", OPS_READ),
      ops_arg_gbl(&theta, 1, "double", OPS_READ));
  }

  // u is passed with OPS_INC and p with OPS_READ.
  ops_par_loop(tea_leaf_xpy_kernel, "tea_leaf_xpy_kernel", tea_grid, 2, rangexy,
      ops_arg_dat(u, 1, S2D_00, "double", OPS_INC),
      ops_arg_dat(p, 1, S2D_00, "double", OPS_READ));
}
Пример #3
0
/*
 * One Chebyshev iteration: launches the residual kernel, optionally applies
 * the preconditioner, updates the search direction p with the coefficients
 * for this step, and adds p to u.
 *
 * Parameters:
 *   u, u0               read by the residual kernel; u is also incremented by
 *                       the final xpy kernel.
 *   p                   read/written by tea_leaf_axpby_kernel, then read by
 *                       the xpy kernel.
 *   r, w                written by the residual kernel.
 *   z                   preconditioned residual; read when a preconditioner
 *                       is active.
 *   Mi                  passed to tea_diag_solve (TL_PREC_JAC_DIAG only).
 *   cp, bfp             passed to tea_block_solve (TL_PREC_JAC_BLOCK only).
 *   Kx, Ky              coefficient dats.
 *   ch_alphas, ch_betas coefficient arrays; element [step] of each is passed
 *                       to the axpby kernel.
 *   rx, ry              scalars passed by value to the kernel and the solves.
 *   step                index into ch_alphas / ch_betas.
 *   preconditioner_type one of TL_PREC_NONE, TL_PREC_JAC_BLOCK,
 *                       TL_PREC_JAC_DIAG.
 */
void tea_leaf_cheby_iterate(
  ops_dat u,
  ops_dat u0,
  ops_dat p,
  ops_dat r,
  ops_dat Mi,
  ops_dat w,
  ops_dat z,
  ops_dat Kx,
  ops_dat Ky,
  ops_dat cp,
  ops_dat bfp,
  double *ch_alphas,
  double *ch_betas,
  double rx, double ry, int step, int preconditioner_type)
{
  // Inner range taken from the global field descriptor.
  int inner_range[] = {field.x_min, field.x_max, field.y_min, field.y_max};

  // The residual computation reuses the kernel of the init step.
  ops_par_loop(tea_leaf_cheby_init_kernel, "tea_leaf_cheby_init_kernel", tea_grid, 2, inner_range,
               ops_arg_dat(w, 1, S2D_00, "double", OPS_WRITE),
               ops_arg_dat(r, 1, S2D_00, "double", OPS_WRITE),
               ops_arg_dat(Kx, 1, S2D_00_P10, "double", OPS_READ),
               ops_arg_dat(Ky, 1, S2D_00_0P1, "double", OPS_READ),
               ops_arg_dat(u, 1, S2D_00_0M1_M10_P10_0P1, "double", OPS_READ),
               ops_arg_dat(u0, 1, S2D_00, "double", OPS_READ),
               ops_arg_gbl(&rx, 1, "double", OPS_READ),
               ops_arg_gbl(&ry, 1, "double", OPS_READ));

  // Dat that drives the update of p: the raw residual r without a
  // preconditioner, otherwise the preconditioned residual z.
  ops_dat direction_source = r;
  if (preconditioner_type != TL_PREC_NONE) {
    if (preconditioner_type == TL_PREC_JAC_BLOCK) {
      tea_block_solve(r, z, cp, bfp, Kx, Ky, rx, ry);
    } else if (preconditioner_type == TL_PREC_JAC_DIAG) {
      tea_diag_solve(r, z, Mi, Kx, Ky, rx, ry);
    }
    direction_source = z;
  }

  ops_par_loop(tea_leaf_axpby_kernel, "tea_leaf_axpby_kernel", tea_grid, 2, inner_range,
               ops_arg_dat(p, 1, S2D_00, "double", OPS_RW),
               ops_arg_dat(direction_source, 1, S2D_00, "double", OPS_READ),
               ops_arg_gbl(&ch_alphas[step], 1, "double", OPS_READ),
               ops_arg_gbl(&ch_betas[step], 1, "double", OPS_READ));

  // u is passed with OPS_INC and p with OPS_READ.
  ops_par_loop(tea_leaf_xpy_kernel, "tea_leaf_xpy_kernel", tea_grid, 2, inner_range,
               ops_arg_dat(u, 1, S2D_00, "double", OPS_INC),
               ops_arg_dat(p, 1, S2D_00, "double", OPS_READ));
}
Пример #4
0
/*
 * Launches accelerate_kernel over the inner range extended by one point at
 * the upper end of both dimensions.
 *
 * The kernel reads density0, volume, pressure, viscosity, xarea, yarea, xvel0
 * and yvel0, writes work_array1, and accumulates into xvel1 and yvel1
 * (both passed with OPS_INC).
 */
void accelerate()
{
  // Cleared on entry; the original note says it is not used yet because of a
  // reduction issue ("OpenA" in the original comment, presumably OpenACC).
  error_condition = 0;

  // Inner range plus 1, built straight from the global field descriptor.
  int inner_plus1_range[] = {field.x_min, field.x_max + 1,
                             field.y_min, field.y_max + 1};

  ops_par_loop(accelerate_kernel, "accelerate_kernel", clover_grid, 2, inner_plus1_range,
               ops_arg_dat(density0, 1, S2D_00_M10_0M1_M1M1, "double", OPS_READ),
               ops_arg_dat(volume, 1, S2D_00_M10_0M1_M1M1, "double", OPS_READ),
               ops_arg_dat(work_array1, 1, S2D_00, "double", OPS_WRITE),
               ops_arg_dat(xvel0, 1, S2D_00, "double", OPS_READ),
               ops_arg_dat(xvel1, 1, S2D_00, "double", OPS_INC),
               ops_arg_dat(xarea, 1, S2D_00_0M1, "double", OPS_READ),
               ops_arg_dat(pressure, 1, S2D_00_M10_0M1_M1M1, "double", OPS_READ),
               ops_arg_dat(yvel0, 1, S2D_00, "double", OPS_READ),
               ops_arg_dat(yvel1, 1, S2D_00, "double", OPS_INC),
               ops_arg_dat(yarea, 1, S2D_00_M10, "double", OPS_READ),
               ops_arg_dat(viscosity, 1, S2D_00_M10_0M1_M1M1, "double", OPS_READ));
}
Пример #5
0
/*
 * Launches the momentum-advection kernels for one velocity component in one
 * sweep direction.
 *
 * Parameters:
 *   which_vel     selects the velocity dat to advect: 1 -> xvel1, 2 -> yvel1,
 *                 3 -> zvel1. Any other value makes the function return
 *                 without launching a kernel (the original code passed an
 *                 uninitialised dat handle to OPS in that case).
 *   sweep_number  1, 2 or 3; together with dir (and the global advect_x for
 *                 sweep 2) it selects the kernel that fills work_array6 and
 *                 work_array7.
 *   dir           sweep direction: 1 = x, 2 = y, 3 = z.
 *
 * For which_vel == 1 the node flux (work_array1) and the node masses
 * (work_array2 = node_mass_post, work_array3 = node_mass_pre) are recomputed;
 * for the other components the values already stored in those work arrays
 * are reused.
 */
void advec_mom(int which_vel, int sweep_number, int dir)
{
  //initialize sizes using global values
  int x_min = field.x_min;
  int x_max = field.x_max;
  int y_min = field.y_min;
  int y_max = field.y_max;
  int z_min = field.z_min;
  int z_max = field.z_max;

  int rangexyz[] = {x_min-2,x_max+2,y_min-2,y_max+2,z_min-2,z_max+2}; // full range over grid

  // Inner range plus one in x, y and z; used by the final velocity update in
  // every direction.
  int range_inner_plus1xyz[] = {x_min,x_max+1,y_min,y_max+1,z_min,z_max+1};

  // Pick the velocity component to advect.
  ops_dat vel1;
  switch (which_vel) {
    case 1: vel1 = xvel1; break;
    case 2: vel1 = yvel1; break;
    case 3: vel1 = zvel1; break;
    default:
      // No velocity dat corresponds to this selector. Bail out instead of
      // handing an uninitialised handle to ops_arg_dat further down.
      return;
  }

  // Fill work_array6 / work_array7 from volume and the volume fluxes; the
  // kernel depends on the sweep number and direction.
  if(sweep_number==1 && dir == 1) {
    ops_par_loop(advec_mom_kernel_x1, "advec_mom_kernel_x1", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_x, S3D_000_P100, "double", OPS_READ),
        ops_arg_dat(vol_flux_y, S3D_000_0P10, "double", OPS_READ),
        ops_arg_dat(vol_flux_z, S3D_000_00P1, "double", OPS_READ));
  }
  else if(sweep_number==1 && dir == 3) {
    ops_par_loop(advec_mom_kernel_z1, "advec_mom_kernel_z1", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_x, S3D_000_P100, "double", OPS_READ),
        ops_arg_dat(vol_flux_y, S3D_000_0P10, "double", OPS_READ),
        ops_arg_dat(vol_flux_z, S3D_000_00P1, "double", OPS_READ));
  }
  else if (sweep_number==2 && advect_x) {
    ops_par_loop(advec_mom_kernel_x2, "advec_mom_kernel_x2", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_y, S3D_000_0P10, "double", OPS_READ),
        ops_arg_dat(vol_flux_z, S3D_000_00P1, "double", OPS_READ));
  }
  else if (sweep_number==2 && !advect_x) {
    ops_par_loop(advec_mom_kernel_y2, "advec_mom_kernel_y2", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_x, S3D_000_P100, "double", OPS_READ),
        ops_arg_dat(vol_flux_y, S3D_000_0P10, "double", OPS_READ));
  }
  else if (sweep_number==3 && dir == 1) {
    ops_par_loop(advec_mom_kernel_x3, "advec_mom_kernel_x3", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_x, S3D_000_P100, "double", OPS_READ));
  }
  else if (sweep_number==3 && dir == 3) {
    ops_par_loop(advec_mom_kernel_z3, "advec_mom_kernel_z3", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_z, S3D_000_00P1, "double", OPS_READ));
  }

  if (dir == 1) {
    if (which_vel == 1) {
      //Find staggered mesh mass fluxes, nodal masses and volumes.
      int range_fullx_party_partz_1[] = {x_min-2,x_max+2,y_min,y_max+1,z_min,z_max+1}; // full x range, partial y,z range
      ops_par_loop(advec_mom_kernel_mass_flux_x, "advec_mom_kernel_mass_flux_x", clover_grid, 3, range_fullx_party_partz_1,
          ops_arg_dat(work_array1, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(mass_flux_x, S3D_000_fP1M1M1, "double", OPS_READ));

      //Staggered cell mass post and pre advection
      int range_partx_party_partz_1[] = {x_min-1,x_max+2,y_min,y_max+1,z_min,z_max+1}; // partial x,y,z range
      ops_par_loop(advec_mom_kernel_post_pre_advec_x, "advec_mom_kernel_post_pre_advec_x", clover_grid, 3, range_partx_party_partz_1,
          ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(work_array7/*post_vol*/, S3D_000_fM1M1M1, "double", OPS_READ),
          ops_arg_dat(density1, S3D_000_fM1M1M1, "double", OPS_READ),
          ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(work_array1/*node_flux*/, S3D_000_M100, "double", OPS_READ));
    }

    // Momentum flux in x: inner range plus one in x,y,z, starting one point lower in x.
    int range_inner_plus1xyz_minus1x[] = {x_min-1,x_max+1,y_min,y_max+1,z_min,z_max+1};
    ops_par_loop(advec_mom_kernel1_x_nonvector, "advec_mom_kernel1_x", clover_grid, 3, range_inner_plus1xyz_minus1x,
        ops_arg_dat(work_array1/*node_flux*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000_P100, "double", OPS_READ),
        ops_arg_dat(work_array5/*mom_flux*/, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(celldx, S3D_000_P100_M100_STRID3D_X, "double", OPS_READ),
        ops_arg_dat(vel1, S3D_000_P100_P200_M100, "double", OPS_READ));

    // Update the selected velocity component from the momentum flux.
    ops_par_loop(advec_mom_kernel2_x, "advec_mom_kernel2_x", clover_grid, 3, range_inner_plus1xyz,
        ops_arg_dat(vel1, S3D_000, "double", OPS_RW),
        ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array5/*mom_flux*/, S3D_000_M100, "double", OPS_READ));
  }
  else if (dir == 2) {
    if (which_vel == 1) {
      //Find staggered mesh mass fluxes, nodal masses and volumes.
      int range_fully_partx_partz_1[] = {x_min,x_max+1,y_min-2,y_max+2,z_min,z_max+1}; // full y range, partial x,z range
      ops_par_loop(advec_mom_kernel_mass_flux_y, "advec_mom_kernel_mass_flux_y", clover_grid, 3, range_fully_partx_partz_1,
          ops_arg_dat(work_array1, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(mass_flux_y, S3D_000_fM1P1M1, "double", OPS_READ));

      //Staggered cell mass post and pre advection
      int range_party_partx_partz_1[] = {x_min,x_max+1,y_min-1,y_max+2,z_min,z_max+1}; // partial x,y,z range
      ops_par_loop(advec_mom_kernel_post_pre_advec_y, "advec_mom_kernel_post_pre_advec_y", clover_grid, 3, range_party_partx_partz_1,
          ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(work_array7, S3D_000_fM1M1M1, "double", OPS_READ),
          ops_arg_dat(density1, S3D_000_fM1M1M1, "double", OPS_READ),
          ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(work_array1/*node_flux*/, S3D_000_0M10, "double", OPS_READ));
    }

    // Momentum flux in y: inner range plus one in x,y,z, starting one point lower in y.
    int range_plus1xyz_minus1y[] = {x_min,x_max+1,y_min-1,y_max+1,z_min,z_max+1};
    ops_par_loop(advec_mom_kernel1_y_nonvector, "advec_mom_kernel1_y", clover_grid, 3, range_plus1xyz_minus1y,
        ops_arg_dat(work_array1/*node_flux*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000_0P10, "double", OPS_READ),
        ops_arg_dat(work_array5/*mom_flux*/, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(celldy, S3D_000_0P10_0M10_STRID3D_Y, "double", OPS_READ),
        ops_arg_dat(vel1, S3D_000_0P10_0P20_0M10, "double", OPS_READ));

    // Update the selected velocity component from the momentum flux.
    ops_par_loop(advec_mom_kernel2_y, "advec_mom_kernel2_y", clover_grid, 3, range_inner_plus1xyz,
        ops_arg_dat(vel1, S3D_000, "double", OPS_RW),
        ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array5/*mom_flux*/, S3D_000_0M10, "double", OPS_READ));
  }
  else if (dir == 3) {
    if (which_vel == 1) {
      //Find staggered mesh mass fluxes, nodal masses and volumes.
      int range_fullz_partx_party_1[] = {x_min,x_max+1,y_min,y_max+1,z_min-2,z_max+2}; // full z range, partial x,y range
      ops_par_loop(advec_mom_kernel_mass_flux_z, "advec_mom_kernel_mass_flux_z", clover_grid, 3, range_fullz_partx_party_1,
          ops_arg_dat(work_array1, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(mass_flux_z, S3D_000_fM1M1P1, "double", OPS_READ));

      //Staggered cell mass post and pre advection
      int range_party_partx_partz_1[] = {x_min,x_max+1,y_min,y_max+1,z_min-1,z_max+2}; // partial x,y,z range
      ops_par_loop(advec_mom_kernel_post_pre_advec_z, "advec_mom_kernel_post_pre_advec_z", clover_grid, 3, range_party_partx_partz_1,
          ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(work_array7, S3D_000_fM1M1M1, "double", OPS_READ),
          ops_arg_dat(density1, S3D_000_fM1M1M1, "double", OPS_READ),
          ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_WRITE),
          ops_arg_dat(work_array1/*node_flux*/, S3D_000_00M1, "double", OPS_READ));
    }

    // Momentum flux in z: inner range plus one in x,y,z, starting one point lower in z.
    int range_plus1xyz_minus1z[] = {x_min,x_max+1,y_min,y_max+1,z_min-1,z_max+1};
    ops_par_loop(advec_mom_kernel1_z_nonvector, "advec_mom_kernel1_z", clover_grid, 3, range_plus1xyz_minus1z,
        ops_arg_dat(work_array1/*node_flux*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000_00P1, "double", OPS_READ),
        ops_arg_dat(work_array5/*mom_flux*/, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(celldz, S3D_000_00P1_00M1_STRID3D_Z, "double", OPS_READ),
        ops_arg_dat(vel1, S3D_000_00P1_00P2_00M1, "double", OPS_READ));

    // Update the selected velocity component from the momentum flux.
    ops_par_loop(advec_mom_kernel2_z, "advec_mom_kernel2_z", clover_grid, 3, range_inner_plus1xyz,
        ops_arg_dat(vel1, S3D_000, "double", OPS_RW),
        ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_READ),
        ops_arg_dat(work_array5/*mom_flux*/, S3D_000_00M1, "double", OPS_READ));
  }
}
Пример #6
0
/*
 * Reduces volume, mass, internal energy and temperature over the inner range,
 * writes a summary line to g_out and, once the run is complete and a test
 * problem is selected, compares the reduced temperature with the stored
 * reference value for that problem.
 *
 * Reads the globals field, clover_time, step, complete and test_problem.
 * Reference values exist for test problems 1..5 only; for any other positive
 * test_problem a message is printed instead of a PASSED/FAILED verdict (the
 * original code read an uninitialised qa_diff in that case).
 */
void field_summary()
{
  // Reference temperatures, indexed by test_problem - 1.
  static const double expected_temp[] = {
    157.55084183279294,   // test_problem 1
    106.27221178646569,   // test_problem 2 (tea_bm_short.in)
    99.955877498324000,   // test_problem 3
    97.277332050749976,   // test_problem 4
    95.462351583362249    // test_problem 5
  };
  const int num_expected = (int)(sizeof expected_temp / sizeof expected_temp[0]);

  //initialize sizes using global values
  int x_min = field.x_min;
  int x_max = field.x_max;
  int y_min = field.y_min;
  int y_max = field.y_max;

  int rangexy_inner[] = {x_min,x_max,y_min,y_max}; // inner range without border

  double vol= 0.0 , mass = 0.0, ie = 0.0, temp = 0.0;

  ops_par_loop(field_summary_kernel, "field_summary_kernel", tea_grid, 2, rangexy_inner,
      ops_arg_dat(volume, 1, S2D_00, "double", OPS_READ),
      ops_arg_dat(density, 1, S2D_00, "double", OPS_READ),
      ops_arg_dat(energy1, 1, S2D_00, "double", OPS_READ),
      ops_arg_dat(u, 1, S2D_00, "double", OPS_READ),
      ops_arg_reduce(red_vol, 1, "double", OPS_INC),
      ops_arg_reduce(red_mass, 1, "double", OPS_INC),
      ops_arg_reduce(red_ie, 1, "double", OPS_INC),
      ops_arg_reduce(red_temp, 1, "double", OPS_INC));

  // Fetch the reduced values from the reduction handles.
  ops_reduction_result(red_vol,&vol);
  ops_reduction_result(red_mass,&mass);
  ops_reduction_result(red_ie,&ie);
  ops_reduction_result(red_temp,&temp);

  ops_fprintf(g_out,"\n");
  ops_fprintf(g_out,"\n Time %lf\n",clover_time);
  ops_fprintf(g_out,"              %-10s  %-10s  %-15s  %-10s  %-s\n",
  " Volume"," Mass"," Density"," Internal Energy","Temperature");
  ops_fprintf(g_out," step:   %3d   %-10.3E  %-10.3E  %-15.3E  %-10.3E  %-.3E",
          step, vol, mass, mass/vol, ie, temp);

  if(complete == 1 && test_problem > 0) {
    if(test_problem <= num_expected) {
      // Percentage deviation of the reduced temperature from the reference.
      const double qa_diff =
          fabs((100.0 * (temp / expected_temp[test_problem - 1])) - 100.0);

      ops_printf("Test problem %3d is within   %-10.7E%% of the expected solution\n",test_problem, qa_diff);
      ops_fprintf(g_out,"\nTest problem %3d is within   %10.7E%% of the expected solution\n",test_problem, qa_diff);
      if(qa_diff < 0.001) {
        ops_printf(" This test is considered PASSED\n");
        ops_fprintf(g_out," This test is considered PASSED\n");
      }
      else
      {
        ops_printf(" This test is considered FAILED\n");
        ops_fprintf(g_out," This test is considered FAILED\n");
      }
    }
    else {
      // No reference value is stored for this problem number, so no verdict
      // can be given.
      ops_printf("Test problem %3d has no reference solution; QA check skipped\n",test_problem);
      ops_fprintf(g_out,"\nTest problem %3d has no reference solution; QA check skipped\n",test_problem);
    }
  }
  fflush(g_out);
}
Пример #7
0
/*
 * Computes the local timestep and the information reported alongside it.
 *
 * Parameters (all outputs):
 *   local_dt       receives the OPS_MIN reduction of work_array1 (filled by
 *                  calc_dt_kernel), capped at g_big.
 *   local_control  receives the name of the limiting condition ("sound",
 *                  "xvel", "yvel" or "div") via sprintf; the caller must
 *                  supply a buffer large enough for these strings.
 *   xl_pos, yl_pos receive the reductions of cellx / celly over the single
 *                  point selected by *jldt, *kldt.
 *   jldt, kldt     receive the indices of the reported cell.
 *
 * If the resulting timestep is below dtmin, diagnostic information for the
 * selected point is printed with ops_printf.
 */
void calc_dt(double* local_dt, char* local_control,
             double* xl_pos, double* yl_pos, int* jldt, int* kldt)
{
  double jk_control = 1.1;
  int dtl_control;

  //initialize sizes using global values
  int x_min = field.x_min;
  int x_max = field.x_max;
  int y_min = field.y_min;
  int y_max = field.y_max;

  int rangexy_inner[] = {x_min,x_max,y_min,y_max}; // inner range without border

  // Per-cell timestep candidates go into work_array1 ...
  ops_par_loop(calc_dt_kernel, "calc_dt_kernel", clover_grid, 2, rangexy_inner,
    ops_arg_dat(celldx, 1, S2D_00_P10_STRID2D_X, "double", OPS_READ),
    ops_arg_dat(celldy, 1, S2D_00_0P1_STRID2D_Y, "double", OPS_READ),
    ops_arg_dat(soundspeed, 1, S2D_00, "double", OPS_READ),
    ops_arg_dat(viscosity, 1, S2D_00, "double", OPS_READ),
    ops_arg_dat(density0, 1, S2D_00, "double", OPS_READ),
    ops_arg_dat(xvel0, 1, S2D_00_P10_0P1_P1P1, "double", OPS_READ),
    ops_arg_dat(xarea, 1, S2D_00_P10, "double", OPS_READ),
    ops_arg_dat(volume, 1, S2D_00, "double", OPS_READ),
    ops_arg_dat(yvel0, 1, S2D_00_P10_0P1_P1P1, "double", OPS_READ),
    ops_arg_dat(yarea, 1, S2D_00_0P1, "double", OPS_READ),
    ops_arg_dat(work_array1, 1, S2D_00, "double", OPS_WRITE) );

  // ... and are then reduced to their minimum.
  ops_par_loop(calc_dt_kernel_min, "calc_dt_kernel_min", clover_grid, 2, rangexy_inner,
    ops_arg_dat(work_array1, 1, S2D_00, "double", OPS_READ),
    ops_arg_reduce(red_local_dt, 1, "double", OPS_MIN));

  //Extract the minimum timestep information
  // The fractional part of jk_control encodes the limiting condition and the
  // integer part the flattened cell index. With the hard-coded value 1.1
  // dtl_control evaluates to 1 and the integer part to 1.
  dtl_control = 10.01 * (jk_control - (int)(jk_control));
  jk_control = jk_control - (jk_control - (int)(jk_control));
  // NOTE(review): x_max == 2 makes both expressions divide by zero -- confirm
  // that callers never run with such a grid.
  *jldt = ((int)jk_control)%(x_max-2);
  *kldt = 1 + (jk_control/(x_max-2));

  int rangexy_getpoint[] = {*jldt-1+2,*jldt+2,*kldt-1+2,*kldt+2}; // get point value //note +2 added due to boundary

  ops_par_loop(calc_dt_kernel_get, "calc_dt_kernel_getx", clover_grid, 2, rangexy_getpoint,
    ops_arg_dat(cellx, 1, S2D_00_STRID2D_X, "double", OPS_READ),
    ops_arg_dat(celly, 1, S2D_00_STRID2D_Y, "double", OPS_READ),
    ops_arg_reduce(red_xl_pos, 1, "double", OPS_INC),
    ops_arg_reduce(red_yl_pos, 1, "double", OPS_INC));

  ops_reduction_result(red_local_dt, local_dt);
  ops_reduction_result(red_xl_pos, xl_pos);
  ops_reduction_result(red_yl_pos, yl_pos);
  *local_dt = MIN(*local_dt, g_big);

  // Timestep too small: dump the state of the selected point.
  if(*local_dt < dtmin) {
    ops_printf("Timestep information:\n");
    ops_printf("j, k                 : %d, %d\n",*jldt,*kldt);
    // Print both coordinates; the original printed *xl_pos twice.
    ops_printf("x, y                 : %lf, %lf\n",*xl_pos,*yl_pos);
    ops_printf("timestep : %lf\n",*local_dt);

    double output[12] = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0};
    ops_par_loop(calc_dt_kernel_print, "calc_dt_kernel_print", clover_grid, 2,rangexy_getpoint,
      ops_arg_dat(xvel0, 1, S2D_10_M10_01_0M1, "double", OPS_READ),
      ops_arg_dat(yvel0, 1, S2D_10_M10_01_0M1, "double", OPS_READ),
      ops_arg_dat(density0, 1, S2D_00, "double", OPS_READ),
      ops_arg_dat(energy0, 1, S2D_00, "double", OPS_READ),
      ops_arg_dat(pressure, 1, S2D_00, "double", OPS_READ),
      ops_arg_dat(soundspeed, 1, S2D_00, "double", OPS_READ),
      ops_arg_reduce(red_output, 12, "double", OPS_INC));

    ops_reduction_result(red_output, output);
    ops_printf("Cell velocities:\n");
    ops_printf("%E, %E \n",output[0],output[1]); //xvel0(jldt  ,kldt  ),yvel0(jldt  ,kldt  )
    ops_printf("%E, %E \n",output[2],output[3]); //xvel0(jldt+1,kldt  ),yvel0(jldt+1,kldt  )
    ops_printf("%E, %E \n",output[4],output[5]); //xvel0(jldt+1,kldt+1),yvel0(jldt+1,kldt+1)
    ops_printf("%E, %E \n",output[6],output[7]); //xvel0(jldt  ,kldt+1),yvel0(jldt  ,kldt+1)

    ops_printf("density, energy, pressure, soundspeed = %lf, %lf, %lf, %lf \n",
        output[8], output[9], output[10], output[11]);
  }

  // Translate the control code into its name; other codes leave
  // local_control untouched.
  if(dtl_control == 1) sprintf(local_control, "sound");
  if(dtl_control == 2) sprintf(local_control, "xvel");
  if(dtl_control == 3) sprintf(local_control, "yvel");
  if(dtl_control == 4) sprintf(local_control, "div");

}
Пример #8
0
void advec_cell(int sweep_number, int dir)
{
  //initialize sizes using global values
  int x_min = field.x_min;
  int x_max = field.x_max;
  int y_min = field.y_min;
  int y_max = field.y_max;
  int z_min = field.z_min;
  int z_max = field.z_max;

  int rangexyz[] = {x_min-2,x_max+2,y_min-2,y_max+2,z_min-2,z_max+2}; // full range over grid
  int rangexyz_inner[] = {x_min,x_max,y_min,y_max,z_min,z_max}; // inner range without border

  int rangexyz_inner_plus2x[] = {x_min,x_max+2,y_min,y_max,z_min,z_max}; // inner range with +2 in x
  int rangexyz_inner_plus2yz[] = {x_min,x_max,y_min,y_max+2,z_min,z_max+2}; // inner range with +2 in y and z
  int rangexyz_inner_plus2z[] = {x_min,x_max,y_min,y_max,z_min,z_max+2}; // inner range with +2 in z

  //printf("direction: %d sweep_number %d \n", dir, sweep_number);

  if(dir == g_xdir) {

    if(sweep_number == 1) {
      ops_par_loop(advec_cell_kernel1_xdir, "advec_cell_kernel1_xdir", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ),
        ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ),
        ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ));
    }
    else if (sweep_number == 3) {
      ops_par_loop(advec_cell_kernel2_xdir, "advec_cell_kernel2_xdir", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ));
    }

    ops_par_loop(advec_cell_kernel3_xdir, "advec_cell_kernel3_xdir", clover_grid, 3, rangexyz_inner_plus2x,
      ops_arg_dat(vol_flux_x, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array1, 1, S3D_000_M100, "double", OPS_READ),
      ops_arg_dat(xx, 1, S3D_000_P100_STRID3D_X, "int", OPS_READ),
      ops_arg_dat(vertexdx, 1, S3D_000_P100_M100_STRID3D_X, "double", OPS_READ),
      ops_arg_dat(density1, 1, S3D_000_P100_M100_M200, "double", OPS_READ),
      ops_arg_dat(energy1, 1, S3D_000_P100_M100_M200, "double", OPS_READ),
      ops_arg_dat(mass_flux_x, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE));

    ops_par_loop(advec_cell_kernel4_xdir, "advec_cell_kernel4_xdir", clover_grid, 3, rangexyz_inner,
      ops_arg_dat(density1, 1, S3D_000, "double", OPS_RW),
      ops_arg_dat(energy1, 1, S3D_000, "double", OPS_RW),
      ops_arg_dat(mass_flux_x, 1, S3D_000_P100, "double", OPS_READ),
      ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ),
      ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array4, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array5, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array7, 1, S3D_000_P100, "double", OPS_READ));

  }
  else if(dir == g_ydir) {
    if(sweep_number == 2) {
      if (advect_x) {
      ops_par_loop(advec_cell_kernel1_ydir, "advec_cell_kernel1_ydir", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ),
        ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ));
    }
    else {
      ops_par_loop(advec_cell_kernel2_ydir, "advec_cell_kernel2_ydir", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ),
        ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ));
    }
  }

    ops_par_loop(advec_cell_kernel3_ydir, "advec_cell_kernel3_ydir", clover_grid, 3, rangexyz_inner_plus2yz,
      ops_arg_dat(vol_flux_y, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array1, 1, S3D_000_0M10, "double", OPS_READ),
      ops_arg_dat(yy, 1, S3D_000_0P10_STRID3D_Y, "int", OPS_READ),
      ops_arg_dat(vertexdy, 1, S3D_000_0P10_0M10_STRID3D_Y, "double", OPS_READ),
      ops_arg_dat(density1, 1, S3D_000_0P10_0M10_0M20, "double", OPS_READ),
      ops_arg_dat(energy1, 1, S3D_000_0P10_0M10_0M20, "double", OPS_READ),
      ops_arg_dat(mass_flux_y, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE));


    ops_par_loop(advec_cell_kernel4_ydir, "advec_cell_kernel4_ydir", clover_grid, 3, rangexyz_inner,
      ops_arg_dat(density1, 1, S3D_000, "double", OPS_RW),
      ops_arg_dat(energy1, 1, S3D_000, "double", OPS_RW),
      ops_arg_dat(mass_flux_y, 1, S3D_000_0P10, "double", OPS_READ),
      ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ),
      ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array4, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array5, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array7, 1, S3D_000_0P10, "double", OPS_READ));

  }
  else if(dir == g_zdir) {

    if(sweep_number == 1) {
      ops_par_loop(advec_cell_kernel1_zdir, "advec_cell_kernel1_zdir", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ),
        ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ),
        ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ));
    }
    else if (sweep_number == 3) {
      ops_par_loop(advec_cell_kernel2_zdir, "advec_cell_kernel2_zdir", clover_grid, 3, rangexyz,
        ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE),
        ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ),
        ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ));
    }

    ops_par_loop(advec_cell_kernel3_zdir, "advec_cell_kernel3_zdir", clover_grid, 3, rangexyz_inner_plus2z,
      ops_arg_dat(vol_flux_z, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array1, 1, S3D_000_00M1, "double", OPS_READ),
      ops_arg_dat(zz, 1, S3D_000_00P1_STRID3D_Z, "int", OPS_READ),
      ops_arg_dat(vertexdz, 1, S3D_000_00P1_00M1_STRID3D_Z, "double", OPS_READ),
      ops_arg_dat(density1, 1, S3D_000_00P1_00M1_00M2, "double", OPS_READ),
      ops_arg_dat(energy1, 1, S3D_000_00P1_00M1_00M2, "double", OPS_READ),
      ops_arg_dat(mass_flux_z, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE));

    ops_par_loop(advec_cell_kernel4_zdir, "advec_cell_kernel4_zdir", clover_grid, 3, rangexyz_inner,
      ops_arg_dat(density1, 1, S3D_000, "double", OPS_RW),
      ops_arg_dat(energy1, 1, S3D_000, "double", OPS_RW),
      ops_arg_dat(mass_flux_z, 1, S3D_000_00P1, "double", OPS_READ),
      ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ),
      ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ),
      ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array4, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array5, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE),
      ops_arg_dat(work_array7, 1, S3D_000_00P1, "double", OPS_READ));

  }
}
Пример #9
0
void update_halo(int* fields, int depth)
{
  //initialize sizes using global values
  int x_min = field.x_min;
  int x_max = field.x_max;
  int y_min = field.y_min;
  int y_max = field.y_max;
  int z_min = field.z_min;
  int z_max = field.z_max;

  //
  //density0, energy0, density1, energy1, pressure, viscosity and soundspeed
  // all has the same boundary ranges
  //


  int rangexy_b2a[] = {x_min-depth,x_max+depth,y_min-2,y_min-1,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel1_b2, "update_halo_kernel1", clover_grid, 3, rangexy_b2a,
              ops_arg_dat_opt(density0, 1, S3D_000_0P30, "double", OPS_RW, fields[FIELD_DENSITY0]),
              ops_arg_dat_opt(density1, 1, S3D_000_0P30, "double", OPS_RW, fields[FIELD_DENSITY1]),
              ops_arg_dat_opt(energy0, 1, S3D_000_0P30, "double", OPS_RW, fields[FIELD_ENERGY0]),
              ops_arg_dat_opt(energy1, 1, S3D_000_0P30, "double", OPS_RW, fields[FIELD_ENERGY1]),
              ops_arg_dat_opt(pressure, 1, S3D_000_0P30, "double", OPS_RW, fields[FIELD_PRESSURE]),
              ops_arg_dat_opt(viscosity, 1, S3D_000_0P30, "double", OPS_RW, fields[FIELD_VISCOSITY]),
              ops_arg_dat_opt(soundspeed, 1, S3D_000_0P30, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_b1a[] = {x_min-depth,x_max+depth,y_min-1,y_min,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel1_b1, "update_halo_kernel1", clover_grid, 3, rangexy_b1a,
              ops_arg_dat_opt(density0, 1, S3D_000_0P10, "double", OPS_RW, fields[FIELD_DENSITY0]),
              ops_arg_dat_opt(density1, 1, S3D_000_0P10, "double", OPS_RW, fields[FIELD_DENSITY1]),
              ops_arg_dat_opt(energy0, 1, S3D_000_0P10, "double", OPS_RW, fields[FIELD_ENERGY0]),
              ops_arg_dat_opt(energy1, 1, S3D_000_0P10, "double", OPS_RW, fields[FIELD_ENERGY1]),
              ops_arg_dat_opt(pressure, 1, S3D_000_0P10, "double", OPS_RW, fields[FIELD_PRESSURE]),
              ops_arg_dat_opt(viscosity, 1, S3D_000_0P10, "double", OPS_RW, fields[FIELD_VISCOSITY]),
              ops_arg_dat_opt(soundspeed, 1, S3D_000_0P10, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t2a[] = {x_min-depth,x_max+depth,y_max+1,y_max+2,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel1_t2, "update_halo_kernel1", clover_grid, 3, rangexy_t2a,
              ops_arg_dat_opt(density0, 1, S3D_000_0M30, "double", OPS_RW, fields[FIELD_DENSITY0]),
              ops_arg_dat_opt(density1, 1, S3D_000_0M30, "double", OPS_RW, fields[FIELD_DENSITY1]),
              ops_arg_dat_opt(energy0, 1, S3D_000_0M30, "double", OPS_RW, fields[FIELD_ENERGY0]),
              ops_arg_dat_opt(energy1, 1, S3D_000_0M30, "double", OPS_RW, fields[FIELD_ENERGY1]),
              ops_arg_dat_opt(pressure, 1, S3D_000_0M30, "double", OPS_RW, fields[FIELD_PRESSURE]),
              ops_arg_dat_opt(viscosity, 1, S3D_000_0M30, "double", OPS_RW, fields[FIELD_VISCOSITY]),
              ops_arg_dat_opt(soundspeed, 1, S3D_000_0M30, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t1a[] = {x_min-depth,x_max+depth,y_max,y_max+1,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel1_t1, "update_halo_kernel1", clover_grid, 3, rangexy_t1a,
               ops_arg_dat_opt(density0, 1, S3D_000_0M10, "double", OPS_RW, fields[FIELD_DENSITY0]),
               ops_arg_dat_opt(density1, 1, S3D_000_0M10, "double", OPS_RW, fields[FIELD_DENSITY1]),
               ops_arg_dat_opt(energy0, 1, S3D_000_0M10, "double", OPS_RW, fields[FIELD_ENERGY0]),
               ops_arg_dat_opt(energy1, 1, S3D_000_0M10, "double", OPS_RW, fields[FIELD_ENERGY1]),
               ops_arg_dat_opt(pressure, 1, S3D_000_0M10, "double", OPS_RW, fields[FIELD_PRESSURE]),
               ops_arg_dat_opt(viscosity, 1, S3D_000_0M10, "double", OPS_RW, fields[FIELD_VISCOSITY]),
               ops_arg_dat_opt(soundspeed, 1, S3D_000_0M10, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l2a[] = {x_min-2,x_min-1,y_min-depth,y_max+depth,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel1_l2, "update_halo_kernel", clover_grid, 3, rangexy_l2a,
               ops_arg_dat_opt(density0, 1, S3D_000_P300, "double", OPS_RW, fields[FIELD_DENSITY0]),
               ops_arg_dat_opt(density1, 1, S3D_000_P300, "double", OPS_RW, fields[FIELD_DENSITY1]),
               ops_arg_dat_opt(energy0, 1, S3D_000_P300, "double", OPS_RW, fields[FIELD_ENERGY0]),
               ops_arg_dat_opt(energy1, 1, S3D_000_P300, "double", OPS_RW, fields[FIELD_ENERGY1]),
               ops_arg_dat_opt(pressure, 1, S3D_000_P300, "double", OPS_RW, fields[FIELD_PRESSURE]),
               ops_arg_dat_opt(viscosity, 1, S3D_000_P300, "double", OPS_RW, fields[FIELD_VISCOSITY]),
               ops_arg_dat_opt(soundspeed, 1, S3D_000_P300, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l1a[] = {x_min-1,x_min,y_min-depth,y_max+depth,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel1_l1, "update_halo_kernel", clover_grid, 3, rangexy_l1a,
               ops_arg_dat_opt(density0, 1, S3D_000_P100, "double", OPS_RW, fields[FIELD_DENSITY0]),
               ops_arg_dat_opt(density1, 1, S3D_000_P100, "double", OPS_RW, fields[FIELD_DENSITY1]),
               ops_arg_dat_opt(energy0, 1, S3D_000_P100, "double", OPS_RW, fields[FIELD_ENERGY0]),
               ops_arg_dat_opt(energy1, 1, S3D_000_P100, "double", OPS_RW, fields[FIELD_ENERGY1]),
               ops_arg_dat_opt(pressure, 1, S3D_000_P100, "double", OPS_RW, fields[FIELD_PRESSURE]),
               ops_arg_dat_opt(viscosity, 1, S3D_000_P100, "double", OPS_RW, fields[FIELD_VISCOSITY]),
               ops_arg_dat_opt(soundspeed, 1, S3D_000_P100, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r2a[] = {x_max+1,x_max+2,y_min-depth,y_max+depth,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel1_r2, "update_halo_kernel", clover_grid, 3, rangexy_r2a,
               ops_arg_dat_opt(density0, 1, S3D_000_M300, "double", OPS_RW, fields[FIELD_DENSITY0]),
               ops_arg_dat_opt(density1, 1, S3D_000_M300, "double", OPS_RW, fields[FIELD_DENSITY1]),
               ops_arg_dat_opt(energy0, 1, S3D_000_M300, "double", OPS_RW, fields[FIELD_ENERGY0]),
               ops_arg_dat_opt(energy1, 1, S3D_000_M300, "double", OPS_RW, fields[FIELD_ENERGY1]),
               ops_arg_dat_opt(pressure, 1, S3D_000_M300, "double", OPS_RW, fields[FIELD_PRESSURE]),
               ops_arg_dat_opt(viscosity, 1, S3D_000_M300, "double", OPS_RW, fields[FIELD_VISCOSITY]),
               ops_arg_dat_opt(soundspeed, 1, S3D_000_M300, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r1a[] = {x_max,x_max+1,y_min-depth,y_max+depth,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel1_r1, "update_halo_kernel", clover_grid, 3,
               rangexy_r1a, ops_arg_dat_opt(density0, 1, S3D_000_M100, "double",
                                            OPS_RW, fields[FIELD_DENSITY0]),
               ops_arg_dat_opt(density1, 1, S3D_000_M100, "double", OPS_RW,
                               fields[FIELD_DENSITY1]),
               ops_arg_dat_opt(energy0, 1, S3D_000_M100, "double", OPS_RW,
                               fields[FIELD_ENERGY0]),
               ops_arg_dat_opt(energy1, 1, S3D_000_M100, "double", OPS_RW,
                               fields[FIELD_ENERGY1]),
               ops_arg_dat_opt(pressure, 1, S3D_000_M100, "double", OPS_RW,
                               fields[FIELD_PRESSURE]),
               ops_arg_dat_opt(viscosity, 1, S3D_000_M100, "double", OPS_RW,
                               fields[FIELD_VISCOSITY]),
               ops_arg_dat_opt(soundspeed, 1, S3D_000_M100, "double", OPS_RW,
                               fields[FIELD_SOUNDSPEED]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_ba2a[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_min-2,z_min-1};
  if(depth ==2)
  ops_par_loop(update_halo_kernel1_ba2, "update_halo_kernel", clover_grid, 3, rangexy_ba2a,
              ops_arg_dat_opt(density0, 1, S3D_000_00P3, "double", OPS_RW, fields[FIELD_DENSITY0]),
              ops_arg_dat_opt(density1, 1, S3D_000_00P3, "double", OPS_RW, fields[FIELD_DENSITY1]),
              ops_arg_dat_opt(energy0, 1, S3D_000_00P3, "double", OPS_RW, fields[FIELD_ENERGY0]),
              ops_arg_dat_opt(energy1, 1, S3D_000_00P3, "double", OPS_RW, fields[FIELD_ENERGY1]),
              ops_arg_dat_opt(pressure, 1, S3D_000_00P3, "double", OPS_RW, fields[FIELD_PRESSURE]),
              ops_arg_dat_opt(viscosity, 1, S3D_000_00P3, "double", OPS_RW, fields[FIELD_VISCOSITY]),
              ops_arg_dat_opt(soundspeed, 1, S3D_000_00P3, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_ba1a[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_min-1,z_min};
  ops_par_loop(update_halo_kernel1_ba1, "update_halo_kernel", clover_grid, 3, rangexy_ba1a,
              ops_arg_dat_opt(density0, 1, S3D_000_00P1, "double", OPS_RW, fields[FIELD_DENSITY0]),
              ops_arg_dat_opt(density1, 1, S3D_000_00P1, "double", OPS_RW, fields[FIELD_DENSITY1]),
              ops_arg_dat_opt(energy0, 1, S3D_000_00P1, "double", OPS_RW, fields[FIELD_ENERGY0]),
              ops_arg_dat_opt(energy1, 1, S3D_000_00P1, "double", OPS_RW, fields[FIELD_ENERGY1]),
              ops_arg_dat_opt(pressure, 1, S3D_000_00P1, "double", OPS_RW, fields[FIELD_PRESSURE]),
              ops_arg_dat_opt(viscosity, 1, S3D_000_00P1, "double", OPS_RW, fields[FIELD_VISCOSITY]),
              ops_arg_dat_opt(soundspeed, 1, S3D_000_00P1, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_fr2a[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_max+1,z_max+2};
  if(depth ==2)
  ops_par_loop(update_halo_kernel1_fr2, "update_halo_kernel", clover_grid, 3, rangexy_fr2a,
              ops_arg_dat_opt(density0, 1, S3D_000_00M3, "double", OPS_RW, fields[FIELD_DENSITY0]),
              ops_arg_dat_opt(density1, 1, S3D_000_00M3, "double", OPS_RW, fields[FIELD_DENSITY1]),
              ops_arg_dat_opt(energy0, 1, S3D_000_00M3, "double", OPS_RW, fields[FIELD_ENERGY0]),
              ops_arg_dat_opt(energy1, 1, S3D_000_00M3, "double", OPS_RW, fields[FIELD_ENERGY1]),
              ops_arg_dat_opt(pressure, 1, S3D_000_00M3, "double", OPS_RW, fields[FIELD_PRESSURE]),
              ops_arg_dat_opt(viscosity, 1, S3D_000_00M3, "double", OPS_RW, fields[FIELD_VISCOSITY]),
              ops_arg_dat_opt(soundspeed, 1, S3D_000_00M3, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_fr1a[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_max,z_max+1};
  // Each optional dat must be enabled by its own fields[] entry, exactly as in
  // the other update_halo_kernel1_* loops of this group. energy0 and energy1
  // were previously gated by FIELD_ENERGY1 and FIELD_DENSITY0 respectively,
  // so the dats marked active here could disagree with the fields[] array
  // that is handed to the kernel as its last argument.
  ops_par_loop(update_halo_kernel1_fr1, "update_halo_kernel", clover_grid, 3, rangexy_fr1a,
              ops_arg_dat_opt(density0, 1, S3D_000_00M1, "double", OPS_RW, fields[FIELD_DENSITY0]),
              ops_arg_dat_opt(density1, 1, S3D_000_00M1, "double", OPS_RW, fields[FIELD_DENSITY1]),
              ops_arg_dat_opt(energy0, 1, S3D_000_00M1, "double", OPS_RW, fields[FIELD_ENERGY0]),
              ops_arg_dat_opt(energy1, 1, S3D_000_00M1, "double", OPS_RW, fields[FIELD_ENERGY1]),
              ops_arg_dat_opt(pressure, 1, S3D_000_00M1, "double", OPS_RW, fields[FIELD_PRESSURE]),
              ops_arg_dat_opt(viscosity, 1, S3D_000_00M1, "double", OPS_RW, fields[FIELD_VISCOSITY]),
              ops_arg_dat_opt(soundspeed, 1, S3D_000_00M1, "double", OPS_RW, fields[FIELD_SOUNDSPEED]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));



  //
  //xvel0, xvel1 has the same boundary ranges and assignment
  //


  int rangexy_b2b[] = {x_min-depth,x_max+1+depth,y_min-2,y_min-1,z_min-depth,z_max+1+depth};
  if(depth == 2)
  ops_par_loop(update_halo_kernel2_xvel_plus_4_bot, "update_halo_kernel2_xvel_plus_4_bot", clover_grid, 3, rangexy_b2b,
               ops_arg_dat_opt(xvel0, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_XVEL0]),
               ops_arg_dat_opt(xvel1, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_XVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_b1b[] = {x_min-depth,x_max+1+depth,y_min-1,y_min,z_min-depth,z_max+1+depth};
  ops_par_loop(update_halo_kernel2_xvel_plus_2_bot, "update_halo_kernel2_xvel_plus_2_bot", clover_grid, 3, rangexy_b1b,
               ops_arg_dat_opt(xvel0, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_XVEL0]),
               ops_arg_dat_opt(xvel1, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_XVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t2b[] = {x_min-depth,x_max+1+depth,y_max+2,y_max+3,z_min-depth,z_max+1+depth};
  if(depth == 2)
  ops_par_loop(update_halo_kernel2_xvel_plus_4_top, "update_halo_kernel2_xvel_minus_4_top", clover_grid, 3, rangexy_t2b,
               ops_arg_dat_opt(xvel0, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_XVEL0]),
               ops_arg_dat_opt(xvel1, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_XVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t1b[] = {x_min-depth,x_max+1+depth,y_max+1,y_max+2,z_min-depth,z_max+1+depth};
  ops_par_loop(update_halo_kernel2_xvel_plus_2_top, "update_halo_kernel2_xvel_minus_2_top", clover_grid, 3, rangexy_t1b,
               ops_arg_dat_opt(xvel0, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_XVEL0]),
               ops_arg_dat_opt(xvel1, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_XVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l2b[] = {x_min-2,x_min-1,y_min-depth,y_max+1+depth,z_min-depth,z_max+1+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel2_xvel_minus_4_left, "update_halo_kernel2_xvel_plus_4_left", clover_grid, 3, rangexy_l2b,
               ops_arg_dat_opt(xvel0, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_XVEL0]),
               ops_arg_dat_opt(xvel1, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_XVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l1b[] = {x_min-1,x_min,y_min-depth,y_max+1+depth,z_min-depth,z_max+1+depth};
  ops_par_loop(update_halo_kernel2_xvel_minus_2_left, "update_halo_kernel2_xvel_plus_2_left", clover_grid, 3, rangexy_l1b,
               ops_arg_dat_opt(xvel0, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_XVEL0]),
               ops_arg_dat_opt(xvel1, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_XVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r2b[] = {x_max+2,x_max+3,y_min-depth,y_max+1+depth,z_min-depth,z_max+1+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel2_xvel_minus_4_right, "update_halo_kernel2_xvel_minus_4_right", clover_grid, 3, rangexy_r2b,
               ops_arg_dat_opt(xvel0, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_XVEL0]),
               ops_arg_dat_opt(xvel1, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_XVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r1b[] = {x_max+1,x_max+2,y_min-depth,y_max+1+depth,z_min-depth,z_max+1+depth};
  ops_par_loop(update_halo_kernel2_xvel_minus_2_right, "update_halo_kernel2_xvel_minus_2_right", clover_grid, 3, rangexy_r1b,
               ops_arg_dat_opt(xvel0, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_XVEL0]),
               ops_arg_dat_opt(xvel1, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_XVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_ba2b[] = {x_min-depth,x_max+1+depth,y_min-depth,y_max+1+depth,z_min-2,z_min-1};
  if(depth ==2)
  ops_par_loop(update_halo_kernel2_xvel_plus_4_back, "update_halo_kernel2_xvel_plus_4_back", clover_grid, 3, rangexy_ba2b,
              ops_arg_dat_opt(xvel0, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_XVEL0]),
              ops_arg_dat_opt(xvel1, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_XVEL1]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_ba1b[] = {x_min-depth,x_max+1+depth,y_min-depth,y_max+1+depth,z_min-1,z_min};
  ops_par_loop(update_halo_kernel2_xvel_plus_2_back, "update_halo_kernel2_xvel_plus_2_back", clover_grid, 3, rangexy_ba1b,
              ops_arg_dat_opt(xvel0, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_XVEL0]),
              ops_arg_dat_opt(xvel1, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_XVEL1]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_fr2b[] = {x_min-depth,x_max+1+depth,y_min-depth,y_max+1+depth,z_max+2,z_max+3};
  if(depth ==2)
  ops_par_loop(update_halo_kernel2_xvel_plus_4_front, "update_halo_kernel2_xvel_minus_4_front", clover_grid, 3, rangexy_fr2b,
              ops_arg_dat_opt(xvel0, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_XVEL0]),
              ops_arg_dat_opt(xvel1, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_XVEL1]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_fr1b[] = {x_min-depth,x_max+1+depth,y_min-depth,y_max+1+depth,z_max+1,z_max+2};
  ops_par_loop(update_halo_kernel2_xvel_plus_2_front, "update_halo_kernel2_xvel_minus_2_front", clover_grid, 3, rangexy_fr1b,
              ops_arg_dat_opt(xvel0, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_XVEL0]),
              ops_arg_dat_opt(xvel1, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_XVEL1]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  //
  //yvel0, yvel1 has the same boundary ranges and assignment
  //

  if(depth == 2)
  ops_par_loop(update_halo_kernel2_yvel_minus_4_bot, "update_halo_kernel2_yvel_plus_4_bot", clover_grid, 3, rangexy_b2b,
               ops_arg_dat_opt(yvel0, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_YVEL0]),
               ops_arg_dat_opt(yvel1, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_YVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_yvel_minus_2_bot, "update_halo_kernel2_yvel_plus_2_bot", clover_grid, 3, rangexy_b1b,
               ops_arg_dat_opt(yvel0, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_YVEL0]),
               ops_arg_dat_opt(yvel1, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_YVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth == 2)
  ops_par_loop(update_halo_kernel2_yvel_minus_4_top, "update_halo_kernel2_yvel_minus_4_top", clover_grid, 3, rangexy_t2b,
               ops_arg_dat_opt(yvel0, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_YVEL0]),
               ops_arg_dat_opt(yvel1, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_YVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_yvel_minus_2_top, "update_halo_kernel2_yvel_minus_2_top", clover_grid, 3, rangexy_t1b,
               ops_arg_dat_opt(yvel0, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_YVEL0]),
               ops_arg_dat_opt(yvel1, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_YVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth ==2)
  ops_par_loop(update_halo_kernel2_yvel_plus_4_left, "update_halo_kernel2_yvel_plus_4_left", clover_grid, 3, rangexy_l2b,
               ops_arg_dat_opt(yvel0, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_YVEL0]),
               ops_arg_dat_opt(yvel1, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_YVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_yvel_plus_2_left, "update_halo_kernel2_yvel_plus_2_left", clover_grid, 3, rangexy_l1b,
               ops_arg_dat_opt(yvel0, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_YVEL0]),
               ops_arg_dat_opt(yvel1, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_YVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth ==2)
  ops_par_loop(update_halo_kernel2_yvel_plus_4_right, "update_halo_kernel2_yvel_minus_4_right", clover_grid, 3, rangexy_r2b,
               ops_arg_dat_opt(yvel0, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_YVEL0]),
               ops_arg_dat_opt(yvel1, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_YVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));


  ops_par_loop(update_halo_kernel2_yvel_plus_2_right, "update_halo_kernel2_yvel_minus_2_right", clover_grid, 3, rangexy_r1b,
               ops_arg_dat_opt(yvel0, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_YVEL0]),
               ops_arg_dat_opt(yvel1, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_YVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth ==2)
  ops_par_loop(update_halo_kernel2_yvel_plus_4_back, "update_halo_kernel2_yvel_plus_4_back", clover_grid, 3, rangexy_ba2b,
             ops_arg_dat_opt(yvel0, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_YVEL0]),
             ops_arg_dat_opt(yvel1, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_YVEL1]),
             ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_yvel_plus_2_back, "update_halo_kernel2_yvel_plus_2_back", clover_grid, 3, rangexy_ba1b,
             ops_arg_dat_opt(yvel0, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_YVEL0]),
             ops_arg_dat_opt(yvel1, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_YVEL1]),
             ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth ==2)
  ops_par_loop(update_halo_kernel2_yvel_plus_4_front, "update_halo_kernel2_yvel_minus_4_front", clover_grid, 3, rangexy_fr2b,
             ops_arg_dat_opt(yvel0, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_YVEL0]),
             ops_arg_dat_opt(yvel1, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_YVEL1]),
             ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_yvel_plus_2_front, "update_halo_kernel2_yvel_minus_2_front", clover_grid, 3, rangexy_fr1b,
             ops_arg_dat_opt(yvel0, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_YVEL0]),
             ops_arg_dat_opt(yvel1, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_YVEL1]),
             ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  //
  //zvel0, zvel1 has the same boundary ranges and assignment
  //

  if(depth == 2)
  ops_par_loop(update_halo_kernel2_zvel_plus_4_bot, "update_halo_kernel2_zvel_plus_4_bot", clover_grid, 3, rangexy_b2b,
               ops_arg_dat_opt(zvel0, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_ZVEL0]),
               ops_arg_dat_opt(zvel1, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_ZVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_zvel_plus_2_bot, "update_halo_kernel2_zvel_plus_2_bot", clover_grid, 3, rangexy_b1b,
               ops_arg_dat_opt(zvel0, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_ZVEL0]),
               ops_arg_dat_opt(zvel1, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_ZVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth == 2)
  ops_par_loop(update_halo_kernel2_zvel_plus_4_top, "update_halo_kernel2_zvel_minus_4_top", clover_grid, 3, rangexy_t2b,
               ops_arg_dat_opt(zvel0, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_ZVEL0]),
               ops_arg_dat_opt(zvel1, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_ZVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_zvel_plus_2_top, "update_halo_kernel2_zvel_minus_2_top", clover_grid, 3, rangexy_t1b,
               ops_arg_dat_opt(zvel0, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_ZVEL0]),
               ops_arg_dat_opt(zvel1, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_ZVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth ==2)
  ops_par_loop(update_halo_kernel2_zvel_plus_4_left, "update_halo_kernel2_zvel_plus_4_left", clover_grid, 3, rangexy_l2b,
               ops_arg_dat_opt(zvel0, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_ZVEL0]),
               ops_arg_dat_opt(zvel1, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_ZVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_zvel_plus_2_left, "update_halo_kernel2_zvel_plus_2_left", clover_grid, 3, rangexy_l1b,
               ops_arg_dat_opt(zvel0, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_ZVEL0]),
               ops_arg_dat_opt(zvel1, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_ZVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth ==2)
  ops_par_loop(update_halo_kernel2_zvel_plus_4_right, "update_halo_kernel2_zvel_minus_4_right", clover_grid, 3, rangexy_r2b,
               ops_arg_dat_opt(zvel0, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_ZVEL0]),
               ops_arg_dat_opt(zvel1, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_ZVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));


  ops_par_loop(update_halo_kernel2_zvel_plus_2_right, "update_halo_kernel2_zvel_minus_2_right", clover_grid, 3, rangexy_r1b,
               ops_arg_dat_opt(zvel0, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_ZVEL0]),
               ops_arg_dat_opt(zvel1, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_ZVEL1]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth ==2)
  ops_par_loop(update_halo_kernel2_zvel_minus_4_back, "update_halo_kernel2_zvel_plus_4_back", clover_grid, 3, rangexy_ba2b,
             ops_arg_dat_opt(zvel0, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_ZVEL0]),
             ops_arg_dat_opt(zvel1, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_ZVEL1]),
             ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_zvel_minus_2_back, "update_halo_kernel2_zvel_plus_2_back", clover_grid, 3, rangexy_ba1b,
             ops_arg_dat_opt(zvel0, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_ZVEL0]),
             ops_arg_dat_opt(zvel1, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_ZVEL1]),
             ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  if(depth ==2)
  ops_par_loop(update_halo_kernel2_zvel_minus_4_front, "update_halo_kernel2_zvel_minus_4_front", clover_grid, 3, rangexy_fr2b,
             ops_arg_dat_opt(zvel0, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_ZVEL0]),
             ops_arg_dat_opt(zvel1, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_ZVEL1]),
             ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  ops_par_loop(update_halo_kernel2_zvel_minus_2_front, "update_halo_kernel2_zvel_minus_2_front", clover_grid, 3, rangexy_fr1b,
             ops_arg_dat_opt(zvel0, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_ZVEL0]),
             ops_arg_dat_opt(zvel1, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_ZVEL1]),
             ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  //
  //vol_flux_x, mass_flux_x has the same ranges
  //

  int rangexy_b2c[] = {x_min-depth,x_max+1+depth,y_min-2,y_min-1,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel3_plus_4_a, "update_halo_kernel3_plus_4_a", clover_grid, 3, rangexy_b2c,
               ops_arg_dat_opt(vol_flux_x, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
               ops_arg_dat_opt(mass_flux_x, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_b1c[] = {x_min-depth,x_max+1+depth,y_min-1,y_min,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel3_plus_2_a, "update_halo_kernel3_plus_2_a", clover_grid, 3, rangexy_b1c,
               ops_arg_dat_opt(vol_flux_x, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
               ops_arg_dat_opt(mass_flux_x, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));


  int rangexy_t2c[] = {x_min-depth,x_max+1+depth,y_max+1,y_max+2,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel3_plus_4_b, "update_halo_kernel3_plus_4_b", clover_grid, 3, rangexy_t2c,
               ops_arg_dat_opt(vol_flux_x, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
               ops_arg_dat_opt(mass_flux_x, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t1c[] = {x_min-depth,x_max+1+depth,y_max,y_max+1,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel3_plus_2_b, "update_halo_kernel3_plus_2_b", clover_grid, 3, rangexy_t1c,
               ops_arg_dat_opt(vol_flux_x, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
               ops_arg_dat_opt(mass_flux_x, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));


  int rangexy_l2c[] = {x_min-2,x_min-1,y_min-depth,y_max+depth,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel3_minus_4_a, "update_halo_kernel3_minus_4_a", clover_grid, 3, rangexy_l2c,
               ops_arg_dat_opt(vol_flux_x, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
               ops_arg_dat_opt(mass_flux_x, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l1c[] = {x_min-1,x_min,y_min-depth,y_max+depth,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel3_minus_2_a, "update_halo_kernel3_minus_2_a", clover_grid, 3, rangexy_l1c,
               ops_arg_dat_opt(vol_flux_x, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
               ops_arg_dat_opt(mass_flux_x, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r2c[] = {x_max+2,x_max+3,y_min-depth,y_max+depth,z_min-depth,z_max+depth}; //
  if(depth ==2)
  ops_par_loop(update_halo_kernel3_minus_4_b, "update_halo_kernel3_minus_4_b", clover_grid, 3, rangexy_r2c,
               ops_arg_dat_opt(vol_flux_x, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
               ops_arg_dat_opt(mass_flux_x, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r1c[] = {x_max+1,x_max+2,y_min-depth,y_max+depth,z_min-depth,z_max+depth}; //
  ops_par_loop(update_halo_kernel3_minus_2_b, "update_halo_kernel3_minus_2_b", clover_grid, 3, rangexy_r1c,
               ops_arg_dat_opt(vol_flux_x, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
               ops_arg_dat_opt(mass_flux_x, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_back2c[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_min-2,z_min-1};
  if(depth ==2)
  ops_par_loop(update_halo_kernel3_plus_4_back, "update_halo_kernel3_plus_4_back", clover_grid, 3, rangexy_back2c,
              ops_arg_dat_opt(vol_flux_x, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
              ops_arg_dat_opt(mass_flux_x, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_back1c[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_min-1,z_min};
  ops_par_loop(update_halo_kernel3_plus_2_back, "update_halo_kernel3_plus_2_back", clover_grid, 3, rangexy_back1c,
              ops_arg_dat_opt(vol_flux_x, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
              ops_arg_dat_opt(mass_flux_x, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));


  int rangexy_front2c[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_max+1,z_max+2};
  if(depth ==2)
  ops_par_loop(update_halo_kernel3_plus_4_front, "update_halo_kernel3_plus_4_front", clover_grid, 3, rangexy_front2c,
              ops_arg_dat_opt(vol_flux_x, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
              ops_arg_dat_opt(mass_flux_x, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_front1c[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_max,z_max+1};
  ops_par_loop(update_halo_kernel3_plus_2_front, "update_halo_kernel3_plus_2_front", clover_grid, 3, rangexy_front1c,
              ops_arg_dat_opt(vol_flux_x, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_VOL_FLUX_X]),
              ops_arg_dat_opt(mass_flux_x, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_MASS_FLUX_X]),
              ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  //
  //vol_flux_y, mass_flux_y has the same ranges
  //

  int rangexy_b2d[] = {x_min-depth,x_max+depth,y_min-2,y_min-1,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel4_minus_4_a, "update_halo_kernel4_minus_4_a", clover_grid, 3, rangexy_b2d,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_b1d[] = {x_min-depth,x_max+depth,y_min-1,y_min,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel4_minus_2_a, "update_halo_kernel4_minus_2_a", clover_grid, 3, rangexy_b1d,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t2d[] = {x_min-depth,x_max+depth,y_max+2,y_max+3,z_min-depth,z_max+depth}; //
  if(depth ==2)
  ops_par_loop(update_halo_kernel4_minus_4_b, "update_halo_kernel4_minus_4_b", clover_grid, 3, rangexy_t2d,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t1d[] = {x_min-depth,x_max+depth,y_max+1,y_max+2,z_min-depth,z_max+depth}; //
  ops_par_loop(update_halo_kernel4_minus_2_b, "update_halo_kernel4_minus_2_b", clover_grid, 3, rangexy_t1d,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l2d[] = {x_min-2,x_min-1,y_min-depth,y_max+1+depth,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel4_plus_4_a, "update_halo_kernel4_plus_4_a", clover_grid, 3, rangexy_l2d,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l1d[] = {x_min-1,x_min,y_min-depth,y_max+1+depth,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel4_plus_2_a, "update_halo_kernel4_plus_2_a", clover_grid, 3, rangexy_l1d,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r2d[] = {x_max+1,x_max+2,y_min-depth,y_max+1+depth,z_min-depth,z_max+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel4_plus_4_b, "update_halo_kernel4_plus_4_b", clover_grid, 3, rangexy_r2d,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r1d[] = {x_max,x_max+1,y_min-depth,y_max+1+depth,z_min-depth,z_max+depth};
  ops_par_loop(update_halo_kernel4_plus_2_b, "update_halo_kernel4_plus_2_b",clover_grid, 3, rangexy_r1d,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

   if(depth ==2)
   ops_par_loop(update_halo_kernel4_plus_4_back, "update_halo_kernel4_plus_4_back", clover_grid, 3, rangexy_back2c,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

   ops_par_loop(update_halo_kernel4_plus_2_back, "update_halo_kernel4_plus_2_back", clover_grid, 3, rangexy_back1c,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));


   if(depth ==2)
   ops_par_loop(update_halo_kernel4_plus_4_front, "update_halo_kernel4_plus_4_front", clover_grid, 3, rangexy_front2c,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

   ops_par_loop(update_halo_kernel4_plus_2_front, "update_halo_kernel4_plus_2_front", clover_grid, 3, rangexy_front1c,
               ops_arg_dat_opt(vol_flux_y, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_VOL_FLUX_Y]),
               ops_arg_dat_opt(mass_flux_y, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_MASS_FLUX_Y]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  //
  //vol_flux_z, mass_flux_z has the same ranges
  //

  int rangexy_b2e[] = {x_min-depth,x_max+depth,y_min-2,y_min-1,z_min-depth,z_max+1+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel5_plus_4_a, "update_halo_kernel5_plus_4_a", clover_grid, 3, rangexy_b2e,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_0P40, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_b1e[] = {x_min-depth,x_max+depth,y_min-1,y_min,z_min-depth,z_max+1+depth};
  ops_par_loop(update_halo_kernel5_plus_2_a, "update_halo_kernel5_plus_2_a", clover_grid, 3, rangexy_b1e,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_0P20, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t2e[] = {x_min-depth,x_max+depth,y_max+1,y_max+2,z_min-depth,z_max+1+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel5_plus_4_b, "update_halo_kernel5_plus_4_b", clover_grid, 3, rangexy_t2e,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_0M40, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_t1e[] = {x_min-depth,x_max+depth,y_max+0,y_max+1,z_min-depth,z_max+1+depth};
  ops_par_loop(update_halo_kernel5_plus_2_b, "update_halo_kernel5_plus_2_b", clover_grid, 3, rangexy_t1e,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_0M20, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l2e[] = {x_min-2,x_min-1,y_min-depth,y_max+depth,z_min-depth,z_max+1+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel5_plus_4_left, "update_halo_kernel5_plus_4_left", clover_grid, 3, rangexy_l2e,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_P400, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_l1e[] = {x_min-1,x_min,y_min-depth,y_max+depth,z_min-depth,z_max+1+depth};
  ops_par_loop(update_halo_kernel5_plus_2_left, "update_halo_kernel5_plus_2_left", clover_grid, 3, rangexy_l1e,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_P200, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r2e[] = {x_max+1,x_max+2,y_min-depth,y_max+depth,z_min-depth,z_max+1+depth};
  if(depth ==2)
  ops_par_loop(update_halo_kernel5_plus_4_right, "update_halo_kernel5_plus_4_right", clover_grid, 3, rangexy_r2e,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_M400, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

  int rangexy_r1e[] = {x_max,x_max+1,y_min-depth,y_max+depth,z_min-depth,z_max+1+depth};
  ops_par_loop(update_halo_kernel5_plus_2_right, "update_halo_kernel5_plus_2_right",clover_grid, 3, rangexy_r1e,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_M200, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

   if(depth ==2) //TODO: is this really the same range? x should have +1
   ops_par_loop(update_halo_kernel5_minus_4_back, "update_halo_kernel5_minus_4_back", clover_grid, 3, rangexy_back2c,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_00P4, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

   ops_par_loop(update_halo_kernel5_minus_2_back, "update_halo_kernel5_minus_2_back", clover_grid, 3, rangexy_back1c,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_00P2, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));

   int rangexy_front2d[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_max+2,z_max+3};
   if(depth ==2)
   ops_par_loop(update_halo_kernel5_minus_4_front, "update_halo_kernel5_minus_4_front", clover_grid, 3, rangexy_front2d,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_00M4, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));
   int rangexy_front1d[] = {x_min-depth,x_max+depth,y_min-depth,y_max+depth,z_max+1,z_max+2};
   ops_par_loop(update_halo_kernel5_minus_2_front, "update_halo_kernel5_minus_2_front", clover_grid, 3, rangexy_front1d,
               ops_arg_dat_opt(vol_flux_z, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_VOL_FLUX_Z]),
               ops_arg_dat_opt(mass_flux_z, 1, S3D_000_00M2, "double", OPS_RW, fields[FIELD_MASS_FLUX_Z]),
               ops_arg_gbl(fields, NUM_FIELDS, "int", OPS_READ));
}
Пример #10
0
int main(int argc, char **argv)
{
  /**-------------------------- Initialisation --------------------------**/

  // OPS initialisation
  ops_init(argc,argv,6);

  int logical_size_x = 200;
  int logical_size_y = 200;
  int ngrid_x = 1;
  int ngrid_y = 1;
  int n_iter = 10000;
  dx = 0.01;
  dy = 0.01;
  ops_decl_const("dx",1,"double",&dx);
  ops_decl_const("dy",1,"double",&dy);

  //declare blocks
  ops_block *blocks = (ops_block *)malloc(ngrid_x*ngrid_y*sizeof(ops_block*));
  char buf[50];
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      sprintf(buf,"block %d,%d",i,j);
      blocks[i+ngrid_x*j] = ops_decl_block(2,buf);
    }
  }

  //declare stencils
  int s2D_00[]         = {0,0};
  ops_stencil S2D_00 = ops_decl_stencil( 2, 1, s2D_00, "00");
  int s2D_00_P10_M10_0P1_0M1[]         = {0,0, 1,0, -1,0, 0,1, 0,-1};
  ops_stencil S2D_00_P10_M10_0P1_0M1 = ops_decl_stencil( 2, 5, s2D_00_P10_M10_0P1_0M1, "00:10:-10:01:0-1");

  ops_reduction red_err = ops_decl_reduction_handle(sizeof(double), "double", "err");

  //declare datasets
  int d_p[2] = {1,1}; //max halo depths for the dat in the possitive direction
  int d_m[2] = {-1,-1}; //max halo depths for the dat in the negative direction
  int base[2] = {0,0};
  int uniform_size[2] = {(logical_size_x-1)/ngrid_x+1,(logical_size_y-1)/ngrid_y+1};
  double* temp = NULL;
  ops_dat *coordx = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*));
  ops_dat *coordy = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*));
  ops_dat *u = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*));
  ops_dat *u2 = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*));
  ops_dat *f = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*));
  ops_dat *ref = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*));
  int *sizes = (int*)malloc(2*ngrid_x*ngrid_y*sizeof(int));
  int *disps = (int*)malloc(2*ngrid_x*ngrid_y*sizeof(int));

  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      int size[2] = {uniform_size[0], uniform_size[1]};
      if ((i+1)*size[0]>logical_size_x) size[0] = logical_size_x - i*size[0];
      if ((j+1)*size[1]>logical_size_y) size[1] = logical_size_y - j*size[1];
      sprintf(buf,"coordx %d,%d",i,j);
      coordx[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      sprintf(buf,"coordy %d,%d",i,j);
      coordy[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      sprintf(buf,"u %d,%d",i,j);
      u[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      sprintf(buf,"u2 %d,%d",i,j);
      u2[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      sprintf(buf,"f %d,%d",i,j);
      f[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);
      sprintf(buf,"ref %d,%d",i,j);
      ref[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf);

      sizes[2*(i+ngrid_x*j)]   = size[0];
      sizes[2*(i+ngrid_x*j)+1] = size[1];
      disps[2*(i+ngrid_x*j)]   = i*uniform_size[0];
      disps[2*(i+ngrid_x*j)+1] = j*uniform_size[1];
    }
  }

  ops_halo *halos = (ops_halo *)malloc(2*(ngrid_x*(ngrid_y-1)+(ngrid_x-1)*ngrid_y)*sizeof(ops_halo *));
  int off = 0;
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      if (i > 0) {
        int halo_iter[] = {1,sizes[2*(i+ngrid_x*j)+1]};
        int base_from[] = {sizes[2*(i-1+ngrid_x*j)]-1,0};
        int base_to[] = {-1,0};
        int dir[] = {1,2};
        halos[off++] = ops_decl_halo(u[i-1+ngrid_x*j], u[i+ngrid_x*j], halo_iter, base_from, base_to, dir, dir);
        base_from[0] = 0; base_to[0] = sizes[2*(i+ngrid_x*j)];
        halos[off++] = ops_decl_halo(u[i+ngrid_x*j], u[i-1+ngrid_x*j], halo_iter, base_from, base_to, dir, dir);
      }
      if (j > 0) {
        int halo_iter[] = {sizes[2*(i+ngrid_x*j)],1};
        int base_from[] = {0,sizes[2*(i+ngrid_x*(j-1))+1]-1};
        int base_to[] = {0,-1};
        int dir[] = {1,2};
        halos[off++] = ops_decl_halo(u[i+ngrid_x*(j-1)], u[i+ngrid_x*j], halo_iter, base_from, base_to, dir, dir);
        base_from[1] = 0; base_to[1] = sizes[2*(i+ngrid_x*j)+1];
        halos[off++] = ops_decl_halo(u[i+ngrid_x*j], u[i+ngrid_x*(j-1)], halo_iter, base_from, base_to, dir, dir);
      }
    }
  }
  if (off != 2*(ngrid_x*(ngrid_y-1)+(ngrid_x-1)*ngrid_y)) printf("Something is not right\n");
  ops_halo_group u_halos = ops_decl_halo_group(off,halos);

  ops_partition("");
  ops_checkpointing_init("check.h5", 5.0);
  /**-------------------------- Computations --------------------------**/


  double ct0, ct1, et0, et1;
  ops_timers_core(&ct0, &et0);

  //populate forcing, reference solution and boundary conditions
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      int iter_range[] = {-1,sizes[2*(i+ngrid_x*j)]+1,-1,sizes[2*(i+ngrid_x*j)+1]+1};
      ops_par_loop(poisson_kernel_populate, "poisson_kernel_populate", blocks[i+ngrid_x*j], 2, iter_range,
               ops_arg_gbl(&disps[2*(i+ngrid_x*j)], 1, "int", OPS_READ),
               ops_arg_gbl(&disps[2*(i+ngrid_x*j)+1], 1, "int", OPS_READ),
               ops_arg_idx(),
               ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_WRITE),
               ops_arg_dat(f[i+ngrid_x*j], S2D_00, "double", OPS_WRITE),
               ops_arg_dat(ref[i+ngrid_x*j], S2D_00, "double", OPS_WRITE));
    }
  }

  //initial guess 0
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]};
      ops_par_loop(poisson_kernel_initialguess, "poisson_kernel_initialguess", blocks[i+ngrid_x*j], 2, iter_range,
               ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_WRITE));
    }
  }

  for (int iter = 0; iter < n_iter; iter++) {
    ops_halo_transfer(u_halos);
    for (int j = 0; j < ngrid_y; j++) {
      for (int i = 0; i < ngrid_x; i++) {
        int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]};
        ops_par_loop(poisson_kernel_stencil, "poisson_kernel_stencil", blocks[i+ngrid_x*j], 2, iter_range,
                 ops_arg_dat(u[i+ngrid_x*j], S2D_00_P10_M10_0P1_0M1, "double", OPS_READ),
                 ops_arg_dat(f[i+ngrid_x*j], S2D_00, "double", OPS_READ),
                 ops_arg_dat(u2[i+ngrid_x*j], S2D_00, "double", OPS_WRITE));
      }
    }
    for (int j = 0; j < ngrid_y; j++) {
      for (int i = 0; i < ngrid_x; i++) {
        int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]};
        ops_par_loop(poisson_kernel_update, "poisson_kernel_update", blocks[i+ngrid_x*j], 2, iter_range,
                 ops_arg_dat(u2[i+ngrid_x*j], S2D_00, "double", OPS_READ),
                 ops_arg_dat(u[i+ngrid_x*j] , S2D_00, "double", OPS_WRITE));
      }
    }
  }

  double err = 0.0;
  for (int j = 0; j < ngrid_y; j++) {
    for (int i = 0; i < ngrid_x; i++) {
      int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]};
      ops_par_loop(poisson_kernel_error, "poisson_kernel_error", blocks[i+ngrid_x*j], 2, iter_range,
               ops_arg_dat(u[i+ngrid_x*j],    S2D_00, "double", OPS_READ),
               ops_arg_dat(ref[i+ngrid_x*j] , S2D_00, "double", OPS_READ),
               ops_arg_reduce(red_err, 1, "double", OPS_INC));
    }
  }

  ops_reduction_result(red_err,&err);
  ops_printf("Total error: %g\n",err);

  ops_timers_core(&ct1, &et1);
  ops_timing_output();

  ops_printf("\nTotal Wall time %lf\n",et1-et0);

  ops_exit();
}