void reset_field() { error_condition = 0; // Not used yet due to issue with OpenA reduction //initialize sizes using global values int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int z_min = field.z_min; int z_max = field.z_max; int rangexyz_inner[] = {x_min,x_max,y_min,y_max,z_min,z_max}; // inner range without border ops_par_loop(reset_field_kernel1, "reset_field_kernel1", clover_grid, 3, rangexyz_inner, ops_arg_dat(density0, S3D_000, "double", OPS_WRITE), ops_arg_dat(density1, S3D_000, "double", OPS_READ), ops_arg_dat(energy0, S3D_000, "double", OPS_WRITE), ops_arg_dat(energy1, S3D_000, "double", OPS_READ)); int rangexyz_inner_plus1xyz[] = {x_min,x_max+1,y_min,y_max+1,z_min,z_max+1}; // inner range without border ops_par_loop(reset_field_kernel2, "reset_field_kernel2", clover_grid, 3, rangexyz_inner_plus1xyz, ops_arg_dat(xvel0, S3D_000, "double", OPS_WRITE), ops_arg_dat(xvel1, S3D_000, "double", OPS_READ), ops_arg_dat(yvel0, S3D_000, "double", OPS_WRITE), ops_arg_dat(yvel1, S3D_000, "double", OPS_READ), ops_arg_dat(zvel0, S3D_000, "double", OPS_WRITE), ops_arg_dat(zvel1, S3D_000, "double", OPS_READ)); }
void flux_calc() { error_condition = 0; int x_cells = grid.x_cells; int y_cells = grid.y_cells; int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int z_min = field.z_min; int z_max = field.z_max; int rangexyz_inner_plus1x[] = {x_min,x_max+1,y_min,y_max,z_min,z_max}; ops_par_loop_flux_calc_kernelx("flux_calc_kernelx", clover_grid, 3, rangexyz_inner_plus1x, ops_arg_dat(vol_flux_x, S3D_000, "double", OPS_WRITE), ops_arg_dat(xarea, S3D_000, "double", OPS_READ), ops_arg_dat(xvel0, S3D_000_f0P1P1, "double", OPS_READ), ops_arg_dat(xvel1, S3D_000_f0P1P1, "double", OPS_READ)); int rangexyz_inner_plus1y[] = {x_min,x_max,y_min,y_max+1,z_min,z_max}; ops_par_loop_flux_calc_kernely("flux_calc_kernely", clover_grid, 3, rangexyz_inner_plus1y, ops_arg_dat(vol_flux_y, S3D_000, "double", OPS_WRITE), ops_arg_dat(yarea, S3D_000, "double", OPS_READ), ops_arg_dat(yvel0, S3D_000_fP10P1, "double", OPS_READ), ops_arg_dat(yvel1, S3D_000_fP10P1, "double", OPS_READ)); int rangexyz_inner_plus1z[] = {x_min,x_max,y_min,y_max,z_min,z_max+1}; ops_par_loop_flux_calc_kernelz("flux_calc_kernelz", clover_grid, 3, rangexyz_inner_plus1z, ops_arg_dat(vol_flux_z, S3D_000, "double", OPS_WRITE), ops_arg_dat(zarea, S3D_000, "double", OPS_READ), ops_arg_dat(zvel0, S3D_000_fP1P10, "double", OPS_READ), ops_arg_dat(zvel1, S3D_000_fP1P10, "double", OPS_READ)); }
void initialise_chunk() { int x_cells = grid.x_cells; int y_cells = grid.y_cells; int z_cells = grid.z_cells; int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int z_min = field.z_min; int z_max = field.z_max; int rangex[] = {x_min-2, x_max+3, y_min-2, y_max+3, z_min-2, z_max+3}; int rangey[] = {x_min-2, x_max+3, y_min-2, y_max+3, z_min-2, z_max+3}; int rangez[] = {x_min-2, x_max+3, y_min-2, y_max+3, z_min-2, z_max+3}; int rangefull[] = {-2, x_cells+8, -2, y_cells+8, -2, z_cells+8}; ops_par_loop_initialise_chunk_kernel_xx("initialise_chunk_kernel_xx", clover_grid, 3, rangefull, ops_arg_dat(xx, S3D_000_STRID3D_X, "int", OPS_WRITE), ops_arg_idx()); ops_par_loop_initialise_chunk_kernel_yy("initialise_chunk_kernel_yy", clover_grid, 3, rangefull, ops_arg_dat(yy, S3D_000_STRID3D_Y, "int", OPS_WRITE), ops_arg_idx()); ops_par_loop_initialise_chunk_kernel_zz("initialise_chunk_kernel_zz", clover_grid, 3, rangefull, ops_arg_dat(zz, S3D_000_STRID3D_Z, "int", OPS_WRITE), ops_arg_idx()); ops_par_loop_initialise_chunk_kernel_x("initialise_chunk_kernel_x", clover_grid, 3, rangex, ops_arg_dat(vertexx, S3D_000_STRID3D_X, "double", OPS_WRITE), ops_arg_dat(xx, S3D_000_STRID3D_X, "int", OPS_READ), ops_arg_dat(vertexdx, S3D_000_STRID3D_X, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_y("initialise_chunk_kernel_y", clover_grid, 3, rangey, ops_arg_dat(vertexy, S3D_000_STRID3D_Y, "double", OPS_WRITE), ops_arg_dat(yy, S3D_000_STRID3D_Y, "int", OPS_READ), ops_arg_dat(vertexdy, S3D_000_STRID3D_Y, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_z("initialise_chunk_kernel_z", clover_grid, 3, rangez, ops_arg_dat(vertexz, S3D_000_STRID3D_Z, "double", OPS_WRITE), ops_arg_dat(zz, S3D_000_STRID3D_Z, "int", OPS_READ), ops_arg_dat(vertexdz, S3D_000_STRID3D_Z, "double", OPS_WRITE)); rangex[0] = x_min-2; rangex[1] = x_max+2; ops_par_loop_initialise_chunk_kernel_cellx("initialise_chunk_kernel_cellx", clover_grid, 3, rangex, 
ops_arg_dat(vertexx, S3D_000_P100_STRID3D_X, "double", OPS_READ), ops_arg_dat(cellx, S3D_000_STRID3D_X, "double", OPS_WRITE), ops_arg_dat(celldx, S3D_000_STRID3D_X, "double", OPS_WRITE)); rangey[2] = y_min-2; rangey[3] = y_max+2; ops_par_loop_initialise_chunk_kernel_celly("initialise_chunk_kernel_celly", clover_grid, 3, rangey, ops_arg_dat(vertexy, S3D_000_0P10_STRID3D_Y, "double", OPS_READ), ops_arg_dat(celly, S3D_000_STRID3D_Y, "double", OPS_WRITE), ops_arg_dat(celldy, S3D_000_STRID3D_Y, "double", OPS_WRITE)); rangez[4] = z_min-2; rangez[5] = z_max+2; ops_par_loop_initialise_chunk_kernel_cellz("initialise_chunk_kernel_cellz", clover_grid, 3, rangez, ops_arg_dat(vertexz, S3D_000_00P1_STRID3D_Z, "double", OPS_READ), ops_arg_dat(cellz, S3D_000_STRID3D_Z, "double", OPS_WRITE), ops_arg_dat(celldz, S3D_000_STRID3D_Z, "double", OPS_WRITE)); int rangexyz[] = {x_min-2,x_max+2,y_min-2,y_max+2,z_min-2,z_max+2}; ops_par_loop_initialise_chunk_kernel_volume("initialise_chunk_kernel_volume", clover_grid, 3, rangexyz, ops_arg_dat(volume, S3D_000, "double", OPS_WRITE), ops_arg_dat(celldy, S3D_000_STRID3D_Y, "double", OPS_READ), ops_arg_dat(xarea, S3D_000, "double", OPS_WRITE), ops_arg_dat(celldx, S3D_000_STRID3D_X, "double", OPS_READ), ops_arg_dat(yarea, S3D_000, "double", OPS_WRITE), ops_arg_dat(celldz, S3D_000_STRID3D_Z, "double", OPS_READ), ops_arg_dat(zarea, S3D_000, "double", OPS_WRITE)); }
void advec_mom(int which_vel, int sweep_number, int dir) { int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int z_min = field.z_min; int z_max = field.z_max; int rangexyz[] = {x_min - 2, x_max + 2, y_min - 2, y_max + 2, z_min - 2, z_max + 2}; ops_dat vel1; if (which_vel == 1) { vel1 = xvel1; } else if (which_vel == 2) { vel1 = yvel1; } else if (which_vel == 3) { vel1 = zvel1; } if (sweep_number == 1 && dir == 1) { ops_par_loop_advec_mom_kernel_x1( "advec_mom_kernel_x1", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ)); } else if (sweep_number == 1 && dir == 3) { ops_par_loop_advec_mom_kernel_z1( "advec_mom_kernel_z1", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ)); } else if (sweep_number == 2 && advect_x) { ops_par_loop_advec_mom_kernel_x2( "advec_mom_kernel_x2", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ)); } else if (sweep_number == 2 && !advect_x) { ops_par_loop_advec_mom_kernel_y2( "advec_mom_kernel_y2", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, 1, S3D_000, "double", 
OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ)); } else if (sweep_number == 3 && dir == 1) { ops_par_loop_advec_mom_kernel_x3( "advec_mom_kernel_x3", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ)); } else if (sweep_number == 3 && dir == 3) { ops_par_loop_advec_mom_kernel_z3( "advec_mom_kernel_z3", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ)); } if (dir == 1) { if (which_vel == 1) { int range_fullx_party_partz_1[] = {x_min - 2, x_max + 2, y_min, y_max + 1, z_min, z_max + 1}; ops_par_loop_advec_mom_kernel_mass_flux_x( "advec_mom_kernel_mass_flux_x", clover_grid, 3, range_fullx_party_partz_1, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(mass_flux_x, 1, S3D_000_fP1M1M1, "double", OPS_READ)); int range_partx_party_partz_1[] = {x_min - 1, x_max + 2, y_min, y_max + 1, z_min, z_max + 1}; ops_par_loop_advec_mom_kernel_post_pre_advec_x( "advec_mom_kernel_post_pre_advec_x", clover_grid, 3, range_partx_party_partz_1, ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(density1, 1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array1, 1, S3D_000_M100, "double", OPS_READ)); } int range_innder_plus1xyz_minus1x[] = {x_min - 1, x_max + 1, y_min, y_max + 1, z_min, z_max + 1}; 
ops_par_loop_advec_mom_kernel1_x_nonvector( "advec_mom_kernel1_x", clover_grid, 3, range_innder_plus1xyz_minus1x, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000_P100, "double", OPS_READ), ops_arg_dat(work_array5, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(celldx, 1, S3D_000_P100_M100_STRID3D_X, "double", OPS_READ), ops_arg_dat(vel1, 1, S3D_000_P100_P200_M100, "double", OPS_READ)); int range_partx_party_partz_2[] = {x_min, x_max + 1, y_min, y_max + 1, z_min, z_max + 1}; ops_par_loop_advec_mom_kernel2_x( "advec_mom_kernel2_x", clover_grid, 3, range_partx_party_partz_2, ops_arg_dat(vel1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array5, 1, S3D_000_M100, "double", OPS_READ)); } else if (dir == 2) { if (which_vel == 1) { int range_fully_partx_partz_1[] = {x_min, x_max + 1, y_min - 2, y_max + 2, z_min, z_max + 1}; ops_par_loop_advec_mom_kernel_mass_flux_y( "advec_mom_kernel_mass_flux_y", clover_grid, 3, range_fully_partx_partz_1, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(mass_flux_y, 1, S3D_000_fM1P1M1, "double", OPS_READ)); int range_party_partx_partz_1[] = {x_min, x_max + 1, y_min - 1, y_max + 2, z_min, z_max + 1}; ops_par_loop_advec_mom_kernel_post_pre_advec_y( "advec_mom_kernel_post_pre_advec_y", clover_grid, 3, range_party_partx_partz_1, ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(density1, 1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array1, 1, S3D_000_0M10, "double", OPS_READ)); } int range_plus1xyz_minus1y[] = {x_min, x_max + 1, y_min - 1, y_max + 1, z_min, z_max + 1}; ops_par_loop_advec_mom_kernel1_y_nonvector( "advec_mom_kernel1_y", clover_grid, 3, range_plus1xyz_minus1y, ops_arg_dat(work_array1, 1, 
S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(work_array5, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(celldy, 1, S3D_000_0P10_0M10_STRID3D_Y, "double", OPS_READ), ops_arg_dat(vel1, 1, S3D_000_0P10_0P20_0M10, "double", OPS_READ)); int range_partx_party_partz_2[] = {x_min, x_max + 1, y_min, y_max + 1, z_min, z_max + 1}; ops_par_loop_advec_mom_kernel2_y( "advec_mom_kernel2_y", clover_grid, 3, range_partx_party_partz_2, ops_arg_dat(vel1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array5, 1, S3D_000_0M10, "double", OPS_READ)); } else if (dir == 3) { if (which_vel == 1) { int range_fullz_partx_party_1[] = {x_min, x_max + 1, y_min, y_max + 1, z_min - 2, z_max + 2}; ops_par_loop_advec_mom_kernel_mass_flux_z( "advec_mom_kernel_mass_flux_z", clover_grid, 3, range_fullz_partx_party_1, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(mass_flux_z, 1, S3D_000_fM1M1P1, "double", OPS_READ)); int range_party_partx_partz_1[] = {x_min, x_max + 1, y_min, y_max + 1, z_min - 1, z_max + 2}; ops_par_loop_advec_mom_kernel_post_pre_advec_z( "advec_mom_kernel_post_pre_advec_z", clover_grid, 3, range_party_partx_partz_1, ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(density1, 1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array1, 1, S3D_000_00M1, "double", OPS_READ)); } int range_plus1xyz_minus1z[] = {x_min, x_max + 1, y_min, y_max + 1, z_min - 1, z_max + 1}; ops_par_loop_advec_mom_kernel1_z_nonvector( "advec_mom_kernel1_z", clover_grid, 3, range_plus1xyz_minus1z, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000_00P1, "double", OPS_READ), ops_arg_dat(work_array5, 1, S3D_000, "double", 
OPS_WRITE), ops_arg_dat(celldz, 1, S3D_000_00P1_00M1_STRID3D_Z, "double", OPS_READ), ops_arg_dat(vel1, 1, S3D_000_00P1_00P2_00M1, "double", OPS_READ)); int range_partx_party_partz_2[] = {x_min, x_max + 1, y_min, y_max + 1, z_min, z_max + 1}; ops_par_loop_advec_mom_kernel2_z( "advec_mom_kernel2_z", clover_grid, 3, range_partx_party_partz_2, ops_arg_dat(vel1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array5, 1, S3D_000_00M1, "double", OPS_READ)); } }
void advec_mom(int which_vel, int sweep_number, int dir) { //initialize sizes using global values int x_cells = grid.x_cells; int y_cells = grid.y_cells; int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int z_min = field.z_min; int z_max = field.z_max; int rangexyz[] = {x_min-2,x_max+2,y_min-2,y_max+2,z_min-2,z_max+2}; // full range over grid int mom_sweep; ops_dat vel1; if( which_vel == 1) { vel1 = xvel1; } else if( which_vel == 2) { vel1 = yvel1; } else if( which_vel == 3) { vel1 = zvel1; } if(sweep_number==1 && dir == 1) { ops_par_loop(advec_mom_kernel_x1, "advec_mom_kernel_x1", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_y, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_z, S3D_000_00P1, "double", OPS_READ)); } else if(sweep_number==1 && dir == 3) { ops_par_loop(advec_mom_kernel_z1, "advec_mom_kernel_z1", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_y, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_z, S3D_000_00P1, "double", OPS_READ)); } else if (sweep_number==2 && advect_x) { ops_par_loop(advec_mom_kernel_x2, "advec_mom_kernel_x2", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_y, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_z, S3D_000_00P1, "double", OPS_READ)); } else if (sweep_number==2 && !advect_x) { ops_par_loop(advec_mom_kernel_y2, "advec_mom_kernel_y2", clover_grid, 3, 
rangexyz, ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_y, S3D_000_0P10, "double", OPS_READ)); } else if (sweep_number==3 && dir == 1) { ops_par_loop(advec_mom_kernel_x3, "advec_mom_kernel_x3", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, S3D_000_P100, "double", OPS_READ)); } else if (sweep_number==3 && dir == 3) { ops_par_loop(advec_mom_kernel_z3, "advec_mom_kernel_z3", clover_grid, 3, rangexyz, ops_arg_dat(work_array6, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_z, S3D_000_00P1, "double", OPS_READ)); } int range_partx_party_1[] = {x_min-1,x_max+2,y_min,y_max+1}; // partial x range partial y range int range_fully_party_1[] = {x_min,x_max+1,y_min-2,y_max+2}; // full y range partial x range int range_partx_party_2[] = {x_min,x_max+1,y_min-1,y_max+2}; // partial x range partial y range if (dir == 1) { if (which_vel == 1) { //Find staggered mesh mass fluxes, nodal masses and volumes. 
int range_fullx_party_partz_1[] = {x_min-2,x_max+2,y_min,y_max+1,z_min,z_max+1}; // full x range partial y,z range ops_par_loop(advec_mom_kernel_mass_flux_x, "advec_mom_kernel_mass_flux_x", clover_grid, 3, range_fullx_party_partz_1, ops_arg_dat(work_array1, S3D_000, "double", OPS_WRITE), ops_arg_dat(mass_flux_x, S3D_000_fP1M1M1, "double", OPS_READ)); //Staggered cell mass post and pre advection int range_partx_party_partz_1[] = {x_min-1,x_max+2,y_min,y_max+1,z_min,z_max+1}; // partial x,y,z range ops_par_loop(advec_mom_kernel_post_pre_advec_x, "advec_mom_kernel_post_pre_advec_x", clover_grid, 3, range_partx_party_partz_1, ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7/*post_vol*/, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(density1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array1/*node_flux*/, S3D_000_M100, "double", OPS_READ)); } int range_innder_plus1xyz_minus1x[] = {x_min-1,x_max+1,y_min,y_max+1,z_min,z_max+1}; // partial x range partial y range ops_par_loop(advec_mom_kernel1_x_nonvector, "advec_mom_kernel1_x", clover_grid, 3, range_innder_plus1xyz_minus1x, ops_arg_dat(work_array1/*node_flux*/, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000_P100, "double", OPS_READ), ops_arg_dat(work_array5/*mom_flux*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(celldx, S3D_000_P100_M100_STRID3D_X, "double", OPS_READ), ops_arg_dat(vel1, S3D_000_P100_P200_M100, "double", OPS_READ)); int range_partx_party_partz_2[] = {x_min,x_max+1,y_min,y_max+1,z_min,z_max+1}; ops_par_loop(advec_mom_kernel2_x, "advec_mom_kernel2_x", clover_grid, 3, range_partx_party_partz_2, ops_arg_dat(vel1, S3D_000, "double", OPS_RW), ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_READ), ops_arg_dat(work_array5/*mom_flux*/, S3D_000_M100, "double", 
OPS_READ) ); } else if (dir == 2) { if (which_vel == 1) { //Find staggered mesh mass fluxes, nodal masses and volumes. int range_fully_partx_partz_1[] = {x_min,x_max+1,y_min-2,y_max+2,z_min,z_max+1}; // full x range partial y,z range ops_par_loop(advec_mom_kernel_mass_flux_y, "advec_mom_kernel_mass_flux_y", clover_grid, 3, range_fully_partx_partz_1, ops_arg_dat(work_array1, S3D_000, "double", OPS_WRITE), ops_arg_dat(mass_flux_y, S3D_000_fM1P1M1, "double", OPS_READ)); //Staggered cell mass post and pre advection int range_party_partx_partz_1[] = {x_min,x_max+1,y_min-1,y_max+2,z_min,z_max+1}; // full x range partial y,z range ops_par_loop(advec_mom_kernel_post_pre_advec_y, "advec_mom_kernel_post_pre_advec_y", clover_grid, 3, range_party_partx_partz_1, ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(density1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array1/*node_flux*/, S3D_000_0M10, "double", OPS_READ)); } int range_plus1xyz_minus1y[] = {x_min,x_max+1,y_min-1,y_max+1,z_min,z_max+1}; // partial x range partial y range ops_par_loop(advec_mom_kernel1_y_nonvector, "advec_mom_kernel1_y", clover_grid, 3, range_plus1xyz_minus1y, ops_arg_dat(work_array1/*node_flux*/, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(work_array5/*mom_flux*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(celldy, S3D_000_0P10_0M10_STRID3D_Y, "double", OPS_READ), ops_arg_dat(vel1, S3D_000_0P10_0P20_0M10, "double", OPS_READ)); int range_partx_party_partz_2[] = {x_min,x_max+1,y_min,y_max+1,z_min,z_max+1}; ops_par_loop(advec_mom_kernel2_y, "advec_mom_kernel2_y", clover_grid, 3, range_partx_party_partz_2, ops_arg_dat(vel1, S3D_000, "double", OPS_RW), ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_READ), 
ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_READ), ops_arg_dat(work_array5/*mom_flux*/, S3D_000_0M10, "double", OPS_READ)); } else if (dir == 3) { if (which_vel == 1) { //Find staggered mesh mass fluxes, nodal masses and volumes. int range_fullz_partx_party_1[] = {x_min,x_max+1,y_min,y_max+1,z_min-2,z_max+2}; // full x range partial y,z range ops_par_loop(advec_mom_kernel_mass_flux_z, "advec_mom_kernel_mass_flux_z", clover_grid, 3, range_fullz_partx_party_1, ops_arg_dat(work_array1, S3D_000, "double", OPS_WRITE), ops_arg_dat(mass_flux_z, S3D_000_fM1M1P1, "double", OPS_READ)); //Staggered cell mass post and pre advection int range_party_partx_partz_1[] = {x_min,x_max+1,y_min,y_max+1,z_min-1,z_max+2}; // full x range partial y,z range ops_par_loop(advec_mom_kernel_post_pre_advec_z, "advec_mom_kernel_post_pre_advec_z", clover_grid, 3, range_party_partx_partz_1, ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(density1, S3D_000_fM1M1M1, "double", OPS_READ), ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array1/*node_flux*/, S3D_000_00M1, "double", OPS_READ)); } int range_plus1xyz_minus1z[] = {x_min,x_max+1,y_min,y_max+1,z_min-1,z_max+1}; // partial x range partial y range ops_par_loop(advec_mom_kernel1_z_nonvector, "advec_mom_kernel1_z", clover_grid, 3, range_plus1xyz_minus1z, ops_arg_dat(work_array1/*node_flux*/, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000_00P1, "double", OPS_READ), ops_arg_dat(work_array5/*mom_flux*/, S3D_000, "double", OPS_WRITE), ops_arg_dat(celldz, S3D_000_00P1_00M1_STRID3D_Z, "double", OPS_READ), ops_arg_dat(vel1, S3D_000_00P1_00P2_00M1, "double", OPS_READ)); int range_partx_party_partz_2[] = {x_min,x_max+1,y_min,y_max+1,z_min,z_max+1}; // full x range partial y range ops_par_loop(advec_mom_kernel2_z, "advec_mom_kernel2_z", clover_grid, 3, 
range_partx_party_partz_2, ops_arg_dat(vel1, S3D_000, "double", OPS_RW), ops_arg_dat(work_array2/*node_mass_post*/, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3/*node_mass_pre*/, S3D_000, "double", OPS_READ), ops_arg_dat(work_array5/*mom_flux*/, S3D_000_00M1, "double", OPS_READ)); } }
void initialise_chunk() { int x_cells = grid.x_cells; int y_cells = grid.y_cells; int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int range[] = {x_min, x_max, y_min, y_max}; ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(density, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(energy0, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(energy1, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(u, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(u0, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_r, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_rstore, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_rtemp, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_Mi, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_w, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_z, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, 
ops_arg_dat(vector_utemp, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_Kx, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_Ky, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_p, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(vector_sd, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(tri_cp, 1, S2D_00, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, range, ops_arg_dat(tri_bfp, 1, S2D_00, "double", OPS_WRITE)); int rangefull1[] = {-2, x_cells + 2, -2, y_cells + 2}; ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, rangefull1, ops_arg_dat(volume, 1, S2D_00, "double", OPS_WRITE)); int rangefull2[] = {-2, x_cells + 3, -2, y_cells + 2}; ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, rangefull2, ops_arg_dat(xarea, 1, S2D_00, "double", OPS_WRITE)); int rangefull3[] = {-2, x_cells + 2, -2, y_cells + 3}; ops_par_loop_initialise_chunk_kernel_zero( "initialise_chunk_kernel_zero", tea_grid, 2, rangefull3, ops_arg_dat(yarea, 1, S2D_00, "double", OPS_WRITE)); int rangex[] = {x_min - 2, x_max + 2, y_min - 2, y_max + 2}; ops_par_loop_initialise_chunk_kernel_zero_x( "initialise_chunk_kernel_zero_x", tea_grid, 2, rangex, ops_arg_dat(cellx, 1, S2D_00_STRID2D_X, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero_x( "initialise_chunk_kernel_zero_x", tea_grid, 2, rangex, ops_arg_dat(celldx, 1, S2D_00_STRID2D_X, "double", OPS_WRITE)); rangex[1]++; 
ops_par_loop_initialise_chunk_kernel_zero_x( "initialise_chunk_kernel_zero_x", tea_grid, 2, rangex, ops_arg_dat(vertexx, 1, S2D_00_STRID2D_X, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero_x( "initialise_chunk_kernel_zero_x", tea_grid, 2, rangex, ops_arg_dat(vertexdx, 1, S2D_00_STRID2D_X, "double", OPS_WRITE)); int rangey2[] = {x_min - 2, x_max + 2, y_min - 2, y_max + 2}; ops_par_loop_initialise_chunk_kernel_zero_y( "initialise_chunk_kernel_zero_y", tea_grid, 2, rangey2, ops_arg_dat(celly, 1, S2D_00_STRID2D_Y, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero_y( "initialise_chunk_kernel_zero_y", tea_grid, 2, rangey2, ops_arg_dat(celldy, 1, S2D_00_STRID2D_Y, "double", OPS_WRITE)); rangey2[3]++; ops_par_loop_initialise_chunk_kernel_zero_y( "initialise_chunk_kernel_zero_y", tea_grid, 2, rangey2, ops_arg_dat(vertexy, 1, S2D_00_STRID2D_Y, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_zero_y( "initialise_chunk_kernel_zero_y", tea_grid, 2, rangey2, ops_arg_dat(vertexdy, 1, S2D_00_STRID2D_Y, "double", OPS_WRITE)); int rangefull[] = {x_min - 2, x_max + 3, y_min - 2, y_max + 3}; ops_execute(); ops_par_loop_initialise_chunk_kernel_xx( "initialise_chunk_kernel_xx", tea_grid, 2, rangefull, ops_arg_dat(xx, 1, S2D_00_STRID2D_X, "int", OPS_WRITE), ops_arg_idx()); ops_par_loop_initialise_chunk_kernel_yy( "initialise_chunk_kernel_yy", tea_grid, 2, rangefull, ops_arg_dat(yy, 1, S2D_00_STRID2D_Y, "int", OPS_WRITE), ops_arg_idx()); ops_par_loop_initialise_chunk_kernel_x( "initialise_chunk_kernel_x", tea_grid, 2, rangex, ops_arg_dat(vertexx, 1, S2D_00_STRID2D_X, "double", OPS_WRITE), ops_arg_dat(xx, 1, S2D_00_STRID2D_X, "int", OPS_READ), ops_arg_dat(vertexdx, 1, S2D_00_STRID2D_X, "double", OPS_WRITE)); ops_par_loop_initialise_chunk_kernel_y( "initialise_chunk_kernel_y", tea_grid, 2, rangey2, ops_arg_dat(vertexy, 1, S2D_00_STRID2D_Y, "double", OPS_WRITE), ops_arg_dat(yy, 1, S2D_00_STRID2D_Y, "int", OPS_READ), ops_arg_dat(vertexdy, 1, 
S2D_00_STRID2D_Y, "double", OPS_WRITE)); rangex[0] = x_min - 2; rangex[1] = x_max + 2; rangex[2] = y_min - 2; rangex[3] = y_max + 2; ops_par_loop_initialise_chunk_kernel_cellx( "initialise_chunk_kernel_cellx", tea_grid, 2, rangex, ops_arg_dat(vertexx, 1, S2D_00_P10_STRID2D_X, "double", OPS_READ), ops_arg_dat(cellx, 1, S2D_00_STRID2D_X, "double", OPS_WRITE), ops_arg_dat(celldx, 1, S2D_00_STRID2D_X, "double", OPS_WRITE)); int rangey[] = {x_min - 2, x_max + 3, y_min - 2, y_max + 2}; ops_par_loop_initialise_chunk_kernel_celly( "initialise_chunk_kernel_celly", tea_grid, 2, rangey, ops_arg_dat(vertexy, 1, S2D_00_0P1_STRID2D_Y, "double", OPS_READ), ops_arg_dat(celly, 1, S2D_00_STRID2D_Y, "double", OPS_WRITE), ops_arg_dat(celldy, 1, S2D_00_STRID2D_Y, "double", OPS_WRITE)); int rangexy[] = {x_min - 2, x_max + 2, y_min - 2, y_max + 2}; ops_par_loop_initialise_chunk_kernel_volume( "initialise_chunk_kernel_volume", tea_grid, 2, rangexy, ops_arg_dat(volume, 1, S2D_00, "double", OPS_WRITE), ops_arg_dat(celldy, 1, S2D_00_STRID2D_Y, "double", OPS_READ), ops_arg_dat(xarea, 1, S2D_00, "double", OPS_WRITE), ops_arg_dat(celldx, 1, S2D_00_STRID2D_X, "double", OPS_READ), ops_arg_dat(yarea, 1, S2D_00, "double", OPS_WRITE)); }
void field_summary() { double qa_diff; //initialize sizes using global values int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int rangexy_inner[] = {x_min,x_max,y_min,y_max}; // inner range without border double vol= 0.0 , mass = 0.0, ie = 0.0, temp = 0.0; ops_par_loop(field_summary_kernel, "field_summary_kernel", tea_grid, 2, rangexy_inner, ops_arg_dat(volume, 1, S2D_00, "double", OPS_READ), ops_arg_dat(density, 1, S2D_00, "double", OPS_READ), ops_arg_dat(energy1, 1, S2D_00, "double", OPS_READ), ops_arg_dat(u, 1, S2D_00, "double", OPS_READ), ops_arg_reduce(red_vol, 1, "double", OPS_INC), ops_arg_reduce(red_mass, 1, "double", OPS_INC), ops_arg_reduce(red_ie, 1, "double", OPS_INC), ops_arg_reduce(red_temp, 1, "double", OPS_INC)); //printf("mass = %lf\n",mass); ops_reduction_result(red_vol,&vol); ops_reduction_result(red_mass,&mass); ops_reduction_result(red_ie,&ie); ops_reduction_result(red_temp,&temp); ops_fprintf(g_out,"\n"); ops_fprintf(g_out,"\n Time %lf\n",clover_time); ops_fprintf(g_out," %-10s %-10s %-15s %-10s %-s\n", " Volume"," Mass"," Density"," Internal Energy","Temperature"); ops_fprintf(g_out," step: %3d %-10.3E %-10.3E %-15.3E %-10.3E %-.3E", step, vol, mass, mass/vol, ie, temp); if(complete == 1) { if(test_problem>0) { if (test_problem == 1) qa_diff = fabs((100.0 * (temp / 157.55084183279294)) - 100.0); if (test_problem == 2) // tea_bm_short.in qa_diff = fabs((100.0 * (temp / 106.27221178646569)) - 100.0); if (test_problem == 3) qa_diff = fabs((100.0 * (temp / 99.955877498324000)) - 100.0); if (test_problem == 4) qa_diff = fabs((100.0 * (temp / 97.277332050749976)) - 100.0); if (test_problem == 5) qa_diff = fabs((100.0 * (temp / 95.462351583362249)) - 100.0); ops_printf("Test problem %3d is within %-10.7E%% of the expected solution\n",test_problem, qa_diff); ops_fprintf(g_out,"\nTest problem %3d is within %10.7E%% of the expected solution\n",test_problem, qa_diff); if(qa_diff < 0.001) { ops_printf(" 
This test is considered PASSED\n"); ops_fprintf(g_out," This test is considered PASSED\n"); } else { ops_printf(" This test is considered FAILED\n"); ops_fprintf(g_out," This test is considered FAILED\n"); } } } fflush(g_out); //ops_exit();//exit for now //exit(0); }
int main(int argc, const char **argv) { c0 = 0.500000000000000; rc0 = 1.0 / 280.0; rc1 = 4.0 / 105.0; rc2 = 1.0 / 5.0; rc3 = 4.0 / 5.0; nx0 = 1000; deltai0 = 0.00100000000000000; deltat = 0.000400000000000000; rkold[0] = 1.0 / 4.0; rkold[1] = 3.0 / 20.0; rkold[2] = 3.0 / 5.0; rknew[0] = 2.0 / 3.0; rknew[1] = 5.0 / 12.0; rknew[2] = 3.0 / 5.0; ops_init(argc, argv, 1); ops_init_backend(); ops_decl_const2("c0", 1, "double", &c0); ops_decl_const2("rc0", 1, "double", &rc0); ops_decl_const2("rc1", 1, "double", &rc1); ops_decl_const2("rc2", 1, "double", &rc2); ops_decl_const2("rc3", 1, "double", &rc3); ops_decl_const2("nx0", 1, "int", &nx0); ops_decl_const2("deltai0", 1, "double", &deltai0); ops_decl_const2("deltat", 1, "double", &deltat); ops_block complex_numbers_block; complex_numbers_block = ops_decl_block(1, "complex_numbers_block"); ops_dat phi; ops_dat phi_old; ops_dat wk0; ops_dat wk1; int halo_p[] = {4}; int halo_m[] = {-4}; int size[] = {nx0}; int base[] = {0}; double *val = NULL; phi = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val, "double", "phi"); phi_old = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val, "double", "phi_old"); wk0 = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val, "double", "wk0"); wk1 = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val, "double", "wk1"); int stencil1_temp[] = {0}; ops_stencil stencil1 = ops_decl_stencil(1, 1, stencil1_temp, "0"); int stencil0_temp[] = {-4, -3, -2, -1, 1, 2, 3, 4}; ops_stencil stencil0 = ops_decl_stencil(1, 8, stencil0_temp, "-4,-3,-2,-1,1,2,3,4"); ops_reduction real = ops_decl_reduction_handle(sizeof(double), "double", "reduction_real"); ops_reduction imaginary = ops_decl_reduction_handle(sizeof(double), "double", "reduction_imaginary"); ops_halo_group halo_exchange0; { int halo_iter[] = {4}; int from_base[] = {0}; int to_base[] = {nx0}; int dir[] = {1}; ops_halo halo0 = ops_decl_halo(phi, phi, halo_iter, from_base, 
to_base, dir, dir); ops_halo grp[] = {halo0}; halo_exchange0 = ops_decl_halo_group(1, grp); } ops_halo_group halo_exchange1; { int halo_iter[] = {4}; int from_base[] = {nx0 - 4}; int to_base[] = {-4}; int dir[] = {1}; ops_halo halo0 = ops_decl_halo(phi, phi, halo_iter, from_base, to_base, dir, dir); ops_halo grp[] = {halo0}; halo_exchange1 = ops_decl_halo_group(1, grp); } ops_partition(""); int iter_range5[] = {-4, nx0 + 4}; ops_par_loop_complex_numbers_block0_5_kernel( "Initialisation", complex_numbers_block, 1, iter_range5, ops_arg_dat(phi, 1, stencil1, "double", OPS_WRITE), ops_arg_idx()); ops_halo_transfer(halo_exchange0); ops_halo_transfer(halo_exchange1); double cpu_start, elapsed_start; ops_timers(&cpu_start, &elapsed_start); for (int iteration = 0; iteration < 1; iteration++) { int iter_range4[] = {-4, nx0 + 4}; ops_par_loop_complex_numbers_block0_4_kernel( "Save equations", complex_numbers_block, 1, iter_range4, ops_arg_dat(phi, 1, stencil1, "double", OPS_READ), ops_arg_dat(phi_old, 1, stencil1, "double", OPS_WRITE)); for (int stage = 0; stage < 3; stage++) { int iter_range0[] = {0, nx0}; ops_par_loop_complex_numbers_block0_0_kernel( "D(phi[x0 t] x0)", complex_numbers_block, 1, iter_range0, ops_arg_dat(phi, 1, stencil0, "double", OPS_READ), ops_arg_dat(wk0, 1, stencil1, "double", OPS_WRITE)); int iter_range1[] = {0, nx0}; ops_par_loop_complex_numbers_block0_1_kernel( "Residual of equation", complex_numbers_block, 1, iter_range1, ops_arg_dat(wk0, 1, stencil1, "double", OPS_READ), ops_arg_dat(wk1, 1, stencil1, "double", OPS_WRITE)); int iter_range2[] = {-4, nx0 + 4}; ops_par_loop_complex_numbers_block0_2_kernel( "RK new (subloop) update", complex_numbers_block, 1, iter_range2, ops_arg_dat(phi_old, 1, stencil1, "double", OPS_READ), ops_arg_dat(wk1, 1, stencil1, "double", OPS_READ), ops_arg_dat(phi, 1, stencil1, "double", OPS_WRITE), ops_arg_gbl(&rknew[stage], 1, "double", OPS_READ)); int iter_range3[] = {-4, nx0 + 4}; 
ops_par_loop_complex_numbers_block0_3_kernel( "RK old update", complex_numbers_block, 1, iter_range3, ops_arg_dat(wk1, 1, stencil1, "double", OPS_READ), ops_arg_dat(phi_old, 1, stencil1, "double", OPS_RW), ops_arg_gbl(&rkold[stage], 1, "double", OPS_READ)); ops_halo_transfer(halo_exchange0); ops_halo_transfer(halo_exchange1); } int iter_range0[] = {0, nx0}; ops_par_loop_complex_numbers_block0_cn_kernel( "Complex numbers", complex_numbers_block, 1, iter_range0, ops_arg_dat(phi, 1, stencil0, "double", OPS_READ), ops_arg_reduce(real, 1, "double", OPS_INC), ops_arg_reduce(imaginary, 1, "double", OPS_INC)); } double cpu_end, elapsed_end; ops_timers(&cpu_end, &elapsed_end); ops_printf("\nTimings are:\n"); ops_printf("-----------------------------------------\n"); ops_printf("Total Wall time %lf\n", elapsed_end - elapsed_start); ops_fetch_block_hdf5_file(complex_numbers_block, "complex_numbers_2500.h5"); ops_fetch_dat_hdf5_file(phi, "complex_numbers_2500.h5"); ops_exit(); }
void generate() { int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int z_min = field.z_min; int z_max = field.z_max; int rangexyz[] = {x_min-2,x_max+2,y_min-2,y_max+2,z_min-2,z_max+2}; ops_par_loop_generate_chunk_kernel("generate_chunk_kernel", clover_grid, 3, rangexyz, ops_arg_dat(vertexx, S3D_000_P100_STRID3D_X, "double", OPS_READ), ops_arg_dat(vertexy, S3D_000_0P10_STRID3D_Y, "double", OPS_READ), ops_arg_dat(vertexz, S3D_000_00P1_STRID3D_Z, "double", OPS_READ), ops_arg_dat(energy0, S3D_000, "double", OPS_WRITE), ops_arg_dat(density0, S3D_000, "double", OPS_WRITE), ops_arg_dat(xvel0, S3D_000_fP1P1P1, "double", OPS_WRITE), ops_arg_dat(yvel0, S3D_000_fP1P1P1, "double", OPS_WRITE), ops_arg_dat(zvel0, S3D_000_fP1P1P1, "double", OPS_WRITE), ops_arg_dat(cellx, S3D_000_STRID3D_X, "double", OPS_READ), ops_arg_dat(celly, S3D_000_STRID3D_Y, "double", OPS_READ), ops_arg_dat(cellz, S3D_000_STRID3D_Z, "double", OPS_READ)); }
/*
 * First step of the Chebyshev solve over the inner range.
 *
 *  1. tea_leaf_cheby_init_kernel writes w and r from Kx, Ky, u and u0 with
 *     the scalars rx and ry.
 *  2. When preconditioner_type is not TL_PREC_NONE, z is produced from r by
 *     tea_block_solve (TL_PREC_JAC_BLOCK) or tea_diag_solve
 *     (TL_PREC_JAC_DIAG).
 *  3. tea_leaf_recip3_kernel writes p from z (preconditioned) or from r
 *     (no preconditioner), using theta.
 *  4. tea_leaf_xpy_kernel increments u with p.
 *
 * Mi, cp and bfp are only forwarded to the preconditioner routines.
 */
void tea_leaf_cheby_init(
  ops_dat u, ops_dat u0, ops_dat p, ops_dat r, ops_dat Mi, ops_dat w,
  ops_dat z, ops_dat Kx, ops_dat Ky, ops_dat cp, ops_dat bfp,
  double rx, double ry, double theta, int preconditioner_type)
{
  // initialize sizes using global values
  int x_min = field.x_min;
  int x_max = field.x_max;
  int y_min = field.y_min;
  int y_max = field.y_max;

  int rangexy[] = {x_min, x_max, y_min, y_max};

  ops_par_loop(tea_leaf_cheby_init_kernel, "tea_leaf_cheby_init_kernel", tea_grid, 2, rangexy,
               ops_arg_dat(w, 1, S2D_00, "double", OPS_WRITE),
               ops_arg_dat(r, 1, S2D_00, "double", OPS_WRITE),
               ops_arg_dat(Kx, 1, S2D_00_P10, "double", OPS_READ),
               ops_arg_dat(Ky, 1, S2D_00_0P1, "double", OPS_READ),
               ops_arg_dat(u, 1, S2D_00_0M1_M10_P10_0P1, "double", OPS_READ),
               ops_arg_dat(u0, 1, S2D_00, "double", OPS_READ),
               ops_arg_gbl(&rx, 1, "double", OPS_READ),
               ops_arg_gbl(&ry, 1, "double", OPS_READ));

  // p is built from z when a preconditioner is active, otherwise from r.
  ops_dat p_source = r;
  if (preconditioner_type != TL_PREC_NONE) {
    if (preconditioner_type == TL_PREC_JAC_BLOCK)
      tea_block_solve(r, z, cp, bfp, Kx, Ky, rx, ry);
    else if (preconditioner_type == TL_PREC_JAC_DIAG)
      tea_diag_solve(r, z, Mi, Kx, Ky, rx, ry);
    p_source = z;
  }

  ops_par_loop(tea_leaf_recip3_kernel, "tea_leaf_recip3_kernel", tea_grid, 2, rangexy,
               ops_arg_dat(p, 1, S2D_00, "double", OPS_WRITE),
               ops_arg_dat(p_source, 1, S2D_00, "double", OPS_READ),
               ops_arg_gbl(&theta, 1, "double", OPS_READ));

  ops_par_loop(tea_leaf_xpy_kernel, "tea_leaf_xpy_kernel", tea_grid, 2, rangexy,
               ops_arg_dat(u, 1, S2D_00, "double", OPS_INC),
               ops_arg_dat(p, 1, S2D_00, "double", OPS_READ));
}
void accelerate() { error_condition = 0; // Not used yet due to issue with OpenA reduction //initialize sizes using global values int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int rangexy_inner_plus1[] = {x_min,x_max+1,y_min,y_max+1}; // inner range plus 1 ops_par_loop(accelerate_kernel, "accelerate_kernel", clover_grid, 2, rangexy_inner_plus1, ops_arg_dat(density0, 1, S2D_00_M10_0M1_M1M1, "double", OPS_READ), ops_arg_dat(volume, 1, S2D_00_M10_0M1_M1M1, "double", OPS_READ), ops_arg_dat(work_array1, 1, S2D_00, "double", OPS_WRITE), ops_arg_dat(xvel0, 1, S2D_00, "double", OPS_READ), ops_arg_dat(xvel1, 1, S2D_00, "double", OPS_INC), ops_arg_dat(xarea, 1, S2D_00_0M1, "double", OPS_READ), ops_arg_dat(pressure, 1, S2D_00_M10_0M1_M1M1, "double", OPS_READ), ops_arg_dat(yvel0, 1, S2D_00, "double", OPS_READ), ops_arg_dat(yvel1, 1, S2D_00, "double", OPS_INC), ops_arg_dat(yarea, 1, S2D_00_M10, "double", OPS_READ), ops_arg_dat(viscosity, 1, S2D_00_M10_0M1_M1M1, "double", OPS_READ)); }
/*
 * One Chebyshev iteration over the inner range.
 *
 * Recomputes w and r with tea_leaf_cheby_init_kernel, optionally fills z via
 * tea_block_solve / tea_diag_solve, updates p in place with
 * tea_leaf_axpby_kernel using ch_alphas[step] and ch_betas[step] together
 * with z (preconditioned) or r (TL_PREC_NONE), and finally increments u
 * with p.
 */
void tea_leaf_cheby_iterate(
  ops_dat u, ops_dat u0, ops_dat p, ops_dat r, ops_dat Mi, ops_dat w,
  ops_dat z, ops_dat Kx, ops_dat Ky, ops_dat cp, ops_dat bfp,
  double *ch_alphas, double *ch_betas, double rx, double ry,
  int step, int preconditioner_type)
{
  int inner[] = {field.x_min, field.x_max, field.y_min, field.y_max};

  ops_par_loop(tea_leaf_cheby_init_kernel, "tea_leaf_cheby_init_kernel", tea_grid, 2, inner,
               ops_arg_dat(w, 1, S2D_00, "double", OPS_WRITE),
               ops_arg_dat(r, 1, S2D_00, "double", OPS_WRITE),
               ops_arg_dat(Kx, 1, S2D_00_P10, "double", OPS_READ),
               ops_arg_dat(Ky, 1, S2D_00_0P1, "double", OPS_READ),
               ops_arg_dat(u, 1, S2D_00_0M1_M10_P10_0P1, "double", OPS_READ),
               ops_arg_dat(u0, 1, S2D_00, "double", OPS_READ),
               ops_arg_gbl(&rx, 1, "double", OPS_READ),
               ops_arg_gbl(&ry, 1, "double", OPS_READ));

  /* Direction fed into the p update: r unless a preconditioner produced z. */
  ops_dat direction = r;
  if (preconditioner_type != TL_PREC_NONE) {
    if (preconditioner_type == TL_PREC_JAC_BLOCK) {
      tea_block_solve(r, z, cp, bfp, Kx, Ky, rx, ry);
    } else if (preconditioner_type == TL_PREC_JAC_DIAG) {
      tea_diag_solve(r, z, Mi, Kx, Ky, rx, ry);
    }
    direction = z;
  }

  ops_par_loop(tea_leaf_axpby_kernel, "tea_leaf_axpby_kernel", tea_grid, 2, inner,
               ops_arg_dat(p, 1, S2D_00, "double", OPS_RW),
               ops_arg_dat(direction, 1, S2D_00, "double", OPS_READ),
               ops_arg_gbl(&ch_alphas[step], 1, "double", OPS_READ),
               ops_arg_gbl(&ch_betas[step], 1, "double", OPS_READ));

  ops_par_loop(tea_leaf_xpy_kernel, "tea_leaf_xpy_kernel", tea_grid, 2, inner,
               ops_arg_dat(u, 1, S2D_00, "double", OPS_INC),
               ops_arg_dat(p, 1, S2D_00, "double", OPS_READ));
}
void calc_dt(double* local_dt, char* local_control, double* xl_pos, double* yl_pos, int* jldt, int* kldt) { int small; double jk_control = 1.1; small = 0; int dtl_control; //initialize sizes using global values int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int rangexy_inner[] = {x_min,x_max,y_min,y_max}; // inner range without border ops_par_loop(calc_dt_kernel, "calc_dt_kernel", clover_grid, 2, rangexy_inner, ops_arg_dat(celldx, 1, S2D_00_P10_STRID2D_X, "double", OPS_READ), ops_arg_dat(celldy, 1, S2D_00_0P1_STRID2D_Y, "double", OPS_READ), ops_arg_dat(soundspeed, 1, S2D_00, "double", OPS_READ), ops_arg_dat(viscosity, 1, S2D_00, "double", OPS_READ), ops_arg_dat(density0, 1, S2D_00, "double", OPS_READ), ops_arg_dat(xvel0, 1, S2D_00_P10_0P1_P1P1, "double", OPS_READ), ops_arg_dat(xarea, 1, S2D_00_P10, "double", OPS_READ), ops_arg_dat(volume, 1, S2D_00, "double", OPS_READ), ops_arg_dat(yvel0, 1, S2D_00_P10_0P1_P1P1, "double", OPS_READ), ops_arg_dat(yarea, 1, S2D_00_0P1, "double", OPS_READ), ops_arg_dat(work_array1, 1, S2D_00, "double", OPS_WRITE) ); ops_par_loop(calc_dt_kernel_min, "calc_dt_kernel_min", clover_grid, 2, rangexy_inner, ops_arg_dat(work_array1, 1, S2D_00, "double", OPS_READ), ops_arg_reduce(red_local_dt, 1, "double", OPS_MIN)); //printf("*local_dt = %lf\n",*local_dt); //Extract the mimimum timestep information dtl_control = 10.01 * (jk_control - (int)(jk_control)); jk_control = jk_control - (jk_control - (int)(jk_control)); //*jldt = ((int)jk_control)%x_max; //*kldt = 1 + (jk_control/x_max); *jldt = ((int)jk_control)%(x_max-2); *kldt = 1 + (jk_control/(x_max-2)); int rangexy_getpoint[] = {*jldt-1+2,*jldt+2,*kldt-1+2,*kldt+2}; // get point value //note +2 added due to boundary //int rangexy_getpointx[] = {*jldt-1+2,*jldt+2,y_min-2,y_max+2}; // get point value //note +2 added due to boundary //int rangexy_getpointy[] = {x_min-2,x_max+2,*kldt-1+2,*kldt+2}; // get point value //note +2 added due to boundary 
ops_par_loop(calc_dt_kernel_get, "calc_dt_kernel_getx", clover_grid, 2, rangexy_getpoint, ops_arg_dat(cellx, 1, S2D_00_STRID2D_X, "double", OPS_READ), ops_arg_dat(celly, 1, S2D_00_STRID2D_Y, "double", OPS_READ), ops_arg_reduce(red_xl_pos, 1, "double", OPS_INC), ops_arg_reduce(red_yl_pos, 1, "double", OPS_INC)); ops_reduction_result(red_local_dt, local_dt); ops_reduction_result(red_xl_pos, xl_pos); ops_reduction_result(red_yl_pos, yl_pos); *local_dt = MIN(*local_dt, g_big); if(*local_dt < dtmin) small = 1; if(small != 0) { ops_printf("Timestep information:\n"); ops_printf("j, k : %d, %d\n",*jldt,*kldt); ops_printf("x, y : %lf, %lf\n",*xl_pos,*xl_pos); ops_printf("timestep : %lf\n",*local_dt); double output[12] = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}; ops_par_loop(calc_dt_kernel_print, "calc_dt_kernel_print", clover_grid, 2,rangexy_getpoint, ops_arg_dat(xvel0, 1, S2D_10_M10_01_0M1, "double", OPS_READ), ops_arg_dat(yvel0, 1, S2D_10_M10_01_0M1, "double", OPS_READ), ops_arg_dat(density0, 1, S2D_00, "double", OPS_READ), ops_arg_dat(energy0, 1, S2D_00, "double", OPS_READ), ops_arg_dat(pressure, 1, S2D_00, "double", OPS_READ), ops_arg_dat(soundspeed, 1, S2D_00, "double", OPS_READ), ops_arg_reduce(red_output, 12, "double", OPS_INC)); ops_reduction_result(red_output, output); ops_printf("Cell velocities:\n"); ops_printf("%E, %E \n",output[0],output[1]); //xvel0(jldt ,kldt ),yvel0(jldt ,kldt ) ops_printf("%E, %E \n",output[2],output[3]); //xvel0(jldt+1,kldt ),yvel0(jldt+1,kldt ) ops_printf("%E, %E \n",output[4],output[5]); //xvel0(jldt+1,kldt+1),yvel0(jldt+1,kldt+1) ops_printf("%E, %E \n",output[6],output[7]); //xvel0(jldt ,kldt+1),yvel0(jldt ,kldt+1) ops_printf("density, energy, pressure, soundspeed = %lf, %lf, %lf, %lf \n", output[8], output[9], output[10], output[11]); } if(dtl_control == 1) sprintf(local_control, "sound"); if(dtl_control == 2) sprintf(local_control, "xvel"); if(dtl_control == 3) sprintf(local_control, "yvel"); if(dtl_control == 4) 
sprintf(local_control, "div"); }
void advec_cell(int sweep_number, int dir) { //initialize sizes using global values int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int z_min = field.z_min; int z_max = field.z_max; int rangexyz[] = {x_min-2,x_max+2,y_min-2,y_max+2,z_min-2,z_max+2}; // full range over grid int rangexyz_inner[] = {x_min,x_max,y_min,y_max,z_min,z_max}; // inner range without border int rangexyz_inner_plus2x[] = {x_min,x_max+2,y_min,y_max,z_min,z_max}; // inner range with +2 in x int rangexyz_inner_plus2yz[] = {x_min,x_max,y_min,y_max+2,z_min,z_max+2}; // inner range with +2 in y and z int rangexyz_inner_plus2z[] = {x_min,x_max,y_min,y_max,z_min,z_max+2}; // inner range with +2 in z //printf("direction: %d sweep_number %d \n", dir, sweep_number); if(dir == g_xdir) { if(sweep_number == 1) { ops_par_loop(advec_cell_kernel1_xdir, "advec_cell_kernel1_xdir", clover_grid, 3, rangexyz, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ)); } else if (sweep_number == 3) { ops_par_loop(advec_cell_kernel2_xdir, "advec_cell_kernel2_xdir", clover_grid, 3, rangexyz, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ)); } ops_par_loop(advec_cell_kernel3_xdir, "advec_cell_kernel3_xdir", clover_grid, 3, rangexyz_inner_plus2x, ops_arg_dat(vol_flux_x, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array1, 1, S3D_000_M100, "double", OPS_READ), ops_arg_dat(xx, 1, S3D_000_P100_STRID3D_X, "int", OPS_READ), ops_arg_dat(vertexdx, 1, S3D_000_P100_M100_STRID3D_X, "double", 
OPS_READ), ops_arg_dat(density1, 1, S3D_000_P100_M100_M200, "double", OPS_READ), ops_arg_dat(energy1, 1, S3D_000_P100_M100_M200, "double", OPS_READ), ops_arg_dat(mass_flux_x, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE)); ops_par_loop(advec_cell_kernel4_xdir, "advec_cell_kernel4_xdir", clover_grid, 3, rangexyz_inner, ops_arg_dat(density1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(energy1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(mass_flux_x, 1, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ), ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array4, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array5, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000_P100, "double", OPS_READ)); } else if(dir == g_ydir) { if(sweep_number == 2) { if (advect_x) { ops_par_loop(advec_cell_kernel1_ydir, "advec_cell_kernel1_ydir", clover_grid, 3, rangexyz, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ)); } else { ops_par_loop(advec_cell_kernel2_ydir, "advec_cell_kernel2_ydir", clover_grid, 3, rangexyz, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ)); } } ops_par_loop(advec_cell_kernel3_ydir, "advec_cell_kernel3_ydir", clover_grid, 3, rangexyz_inner_plus2yz, ops_arg_dat(vol_flux_y, 1, 
S3D_000, "double", OPS_READ), ops_arg_dat(work_array1, 1, S3D_000_0M10, "double", OPS_READ), ops_arg_dat(yy, 1, S3D_000_0P10_STRID3D_Y, "int", OPS_READ), ops_arg_dat(vertexdy, 1, S3D_000_0P10_0M10_STRID3D_Y, "double", OPS_READ), ops_arg_dat(density1, 1, S3D_000_0P10_0M10_0M20, "double", OPS_READ), ops_arg_dat(energy1, 1, S3D_000_0P10_0M10_0M20, "double", OPS_READ), ops_arg_dat(mass_flux_y, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE)); ops_par_loop(advec_cell_kernel4_ydir, "advec_cell_kernel4_ydir", clover_grid, 3, rangexyz_inner, ops_arg_dat(density1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(energy1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(mass_flux_y, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array4, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array5, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000_0P10, "double", OPS_READ)); } else if(dir == g_zdir) { if(sweep_number == 1) { ops_par_loop(advec_cell_kernel1_zdir, "advec_cell_kernel1_zdir", clover_grid, 3, rangexyz, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_x, 1, S3D_000_P100, "double", OPS_READ), ops_arg_dat(vol_flux_y, 1, S3D_000_0P10, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ)); } else if (sweep_number == 3) { ops_par_loop(advec_cell_kernel2_zdir, "advec_cell_kernel2_zdir", clover_grid, 3, rangexyz, ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(volume, 1, 
S3D_000, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ)); } ops_par_loop(advec_cell_kernel3_zdir, "advec_cell_kernel3_zdir", clover_grid, 3, rangexyz_inner_plus2z, ops_arg_dat(vol_flux_z, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array1, 1, S3D_000_00M1, "double", OPS_READ), ops_arg_dat(zz, 1, S3D_000_00P1_STRID3D_Z, "int", OPS_READ), ops_arg_dat(vertexdz, 1, S3D_000_00P1_00M1_STRID3D_Z, "double", OPS_READ), ops_arg_dat(density1, 1, S3D_000_00P1_00M1_00M2, "double", OPS_READ), ops_arg_dat(energy1, 1, S3D_000_00P1_00M1_00M2, "double", OPS_READ), ops_arg_dat(mass_flux_z, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000, "double", OPS_WRITE)); ops_par_loop(advec_cell_kernel4_zdir, "advec_cell_kernel4_zdir", clover_grid, 3, rangexyz_inner, ops_arg_dat(density1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(energy1, 1, S3D_000, "double", OPS_RW), ops_arg_dat(mass_flux_z, 1, S3D_000_00P1, "double", OPS_READ), ops_arg_dat(vol_flux_z, 1, S3D_000_00P1, "double", OPS_READ), ops_arg_dat(work_array1, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array2, 1, S3D_000, "double", OPS_READ), ops_arg_dat(work_array3, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array4, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array5, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array6, 1, S3D_000, "double", OPS_WRITE), ops_arg_dat(work_array7, 1, S3D_000_00P1, "double", OPS_READ)); } }
int main(int argc, char **argv) { /**-------------------------- Initialisation --------------------------**/ // OPS initialisation ops_init(argc,argv,6); int logical_size_x = 200; int logical_size_y = 200; int ngrid_x = 1; int ngrid_y = 1; int n_iter = 10000; dx = 0.01; dy = 0.01; ops_decl_const("dx",1,"double",&dx); ops_decl_const("dy",1,"double",&dy); //declare blocks ops_block *blocks = (ops_block *)malloc(ngrid_x*ngrid_y*sizeof(ops_block*)); char buf[50]; for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { sprintf(buf,"block %d,%d",i,j); blocks[i+ngrid_x*j] = ops_decl_block(2,buf); } } //declare stencils int s2D_00[] = {0,0}; ops_stencil S2D_00 = ops_decl_stencil( 2, 1, s2D_00, "00"); int s2D_00_P10_M10_0P1_0M1[] = {0,0, 1,0, -1,0, 0,1, 0,-1}; ops_stencil S2D_00_P10_M10_0P1_0M1 = ops_decl_stencil( 2, 5, s2D_00_P10_M10_0P1_0M1, "00:10:-10:01:0-1"); ops_reduction red_err = ops_decl_reduction_handle(sizeof(double), "double", "err"); //declare datasets int d_p[2] = {1,1}; //max halo depths for the dat in the possitive direction int d_m[2] = {-1,-1}; //max halo depths for the dat in the negative direction int base[2] = {0,0}; int uniform_size[2] = {(logical_size_x-1)/ngrid_x+1,(logical_size_y-1)/ngrid_y+1}; double* temp = NULL; ops_dat *coordx = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *coordy = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *u = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *u2 = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *f = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *ref = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); int *sizes = (int*)malloc(2*ngrid_x*ngrid_y*sizeof(int)); int *disps = (int*)malloc(2*ngrid_x*ngrid_y*sizeof(int)); for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int size[2] = {uniform_size[0], uniform_size[1]}; if ((i+1)*size[0]>logical_size_x) size[0] = logical_size_x - i*size[0]; if 
((j+1)*size[1]>logical_size_y) size[1] = logical_size_y - j*size[1]; sprintf(buf,"coordx %d,%d",i,j); coordx[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"coordy %d,%d",i,j); coordy[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"u %d,%d",i,j); u[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"u2 %d,%d",i,j); u2[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"f %d,%d",i,j); f[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"ref %d,%d",i,j); ref[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sizes[2*(i+ngrid_x*j)] = size[0]; sizes[2*(i+ngrid_x*j)+1] = size[1]; disps[2*(i+ngrid_x*j)] = i*uniform_size[0]; disps[2*(i+ngrid_x*j)+1] = j*uniform_size[1]; } } ops_halo *halos = (ops_halo *)malloc(2*(ngrid_x*(ngrid_y-1)+(ngrid_x-1)*ngrid_y)*sizeof(ops_halo *)); int off = 0; for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { if (i > 0) { int halo_iter[] = {1,sizes[2*(i+ngrid_x*j)+1]}; int base_from[] = {sizes[2*(i-1+ngrid_x*j)]-1,0}; int base_to[] = {-1,0}; int dir[] = {1,2}; halos[off++] = ops_decl_halo(u[i-1+ngrid_x*j], u[i+ngrid_x*j], halo_iter, base_from, base_to, dir, dir); base_from[0] = 0; base_to[0] = sizes[2*(i+ngrid_x*j)]; halos[off++] = ops_decl_halo(u[i+ngrid_x*j], u[i-1+ngrid_x*j], halo_iter, base_from, base_to, dir, dir); } if (j > 0) { int halo_iter[] = {sizes[2*(i+ngrid_x*j)],1}; int base_from[] = {0,sizes[2*(i+ngrid_x*(j-1))+1]-1}; int base_to[] = {0,-1}; int dir[] = {1,2}; halos[off++] = ops_decl_halo(u[i+ngrid_x*(j-1)], u[i+ngrid_x*j], halo_iter, base_from, base_to, dir, dir); base_from[1] = 0; base_to[1] = sizes[2*(i+ngrid_x*j)+1]; halos[off++] = ops_decl_halo(u[i+ngrid_x*j], 
u[i+ngrid_x*(j-1)], halo_iter, base_from, base_to, dir, dir); } } } if (off != 2*(ngrid_x*(ngrid_y-1)+(ngrid_x-1)*ngrid_y)) printf("Something is not right\n"); ops_halo_group u_halos = ops_decl_halo_group(off,halos); ops_partition(""); ops_checkpointing_init("check.h5", 5.0); /**-------------------------- Computations --------------------------**/ double ct0, ct1, et0, et1; ops_timers_core(&ct0, &et0); //populate forcing, reference solution and boundary conditions for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {-1,sizes[2*(i+ngrid_x*j)]+1,-1,sizes[2*(i+ngrid_x*j)+1]+1}; ops_par_loop(poisson_kernel_populate, "poisson_kernel_populate", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_gbl(&disps[2*(i+ngrid_x*j)], 1, "int", OPS_READ), ops_arg_gbl(&disps[2*(i+ngrid_x*j)+1], 1, "int", OPS_READ), ops_arg_idx(), ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_WRITE), ops_arg_dat(f[i+ngrid_x*j], S2D_00, "double", OPS_WRITE), ops_arg_dat(ref[i+ngrid_x*j], S2D_00, "double", OPS_WRITE)); } } //initial guess 0 for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]}; ops_par_loop(poisson_kernel_initialguess, "poisson_kernel_initialguess", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_WRITE)); } } for (int iter = 0; iter < n_iter; iter++) { ops_halo_transfer(u_halos); for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]}; ops_par_loop(poisson_kernel_stencil, "poisson_kernel_stencil", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_dat(u[i+ngrid_x*j], S2D_00_P10_M10_0P1_0M1, "double", OPS_READ), ops_arg_dat(f[i+ngrid_x*j], S2D_00, "double", OPS_READ), ops_arg_dat(u2[i+ngrid_x*j], S2D_00, "double", OPS_WRITE)); } } for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = 
{0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]}; ops_par_loop(poisson_kernel_update, "poisson_kernel_update", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_dat(u2[i+ngrid_x*j], S2D_00, "double", OPS_READ), ops_arg_dat(u[i+ngrid_x*j] , S2D_00, "double", OPS_WRITE)); } } } double err = 0.0; for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]}; ops_par_loop(poisson_kernel_error, "poisson_kernel_error", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_READ), ops_arg_dat(ref[i+ngrid_x*j] , S2D_00, "double", OPS_READ), ops_arg_reduce(red_err, 1, "double", OPS_INC)); } } ops_reduction_result(red_err,&err); ops_printf("Total error: %g\n",err); ops_timers_core(&ct1, &et1); ops_timing_output(); ops_printf("\nTotal Wall time %lf\n",et1-et0); ops_exit(); }
void field_summary() { double qa_diff; int x_min = field.x_min; int x_max = field.x_max; int y_min = field.y_min; int y_max = field.y_max; int z_min = field.z_min; int z_max = field.z_max; int rangexyz_inner[] = {x_min, x_max, y_min, y_max, z_min, z_max}; ideal_gas(FALSE); double vol = 0.0, mass = 0.0, ie = 0.0, ke = 0.0, press = 0.0; ops_par_loop_field_summary_kernel( "field_summary_kernel", clover_grid, 3, rangexyz_inner, ops_arg_dat(volume, 1, S3D_000, "double", OPS_READ), ops_arg_dat(density0, 1, S3D_000, "double", OPS_READ), ops_arg_dat(energy0, 1, S3D_000, "double", OPS_READ), ops_arg_dat(pressure, 1, S3D_000, "double", OPS_READ), ops_arg_dat(xvel0, 1, S3D_000_fP1P1P1, "double", OPS_READ), ops_arg_dat(yvel0, 1, S3D_000_fP1P1P1, "double", OPS_READ), ops_arg_dat(zvel0, 1, S3D_000_fP1P1P1, "double", OPS_READ), ops_arg_reduce(red_vol, 1, "double", OPS_INC), ops_arg_reduce(red_mass, 1, "double", OPS_INC), ops_arg_reduce(red_ie, 1, "double", OPS_INC), ops_arg_reduce(red_ke, 1, "double", OPS_INC), ops_arg_reduce(red_press, 1, "double", OPS_INC)); ops_reduction_result(red_vol, &vol); ops_reduction_result(red_mass, &mass); ops_reduction_result(red_ie, &ie); ops_reduction_result(red_ke, &ke); ops_reduction_result(red_press, &press); ops_fprintf(g_out, "\n"); ops_fprintf(g_out, "\n Time %lf\n", clover_time); ops_fprintf(g_out, " %-10s %-10s %-10s %-10s %-15s %-15s %-s\n", " Volume", " Mass", " Density", " Pressure", " Internal Energy", "Kinetic Energy", "Total Energy"); ops_fprintf(g_out, " step: %3d %-10.3E %-10.3E %-10.3E %-10.3E " "%-15.3E %-15.3E %-.3E", step, vol, mass, mass / vol, press / vol, ie, ke, ie + ke); if (complete == TRUE && test_problem) { qa_diff = DBL_MAX; if (test_problem == 1) qa_diff = fabs((100.0 * (ke / 3.64560737191257)) - 100.0); if (test_problem == 2) qa_diff = fabs((100.0 * (ke / 20.0546870878964)) - 100.0); if (test_problem == 3) qa_diff = fabs((100.0 * (ke / 0.37517221925665)) - 100.0); if (test_problem == 4) qa_diff = fabs((100.0 * (ke / 
17.9845165368889)) - 100.0); if (test_problem == 5) qa_diff = fabs((100.0 * (ke / 2.05018938455107)) - 100.0); ops_printf( "\n\nTest problem %d is within %3.15E %% of the expected solution\n", test_problem, qa_diff); ops_fprintf( g_out, "\n\nTest problem %d is within %3.15E %% of the expected solution\n", test_problem, qa_diff); if (qa_diff < 0.001) { ops_printf("This test is considered PASSED\n"); ops_fprintf(g_out, "This test is considered PASSED\n"); } else { ops_printf("This test is considered FAILED\n"); ops_fprintf(g_out, "This test is considered FAILED\n"); } } fflush(g_out); }