int main(int argc, const char **argv) { c0 = 0.500000000000000; rc0 = 1.0 / 280.0; rc1 = 4.0 / 105.0; rc2 = 1.0 / 5.0; rc3 = 4.0 / 5.0; nx0 = 1000; deltai0 = 0.00100000000000000; deltat = 0.000400000000000000; rkold[0] = 1.0 / 4.0; rkold[1] = 3.0 / 20.0; rkold[2] = 3.0 / 5.0; rknew[0] = 2.0 / 3.0; rknew[1] = 5.0 / 12.0; rknew[2] = 3.0 / 5.0; ops_init(argc, argv, 1); ops_init_backend(); ops_decl_const2("c0", 1, "double", &c0); ops_decl_const2("rc0", 1, "double", &rc0); ops_decl_const2("rc1", 1, "double", &rc1); ops_decl_const2("rc2", 1, "double", &rc2); ops_decl_const2("rc3", 1, "double", &rc3); ops_decl_const2("nx0", 1, "int", &nx0); ops_decl_const2("deltai0", 1, "double", &deltai0); ops_decl_const2("deltat", 1, "double", &deltat); ops_block complex_numbers_block; complex_numbers_block = ops_decl_block(1, "complex_numbers_block"); ops_dat phi; ops_dat phi_old; ops_dat wk0; ops_dat wk1; int halo_p[] = {4}; int halo_m[] = {-4}; int size[] = {nx0}; int base[] = {0}; double *val = NULL; phi = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val, "double", "phi"); phi_old = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val, "double", "phi_old"); wk0 = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val, "double", "wk0"); wk1 = ops_decl_dat(complex_numbers_block, 1, size, base, halo_m, halo_p, val, "double", "wk1"); int stencil1_temp[] = {0}; ops_stencil stencil1 = ops_decl_stencil(1, 1, stencil1_temp, "0"); int stencil0_temp[] = {-4, -3, -2, -1, 1, 2, 3, 4}; ops_stencil stencil0 = ops_decl_stencil(1, 8, stencil0_temp, "-4,-3,-2,-1,1,2,3,4"); ops_reduction real = ops_decl_reduction_handle(sizeof(double), "double", "reduction_real"); ops_reduction imaginary = ops_decl_reduction_handle(sizeof(double), "double", "reduction_imaginary"); ops_halo_group halo_exchange0; { int halo_iter[] = {4}; int from_base[] = {0}; int to_base[] = {nx0}; int dir[] = {1}; ops_halo halo0 = ops_decl_halo(phi, phi, halo_iter, from_base, to_base, dir, dir); ops_halo grp[] = {halo0}; halo_exchange0 = ops_decl_halo_group(1, grp); } ops_halo_group halo_exchange1; { int halo_iter[] = {4}; int from_base[] = {nx0 - 4}; int to_base[] = {-4}; int dir[] = {1}; ops_halo halo0 = ops_decl_halo(phi, phi, halo_iter, from_base, to_base, dir, dir); ops_halo grp[] = {halo0}; halo_exchange1 = ops_decl_halo_group(1, grp); } ops_partition(""); int iter_range5[] = {-4, nx0 + 4}; ops_par_loop_complex_numbers_block0_5_kernel( "Initialisation", complex_numbers_block, 1, iter_range5, ops_arg_dat(phi, 1, stencil1, "double", OPS_WRITE), ops_arg_idx()); ops_halo_transfer(halo_exchange0); ops_halo_transfer(halo_exchange1); double cpu_start, elapsed_start; ops_timers(&cpu_start, &elapsed_start); for (int iteration = 0; iteration < 1; iteration++) { int iter_range4[] = {-4, nx0 + 4}; ops_par_loop_complex_numbers_block0_4_kernel( "Save equations", complex_numbers_block, 1, iter_range4, ops_arg_dat(phi, 1, stencil1, "double", OPS_READ), ops_arg_dat(phi_old, 1, stencil1, "double", OPS_WRITE)); for (int stage = 0; stage < 3; stage++) { int iter_range0[] = {0, nx0}; ops_par_loop_complex_numbers_block0_0_kernel( "D(phi[x0 t] x0)", complex_numbers_block, 1, iter_range0, ops_arg_dat(phi, 1, stencil0, "double", OPS_READ), ops_arg_dat(wk0, 1, stencil1, "double", OPS_WRITE)); int iter_range1[] = {0, nx0}; ops_par_loop_complex_numbers_block0_1_kernel( "Residual of equation", complex_numbers_block, 1, iter_range1, ops_arg_dat(wk0, 1, stencil1, "double", OPS_READ), ops_arg_dat(wk1, 1, stencil1, "double", OPS_WRITE)); int iter_range2[] = {-4, nx0 + 4}; ops_par_loop_complex_numbers_block0_2_kernel( "RK new (subloop) update", complex_numbers_block, 1, iter_range2, ops_arg_dat(phi_old, 1, stencil1, "double", OPS_READ), ops_arg_dat(wk1, 1, stencil1, "double", OPS_READ), ops_arg_dat(phi, 1, stencil1, "double", OPS_WRITE), ops_arg_gbl(&rknew[stage], 1, "double", OPS_READ)); int iter_range3[] = {-4, nx0 + 4}; ops_par_loop_complex_numbers_block0_3_kernel( "RK old update", complex_numbers_block, 1, iter_range3, ops_arg_dat(wk1, 1, stencil1, "double", OPS_READ), ops_arg_dat(phi_old, 1, stencil1, "double", OPS_RW), ops_arg_gbl(&rkold[stage], 1, "double", OPS_READ)); ops_halo_transfer(halo_exchange0); ops_halo_transfer(halo_exchange1); } int iter_range0[] = {0, nx0}; ops_par_loop_complex_numbers_block0_cn_kernel( "Complex numbers", complex_numbers_block, 1, iter_range0, ops_arg_dat(phi, 1, stencil0, "double", OPS_READ), ops_arg_reduce(real, 1, "double", OPS_INC), ops_arg_reduce(imaginary, 1, "double", OPS_INC)); } double cpu_end, elapsed_end; ops_timers(&cpu_end, &elapsed_end); ops_printf("\nTimings are:\n"); ops_printf("-----------------------------------------\n"); ops_printf("Total Wall time %lf\n", elapsed_end - elapsed_start); ops_fetch_block_hdf5_file(complex_numbers_block, "complex_numbers_2500.h5"); ops_fetch_dat_hdf5_file(phi, "complex_numbers_2500.h5"); ops_exit(); }
int main(int argc, char **argv) { /**-------------------------- Initialisation --------------------------**/ // OPS initialisation ops_init(argc,argv,6); int logical_size_x = 200; int logical_size_y = 200; int ngrid_x = 1; int ngrid_y = 1; int n_iter = 10000; dx = 0.01; dy = 0.01; ops_decl_const("dx",1,"double",&dx); ops_decl_const("dy",1,"double",&dy); //declare blocks ops_block *blocks = (ops_block *)malloc(ngrid_x*ngrid_y*sizeof(ops_block*)); char buf[50]; for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { sprintf(buf,"block %d,%d",i,j); blocks[i+ngrid_x*j] = ops_decl_block(2,buf); } } //declare stencils int s2D_00[] = {0,0}; ops_stencil S2D_00 = ops_decl_stencil( 2, 1, s2D_00, "00"); int s2D_00_P10_M10_0P1_0M1[] = {0,0, 1,0, -1,0, 0,1, 0,-1}; ops_stencil S2D_00_P10_M10_0P1_0M1 = ops_decl_stencil( 2, 5, s2D_00_P10_M10_0P1_0M1, "00:10:-10:01:0-1"); ops_reduction red_err = ops_decl_reduction_handle(sizeof(double), "double", "err"); //declare datasets int d_p[2] = {1,1}; //max halo depths for the dat in the possitive direction int d_m[2] = {-1,-1}; //max halo depths for the dat in the negative direction int base[2] = {0,0}; int uniform_size[2] = {(logical_size_x-1)/ngrid_x+1,(logical_size_y-1)/ngrid_y+1}; double* temp = NULL; ops_dat *coordx = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *coordy = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *u = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *u2 = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *f = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); ops_dat *ref = (ops_dat *)malloc(ngrid_x*ngrid_y*sizeof(ops_dat*)); int *sizes = (int*)malloc(2*ngrid_x*ngrid_y*sizeof(int)); int *disps = (int*)malloc(2*ngrid_x*ngrid_y*sizeof(int)); for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int size[2] = {uniform_size[0], uniform_size[1]}; if ((i+1)*size[0]>logical_size_x) size[0] = logical_size_x - i*size[0]; if ((j+1)*size[1]>logical_size_y) size[1] = logical_size_y - j*size[1]; sprintf(buf,"coordx %d,%d",i,j); coordx[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"coordy %d,%d",i,j); coordy[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"u %d,%d",i,j); u[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"u2 %d,%d",i,j); u2[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"f %d,%d",i,j); f[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sprintf(buf,"ref %d,%d",i,j); ref[i+ngrid_x*j] = ops_decl_dat(blocks[i+ngrid_x*j], 1, size, base, d_m, d_p, temp, "double", buf); sizes[2*(i+ngrid_x*j)] = size[0]; sizes[2*(i+ngrid_x*j)+1] = size[1]; disps[2*(i+ngrid_x*j)] = i*uniform_size[0]; disps[2*(i+ngrid_x*j)+1] = j*uniform_size[1]; } } ops_halo *halos = (ops_halo *)malloc(2*(ngrid_x*(ngrid_y-1)+(ngrid_x-1)*ngrid_y)*sizeof(ops_halo *)); int off = 0; for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { if (i > 0) { int halo_iter[] = {1,sizes[2*(i+ngrid_x*j)+1]}; int base_from[] = {sizes[2*(i-1+ngrid_x*j)]-1,0}; int base_to[] = {-1,0}; int dir[] = {1,2}; halos[off++] = ops_decl_halo(u[i-1+ngrid_x*j], u[i+ngrid_x*j], halo_iter, base_from, base_to, dir, dir); base_from[0] = 0; base_to[0] = sizes[2*(i+ngrid_x*j)]; halos[off++] = ops_decl_halo(u[i+ngrid_x*j], u[i-1+ngrid_x*j], halo_iter, base_from, base_to, dir, dir); } if (j > 0) { int halo_iter[] = {sizes[2*(i+ngrid_x*j)],1}; int base_from[] = {0,sizes[2*(i+ngrid_x*(j-1))+1]-1}; int base_to[] = {0,-1}; int dir[] = {1,2}; halos[off++] = ops_decl_halo(u[i+ngrid_x*(j-1)], u[i+ngrid_x*j], halo_iter, base_from, base_to, dir, dir); base_from[1] = 0; base_to[1] = sizes[2*(i+ngrid_x*j)+1]; halos[off++] = ops_decl_halo(u[i+ngrid_x*j], u[i+ngrid_x*(j-1)], halo_iter, base_from, base_to, dir, dir); } } } if (off != 2*(ngrid_x*(ngrid_y-1)+(ngrid_x-1)*ngrid_y)) printf("Something is not right\n"); ops_halo_group u_halos = ops_decl_halo_group(off,halos); ops_partition(""); ops_checkpointing_init("check.h5", 5.0); /**-------------------------- Computations --------------------------**/ double ct0, ct1, et0, et1; ops_timers_core(&ct0, &et0); //populate forcing, reference solution and boundary conditions for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {-1,sizes[2*(i+ngrid_x*j)]+1,-1,sizes[2*(i+ngrid_x*j)+1]+1}; ops_par_loop(poisson_kernel_populate, "poisson_kernel_populate", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_gbl(&disps[2*(i+ngrid_x*j)], 1, "int", OPS_READ), ops_arg_gbl(&disps[2*(i+ngrid_x*j)+1], 1, "int", OPS_READ), ops_arg_idx(), ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_WRITE), ops_arg_dat(f[i+ngrid_x*j], S2D_00, "double", OPS_WRITE), ops_arg_dat(ref[i+ngrid_x*j], S2D_00, "double", OPS_WRITE)); } } //initial guess 0 for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]}; ops_par_loop(poisson_kernel_initialguess, "poisson_kernel_initialguess", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_WRITE)); } } for (int iter = 0; iter < n_iter; iter++) { ops_halo_transfer(u_halos); for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]}; ops_par_loop(poisson_kernel_stencil, "poisson_kernel_stencil", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_dat(u[i+ngrid_x*j], S2D_00_P10_M10_0P1_0M1, "double", OPS_READ), ops_arg_dat(f[i+ngrid_x*j], S2D_00, "double", OPS_READ), ops_arg_dat(u2[i+ngrid_x*j], S2D_00, "double", OPS_WRITE)); } } for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]}; ops_par_loop(poisson_kernel_update, "poisson_kernel_update", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_dat(u2[i+ngrid_x*j], S2D_00, "double", OPS_READ), ops_arg_dat(u[i+ngrid_x*j] , S2D_00, "double", OPS_WRITE)); } } } double err = 0.0; for (int j = 0; j < ngrid_y; j++) { for (int i = 0; i < ngrid_x; i++) { int iter_range[] = {0,sizes[2*(i+ngrid_x*j)],0,sizes[2*(i+ngrid_x*j)+1]}; ops_par_loop(poisson_kernel_error, "poisson_kernel_error", blocks[i+ngrid_x*j], 2, iter_range, ops_arg_dat(u[i+ngrid_x*j], S2D_00, "double", OPS_READ), ops_arg_dat(ref[i+ngrid_x*j] , S2D_00, "double", OPS_READ), ops_arg_reduce(red_err, 1, "double", OPS_INC)); } } ops_reduction_result(red_err,&err); ops_printf("Total error: %g\n",err); ops_timers_core(&ct1, &et1); ops_timing_output(); ops_printf("\nTotal Wall time %lf\n",et1-et0); ops_exit(); }