int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); //MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *pp; double *A, *r, *u, *du; int nnode, nedge; /**------------------------BEGIN I/O and PARTITIONING ---------------------**/ int g_nnode, g_nedge, g_n, g_e; g_nnode = (NN-1)*(NN-1); g_nedge = (NN-1)*(NN-1) + 4*(NN-1)*(NN-2); int *g_pp = 0; double *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0; op_printf("Global number of nodes, edges = %d, %d\n",g_nnode,g_nedge); if(my_rank == MPI_ROOT) { g_pp = (int *)malloc(sizeof(int)*2*g_nedge); g_A = (double *)malloc(sizeof(double)*g_nedge); g_r = (double *)malloc(sizeof(double)*g_nnode); g_u = (double *)malloc(sizeof(double)*g_nnode); g_du = (double *)malloc(sizeof(double)*g_nnode); // create matrix and r.h.s., and set coordinates needed for renumbering / partitioning g_e = 0; for (int i=1; i<NN; i++) { for (int j=1; j<NN; j++) { g_n = i-1 + (j-1)*(NN-1); g_r[g_n] = 0.0f; g_u[g_n] = 0.0f; g_du[g_n] = 0.0f; g_pp[2*g_e] = g_n; g_pp[2*g_e+1] = g_n; g_A[g_e] = -1.0f; g_e++; for (int pass=0; pass<4; pass++) { int i2 = i; int j2 = j; if (pass==0) i2 += -1; if (pass==1) i2 += 1; if (pass==2) j2 += -1; if (pass==3) j2 += 1; if ( (i2==0) || (i2==NN) || (j2==0) || (j2==NN) ) { g_r[g_n] += 0.25f; } else { g_pp[2*g_e] = g_n; g_pp[2*g_e+1] = i2-1 + (j2-1)*(NN-1); g_A[g_e] = 0.25f; g_e++; } } } } } /* Compute local sizes */ nnode = compute_local_size (g_nnode, comm_size, my_rank); nedge = compute_local_size (g_nedge, comm_size, my_rank); op_printf("Number of nodes, edges on process %d = %d, %d\n" ,my_rank,nnode,nedge); /*Allocate memory to hold local sets, mapping tables and data*/ pp = (int *)malloc(2*sizeof(int)*nedge); A = (double *) malloc(nedge*sizeof(double)); r = (double *) malloc(nnode*sizeof(double)); u = (double *) malloc(nnode*sizeof(double)); du = (double *) malloc(nnode*sizeof(double)); /* scatter sets, mappings and data on sets*/ scatter_int_array(g_pp, pp, comm_size, g_nedge,nedge, 2); scatter_double_array(g_A, A, comm_size, g_nedge,nedge, 1); scatter_double_array(g_r, r, comm_size, g_nnode,nnode, 1); scatter_double_array(g_u, u, comm_size, g_nnode,nnode, 1); scatter_double_array(g_du, du, comm_size, g_nnode,nnode, 1); /*Freeing memory allocated to gloabal arrays on rank 0 after scattering to all processes*/ if(my_rank == MPI_ROOT) { free(g_pp); free(g_A); free(g_r); free(g_u); free(g_du); } /**------------------------END I/O and PARTITIONING ---------------------**/ // declare sets, pointers, and datasets op_set nodes = op_decl_set(nnode,"nodes"); op_set edges = op_decl_set(nedge,"edges"); op_map ppedge = op_decl_map(edges,nodes,2,pp, "ppedge"); op_dat p_A = op_decl_dat(edges,1,"double", A, "p_A" ); op_dat p_r = op_decl_dat(nodes,1,"double", r, "p_r" ); op_dat p_u = op_decl_dat(nodes,1,"double", u, "p_u" ); op_dat p_du = op_decl_dat(nodes,1,"double", du,"p_du"); alpha = 1.0f; op_decl_const(1,"double",&alpha); op_diagnostic_output(); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main iteration loop double u_sum, u_max, beta = 1.0f; for (int iter=0; iter<NITER; iter++) { op_par_loop(res,"res", edges, op_arg_dat(p_A, -1,OP_ID, 1,"double", OP_READ), op_arg_dat(p_u, 1,ppedge, 1,"double", OP_READ), op_arg_dat(p_du, 0,ppedge, 1,"double", OP_INC), op_arg_gbl(&beta, 1,"double", OP_READ)); u_sum = 0.0f; u_max = 0.0f; op_par_loop(update,"update", nodes, op_arg_dat(p_r, -1,OP_ID, 1,"double",OP_READ), op_arg_dat(p_du, -1,OP_ID, 1,"double",OP_RW), op_arg_dat(p_u, -1,OP_ID, 1,"double",OP_INC), op_arg_gbl(&u_sum,1,"double",OP_INC), op_arg_gbl(&u_max,1,"double",OP_MAX)); op_printf("\n u max/rms = %f %f \n\n",u_max, sqrt(u_sum/g_nnode)); } op_timers(&cpu_t2, &wall_t2); //get results data array op_fetch_data(p_u, u); //output the result dat array to files op_print_dat_to_txtfile(p_u, "out_grid_mpi.dat"); //ASCI op_print_dat_to_binfile(p_u, "out_grid_mpi.bin"); //Binary printf("solution on rank %d\n", my_rank); for (int i = 0; i<nnode; i++) { printf(" %7.4f",u[i]);fflush(stdout); } printf("\n"); //print each mpi process's timing info for each kernel op_timing_output(); //print total time for niter interations op_printf("Max total runtime = %f\n",wall_t2-wall_t1); //gather results from all ranks and check double* ug = (double *)malloc(sizeof(double)*op_get_size(nodes)); op_fetch_data_hdf5(p_u, ug, 0, op_get_size(nodes)-1); int result = check_result<double>(ug, NN, TOLERANCE); free(ug); op_exit(); free(u); free(pp); free(A); free(r); free(du); return result; }
int main(int argc, char **argv) { MPI_Init(&argc, &argv); int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); int *groups = (int *)malloc(size * sizeof(int)); int *groups2 = (int *)malloc(size * sizeof(int)); int my_type = 1; //This is to be read from a configuration file MPI_Allgather(&my_type, 1, MPI_INT, groups, 1, MPI_INT, MPI_COMM_WORLD); int num_groups = 0; for (int i = 0; i < size; i++) num_groups = num_groups > groups[i] ? num_groups : groups[i]; num_groups++; //The global group MPI_Group global_grp; MPI_Comm_group(MPI_COMM_WORLD, &global_grp); //Create sub-groups and sub-communicators MPI_Group mpigroups[num_groups]; MPI_Comm mpicomms[num_groups]; int count = 0; for (int i = 0; i < num_groups; ++i) { count = 0; for (int j = 0; j < size; ++j) { if (groups[j] == i) { groups2[count++] = j; } } MPI_Group_incl(global_grp, count, groups2, &mpigroups[i]); MPI_Comm_create(MPI_COMM_WORLD, mpigroups[i], &mpicomms[i]); } //coupling procs for (int i = 0; i < 1; ++i) { count = 0; for (int j = 0; j < size; ++j) { if (groups[j] == i) { groups2[count++] = j; } } } // OP initialisation op_mpi_init(argc,argv,2,MPI_COMM_WORLD, mpicomms[1]); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_map pbndbnd = op_decl_map_hdf5(bedges, bedges,1, file, "pbndbnd"); op_map m_test = op_decl_map_hdf5(cells, nodes,4, file, "m_test"); if (m_test == NULL) printf("m_test not found\n"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_dat p_test = op_decl_dat_hdf5(cells ,4,"double",file,"p_test"); if (p_test == NULL) printf("p_test not found\n"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const(1,"double",&gam ); op_decl_const(1,"double",&gm1 ); op_decl_const(1,"double",&cfl ); op_decl_const(1,"double",&eps ); op_decl_const(1,"double",&mach ); op_decl_const(1,"double",&alpha); op_decl_const(4,"double",qinf ); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_dump_to_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); //op_partition("PARMETIS", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //create some temporaries so we can exchange data defined on the boundary double *ptr = NULL; op_dat center = op_decl_dat_temp(bedges, 3, "double", ptr, "center"); op_dat pres = op_decl_dat_temp(bedges, 1, "double", ptr, "pres"); int *ptr2 = NULL; op_dat p_bound2 = op_decl_dat_temp(bedges, 1, "int", ptr2, "p_bound2"); op_dat center2 = op_decl_dat_temp(bedges, 3, "double", ptr, "center2"); op_dat pres2 = op_decl_dat_temp(bedges, 1, "double", ptr, "pres2"); //create import and export handles op_export_handle handle = op_export_init(count, groups2, pbndbnd); op_import_handle handle2 = op_import_init(count, groups2, center); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop(save_soln,"save_soln", cells, op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop(adt_calc,"adt_calc",cells, op_arg_dat(p_x, 0,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 1,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 2,pcell, 2,"double",OP_READ ), op_arg_dat(p_x, 3,pcell, 2,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE)); // calculate flux residual op_par_loop(res_calc,"res_calc",edges, op_arg_dat(p_x, 0,pedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pecell,4,"double",OP_READ), op_arg_dat(p_q, 1,pecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pecell,1,"double",OP_READ), op_arg_dat(p_adt, 1,pecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pecell,4,"double",OP_INC ), op_arg_dat(p_res, 1,pecell,4,"double",OP_INC )); op_par_loop(bres_calc,"bres_calc",bedges, op_arg_dat(p_x, 0,pbedge, 2,"double",OP_READ), op_arg_dat(p_x, 1,pbedge, 2,"double",OP_READ), op_arg_dat(p_q, 0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt, 0,pbecell,1,"double",OP_READ), op_arg_dat(p_res, 0,pbecell,4,"double",OP_INC ), op_arg_dat(p_bound,-1,OP_ID ,1,"int", OP_READ), op_arg_dat(center, -1, OP_ID, 3, "double", OP_WRITE), op_arg_dat(pres, -1, OP_ID, 1, "double", OP_WRITE)); // update flow field rms = 0.0; op_par_loop(update,"update",cells, op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ), op_arg_dat(p_q, -1,OP_ID, 4,"double",OP_WRITE), op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW ), op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) { op_printf(" %d %10.5e \n",iter,rms); //Export data op_dat arr[] = {p_bound, center, pres}; op_export_data(handle, 3, arr); //Import data op_dat arr2[] = {p_bound2, center2, pres2}; op_import_data(handle2, 3, arr2); //check whether the two are the same op_par_loop(comparethem, "comparethem", bedges, op_arg_dat(p_bound,-1, OP_ID, 1, "int", OP_READ), op_arg_dat(p_bound2,-1, OP_ID, 1, "int", OP_READ), op_arg_dat(center,-1, OP_ID, 3, "double", OP_READ), op_arg_dat(center2,-1, OP_ID, 3, "double", OP_READ), op_arg_dat(pres,-1, OP_ID, 1, "double", OP_READ), op_arg_dat(pres2,-1, OP_ID, 1, "double", OP_READ)); } } op_timers(&cpu_t2, &wall_t2); double* q = (double *)malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_hdf5(p_q, q, 0, op_get_size(cells)-1); free(q); op_fetch_data_hdf5_file(p_q, "file_name.h5"); //printf("Root process = %d\n",op_is_root()); //output the result dat array to files //op_write_hdf5("new_grid_out.h5"); //compress using // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5 op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int niter; double rms; //timer double cpu_t1, cpu_t2, wall_t1, wall_t2; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); char file[] = "new_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set edges = op_decl_set_hdf5(file, "edges"); op_set bedges = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pedge = op_decl_map_hdf5(edges, nodes, 2, file, "pedge"); op_map pecell = op_decl_map_hdf5(edges, cells,2, file, "pecell"); op_map pbedge = op_decl_map_hdf5(bedges,nodes,2, file, "pbedge"); op_map pbecell = op_decl_map_hdf5(bedges,cells,1, file, "pbecell"); op_map pcell = op_decl_map_hdf5(cells, nodes,4, file, "pcell"); op_map m_test = op_decl_map_hdf5(cells, nodes,4, file, "m_test"); if (m_test == NULL) printf("m_test not found\n"); op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int" ,file,"p_bound"); op_dat p_x = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x"); op_dat p_q = op_decl_dat_hdf5(cells ,4,"double",file,"p_q"); op_dat p_qold = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold"); op_dat p_adt = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt"); op_dat p_res = op_decl_dat_hdf5(cells ,4,"double",file,"p_res"); op_dat p_test = op_decl_dat_hdf5(cells ,4,"double",file,"p_test"); if (p_test == NULL) printf("p_test not found\n"); op_get_const_hdf5("gam", 1, "double", (char *)&gam, "new_grid.h5"); op_get_const_hdf5("gm1", 1, "double", (char *)&gm1, "new_grid.h5"); op_get_const_hdf5("cfl", 1, "double", (char *)&cfl, "new_grid.h5"); op_get_const_hdf5("eps", 1, "double", (char *)&eps, "new_grid.h5"); op_get_const_hdf5("mach", 1, "double", (char *)&mach, "new_grid.h5"); op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5"); op_get_const_hdf5("qinf", 4, "double", (char *)&qinf, "new_grid.h5"); op_decl_const2("gam",1,"double",&gam); op_decl_const2("gm1",1,"double",&gm1); op_decl_const2("cfl",1,"double",&cfl); op_decl_const2("eps",1,"double",&eps); op_decl_const2("mach",1,"double",&mach); op_decl_const2("alpha",1,"double",&alpha); op_decl_const2("qinf",4,"double",qinf); op_diagnostic_output(); //write back original data just to compare you read the file correctly //do an h5diff between new_grid_out.h5 and new_grid.h5 to //compare two hdf5 files op_dump_to_hdf5("new_grid_out.h5"); op_write_const_hdf5("gam",1,"double",(char *)&gam, "new_grid_out.h5"); op_write_const_hdf5("gm1",1,"double",(char *)&gm1, "new_grid_out.h5"); op_write_const_hdf5("cfl",1,"double",(char *)&cfl, "new_grid_out.h5"); op_write_const_hdf5("eps",1,"double",(char *)&eps, "new_grid_out.h5"); op_write_const_hdf5("mach",1,"double",(char *)&mach, "new_grid_out.h5"); op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5"); op_write_const_hdf5("qinf",4,"double",(char *)qinf, "new_grid_out.h5"); //trigger partitioning and halo creation routines op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x); //op_partition("PARMETIS", "KWAY", edges, pecell, p_x); int g_ncell = op_get_size(cells); //initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 1000; for(int iter=1; iter<=niter; iter++) { // save old flow solution op_par_loop_save_soln("save_soln",cells, op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_WRITE)); // predictor/corrector update loop for(int k=0; k<2; k++) { // calculate area/timstep op_par_loop_adt_calc("adt_calc",cells, op_arg_dat(p_x,0,pcell,2,"double",OP_READ), op_arg_dat(p_x,1,pcell,2,"double",OP_READ), op_arg_dat(p_x,2,pcell,2,"double",OP_READ), op_arg_dat(p_x,3,pcell,2,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_WRITE)); // calculate flux residual op_par_loop_res_calc("res_calc",edges, op_arg_dat(p_x,0,pedge,2,"double",OP_READ), op_arg_dat(p_x,1,pedge,2,"double",OP_READ), op_arg_dat(p_q,0,pecell,4,"double",OP_READ), op_arg_dat(p_q,1,pecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pecell,1,"double",OP_READ), op_arg_dat(p_adt,1,pecell,1,"double",OP_READ), op_arg_dat(p_res,0,pecell,4,"double",OP_INC), op_arg_dat(p_res,1,pecell,4,"double",OP_INC)); op_par_loop_bres_calc("bres_calc",bedges, op_arg_dat(p_x,0,pbedge,2,"double",OP_READ), op_arg_dat(p_x,1,pbedge,2,"double",OP_READ), op_arg_dat(p_q,0,pbecell,4,"double",OP_READ), op_arg_dat(p_adt,0,pbecell,1,"double",OP_READ), op_arg_dat(p_res,0,pbecell,4,"double",OP_INC), op_arg_dat(p_bound,-1,OP_ID,1,"int",OP_READ)); // update flow field rms = 0.0; op_par_loop_update("update",cells, op_arg_dat(p_qold,-1,OP_ID,4,"double",OP_READ), op_arg_dat(p_q,-1,OP_ID,4,"double",OP_WRITE), op_arg_dat(p_res,-1,OP_ID,4,"double",OP_RW), op_arg_dat(p_adt,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); } // print iteration history rms = sqrt(rms/(double)g_ncell); if (iter%100 == 0) op_printf(" %d %10.5e \n",iter,rms); } op_timers(&cpu_t2, &wall_t2); double* q = (double *)malloc(sizeof(double)*op_get_size(cells)*4); op_fetch_data_hdf5(p_q, q, 0, op_get_size(cells)-1); free(q); op_fetch_data_hdf5_file(p_q, "file_name.h5"); //printf("Root process = %d\n",op_is_root()); //output the result dat array to files //op_write_hdf5("new_grid_out.h5"); //compress using // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5 op_timing_output(); op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1); op_exit(); }