int main(int argc, char **argv)
{
  // OP initialisation
  op_init(argc,argv,2);

  int niter;
  double rms;

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  // set constants and initialise flow field and residual
  op_printf("initialising flow field \n");

  char file[] = "new_grid.h5";

  // declare sets, pointers, datasets and global constants

  op_set nodes  = op_decl_set_hdf5(file, "nodes");
  op_set edges  = op_decl_set_hdf5(file, "edges");
  op_set bedges = op_decl_set_hdf5(file, "bedges");
  op_set cells  = op_decl_set_hdf5(file, "cells");

  op_map pedge   = op_decl_map_hdf5(edges, nodes, 2, file, "pedge");
  op_map pecell  = op_decl_map_hdf5(edges, cells, 2, file, "pecell");
  op_map pbedge  = op_decl_map_hdf5(bedges,nodes, 2, file, "pbedge");
  op_map pbecell = op_decl_map_hdf5(bedges,cells, 1, file, "pbecell");
  op_map pcell   = op_decl_map_hdf5(cells, nodes, 4, file, "pcell");

  op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int"   ,file,"p_bound");
  op_dat p_x     = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x");
  op_dat p_q     = op_decl_dat_hdf5(cells ,4,"double",file,"p_q");
  op_dat p_qold  = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold");
  op_dat p_adt   = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt");
  op_dat p_res   = op_decl_dat_hdf5(cells ,4,"double",file,"p_res");

  op_get_const_hdf5("gam",   1, "double", (char *)&gam,   "new_grid.h5");
  op_get_const_hdf5("gm1",   1, "double", (char *)&gm1,   "new_grid.h5");
  op_get_const_hdf5("cfl",   1, "double", (char *)&cfl,   "new_grid.h5");
  op_get_const_hdf5("eps",   1, "double", (char *)&eps,   "new_grid.h5");
  op_get_const_hdf5("mach",  1, "double", (char *)&mach,  "new_grid.h5");
  op_get_const_hdf5("alpha", 1, "double", (char *)&alpha, "new_grid.h5");
  op_get_const_hdf5("qinf",  4, "double", (char *)&qinf,  "new_grid.h5");

  op_decl_const(1,"double",&gam  );
  op_decl_const(1,"double",&gm1  );
  op_decl_const(1,"double",&cfl  );
  op_decl_const(1,"double",&eps  );
  op_decl_const(1,"double",&mach );
  op_decl_const(1,"double",&alpha);
  op_decl_const(4,"double",qinf  );

  op_diagnostic_output();

  // write back the original data to check that the file was read correctly;
  // run h5diff on new_grid_out.h5 and new_grid.h5 to compare the two HDF5 files
  op_write_hdf5("new_grid_out.h5");

  op_write_const_hdf5("gam",  1,"double",(char *)&gam,   "new_grid_out.h5");
  op_write_const_hdf5("gm1",  1,"double",(char *)&gm1,   "new_grid_out.h5");
  op_write_const_hdf5("cfl",  1,"double",(char *)&cfl,   "new_grid_out.h5");
  op_write_const_hdf5("eps",  1,"double",(char *)&eps,   "new_grid_out.h5");
  op_write_const_hdf5("mach", 1,"double",(char *)&mach,  "new_grid_out.h5");
  op_write_const_hdf5("alpha",1,"double",(char *)&alpha, "new_grid_out.h5");
  op_write_const_hdf5("qinf", 4,"double",(char *)qinf,   "new_grid_out.h5");

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", edges, pecell, p_x);

  int g_ncell = op_get_size(cells);

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main time-marching loop
  niter = 1000;

  for (int iter=1; iter<=niter; iter++) {

    // save old flow solution
    op_par_loop(save_soln,"save_soln", cells,
                op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_READ ),
                op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE));

    // predictor/corrector update loop
    for (int k=0; k<2; k++) {

      // calculate area/timestep
      op_par_loop(adt_calc,"adt_calc",cells,
                  op_arg_dat(p_x,  -4,pcell, 2,"double",OP_READ ),
                  op_arg_dat(p_q,  -1,OP_ID, 4,"double",OP_READ ),
                  op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE));

      // calculate flux residual
      op_par_loop(res_calc,"res_calc",edges,
                  op_arg_dat(p_x,   -2,pedge, 2,"double",OP_READ),
                  op_arg_dat(p_q,   -2,pecell,4,"double",OP_READ),
                  op_arg_dat(p_adt, -2,pecell,1,"double",OP_READ),
                  op_arg_dat(p_res, -2,pecell,4,"double",OP_INC ));

      op_par_loop(bres_calc,"bres_calc",bedges,
                  op_arg_dat(p_x,    -2,pbedge, 2,"double",OP_READ),
                  op_arg_dat(p_q,     0,pbecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,   0,pbecell,1,"double",OP_READ),
                  op_arg_dat(p_res,   0,pbecell,4,"double",OP_INC ),
                  op_arg_dat(p_bound,-1,OP_ID  ,1,"int",   OP_READ));

      // update flow field
      rms = 0.0;

      op_par_loop(update,"update",cells,
                  op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ),
                  op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_WRITE),
                  op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW   ),
                  op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ),
                  op_arg_gbl(&rms,1,"double",OP_INC));
    }

    // print iteration history
    rms = sqrt(rms/(double)g_ncell);
    if (iter%100 == 0)
      op_printf(" %d  %10.5e \n",iter,rms);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_timing_output();
  op_printf("Max total runtime = \n%f\n",wall_t2-wall_t1);
  op_exit();
}
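The elemental kernels named in the op_par_loop calls above (save_soln, adt_calc, res_calc, bres_calc, update) are defined in separate header files and are not reproduced in this listing. As a rough illustration of the calling convention, a minimal sketch of save_soln and update follows, assuming the standard Airfoil formulation; the exact bodies may differ from the distributed sources.

// save_soln: copy the four flow variables of a cell into the "old" array
// (matches the OP_READ/OP_WRITE access modes used in the loop above)
inline void save_soln(double *q, double *qold) {
  for (int n = 0; n < 4; n++) qold[n] = q[n];
}

// update: apply the corrector step and accumulate the residual measure;
// res is reset here so it can be re-incremented by res_calc/bres_calc
// in the next pass, and rms is accumulated as an OP_INC global
inline void update(double *qold, double *q, double *res, double *adt,
                   double *rms) {
  double del, adti = 1.0f / (*adt);
  for (int n = 0; n < 4; n++) {
    del    = adti * res[n];
    q[n]   = qold[n] - del;
    res[n] = 0.0f;
    *rms  += del * del;
  }
}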
//
// main program
//
int main(int argc, char **argv)
{
  int my_rank;
  int comm_size;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  double time;
  double max_time;

  int    *becell, *ecell, *bound, *bedge, *edge, *cell;
  double *x, *q, *qold, *adt, *res;

  int niter;
  double rms;

  op_timers(&cpu_t1, &wall_t1);

  // set constants
  if (my_rank == MPI_ROOT) printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  double mach  = 0.4f;
  double alpha = 3.0f*atan(1.0f)/45.0f;
  double p     = 1.0f;
  double r     = 1.0f;
  double u     = sqrt(gam*p/r)*mach;
  double e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  // OP initialisation
  op_init(argc,argv,2);

  /**------------------------BEGIN Parallel I/O -------------------**/

  char file[] = "new_grid.h5"; //"new_grid-26mil.h5";

  // declare sets, pointers, datasets and global constants - reading in from file
  op_set nodes  = op_decl_set_hdf5(file, "nodes");
  op_set edges  = op_decl_set_hdf5(file, "edges");
  op_set bedges = op_decl_set_hdf5(file, "bedges");
  op_set cells  = op_decl_set_hdf5(file, "cells");

  op_map pedge   = op_decl_map_hdf5(edges, nodes, 2, file, "pedge");
  op_map pecell  = op_decl_map_hdf5(edges, cells, 2, file, "pecell");
  op_map pbedge  = op_decl_map_hdf5(bedges,nodes, 2, file, "pbedge");
  op_map pbecell = op_decl_map_hdf5(bedges,cells, 1, file, "pbecell");
  op_map pcell   = op_decl_map_hdf5(cells, nodes, 4, file, "pcell");

  op_dat p_bound = op_decl_dat_hdf5(bedges,1,"int"   ,file,"p_bound");
  op_dat p_x     = op_decl_dat_hdf5(nodes ,2,"double",file,"p_x");
  op_dat p_q     = op_decl_dat_hdf5(cells ,4,"double",file,"p_q");
  op_dat p_qold  = op_decl_dat_hdf5(cells ,4,"double",file,"p_qold");
  op_dat p_adt   = op_decl_dat_hdf5(cells ,1,"double",file,"p_adt");
  op_dat p_res   = op_decl_dat_hdf5(cells ,4,"double",file,"p_res");

  /**------------------------END Parallel I/O -----------------------**/

  op_timers(&cpu_t2, &wall_t2);
  time = wall_t2 - wall_t1;
  MPI_Reduce(&time, &max_time, 1, MPI_DOUBLE, MPI_MAX, MPI_ROOT, MPI_COMM_WORLD);
  if (my_rank == MPI_ROOT) printf("Max total file read time = %f\n", max_time);

  op_decl_const(1,"double",&gam  );
  op_decl_const(1,"double",&gm1  );
  op_decl_const(1,"double",&cfl  );
  op_decl_const(1,"double",&eps  );
  op_decl_const(1,"double",&mach );
  op_decl_const(1,"double",&alpha);
  op_decl_const(4,"double",qinf  );

  op_diagnostic_output();

  // write back the original data to check that the file was read correctly;
  // run h5diff on new_grid_out.h5 and new_grid.h5 to compare the two HDF5 files
  op_write_hdf5("new_grid_out.h5");

  // partition with ParMetis
  //op_partition_geom(p_x);
  //op_partition_random(cells);
  //op_partition_kway(pecell);
  //op_partition_geomkway(p_x, pcell);

  // partition with PT-Scotch
  op_partition_ptscotch(pecell);

  // create halos
  op_halo_create();

  int g_ncell = 0;
  int *sizes = (int *)malloc(sizeof(int)*comm_size);
  MPI_Allgather(&cells->size, 1, MPI_INT, sizes, 1, MPI_INT, MPI_COMM_WORLD);
  for (int i = 0; i < comm_size; i++) g_ncell = g_ncell + sizes[i];
  free(sizes);

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  niter = 1000;
  for (int iter=1; iter<=niter; iter++) {

    // save old flow solution
    op_par_loop(save_soln,"save_soln", cells,
                op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_READ ),
                op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_WRITE));

    // predictor/corrector update loop
    for (int k=0; k<2; k++) {

      // calculate area/timestep
      op_par_loop(adt_calc,"adt_calc",cells,
                  op_arg_dat(p_x,   0,pcell, 2,"double",OP_READ ),
                  op_arg_dat(p_x,   1,pcell, 2,"double",OP_READ ),
                  op_arg_dat(p_x,   2,pcell, 2,"double",OP_READ ),
                  op_arg_dat(p_x,   3,pcell, 2,"double",OP_READ ),
                  op_arg_dat(p_q,  -1,OP_ID, 4,"double",OP_READ ),
                  op_arg_dat(p_adt,-1,OP_ID, 1,"double",OP_WRITE));

      // calculate flux residual
      op_par_loop(res_calc,"res_calc",edges,
                  op_arg_dat(p_x,    0,pedge, 2,"double",OP_READ),
                  op_arg_dat(p_x,    1,pedge, 2,"double",OP_READ),
                  op_arg_dat(p_q,    0,pecell,4,"double",OP_READ),
                  op_arg_dat(p_q,    1,pecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,  0,pecell,1,"double",OP_READ),
                  op_arg_dat(p_adt,  1,pecell,1,"double",OP_READ),
                  op_arg_dat(p_res,  0,pecell,4,"double",OP_INC ),
                  op_arg_dat(p_res,  1,pecell,4,"double",OP_INC ));

      op_par_loop(bres_calc,"bres_calc",bedges,
                  op_arg_dat(p_x,     0,pbedge, 2,"double",OP_READ),
                  op_arg_dat(p_x,     1,pbedge, 2,"double",OP_READ),
                  op_arg_dat(p_q,     0,pbecell,4,"double",OP_READ),
                  op_arg_dat(p_adt,   0,pbecell,1,"double",OP_READ),
                  op_arg_dat(p_res,   0,pbecell,4,"double",OP_INC ),
                  op_arg_dat(p_bound,-1,OP_ID  ,1,"int",   OP_READ));

      // update flow field
      rms = 0.0;

      op_par_loop(update,"update",cells,
                  op_arg_dat(p_qold,-1,OP_ID, 4,"double",OP_READ ),
                  op_arg_dat(p_q,   -1,OP_ID, 4,"double",OP_WRITE),
                  op_arg_dat(p_res, -1,OP_ID, 4,"double",OP_RW   ),
                  op_arg_dat(p_adt, -1,OP_ID, 1,"double",OP_READ ),
                  op_arg_gbl(&rms,1,"double",OP_INC));
    }

    // print iteration history
    if (my_rank == MPI_ROOT) {
      rms = sqrt(rms/(double)g_ncell);
      if (iter%100 == 0)
        printf("%d  %10.5e \n",iter,rms);
    }
  }

  op_timers(&cpu_t2, &wall_t2);

  // get results data array
  op_dat temp = op_mpi_get_data(p_q);

  // output the result dat array to files
  //op_write_hdf5("new_grid_out.h5");

  // compress using
  // ~/hdf5/bin/h5repack -f GZIP=9 new_grid.h5 new_grid_pack.h5

  // free memory allocated to halos
  op_halo_destroy();

  // return all op_dats, op_maps back to original element order
  op_partition_reverse();

  // print each mpi process's timing info for each kernel
  op_mpi_timing_output();

  // print total time for niter iterations
  time = wall_t2 - wall_t1;
  MPI_Reduce(&time, &max_time, 1, MPI_DOUBLE, MPI_MAX, MPI_ROOT, MPI_COMM_WORLD);
  if (my_rank == MPI_ROOT) printf("Max total runtime = %f\n", max_time);

  op_exit();
  MPI_Finalize(); // user mpi finalize
}
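In this version adt_calc receives the four corner nodes of a cell individually (indices 0..3 through pcell) together with the cell's flow variables, and writes the area/timestep value. A sketch of what the elemental kernel could look like follows, assuming the usual Airfoil CFL-based formula and the global constants gam, gm1 and cfl declared via op_decl_const above; treat it as illustrative rather than the exact distributed source.

// adt_calc: area/timestep computation for one quadrilateral cell
inline void adt_calc(double *x1, double *x2, double *x3, double *x4,
                     double *q, double *adt) {
  double dx, dy, ri, u, v, c;

  ri = 1.0f / q[0];
  u  = ri * q[1];
  v  = ri * q[2];
  c  = sqrt(gam * gm1 * (ri * q[3] - 0.5f * (u*u + v*v)));  // speed of sound

  // accumulate |u.dy - v.dx| + c*|edge| over the four cell edges
  dx = x2[0] - x1[0];  dy = x2[1] - x1[1];
  *adt  = fabs(u*dy - v*dx) + c*sqrt(dx*dx + dy*dy);
  dx = x3[0] - x2[0];  dy = x3[1] - x2[1];
  *adt += fabs(u*dy - v*dx) + c*sqrt(dx*dx + dy*dy);
  dx = x4[0] - x3[0];  dy = x4[1] - x3[1];
  *adt += fabs(u*dy - v*dx) + c*sqrt(dx*dx + dy*dy);
  dx = x1[0] - x4[0];  dy = x1[1] - x4[1];
  *adt += fabs(u*dy - v*dx) + c*sqrt(dx*dx + dy*dy);

  *adt = (*adt) / cfl;  // scale by the CFL number
}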
int main(int argc, char **argv)
{
  int my_rank;
  int comm_size;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  int    *becell, *ecell, *bound, *bedge, *edge, *cell;
  double *x, *q, *qold, *adt, *res;

  int nnode, ncell, nedge, nbedge;

  /**------------------------BEGIN I/O -------------------**/

  char file[]     = "new_grid-26mil.dat";
  char file_out[] = "new_grid_out.h5";

  /* read in grid from disk on root processor */
  FILE *fp;

  if ( (fp = fopen(file,"r")) == NULL) {
    printf("can't open file %s\n",file);
    exit(-1);
  }

  int g_nnode, g_ncell, g_nedge, g_nbedge;

  check_scan(fscanf(fp,"%d %d %d %d \n",
                    &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  int *g_becell = 0, *g_ecell = 0, *g_bound = 0,
      *g_bedge = 0, *g_edge = 0, *g_cell = 0;
  double *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  // set constants
  if (my_rank == MPI_ROOT) printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  double mach  = 0.4f;
  double alpha = 3.0f*atan(1.0f)/45.0f;
  double p     = 1.0f;
  double r     = 1.0f;
  double u     = sqrt(gam*p/r)*mach;
  double e     = p/(r*gm1) + 0.5f*u*u;

  qinf[0] = r;
  qinf[1] = r*u;
  qinf[2] = 0.0f;
  qinf[3] = r*e;

  if (my_rank == MPI_ROOT) {
    printf("reading in grid \n");
    printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n",
           g_nnode, g_ncell, g_nedge, g_nbedge);

    g_cell   = (int *) malloc(4*g_ncell*sizeof(int));
    g_edge   = (int *) malloc(2*g_nedge*sizeof(int));
    g_ecell  = (int *) malloc(2*g_nedge*sizeof(int));
    g_bedge  = (int *) malloc(2*g_nbedge*sizeof(int));
    g_becell = (int *) malloc(  g_nbedge*sizeof(int));
    g_bound  = (int *) malloc(  g_nbedge*sizeof(int));

    g_x    = (double *) malloc(2*g_nnode*sizeof(double));
    g_q    = (double *) malloc(4*g_ncell*sizeof(double));
    g_qold = (double *) malloc(4*g_ncell*sizeof(double));
    g_res  = (double *) malloc(4*g_ncell*sizeof(double));
    g_adt  = (double *) malloc(  g_ncell*sizeof(double));

    for (int n=0; n<g_nnode; n++) {
      check_scan(fscanf(fp,"%lf %lf \n",&g_x[2*n], &g_x[2*n+1]), 2);
    }

    for (int n=0; n<g_ncell; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_cell[4*n  ], &g_cell[4*n+1],
                                            &g_cell[4*n+2], &g_cell[4*n+3]), 4);
    }

    for (int n=0; n<g_nedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_edge[2*n], &g_edge[2*n+1],
                                            &g_ecell[2*n],&g_ecell[2*n+1]), 4);
    }

    for (int n=0; n<g_nbedge; n++) {
      check_scan(fscanf(fp,"%d %d %d %d \n",&g_bedge[2*n],&g_bedge[2*n+1],
                                            &g_becell[n], &g_bound[n]), 4);
    }

    // initialise flow field and residual
    for (int n=0; n<g_ncell; n++) {
      for (int m=0; m<4; m++) {
        g_q[4*n+m]   = qinf[m];
        g_res[4*n+m] = 0.0f;
      }
    }
  }

  fclose(fp);

  nnode  = compute_local_size (g_nnode,  comm_size, my_rank);
  ncell  = compute_local_size (g_ncell,  comm_size, my_rank);
  nedge  = compute_local_size (g_nedge,  comm_size, my_rank);
  nbedge = compute_local_size (g_nbedge, comm_size, my_rank);

  printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n",
         my_rank, nnode, ncell, nedge, nbedge);

  // allocate memory to hold local sets, mapping tables and data
  cell   = (int *) malloc(4*ncell*sizeof(int));
  edge   = (int *) malloc(2*nedge*sizeof(int));
  ecell  = (int *) malloc(2*nedge*sizeof(int));
  bedge  = (int *) malloc(2*nbedge*sizeof(int));
  becell = (int *) malloc(  nbedge*sizeof(int));
  bound  = (int *) malloc(  nbedge*sizeof(int));

  x    = (double *) malloc(2*nnode*sizeof(double));
  q    = (double *) malloc(4*ncell*sizeof(double));
  qold = (double *) malloc(4*ncell*sizeof(double));
  res  = (double *) malloc(4*ncell*sizeof(double));
  adt  = (double *) malloc(  ncell*sizeof(double));

  // scatter sets, mappings and data on sets
  scatter_int_array(g_cell,   cell,   comm_size, g_ncell, ncell, 4);
  scatter_int_array(g_edge,   edge,   comm_size, g_nedge, nedge, 2);
  scatter_int_array(g_ecell,  ecell,  comm_size, g_nedge, nedge, 2);
  scatter_int_array(g_bedge,  bedge,  comm_size, g_nbedge,nbedge,2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge,nbedge,1);
  scatter_int_array(g_bound,  bound,  comm_size, g_nbedge,nbedge,1);

  scatter_double_array(g_x,    x,    comm_size, g_nnode, nnode, 2);
  scatter_double_array(g_q,    q,    comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_qold, qold, comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_res,  res,  comm_size, g_ncell, ncell, 4);
  scatter_double_array(g_adt,  adt,  comm_size, g_ncell, ncell, 1);

  if (my_rank == MPI_ROOT) {
    // free memory allocated to global arrays on the root process
    // after scattering to all processes
    free(g_cell); free(g_edge); free(g_ecell); free(g_bedge);
    free(g_becell); free(g_bound);
    free(g_x); free(g_q); free(g_qold); free(g_adt); free(g_res);
  }

  // OP initialisation
  op_init(argc,argv,2);

  /**------------------------END I/O -----------------------**/

  /* FIXME: It's not clear to the compiler that something is going on behind
     the scenes here. Hence these variables are reported as unused */

  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges, nodes, 2, edge,  "pedge");
  op_map pecell  = op_decl_map(edges, cells, 2, ecell, "pecell");
  op_map pbedge  = op_decl_map(bedges,nodes, 2, bedge, "pbedge");
  op_map pbecell = op_decl_map(bedges,cells, 1, becell,"pbecell");
  op_map pcell   = op_decl_map(cells, nodes, 4, cell,  "pcell");

  op_dat p_bound = op_decl_dat(bedges,1,"int"   ,bound,"p_bound");
  op_dat p_x     = op_decl_dat(nodes ,2,"double",x    ,"p_x");
  op_dat p_q     = op_decl_dat(cells ,4,"double",q    ,"p_q");
  op_dat p_qold  = op_decl_dat(cells ,4,"double",qold ,"p_qold");
  op_dat p_adt   = op_decl_dat(cells ,1,"double",adt  ,"p_adt");
  op_dat p_res   = op_decl_dat(cells ,4,"double",res  ,"p_res");

  op_decl_const(1,"double",&gam  );
  op_decl_const(1,"double",&gm1  );
  op_decl_const(1,"double",&cfl  );
  op_decl_const(1,"double",&eps  );
  op_decl_const(1,"double",&mach );
  op_decl_const(1,"double",&alpha);
  op_decl_const(4,"double",qinf  );

  op_write_hdf5(file_out);

  op_diagnostic_output();

  // create halos - for sanity check
  op_halo_create();

  op_exit();
  MPI_Finalize(); // user mpi finalize
}
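The helper routines check_scan, compute_local_size, scatter_int_array and scatter_double_array used above are plain MPI utilities defined alongside the application and are not shown in this listing. A minimal sketch of two of them follows, assuming a simple block distribution (any remainder elements go to the lower ranks) and rank 0 acting as the root; scatter_double_array would be identical apart from the MPI datatype.

// block-partition a global set: each rank gets global/comm_size elements,
// with one extra element on the first (global % comm_size) ranks
static int compute_local_size(int global_size, int mpi_comm_size, int mpi_rank) {
  int local_size = global_size / mpi_comm_size;
  if (mpi_rank < global_size % mpi_comm_size) local_size++;
  return local_size;
}

// scatter a global integer array (elem_size values per set element) from
// the root process to the per-process local arrays using MPI_Scatterv
static void scatter_int_array(int *g_array, int *l_array, int comm_size,
                              int g_size, int l_size, int elem_size) {
  int *sendcnts = (int *)malloc(comm_size * sizeof(int));
  int *displs   = (int *)malloc(comm_size * sizeof(int));
  int disp = 0;

  for (int i = 0; i < comm_size; i++) {
    sendcnts[i] = elem_size * compute_local_size(g_size, comm_size, i);
    displs[i]   = disp;
    disp       += sendcnts[i];
  }

  MPI_Scatterv(g_array, sendcnts, displs, MPI_INT,
               l_array, l_size * elem_size, MPI_INT, 0, MPI_COMM_WORLD);

  free(sendcnts);
  free(displs);
}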