int main(int argc, char **argv) {
  // OP initialisation
  op_init(argc, argv, 2);

  // MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  int *becell, *ecell, *bound, *bedge, *edge, *cell;
  float *x, *q, *qold, *adt, *res;

  int nnode, ncell, nedge, nbedge, niter;
  float rms;

  /**------------------------BEGIN I/O and PARTITIONING -------------------**/

  op_timers(&cpu_t1, &wall_t1);

  /* read in grid from disk on root processor */
  FILE *fp;
  if ((fp = fopen("new_grid.dat", "r")) == NULL) {
    op_printf("can't open file new_grid.dat\n");
    exit(-1);
  }

  int g_nnode, g_ncell, g_nedge, g_nbedge;
  check_scan(fscanf(fp, "%d %d %d %d \n", &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0;
  float *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  // set constants
  op_printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  float mach  = 0.4f;
  float alpha = 3.0f * atan(1.0f) / 45.0f;
  float p     = 1.0f;
  float r     = 1.0f;
  float u     = sqrt(gam * p / r) * mach;
  float e     = p / (r * gm1) + 0.5f * u * u;

  qinf[0] = r;
  qinf[1] = r * u;
  qinf[2] = 0.0f;
  qinf[3] = r * e;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n",
            g_nnode, g_ncell, g_nedge, g_nbedge);

  if (my_rank == MPI_ROOT) {
    g_cell   = (int *)malloc(4 * g_ncell  * sizeof(int));
    g_edge   = (int *)malloc(2 * g_nedge  * sizeof(int));
    g_ecell  = (int *)malloc(2 * g_nedge  * sizeof(int));
    g_bedge  = (int *)malloc(2 * g_nbedge * sizeof(int));
    g_becell = (int *)malloc(    g_nbedge * sizeof(int));
    g_bound  = (int *)malloc(    g_nbedge * sizeof(int));

    g_x    = (float *)malloc(2 * g_nnode * sizeof(float));
    g_q    = (float *)malloc(4 * g_ncell * sizeof(float));
    g_qold = (float *)malloc(4 * g_ncell * sizeof(float));
    g_res  = (float *)malloc(4 * g_ncell * sizeof(float));
    g_adt  = (float *)malloc(    g_ncell * sizeof(float));

    for (int n = 0; n < g_nnode; n++) {
      check_scan(fscanf(fp, "%f %f \n", &g_x[2*n], &g_x[2*n+1]), 2);
    }

    for (int n = 0; n < g_ncell; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_cell[4*n], &g_cell[4*n+1],
                        &g_cell[4*n+2], &g_cell[4*n+3]), 4);
    }

    for (int n = 0; n < g_nedge; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_edge[2*n], &g_edge[2*n+1],
                        &g_ecell[2*n], &g_ecell[2*n+1]), 4);
    }

    for (int n = 0; n < g_nbedge; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_bedge[2*n], &g_bedge[2*n+1],
                        &g_becell[n], &g_bound[n]), 4);
    }

    // initialise flow field and residual
    for (int n = 0; n < g_ncell; n++) {
      for (int m = 0; m < 4; m++) {
        g_q[4*n+m]   = qinf[m];
        g_res[4*n+m] = 0.0f;
      }
    }
  }

  fclose(fp);

  nnode  = compute_local_size(g_nnode,  comm_size, my_rank);
  ncell  = compute_local_size(g_ncell,  comm_size, my_rank);
  nedge  = compute_local_size(g_nedge,  comm_size, my_rank);
  nbedge = compute_local_size(g_nbedge, comm_size, my_rank);

  op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n",
            my_rank, nnode, ncell, nedge, nbedge);

  /* Allocate memory to hold local sets, mapping tables and data */
  cell   = (int *)malloc(4 * ncell  * sizeof(int));
  edge   = (int *)malloc(2 * nedge  * sizeof(int));
  ecell  = (int *)malloc(2 * nedge  * sizeof(int));
  bedge  = (int *)malloc(2 * nbedge * sizeof(int));
  becell = (int *)malloc(    nbedge * sizeof(int));
  bound  = (int *)malloc(    nbedge * sizeof(int));

  x    = (float *)malloc(2 * nnode * sizeof(float));
  q    = (float *)malloc(4 * ncell * sizeof(float));
  qold = (float *)malloc(4 * ncell * sizeof(float));
  res  = (float *)malloc(4 * ncell * sizeof(float));
  adt  = (float *)malloc(    ncell * sizeof(float));

  /* scatter sets, mappings and data on sets */
  scatter_int_array(g_cell,   cell,   comm_size, g_ncell,  ncell,  4);
  scatter_int_array(g_edge,   edge,   comm_size, g_nedge,  nedge,  2);
  scatter_int_array(g_ecell,  ecell,  comm_size, g_nedge,  nedge,  2);
  scatter_int_array(g_bedge,  bedge,  comm_size, g_nbedge, nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge, nbedge, 1);
  scatter_int_array(g_bound,  bound,  comm_size, g_nbedge, nbedge, 1);

  scatter_float_array(g_x,    x,    comm_size, g_nnode, nnode, 2);
  scatter_float_array(g_q,    q,    comm_size, g_ncell, ncell, 4);
  scatter_float_array(g_qold, qold, comm_size, g_ncell, ncell, 4);
  scatter_float_array(g_res,  res,  comm_size, g_ncell, ncell, 4);
  scatter_float_array(g_adt,  adt,  comm_size, g_ncell, ncell, 1);

  /* Free memory allocated to global arrays on rank 0
     after scattering to all processes */
  if (my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x);
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  op_timers(&cpu_t2, &wall_t2);
  op_printf("Max total file read time = %f\n", wall_t2 - wall_t1);

  /**------------------------END I/O and PARTITIONING -----------------------**/

  // declare sets, pointers, datasets and global constants
  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges,  nodes, 2, edge,   "pedge");
  op_map pecell  = op_decl_map(edges,  cells, 2, ecell,  "pecell");
  op_map pbedge  = op_decl_map(bedges, nodes, 2, bedge,  "pbedge");
  op_map pbecell = op_decl_map(bedges, cells, 1, becell, "pbecell");
  op_map pcell   = op_decl_map(cells,  nodes, 4, cell,   "pcell");

  op_dat p_bound = op_decl_dat(bedges, 1, "int",   bound, "p_bound");
  op_dat p_x     = op_decl_dat(nodes,  2, "float", x,     "p_x");
  op_dat p_q     = op_decl_dat(cells,  4, "float", q,     "p_q");
  op_dat p_qold  = op_decl_dat(cells,  4, "float", qold,  "p_qold");
  op_dat p_adt   = op_decl_dat(cells,  1, "float", adt,   "p_adt");
  op_dat p_res   = op_decl_dat(cells,  4, "float", res,   "p_res");

  op_decl_const(1, "float", &gam);
  op_decl_const(1, "float", &gm1);
  op_decl_const(1, "float", &cfl);
  op_decl_const(1, "float", &eps);
  op_decl_const(1, "float", &mach);
  op_decl_const(1, "float", &alpha);
  op_decl_const(4, "float", qinf);

  op_diagnostic_output();

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", NULL, pecell, p_x);

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  niter = 1000;
  for (int iter = 1; iter <= niter; iter++) {

    // save old flow solution
    op_par_loop(save_soln, "save_soln", cells,
                op_arg_dat(p_q,    -1, OP_ID, 4, "float", OP_READ),
                op_arg_dat(p_qold, -1, OP_ID, 4, "float", OP_WRITE));

    // predictor/corrector update loop
    for (int k = 0; k < 2; k++) {

      // calculate area/timestep
      op_par_loop(adt_calc, "adt_calc", cells,
                  op_arg_dat(p_x,    0, pcell, 2, "float", OP_READ),
                  op_arg_dat(p_x,    1, pcell, 2, "float", OP_READ),
                  op_arg_dat(p_x,    2, pcell, 2, "float", OP_READ),
                  op_arg_dat(p_x,    3, pcell, 2, "float", OP_READ),
                  op_arg_dat(p_q,   -1, OP_ID, 4, "float", OP_READ),
                  op_arg_dat(p_adt, -1, OP_ID, 1, "float", OP_WRITE));

      // calculate flux residual
      op_par_loop(res_calc, "res_calc", edges,
                  op_arg_dat(p_x,   0, pedge,  2, "float", OP_READ),
                  op_arg_dat(p_x,   1, pedge,  2, "float", OP_READ),
                  op_arg_dat(p_q,   0, pecell, 4, "float", OP_READ),
                  op_arg_dat(p_q,   1, pecell, 4, "float", OP_READ),
                  op_arg_dat(p_adt, 0, pecell, 1, "float", OP_READ),
                  op_arg_dat(p_adt, 1, pecell, 1, "float", OP_READ),
                  op_arg_dat(p_res, 0, pecell, 4, "float", OP_INC),
                  op_arg_dat(p_res, 1, pecell, 4, "float", OP_INC));

      op_par_loop(bres_calc, "bres_calc", bedges,
                  op_arg_dat(p_x,      0, pbedge,  2, "float", OP_READ),
                  op_arg_dat(p_x,      1, pbedge,  2, "float", OP_READ),
                  op_arg_dat(p_q,      0, pbecell, 4, "float", OP_READ),
                  op_arg_dat(p_adt,    0, pbecell, 1, "float", OP_READ),
                  op_arg_dat(p_res,    0, pbecell, 4, "float", OP_INC),
                  op_arg_dat(p_bound, -1, OP_ID,   1, "int",   OP_READ));

      // update flow field
      rms = 0.0;
      op_par_loop(update, "update", cells,
                  op_arg_dat(p_qold, -1, OP_ID, 4, "float", OP_READ),
                  op_arg_dat(p_q,    -1, OP_ID, 4, "float", OP_WRITE),
                  op_arg_dat(p_res,  -1, OP_ID, 4, "float", OP_RW),
                  op_arg_dat(p_adt,  -1, OP_ID, 1, "float", OP_READ),
                  op_arg_gbl(&rms, 1, "float", OP_INC));
    }

    // print iteration history
    rms = sqrt(rms / (float)g_ncell);
    if (iter % 100 == 0)
      op_printf("%d %10.5e \n", iter, rms);
  }

  op_timers(&cpu_t2, &wall_t2);

  // get results data array - perhaps can later be handled by a temporary dat
  // op_dat temp = op_mpi_get_data(p_q);

  // output the result dat array to files
  // print_dat_tofile(temp, "out_grid.dat");    // ASCII
  // print_dat_tobinfile(temp, "out_grid.bin"); // Binary

  op_timing_output();

  // print total time for niter iterations
  op_printf("Max total runtime = %f\n", wall_t2 - wall_t1);
  op_exit();

  free(cell);
  free(edge);
  free(ecell);
  free(bedge);
  free(becell);
  free(bound);
  free(x);
  free(q);
  free(qold);
  free(res);
  free(adt);
}
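/* The airfoil listing above relies on several helpers and globals that the
   excerpt does not show: check_scan, compute_local_size, scatter_int_array,
   scatter_float_array, and the flow constants gam, gm1, cfl, eps, qinf.
   The following is a minimal sketch of plausible definitions, assuming simple
   block partitioning and a root-based MPI_Scatterv distribution; treat it as
   illustrative, not as the exact OP2 application code. */

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

// Global flow constants referenced by the listing (assumed file scope).
float gam, gm1, cfl, eps, qinf[4];

// Abort if fscanf matched fewer items than expected.
static void check_scan(int items_read, int items_expected) {
  if (items_read != items_expected) {
    printf("error reading grid data\n");
    exit(-1);
  }
}

// Block partitioning: each rank gets g_size/comm_size elements, with the
// remainder spread over the first g_size%comm_size ranks.
static int compute_local_size(int g_size, int comm_size, int my_rank) {
  int local_size = g_size / comm_size;
  if (my_rank < g_size % comm_size)
    local_size++;
  return local_size;
}

// Scatter a root-resident integer array holding elem_size values per set
// element; scatter_float_array is the float analogue. MPI_ROOT is assumed
// to be defined as rank 0 by the OP2 MPI headers, as in the listings.
static void scatter_int_array(int *g_array, int *l_array, int comm_size,
                              int g_size, int l_size, int elem_size) {
  int *sendcnts = (int *)malloc(comm_size * sizeof(int));
  int *displs   = (int *)malloc(comm_size * sizeof(int));
  int disp = 0;

  for (int i = 0; i < comm_size; i++) {
    sendcnts[i] = elem_size * compute_local_size(g_size, comm_size, i);
    displs[i]   = disp;
    disp       += sendcnts[i];
  }

  MPI_Scatterv(g_array, sendcnts, displs, MPI_INT, l_array,
               l_size * elem_size, MPI_INT, MPI_ROOT, MPI_COMM_WORLD);

  free(sendcnts);
  free(displs);
}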
int main(int argc, char **argv) {
  // OP initialisation
  op_init(argc, argv, 2);

  // MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  int *becell, *ecell, *bound, *bedge, *edge, *cell;
  float *x, *q, *qold, *adt, *res;

  int nnode, ncell, nedge, nbedge;

  /**------------------------BEGIN I/O -------------------**/

  char file[]     = "new_grid.dat";
  char file_out[] = "new_grid_out.h5";

  /* read in grid from disk on root processor */
  FILE *fp;
  if ((fp = fopen(file, "r")) == NULL) {
    op_printf("can't open file %s\n", file);
    exit(-1);
  }

  int g_nnode, g_ncell, g_nedge, g_nbedge;
  check_scan(fscanf(fp, "%d %d %d %d \n", &g_nnode, &g_ncell, &g_nedge, &g_nbedge), 4);

  int *g_becell = 0, *g_ecell = 0, *g_bound = 0, *g_bedge = 0, *g_edge = 0, *g_cell = 0;
  float *g_x = 0, *g_q = 0, *g_qold = 0, *g_adt = 0, *g_res = 0;

  // set constants
  op_printf("initialising flow field\n");
  gam = 1.4f;
  gm1 = gam - 1.0f;
  cfl = 0.9f;
  eps = 0.05f;

  float mach  = 0.4f;
  float alpha = 3.0f * atan(1.0f) / 45.0f;
  float p     = 1.0f;
  float r     = 1.0f;
  float u     = sqrt(gam * p / r) * mach;
  float e     = p / (r * gm1) + 0.5f * u * u;

  qinf[0] = r;
  qinf[1] = r * u;
  qinf[2] = 0.0f;
  qinf[3] = r * e;

  op_printf("reading in grid \n");
  op_printf("Global number of nodes, cells, edges, bedges = %d, %d, %d, %d\n",
            g_nnode, g_ncell, g_nedge, g_nbedge);

  if (my_rank == MPI_ROOT) {
    g_cell   = (int *)malloc(4 * g_ncell  * sizeof(int));
    g_edge   = (int *)malloc(2 * g_nedge  * sizeof(int));
    g_ecell  = (int *)malloc(2 * g_nedge  * sizeof(int));
    g_bedge  = (int *)malloc(2 * g_nbedge * sizeof(int));
    g_becell = (int *)malloc(    g_nbedge * sizeof(int));
    g_bound  = (int *)malloc(    g_nbedge * sizeof(int));

    g_x    = (float *)malloc(2 * g_nnode * sizeof(float));
    g_q    = (float *)malloc(4 * g_ncell * sizeof(float));
    g_qold = (float *)malloc(4 * g_ncell * sizeof(float));
    g_res  = (float *)malloc(4 * g_ncell * sizeof(float));
    g_adt  = (float *)malloc(    g_ncell * sizeof(float));

    for (int n = 0; n < g_nnode; n++) {
      check_scan(fscanf(fp, "%f %f \n", &g_x[2*n], &g_x[2*n+1]), 2);
    }

    for (int n = 0; n < g_ncell; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_cell[4*n], &g_cell[4*n+1],
                        &g_cell[4*n+2], &g_cell[4*n+3]), 4);
    }

    for (int n = 0; n < g_nedge; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_edge[2*n], &g_edge[2*n+1],
                        &g_ecell[2*n], &g_ecell[2*n+1]), 4);
    }

    for (int n = 0; n < g_nbedge; n++) {
      check_scan(fscanf(fp, "%d %d %d %d \n", &g_bedge[2*n], &g_bedge[2*n+1],
                        &g_becell[n], &g_bound[n]), 4);
    }

    // initialise flow field and residual
    for (int n = 0; n < g_ncell; n++) {
      for (int m = 0; m < 4; m++) {
        g_q[4*n+m]   = qinf[m];
        g_res[4*n+m] = 0.0f;
      }
    }
  }

  fclose(fp);

  nnode  = compute_local_size(g_nnode,  comm_size, my_rank);
  ncell  = compute_local_size(g_ncell,  comm_size, my_rank);
  nedge  = compute_local_size(g_nedge,  comm_size, my_rank);
  nbedge = compute_local_size(g_nbedge, comm_size, my_rank);

  op_printf("Number of nodes, cells, edges, bedges on process %d = %d, %d, %d, %d\n",
            my_rank, nnode, ncell, nedge, nbedge);

  /* Allocate memory to hold local sets, mapping tables and data */
  cell   = (int *)malloc(4 * ncell  * sizeof(int));
  edge   = (int *)malloc(2 * nedge  * sizeof(int));
  ecell  = (int *)malloc(2 * nedge  * sizeof(int));
  bedge  = (int *)malloc(2 * nbedge * sizeof(int));
  becell = (int *)malloc(    nbedge * sizeof(int));
  bound  = (int *)malloc(    nbedge * sizeof(int));

  x    = (float *)malloc(2 * nnode * sizeof(float));
  q    = (float *)malloc(4 * ncell * sizeof(float));
  qold = (float *)malloc(4 * ncell * sizeof(float));
  res  = (float *)malloc(4 * ncell * sizeof(float));
  adt  = (float *)malloc(    ncell * sizeof(float));

  /* scatter sets, mappings and data on sets */
  scatter_int_array(g_cell,   cell,   comm_size, g_ncell,  ncell,  4);
  scatter_int_array(g_edge,   edge,   comm_size, g_nedge,  nedge,  2);
  scatter_int_array(g_ecell,  ecell,  comm_size, g_nedge,  nedge,  2);
  scatter_int_array(g_bedge,  bedge,  comm_size, g_nbedge, nbedge, 2);
  scatter_int_array(g_becell, becell, comm_size, g_nbedge, nbedge, 1);
  scatter_int_array(g_bound,  bound,  comm_size, g_nbedge, nbedge, 1);

  scatter_float_array(g_x,    x,    comm_size, g_nnode, nnode, 2);
  scatter_float_array(g_q,    q,    comm_size, g_ncell, ncell, 4);
  scatter_float_array(g_qold, qold, comm_size, g_ncell, ncell, 4);
  scatter_float_array(g_res,  res,  comm_size, g_ncell, ncell, 4);
  scatter_float_array(g_adt,  adt,  comm_size, g_ncell, ncell, 1);

  /* Free memory allocated to global arrays on rank 0
     after scattering to all processes */
  if (my_rank == MPI_ROOT) {
    free(g_cell);
    free(g_edge);
    free(g_ecell);
    free(g_bedge);
    free(g_becell);
    free(g_bound);
    free(g_x);
    free(g_q);
    free(g_qold);
    free(g_adt);
    free(g_res);
  }

  /**------------------------END I/O -----------------------**/

  /* FIXME: It's not clear to the compiler that something is going on behind
     the scenes here, so these variables are reported as unused */
  op_set nodes  = op_decl_set(nnode,  "nodes");
  op_set edges  = op_decl_set(nedge,  "edges");
  op_set bedges = op_decl_set(nbedge, "bedges");
  op_set cells  = op_decl_set(ncell,  "cells");

  op_map pedge   = op_decl_map(edges,  nodes, 2, edge,   "pedge");
  op_map pecell  = op_decl_map(edges,  cells, 2, ecell,  "pecell");
  op_map pbedge  = op_decl_map(bedges, nodes, 2, bedge,  "pbedge");
  op_map pbecell = op_decl_map(bedges, cells, 1, becell, "pbecell");
  op_map pcell   = op_decl_map(cells,  nodes, 4, cell,   "pcell");

  op_dat p_bound = op_decl_dat(bedges, 1, "int",   bound, "p_bound");
  op_dat p_x     = op_decl_dat(nodes,  2, "float", x,     "p_x");
  op_dat p_q     = op_decl_dat(cells,  4, "float", q,     "p_q");
  op_dat p_qold  = op_decl_dat(cells,  4, "float", qold,  "p_qold");
  op_dat p_adt   = op_decl_dat(cells,  1, "float", adt,   "p_adt");
  op_dat p_res   = op_decl_dat(cells,  4, "float", res,   "p_res");

  op_decl_const(1, "float", &gam);
  op_decl_const(1, "float", &gm1);
  op_decl_const(1, "float", &cfl);
  op_decl_const(1, "float", &eps);
  op_decl_const(1, "float", &mach);
  op_decl_const(1, "float", &alpha);
  op_decl_const(4, "float", qinf);

  op_dump_to_hdf5(file_out);

  op_write_const_hdf5("gam",   1, "float", (char *)&gam,   file_out);
  op_write_const_hdf5("gm1",   1, "float", (char *)&gm1,   file_out);
  op_write_const_hdf5("cfl",   1, "float", (char *)&cfl,   file_out);
  op_write_const_hdf5("eps",   1, "float", (char *)&eps,   file_out);
  op_write_const_hdf5("mach",  1, "float", (char *)&mach,  file_out);
  op_write_const_hdf5("alpha", 1, "float", (char *)&alpha, file_out);
  op_write_const_hdf5("qinf",  4, "float", (char *)qinf,   file_out);

  // create halos - for sanity check
  op_halo_create();

  op_exit();
}
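/* Once new_grid_out.h5 has been written, a later run can declare its sets,
   maps and dats directly from the file via OP2's HDF5 API, instead of
   re-reading and scattering the ASCII grid by hand. The fragment below is
   a minimal sketch of the read-back, modelled on the airfoil HDF5 variant;
   check the exact signatures against the OP2 release in use. */

char file[] = "new_grid_out.h5";

op_set nodes  = op_decl_set_hdf5(file, "nodes");
op_set edges  = op_decl_set_hdf5(file, "edges");
op_set bedges = op_decl_set_hdf5(file, "bedges");
op_set cells  = op_decl_set_hdf5(file, "cells");

op_map pedge   = op_decl_map_hdf5(edges,  nodes, 2, file, "pedge");
op_map pecell  = op_decl_map_hdf5(edges,  cells, 2, file, "pecell");
op_map pbedge  = op_decl_map_hdf5(bedges, nodes, 2, file, "pbedge");
op_map pbecell = op_decl_map_hdf5(bedges, cells, 1, file, "pbecell");
op_map pcell   = op_decl_map_hdf5(cells,  nodes, 4, file, "pcell");

op_dat p_bound = op_decl_dat_hdf5(bedges, 1, "int",   file, "p_bound");
op_dat p_x     = op_decl_dat_hdf5(nodes,  2, "float", file, "p_x");
op_dat p_q     = op_decl_dat_hdf5(cells,  4, "float", file, "p_q");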
int main(int argc, char **argv) {
  // OP initialisation
  op_init(argc, argv, 2);

  // MPI for user I/O
  int my_rank;
  int comm_size;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;

  int *pp;
  float *A, *r, *u, *du;
  int nnode, nedge;

  /**------------------------BEGIN I/O and PARTITIONING ---------------------**/

  int g_nnode, g_nedge, g_n, g_e;

  g_nnode = (NN - 1) * (NN - 1);
  g_nedge = (NN - 1) * (NN - 1) + 4 * (NN - 1) * (NN - 2);

  int *g_pp = 0;
  float *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0;

  op_printf("Global number of nodes, edges = %d, %d\n", g_nnode, g_nedge);

  if (my_rank == MPI_ROOT) {
    g_pp = (int *)malloc(sizeof(int) * 2 * g_nedge);
    g_A  = (float *)malloc(sizeof(float) * g_nedge);
    g_r  = (float *)malloc(sizeof(float) * g_nnode);
    g_u  = (float *)malloc(sizeof(float) * g_nnode);
    g_du = (float *)malloc(sizeof(float) * g_nnode);

    // create matrix and r.h.s., and set coordinates needed for
    // renumbering / partitioning
    g_e = 0;
    for (int i = 1; i < NN; i++) {
      for (int j = 1; j < NN; j++) {
        g_n       = i - 1 + (j - 1) * (NN - 1);
        g_r[g_n]  = 0.0f;
        g_u[g_n]  = 0.0f;
        g_du[g_n] = 0.0f;

        g_pp[2 * g_e]     = g_n;
        g_pp[2 * g_e + 1] = g_n;
        g_A[g_e]          = -1.0f;
        g_e++;

        for (int pass = 0; pass < 4; pass++) {
          int i2 = i;
          int j2 = j;
          if (pass == 0) i2 += -1;
          if (pass == 1) i2 += 1;
          if (pass == 2) j2 += -1;
          if (pass == 3) j2 += 1;

          if ((i2 == 0) || (i2 == NN) || (j2 == 0) || (j2 == NN)) {
            g_r[g_n] += 0.25f;
          } else {
            g_pp[2 * g_e]     = g_n;
            g_pp[2 * g_e + 1] = i2 - 1 + (j2 - 1) * (NN - 1);
            g_A[g_e]          = 0.25f;
            g_e++;
          }
        }
      }
    }
  }

  /* Compute local sizes */
  nnode = compute_local_size(g_nnode, comm_size, my_rank);
  nedge = compute_local_size(g_nedge, comm_size, my_rank);
  op_printf("Number of nodes, edges on process %d = %d, %d\n",
            my_rank, nnode, nedge);

  /* Allocate memory to hold local sets, mapping tables and data */
  pp = (int *)malloc(2 * sizeof(int) * nedge);
  A  = (float *)malloc(nedge * sizeof(float));
  r  = (float *)malloc(nnode * sizeof(float));
  u  = (float *)malloc(nnode * sizeof(float));
  du = (float *)malloc(nnode * sizeof(float));

  /* scatter sets, mappings and data on sets */
  scatter_int_array(g_pp, pp, comm_size, g_nedge, nedge, 2);
  scatter_float_array(g_A,  A,  comm_size, g_nedge, nedge, 1);
  scatter_float_array(g_r,  r,  comm_size, g_nnode, nnode, 1);
  scatter_float_array(g_u,  u,  comm_size, g_nnode, nnode, 1);
  scatter_float_array(g_du, du, comm_size, g_nnode, nnode, 1);

  /* Free memory allocated to global arrays on rank 0
     after scattering to all processes */
  if (my_rank == MPI_ROOT) {
    free(g_pp);
    free(g_A);
    free(g_r);
    free(g_u);
    free(g_du);
  }

  /**------------------------END I/O and PARTITIONING ---------------------**/

  // declare sets, pointers, and datasets
  op_set nodes = op_decl_set(nnode, "nodes");
  op_set edges = op_decl_set(nedge, "edges");

  op_map ppedge = op_decl_map(edges, nodes, 2, pp, "ppedge");

  op_dat p_A  = op_decl_dat(edges, 1, "float", A,  "p_A");
  op_dat p_r  = op_decl_dat(nodes, 1, "float", r,  "p_r");
  op_dat p_u  = op_decl_dat(nodes, 1, "float", u,  "p_u");
  op_dat p_du = op_decl_dat(nodes, 1, "float", du, "p_du");

  alpha = 1.0f;
  op_decl_const(1, "float", &alpha);

  op_diagnostic_output();

  // trigger partitioning and halo creation routines
  op_partition("PTSCOTCH", "KWAY", NULL, NULL, NULL);

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main iteration loop
  float u_sum, u_max, beta = 1.0f;
  for (int iter = 0; iter < NITER; iter++) {
    op_par_loop(res, "res", edges,
                op_arg_dat(p_A,  -1, OP_ID,  1, "float", OP_READ),
                op_arg_dat(p_u,   1, ppedge, 1, "float", OP_READ),
                op_arg_dat(p_du,  0, ppedge, 1, "float", OP_INC),
                op_arg_gbl(&beta, 1, "float", OP_READ));

    u_sum = 0.0f;
    u_max = 0.0f;
    op_par_loop(update, "update", nodes,
                op_arg_dat(p_r,  -1, OP_ID, 1, "float", OP_READ),
                op_arg_dat(p_du, -1, OP_ID, 1, "float", OP_RW),
                op_arg_dat(p_u,  -1, OP_ID, 1, "float", OP_INC),
                op_arg_gbl(&u_sum, 1, "float", OP_INC),
                op_arg_gbl(&u_max, 1, "float", OP_MAX));

    op_printf("\n u max/rms = %f %f \n\n", u_max, sqrt(u_sum / g_nnode));
  }

  op_timers(&cpu_t2, &wall_t2);

  // get results data array
  op_fetch_data(p_u, u);

  // output the result dat array to files
  op_print_dat_to_txtfile(p_u, "out_grid_mpi.dat"); // ASCII
  op_print_dat_to_binfile(p_u, "out_grid_mpi.bin"); // Binary

  printf("solution on rank %d\n", my_rank);
  for (int i = 0; i < nnode; i++) {
    printf(" %7.4f", u[i]);
    fflush(stdout);
  }
  printf("\n");

  // print each mpi process's timing info for each kernel
  op_timing_output();

  // print total time for NITER iterations
  op_printf("Max total runtime = %f\n", wall_t2 - wall_t1);

  // gather results from all ranks and check
  float *ug = (float *)malloc(sizeof(float) * op_get_size(nodes));
  op_fetch_data_idx(p_u, ug, 0, op_get_size(nodes) - 1);
  int result = check_result<float>(ug, NN, TOLERANCE);
  free(ug);

  op_exit();

  free(u);
  free(pp);
  free(A);
  free(r);
  free(du);

  return result;
}
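/* The op_par_loop calls in the Jacobi listings reference the user kernels
   res and update, which the excerpts do not show. The bodies below follow
   the standard OP2 Jacobi example and are an illustrative reconstruction;
   alpha is the global constant registered with op_decl_const. */

void res(const float *A, const float *u, float *du, const float *beta) {
  *du += (*beta) * (*A) * (*u); // accumulate off-diagonal contribution
}

void update(const float *r, float *du, float *u, float *u_sum, float *u_max) {
  *u += *du + alpha * (*r);           // Jacobi update
  *du = 0.0f;                         // reset increment for the next iteration
  *u_sum += (*u) * (*u);              // accumulated for the rms reduction
  *u_max = *u_max > *u ? *u_max : *u; // running maximum for the OP_MAX reduction
}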
int main(int argc, char **argv) {
  int my_rank;
  int comm_size;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

  // timer
  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  double time;
  double max_time;

  int *pp;
  float *A, *r, *u, *du;
  int nnode, nedge;

  /**------------------------BEGIN I/O and PARTITIONING ---------------------**/

  int g_nnode, g_nedge, g_n, g_e;
  float g_dx;

  g_nnode = (NN - 1) * (NN - 1);
  g_nedge = (NN - 1) * (NN - 1) + 4 * (NN - 1) * (NN - 2);
  g_dx    = 1.0f / ((float)NN);

  int *g_pp = 0;
  float *g_A = 0, *g_r = 0, *g_u = 0, *g_du = 0;

  if (my_rank == MPI_ROOT) {
    printf("Global number of nodes, edges = %d, %d\n", g_nnode, g_nedge);

    g_pp = (int *)malloc(sizeof(int) * 2 * g_nedge);
    g_A  = (float *)malloc(sizeof(float) * g_nedge);
    g_r  = (float *)malloc(sizeof(float) * g_nnode);
    g_u  = (float *)malloc(sizeof(float) * g_nnode);
    g_du = (float *)malloc(sizeof(float) * g_nnode);

    // create matrix and r.h.s., and set coordinates needed for
    // renumbering / partitioning
    g_e = 0;
    for (int i = 1; i < NN; i++) {
      for (int j = 1; j < NN; j++) {
        g_n       = i - 1 + (j - 1) * (NN - 1);
        g_r[g_n]  = 0.0f;
        g_u[g_n]  = 0.0f;
        g_du[g_n] = 0.0f;

        g_pp[2 * g_e]     = g_n;
        g_pp[2 * g_e + 1] = g_n;
        g_A[g_e]          = -1.0f;
        g_e++;

        for (int pass = 0; pass < 4; pass++) {
          int i2 = i;
          int j2 = j;
          if (pass == 0) i2 += -1;
          if (pass == 1) i2 += 1;
          if (pass == 2) j2 += -1;
          if (pass == 3) j2 += 1;

          if ((i2 == 0) || (i2 == NN) || (j2 == 0) || (j2 == NN)) {
            g_r[g_n] += 0.25f;
          } else {
            g_pp[2 * g_e]     = g_n;
            g_pp[2 * g_e + 1] = i2 - 1 + (j2 - 1) * (NN - 1);
            g_A[g_e]          = 0.25f;
            g_e++;
          }
        }
      }
    }
  }

  /* Compute local sizes */
  nnode = compute_local_size(g_nnode, comm_size, my_rank);
  nedge = compute_local_size(g_nedge, comm_size, my_rank);
  printf("Number of nodes, edges on process %d = %d, %d\n",
         my_rank, nnode, nedge);

  /* Allocate memory to hold local sets, mapping tables and data */
  pp = (int *)malloc(2 * sizeof(int) * nedge);
  A  = (float *)malloc(nedge * sizeof(float));
  r  = (float *)malloc(nnode * sizeof(float));
  u  = (float *)malloc(nnode * sizeof(float));
  du = (float *)malloc(nnode * sizeof(float));

  /* scatter sets, mappings and data on sets */
  scatter_int_array(g_pp, pp, comm_size, g_nedge, nedge, 2);
  scatter_float_array(g_A,  A,  comm_size, g_nedge, nedge, 1);
  scatter_float_array(g_r,  r,  comm_size, g_nnode, nnode, 1);
  scatter_float_array(g_u,  u,  comm_size, g_nnode, nnode, 1);
  scatter_float_array(g_du, du, comm_size, g_nnode, nnode, 1);

  /* Free memory allocated to global arrays on rank 0
     after scattering to all processes */
  if (my_rank == MPI_ROOT) {
    free(g_pp);
    free(g_A);
    free(g_r);
    free(g_u);
    free(g_du);
  }

  /**------------------------END I/O and PARTITIONING ---------------------**/

  // OP initialisation
  op_init(argc, argv, 2);

  // declare sets, pointers, and datasets
  op_set nodes = op_decl_set(nnode, "nodes");
  op_set edges = op_decl_set(nedge, "edges");

  op_map ppedge = op_decl_map(edges, nodes, 2, pp, "ppedge");

  op_dat p_A  = op_decl_dat(edges, 1, "float", A,  "p_A");
  op_dat p_r  = op_decl_dat(nodes, 1, "float", r,  "p_r");
  op_dat p_u  = op_decl_dat(nodes, 1, "float", u,  "p_u");
  op_dat p_du = op_decl_dat(nodes, 1, "float", du, "p_du");

  alpha = 1.0f;
  op_decl_const(1, "float", &alpha);

  op_diagnostic_output();

  // random partitioning for diagnostic purposes
  // op_partition_random(nodes);

  // create halos
  op_halo_create();

  // initialise timers for total execution wall time
  op_timers(&cpu_t1, &wall_t1);

  // main iteration loop
  float u_sum, u_max, beta = 1.0f;
  for (int iter = 0; iter < NITER; iter++) {
    op_par_loop(res, "res", edges,
                op_arg_dat(p_A,  -1, OP_ID,  1, "float", OP_READ),
                op_arg_dat(p_u,   1, ppedge, 1, "float", OP_READ),
                op_arg_dat(p_du,  0, ppedge, 1, "float", OP_INC),
                op_arg_gbl(&beta, 1, "float", OP_READ));

    u_sum = 0.0f;
    u_max = 0.0f;
    op_par_loop(update, "update", nodes,
                op_arg_dat(p_r,  -1, OP_ID, 1, "float", OP_READ),
                op_arg_dat(p_du, -1, OP_ID, 1, "float", OP_RW),
                op_arg_dat(p_u,  -1, OP_ID, 1, "float", OP_INC),
                op_arg_gbl(&u_sum, 1, "float", OP_INC),
                op_arg_gbl(&u_max, 1, "float", OP_MAX));

    if (my_rank == MPI_ROOT)
      printf("\n u max/rms = %f %f \n\n", u_max, sqrt(u_sum / g_nnode));
  }

  op_timers(&cpu_t2, &wall_t2);

  // get results data array
  op_dat temp = op_mpi_get_data(p_u);

  // output the result dat array to files
  print_dat_tofile(temp, "out_grid.dat");    // ASCII
  print_dat_tobinfile(temp, "out_grid.bin"); // Binary

  // free memory allocated to halos
  op_halo_destroy();

  // return all op_dats, op_maps back to original element order
  op_partition_reverse();

  // print each mpi process's timing info for each kernel
  op_mpi_timing_output();

  // print total time for NITER iterations
  time = wall_t2 - wall_t1;
  MPI_Reduce(&time, &max_time, 1, MPI_DOUBLE, MPI_MAX, MPI_ROOT, MPI_COMM_WORLD);
  if (my_rank == MPI_ROOT)
    printf("Max total runtime = %f\n", max_time);

  MPI_Finalize(); // user MPI finalise
}